From 711eb1d91832267bdd1fe2bc57eeebba9e637c52 Mon Sep 17 00:00:00 2001
From: Kai Stevenson
Date: Wed, 20 Nov 2024 21:11:38 -0800
Subject: init

---
 README      |   1 +
 src/echo.c  | 101 ++++++++++++++++++++++++++++
 src/echo.h  |   4 ++
 src/lexer.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lexer.h |  56 ++++++++++++++++
 src/parse.c |  46 +++++++++++++
 src/parse.h |  28 ++++++++
 7 files changed, 456 insertions(+)
 create mode 100644 README
 create mode 100644 src/echo.c
 create mode 100644 src/echo.h
 create mode 100644 src/lexer.c
 create mode 100644 src/lexer.h
 create mode 100644 src/parse.c
 create mode 100644 src/parse.h

diff --git a/README b/README
new file mode 100644
index 0000000..3e492b3
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+Statically typed, compiled, and imperative programming language
diff --git a/src/echo.c b/src/echo.c
new file mode 100644
index 0000000..1efbcda
--- /dev/null
+++ b/src/echo.c
@@ -0,0 +1,101 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "echo.h"
+#include "parse.h"
+
+/*
+ * Lex all of `content` into one heap-allocated token array. Each line
+ * contributes its tokens followed by an EOL token; lexing stops at the end
+ * of the input or after the first ERR token. The caller owns the array.
+ */
+static tokenSet_t lex_all(char* content) {
+    int capacity = 16;
+    int count = 0;
+    token_t* tokens = malloc((size_t)capacity * sizeof *tokens);
+    if (tokens == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    char* cursor = content;
+    for (;;) {
+        if (count == capacity) {
+            /* keep the old pointer valid until realloc has succeeded */
+            token_t* grown = realloc(tokens, (size_t)capacity * 2 * sizeof *tokens);
+            if (grown == NULL) {
+                fprintf(stderr, "out of memory\n");
+                exit(EXIT_FAILURE);
+            }
+            tokens = grown;
+            capacity *= 2;
+        }
+        token_t token = lex_first_token(&cursor);
+        tokens[count++] = token;
+        if (token.type == ERR) {
+            break;
+        }
+        if (token.type == EOL) {
+            if (*cursor != '\n') {
+                break; /* EOL at the terminating NUL: end of input */
+            }
+            cursor++; /* lex_first_token leaves the newline unconsumed */
+        }
+    }
+    return init_tokenset(tokens, count);
+}
+
+/*
+ * Entry point: reads the source file named by argv[1], lexes it and hands
+ * the tokens to the parser. Returns EXIT_FAILURE when no path is given or
+ * the file cannot be read.
+ */
+int main(int argc, char** argv) {
+    /* argv[0] is the program name, so a path exists only when argc > 1 */
+    if (argc < 2) {
+        fprintf(stderr, "usage: echo <source-file>\n");
+        return EXIT_FAILURE;
+    }
+    char* content = read_file(argv[1]);
+    if (content == NULL) {
+        return EXIT_FAILURE;
+    }
+    tokenSet_t tokens = lex_all(content);
+    int seek = 0;
+    parse(tokens, &seek);
+    for (int i = 0; i < tokens.count; i++) {
+        free(tokens.tokens[i].value); /* NULL for payload-free tokens */
+    }
+    free(tokens.tokens);
+    free(content);
+    return EXIT_SUCCESS;
+}
+
+/*
+ * Read the whole file at `path` into a freshly allocated, NUL-terminated
+ * buffer. Returns NULL (after printing a message to stderr) when the file
+ * cannot be opened, measured or allocated for. The caller frees the result.
+ */
+char* read_file(char* path) {
+    FILE* file = fopen(path, "r");
+    if (file == NULL) {
+        fprintf(stderr, "file %s could not be opened\n", path);
+        return NULL;
+    }
+    char* out = NULL;
+    long length = -1;
+    if (fseek(file, 0, SEEK_END) == 0) {
+        length = ftell(file);
+    }
+    if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
+        out = malloc((size_t)length + 1);
+    }
+    if (out == NULL) {
+        fprintf(stderr, "file %s could not be read\n", path);
+    }
+    else {
+        /* text mode may yield fewer bytes than ftell reported */
+        size_t got = fread(out, 1, (size_t)length, file);
+        out[got] = '\0';
+    }
+    fclose(file);
+    return out;
+}
diff --git a/src/echo.h b/src/echo.h
new file mode 100644
index 0000000..32af69f
--- /dev/null
+++ b/src/echo.h
@@ -0,0 +1,4 @@
+#ifndef ECHO
+#define ECHO
+char* read_file(char* path);
+#endif
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..f950dcb
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,220 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/* Bundle a token type with its payload pointer (NULL when there is none). */
+token_t init_token(tokenType_t type, void* value) {
+    token_t t;
+    t.type = type;
+    t.value = value;
+    return t;
+}
+/* Wrap a token array and its length; the array is not copied. */
+tokenSet_t init_tokenset(token_t* tokens, int count) {
+    tokenSet_t tokenSet;
+    tokenSet.tokens = tokens;
+    tokenSet.count = count;
+    return tokenSet;
+}
+/* Name of a token type for diagnostics; "???" for values outside the enum. */
+char* tttos(tokenType_t tokenType) {
+    switch (tokenType) {
+        case ERR:
+            return "ERR";
+        case EOL:
+            return "EOL";
+        case NAME:
+            return "NAME";
+        case KEYWORD:
+            return "KEYWORD";
+        case STRING_LIT:
+            return "STRING_LIT";
+        case INT_LIT:
+            return "INT_LIT";
+        case SEMICOLON:
+            return "SEMICOLON";
+        case OPEN_PAREN:
+            return "OPEN_PAREN";
+        case CLOSE_PAREN:
+            return "CLOSE_PAREN";
+        case OPEN_BRACE:
+            return "OPEN_BRACE";
+        case CLOSE_BRACE:
+            return "CLOSE_BRACE";
+        case OPEN_BRACKET:
+            return "OPEN_BRACKET";
+        case CLOSE_BRACKET:
+            return "CLOSE_BRACKET";
+        case START_COMMENT:
+            return "START_COMMENT";
+        case EQUALS:
+            return "EQUALS";
+        case DOUBLE_EQUALS:
+            return "DOUBLE_EQUALS";
+        case NEGATION:
+            return "NEGATION";
+        case ASTERISK:
+            return "ASTERISK";
+        case PLUS_SIGN:
+            return "PLUS_SIGN";
+        case FUNCTION:
+            return "FUNCTION";
+        default:
+            return "???";
+    }
+}
+/*
+ * Advance *linePointer past any run of spaces and tabs and return how many
+ * characters were skipped.
+ */
+int advance_whitespace(char** linePointer) {
+    char* start = *linePointer;
+    while (**linePointer == ' ' || **linePointer == '\t') {
+        (*linePointer)++;
+    }
+    /* measure the cursor itself; the char** argument never moves */
+    return (int)(*linePointer - start);
+}
+/* Map a type name to its keyword, or K_NONE when `name` is not a keyword. */
+keyword_t try_get_keyword(char* name) {
+    if (strcmp(name, "VOID") == 0) {
+        return K_VOID;
+    }
+    else if (strcmp(name, "INT") == 0) {
+        return K_INT;
+    }
+    else if (strcmp(name, "FLOAT") == 0) {
+        return K_FLOAT;
+    }
+    else if (strcmp(name, "STRING") == 0) {
+        return K_STRING;
+    }
+    return K_NONE;
+}
+/* Non-zero when `keyword` is one of K_VOID, K_INT, K_FLOAT or K_STRING. */
+char keyword_is_type(keyword_t keyword) {
+    return keyword == K_VOID || keyword == K_INT
+        || keyword == K_FLOAT || keyword == K_STRING;
+}
+/* Fixed spellings that map directly onto a payload-free token type. */
+static const struct {
+    const char* text;
+    tokenType_t type;
+} fixed_tokens[] = {
+    { ";", SEMICOLON },
+    { "(", OPEN_PAREN },
+    { ")", CLOSE_PAREN },
+    { "{", OPEN_BRACE },
+    { "}", CLOSE_BRACE },
+    { "[", OPEN_BRACKET },
+    { "]", CLOSE_BRACKET },
+    { "#", START_COMMENT },
+    { "//", START_COMMENT },
+    { "=", EQUALS },
+    { "==", DOUBLE_EQUALS },
+    { "!", NEGATION },
+    { "*", ASTERISK },
+    { "+", PLUS_SIGN },
+    { "func", FUNCTION },
+};
+/*
+ * Lex one whitespace-delimited token starting at *content and leave *content
+ * on the delimiter that ended it. Returns EOL (consuming nothing) at a
+ * newline or at the terminating NUL, an INT_LIT when the token starts with a
+ * digit, a fixed token or KEYWORD when the word matches one, and otherwise a
+ * NAME owning a copy of the word. Returns ERR if memory runs out.
+ */
+token_t lex_first_token(char** content) {
+    advance_whitespace(content);
+    char* start = *content;
+    if (*start == '\n' || *start == '\0') {
+        return init_token(EOL, NULL);
+    }
+    /* only a leading digit starts a number; "x1" stays a single word */
+    if (*start >= '0' && *start <= '9') {
+        return lex_first_int(content);
+    }
+    //TODO: string parsing with quotes
+    while (**content != ' ' && **content != '\t'
+            && **content != '\n' && **content != '\0') {
+        (*content)++;
+    }
+    size_t length = (size_t)(*content - start);
+    /* copy the word without its delimiter so the comparisons can match */
+    char* word = malloc(length + 1);
+    if (word == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(word, start, length);
+    word[length] = '\0';
+    for (size_t i = 0; i < sizeof fixed_tokens / sizeof fixed_tokens[0]; i++) {
+        if (strcmp(word, fixed_tokens[i].text) == 0) {
+            free(word);
+            return init_token(fixed_tokens[i].type, NULL);
+        }
+    }
+    keyword_t keyword = try_get_keyword(word);
+    if (keyword != K_NONE) {
+        free(word);
+        keyword_t* kp = malloc(sizeof *kp);
+        if (kp == NULL) {
+            return init_token(ERR, NULL);
+        }
+        *kp = keyword;
+        return init_token(KEYWORD, kp);
+    }
+    return init_token(NAME, word);
+}
+/*
+ * Lex a run of uppercase ASCII letters (after optional leading whitespace)
+ * into a STRING_LIT token owning a copy of the run. Returns ERR if memory
+ * runs out.
+ */
+token_t lex_first_string(char** content) {
+    advance_whitespace(content);
+    char* start = *content;
+    while (**content >= 'A' && **content <= 'Z') {
+        (*content)++;
+    }
+    /* size the copy from the run itself instead of a fixed 50-byte buffer */
+    size_t length = (size_t)(*content - start);
+    char* s = malloc(length + 1);
+    if (s == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(s, start, length);
+    s[length] = '\0';
+    return init_token(STRING_LIT, s);
+}
+/*
+ * Lex a run of decimal digits (after optional leading whitespace) into an
+ * INT_LIT token owning an int. An empty run yields 0, as before. Returns ERR
+ * when the value does not fit in an int or memory runs out; the digits are
+ * consumed either way.
+ */
+token_t lex_first_int(char** content) {
+    advance_whitespace(content);
+    int value = 0;
+    int overflowed = 0;
+    while (**content >= '0' && **content <= '9') {
+        int digit = **content - '0';
+        /* test before multiplying: signed overflow is undefined */
+        if (value > (INT_MAX - digit) / 10) {
+            overflowed = 1;
+        }
+        else {
+            value = value * 10 + digit;
+        }
+        (*content)++;
+    }
+    if (overflowed) {
+        return init_token(ERR, NULL);
+    }
+    int* ip = malloc(sizeof *ip);
+    if (ip == NULL) {
+        return init_token(ERR, NULL);
+    }
+    *ip = value;
+    return init_token(INT_LIT, ip);
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..52d00fa
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,56 @@
+#ifndef LEXER
+#define LEXER
+/* Kinds of token the lexer can produce. */
+typedef enum TokenType {
+    ERR,
+    EOL,
+    NAME,
+    KEYWORD,
+    STRING_LIT,
+    INT_LIT,
+    SEMICOLON,
+    OPEN_PAREN,
+    CLOSE_PAREN,
+    OPEN_BRACE,
+    CLOSE_BRACE,
+    OPEN_BRACKET,
+    CLOSE_BRACKET,
+    START_COMMENT,
+    EQUALS,
+    DOUBLE_EQUALS,
+    NEGATION,
+    ASTERISK,
+    PLUS_SIGN,
+    FUNCTION
+} tokenType_t;
+/* Built-in type keywords; K_NONE means "not a keyword". */
+typedef enum Keyword {
+    K_NONE,
+    K_VOID,
+    K_INT,
+    K_FLOAT,
+    K_STRING
+} keyword_t;
+/*
+ * A token and its optional heap-allocated payload: NAME and STRING_LIT carry
+ * a char*, KEYWORD a keyword_t*, INT_LIT an int*; every other type has NULL.
+ */
+typedef struct Token {
+    tokenType_t type;
+    void* value;
+} token_t;
+token_t init_token(tokenType_t type, void* value);
+/* A token array together with its length. */
+typedef struct TokenSet {
+    token_t* tokens;
+    int count;
+} tokenSet_t;
+tokenSet_t init_tokenset(token_t* tokens, int count);
+char* tttos(tokenType_t tokenType);
+int advance_whitespace(char** linePointer);
+keyword_t try_get_keyword(char* name);
+char keyword_is_type(keyword_t keyword);
+token_t lex_first_token(char** content);
+token_t lex_first_string(char** content);
+token_t lex_first_int(char** content);
+#endif
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..10e8b6b
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,46 @@
+#include "parse.h"
+#include "lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Parse the token at *seek, advancing *seek past it. Exits with status 1 on
+ * an ERR or EOL token or when *seek is outside the token set. A NAME is
+ * handed to parse_name; other token types are not handled yet and yield a
+ * zeroed symbol.
+ */
+sym_t parse(tokenSet_t tokens, int* seek) {
+    sym_t none = {0};
+    if (*seek < 0 || *seek >= tokens.count) {
+        exit(1);
+    }
+    token_t token = tokens.tokens[(*seek)++];
+    if (token.type == ERR || token.type == EOL) {
+        exit(1);
+    }
+    else if (token.type == NAME) {
+        return parse_name(tokens, seek);
+    }
+    return none;
+}
+/*
+ * Parse what follows a NAME: consumes the token at *seek and inspects it.
+ * Nothing is built from it yet, so the result is always a zeroed symbol.
+ * Exits with status 1 when *seek is outside the token set.
+ */
+sym_t parse_name(tokenSet_t tokens, int* seek) {
+    sym_t none = {0};
+    if (*seek < 0 || *seek >= tokens.count) {
+        exit(1);
+    }
+    token_t token = tokens.tokens[(*seek)++];
+    if (token.type == KEYWORD) {
+        /* a KEYWORD payload is a heap keyword_t, not the keyword itself */
+        if (keyword_is_type(*(keyword_t*)token.value)) {
+            /* TODO: parse a definition introduced by a type keyword */
+        }
+    }
+    return none;
+}
+/* TODO: sym_t parse_definition(tokenSet_t tokens, int* seek, ...) -- parameter list still unfinished */
diff --git a/src/parse.h b/src/parse.h
new file mode 100644
index 0000000..144e88c
--- /dev/null
+++ b/src/parse.h
@@ -0,0 +1,28 @@
+#include "lexer.h"
+#ifndef PARSE
+#define PARSE
+typedef enum PrimitiveType {
+    VOID,
+    INT,
+    FLOAT,
+    STRING
+} primitive_t;
+/* A_ prefix keeps these apart from the FUNCTION token type in lexer.h. */
+typedef enum AbstractType {
+    A_FUNCTION,
+    A_ARRAY
+} abstract_t;
+typedef struct Type {
+    primitive_t primitive;
+    abstract_t abstract;
+    struct Type* referent;
+} type_t;
+typedef struct Symbol {
+    type_t type;
+    void* name;
+    struct Symbol* args;
+} sym_t;
+/* Both parsers read tokens.tokens[*seek] and advance *seek past it. */
+sym_t parse(tokenSet_t tokens, int* seek);
+sym_t parse_name(tokenSet_t tokens, int* seek);
+#endif
-- 
cgit v1.2.3-70-g09d2