From 711eb1d91832267bdd1fe2bc57eeebba9e637c52 Mon Sep 17 00:00:00 2001 From: Kai Stevenson Date: Wed, 20 Nov 2024 21:11:38 -0800 Subject: init --- src/lexer.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 src/lexer.c (limited to 'src/lexer.c') diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..f950dcb --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,190 @@ +#include +#include +#include "lexer.h" + +token_t init_token(tokenType_t type, void* value) { + token_t t; + t.type = type; + t.value = value; + return t; +} +tokenSet_t init_tokenset(token_t* tokens, int count) { + tokenSet_t tokenSet; + tokenSet.tokens = tokens; + tokenSet.count = count; + return tokenSet; +} +char* tttos(tokenType_t tokenType) { + switch (tokenType) { + case ERR: + return "ERR"; + case EOL: + return "EOL"; + case INDENT: + return "INDENT"; + case STRING: + return "STRING"; + case INT: + return "INT"; + case OPEN_PAREN: + return "OPEN_PAREN"; + case CLOSE_PAREN: + return "CLOSE_PAREN"; + case START_COMMENT: + return "START_COMMENT"; + default: + return "???"; + } +} +int advance_whitespace(char** linePointer) { + int64_t a = (int64_t)linePointer; + while (**linePointer == ' ' || **linePointer == ' ') { + (*linePointer)++; + } + return (int)((int64_t)linePointer - a); +} +keyword_t try_get_keyword(char* name) { + if (strcmp(name, "VOID") == 0) { + return K_VOID; + } + else if (strcmp(name, "INT") == 0) { + return K_INT; + } + else if (strcmp(name, "FLOAT") == 0) { + return K_FLOAT; + } + else if (strcmp(name, "STRING") == 0) { + return K_STRING; + } + return K_NONE; +} +char keyword_is_type(keyword_t keyword) { + return keyword == K_VOID || keyword == K_INT + || keyword == K_FLOAT || keyword == K_STRING; +} +token_t lex_first_token(char** content) { + int m = 8 + char* s = malloc(sizeof(char) * m); + int c = 0; + advance_whitespace(content); + while (**content != '\n') { + s[c] = **content; + s[c+1] = '\0'; + (*content)++; + /* check if we need to parse a string or an int */ + //TODO: string parsing with quotes + /*if (s[c] >= 65 && s[c] <= 90) { + free(s); + (*content)--; + return lex_first_string(content); + }*/ + if (s[c] >= 48 && s[c] <= 57) { + free(s); + (*content)--; + return lex_first_int(content); + } + else if (s[c] == ' ' || s[c] == '\n') { + if (strcmp(s, ";") == 0) { + free(s); + return init_token(SEMICOLON, NULL); + } + else if (strcmp(s, "(") == 0) { + free(s); + return init_token(OPEN_PAREN, NULL); + } + else if (strcmp(s, ")") == 0) { + free(s); + return init_token(CLOSE_PAREN, NULL); + } + else if (strcmp(s, "{") == 0) { + free(s); + return init_token(OPEN_BRACE, NULL); + } + else if (strcmp(s, "}") == 0) { + free(s); + return init_token(CLOSE_BRACE, NULL); + } + else if (strcmp(s, "[") == 0) { + free(s); + return init_token(OPEN_BRACKET, NULL); + } + else if (strcmp(s, "]") == 0) { + free(s); + return init_token(CLOSE_BRACKET, NULL); + } + else if (strcmp(s, "#") == 0 || strcmp(s, "//") == 0) { + free(s); + return init_token(START_COMMENT, NULL); + } + else if (strcmp(s, "=") == 0) { + free(s); + return init_token(EQUALS, NULL); + } + else if (strcmp(s, "==") == 0) { + free(s); + return init_token(DOUBLE_EQUALS, NULL); + } + else if (strcmp(s, "!") == 0) { + free(s); + return init_token(NEGATION, NULL); + } + else if (strcmp(s, "*") == 0) { + free(s); + return init_token(ASTERISK, NULL); + } + else if (strcmp(s, "+") == 0) { + free(s); + return init_token(PLUS_SIGN, NULL); + } + else if (strcmp(s, "func") == 0) { + free(s); + return init_token(FUNCTION, NULL); + } + keyword_t keyword = try_get_keyword(s); + if (keyword != K_NONE) { + free(s); + keyword* kp = malloc(sizeof(keyword_t)); + *kp = keyword; + return init_token(KEYWORD, kp); + } + char* ns_p = malloc(sizeof(char) * c); + strcpy(ns_p, s); + free(s); + return init_token(NAME, ns_p); + } + else if (++c == m - 1) { + m *= 2; + s = realloc(s, sizeof(char) * m) + } + } + return init_token(EOL, NULL); +} +token_t lex_first_string(char** content) { + int m = 50; + char* s = malloc(sizeof(char) * m); + int c = 0; + advance_whitespace(content); + while (**content >= 65 && **content <= 90) { + s[c] = **content; + (*content)++; + c++; + } + s[c] = '\0'; + return init_token(STRING, s); +} +token_t lex_first_int(char** content) { + int m = 10; + char* s = malloc(sizeof(char) * m); + int c = 0; + advance_whitespace(content); + while (**content >= 48 && **content <= 57) { + s[c] = **content; + (*content)++; + c++; + } + s[c] = '\0'; + int* ip = malloc(sizeof(int)); + *ip = atoi(s); + free(s); + return init_token(INT, (void*)ip); +} -- cgit v1.2.3-70-g09d2