#include #include #include "lexer.h" token_t init_token(tokenType_t type, void* value) { token_t t; t.type = type; t.value = value; return t; } tokenSet_t init_tokenset(token_t* tokens, int count) { tokenSet_t tokenSet; tokenSet.tokens = tokens; tokenSet.count = count; return tokenSet; } char* tttos(tokenType_t tokenType) { switch (tokenType) { case ERR: return "ERR"; case EOL: return "EOL"; case INDENT: return "INDENT"; case STRING: return "STRING"; case INT: return "INT"; case OPEN_PAREN: return "OPEN_PAREN"; case CLOSE_PAREN: return "CLOSE_PAREN"; case START_COMMENT: return "START_COMMENT"; default: return "???"; } } int advance_whitespace(char** linePointer) { int64_t a = (int64_t)linePointer; while (**linePointer == ' ' || **linePointer == ' ') { (*linePointer)++; } return (int)((int64_t)linePointer - a); } keyword_t try_get_keyword(char* name) { if (strcmp(name, "VOID") == 0) { return K_VOID; } else if (strcmp(name, "INT") == 0) { return K_INT; } else if (strcmp(name, "FLOAT") == 0) { return K_FLOAT; } else if (strcmp(name, "STRING") == 0) { return K_STRING; } return K_NONE; } char keyword_is_type(keyword_t keyword) { return keyword == K_VOID || keyword == K_INT || keyword == K_FLOAT || keyword == K_STRING; } token_t lex_first_token(char** content) { int m = 8 char* s = malloc(sizeof(char) * m); int c = 0; advance_whitespace(content); while (**content != '\n') { s[c] = **content; s[c+1] = '\0'; (*content)++; /* check if we need to parse a string or an int */ //TODO: string parsing with quotes /*if (s[c] >= 65 && s[c] <= 90) { free(s); (*content)--; return lex_first_string(content); }*/ if (s[c] >= 48 && s[c] <= 57) { free(s); (*content)--; return lex_first_int(content); } else if (s[c] == ' ' || s[c] == '\n') { if (strcmp(s, ";") == 0) { free(s); return init_token(SEMICOLON, NULL); } else if (strcmp(s, "(") == 0) { free(s); return init_token(OPEN_PAREN, NULL); } else if (strcmp(s, ")") == 0) { free(s); return init_token(CLOSE_PAREN, NULL); } else if (strcmp(s, "{") == 0) { free(s); return init_token(OPEN_BRACE, NULL); } else if (strcmp(s, "}") == 0) { free(s); return init_token(CLOSE_BRACE, NULL); } else if (strcmp(s, "[") == 0) { free(s); return init_token(OPEN_BRACKET, NULL); } else if (strcmp(s, "]") == 0) { free(s); return init_token(CLOSE_BRACKET, NULL); } else if (strcmp(s, "#") == 0 || strcmp(s, "//") == 0) { free(s); return init_token(START_COMMENT, NULL); } else if (strcmp(s, "=") == 0) { free(s); return init_token(EQUALS, NULL); } else if (strcmp(s, "==") == 0) { free(s); return init_token(DOUBLE_EQUALS, NULL); } else if (strcmp(s, "!") == 0) { free(s); return init_token(NEGATION, NULL); } else if (strcmp(s, "*") == 0) { free(s); return init_token(ASTERISK, NULL); } else if (strcmp(s, "+") == 0) { free(s); return init_token(PLUS_SIGN, NULL); } else if (strcmp(s, "func") == 0) { free(s); return init_token(FUNCTION, NULL); } keyword_t keyword = try_get_keyword(s); if (keyword != K_NONE) { free(s); keyword* kp = malloc(sizeof(keyword_t)); *kp = keyword; return init_token(KEYWORD, kp); } char* ns_p = malloc(sizeof(char) * c); strcpy(ns_p, s); free(s); return init_token(NAME, ns_p); } else if (++c == m - 1) { m *= 2; s = realloc(s, sizeof(char) * m) } } return init_token(EOL, NULL); } token_t lex_first_string(char** content) { int m = 50; char* s = malloc(sizeof(char) * m); int c = 0; advance_whitespace(content); while (**content >= 65 && **content <= 90) { s[c] = **content; (*content)++; c++; } s[c] = '\0'; return init_token(STRING, s); } token_t lex_first_int(char** content) { int m = 10; char* s = malloc(sizeof(char) * m); int c = 0; advance_whitespace(content); while (**content >= 48 && **content <= 57) { s[c] = **content; (*content)++; c++; } s[c] = '\0'; int* ip = malloc(sizeof(int)); *ip = atoi(s); free(s); return init_token(INT, (void*)ip); }