summaryrefslogtreecommitdiff
path: root/src/lexer.c
diff options
context:
space:
mode:
author Kai Stevenson <kai@kaistevenson.com> 2024-11-20 21:11:38 -0800
committer Kai Stevenson <kai@kaistevenson.com> 2024-11-20 21:11:38 -0800
commit 711eb1d91832267bdd1fe2bc57eeebba9e637c52 (patch)
tree 6cbd10ee276f1cb8119d2528cc1f7a04894228de /src/lexer.c
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c 190
1 files changed, 190 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..f950dcb
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,190 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Builds a token from a type tag and an optional payload pointer.
+ * The pointer is stored as-is; nothing is copied or allocated here.
+ */
+token_t init_token(tokenType_t type, void* value) {
+    token_t result = { .type = type, .value = value };
+    return result;
+}
+/*
+ * Wraps an existing token array and its element count in a tokenSet_t.
+ * The array pointer is stored as-is; the tokens are not copied.
+ */
+tokenSet_t init_tokenset(token_t* tokens, int count) {
+    tokenSet_t result = { .tokens = tokens, .count = count };
+    return result;
+}
+/*
+ * "Token type to string": returns a printable name for a token type.
+ *
+ * Covers every token type this file's lexer can produce, so debug output
+ * no longer shows "???" for punctuation, operators, keywords and names.
+ * The returned pointer refers to a string literal; do not modify or free it.
+ * Unknown values yield "???".
+ */
+char* tttos(tokenType_t tokenType) {
+    switch (tokenType) {
+    case ERR:
+        return "ERR";
+    case EOL:
+        return "EOL";
+    case INDENT:
+        return "INDENT";
+    case STRING:
+        return "STRING";
+    case INT:
+        return "INT";
+    case NAME:
+        return "NAME";
+    case KEYWORD:
+        return "KEYWORD";
+    case FUNCTION:
+        return "FUNCTION";
+    case SEMICOLON:
+        return "SEMICOLON";
+    case OPEN_PAREN:
+        return "OPEN_PAREN";
+    case CLOSE_PAREN:
+        return "CLOSE_PAREN";
+    case OPEN_BRACE:
+        return "OPEN_BRACE";
+    case CLOSE_BRACE:
+        return "CLOSE_BRACE";
+    case OPEN_BRACKET:
+        return "OPEN_BRACKET";
+    case CLOSE_BRACKET:
+        return "CLOSE_BRACKET";
+    case START_COMMENT:
+        return "START_COMMENT";
+    case EQUALS:
+        return "EQUALS";
+    case DOUBLE_EQUALS:
+        return "DOUBLE_EQUALS";
+    case NEGATION:
+        return "NEGATION";
+    case ASTERISK:
+        return "ASTERISK";
+    case PLUS_SIGN:
+        return "PLUS_SIGN";
+    default:
+        return "???";
+    }
+}
+/*
+ * Advances *linePointer past any leading spaces and tabs.
+ *
+ * linePointer: address of a cursor into a NUL-terminated buffer; the cursor
+ *              is moved in place.
+ * Returns the number of characters skipped (0 if the cursor did not move).
+ *
+ * The distance is measured between cursor positions (*linePointer), not
+ * from the address of the cursor variable itself, which never changes.
+ */
+int advance_whitespace(char** linePointer) {
+    char* start = *linePointer;
+    while (**linePointer == ' ' || **linePointer == '\t') {
+        (*linePointer)++;
+    }
+    return (int)(*linePointer - start);
+}
+/*
+ * Maps a type-keyword spelling to its keyword_t value.
+ * The comparison is exact and case-sensitive ("VOID", "INT", "FLOAT",
+ * "STRING"); any other text yields K_NONE.
+ */
+keyword_t try_get_keyword(char* name) {
+    static const struct {
+        const char* spelling;
+        keyword_t keyword;
+    } known[] = {
+        { "VOID", K_VOID },
+        { "INT", K_INT },
+        { "FLOAT", K_FLOAT },
+        { "STRING", K_STRING },
+    };
+
+    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
+        if (strcmp(name, known[i].spelling) == 0) {
+            return known[i].keyword;
+        }
+    }
+    return K_NONE;
+}
+/*
+ * Returns 1 when the keyword is one of K_VOID, K_INT, K_FLOAT or K_STRING,
+ * and 0 for every other keyword value.
+ */
+char keyword_is_type(keyword_t keyword) {
+    switch (keyword) {
+    case K_VOID:
+    case K_INT:
+    case K_FLOAT:
+    case K_STRING:
+        return 1;
+    default:
+        return 0;
+    }
+}
+/*
+ * Looks up a lexeme with a fixed spelling (punctuation, operators, "func").
+ * On a match stores the token type in *type and returns 1; otherwise
+ * returns 0 and leaves *type untouched.
+ */
+static int lookup_fixed_token(const char* lexeme, tokenType_t* type) {
+    static const struct {
+        const char* text;
+        tokenType_t type;
+    } fixed[] = {
+        { ";", SEMICOLON },
+        { "(", OPEN_PAREN },
+        { ")", CLOSE_PAREN },
+        { "{", OPEN_BRACE },
+        { "}", CLOSE_BRACE },
+        { "[", OPEN_BRACKET },
+        { "]", CLOSE_BRACKET },
+        { "#", START_COMMENT },
+        { "//", START_COMMENT },
+        { "=", EQUALS },
+        { "==", DOUBLE_EQUALS },
+        { "!", NEGATION },
+        { "*", ASTERISK },
+        { "+", PLUS_SIGN },
+        { "func", FUNCTION },
+    };
+
+    for (size_t i = 0; i < sizeof fixed / sizeof fixed[0]; i++) {
+        if (strcmp(lexeme, fixed[i].text) == 0) {
+            *type = fixed[i].type;
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Lexes the next blank-delimited token starting at *content and advances
+ * *content past it.
+ *
+ * content: address of a cursor into a NUL-terminated buffer.
+ *
+ * Returns:
+ *   - EOL (value NULL) when only blanks remain before '\n' or the end of
+ *     the buffer; the '\n' itself is not consumed.
+ *   - whatever lex_first_int returns when the token starts with a digit.
+ *   - a fixed token (value NULL) for punctuation, operators and "func".
+ *   - KEYWORD, with value pointing to a heap-allocated keyword_t.
+ *   - NAME, with value pointing to a heap-allocated NUL-terminated copy of
+ *     the lexeme.
+ *   - ERR (value NULL) if memory cannot be allocated.
+ * The caller owns (and must free) the value of KEYWORD and NAME tokens.
+ *
+ * A token ends at a space, tab, '\n' or '\0'. A single trailing space or
+ * tab is consumed along with the token; '\n' is left in place so the next
+ * call reports EOL.
+ */
+token_t lex_first_token(char** content) {
+    advance_whitespace(content);
+    /* Tabs separate tokens too; skip any blanks still in front of us. */
+    while (**content == ' ' || **content == '\t') {
+        (*content)++;
+    }
+
+    if (**content == '\n' || **content == '\0') {
+        return init_token(EOL, NULL);
+    }
+
+    /* Only a leading digit starts an integer; "abc1" stays one word. */
+    if (**content >= '0' && **content <= '9') {
+        return lex_first_int(content);
+    }
+
+    /* Measure the lexeme first so the buffer is sized exactly. */
+    const char* start = *content;
+    size_t length = 0;
+    while (start[length] != ' ' && start[length] != '\t'
+            && start[length] != '\n' && start[length] != '\0') {
+        length++;
+    }
+
+    char* lexeme = malloc(length + 1);
+    if (lexeme == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(lexeme, start, length);
+    lexeme[length] = '\0';
+
+    *content += length;
+    if (**content == ' ' || **content == '\t') {
+        (*content)++;
+    }
+
+    tokenType_t fixedType;
+    if (lookup_fixed_token(lexeme, &fixedType)) {
+        free(lexeme);
+        return init_token(fixedType, NULL);
+    }
+
+    keyword_t keyword = try_get_keyword(lexeme);
+    if (keyword != K_NONE) {
+        free(lexeme);
+        keyword_t* kp = malloc(sizeof *kp);
+        if (kp == NULL) {
+            return init_token(ERR, NULL);
+        }
+        *kp = keyword;
+        return init_token(KEYWORD, kp);
+    }
+
+    /* Ownership of the lexeme buffer passes to the NAME token. */
+    return init_token(NAME, lexeme);
+}
+/*
+ * Lexes a run of uppercase ASCII letters ('A'-'Z') starting at *content,
+ * after skipping leading whitespace, and advances *content past the run.
+ *
+ * Returns a STRING token whose value is a heap-allocated, NUL-terminated
+ * copy of the run (possibly empty); the caller must free it. Returns an
+ * ERR token with a NULL value if memory cannot be allocated.
+ *
+ * The run is measured before allocating, so input of any length fits.
+ */
+token_t lex_first_string(char** content) {
+    advance_whitespace(content);
+
+    const char* start = *content;
+    size_t length = 0;
+    while (start[length] >= 'A' && start[length] <= 'Z') {
+        length++;
+    }
+
+    char* s = malloc(length + 1);
+    if (s == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(s, start, length);
+    s[length] = '\0';
+
+    *content += length;
+    return init_token(STRING, s);
+}
+/*
+ * Lexes a run of decimal digits starting at *content, after skipping
+ * leading whitespace, and advances *content past every digit in the run.
+ *
+ * Returns an INT token whose value points to a heap-allocated int holding
+ * the parsed number (0 when no digits are present); the caller must free
+ * it. Returns an ERR token with a NULL value when the literal does not fit
+ * in an int or when memory cannot be allocated.
+ *
+ * Digits are accumulated directly, so the literal's length is unbounded
+ * and out-of-range input is detected instead of overflowing.
+ */
+token_t lex_first_int(char** content) {
+    advance_whitespace(content);
+
+    int value = 0;
+    int overflowed = 0;
+    while (**content >= '0' && **content <= '9') {
+        int digit = **content - '0';
+        if (!overflowed) {
+            /* value * 10 + digit <= INT_MAX, checked without overflowing */
+            if (value > (INT_MAX - digit) / 10) {
+                overflowed = 1;
+            } else {
+                value = value * 10 + digit;
+            }
+        }
+        (*content)++;
+    }
+
+    if (overflowed) {
+        return init_token(ERR, NULL);
+    }
+
+    int* ip = malloc(sizeof *ip);
+    if (ip == NULL) {
+        return init_token(ERR, NULL);
+    }
+    *ip = value;
+    return init_token(INT, (void*)ip);
+}