summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKai Stevenson <kai@kaistevenson.com>2024-11-20 21:11:38 -0800
committerKai Stevenson <kai@kaistevenson.com>2024-11-20 21:11:38 -0800
commit711eb1d91832267bdd1fe2bc57eeebba9e637c52 (patch)
tree6cbd10ee276f1cb8119d2528cc1f7a04894228de /src
Diffstat (limited to 'src')
-rw-r--r--src/echo.c29
-rw-r--r--src/echo.h4
-rw-r--r--src/lexer.c190
-rw-r--r--src/lexer.h49
-rw-r--r--src/parse.c24
-rw-r--r--src/parse.h25
6 files changed, 321 insertions, 0 deletions
diff --git a/src/echo.c b/src/echo.c
new file mode 100644
index 0000000..1efbcda
--- /dev/null
+++ b/src/echo.c
@@ -0,0 +1,29 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "echo.h"
+#include <string.h>
+/*
+ * Entry point: reads the file named by the first command-line argument and
+ * passes its contents to parse().
+ *
+ * Returns EXIT_FAILURE when no path argument is given or read_file() returns
+ * NULL, and 0 otherwise.
+ */
+int main(int argc, char** argv) {
+    /* argv[0] is the program name, so a path is present only when argc > 1. */
+    if (argc < 2) {
+        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "echo");
+        return EXIT_FAILURE;
+    }
+    char* content = read_file(argv[1]);
+    if (content == NULL) {
+        return EXIT_FAILURE;
+    }
+    /* NOTE(review): parse() is defined in parse.c as taking a tokenSet_t and
+     * a seek index, and parse.h is not included here; this call still passes
+     * the raw text and needs a lexing step in between. */
+    parse(content);
+    free(content);
+    return 0;
+}
+/*
+ * Reads the whole file at `path` into a newly allocated, NUL-terminated
+ * buffer.
+ *
+ * Returns the buffer (the caller must free() it), or NULL if the file cannot
+ * be opened, its size cannot be determined, or memory cannot be allocated.
+ */
+char* read_file(char* path) {
+    FILE* file = fopen(path, "r");
+    if (file == NULL) {
+        fprintf(stderr, "file %s does not exist\n", path);
+        return NULL;
+    }
+    char* out = NULL;
+    /* Seek to the end to learn the size; ftell() reports failure as -1. */
+    if (fseek(file, 0, SEEK_END) == 0) {
+        long length = ftell(file);
+        if (length >= 0 && fseek(file, 0, SEEK_SET) == 0) {
+            out = malloc((size_t)length + 1);
+            if (out != NULL) {
+                /* fread() may deliver fewer bytes than ftell() reported
+                 * (short read, text-mode translation), so terminate at the
+                 * count actually read. */
+                size_t got = fread(out, 1, (size_t)length, file);
+                out[got] = '\0';
+            }
+        }
+    }
+    fclose(file);
+    return out;
+}
diff --git a/src/echo.h b/src/echo.h
new file mode 100644
index 0000000..32af69f
--- /dev/null
+++ b/src/echo.h
@@ -0,0 +1,4 @@
+#ifndef ECHO
+#define ECHO
+/* Reads the file at `path` and returns its contents as a heap-allocated,
+ * NUL-terminated string (implemented in echo.c). */
+char* read_file(char* path);
+#endif
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..f950dcb
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,190 @@
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/* Builds a token from a type tag and a payload pointer (which may be NULL). */
+token_t init_token(tokenType_t type, void* value) {
+    return (token_t){ .type = type, .value = value };
+}
+/* Wraps a token array pointer and a count in a tokenSet_t; nothing is copied. */
+tokenSet_t init_tokenset(token_t* tokens, int count) {
+    return (tokenSet_t){ .tokens = tokens, .count = count };
+}
+/*
+ * Returns the enumerator name of `tokenType` as a string literal, or "???"
+ * for a value that is not a tokenType_t enumerator.
+ *
+ * The result points at static storage: do not modify or free it.
+ */
+char* tttos(tokenType_t tokenType) {
+    switch (tokenType) {
+        case ERR:
+            return "ERR";
+        case EOL:
+            return "EOL";
+        case NAME:
+            return "NAME";
+        case KEYWORD:
+            return "KEYWORD";
+        case STRING_LIT:
+            return "STRING_LIT";
+        case INT_LIT:
+            return "INT_LIT";
+        case SEMICOLON:
+            return "SEMICOLON";
+        case OPEN_PAREN:
+            return "OPEN_PAREN";
+        case CLOSE_PAREN:
+            return "CLOSE_PAREN";
+        case OPEN_BRACE:
+            return "OPEN_BRACE";
+        case CLOSE_BRACE:
+            return "CLOSE_BRACE";
+        case OPEN_BRACKET:
+            return "OPEN_BRACKET";
+        case CLOSE_BRACKET:
+            return "CLOSE_BRACKET";
+        case START_COMMENT:
+            return "START_COMMENT";
+        case EQUALS:
+            return "EQUALS";
+        case DOUBLE_EQUALS:
+            return "DOUBLE_EQUALS";
+        case NEGATION:
+            return "NEGATION";
+        case ASTERISK:
+            return "ASTERISK";
+        case PLUS_SIGN:
+            return "PLUS_SIGN";
+        case FUNCTION:
+            return "FUNCTION";
+        default:
+            return "???";
+    }
+}
+/*
+ * Advances *linePointer past any leading spaces and tabs.
+ *
+ * Returns the number of characters skipped.
+ */
+int advance_whitespace(char** linePointer) {
+    char* start = *linePointer;
+    /* NOTE(review): tab is assumed to be intended as whitespace alongside
+     * space - confirm. */
+    while (**linePointer == ' ' || **linePointer == '\t') {
+        (*linePointer)++;
+    }
+    /* Measure how far the cursor moved; the address of the caller's pointer
+     * variable itself never changes. */
+    return (int)(*linePointer - start);
+}
+/*
+ * Looks `name` up among the type keywords "VOID", "INT", "FLOAT" and
+ * "STRING" (exact, case-sensitive match).
+ *
+ * Returns the matching keyword_t, or K_NONE when nothing matches.
+ */
+keyword_t try_get_keyword(char* name) {
+    static const struct {
+        const char* spelling;
+        keyword_t keyword;
+    } known[] = {
+        { "VOID", K_VOID },
+        { "INT", K_INT },
+        { "FLOAT", K_FLOAT },
+        { "STRING", K_STRING },
+    };
+    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
+        if (strcmp(name, known[i].spelling) == 0) {
+            return known[i].keyword;
+        }
+    }
+    return K_NONE;
+}
+/* Returns 1 when `keyword` is K_VOID, K_INT, K_FLOAT or K_STRING, else 0. */
+char keyword_is_type(keyword_t keyword) {
+    switch (keyword) {
+        case K_VOID:
+        case K_INT:
+        case K_FLOAT:
+        case K_STRING:
+            return 1;
+        default:
+            return 0;
+    }
+}
+/*
+ * Lexes the next space-delimited token at *content, advancing *content.
+ *
+ * After skipping leading whitespace:
+ *  - at a newline or the end of the string, returns EOL and consumes nothing;
+ *  - at a decimal digit, delegates to lex_first_int();
+ *  - otherwise reads up to the next space, newline or end of string and
+ *    classifies that text as punctuation, `func`, a type keyword, or a NAME.
+ *
+ * A space that ends the token is consumed; a newline is left in place so the
+ * following call reports EOL.
+ *
+ * KEYWORD tokens carry a heap-allocated keyword_t in `value` and NAME tokens
+ * a heap-allocated NUL-terminated string; the caller owns both. All other
+ * tokens produced here have a NULL value. Returns an ERR token if memory
+ * cannot be allocated.
+ */
+token_t lex_first_token(char** content) {
+    static const struct {
+        const char* text;
+        tokenType_t type;
+    } fixed_tokens[] = {
+        { ";", SEMICOLON },
+        { "(", OPEN_PAREN },
+        { ")", CLOSE_PAREN },
+        { "{", OPEN_BRACE },
+        { "}", CLOSE_BRACE },
+        { "[", OPEN_BRACKET },
+        { "]", CLOSE_BRACKET },
+        { "#", START_COMMENT },
+        { "//", START_COMMENT },
+        { "=", EQUALS },
+        { "==", DOUBLE_EQUALS },
+        { "!", NEGATION },
+        { "*", ASTERISK },
+        { "+", PLUS_SIGN },
+        { "func", FUNCTION },
+    };
+
+    advance_whitespace(content);
+
+    /* Only a leading digit starts an integer literal; a digit later in the
+     * token stays part of that token instead of discarding what came
+     * before it. */
+    if (**content >= '0' && **content <= '9') {
+        return lex_first_int(content);
+    }
+
+    /* Find the end of the token without copying yet, so no growable buffer
+     * is needed. Stopping at '\0' keeps the scan inside the string when the
+     * last line has no trailing newline. */
+    const char* start = *content;
+    while (**content != '\0' && **content != '\n' && **content != ' ') {
+        (*content)++;
+    }
+    size_t length = (size_t)(*content - start);
+    if (length == 0) {
+        return init_token(EOL, NULL);
+    }
+
+    /* Copy the token text WITHOUT its delimiter so the comparisons below
+     * can match. */
+    char* text = malloc(length + 1);
+    if (text == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(text, start, length);
+    text[length] = '\0';
+
+    if (**content == ' ') {
+        (*content)++;
+    }
+
+    for (size_t i = 0; i < sizeof fixed_tokens / sizeof fixed_tokens[0]; i++) {
+        if (strcmp(text, fixed_tokens[i].text) == 0) {
+            free(text);
+            return init_token(fixed_tokens[i].type, NULL);
+        }
+    }
+
+    keyword_t keyword = try_get_keyword(text);
+    if (keyword != K_NONE) {
+        free(text);
+        keyword_t* kp = malloc(sizeof *kp);
+        if (kp == NULL) {
+            return init_token(ERR, NULL);
+        }
+        *kp = keyword;
+        return init_token(KEYWORD, kp);
+    }
+
+    /* Anything else is an identifier; ownership of `text` passes to the
+     * token. */
+    return init_token(NAME, text);
+}
+/*
+ * Lexes a run of uppercase ASCII letters ('A'..'Z') at *content, after
+ * skipping leading whitespace, and advances *content past the run.
+ *
+ * Returns a STRING_LIT token whose value is a heap-allocated, NUL-terminated
+ * copy of the run (possibly empty); the caller owns it. Returns an ERR token
+ * if memory cannot be allocated.
+ */
+token_t lex_first_string(char** content) {
+    advance_whitespace(content);
+
+    /* Measure the run first so the buffer is exactly large enough, however
+     * long the input is. */
+    const char* start = *content;
+    while (**content >= 'A' && **content <= 'Z') {
+        (*content)++;
+    }
+    size_t length = (size_t)(*content - start);
+
+    char* s = malloc(length + 1);
+    if (s == NULL) {
+        return init_token(ERR, NULL);
+    }
+    memcpy(s, start, length);
+    s[length] = '\0';
+    return init_token(STRING_LIT, s);
+}
+/*
+ * Lexes a run of decimal digits at *content, after skipping leading
+ * whitespace, and advances *content past the run.
+ *
+ * Returns an INT_LIT token whose value is a heap-allocated int holding the
+ * parsed number (0 for an empty run, INT_MAX if the digits exceed the range
+ * of int); the caller owns it. Returns an ERR token if memory cannot be
+ * allocated.
+ */
+token_t lex_first_int(char** content) {
+    advance_whitespace(content);
+
+    /* Accumulate directly from the input: no intermediate buffer to
+     * overflow, however many digits there are. */
+    int value = 0;
+    while (**content >= '0' && **content <= '9') {
+        int digit = **content - '0';
+        if (value > (INT_MAX - digit) / 10) {
+            /* Saturate rather than overflow a signed int. */
+            value = INT_MAX;
+        } else {
+            value = value * 10 + digit;
+        }
+        (*content)++;
+    }
+
+    int* ip = malloc(sizeof *ip);
+    if (ip == NULL) {
+        return init_token(ERR, NULL);
+    }
+    *ip = value;
+    return init_token(INT_LIT, ip);
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..52d00fa
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,49 @@
+#ifndef LEXER
+#define LEXER
+/* Kinds of token the lexer can produce. */
+typedef enum TokenType {
+ ERR,
+ EOL,
+ NAME,
+ KEYWORD,
+ STRING_LIT,
+ INT_LIT,
+ SEMICOLON,
+ OPEN_PAREN,
+ CLOSE_PAREN,
+ OPEN_BRACE,
+ CLOSE_BRACE,
+ OPEN_BRACKET,
+ CLOSE_BRACKET,
+ START_COMMENT,
+ EQUALS,
+ DOUBLE_EQUALS,
+ NEGATION,
+ ASTERISK,
+ PLUS_SIGN,
+ FUNCTION
+} tokenType_t;
+/* Type-name keywords recognised by try_get_keyword(); K_NONE means the text
+ * is not a keyword. */
+typedef enum Keyword {
+ K_NONE,
+ K_VOID,
+ K_INT,
+ K_FLOAT,
+ K_STRING
+} keyword_t;
+/* A single token: its kind plus an untyped payload pointer. The lexer passes
+ * NULL as the payload for punctuation tokens. */
+typedef struct Token {
+ tokenType_t type;
+ void* value;
+} token_t;
+/* Builds a token_t from its two fields. */
+token_t init_token(tokenType_t type, void* value);
+/* A token array pointer together with a count (presumably the number of
+ * elements in `tokens` - confirm against the code that builds it). */
+typedef struct TokenSet {
+ token_t* tokens;
+ int count;
+} tokenSet_t;
+/* Builds a tokenSet_t from its two fields. */
+tokenSet_t init_tokenset(token_t* tokens, int count);
+/* Returns a printable name for a token type. */
+char* tttos(tokenType_t tokenType);
+/* Advances *linePointer past leading spaces. */
+int advance_whitespace(char** linePointer);
+/* Returns the keyword spelled by `name`, or K_NONE. */
+keyword_t try_get_keyword(char* name);
+/* Nonzero when `keyword` is K_VOID, K_INT, K_FLOAT or K_STRING. */
+char keyword_is_type(keyword_t keyword);
+/* Each lex_first_* function lexes one token starting at *content and
+ * advances *content past what it consumed. */
+token_t lex_first_token(char** content);
+/* Lexes a run of characters in the range 65..90 (ASCII 'A'..'Z'). */
+token_t lex_first_string(char** content);
+/* Lexes a run of characters in the range 48..57 (ASCII '0'..'9'). */
+token_t lex_first_int(char** content);
+#endif
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..10e8b6b
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,24 @@
+#include "parse.h"
+#include "lexer.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Defined later in this file; declared here so parse() can call it. */
+sym_t parse_name(tokenSet_t tokens, int* seek);
+
+/*
+ * Consumes the token at index *seek (incrementing *seek) and dispatches on
+ * its type.
+ *
+ * Terminates the process with status 1 on an ERR or EOL token, or when *seek
+ * is outside [0, tokens.count). A NAME token is handed to parse_name().
+ * Other token types are not handled yet and yield a zero-initialised symbol.
+ */
+sym_t parse(tokenSet_t tokens, int* seek) {
+    /* Running off the end of the token array is treated like hitting EOL. */
+    if (*seek < 0 || *seek >= tokens.count) {
+        exit(1);
+    }
+    token_t token = tokens.tokens[(*seek)++];
+    if (token.type == ERR || token.type == EOL) {
+        exit(1);
+    }
+    if (token.type == NAME) {
+        return parse_name(tokens, seek);
+    }
+    /* Always return a defined value instead of falling off the end. */
+    sym_t unhandled = {0};
+    return unhandled;
+}
+/*
+ * Consumes the token at index *seek (incrementing *seek) and inspects it.
+ *
+ * Terminates the process with status 1 when *seek is outside
+ * [0, tokens.count). Handling of a type keyword is not implemented yet, so
+ * the function currently always returns a zero-initialised symbol.
+ */
+sym_t parse_name(tokenSet_t tokens, int* seek) {
+    sym_t symbol = {0};
+    if (*seek < 0 || *seek >= tokens.count) {
+        exit(1);
+    }
+    token_t token = tokens.tokens[(*seek)++];
+    if (token.type == KEYWORD && token.value != NULL) {
+        /* The lexer stores a KEYWORD's payload as a pointer to a
+         * heap-allocated keyword_t, so dereference it rather than passing
+         * the pointer itself. */
+        keyword_t keyword = *(keyword_t*)token.value;
+        if (keyword_is_type(keyword)) {
+            /* TODO: parse the definition that follows the type keyword. */
+        }
+    }
+    return symbol;
+}
+sym_t parse_definition(tokenSet_t tokens, int* seek,
diff --git a/src/parse.h b/src/parse.h
new file mode 100644
index 0000000..144e88c
--- /dev/null
+++ b/src/parse.h
@@ -0,0 +1,25 @@
+#ifndef PARSE
+#define PARSE
+#include "lexer.h"
+/* Primitive type tags. */
+typedef enum PrimitiveType {
+    VOID,
+    INT,
+    FLOAT,
+    STRING
+} primitive_t;
+/*
+ * Abstract (constructed) type tags. The enumerators carry an A_ prefix
+ * because lexer.h, included above, already defines FUNCTION as a
+ * tokenType_t enumerator and C enumerators share one namespace.
+ */
+typedef enum AbstractType {
+    A_FUNCTION,
+    A_ARRAY
+} abstract_t;
+/* A type description. NOTE(review): how `primitive`, `abstract` and
+ * `referent` combine is not established by the code yet - document once the
+ * parser fills them in. */
+typedef struct Type {
+    primitive_t primitive;
+    abstract_t abstract;
+    struct Type* referent;
+} type_t;
+/* A parsed symbol: its type, an untyped name pointer, and a pointer to
+ * argument symbols. */
+typedef struct Symbol {
+    type_t type;
+    void* name;
+    struct Symbol* args;
+} sym_t;
+/* Parser entry points (defined in parse.c). Each consumes tokens starting
+ * at index *seek and advances *seek. */
+sym_t parse(tokenSet_t tokens, int* seek);
+sym_t parse_name(tokenSet_t tokens, int* seek);
+#endif