initHEAD master

author: Kai Stevenson <kai@kaistevenson.com> 2024-11-20 21:11:38 -0800
committer: Kai Stevenson <kai@kaistevenson.com> 2024-11-20 21:11:38 -0800
commit: 711eb1d91832267bdd1fe2bc57eeebba9e637c52 (patch)
tree: 6cbd10ee276f1cb8119d2528cc1f7a04894228de /src
6 files changed, 321 insertions, 0 deletions
diff --git a/src/echo.c b/src/echo.c
new file mode 100644
index 0000000..1efbcda
--- /dev/null
+++ b/src/echo.c
@@ -0,0 +1,29 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "echo.h"
+#include <string.h>
+/* Entry point: reads the file named by argv[1] and hands its text to parse(). */
+int main(int argc, char** argv) {
+	if (argc < 2) { /* argc counts the program name, so argv[1] needs argc >= 2 */
+		exit(0);
+	}
+	char* content = read_file(argv[1]);
+	if (content == NULL) {
+		exit(1);
+	}
+	parse(content); /* NOTE(review): parse() is not declared by echo.h; confirm its signature */
+}
+/* Reads the file at path into a malloc'd NUL-terminated buffer; NULL on failure. */
+char* read_file(char* path) {
+	FILE* file = fopen(path, "r");
+	if (file == NULL) {
+		printf("file %s does not exist\n", path);
+		return NULL; /* stop here instead of seeking on a NULL stream */
+	}
+	long length = (fseek(file, 0, SEEK_END) == 0) ? ftell(file) : -1;
+	char* out = (length >= 0 && fseek(file, 0, SEEK_SET) == 0) ? malloc((size_t)length + 1) : NULL;
+	/* Terminate after the bytes actually read, which may be fewer than length. */
+	if (out != NULL) { out[fread(out, 1, (size_t)length, file)] = '\0'; }
+	fclose(file);
+	return out; /* caller frees */
+}
diff --git a/src/echo.h b/src/echo.h
new file mode 100644
index 0000000..32af69f
--- /dev/null
+++ b/src/echo.h
@@ -0,0 +1,4 @@
+#ifndef ECHO
+#define ECHO
+char* read_file(char* path); /* defined in echo.c; returns a malloc'd buffer holding the file's text */
+#endif
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
index 0000000..f950dcb
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,190 @@
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/* Packs a token type and its optional payload pointer into a token_t. */
+/* The pointer is stored as given; nothing is copied. */
+token_t init_token(tokenType_t type, void* value) {
+	token_t result = { .type = type, .value = value };
+	return result;
+}
+/* Wraps an existing token array and its element count in a tokenSet_t. */
+/* The array pointer is stored as given; nothing is copied. */
+tokenSet_t init_tokenset(token_t* tokens, int count) {
+	tokenSet_t result = { .tokens = tokens, .count = count };
+	return result;
+}
+/* Maps a token type to a static, human-readable name; "???" if unrecognized. */
+char* tttos(tokenType_t tokenType) {
+	/* Designated initializers tie each name to its enumerator in lexer.h, */
+	/* so the table stays correct even if the enum is reordered. */
+	static char* const names[] = {
+		[ERR] = "ERR", [EOL] = "EOL",
+		[NAME] = "NAME", [KEYWORD] = "KEYWORD",
+		[STRING_LIT] = "STRING_LIT", [INT_LIT] = "INT_LIT",
+		[SEMICOLON] = "SEMICOLON", [START_COMMENT] = "START_COMMENT",
+		[OPEN_PAREN] = "OPEN_PAREN", [CLOSE_PAREN] = "CLOSE_PAREN",
+		[OPEN_BRACE] = "OPEN_BRACE", [CLOSE_BRACE] = "CLOSE_BRACE",
+		[OPEN_BRACKET] = "OPEN_BRACKET", [CLOSE_BRACKET] = "CLOSE_BRACKET",
+		[EQUALS] = "EQUALS", [DOUBLE_EQUALS] = "DOUBLE_EQUALS",
+		[NEGATION] = "NEGATION", [ASTERISK] = "ASTERISK",
+		[PLUS_SIGN] = "PLUS_SIGN", [FUNCTION] = "FUNCTION",
+	};
+	size_t count = sizeof names / sizeof names[0];
+	if ((int)tokenType < 0 || (size_t)tokenType >= count) {
+		return "???";
+	}
+	return names[tokenType];
+}
+/* Advances *linePointer past spaces and tabs; returns how many were skipped. */
+int advance_whitespace(char** linePointer) {
+	/* The count is measured on the cursor itself, not on the char** holding it. */
+	size_t skipped = strspn(*linePointer, " \t");
+	*linePointer += skipped;
+	return (int)skipped;
+}
+/* Looks name up among the type keywords; returns K_NONE when nothing matches. */
+keyword_t try_get_keyword(char* name) {
+	static const struct { const char* text; keyword_t id; } table[] = {
+		{ "VOID", K_VOID },
+		{ "INT", K_INT },
+		{ "FLOAT", K_FLOAT },
+		{ "STRING", K_STRING },
+	};
+	for (size_t i = 0; i < sizeof table / sizeof table[0]; i++) {
+		if (strcmp(name, table[i].text) == 0) {
+			return table[i].id;
+		}
+	}
+	return K_NONE;
+}
+/* Returns 1 when keyword is K_VOID, K_INT, K_FLOAT or K_STRING, and 0 otherwise. */
+char keyword_is_type(keyword_t keyword) {
+	return !(keyword != K_VOID && keyword != K_INT && keyword != K_FLOAT && keyword != K_STRING);
+}
+/* Spellings that map straight to a token type and carry no value. */
+static const struct {
+	const char* text;
+	tokenType_t type;
+} fixed_tokens[] = {
+	{ ";", SEMICOLON },
+	{ "(", OPEN_PAREN },
+	{ ")", CLOSE_PAREN },
+	{ "{", OPEN_BRACE },
+	{ "}", CLOSE_BRACE },
+	{ "[", OPEN_BRACKET },
+	{ "]", CLOSE_BRACKET },
+	{ "#", START_COMMENT },
+	{ "//", START_COMMENT },
+	{ "=", EQUALS },
+	{ "==", DOUBLE_EQUALS },
+	{ "!", NEGATION },
+	{ "*", ASTERISK },
+	{ "+", PLUS_SIGN },
+	{ "func", FUNCTION },
+};
+
+/* Looks word up in fixed_tokens; on a match stores its type in *type and returns 1. */
+static int find_fixed_token(const char* word, tokenType_t* type) {
+	for (size_t i = 0; i < sizeof fixed_tokens / sizeof fixed_tokens[0]; i++) {
+		if (strcmp(word, fixed_tokens[i].text) == 0) {
+			*type = fixed_tokens[i].type;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* Returns a malloc'd, NUL-terminated copy of the first length chars of text, or NULL. */
+static char* copy_word(const char* text, size_t length) {
+	char* word = malloc(length + 1);
+	if (word != NULL) {
+		memcpy(word, text, length);
+		word[length] = '\0';
+	}
+	return word;
+}
+
+/*
+ * Lexes the next token on the current line.
+ *
+ * content points at a cursor into a NUL-terminated buffer. Leading spaces
+ * and tabs are skipped, then one of the following is returned:
+ *   - EOL when the cursor is at '\n' or at the terminating NUL (the cursor
+ *     is left on that character),
+ *   - the result of lex_first_int() when the token starts with a digit,
+ *   - a punctuation/operator token or FUNCTION for an exact spelling match,
+ *   - KEYWORD, whose value is a malloc'd keyword_t, for a type keyword,
+ *   - NAME, whose value is a malloc'd copy of the word, otherwise,
+ *   - ERR if memory cannot be allocated.
+ *
+ * A word ends at the first space, tab, newline or NUL; the cursor is left on
+ * that delimiter. The caller owns any non-NULL value in the returned token.
+ */
+token_t lex_first_token(char** content) {
+	advance_whitespace(content);
+	char first = **content;
+	if (first == '\n' || first == '\0') {
+		return init_token(EOL, NULL);
+	}
+	/* Only the first character decides whether this is an integer literal. */
+	if (first >= '0' && first <= '9') {
+		return lex_first_int(content);
+	}
+	/* TODO: string parsing with quotes */
+
+	/* Copy the word so it can be compared as a NUL-terminated string. */
+	size_t length = strcspn(*content, " \t\n");
+	char* word = copy_word(*content, length);
+	if (word == NULL) {
+		return init_token(ERR, NULL);
+	}
+	*content += length;
+
+	tokenType_t type;
+	if (find_fixed_token(word, &type)) {
+		free(word);
+		return init_token(type, NULL);
+	}
+	keyword_t keyword = try_get_keyword(word);
+	if (keyword != K_NONE) {
+		free(word);
+		keyword_t* kp = malloc(sizeof *kp);
+		if (kp == NULL) {
+			return init_token(ERR, NULL);
+		}
+		*kp = keyword;
+		return init_token(KEYWORD, kp);
+	}
+	/* Anything else is an identifier; the token takes ownership of word. */
+	return init_token(NAME, word);
+}
+/* Lexes a run of 'A'-'Z' into a STRING_LIT token; the caller owns the malloc'd copy. */
+token_t lex_first_string(char** content) {
+	advance_whitespace(content);
+	size_t length = strspn(*content, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+	char* s = malloc(length + 1); /* sized to the run, so long input cannot overflow it */
+	if (s == NULL) {
+		return init_token(ERR, NULL);
+	}
+	memcpy(s, *content, length);
+	s[length] = '\0';
+	*content += length;
+	return init_token(STRING_LIT, s);
+}
+/* Lexes a run of decimal digits into an INT_LIT token whose value is a malloc'd int. */
+/* The caller owns that int. Returns an ERR token if it cannot be allocated. */
+token_t lex_first_int(char** content) {
+	advance_whitespace(content);
+	/* Measure the digit run up front so strtol is never handed a sign or a */
+	/* whitespace prefix that it would accept but this lexer does not. */
+	size_t digits = strspn(*content, "0123456789");
+	int* ip = malloc(sizeof *ip);
+	if (ip == NULL) {
+		return init_token(ERR, NULL);
+	}
+	/* Parse in place: no fixed-size scratch buffer, so long literals cannot overflow one. */
+	*ip = (digits > 0) ? (int)strtol(*content, NULL, 10) : 0;
+	*content += digits;
+	return init_token(INT_LIT, (void*)ip);
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..52d00fa
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,49 @@
+#ifndef LEXER
+#define LEXER
+typedef enum TokenType { /* token kinds; spellings noted below are the ones lexer.c compares against */
+	ERR, /* parse() exits when it meets this type */
+	EOL, /* returned by lex_first_token() at a newline */
+	NAME, /* value is a malloc'd copy of the word */
+	KEYWORD, /* value points to a malloc'd keyword_t */
+	STRING_LIT,
+	INT_LIT,
+	SEMICOLON, /* ";" */
+	OPEN_PAREN, /* "(" */
+	CLOSE_PAREN, /* ")" */
+	OPEN_BRACE, /* "{" */
+	CLOSE_BRACE, /* "}" */
+	OPEN_BRACKET, /* "[" */
+	CLOSE_BRACKET, /* "]" */
+	START_COMMENT, /* "#" or "//" */
+	EQUALS, /* "=" */
+	DOUBLE_EQUALS, /* "==" */
+	NEGATION, /* "!" */
+	ASTERISK, /* "*" */
+	PLUS_SIGN, /* "+" */
+	FUNCTION /* "func"; NOTE(review): parse.h's AbstractType also declares FUNCTION */
+} tokenType_t;
+typedef enum Keyword { /* result type of try_get_keyword() */
+	K_NONE, /* the name matched no keyword */
+	K_VOID, /* "VOID" */
+	K_INT, /* "INT" */
+	K_FLOAT, /* "FLOAT" */
+	K_STRING /* "STRING" */
+} keyword_t;
+typedef struct Token { /* one lexed token */
+	tokenType_t type; /* which kind of token this is */
+	void* value; /* payload whose pointee depends on type; NULL for tokens that carry none */
+} token_t;
+token_t init_token(tokenType_t type, void* value); /* builds a token_t from the two fields */
+typedef struct TokenSet { /* a token array together with its length */
+	token_t* tokens; /* indexed by parse() through its seek cursor */
+	int count; /* presumably the number of entries in tokens; confirm with the code that fills it */
+} tokenSet_t;
+tokenSet_t init_tokenset(token_t* tokens, int count); /* builds a tokenSet_t from the two fields */
+char* tttos(tokenType_t tokenType); /* name string for a token type; "???" when it has none */
+int advance_whitespace(char** linePointer); /* moves *linePointer past spaces and tabs */
+keyword_t try_get_keyword(char* name); /* keyword matching name exactly, or K_NONE */
+char keyword_is_type(keyword_t keyword); /* nonzero for K_VOID, K_INT, K_FLOAT and K_STRING */
+token_t lex_first_token(char** content); /* lexes the next token, advancing *content */
+token_t lex_first_string(char** content); /* lexes a run of 'A'-'Z' characters, advancing *content */
+token_t lex_first_int(char** content); /* lexes a run of decimal digits; value is a malloc'd int */
+#endif
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..10e8b6b
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,24 @@
+#include "parse.h"
+#include "lexer.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+sym_t parse_name(tokenSet_t tokens, int* seek); /* defined later in this file */
+/* Consumes the token at *seek; a NAME is handed to parse_name(), anything else exits. */
+sym_t parse(tokenSet_t tokens, int* seek) {
+	if (*seek >= tokens.count || tokens.tokens[*seek].type != NAME) {
+		exit(1); /* past the end, ERR, EOL, or a token type with no rule yet */
+	}
+	(*seek)++;
+	return parse_name(tokens, seek);
+}
+/* Consumes the token at *seek; still a stub, so it returns a zeroed symbol for now. */
+sym_t parse_name(tokenSet_t tokens, int* seek) {
+	sym_t symbol = {0};
+	token_t token = tokens.tokens[(*seek)++];
+	/* KEYWORD values point at a keyword_t (see lexer.c), so dereference before testing. */
+	if (token.type == KEYWORD && keyword_is_type(*(keyword_t*)token.value)) { /* TODO: parse the definition */ }
+	return symbol;
+}
+sym_t parse_definition(tokenSet_t tokens, int* seek, 
diff --git a/src/parse.h b/src/parse.h
new file mode 100644
index 0000000..144e88c
--- /dev/null
+++ b/src/parse.h
@@ -0,0 +1,25 @@
+#include "lexer.h"
+#ifndef PARSE
+#define PARSE
+typedef enum PrimitiveType { /* type of the primitive field in struct Type */
+	VOID,
+	INT,
+	FLOAT,
+	STRING
+} primitive_t; /* NOTE(review): these look parallel to K_VOID..K_STRING in lexer.h; confirm the mapping */
+typedef enum AbstractType { /* type of the abstract field in struct Type */
+	FUNCTION, /* NOTE(review): lexer.h, included above, already declares FUNCTION in TokenType */
+	ARRAY
+} abstract_t;
+typedef struct Type { /* type description stored in each Symbol */
+	primitive_t primitive;
+	abstract_t abstract;
+	struct Type* referent; /* presumably the element or return type this one refers to; confirm */
+} type_t;
+typedef struct Symbol { /* value returned by parse() and parse_name() */
+	type_t type;
+	void* name; /* presumably the NAME token's string; confirm once the parser fills it in */
+	struct Symbol* args; /* presumably a function's parameters; length is not stored here */
+} sym_t;
+sym_t parse(tokenSet_t tokens, int* seek); /* matches the definition in parse.c; seek is the cursor into tokens */
+#endif
author	Kai Stevenson <kai@kaistevenson.com>	2024-11-20 21:11:38 -0800
committer	Kai Stevenson <kai@kaistevenson.com>	2024-11-20 21:11:38 -0800
commit	711eb1d91832267bdd1fe2bc57eeebba9e637c52 (patch)
tree	6cbd10ee276f1cb8119d2528cc1f7a04894228de /src