diff options
| author | Nakidai <nakidai@disroot.org> | 2025-03-24 21:01:37 +0300 |
|---|---|---|
| committer | Nakidai <nakidai@disroot.org> | 2025-03-24 21:01:37 +0300 |
| commit | f312b357ab2ec3cf83a67945f3641b964a59e8d2 (patch) | |
| tree | 425f3371eee770f64e268e9964dba29ad17bd410 | |
| parent | ad9d6a199db7c28f8b20f131dfb55a26e0e251de (diff) | |
| download | 3cl-f312b357ab2ec3cf83a67945f3641b964a59e8d2.tar.gz 3cl-f312b357ab2ec3cf83a67945f3641b964a59e8d2.zip | |
Add code
| -rw-r--r-- | Makefile | 17 | ||||
| -rw-r--r-- | cccl.c | 17 | ||||
| -rw-r--r-- | cccl.h | 80 | ||||
| -rw-r--r-- | main.c | 58 | ||||
| -rw-r--r-- | parser.c | 160 | ||||
| -rw-r--r-- | readfile.c | 26 | ||||
| -rw-r--r-- | tokenizer.c | 79 |
7 files changed, 437 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 23ba51d..28c5ad9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,19 @@
+OBJS += cccl.o
+OBJS += main.o
+OBJS += parser.o
+OBJS += readfile.o
+OBJS += tokenizer.o
+
+RM ?= rm -f
+
+
+all: 3cl
+
+3cl: ${OBJS}
+	${CC} ${LDFLAGS} -o 3cl ${OBJS} ${LDLIBS}
+
+clean:
+	${RM} ${OBJS} 3cl
+
 README: README.7
 	mandoc -Tascii README.7 | col -b > README
diff --git a/cccl.c b/cccl.c
new file mode 100644
--- /dev/null
+++ b/cccl.c
@@ -0,0 +1,17 @@
+#include "cccl.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+
+/*
+ * Tokenizes the program held in file and prints how many tokens were read.
+ */
+void cccl(struct cccl_File file)
+{
+    struct cccl_Token tokens[TOKENS_LIMIT];
+
+    size_t tokens_amount = tokenize(file.buffer, file.size, tokens, TOKENS_LIMIT);
+    /* %zu is the conversion for size_t; %lu is wrong where long is narrower */
+    printf("Read: %zu\n", tokens_amount);
+}
diff --git a/cccl.h b/cccl.h
new file mode 100644
--- /dev/null
+++ b/cccl.h
@@ -0,0 +1,80 @@
+#ifndef CCCL_H
+#define CCCL_H
+
+#include <stddef.h>
+
+
+/* Capacity of the token array that cccl() passes to tokenize(). */
+#define TOKENS_LIMIT 16384
+
+/* A program file: size bytes at buffer. */
+struct cccl_File
+{
+    char *buffer;
+    size_t size;
+};
+
+/* Token classes produced by tokenize(). */
+enum cccl_TokenType
+{
+    cccl_Token_IDENTIFIER,      /* a-z, A-Z */
+    cccl_Token_COMMAND,         /* ^ + - * ~ */
+    cccl_Token_COMMANDWITHARG,  /* % = ! $ & < > # : @ */
+    cccl_Token_BLOCKSTART,      /* { ( [ ? */
+    cccl_Token_BLOCKEND,        /* } ) ] ; */
+};
+
+/* One token: its class and the source character it was made from. */
+struct cccl_Token
+{
+    enum cccl_TokenType type;
+    char value;
+};
+
+/* Node kinds of the tree built by cccl_parse(). */
+enum cccl_NodeType
+{
+    cccl_Node_CODE,
+
+    cccl_Node_PUSHZERO,
+    cccl_Node_INCREMENT,
+    cccl_Node_DECREMENT,
+    cccl_Node_ADD,
+    cccl_Node_SUBTRACT,
+    cccl_Node_REVERSE,
+    cccl_Node_ASSIGN,
+    cccl_Node_DELETE,
+    cccl_Node_PUSHVAR,
+    cccl_Node_ASSIGNLOCAL,
+    cccl_Node_OUTPUT,
+    cccl_Node_INPUT,
+    cccl_Node_CALL,
+    cccl_Node_END,
+    cccl_Node_CONTINUE,
+
+    cccl_Node_PROCEDURE,
+    cccl_Node_INFINITE,
+    cccl_Node_REPEAT,
+    cccl_Node_CONDITIONAL,
+};
+
+/*
+ * Tree node: in points to in_length children (NULL when there are none),
+ * value is the character that cccl_parse() attached to the node.
+ */
+struct cccl_Node
+{
+    enum cccl_NodeType type;
+    struct cccl_Node **in;
+    size_t in_length;
+    char value;
+};
+
+/* Allocates file->buffer to fit the file at path; returns 0 on success. */
+int cccl_allocfile(const char *path, struct cccl_File *file);
+
+void cccl(struct cccl_File file);
+size_t tokenize(const char *code, size_t size, struct cccl_Token tokens[], size_t tokens_length);
+struct cccl_Node *cccl_parse(struct cccl_Token tokens[], size_t tokens_length, enum cccl_NodeType type, char value);
+
+#endif /* CCCL_H */
diff --git a/main.c b/main.c
new file mode 100644
--- /dev/null
+++ b/main.c
@@ -0,0 +1,58 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+
+/* Most bytes requested from a single fread() call. */
+#define READ_CHUNK 2048
+
+/*
+ * Usage: 3cl file
+ *
+ * Loads the file into memory and hands it to cccl(). Exits with status 1
+ * when no file is named or when it cannot be read.
+ */
+int main(int argc, char **argv)
+{
+    if (argc < 2)
+        return 1;
+
+    struct cccl_File file;
+
+    int error = cccl_allocfile(argv[1], &file);
+    if (error)
+        err(1, "cccl_allocfile()");
+
+    FILE *f = fopen(argv[1], "r");
+    if (!f)
+        err(1, "fopen()");
+
+    size_t bytes_read = 0;
+    while (bytes_read < file.size)
+    {
+        size_t left = file.size - bytes_read;
+        size_t read_now = fread(
+            file.buffer + bytes_read,
+            sizeof(*file.buffer),
+            left < READ_CHUNK ? left : READ_CHUNK,
+            f
+        );
+        if (read_now == 0)
+        {
+            if (ferror(f))
+                errx(1, "couldn't read %s", argv[1]);
+            break;
+        }
+        bytes_read += read_now;
+    }
+    fclose(f);
+
+    /* The file may have shrunk since it was measured; expose only what was read */
+    file.size = bytes_read;
+
+    cccl(file);
+    free(file.buffer);
+}
diff --git a/parser.c b/parser.c
new file mode 100644
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,160 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+
+/* Maps a command or bracket character to its node type; exits on any other. */
+static enum cccl_NodeType get_nodetype(char c)
+{
+    switch (c)
+    {
+#define X(ch, name) case ch: return cccl_Node_##name
+        X('^', PUSHZERO);
+        X('+', INCREMENT);
+        X('-', DECREMENT);
+        X('*', ADD);
+        X('~', SUBTRACT);
+        X('%', REVERSE);
+        X('=', ASSIGN);
+        X('!', DELETE);
+        X('$', PUSHVAR);
+        X('&', ASSIGNLOCAL);
+        X('<', OUTPUT);
+        X('>', INPUT);
+        X('{', PROCEDURE);
+        X('}', PROCEDURE);
+        X('@', CALL);
+        X('(', INFINITE);
+        X(')', INFINITE);
+        X('[', REPEAT);
+        X(']', REPEAT);
+        X('#', END);
+        X(':', CONTINUE);
+        X('?', CONDITIONAL);
+        X(';', CONDITIONAL);
+#undef X
+    default:
+        break;
+    }
+    /* errx() rather than err(): errno says nothing about a bad character */
+    errx(1, "Unrecognized command: [%d] %c", c, c);
+}
+
+
+/* Allocates a childless node; exits if memory runs out. */
+static struct cccl_Node *new_node(enum cccl_NodeType type, char value)
+{
+    struct cccl_Node *node = malloc(sizeof(*node));
+    if (!node)
+        err(1, "malloc()");
+
+    *node = (struct cccl_Node)
+    {
+        .type = type,
+        .in = NULL,
+        .in_length = 0,
+        .value = value,
+    };
+    return node;
+}
+
+/* Appends child to parent->in, growing the array by one slot. */
+static void append_child(struct cccl_Node *parent, struct cccl_Node *child)
+{
+    /* realloc(NULL, n) allocates, so the first child needs no special case */
+    struct cccl_Node **in = realloc(parent->in, sizeof(*in) * (parent->in_length + 1));
+    if (!in)
+        err(1, "realloc()");
+
+    in[parent->in_length++] = child;
+    parent->in = in;
+}
+
+/* Returns the character that closes a block opened by opening. */
+static char get_closing(char opening)
+{
+    switch (opening)
+    {
+    case '{': return '}';
+    case '(': return ')';
+    case '[': return ']';
+    case '?': return ';';
+    default:  break;
+    }
+    errx(1, "Not a block start: [%d] %c", opening, opening);
+}
+
+
+/*
+ * Builds a node of the given type and value whose children are parsed from
+ * the first tokens_length elements of tokens.
+ *
+ * A command becomes a leaf; a command with an argument takes its value from
+ * the token that follows it. A block becomes a subtree parsed recursively
+ * from the tokens between its brackets and takes its value from the token
+ * just before the opening bracket. Identifier and block-end tokens add no
+ * node of their own.
+ *
+ * Exits the program on an unmatched bracket or when memory runs out. The
+ * nodes are heap-allocated and nothing here frees them.
+ */
+struct cccl_Node *cccl_parse(struct cccl_Token tokens[], size_t tokens_length, enum cccl_NodeType type, char value)
+{
+    struct cccl_Node *res = new_node(type, value);
+
+    for (size_t i = 0; i < tokens_length; ++i)
+    {
+        switch (tokens[i].type)
+        {
+        case cccl_Token_COMMAND:
+        {
+            append_child(res, new_node(get_nodetype(tokens[i].value), 0));
+        } break;
+        case cccl_Token_COMMANDWITHARG:
+        {
+            assert(i + 1 != tokens_length);
+            append_child(res, new_node(get_nodetype(tokens[i].value), tokens[i + 1].value));
+        } break;
+        case cccl_Token_BLOCKSTART:
+        {
+            assert(i > 0);
+            char opening = tokens[i].value;
+            char closing = get_closing(opening);
+
+            /*
+             * Find the matching bracket. depth already counts the opening
+             * token, so the scan has to start right after it.
+             */
+            size_t oldi = i;
+            int depth = 1;
+            for (i = oldi + 1; i < tokens_length; ++i)
+            {
+                if (tokens[i].value == opening)
+                    ++depth;
+                else if (tokens[i].value == closing)
+                    --depth;
+
+                if (depth == 0)
+                    break;
+            }
+            if (i == tokens_length)
+                errx(1, "No matching bracket for %c", opening);
+
+            /* i stays on the closing token; the outer ++i steps past it */
+            append_child(res, cccl_parse(
+                tokens + oldi + 1,
+                i - oldi - 1,
+                get_nodetype(opening),
+                tokens[oldi - 1].value
+            ));
+        } break;
+        default:
+            break;
+        }
+    }
+
+    return res;
+}
diff --git a/readfile.c b/readfile.c
new file mode 100644
--- /dev/null
+++ b/readfile.c
@@ -0,0 +1,26 @@
+#include "cccl.h"
+
+#include <stdlib.h>
+
+#include <sys/stat.h>
+
+
+/*
+ * Allocates a buffer big enough to hold the file at path.
+ *
+ * On success stores the file size and the still unfilled buffer in *file and
+ * returns 0. Returns non-zero when stat() or malloc() fails.
+ */
+int cccl_allocfile(const char *path, struct cccl_File *file)
+{
+    struct stat st;
+    int error = stat(path, &st);
+    if (error)
+        return error;
+
+    file->size = st.st_size;
+    /* malloc(0) may return NULL, which would report an empty file as an error */
+    file->buffer = malloc(file->size ? file->size : 1);
+
+    return !file->buffer;
+}
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,79 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stddef.h>
+
+
+/*
+ * Splits size bytes of code into tokens and returns how many were stored.
+ *
+ * Letters become identifiers; the punctuation listed in the switch becomes
+ * commands, commands with an argument, block starts and block ends. Spaces,
+ * tabs and newlines are skipped, and '/' opens a comment that lasts until
+ * the end of the line.
+ *
+ * Exits the program on any other character outside a comment, or when the
+ * tokens would not fit into tokens_length elements.
+ */
+size_t tokenize(const char *code, size_t size, struct cccl_Token tokens[], size_t tokens_length)
+{
+    size_t tokeni = 0;
+    int comment = 0;
+
+    /* A for loop, so that code is never read when size is 0 */
+    for (size_t i = 0; i < size; ++i)
+    {
+        switch (code[i])
+        {
+/* The capacity check precedes the store, so that every slot can be used */
+#define X(name) \
+        { \
+            if (comment) \
+                break; \
+            if (tokeni >= tokens_length) \
+                errx(1, "Too many tokens, the limit is %zu", tokens_length); \
+            tokens[tokeni++] = (struct cccl_Token) \
+            { \
+                .type = cccl_Token_##name, \
+                .value = code[i], \
+            }; \
+        } break
+        case '/':
+        {
+            comment = 1;
+        } break;
+        case '\n':
+        {
+            comment = 0;
+        } break;
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
+        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
+        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+            X(IDENTIFIER);
+        case '^': case '+': case '-': case '*': case '~':
+            X(COMMAND);
+        case '%': case '=': case '!': case '$': case '&':
+        case '<': case '>': case '#': case ':': case '@':
+            X(COMMANDWITHARG);
+        case '{': case '(': case '[': case '?':
+            X(BLOCKSTART);
+        case '}': case ')': case ']': case ';':
+            X(BLOCKEND);
+        case ' ': case '\t':
+        { } break;
+        default:
+        {
+            if (comment)
+                break;
+            errx(1, "Illegal symbol in a code: [%d] %c", code[i], code[i]);
+        } break;
+#undef X
+        }
+    }
+
+    return tokeni;
+}