summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Makefile17
-rw-r--r--cccl.c13
-rw-r--r--cccl.h69
-rw-r--r--main.c42
-rw-r--r--parser.c130
-rw-r--r--readfile.c19
-rw-r--r--tokenizer.c65
7 files changed, 355 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 23ba51d..28c5ad9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,19 @@
+# Object files linked into the 3cl executable.
+OBJS += cccl.o
+OBJS += main.o
+OBJS += parser.o
+OBJS += readfile.o
+OBJS += tokenizer.o
+
+# Removal command used by `clean`; only set here if not already defined.
+RM ?= rm -f
+
+
+# `all` and `clean` name actions, not files. Declaring them phony keeps a
+# stray file called "all" or "clean" from making the target look up to date.
+.PHONY: all clean
+
+# Default target: build the executable.
+all: 3cl
+
+# Link step. No explicit compile rule is given in this Makefile, so the
+# object files come from make's built-in .c -> .o rule.
+3cl: ${OBJS}
+	${CC} ${LDFLAGS} -o 3cl ${OBJS} ${LDLIBS}
+
+# Remove everything the build produces.
+clean:
+	${RM} ${OBJS} 3cl
+
 README: README.7
 	mandoc -Tascii README.7 | col -b > README
diff --git a/cccl.c b/cccl.c
new file mode 100644
index 0000000..58301f5
--- /dev/null
+++ b/cccl.c
@@ -0,0 +1,13 @@
+#include "cccl.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+
+/*
+ * Process one loaded source file.
+ *
+ * Tokenizes file.buffer (file.size bytes) into a local array holding at most
+ * TOKENS_LIMIT tokens and prints the number of tokens produced to stdout.
+ */
+void cccl(struct cccl_File file)
+{
+    struct cccl_Token tokens[TOKENS_LIMIT];
+
+    size_t tokens_amount = tokenize(file.buffer, file.size, tokens, TOKENS_LIMIT);
+    /* %zu is the conversion for size_t; %lu is only correct on platforms
+     * where size_t happens to be unsigned long. */
+    printf("Read: %zu\n", tokens_amount);
+}
diff --git a/cccl.h b/cccl.h
new file mode 100644
index 0000000..7e5cc01
--- /dev/null
+++ b/cccl.h
@@ -0,0 +1,69 @@
+/*
+ * Shared declarations for the tokenizer, parser and driver.
+ *
+ * The include guard deliberately avoids a leading double underscore:
+ * identifiers containing "__" are reserved for the implementation.
+ */
+#ifndef CCCL_H
+#define CCCL_H
+
+#include <stddef.h>
+
+
+/* Capacity of the token array that cccl() hands to tokenize(). */
+#define TOKENS_LIMIT 16384
+
+/* A source file buffer: `size` bytes starting at `buffer`. */
+struct cccl_File
+{
+    char *buffer;
+    size_t size;
+};
+
+/* Token classes produced by tokenize(), keyed by the source character. */
+enum cccl_TokenType
+{
+    cccl_Token_IDENTIFIER,      /* a-z, A-Z */
+    cccl_Token_COMMAND,         /* ^ + - * ~ */
+    cccl_Token_COMMANDWITHARG,  /* % = ! $ & < > # : @ */
+    cccl_Token_BLOCKSTART,      /* { ( [ ? */
+    cccl_Token_BLOCKEND,        /* } ) ] ; */
+};
+
+/* One token: its class and the source character it was made from. */
+struct cccl_Token
+{
+    enum cccl_TokenType type;
+    char value;
+};
+
+/*
+ * Node kinds of the parse tree. The character(s) noted beside each kind are
+ * the ones the parser maps to it.
+ */
+enum cccl_NodeType
+{
+    /* Not mapped from any character by the parser; presumably the kind the
+     * caller passes for the top-level node -- TODO confirm. */
+    cccl_Node_CODE,
+
+    cccl_Node_PUSHZERO,     /* ^ */
+    cccl_Node_INCREMENT,    /* + */
+    cccl_Node_DECREMENT,    /* - */
+    cccl_Node_ADD,          /* * */
+    cccl_Node_SUBTRACT,     /* ~ */
+    cccl_Node_REVERSE,      /* % */
+    cccl_Node_ASSIGN,       /* = */
+    cccl_Node_DELETE,       /* ! */
+    cccl_Node_PUSHVAR,      /* $ */
+    cccl_Node_ASSIGNLOCAL,  /* & */
+    cccl_Node_OUTPUT,       /* < */
+    cccl_Node_INPUT,        /* > */
+    cccl_Node_CALL,         /* @ */
+    cccl_Node_END,          /* # */
+    cccl_Node_CONTINUE,     /* : */
+
+    /* Block kinds: both delimiters map to the same kind. */
+    cccl_Node_PROCEDURE,    /* { } */
+    cccl_Node_INFINITE,     /* ( ) */
+    cccl_Node_REPEAT,       /* [ ] */
+    cccl_Node_CONDITIONAL,  /* ? ; */
+};
+
+/*
+ * Parse tree node. `in` points to `in_length` child node pointers (NULL when
+ * there are none); `value` carries the character argument attached to the
+ * node by the parser.
+ */
+struct cccl_Node
+{
+    enum cccl_NodeType type;
+    struct cccl_Node **in;
+    size_t in_length;
+    char value;
+};
+
+/*
+ * Look up the size of the file at `path` and allocate a buffer of that size
+ * in `file`. Returns 0 on success and nonzero on failure.
+ */
+int cccl_allocfile(const char *path, struct cccl_File *file);
+
+/* Run the front end over an already loaded file. */
+void cccl(struct cccl_File file);
+
+/*
+ * Split `size` bytes of `code` into tokens stored in `tokens`, which has
+ * room for `tokens_length` entries. Returns the number of tokens written.
+ */
+size_t tokenize(const char *code, size_t size, struct cccl_Token tokens[], size_t tokens_length);
+
+/*
+ * Build a heap-allocated node of kind `type` carrying `value`, whose
+ * children are parsed from the `tokens_length` tokens in `tokens`.
+ */
+struct cccl_Node *cccl_parse(struct cccl_Token tokens[], size_t tokens_length, enum cccl_NodeType type, char value);
+
+#endif /* CCCL_H */
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..a5f79e7
--- /dev/null
+++ b/main.c
@@ -0,0 +1,42 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stdio.h>
+
+
+/*
+ * Program entry point. Expects the path of the source file as argv[1].
+ *
+ * Allocates a buffer sized to the file, reads the file into it in chunks and
+ * hands the result to cccl(). Returns 1 when no path is given; any I/O or
+ * allocation failure terminates the process with status 1 via err()/errx().
+ */
+int main(int argc, char **argv)
+{
+    /* Upper bound on the number of bytes requested from fread() at once. */
+    enum { READ_CHUNK = 2048 };
+
+    /* Test argc, not argv[1]: with argc == 0, argv[1] is past the end. */
+    if (argc < 2)
+        return 1;
+
+    struct cccl_File file;
+
+    int error = cccl_allocfile(argv[1], &file);
+    if (error)
+        err(1, "cccl_allocfile()");
+
+    FILE *f = fopen(argv[1], "r");
+    if (!f)
+        err(1, "fopen()");
+
+    size_t bytes_read = 0;
+    while (bytes_read < file.size)
+    {
+        /* Request min(remaining, READ_CHUNK). A modulo here would ask for
+         * zero bytes whenever the remainder is a multiple of the chunk. */
+        size_t remaining = file.size - bytes_read;
+        size_t want = remaining < READ_CHUNK ? remaining : READ_CHUNK;
+
+        size_t read_now = fread(
+            file.buffer + bytes_read,
+            sizeof(*file.buffer),
+            want,
+            f
+        );
+        if (read_now == 0)
+        {
+            if (ferror(f))
+                errx(1, "couldn't read %s", argv[1]);
+            else
+                break;
+        }
+        /* Advance the write position so the next chunk lands after this one. */
+        bytes_read += read_now;
+    }
+    fclose(f);
+
+    /* The file may have yielded fewer bytes than stat() reported; expose
+     * only the bytes that were actually filled in. */
+    file.size = bytes_read;
+
+    cccl(file);
+    return 0;
+}
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..87eb498
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,130 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+
+/*
+ * Map a command character to the node type it stands for.
+ *
+ * The opening and closing character of a block both map to the same block
+ * node type. Terminates the process with status 1 when `c` is not a known
+ * command character.
+ */
+static enum cccl_NodeType get_nodetype(char c)
+{
+    switch (c)
+    {
+#define X(ch, name) case ch: return cccl_Node_##name
+        X('^', PUSHZERO);
+        X('+', INCREMENT);
+        X('-', DECREMENT);
+        X('*', ADD);
+        X('~', SUBTRACT);
+        X('%', REVERSE);
+        X('=', ASSIGN);
+        X('!', DELETE);
+        X('$', PUSHVAR);
+        X('&', ASSIGNLOCAL);
+        X('<', OUTPUT);
+        X('>', INPUT);
+        X('{', PROCEDURE);
+        X('}', PROCEDURE);
+        X('@', CALL);
+        X('(', INFINITE);
+        X(')', INFINITE);
+        X('[', REPEAT);
+        X(']', REPEAT);
+        X('#', END);
+        X(':', CONTINUE);
+        X('?', CONDITIONAL);
+        X(';', CONDITIONAL);
+#undef X
+    default:
+        break;
+    }
+    /* errx(), not err(): no library call failed, so appending
+     * strerror(errno) would print an unrelated message. */
+    errx(1, "Unrecognized command: [%d] %c", c, c);
+}
+
+
+/* Allocate and initialise a childless node; exits if allocation fails. */
+static struct cccl_Node *new_node(enum cccl_NodeType type, char value)
+{
+    struct cccl_Node *node = malloc(sizeof(*node));
+    if (!node)
+        err(1, "malloc()");
+
+    *node = (struct cccl_Node)
+    {
+        .type = type,
+        .in = NULL,
+        .in_length = 0,
+        .value = value,
+    };
+    return node;
+}
+
+/* Append `child` to `parent`'s child array; exits if allocation fails. */
+static void append_child(struct cccl_Node *parent, struct cccl_Node *child)
+{
+    /* realloc(NULL, n) acts like malloc(n), so the first append needs no
+     * special case. The result goes through a temporary so the old array is
+     * not lost if the call fails. */
+    struct cccl_Node **grown = realloc(parent->in, sizeof(*grown) * (parent->in_length + 1));
+    if (!grown)
+        err(1, "realloc()");
+
+    grown[parent->in_length] = child;
+    parent->in = grown;
+    ++parent->in_length;
+}
+
+/* Return the closing delimiter for a block-opening character, or 0. */
+static char closing_for(char opening)
+{
+    switch (opening)
+    {
+    case '{': return '}';
+    case '(': return ')';
+    case '[': return ']';
+    case '?': return ';';
+    default:  return 0;
+    }
+}
+
+/*
+ * Build a node of kind `type` carrying `value` whose children are parsed
+ * from tokens[0 .. tokens_length).
+ *
+ * - A COMMAND token becomes a child with no argument.
+ * - A COMMANDWITHARG token becomes a child whose value is the value of the
+ *   token that follows it.
+ * - A BLOCKSTART token becomes a child produced by parsing, recursively, the
+ *   tokens between it and its matching closing token; that child's value is
+ *   the value of the token immediately before the opening one.
+ * - Other tokens produce no node of their own.
+ *
+ * The returned tree is heap-allocated and owned by the caller. Allocation
+ * failure or an unmatched block terminates the process with status 1.
+ */
+struct cccl_Node *cccl_parse(struct cccl_Token tokens[], size_t tokens_length, enum cccl_NodeType type, char value)
+{
+    struct cccl_Node *res = new_node(type, value);
+
+    for (size_t i = 0; i < tokens_length; ++i)
+    {
+        switch (tokens[i].type)
+        {
+        case cccl_Token_COMMAND:
+        {
+            append_child(res, new_node(get_nodetype(tokens[i].value), 0));
+        } break;
+        case cccl_Token_COMMANDWITHARG:
+        {
+            assert(i + 1 != tokens_length);
+            append_child(res, new_node(get_nodetype(tokens[i].value), tokens[i + 1].value));
+        } break;
+        case cccl_Token_BLOCKSTART:
+        {
+            /* NOTE(review): the block takes its value from the preceding
+             * token, so a block that is the first token of its enclosing
+             * sequence trips this assert -- confirm that is intended. */
+            assert(i > 0);
+            char opening = tokens[i].value;
+            char closing = closing_for(opening);
+
+            size_t oldi = i;
+            int depth = 1;
+            /* Start one past the opening token: it is already accounted for
+             * in depth, and counting it again would keep depth from ever
+             * returning to zero. */
+            for (++i; i < tokens_length; ++i)
+            {
+                if (tokens[i].value == opening)
+                    ++depth;
+                else if (tokens[i].value == closing)
+                    --depth;
+
+                if (depth == 0)
+                    break;
+            }
+            if (i == tokens_length)
+                errx(1, "No matching bracket for %c", opening);
+
+            /* i now rests on the closing token; the outer loop's ++i steps
+             * past it. */
+            append_child(res, cccl_parse(
+                tokens + oldi + 1,
+                i - oldi - 1,
+                get_nodetype(tokens[i].value),
+                tokens[oldi - 1].value
+            ));
+        } break;
+        default:
+            break;
+        }
+    }
+
+    return res;
+}
diff --git a/readfile.c b/readfile.c
new file mode 100644
index 0000000..d4d17f8
--- /dev/null
+++ b/readfile.c
@@ -0,0 +1,19 @@
+#include "cccl.h"
+
+#include <stdlib.h>
+
+#include <sys/stat.h>
+
+
+/*
+ * Prepare `file` for holding the contents of the file at `path`.
+ *
+ * Stores the size reported by stat() in file->size and a freshly malloc'ed
+ * buffer in file->buffer. The file's contents are not read here.
+ *
+ * Returns 0 on success. On failure returns stat()'s nonzero result, or 1 if
+ * the buffer could not be allocated.
+ */
+int cccl_allocfile(const char *path, struct cccl_File *file)
+{
+    struct stat st;
+    int rc = stat(path, &st);
+    if (rc)
+        return rc;
+
+    file->size = st.st_size;
+    /* malloc(0) is allowed to return NULL; ask for at least one byte so an
+     * empty file is not mistaken for an allocation failure. */
+    file->buffer = malloc(file->size ? file->size : 1);
+
+    return !file->buffer;
+}
diff --git a/tokenizer.c b/tokenizer.c
new file mode 100644
index 0000000..04e2137
--- /dev/null
+++ b/tokenizer.c
@@ -0,0 +1,65 @@
+#include "cccl.h"
+
+#include <assert.h>
+#include <err.h>
+#include <stddef.h>
+
+
+/*
+ * Split `size` bytes of `code` into tokens.
+ *
+ * Each recognised character becomes one token whose value is that character.
+ * '/' starts a comment that runs to the next '\n'; nothing inside a comment
+ * produces a token or an error. Spaces and tabs are skipped. Any other
+ * character outside a comment terminates the process with status 1.
+ *
+ * `tokens` must have room for `tokens_length` entries; the process is
+ * terminated with status 1 if the input needs more. Returns the number of
+ * tokens written.
+ */
+size_t tokenize(const char *code, size_t size, struct cccl_Token tokens[], size_t tokens_length)
+{
+    size_t tokeni = 0;
+    int comment = 0;
+
+    /* A plain for loop, so that empty input never touches code[0]. */
+    for (size_t i = 0; i < size; ++i)
+    {
+        switch (code[i])
+        {
+/* Emit one token of the given class unless inside a comment. The capacity
+ * check runs before the store so the array can never be overrun, even when
+ * assertions are compiled out. */
+#define X(name)                                 \
+    {                                           \
+        if (comment)                            \
+            break;                              \
+        if (tokeni >= tokens_length)            \
+            errx(1, "Too many tokens (limit is %zu)", tokens_length); \
+        tokens[tokeni++] = (struct cccl_Token)  \
+        {                                       \
+            .type = cccl_Token_##name,          \
+            .value = code[i],                   \
+        };                                      \
+    } break
+        case '/':
+        {
+            comment = 1;
+        } break;
+        case '\n':
+        {
+            comment = 0;
+        } break;
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
+        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
+        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+            X(IDENTIFIER);
+        case '^': case '+': case '-': case '*': case '~':
+            X(COMMAND);
+        case '%': case '=': case '!': case '$': case '&':
+        case '<': case '>': case '#': case ':': case '@':
+            X(COMMANDWITHARG);
+        case '{': case '(': case '[': case '?':
+            X(BLOCKSTART);
+        case '}': case ')': case ']': case ';':
+            X(BLOCKEND);
+        case ' ': case '\t':
+        { } break;
+        default:
+        {
+            if (comment)
+                break;
+            errx(1, "Illegal symbol in a code: [%d] %c", code[i], code[i]);
+        } break;
+#undef X
+        }
+    }
+
+    return tokeni;
+}