/*
 * path: root/tokenizer.c
 * blob: 04e213728ccdae6ccb9741e9808e24da8814ee59
 */
#include "cccl.h"

#include <assert.h>
#include <err.h>
#include <stddef.h>


/*
 * Convert `size` bytes of source text at `code` into single-character tokens
 * stored in `tokens`.
 *
 * Each recognised character produces one token whose `value` is the character
 * itself and whose `type` is the matching cccl_Token_* constant. A '/' starts
 * a comment that lasts until the next '\n'; while inside a comment nothing is
 * emitted and unrecognised characters are tolerated. Spaces, tabs and
 * newlines never produce tokens. Any other unrecognised character outside a
 * comment terminates the program through errx().
 *
 * code          - input bytes; only the first `size` of them are read
 * size          - number of bytes to scan; 0 yields no tokens
 * tokens        - output array
 * tokens_length - capacity of `tokens`, in elements
 *
 * Returns the number of tokens written to `tokens`.
 * Exits via errx() if a token would be stored past `tokens_length`.
 */
size_t tokenize(const char *code, size_t size, struct cccl_Token tokens[], size_t tokens_length)
{
    size_t tokeni = 0;
    int comment = 0;

    /* Counted loop so that an empty input (size == 0) never touches code[0]. */
    for (size_t i = 0; i < size; ++i)
    {
        switch (code[i])
        {
/*
 * X(name): body shared by every token-producing case. Inside a comment it
 * only leaves the switch. Otherwise it verifies there is room *before*
 * writing (a check that survives NDEBUG builds) and appends one token.
 */
#define X(name)                                                              \
    {                                                                        \
        if (comment)                                                         \
            break;                                                           \
        if (tokeni >= tokens_length)                                         \
            errx(1, "Too many tokens: buffer holds only %zu", tokens_length);\
        tokens[tokeni++] = (struct cccl_Token)                               \
        {                                                                    \
            .type = cccl_Token_##name,                                       \
            .value = code[i],                                                \
        };                                                                   \
    } break
        case '/':
        {
            /* Start of a comment; it runs to the end of the line. */
            comment = 1;
        } break;
        case '\n':
        {
            /* End of line always closes a comment. */
            comment = 0;
        } break;
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i':
        case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I':
        case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
            X(IDENTIFIER);
        case '^': case '+': case '-': case '*': case '~':
            X(COMMAND);
        case '%': case '=': case '!': case '$': case '&':
        case '<': case '>': case '#': case ':': case '@':
            X(COMMANDWITHARG);
        case '{': case '(': case '[': case '?':
            X(BLOCKSTART);
        case '}': case ')': case ']': case ';':
            X(BLOCKEND);
        case ' ': case '\t':
        { } break;
        default:
        {
            /* Unknown characters are only an error outside comments. */
            if (comment)
                break;
            errx(1, "Illegal symbol in a code: [%d] %c", code[i], code[i]);
        } break;
#undef X
        }
        /*
         * Kept from the original: in debug builds the last slot of `tokens`
         * stays unused. NOTE(review): confirm whether callers rely on that
         * spare element before relaxing this to `<=`.
         */
        assert(tokeni < tokens_length);
    }

    return tokeni;
}