From 8afa976e073c7bc29c878230eead6dddfdcc08a1 Mon Sep 17 00:00:00 2001 From: Nakidai Date: Sun, 5 Apr 2026 19:51:14 +0300 Subject: Add code --- lex.c | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 lex.c (limited to 'lex.c') diff --git a/lex.c b/lex.c new file mode 100644 index 0000000..29b56f0 --- /dev/null +++ b/lex.c @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2026 Nakidai Perumenei + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "thac.h" + +#include +#include +#include +#include +#include + + +static struct tok storage; +struct tok curtok; +static int isunget; +ulong curline; + +static int +isidish(int ch) +{ + static char lut[256] = { + ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1, + ['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1, + ['m'] = 1, ['n'] = 1, ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, + ['s'] = 1, ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, + ['y'] = 1, ['z'] = 1, ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, + ['E'] = 1, ['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, + ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, + ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, + ['W'] = 1, ['X'] = 1, ['Y'] = 1, ['Z'] = 1, ['@'] = 1, ['_'] = 1, + ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, ['5'] = 1, + ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, ['\''] = 1, + }; + + if (ch < 0 || ch > 255) + return 0; + + return lut[ch]; +} + +static void +append(int ch) +{ + if (curtok.info.cap < curtok.info.len + 1) + curtok.info.p = realloc( + curtok.info.p, + curtok.info.cap = (curtok.info.cap + sizeof(*curtok.info.p)) * 2 + ); + if (!curtok.info.p) + dieno(1, "realloc()"); + curtok.info.p[curtok.info.len++] = ch; +} + +static int +get(void) +{ + int ch; + + if ((ch = getc(curfile)) == EOF) + return -1; + append(ch); + + curline += (ch == '\n'); + + return ch; +} + +static int +unget(int ch) +{ + --curtok.info.len; + curline -= (ch == '\n'); + return ungetc(ch, curfile); +} + +void +word(void) +{ + char *s; + int ch; + + while ((ch = get()) != EOF) + if (!isidish(ch)) + break; + if (ch != EOF) + unget(ch); + append('\0'); + + s = curtok.info.p; + + curtok.type = TID; +#define is(wanted) !strcmp(s, (wanted)) + curtok.type = isdigit(*s) ? TNUM : + is("assert") ? TKEYASSERT : + is("if") ? TKEYIF : + is("else") ? TKEYELSE : + is("for") ? TKEYFOR : + is("foreach") ? TKEYFOREACH : + is("len") ? TKEYLEN : + is("mod") ? TKEYMOD : + is("node") ? TKEYNODE : + is("with") ? TKEYWITH : + is("break") ? TKEYBREAK : + is("continue") ? TKEYCONT : + TID; +#undef is + + return; +} + +void +oper(void) +{ + int ch; + + switch (ch = get()) + { + /* these are not start of any other operator */ +#define SINGLE(ch, t) case (ch): curtok.type = (t); append('\0'); return + SINGLE('{', TOPBRACE); + SINGLE('}', TCLBRACE); + SINGLE('[', TOPBRACK); + SINGLE(']', TCLBRACK); + SINGLE('(', TOPPAREN); + SINGLE(')', TCLPAREN); + SINGLE(':', TCOL); + SINGLE(',', TCOMMA); + SINGLE('?', TQUEST); + SINGLE(';', TSEMICOL); + SINGLE('~', TTILDE); +#undef SINGLE + /* + * these are cases when operator is one of: + * t1 = {ch1} + * t21 = {ch1}{ch21} + * t22 = {ch1}{ch22} + */ +#define DOUBLE(ch1, ch21, ch22, t1, t21, t22) case (ch1): \ +{ \ + int next; \ + curtok.type = (t1); \ + if ((next = get()) == EOF) \ + (void)0; \ + else if (next == (ch21)) \ + curtok.type = (t21); \ + else if (next == (ch22)) \ + curtok.type = (t22); \ + else \ + unget(next); \ + append('\0'); \ + return; \ +} + DOUBLE('&', '=', '&', TAMPER, TASSAMPER, TAND); + DOUBLE('^', '=', EOF, TCARET, TASSCARET, 0); + DOUBLE('=', '=', EOF, TASSIGN, TEQ, 0); + DOUBLE('|', '=', '|', TPIPE, TASSPIPE, TOR); + DOUBLE('/', '=', EOF, TSLASH, TASSSLASH, 0); + DOUBLE('%', '=', EOF, TPERC, TASSPERC, 0); + DOUBLE('!', '=', EOF, TEXCLAM, TNEQ, 0); + DOUBLE('-', '=', '-', TMINUS, TASSMINUS, TDECR); + DOUBLE('+', '=', '+', TPLUS, TASSPLUS, TINCR); + DOUBLE('*', '=', '*', TASTER, TASSASTER, TPOW); +#undef DOUBLE + /* + * these are cases when operator is one of: + * t1 = {ch1} + * t21 = {ch1}{ch21} + * t22 = {ch1}{ch22} + * t23 = {ch1}{ch1} + * t3 = {ch1}{ch1}{ch3} + */ +#define TRIPLE(ch1, ch21, ch22, ch3, t1, t21, t22, t23, t3) case (ch1): \ +{ \ + int next; \ + curtok.type = (t1); \ + if ((next = get()) == EOF) \ + (void)0; \ + else if (next == (ch21)) \ + curtok.type = (t21); \ + else if (next == (ch22)) \ + curtok.type = (t22); \ + else if (next == ch) \ + { \ + curtok.type = (t23); \ + if ((next = get()) == EOF) \ + (void)0; \ + else if (next == (ch3)) \ + curtok.type = (t3); \ + else \ + unget(next); \ + } \ + else \ + unget(next); \ + append('\0'); \ + return; \ +} + TRIPLE('>', '=', '<', '=', TGREAT, TGREATEQ, TCONCAT, TRSHIFT, TASSRSHIFT); + TRIPLE('<', '=', '-', '=', TLESS, TLESSEQ, TARRLEFT, TLSHIFT, TASSLSHIFT); +#undef TRIPLE + } + + complain(1, "unknown operator starting with %c", ch); +} + +enum tok_t +gettok(void) +{ + int ch; + + curtok.info = (struct tinfo){NULL, 0, 0}; + + if (isunget) + { + isunget = 0; + curtok = storage; + /*say("reread %s(`%s')", tokname(curtok.type), curtok.info.p)*/; + return curtok.type; + } + + for (; (ch = get()) != EOF; curtok.info.len = 0) + if (!isspace(ch)) + goto found; + return curtok.type = TEOF; +found: + unget(ch); + + if (isidish(ch)) + word(); + else + oper(); + + /*say("read %s(`%s')", tokname(curtok.type), curtok.info.p)*/; + return curtok.type; +} + +void +ungettok() +{ + storage = curtok; + isunget = 1; +} + +int +_exptok(enum tok_t first, ...) +{ + enum tok_t next; + va_list ap; + int res; + + if (gettok() == TEOF) + return 0; + if (curtok.type == first) + return 1; + + res = 0; + va_start(ap, first); + while ((next = va_arg(ap, enum tok_t)) != TEOF) + if (curtok.type == next) + { + res = 1; + break; + } + va_end(ap); + + return res; +} -- cgit 1.4.1