/*
 * Copyright (c) 2026 Nakidai Perumenei
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "thac.h"

/*
 * NOTE(review): the header names were missing from this copy of the file;
 * they are reconstructed from the library calls made below (isdigit/isspace,
 * va_list, getc/ungetc, realloc, strcmp) -- confirm against the repository.
 */
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Token saved by ungettok() and handed back by the next gettok(). */
static struct tok storage;
/* Token most recently produced by gettok(). */
struct tok curtok;
/* Nonzero while `storage' holds a token that has not been re-read yet. */
static int isunget;
/* Incremented for every '\n' consumed by get(), decremented by unget(). */
ulong curline;

/*
 * Tell whether `ch' may appear in a word token.
 *
 * Returns 1 for ASCII letters, decimal digits, '@', '_' and '\'', and 0
 * for everything else, including values outside 0..255 such as EOF.
 */
static int
isidish(int ch)
{
	static const char lut[256] = {
		['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1,
		['g'] = 1, ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1,
		['m'] = 1, ['n'] = 1, ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1,
		['s'] = 1, ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1,
		['y'] = 1, ['z'] = 1,
		['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1,
		['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1,
		['M'] = 1, ['N'] = 1, ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1,
		['S'] = 1, ['T'] = 1, ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1,
		['Y'] = 1, ['Z'] = 1,
		['@'] = 1, ['_'] = 1,
		['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
		['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,
		['\''] = 1,
	};

	if (ch < 0 || ch > 255)
		return 0;
	return lut[ch];
}

/*
 * Append `ch' to the text of the current token.
 *
 * When the buffer has no room for one more element its capacity becomes
 * (cap + sizeof(element)) * 2 and the buffer is reallocated.  dieno() is
 * called if the buffer pointer is NULL afterwards.
 */
static void
append(int ch)
{
	if (curtok.info.cap < curtok.info.len + 1)
	{
		curtok.info.cap = (curtok.info.cap + sizeof(*curtok.info.p)) * 2;
		curtok.info.p = realloc(curtok.info.p, curtok.info.cap);
	}
	if (!curtok.info.p)
		dieno(1, "realloc()");
	curtok.info.p[curtok.info.len++] = ch;
}
static int get(void) { int ch; if ((ch = getc(curfile)) == EOF) return -1; append(ch); curline += (ch == '\n'); return ch; } static int unget(int ch) { --curtok.info.len; curline -= (ch == '\n'); return ungetc(ch, curfile); } void word(void) { char *s; int ch; while ((ch = get()) != EOF) if (!isidish(ch)) break; if (ch != EOF) unget(ch); append('\0'); s = curtok.info.p; curtok.type = TID; #define is(wanted) !strcmp(s, (wanted)) curtok.type = isdigit(*s) ? TNUM : is("assert") ? TKEYASSERT : is("if") ? TKEYIF : is("else") ? TKEYELSE : is("for") ? TKEYFOR : is("foreach") ? TKEYFOREACH : is("len") ? TKEYLEN : is("mod") ? TKEYMOD : is("node") ? TKEYNODE : is("with") ? TKEYWITH : is("break") ? TKEYBREAK : is("continue") ? TKEYCONT : TID; #undef is return; } void oper(void) { int ch; switch (ch = get()) { /* these are not start of any other operator */ #define SINGLE(ch, t) case (ch): curtok.type = (t); append('\0'); return SINGLE('{', TOPBRACE); SINGLE('}', TCLBRACE); SINGLE('[', TOPBRACK); SINGLE(']', TCLBRACK); SINGLE('(', TOPPAREN); SINGLE(')', TCLPAREN); SINGLE(':', TCOL); SINGLE(',', TCOMMA); SINGLE('?', TQUEST); SINGLE(';', TSEMICOL); SINGLE('~', TTILDE); #undef SINGLE /* * these are cases when operator is one of: * t1 = {ch1} * t21 = {ch1}{ch21} * t22 = {ch1}{ch22} */ #define DOUBLE(ch1, ch21, ch22, t1, t21, t22) case (ch1): \ { \ int next; \ curtok.type = (t1); \ if ((next = get()) == EOF) \ (void)0; \ else if (next == (ch21)) \ curtok.type = (t21); \ else if (next == (ch22)) \ curtok.type = (t22); \ else \ unget(next); \ append('\0'); \ return; \ } DOUBLE('&', '=', '&', TAMPER, TASSAMPER, TAND); DOUBLE('^', '=', EOF, TCARET, TASSCARET, 0); DOUBLE('=', '=', EOF, TASSIGN, TEQ, 0); DOUBLE('|', '=', '|', TPIPE, TASSPIPE, TOR); DOUBLE('/', '=', EOF, TSLASH, TASSSLASH, 0); DOUBLE('%', '=', EOF, TPERC, TASSPERC, 0); DOUBLE('!', '=', EOF, TEXCLAM, TNEQ, 0); DOUBLE('-', '=', '-', TMINUS, TASSMINUS, TDECR); DOUBLE('+', '=', '+', TPLUS, TASSPLUS, TINCR); DOUBLE('*', '=', 
'*', TASTER, TASSASTER, TPOW); #undef DOUBLE /* * these are cases when operator is one of: * t1 = {ch1} * t21 = {ch1}{ch21} * t22 = {ch1}{ch22} * t23 = {ch1}{ch1} * t3 = {ch1}{ch1}{ch3} */ #define TRIPLE(ch1, ch21, ch22, ch3, t1, t21, t22, t23, t3) case (ch1): \ { \ int next; \ curtok.type = (t1); \ if ((next = get()) == EOF) \ (void)0; \ else if (next == (ch21)) \ curtok.type = (t21); \ else if (next == (ch22)) \ curtok.type = (t22); \ else if (next == ch) \ { \ curtok.type = (t23); \ if ((next = get()) == EOF) \ (void)0; \ else if (next == (ch3)) \ curtok.type = (t3); \ else \ unget(next); \ } \ else \ unget(next); \ append('\0'); \ return; \ } TRIPLE('>', '=', '<', '=', TGREAT, TGREATEQ, TCONCAT, TRSHIFT, TASSRSHIFT); TRIPLE('<', '=', '-', '=', TLESS, TLESSEQ, TARRLEFT, TLSHIFT, TASSLSHIFT); #undef TRIPLE } complain(1, "unknown operator starting with %c", ch); } enum tok_t gettok(void) { int ch; curtok.info = (struct tinfo){NULL, 0, 0}; if (isunget) { isunget = 0; curtok = storage; /*say("reread %s(`%s')", tokname(curtok.type), curtok.info.p)*/; return curtok.type; } for (; (ch = get()) != EOF; curtok.info.len = 0) if (!isspace(ch)) goto found; return curtok.type = TEOF; found: unget(ch); if (isidish(ch)) word(); else oper(); /*say("read %s(`%s')", tokname(curtok.type), curtok.info.p)*/; return curtok.type; } void ungettok() { storage = curtok; isunget = 1; } int _exptok(enum tok_t first, ...) { enum tok_t next; va_list ap; int res; if (gettok() == TEOF) return 0; if (curtok.type == first) return 1; res = 0; va_start(ap, first); while ((next = va_arg(ap, enum tok_t)) != TEOF) if (curtok.type == next) { res = 1; break; } va_end(ap); return res; }