diff options
Diffstat (limited to 'parse.c')
| -rw-r--r-- | parse.c | 637 |
1 files changed, 637 insertions, 0 deletions
diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..b334db1 --- /dev/null +++ b/parse.c @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2026 Nakidai Perumenei <nakidai at disroot dot org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "thac.h" + +#include <stddef.h> +#include <stdlib.h> + + +/* + * expr = expr-assign + */ +#define pexpr pexpr_assign +static struct node *pexpr(void); +static struct node *pstmt(void); + +static enum noper_t +associate(enum tok_t t) +{ + switch (t) + { + case TKEYLEN: return OLEN; + case TKEYASSERT: return OASSERT; + case TARRLEFT: return OCONNECT; + case TCONCAT: return OCONCAT; + case TAMPER: return OBAND; + case TPIPE: return OBOR; + case TCARET: return OBXOR; + case TLSHIFT: return OLSHIFT; + case TRSHIFT: return ORSHIFT; + case TTILDE: return OINV; + case TAND: return OAND; + case TEXCLAM: return ONOT; + case TOR: return OOR; + case TEQ: return OEQ; + case TNEQ: return ONEQ; + case TLESS: return OLESS; + case TLESSEQ: return OLESSEQ; + case TGREAT: return OGREAT; + case TGREATEQ: return OGREATEQ; + case TPLUS: return OSUM; + case TMINUS: return OSUB; + case TASTER: return OMUL; + case TSLASH: return ODIV; + case TPERC: return OMOD; + case TPOW: return OPOW; + case TASSAMPER: return OASSBAND; + case TASSASTER: return OASSMUL; + case TASSCARET: return OASSBXOR; + case TASSIGN: return OASSIGN; + case TASSLSHIFT: return OASSLSHIFT; + case TASSMINUS: return OASSSUB; + case TASSPERC: return OASSMOD; + case TASSPIPE: return OASSBOR; + case TASSPLUS: return OASSSUM; + case TASSRSHIFT: return OASSRSHIFT; + case TASSSLASH: return OASSDIV; + default: complain(1, "%s cannot be associated", tokname(t)); + } + + /* not reached */ + return 0; +} + +static struct node * +mknode(enum node_t t) +{ + struct node *res = malloc(sizeof(*res)); + if (!res) + dieno(1, "malloc()"); + res->type = t; + return res; +} + +#define BINLEFT(NAME, NEXT, ...) static struct node *NAME(void) \ +{ \ + struct node *child, *node; \ + if (!(child = NEXT())) \ + return NULL; \ +loop: \ + if (!exptok(__VA_ARGS__)) \ + { \ + ungettok(); \ + return child; \ + } \ + node = mknode(NOPER); \ + node->noper.type = associate(curtok.type); \ + node->noper.l = child; \ + if (!(node->noper.r = NEXT())) \ + complain(1, "no right side for %s", nopername(node->noper.type)); \ + child = node; \ + goto loop; \ +} + +#define BINRIGHT(NAME, NEXT, ...) static struct node *NAME(void) \ +{ \ + struct node *child, *node; \ + if (!(child = NEXT())) \ + return NULL; \ + if (!exptok(__VA_ARGS__)) \ + { \ + ungettok(); \ + return child; \ + } \ + node = mknode(NOPER); \ + node->noper.type = associate(curtok.type); \ + node->noper.l = child; \ + if (!(node->noper.r = NAME())) \ + complain(1, "no right side for %s", nopername(node->noper.type)); \ + return node; \ +} + +/* + * comp = "{" *(stmt) "}" + */ +static struct node * +pcomp(void) +{ + struct node *node; + ulong i = 0; + + if (!exptok(TOPBRACE)) + return NULL; + node = mknode(NCOMP); + node->ncomp.stmts = NULL; + +loop: + /* TODO: add cap and increase like in lex.c */ + node->ncomp.stmts = realloc( + node->ncomp.stmts, + sizeof(struct node *) * (i + 1) + ); + + switch (gettok()) + { + break; case TEOF: complain(1, "no closing brace"); + break; case TCLBRACE: goto end; + break; default: + ungettok(); + if (!(node->ncomp.stmts[i] = pstmt())) + complain(1, "no statement"); + } + ++i; + goto loop; +end: + node->ncomp.len = i; + return node; +} + +/* + * expr-primary = NUMBER / id / "nil" / expr-node / expr-mod / "(" expr ")" + * expr-node = "node" "(" [*(expr ",") expr] ")" + * expr-mod = "mod" "(" [*(id ",") id] ")" comp + */ +static struct node * +pexpr_primary(void) +{ + struct node *node = NULL; + ulong i = 0; + + switch (gettok()) + { + break; case TEOF: return NULL; + break; case TNUM: + node = mknode(NNUM); + node->nnum = atol(curtok.info.p); + break; case TID: + node = mknode(NVAR); + node->nvar = curtok.info.p; + break; case TOPPAREN: + if (!(node = pexpr())) + complain(1, "empty paretheses"); + if (!exptok(TCLPAREN)) + complain(1, "no closing parenthesis"); + break; case TKEYNODE: + node = mknode(NNODE); + node->nnode.params = NULL; + + if (!exptok(TOPPAREN)) + complain(1, "no parentheses after node"); +nodeloop: + /* TODO: add cap and increase like in lex.c */ + node->nnode.params = realloc( + node->nnode.params, + sizeof(struct node *) * (i + 1) + ); + if (!node->nnode.params) + dieno(1, "realloc()"); + + switch(gettok()) + { + break; case TEOF: complain(1, "no closing parenthesis"); + break; case TCLPAREN: goto nodeend; + break; case TCOMMA: + if (!i || !(node->nnode.params[i] = pexpr())) + complain(1, "empty param in node"); + break; default: + ungettok(); + if (i) + complain(1, "no comma"); + else if (!(node->nnode.params[i] = pexpr())) + complain(1, "empty param in node"); + } + ++i; + goto nodeloop; +nodeend: + node->nnode.len = i; + break; case TKEYMOD: + node = mknode(NMOD); + node->nmod.params = NULL; + + if (!exptok(TOPPAREN)) + complain(1, "no parentheses after mod"); +modloop: + /* TODO: add cap and increase like in lex.c */ + node->nmod.params = realloc( + node->nmod.params, + sizeof(char *) * (i + 1) + ); + if (!node->nmod.params) + dieno(1, "realloc()"); + + switch(gettok()) + { + break; case TEOF: complain(1, "no closing parenthesis"); + break; case TCLPAREN: goto modend; + break; case TCOMMA: + if (!i || gettok() != TID) + complain(1, "expected id as a param name"); + else + node->nmod.params[i] = curtok.info.p; + break; default: + ungettok(); + if (i) + complain(1, "no comma"); + else if (gettok() != TID) + complain(1, "expected id as a param name"); + else + node->nmod.params[i] = curtok.info.p; + } + ++i; + goto modloop; +modend: + node->nmod.len = i; + + if (!(node->nmod.code = pcomp())) + complain(1, "no comp for mod"); + break; default: + ungettok(); + } + + return node; +} + +/* + * expr-post = expr-primary *("--" / "++" / "[" expr [":" expr] "]") + */ +static struct node * +pexpr_post(void) +{ + struct node *child, *node; + + child = pexpr_primary(); + if (!child) + return NULL; + +loop: + switch (gettok()) + { + break; case TEOF: return child; + break; case TDECR: case TINCR: + node = mknode(NOPER); + node->noper.type = curtok.type == TDECR ? OPOSTDECR : OPOSTINCR; + node->noper.m = child; + break; case TOPBRACK: + node = mknode(NOPER); + node->noper.type = OINDEX; + node->noper.l = child; + + if (!(node->noper.r = pexpr())) + complain(1, "no inside for []"); + switch (gettok()) + { + break; case TEOF: complain(1, "no closing bracket"); + break; case TCOL: + node->noper.type = OSLICE; + node->noper.m = node->noper.r; + if (!(node->noper.r = pexpr())) + complain(1, "no right side for []"); + if (!exptok(TCLBRACK)) + complain(1, "invalid syntax for []"); + /*FT*/ case TCLBRACK: + break; default: complain(1, "invalid syntax for []"); + } + break; default: + ungettok(); + return child; + } + + child = node; + goto loop; +} + +/* + * expr-pref = expr-post / ("assert" / "len" / "~" / "!" / "--" / "++") expr-pref + */ +static struct node * +pexpr_pref(void) +{ + struct node *node; + + switch (gettok()) + { + break; case TEOF: return NULL; + break; case TKEYASSERT: case TKEYLEN: case TTILDE: + case TEXCLAM: case TDECR: case TINCR: + node = mknode(NOPER); + node->noper.type = curtok.type == TDECR ? OPREDECR : + curtok.type == TINCR ? OPREINCR : + associate(curtok.type); + if (!(node->noper.m = pexpr_pref())) + complain(1, "no operand for %s", nopername(node->noper.type)); + break; case TOPBRACK: + node = mknode(NOPER); + node->noper.type = OARRAY; + + if (!(node->noper.l = pexpr())) + complain(1, "no inside for []"); + if (!exptok(TCLBRACK)) + complain(1, "invalid syntax for []"); + if (!(node->noper.r = pexpr_pref())) + complain(1, "no operand for []"); + break; default: + ungettok(); + return pexpr_post(); + } + + return node; +} + +/* expr-pow = expr-pref ["**" expr-pow] */ +BINRIGHT(pexpr_pow, pexpr_pref, TPOW) +/* expr-mult = expr-pow *(("*" / "><" / "%" / "/") expr-pow) */ +BINLEFT(pexpr_mult, pexpr_pow, TASTER, TCONCAT, TPERC, TSLASH) +/* expr-add = expr-mult *(("<-" / "-" / "+") expr-mult) */ +BINLEFT(pexpr_add, pexpr_mult, TARRLEFT, TMINUS, TPLUS) +/* expr-shift = expr-add *(("<<" / ">>") expr-add) */ +BINLEFT(pexpr_shift, pexpr_add, TLSHIFT, TRSHIFT) +/* expr-rel = expr-shift *(("<" / "<=" / ">" / ">=") expr-shift) */ +BINLEFT(pexpr_rel, pexpr_shift, TLESS, TLESSEQ, TGREAT, TGREATEQ) +/* expr-eq = expr-rel *(("==" / "!=") expr-rel) */ +BINLEFT(pexpr_eq, pexpr_rel, TEQ, TNEQ) +/* expr-band = expr-eq *("&" expr-eq) */ +BINLEFT(pexpr_band, pexpr_eq, TAMPER) +/* expr-bxor = expr-band *("^" expr-band) */ +BINLEFT(pexpr_bxor, pexpr_band, TCARET) +/* expr-bor = expr-bxor *("|" expr-bxor) */ +BINLEFT(pexpr_bor, pexpr_bxor, TPIPE) +/* expr-and = expr-bor *("&&" expr-bor) */ +BINLEFT(pexpr_and, pexpr_bor, TAND) +/* expr-or = expr-and *("||" expr-and) */ +BINLEFT(pexpr_or, pexpr_and, TOR) + +/* + * expr-cond = expr-or ? expr : expr-cond + */ +static struct node * +pexpr_cond(void) +{ + struct node *child, *node; + + child = pexpr_or(); + if (!child) + return NULL; + + if (!exptok(TQUEST)) + { + ungettok(); + return child; + } + + node = mknode(NOPER); + node->noper.type = OCOND; + node->noper.l = child; + + if (!(node->noper.m = pexpr())) + complain(1, "no then branch in ternary if"); + + if (!exptok(TCOL)) + complain(1, "no colon for ternary if"); + + if (!(node->noper.r = pexpr_cond())) + complain(1, "no else branch for ternary if"); + + return node; +} + +/* + * expr-assign = expr-cond [expr-assign-oper expr-assign] + * expr-assign-oper = "+=" / "-=" / "*=" / "/=" / "%=" / "&=" + * expr-assign-oper =/ "|=" / "^=" / "=" / ">>=" / "<<=" / "%=" + */ +BINRIGHT( + pexpr_assign, pexpr_cond, + TASSAMPER, TASSASTER, TASSCARET, TASSIGN, TASSLSHIFT, + TASSMINUS, TASSPERC, TASSPIPE, TASSPLUS, TASSRSHIFT, + TASSSLASH +) + +/* + * for = "for" "(" [expr] ";" [expr] ";" [expr] ")" stmt + */ +struct node * +pfor(void) +{ + struct node *node; + + if (!exptok(TKEYFOR)) + { + say("%s:%d: pfor selected but no TKEYFOR?", __FILE__, __LINE__); + ungettok(); + return NULL; + } + + node = mknode(NFOR); + + if (!exptok(TOPPAREN)) + complain(1, "no parentheses after for"); + node->nfor.before = pexpr(); + if (!exptok(TSEMICOL)) + complain(1, "invalid for parameters"); + node->nfor.cond = pexpr(); + if (!exptok(TSEMICOL)) + complain(1, "invalid for parameters"); + node->nfor.between = pexpr(); + if (!exptok(TCLPAREN)) + complain(1, "invalid for parameters"); + if (!(node->nfor.code = pstmt())) + complain(1, "no statement for for"); + + return node; +} + +/* + * foreach = "foreach" "(" expr ")" stmt + */ +struct node * +pforeach(void) +{ + struct node *node; + + if (!exptok(TKEYFOREACH)) + { + say("%s:%d: pforeach selected but no TKEYFOREACH?", __FILE__, __LINE__); + ungettok(); + return NULL; + } + + node = mknode(NFOREACH); + + if (!exptok(TOPPAREN) + || !(node->nforeach.obj = pexpr()) + || !exptok(TCLPAREN)) + complain(1, "invalid foreach parameter"); + if (!(node->nforeach.code = pstmt())) + complain(1, "no statement for foreach"); + + return node; +} + +/* + * if = "if" "(" expr ")" stmt ["else" stmt] + */ +struct node * +pif(void) +{ + struct node *node; + + if (!exptok(TKEYIF)) + { + say("%s:%d: pif selected but no TKEYIF?", __FILE__, __LINE__); + ungettok(); + return NULL; + } + + node = mknode(NCOND); + + if (!exptok(TOPPAREN) + || !(node->ncond.cond = pexpr()) + || !exptok(TCLPAREN)) + complain(1, "invalid if parameter"); + if (!(node->ncond.t = pstmt())) + complain(1, "no statement for then if branch"); + + node->ncond.f = NULL; + if (exptok(TKEYELSE)) + if (!(node->ncond.f = pstmt())) + complain(1, "no statement for else if branch"); + else; + else + ungettok(); + + return node; +} + +/* + * with = "with" expr "(" [*(expr ",") expr] ")" stmt + */ +struct node * +pwith(void) +{ + struct node *node; + ulong i = 0; + + if (!exptok(TKEYWITH)) + { + say("%s:%d: pwith selected but no TKEYWITH?", __FILE__, __LINE__); + ungettok(); + return NULL; + } + + node = mknode(NWITH); + node->nwith.args = NULL; + + if (!(node->nwith.mod = pexpr())) + complain(1, "no module specified"); + if (!exptok(TOPPAREN)) + complain(1, "no parameters in with"); +loop: + /* TODO: add cap and increase like in lex.c */ + node->nwith.args = realloc( + node->nwith.args, + sizeof(struct nnode *) * (i + 1) + ); + if (!node->nwith.args) + dieno(1, "realloc()"); + + switch(gettok()) + { + break; case TEOF: complain(1, "no closing parenthesis"); + break; case TCLPAREN: goto end; + break; case TCOMMA: + if (!i || !(node->nwith.args[i] = pexpr())) + complain(1, "empty param in with"); + break; default: + ungettok(); + if (i) + complain(1, "no comma"); + else if (!(node->nwith.args[i] = pexpr())) + complain(1, "empty param in with"); + } + ++i; + goto loop; +end: + if (!(node->nwith.code = pstmt())) + complain(1, "no statement for with"); + node->nwith.len = i; + + return node; +} + +/* + * break = "break" ";" + * continue = "continue" ";" + */ +struct node * +pbreak(void) +{ + struct node *node; + enum tok_t t; + + if (!exptok(TKEYBREAK, TKEYCONT)) + { + say("%s:%d: break selected but no break/continue?", __FILE__, __LINE__); + ungettok(); + return NULL; + } + + node = mknode(NWITH); + node->type = (t = curtok.type) == TKEYBREAK ? NBREAK : NCONT; + + if (!exptok(TSEMICOL)) + complain(1, "expected `;' after %s", tokname(t)); + + return node; +} + +/* + * stmt = comp / for / foreach / if / with / expr ";" / break / continue + */ +struct node * +pstmt(void) +{ + struct node *node; + + if (gettok() == TEOF) + return NULL; + ungettok(); + + switch (curtok.type) + { + break; case TOPBRACE: return pcomp(); + break; case TKEYFOR: return pfor(); + break; case TKEYFOREACH: return pforeach(); + break; case TKEYIF: return pif(); + break; case TKEYWITH: return pwith(); + break; case TKEYBREAK: + /*FT*/ case TKEYCONT: return pbreak(); + break; default: + node = pexpr(); + if (!exptok(TSEMICOL)) + complain(1, "expected `;' at the end of an expression"); + return node ? node : pstmt(); + } + + /* not reached */ + return NULL; +} + +struct node * +getstmt(void) +{ + return pstmt(); +} |