summary refs log tree commit diff
path: root/parse.c
diff options
context:
space:
mode:
author Nakidai <nakidai@disroot.org> 2026-04-05 19:51:14 +0300
committer Nakidai <nakidai@disroot.org> 2026-04-05 20:01:32 +0300
commit 8afa976e073c7bc29c878230eead6dddfdcc08a1 (patch)
tree 044b8ddf87b9a3c8b5a229b27e84e870468fb950 /parse.c
download thac-8afa976e073c7bc29c878230eead6dddfdcc08a1.tar.gz
thac-8afa976e073c7bc29c878230eead6dddfdcc08a1.zip
Add code v0.1.0
Diffstat (limited to 'parse.c')
-rw-r--r-- parse.c 637
1 files changed, 637 insertions, 0 deletions
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..b334db1
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,637 @@
+/*
+ * Copyright (c) 2026 Nakidai Perumenei <nakidai at disroot dot org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "thac.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+
+/*
+ * expr = expr-assign
+ *
+ * pexpr is a macro alias for pexpr_assign, the outermost expression level,
+ * which is generated further down with BINRIGHT.  It and pstmt are declared
+ * up front because the parsers below call them before they are defined.
+ */
+#define pexpr pexpr_assign
+static struct node *pexpr(void);
+static struct node *pstmt(void);
+
+/*
+ * Translate an operator token into the operator kind stored in an NOPER
+ * node.  Tokens without a mapping are reported through complain(); the
+ * zero result after that is only a placeholder for the compiler.
+ */
+static enum noper_t
+associate(enum tok_t t)
+{
+	enum noper_t op = 0;
+
+	switch (t)
+	{
+	/* keyword operators */
+	case TKEYLEN:    op = OLEN;       break;
+	case TKEYASSERT: op = OASSERT;    break;
+
+	/* node and sequence operators */
+	case TARRLEFT:   op = OCONNECT;   break;
+	case TCONCAT:    op = OCONCAT;    break;
+
+	/* bitwise */
+	case TAMPER:     op = OBAND;      break;
+	case TPIPE:      op = OBOR;       break;
+	case TCARET:     op = OBXOR;      break;
+	case TLSHIFT:    op = OLSHIFT;    break;
+	case TRSHIFT:    op = ORSHIFT;    break;
+	case TTILDE:     op = OINV;       break;
+
+	/* logical */
+	case TAND:       op = OAND;       break;
+	case TOR:        op = OOR;        break;
+	case TEXCLAM:    op = ONOT;       break;
+
+	/* comparison */
+	case TEQ:        op = OEQ;        break;
+	case TNEQ:       op = ONEQ;       break;
+	case TLESS:      op = OLESS;      break;
+	case TLESSEQ:    op = OLESSEQ;    break;
+	case TGREAT:     op = OGREAT;     break;
+	case TGREATEQ:   op = OGREATEQ;   break;
+
+	/* arithmetic */
+	case TPLUS:      op = OSUM;       break;
+	case TMINUS:     op = OSUB;       break;
+	case TASTER:     op = OMUL;       break;
+	case TSLASH:     op = ODIV;       break;
+	case TPERC:      op = OMOD;       break;
+	case TPOW:       op = OPOW;       break;
+
+	/* assignment forms */
+	case TASSIGN:    op = OASSIGN;    break;
+	case TASSPLUS:   op = OASSSUM;    break;
+	case TASSMINUS:  op = OASSSUB;    break;
+	case TASSASTER:  op = OASSMUL;    break;
+	case TASSSLASH:  op = OASSDIV;    break;
+	case TASSPERC:   op = OASSMOD;    break;
+	case TASSAMPER:  op = OASSBAND;   break;
+	case TASSPIPE:   op = OASSBOR;    break;
+	case TASSCARET:  op = OASSBXOR;   break;
+	case TASSLSHIFT: op = OASSLSHIFT; break;
+	case TASSRSHIFT: op = OASSRSHIFT; break;
+
+	default:
+		complain(1, "%s cannot be associated", tokname(t));
+		break;
+	}
+
+	/* only reached with a mapped operator */
+	return op;
+}
+
+/*
+ * Allocate a parse-tree node and set its type to t.
+ *
+ * The node is zero-filled: parsers in this file frequently set only some
+ * members (e.g. just noper.m for a unary operator), and with plain malloc()
+ * the remaining ones would hold indeterminate values.
+ *
+ * Allocation failure is reported through dieno(); the code below relies on
+ * dieno(1, ...) not returning.
+ */
+static struct node *
+mknode(enum node_t t)
+{
+	struct node *res = calloc(1, sizeof(*res));
+	if (!res)
+		dieno(1, "calloc()");
+	res->type = t;
+	return res;
+}
+
+/*
+ * BINLEFT(NAME, NEXT, tokens...) defines `static struct node *NAME(void)',
+ * the parser for one left-associative binary level:
+ *
+ *	NAME = NEXT *((tokens...) NEXT)
+ *
+ * Each accepted operator token wraps the tree built so far as the left
+ * child of a new NOPER node, with the following NEXT as its right child.
+ * The first token that is not one of `tokens' is pushed back.  Returns NULL
+ * when the leading NEXT yields nothing.
+ */
+#define BINLEFT(NAME, NEXT, ...) static struct node *NAME(void) \
+{ \
+	struct node *lhs, *op; \
+	lhs = NEXT(); \
+	if (!lhs) \
+		return NULL; \
+	while (exptok(__VA_ARGS__)) \
+	{ \
+		op = mknode(NOPER); \
+		op->noper.type = associate(curtok.type); \
+		op->noper.l = lhs; \
+		op->noper.r = NEXT(); \
+		if (!op->noper.r) \
+			complain(1, "no right side for %s", nopername(op->noper.type)); \
+		lhs = op; \
+	} \
+	ungettok(); \
+	return lhs; \
+}
+
+/*
+ * BINRIGHT(NAME, NEXT, tokens...) defines `static struct node *NAME(void)',
+ * the parser for one right-associative binary level:
+ *
+ *	NAME = NEXT [(tokens...) NAME]
+ *
+ * The right operand is parsed by NAME itself, so a chain of operators
+ * nests to the right.  A token that is not one of `tokens' is pushed back
+ * and the NEXT result is returned unchanged.  Returns NULL when the leading
+ * NEXT yields nothing.
+ */
+#define BINRIGHT(NAME, NEXT, ...) static struct node *NAME(void) \
+{ \
+	struct node *lhs, *op; \
+	lhs = NEXT(); \
+	if (!lhs) \
+		return NULL; \
+	if (exptok(__VA_ARGS__)) \
+	{ \
+		op = mknode(NOPER); \
+		op->noper.type = associate(curtok.type); \
+		op->noper.l = lhs; \
+		op->noper.r = NAME(); \
+		if (!op->noper.r) \
+			complain(1, "no right side for %s", nopername(op->noper.type)); \
+		return op; \
+	} \
+	ungettok(); \
+	return lhs; \
+}
+
+/*
+ * comp = "{" *(stmt) "}"
+ *
+ * Parse a brace-delimited compound statement into an NCOMP node whose
+ * ncomp.stmts array holds ncomp.len statements.  Returns NULL when the next
+ * token is not "{".  A missing "}" or a missing statement is reported
+ * through complain(); allocation failure through dieno().
+ */
+static struct node *
+pcomp(void)
+{
+	struct node *node;
+	ulong i = 0;	/* number of statements stored so far */
+
+	if (!exptok(TOPBRACE))
+		return NULL;
+	node = mknode(NCOMP);
+	node->ncomp.stmts = NULL;
+
+loop:
+	/* TODO: add cap and increase like in lex.c */
+	/* grow by one slot up front so that index i is always writable */
+	node->ncomp.stmts = realloc(
+		node->ncomp.stmts,
+		sizeof(struct node *) * (i + 1)
+	);
+	/* same check as the other growth loops in this file */
+	if (!node->ncomp.stmts)
+		dieno(1, "realloc()");
+
+	switch (gettok())
+	{
+	break; case TEOF: complain(1, "no closing brace");
+	break; case TCLBRACE: goto end;
+	break; default:
+		/* not a brace: hand the token back and parse a statement */
+		ungettok();
+		if (!(node->ncomp.stmts[i] = pstmt()))
+			complain(1, "no statement");
+	}
+	++i;
+	goto loop;
+end:
+	node->ncomp.len = i;
+	return node;
+}
+
+/*
+ * expr-primary = NUMBER / id / "nil" / expr-node / expr-mod / "(" expr ")"
+ * expr-node = "node" "(" [*(expr ",") expr] ")"
+ * expr-mod = "mod" "(" [*(id ",") id] ")" comp
+ *
+ * Parse one primary expression and return its node.  Returns NULL with the
+ * token pushed back when the next token cannot start a primary, and NULL
+ * without a push-back on TEOF.  Malformed input is reported through
+ * complain(); allocation failure through dieno().
+ *
+ * NOTE(review): the grammar above lists "nil", but no case below handles
+ * it explicitly -- confirm how the lexer delivers it.
+ */
+static struct node *
+pexpr_primary(void)
+{
+	struct node *node = NULL;
+	ulong i = 0;	/* parameters stored so far (node and mod lists) */
+
+	switch (gettok())
+	{
+	break; case TEOF: return NULL;
+	break; case TNUM:
+		node = mknode(NNUM);
+		/*
+		 * NOTE(review): atol() reports no errors, so an out-of-range
+		 * literal is not diagnosed here.
+		 */
+		node->nnum = atol(curtok.info.p);
+	break; case TID:
+		node = mknode(NVAR);
+		node->nvar = curtok.info.p;
+	break; case TOPPAREN:
+		if (!(node = pexpr()))
+			complain(1, "empty parentheses");
+		if (!exptok(TCLPAREN))
+			complain(1, "no closing parenthesis");
+	break; case TKEYNODE:
+		node = mknode(NNODE);
+		node->nnode.params = NULL;
+
+		if (!exptok(TOPPAREN))
+			complain(1, "no parentheses after node");
+nodeloop:
+		/* TODO: add cap and increase like in lex.c */
+		/* grow by one slot up front so that index i is always writable */
+		node->nnode.params = realloc(
+			node->nnode.params,
+			sizeof(struct node *) * (i + 1)
+		);
+		if (!node->nnode.params)
+			dieno(1, "realloc()");
+
+		switch(gettok())
+		{
+		break; case TEOF: complain(1, "no closing parenthesis");
+		break; case TCLPAREN: goto nodeend;
+		break; case TCOMMA:
+			/* a comma is only valid after at least one parameter */
+			if (!i || !(node->nnode.params[i] = pexpr()))
+				complain(1, "empty param in node");
+		break; default:
+			/* without a comma this can only be the first parameter */
+			ungettok();
+			if (i)
+				complain(1, "no comma");
+			else if (!(node->nnode.params[i] = pexpr()))
+				complain(1, "empty param in node");
+		}
+		++i;
+		goto nodeloop;
+nodeend:
+		node->nnode.len = i;
+	break; case TKEYMOD:
+		node = mknode(NMOD);
+		node->nmod.params = NULL;
+
+		if (!exptok(TOPPAREN))
+			complain(1, "no parentheses after mod");
+modloop:
+		/* TODO: add cap and increase like in lex.c */
+		/* grow by one slot up front so that index i is always writable */
+		node->nmod.params = realloc(
+			node->nmod.params,
+			sizeof(char *) * (i + 1)
+		);
+		if (!node->nmod.params)
+			dieno(1, "realloc()");
+
+		switch(gettok())
+		{
+		break; case TEOF: complain(1, "no closing parenthesis");
+		break; case TCLPAREN: goto modend;
+		break; case TCOMMA:
+			/* a comma is only valid after at least one name */
+			if (!i || gettok() != TID)
+				complain(1, "expected id as a param name");
+			else
+				node->nmod.params[i] = curtok.info.p;
+		break; default:
+			/* without a comma this can only be the first name */
+			ungettok();
+			if (i)
+				complain(1, "no comma");
+			else if (gettok() != TID)
+				complain(1, "expected id as a param name");
+			else
+				node->nmod.params[i] = curtok.info.p;
+		}
+		++i;
+		goto modloop;
+modend:
+		node->nmod.len = i;
+
+		/* the module body is a mandatory compound statement */
+		if (!(node->nmod.code = pcomp()))
+			complain(1, "no comp for mod");
+	break; default:
+		/* not a primary: leave the token for the caller, return NULL */
+		ungettok();
+	}
+
+	return node;
+}
+
+/*
+ * expr-post = expr-primary *("--" / "++" / "[" expr [":" expr] "]")
+ *
+ * Parse a primary followed by any number of postfix operators, each one
+ * wrapping the expression built so far.  Decrement/increment store the
+ * operand in noper.m.  Indexing stores target and index in noper.l and
+ * noper.r.  Slicing stores the target in noper.l and the two bounds in
+ * noper.m and noper.r.  Returns NULL when there is no primary.
+ */
+static struct node *
+pexpr_post(void)
+{
+	struct node *operand, *op;
+
+	if (!(operand = pexpr_primary()))
+		return NULL;
+
+	for (;;)
+	{
+		switch (gettok())
+		{
+		case TEOF:
+			return operand;
+
+		case TDECR:
+		case TINCR:
+			op = mknode(NOPER);
+			if (curtok.type == TDECR)
+				op->noper.type = OPOSTDECR;
+			else
+				op->noper.type = OPOSTINCR;
+			op->noper.m = operand;
+			break;
+
+		case TOPBRACK:
+			op = mknode(NOPER);
+			op->noper.type = OINDEX;
+			op->noper.l = operand;
+
+			op->noper.r = pexpr();
+			if (!op->noper.r)
+				complain(1, "no inside for []");
+
+			switch (gettok())
+			{
+			case TEOF:
+				complain(1, "no closing bracket");
+				break;
+			case TCOL:
+				/* a colon turns the index into a slice */
+				op->noper.type = OSLICE;
+				op->noper.m = op->noper.r;
+				op->noper.r = pexpr();
+				if (!op->noper.r)
+					complain(1, "no right side for []");
+				if (!exptok(TCLBRACK))
+					complain(1, "invalid syntax for []");
+				break;
+			case TCLBRACK:
+				break;
+			default:
+				complain(1, "invalid syntax for []");
+				break;
+			}
+			break;
+
+		default:
+			/* not a postfix operator: give it back and stop */
+			ungettok();
+			return operand;
+		}
+
+		operand = op;
+	}
+}
+
+/*
+ * expr-pref =  expr-post
+ * expr-pref =/ ("assert" / "len" / "~" / "!" / "--" / "++") expr-pref
+ * expr-pref =/ "[" expr "]" expr-pref
+ *
+ * Parse prefix operators.  A unary operator stores its operand in noper.m.
+ * The bracket form builds an OARRAY node with the bracketed expression in
+ * noper.l and the following operand in noper.r.  Anything else is handed to
+ * pexpr_post().  Returns NULL on TEOF.
+ */
+static struct node *
+pexpr_pref(void)
+{
+	struct node *op;
+
+	switch (gettok())
+	{
+	case TEOF:
+		return NULL;
+
+	case TOPBRACK:
+		op = mknode(NOPER);
+		op->noper.type = OARRAY;
+
+		op->noper.l = pexpr();
+		if (!op->noper.l)
+			complain(1, "no inside for []");
+		if (!exptok(TCLBRACK))
+			complain(1, "invalid syntax for []");
+		op->noper.r = pexpr_pref();
+		if (!op->noper.r)
+			complain(1, "no operand for []");
+		break;
+
+	case TKEYASSERT:
+	case TKEYLEN:
+	case TTILDE:
+	case TEXCLAM:
+	case TDECR:
+	case TINCR:
+		op = mknode(NOPER);
+		/* prefix "--" and "++" have their own kinds, separate from associate() */
+		if (curtok.type == TDECR)
+			op->noper.type = OPREDECR;
+		else if (curtok.type == TINCR)
+			op->noper.type = OPREINCR;
+		else
+			op->noper.type = associate(curtok.type);
+		op->noper.m = pexpr_pref();
+		if (!op->noper.m)
+			complain(1, "no operand for %s", nopername(op->noper.type));
+		break;
+
+	default:
+		ungettok();
+		return pexpr_post();
+	}
+
+	return op;
+}
+
+/*
+ * Binary operator levels, generated with BINRIGHT/BINLEFT.  They are listed
+ * from the tightest-binding level (expr-pow) to the loosest (expr-or); each
+ * level parses its operands with the level defined just before it.
+ *
+ * expr-pow   = expr-pref ["**" expr-pow]
+ */
+BINRIGHT(pexpr_pow, pexpr_pref, TPOW)
+/* expr-mult  = expr-pow *(("*" / "><" / "%" / "/") expr-pow) */
+BINLEFT(pexpr_mult, pexpr_pow, TASTER, TCONCAT, TPERC, TSLASH)
+/* expr-add   = expr-mult *(("<-" / "-" / "+") expr-mult) */
+BINLEFT(pexpr_add, pexpr_mult, TARRLEFT, TMINUS, TPLUS)
+/* expr-shift = expr-add *(("<<" / ">>") expr-add) */
+BINLEFT(pexpr_shift, pexpr_add, TLSHIFT, TRSHIFT)
+/* expr-rel   = expr-shift *(("<" / "<=" / ">" / ">=") expr-shift) */
+BINLEFT(pexpr_rel, pexpr_shift, TLESS, TLESSEQ, TGREAT, TGREATEQ)
+/* expr-eq    = expr-rel *(("==" / "!=") expr-rel) */
+BINLEFT(pexpr_eq, pexpr_rel, TEQ, TNEQ)
+/* expr-band  = expr-eq *("&" expr-eq) */
+BINLEFT(pexpr_band, pexpr_eq, TAMPER)
+/* expr-bxor  = expr-band *("^" expr-band) */
+BINLEFT(pexpr_bxor, pexpr_band, TCARET)
+/* expr-bor   = expr-bxor *("|" expr-bxor) */
+BINLEFT(pexpr_bor, pexpr_bxor, TPIPE)
+/* expr-and   = expr-bor *("&&" expr-bor) */
+BINLEFT(pexpr_and, pexpr_bor, TAND)
+/* expr-or    = expr-and *("||" expr-and) */
+BINLEFT(pexpr_or, pexpr_and, TOR)
+
+/*
+ * expr-cond = expr-or ["?" expr ":" expr-cond]
+ *
+ * Parse an optional ternary conditional.  Without a "?" the expr-or result
+ * is returned unchanged and the peeked token is pushed back.  Otherwise an
+ * OCOND node is built with the condition in noper.l, the "then" value in
+ * noper.m and the "else" value in noper.r.  Returns NULL when there is no
+ * leading expression.
+ */
+static struct node *
+pexpr_cond(void)
+{
+	struct node *cond, *tern;
+
+	if (!(cond = pexpr_or()))
+		return NULL;
+
+	if (exptok(TQUEST))
+	{
+		tern = mknode(NOPER);
+		tern->noper.type = OCOND;
+		tern->noper.l = cond;
+
+		tern->noper.m = pexpr();
+		if (!tern->noper.m)
+			complain(1, "no then branch in ternary if");
+
+		if (!exptok(TCOL))
+			complain(1, "no colon for ternary if");
+
+		tern->noper.r = pexpr_cond();
+		if (!tern->noper.r)
+			complain(1, "no else branch for ternary if");
+
+		return tern;
+	}
+
+	/* plain expr-or: hand the peeked token back */
+	ungettok();
+	return cond;
+}
+
+/*
+ * expr-assign      =  expr-cond [expr-assign-oper expr-assign]
+ * expr-assign-oper =  "+=" / "-=" / "*=" / "/=" / "%=" / "&="
+ * expr-assign-oper =/ "|=" / "^=" / "=" / ">>=" / "<<="
+ *
+ * Right-associative; this is the function the pexpr alias refers to.
+ */
+BINRIGHT(
+	pexpr_assign, pexpr_cond,
+	TASSAMPER, TASSASTER, TASSCARET, TASSIGN, TASSLSHIFT,
+	TASSMINUS, TASSPERC, TASSPIPE, TASSPLUS, TASSRSHIFT,
+	TASSSLASH
+)
+
+/*
+ * for = "for" "(" [expr] ";" [expr] ";" [expr] ")" stmt
+ *
+ * Parse a for loop into an NFOR node.  Each of the three header
+ * expressions may be absent, in which case the matching member (before,
+ * cond, between) is whatever pexpr() returned for it; the body is
+ * mandatory.  Returns NULL, after a diagnostic and a push-back, when the
+ * next token is not "for".
+ */
+struct node *
+pfor(void)
+{
+	struct node *loop;
+
+	if (!exptok(TKEYFOR))
+	{
+		say("%s:%d: pfor selected but no TKEYFOR?", __FILE__, __LINE__);
+		ungettok();
+		return NULL;
+	}
+
+	loop = mknode(NFOR);
+
+	/* "(" [before] ";" */
+	if (!exptok(TOPPAREN))
+		complain(1, "no parentheses after for");
+	loop->nfor.before = pexpr();
+	if (!exptok(TSEMICOL))
+		complain(1, "invalid for parameters");
+
+	/* [cond] ";" */
+	loop->nfor.cond = pexpr();
+	if (!exptok(TSEMICOL))
+		complain(1, "invalid for parameters");
+
+	/* [between] ")" */
+	loop->nfor.between = pexpr();
+	if (!exptok(TCLPAREN))
+		complain(1, "invalid for parameters");
+
+	/* body */
+	loop->nfor.code = pstmt();
+	if (!loop->nfor.code)
+		complain(1, "no statement for for");
+
+	return loop;
+}
+
+/*
+ * foreach = "foreach" "(" expr ")" stmt
+ *
+ * Parse a foreach loop into an NFOREACH node: the parenthesised expression
+ * goes to nforeach.obj and the body to nforeach.code.  Returns NULL, after
+ * a diagnostic and a push-back, when the next token is not "foreach".
+ */
+struct node *
+pforeach(void)
+{
+	struct node *loop;
+	int ok;
+
+	if (!exptok(TKEYFOREACH))
+	{
+		say("%s:%d: pforeach selected but no TKEYFOREACH?", __FILE__, __LINE__);
+		ungettok();
+		return NULL;
+	}
+
+	loop = mknode(NFOREACH);
+
+	/* "(" expr ")" -- evaluation stops at the first missing piece */
+	ok = exptok(TOPPAREN)
+	  && (loop->nforeach.obj = pexpr()) != NULL
+	  && exptok(TCLPAREN);
+	if (!ok)
+		complain(1, "invalid foreach parameter");
+
+	loop->nforeach.code = pstmt();
+	if (!loop->nforeach.code)
+		complain(1, "no statement for foreach");
+
+	return loop;
+}
+
+/*
+ * if = "if" "(" expr ")" stmt ["else" stmt]
+ *
+ * Parse a conditional into an NCOND node: the condition goes to ncond.cond,
+ * the "then" statement to ncond.t and the optional "else" statement to
+ * ncond.f (NULL when absent).  Returns NULL, after a diagnostic and a
+ * push-back, when the next token is not "if".
+ */
+struct node *
+pif(void)
+{
+	struct node *branch;
+	int ok;
+
+	if (!exptok(TKEYIF))
+	{
+		say("%s:%d: pif selected but no TKEYIF?", __FILE__, __LINE__);
+		ungettok();
+		return NULL;
+	}
+
+	branch = mknode(NCOND);
+
+	/* "(" expr ")" -- evaluation stops at the first missing piece */
+	ok = exptok(TOPPAREN)
+	  && (branch->ncond.cond = pexpr()) != NULL
+	  && exptok(TCLPAREN);
+	if (!ok)
+		complain(1, "invalid if parameter");
+
+	branch->ncond.t = pstmt();
+	if (!branch->ncond.t)
+		complain(1, "no statement for then if branch");
+
+	branch->ncond.f = NULL;
+	if (exptok(TKEYELSE))
+	{
+		branch->ncond.f = pstmt();
+		if (!branch->ncond.f)
+			complain(1, "no statement for else if branch");
+	}
+	else
+	{
+		/* no else: hand the peeked token back */
+		ungettok();
+	}
+
+	return branch;
+}
+
+/*
+ * with = "with" expr "(" [*(expr ",") expr] ")" stmt
+ *
+ * Parse a with statement into an NWITH node: the module expression goes to
+ * nwith.mod, the comma-separated arguments to nwith.args (nwith.len of
+ * them) and the body to nwith.code.  Returns NULL, after a diagnostic and a
+ * push-back, when the next token is not "with".  Malformed input is
+ * reported through complain(); allocation failure through dieno().
+ */
+struct node *
+pwith(void)
+{
+	struct node *node;
+	ulong i = 0;	/* number of arguments stored so far */
+
+	if (!exptok(TKEYWITH))
+	{
+		say("%s:%d: pwith selected but no TKEYWITH?", __FILE__, __LINE__);
+		ungettok();
+		return NULL;
+	}
+
+	node = mknode(NWITH);
+	node->nwith.args = NULL;
+
+	if (!(node->nwith.mod = pexpr()))
+		complain(1, "no module specified");
+	if (!exptok(TOPPAREN))
+		complain(1, "no parameters in with");
+loop:
+	/* TODO: add cap and increase like in lex.c */
+	/*
+	 * Grow by one slot up front so that index i is always writable.  The
+	 * element size is taken from the array itself, so it cannot drift from
+	 * the type of what is stored in it.
+	 */
+	node->nwith.args = realloc(
+		node->nwith.args,
+		sizeof(*node->nwith.args) * (i + 1)
+	);
+	if (!node->nwith.args)
+		dieno(1, "realloc()");
+
+	switch(gettok())
+	{
+	break; case TEOF: complain(1, "no closing parenthesis");
+	break; case TCLPAREN: goto end;
+	break; case TCOMMA:
+		/* a comma is only valid after at least one argument */
+		if (!i || !(node->nwith.args[i] = pexpr()))
+			complain(1, "empty param in with");
+	break; default:
+		/* without a comma this can only be the first argument */
+		ungettok();
+		if (i)
+			complain(1, "no comma");
+		else if (!(node->nwith.args[i] = pexpr()))
+			complain(1, "empty param in with");
+	}
+	++i;
+	goto loop;
+end:
+	if (!(node->nwith.code = pstmt()))
+		complain(1, "no statement for with");
+	node->nwith.len = i;
+
+	return node;
+}
+
+/*
+ * break = "break" ";"
+ * continue = "continue" ";"
+ *
+ * Parse either statement and return an NBREAK or NCONT node.  Returns
+ * NULL, after a diagnostic and a push-back, when the next token is
+ * neither keyword.
+ */
+struct node *
+pbreak(void)
+{
+	struct node *stmt;
+	enum tok_t kw;
+
+	if (!exptok(TKEYBREAK, TKEYCONT))
+	{
+		say("%s:%d: break selected but no break/continue?", __FILE__, __LINE__);
+		ungettok();
+		return NULL;
+	}
+
+	/* remember the keyword: curtok changes once ";" is read */
+	kw = curtok.type;
+	stmt = mknode(kw == TKEYBREAK ? NBREAK : NCONT);
+
+	if (!exptok(TSEMICOL))
+		complain(1, "expected `;' after %s", tokname(kw));
+
+	return stmt;
+}
+
+/*
+ * stmt = comp / for / foreach / if / with / expr ";" / break / continue
+ *
+ * Peek at the next token and dispatch to the matching statement parser;
+ * anything else is parsed as an expression statement.  Empty statements (a
+ * bare ";") are skipped.  Returns NULL on TEOF.
+ */
+struct node *
+pstmt(void)
+{
+	struct node *expr;
+
+	for (;;)
+	{
+		if (gettok() == TEOF)
+			return NULL;
+		/* only peeking: the selected parser re-reads the token */
+		ungettok();
+
+		switch (curtok.type)
+		{
+		case TOPBRACE:
+			return pcomp();
+		case TKEYFOR:
+			return pfor();
+		case TKEYFOREACH:
+			return pforeach();
+		case TKEYIF:
+			return pif();
+		case TKEYWITH:
+			return pwith();
+		case TKEYBREAK:
+		case TKEYCONT:
+			return pbreak();
+		default:
+			break;
+		}
+
+		expr = pexpr();
+		if (!exptok(TSEMICOL))
+			complain(1, "expected `;' at the end of an expression");
+		if (expr)
+			return expr;
+		/* empty statement: start over with the next token */
+	}
+}
+
+/*
+ * Parse the next statement with pstmt() and return its result unchanged.
+ */
+struct node *
+getstmt(void)
+{
+	struct node *stmt = pstmt();
+
+	return stmt;
+}