From 663ab8fd11246835fe1466b02b88ae7b9f3d0874 Mon Sep 17 00:00:00 2001 From: Will Date: Tue, 4 Feb 2025 16:55:57 -0500 Subject: [PATCH] parser + rule files --- .cursor/rules/krules.mdc | 179 ++++++++++++++++++ .cursorrules | 92 +++++++++ main.ts | 57 +++++- mod.ts | 5 + nohup.out | 18 ++ .../NGNK-Parser-Source.k | 73 +++++++ .../NGNK-Parser-Source.ts | 123 ++++++++++++ references provided by Dave/test.ts | 123 ++++++++++++ src/generator/ngnk_generator.ts | 28 +++ src/lexer/lexer.ts | 29 +++ src/parser/parser.ts | 122 ++++++++++++ src/types/token.ts | 10 + 12 files changed, 856 insertions(+), 3 deletions(-) create mode 100644 .cursor/rules/krules.mdc create mode 100644 .cursorrules create mode 100644 mod.ts create mode 100644 nohup.out create mode 100644 references provided by Dave/NGNK-Parser-Source.k create mode 100644 references provided by Dave/NGNK-Parser-Source.ts create mode 100644 references provided by Dave/test.ts create mode 100644 src/generator/ngnk_generator.ts create mode 100644 src/lexer/lexer.ts create mode 100644 src/parser/parser.ts create mode 100644 src/types/token.ts diff --git a/.cursor/rules/krules.mdc b/.cursor/rules/krules.mdc new file mode 100644 index 0000000..14d2e09 --- /dev/null +++ b/.cursor/rules/krules.mdc @@ -0,0 +1,179 @@ +--- +description: k files +globs: *.k +--- +Use ngn/k for k files. 
+ +ngn/k Cheatsheet + +Basics + +Running k Code + +1+2 / Addition +3-1 / Subtraction +2*3 / Multiplication +10%3 / Division (floating point if needed) + +Right-to-Left Evaluation + +1+5*2-2 / Evaluates as: 1 + (5 * (2 - 2)) = 1 +1+(5*2)-2 / Parentheses force standard order + +Assignments + +a: 10 / Assign 10 to a +b: a+5 / Assign 15 to b + +Comments + +/ This is a comment + +Data Types + +Atoms (Scalars) + +42 / Integer +3.14 / Float +"c" / Character +`symbol / Symbol + +Vectors (1D Arrays) + +1 2 3 4 / Integer vector +1.2 3.4 5.6 / Float vector +"abcd" / Character vector +`foo`bar`baz / Symbol vector + +Lists (Nested Arrays) + +(1 2 3; 4 5) / List of vectors +("abc"; 1 2 3) / Mixed-type list + +Vectorized Operations + +Unlike traditional languages, operations apply element-wise: + +1 2 3 + 4 / 5 6 7 +2 * 1 2 3 / 2 4 6 + +Operations also work on nested lists: + +(1 2; 3 4) + 10 / (11 12; 13 14) + +Indexing + +x: 10 20 30 40 +x[2] / Returns 30 +x[1 3] / Returns 20 40 + +For lists: + +l: (10 20 30; "abc"; `sym) +l[1] / Returns "abc" +l[0][2] / Returns 30 + +Built-in Verbs + +Arithmetic + ++ Flip/transpose (monadic), Add (dyadic) +- Negate (monadic), Subtract (dyadic) +* First (monadic), Multiply (dyadic) +% Square root (monadic), Divide (dyadic) +! Enumerate, e.g. !3 is 0 1 2 (monadic), Modulo (dyadic) + +Comparisons + +< Less than, Ascend (monadic) +> Greater than, Descend (monadic) += Equal, Group (monadic) +~ Match (deep equality), Not (monadic) + +Logical Operations + +& Min (dyadic), Where (monadic) +| Max (dyadic), Reverse (monadic) + +Data Manipulation + +, Concat, Enlist (monadic) +^ Without (dyadic), Null (monadic) +# Reshape (dyadic), Length (monadic) +_ Drop (dyadic), Floor (monadic) +$ Cast (dyadic), String (monadic) +? Find (dyadic), Unique (monadic), Random selection +@ Apply (indexing and function calls) +.
Drill (deep indexing) + +Adverbs (Higher-order Operations) + +Each (') + +2*'1 2 3 / Returns 2 4 6 + +Reduce (/) + ++/1 2 3 / Returns sum: 6 + +Scan (\) + ++\1 2 3 / Returns running sum: 1 3 6 + +Each-Prior (':) + ++': 1 2 3 4 / Returns 1 3 5 7 (pairwise sum) + +Each-Right (/:) + +1 2*/:3 4 / Returns (3 6; 4 8) + +Each-Left (\:) + +1 2*\:3 4 / Returns (3 4; 6 8) + +Window (i':) + +3':!10 / Sliding window + +Stencil (i f':) + +3+/':!10 / Stencil sum + +Bin Search (X') + +1 3 5 7 9'8 9 0 / Bin classification + +Array-Oriented Typing vs Traditional Languages + +Key Differences + +Operations are implicitly vectorized + +No need for loops to apply operations to arrays + +Example: 2 * 1 2 3 produces 2 4 6 directly + +Data is homogeneously typed + +Unlike Python lists, k enforces consistent typing in arrays + +Example: 1 2 "a" would cause an error + +Functions behave like array transformations + +Monadic (single argument) and dyadic (two arguments) versions exist + +Example: * 1 2 3 returns 1 (first element), 1*2 3 returns 2 3 + +Indexing and function application use the same syntax + +f[2] calls f with argument 2 + +arr[2] retrieves index 2 from arr + +Example: Simple Statistics + +x: 1 2 3 4 5 +mean: (+/x) % #x / Sum divided by count +mean / this will print to screen aka return from main \ No newline at end of file diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..9d36630 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,92 @@ +This is a deno2.0 project that contains k code as well + +[Typescript] +Write all typescript code in deno2.0 +Keep files very simple and try and have each big function in a separate file +When declaring types, make sure the type doesn't already exist in another file + +[k] +k is an array language +It's not linearly typed +It's also a functional language +Here is a cheatsheet for k: +Verbs: : + - * % ! & | < > = ~ , ^ # _ $ ? @ . 
0: 1: +notation: [c]har [i]nt [n]umber(int|float) [s]ymbol [a]tom [d]ict + [f]unc(monad) [F]unc(dyad) [xyz]any +special: var:y set a:1;a -> 1 + (v;..):y unpack (b;(c;d)):(2 3;4 5);c -> 4 + :x return {:x+1;2}[3] -> 4 + $[x;y;..] cond $[0;`a;"\0";`b;`;`c;();`d;`e] -> `e + o[..] recur {$[x<2;x;+/o'x-1 2]}9 -> 34 + [..] progn [0;1;2;3] -> 3 + +:: self ::12 -> 12 + : right 1 :2 -> 2 "abc":'"d" -> "ddd" + +x flip +("ab";"cd") -> ("ac";"bd") +N+N add 1 2+3 -> 4 5 + -N negate - 1 2 -> -1 -2 +N-N subtract 1-2 3 -> -1 -2 + *x first *`a`b -> `a *(0 1;"cd") -> 0 1 +N*N multiply 1 2*3 4 -> 3 8 + %N sqrt %25 -> 5.0 %-1 -> 0n +N%N divide 4 3%2 -> 2 1 4 3%2.0 -> 2.0 1.5 + !i enum !3 -> 0 1 2 !-3 -> -3 -2 -1 + !I odometer !2 3 -> (0 0 0 1 1 1;0 1 2 0 1 2) + !d keys !`a`b!0 1 -> `a`b + !S ns keys a.b.c:1;a.b.d:2;!`a`b -> ``c`d +x!y dict `a`b!1 2 -> `a`b!1 2 +i!I div -10!1234 567 -> 123 56 +i!I mod 10!1234 567 -> 4 7 + &I where &3 -> 0 0 0 &1 0 1 4 2 -> 0 2 3 3 3 3 4 4 + &x deepwhere &(0 1 0;1 0 0;1 1 1) -> (0 1 2 2 2;1 0 0 1 2) +N&N min/and 2&-1 3 -> -1 2 0 0 1 1&0 1 0 1 -> 0 0 0 1 + |x reverse |"abc" -> "cba" |12 -> 12 +N|N max/or 2|-1 3 -> 2 3 0 0 1 1|0 1 0 1 -> 0 1 1 1 + 0 2 1 3 5 4 + >X descend >"abacus" -> 4 5 3 1 0 2 + i close >fd +N 1 0 +N>N more 0 1>0 2 -> 0 0 + =X group ="abracadabra" -> "abrcd"!(0 3 5 7 10;1 8;2 9;,4;,6) + =i unitmat =3 -> (1 0 0;0 1 0;0 0 1) +N=N equal 0 1 2=0 1 3 -> 1 1 0 + ~x not ~(0 2;``a;"a \0";::;{}) -> (1 0;1 0;0 0 1;1;0) +x~y match 2 3~2 3 -> 1 "4"~4 -> 0 0~0.0 -> 0 + ,x enlist ,0 -> ,0 ,0 1 -> ,0 1 ,`a!1 -> +(,`a)!,,1 +x,y concat 0,1 2 -> 0 1 2 "a",1 -> ("a";1) + ^x null ^(" a";0 1 0N;``a;0.0 0n) -> (1 0;0 0 1;1 0;0 1) +a^y fill 1^0 0N 2 3 0N -> 0 1 2 3 1 "b"^" " -> "b" +X^y without "abracadabra"^"bc" -> "araadara" + #x length #"abc" -> 3 #4 -> 1 #`a`b`c!0 1 0 -> 3 +i#y reshape 3#2 -> 2 2 2 +I#y reshape 2 3#` -> (```;```) +f#y replicate (3>#:')#(0;2 1 3;5 4) -> (0;5 4) {2}#"ab" -> "aabb" +x#d take `c`d`f#`a`b`c`d!1 2 3 4 -> `c`d`f!3 4 0N + _n 
/**
 * Runs `input` through the lexer -> parser -> generator pipeline and logs each
 * intermediate result (tokens, AST as JSON, regenerated source).
 *
 * Any exception raised along the way is reported with `console.error` instead of
 * being rethrown, so a list of demo inputs keeps running after a failing one.
 *
 * @param input Source text to tokenize, parse and regenerate.
 */
function testParser(input: string): void {
  console.log("\n=== Testing Parser ===");
  console.log("Input:", input);

  try {
    const tokens = new Lexer(input).tokenize();
    console.log("Tokens:", tokens);

    const ast = new Parser(tokens).parseExpressions();
    console.log("AST:", JSON.stringify(ast, null, 2));

    const generator = new NGNKGenerator();
    const output = ast.map((node) => generator.generate(node)).join("; ");
    console.log("Generated Output:", output);
  } catch (error: unknown) {
    // Under strict mode the catch binding is `unknown`; narrow it before reading
    // `.message` so non-Error throws are still printed rather than `undefined`.
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error:", message);
  }
}
+[17306:0204/145013.514948:ERROR:network_service_instance_impl.cc(608)] Network service crashed, restarting service. +[main 2025-02-04T19:50:13.523Z] Extension host with pid 17645 exited with code: 0, signal: unknown. +[17306:0204/145013.527699:ERROR:zygote_communication_linux.cc(297)] Failed to send GetTerminationStatus message to zygote +[17306:0204/145013.528222:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.528384:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.528533:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.528691:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.528832:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.528975:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.529147:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.529290:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.529433:ERROR:gpu_process_host.cc(1001)] GPU process launch failed: error_code=1002 +[17306:0204/145013.529440:FATAL:gpu_data_manager_impl_private.cc(436)] GPU process isn't usable. Goodbye. 
+[0204/145013.536922:ERROR:elf_dynamic_array_reader.h(64)] tag not found diff --git a/references provided by Dave/NGNK-Parser-Source.k b/references provided by Dave/NGNK-Parser-Source.k new file mode 100644 index 0000000..c0aa7af --- /dev/null +++ b/references provided by Dave/NGNK-Parser-Source.k @@ -0,0 +1,73 @@ +/ NGNK Parser and Source Code Generator + +/ Define AST Structures +ASTNode:{(x;y;z)} +FunctionCall:{(x;y)} +Conditional:{(x;y;z)} +SystemCommand:{(x)} + +/ Tokenizer using State Machine with Scan (Right-to-Left) +tokenize:{ + states: ("num"; "id"; "op"; "ws"; "adverb"); + s:(); + x:|x; + + classify:{ + ?x in "0123456789": "num" + ; ?x in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz": "id" + ; ?x in "()+-*/%=<>~,:#@.?;[]{}!": "op" + ; ?x in "' / \\ ': /: \\:": "adverb" + ; "ws" + } + + tokens:states\:classify x; + s,::tokens; + + if (#(s="ws") > 0) { s::ErrorNode["Invalid token detected"] } + + s +} + +/ Parsing Table +parseTable:(`+ + `-`*`%`!`&`|`<`>`=`~`,`^`#`_`$`?`@`. + `binaryOp@)! + binaryOp@ +parseTable[`$[]:`conditionalOp; +parseTable[`\`:systemCommandOp; +parseTable[`'`:adverbOp; +parseTable[`/`:adverbOp; +parseTable[`\\`:adverbOp; +parseTable[`':`:adverbOp; +parseTable[`/:`:adverbOp; +parseTable[`\\:`:adverbOp; + +/ Operator Handling +binaryOp:{s::(x@-2;x@-1;*|_2#x); x:_2#x} +monadicOp:{s::(x@-1;*x); x:_1#x} +dyadicOp:{s::(x@-2;x@-1;*|_2#x); x:_2#x} + +/ Specialized Operations +conditionalOp:{s::Conditional[x@-3;x@-2;x@-1]; x:_3#x} +systemCommandOp:{s::SystemCommand[x@-1]; x:_1#x} +adverbOp:{s::FunctionCall["adverb"; x@-1]; x:_1#x} + +/ Parser +parse:{ + s:(); + x:|x; + s,::(t in parseTable ? parseTable[t] s : t like "0123456789" | t like '"*"' | t like '`*' ? 
/** A lexical token: a category ("IDENTIFIER", "NUMBER", "OPERATOR" or "SYMBOL") plus its source text. */
class Token {
  constructor(public type: string, public value: string) {}
}

/** Splits an input string into {@link Token}s using one alternation regex. */
class Lexer {
  constructor(private input: string) {}

  /**
   * Scans the input from left to right and returns the tokens found.
   *
   * Text that matches none of the four alternatives (whitespace, quotes, `<`, `>`,
   * `#`, ...) is skipped without an error.
   *
   * @returns A new array on every call.
   */
  tokenize(): Token[] {
    // Collected locally: keeping the list on the instance made a second call
    // return every token twice.
    const tokens: Token[] = [];
    const regex = /([a-zA-Z_][a-zA-Z0-9_]*)|([0-9]+)|([+\-*%!@^\/\\])|([()\[\]{};,])/g;
    let match: RegExpExecArray | null;
    while ((match = regex.exec(this.input)) !== null) {
      if (match[1]) tokens.push(new Token("IDENTIFIER", match[1]));
      else if (match[2]) tokens.push(new Token("NUMBER", match[2]));
      else if (match[3]) tokens.push(new Token("OPERATOR", match[3]));
      else if (match[4]) tokens.push(new Token("SYMBOL", match[4]));
    }
    return tokens;
  }
}

/**
 * Prefix-notation parser over a token list, plus a source generator for the
 * nodes it produces. Identifiers and numbers are returned as the raw tokens;
 * calls and operator applications become plain objects tagged with `type`.
 */
class Parser {
  private position = 0;

  constructor(private tokens: Token[]) {}

  /**
   * Reports whether the current token has the given type (and value, when supplied)
   * without consuming it. Returns false once the input is exhausted.
   */
  private match(type: string, value?: string): boolean {
    const token = this.tokens[this.position];
    if (!token || token.type !== type) return false;
    return value === undefined || token.value === value;
  }

  /** Parses expressions back to back until every token has been consumed. */
  parseExpressions(): any[] {
    const expressions = [];
    while (this.position < this.tokens.length) {
      expressions.push(this.parseExpression());
    }
    return expressions;
  }

  /**
   * Parses one expression starting at the current token.
   *
   * @returns The node, or `null` when no tokens remain.
   * @throws Error when the current token is a SYMBOL, which cannot start an expression.
   */
  parseExpression(): any {
    const token = this.tokens[this.position];
    if (!token) return null;
    if (token.type === "IDENTIFIER") {
      this.position++;
      if (this.match("SYMBOL", "[")) {
        return this.parseIndexingOrFunctionCall(token);
      }
      return token;
    } else if (token.type === "NUMBER") {
      this.position++;
      return token;
    } else if (token.type === "OPERATOR") {
      return this.parseOperatorOrUnary();
    }
    throw new Error(`Unexpected token: ${token.value}`);
  }

  /**
   * Parses the bracketed argument list that follows `identifier`. Arguments may be
   * separated by `;`. A list closed by `]` yields a "FunctionCall" node, one closed
   * by `}` yields an "Indexing" node.
   *
   * @throws Error when the input ends before a closing `]` or `}`.
   */
  parseIndexingOrFunctionCall(identifier: Token): any {
    const args = [];
    this.position++; // Consume "["
    while (!this.match("SYMBOL", "]") && !this.match("SYMBOL", "}")) {
      const expr = this.parseExpression();
      if (!expr) throw new Error("Expected expression inside indexing or function call");
      args.push(expr);
      if (this.match("SYMBOL", ";")) this.position++;
    }
    // The loop only exits on a closing symbol, so this index is always in range.
    const closingSymbol = this.tokens[this.position].value;
    this.position++; // Consume closing bracket or brace
    return { type: closingSymbol === "]" ? "FunctionCall" : "Indexing", name: identifier.value, args };
  }

  /**
   * Parses an operator in prefix position: one following operand for the operators
   * listed in `isUnaryOperator`, two following operands otherwise. An operand is
   * `null` when the input ends early.
   */
  parseOperatorOrUnary(): any {
    const operator = this.tokens[this.position];
    this.position++;
    if (this.isUnaryOperator(operator.value)) {
      const operand = this.parseExpression();
      return { type: "UnaryExpression", operator: operator.value, operand };
    }
    const left = this.parseExpression();
    const right = this.parseExpression();

    // Reverse arguments for non-commutative operators
    if (this.isNonCommutative(operator.value)) {
      return { type: "BinaryExpression", operator: operator.value, left: right, right: left };
    }

    return { type: "BinaryExpression", operator: operator.value, left, right };
  }

  isUnaryOperator(op: string): boolean {
    return ["!", "#", "_", "$"].includes(op);
  }

  isNonCommutative(op: string): boolean {
    return ["-", "%", "!", "^"].includes(op);
  }

  /**
   * Renders a node produced by this parser back to source text. Unknown node types
   * and `null` render as the empty string.
   *
   * Declared as an arrow-function property so it stays bound when passed as a bare
   * callback, e.g. `ast.map(parser.generateNGNK)`; as a plain method the recursive
   * `this.generateNGNK` calls failed there because `this` was undefined.
   */
  generateNGNK = (ast: any): string => {
    if (!ast) return "";
    switch (ast.type) {
      case "FunctionCall":
      case "Indexing":
        return `${ast.name}[${ast.args.map(this.generateNGNK).join(";")}]`;
      case "BinaryExpression":
        return `${this.generateNGNK(ast.left)} ${ast.operator} ${this.generateNGNK(ast.right)}`;
      case "UnaryExpression":
        return `${this.generateNGNK(ast.operand)} ${ast.operator}`;
      case "NUMBER":
      case "IDENTIFIER":
        return ast.value;
      default:
        return "";
    }
  };
}
(token.type === "NUMBER") { + this.position++; + return token; + } else if (token.type === "OPERATOR") { + return this.parseOperatorOrUnary(); + } + throw new Error(`Unexpected token: ${token.value}`); + } + + parseIndexingOrFunctionCall(identifier: Token): any { + let args = []; + this.position++; // Consume "[" + while (!this.match("SYMBOL", "]") && !this.match("SYMBOL", "}")) { + let expr = this.parseExpression(); + if (!expr) throw new Error("Expected expression inside indexing or function call"); + args.push(expr); + if (this.match("SYMBOL", ";")) this.position++; + } + let closingSymbol = this.tokens[this.position].value; + this.position++; // Consume closing bracket or brace + return { type: closingSymbol === "]" ? "FunctionCall" : "Indexing", name: identifier.value, args }; + } + + parseOperatorOrUnary(): any { + let operator = this.tokens[this.position]; + this.position++; + if (this.isUnaryOperator(operator.value)) { + let operand = this.parseExpression(); + return { type: "UnaryExpression", operator: operator.value, operand }; + } + let left = this.parseExpression(); + let right = this.parseExpression(); + + // Reverse arguments for non-commutative operators + if (this.isNonCommutative(operator.value)) { + return { type: "BinaryExpression", operator: operator.value, left: right, right: left }; + } + + return { type: "BinaryExpression", operator: operator.value, left, right }; + } + + isUnaryOperator(op: string): boolean { + return ["!", "#", "_", "$"].includes(op); + } + + isNonCommutative(op: string): boolean { + return ["-", "%", "!", "^"].includes(op); + } + + generateNGNK(ast: any): string { + if (!ast) return ""; + switch (ast.type) { + case "FunctionCall": + return `${ast.name}[${ast.args.map(this.generateNGNK).join(";")}]`; + case "Indexing": + return `${ast.name}[${ast.args.map(this.generateNGNK).join(";")}]`; + case "BinaryExpression": + return `${this.generateNGNK(ast.left)} ${ast.operator} ${this.generateNGNK(ast.right)}`; + case 
"UnaryExpression": + return `${this.generateNGNK(ast.operand)} ${ast.operator}`; + case "NUMBER": + case "IDENTIFIER": + return ast.value; + default: + return ""; + } + } +} + +// Example usage for Deno +if (import.meta.main) { + const input = "sum[1; 2; 3] + array{5} $[ x > y ; \"greater\" ; \"smaller\" ] while[ x < 10 ; x + 1 ] 'fold /+ [1;2;3] \\load \"file.pong\" x ! 15 # 20 _ \"abc\" $ 10 - 3 8 % 2 5 ! 2 3 ^ 4"; + const lexer = new Lexer(input); + const tokens = lexer.tokenize(); + const parser = new Parser(tokens); + const ast = parser.parseExpressions(); + console.log(ast.map(parser.generateNGNK).join("; ")); +} \ No newline at end of file diff --git a/src/generator/ngnk_generator.ts b/src/generator/ngnk_generator.ts new file mode 100644 index 0000000..3d70907 --- /dev/null +++ b/src/generator/ngnk_generator.ts @@ -0,0 +1,28 @@ +import { ASTNode } from "../types/token.ts"; + +export class NGNKGenerator { + generate(ast: ASTNode): string { + if (!ast) return ""; + + switch (ast.type) { + case "FunctionCall": + return `${ast.name}[${ast.args.map(arg => this.generate(arg)).join(";")}]`; + + case "Indexing": + return `${ast.name}[${ast.args.map(arg => this.generate(arg)).join(";")}]`; + + case "BinaryExpression": + return `${this.generate(ast.left)} ${ast.operator} ${this.generate(ast.right)}`; + + case "UnaryExpression": + return `${this.generate(ast.operand)} ${ast.operator}`; + + case "Number": + case "Identifier": + return ast.value; + + default: + return ""; + } + } +} \ No newline at end of file diff --git a/src/lexer/lexer.ts b/src/lexer/lexer.ts new file mode 100644 index 0000000..64476e1 --- /dev/null +++ b/src/lexer/lexer.ts @@ -0,0 +1,29 @@ +import { Token, TokenType } from "../types/token.ts"; + +export class Lexer { + private position = 0; + private tokens: Token[] = []; + + constructor(private input: string) {} + + tokenize(): Token[] { + // Match identifiers, numbers, operators, symbols, and strings + const regex = 
/([a-zA-Z_][a-zA-Z0-9_]*)|([0-9]+(?:\.[0-9]+)?)|([+\-*%!@^\/\\#_$])|([[\](){};,])|("(?:[^"\\]|\\.)*")/g; + + let match; + while ((match = regex.exec(this.input)) !== null) { + if (match[1]) { + this.tokens.push(new Token("IDENTIFIER", match[1])); + } else if (match[2]) { + this.tokens.push(new Token("NUMBER", match[2])); + } else if (match[3]) { + this.tokens.push(new Token("OPERATOR", match[3])); + } else if (match[4]) { + this.tokens.push(new Token("SYMBOL", match[4])); + } else if (match[5]) { + this.tokens.push(new Token("STRING", match[5])); + } + } + return this.tokens; + } +} \ No newline at end of file diff --git a/src/parser/parser.ts b/src/parser/parser.ts new file mode 100644 index 0000000..5216613 --- /dev/null +++ b/src/parser/parser.ts @@ -0,0 +1,122 @@ +import { Token } from "../types/token.ts"; +import { ASTNode } from "../types/token.ts"; + +export class Parser { + private position = 0; + + constructor(private tokens: Token[]) {} + + private match(type: string, value?: string): boolean { + const token = this.tokens[this.position]; + if (!token) return false; + if (value !== undefined) { + return token.type === type && token.value === value; + } + return token.type === type; + } + + private peek(): Token | null { + return this.tokens[this.position] || null; + } + + parseExpressions(): ASTNode[] { + const expressions: ASTNode[] = []; + while (this.position < this.tokens.length) { + const expr = this.parseExpression(); + if (expr) { + expressions.push(expr); + } + } + return expressions; + } + + private parseExpression(): ASTNode | null { + const token = this.peek(); + if (!token) return null; + + if (token.type === "IDENTIFIER") { + this.position++; + if (this.match("SYMBOL", "[")) { + return this.parseIndexingOrFunctionCall(token); + } + return { type: "Identifier", value: token.value }; + } + + if (token.type === "NUMBER") { + this.position++; + return { type: "Number", value: token.value }; + } + + if (token.type === "OPERATOR") { + return 
this.parseOperatorOrUnary(); + } + + throw new Error(`Unexpected token: ${token.value}`); + } + + private parseIndexingOrFunctionCall(identifier: Token): ASTNode { + const args: ASTNode[] = []; + this.position++; // Consume "[" + + while (!this.match("SYMBOL", "]") && !this.match("SYMBOL", "}")) { + const expr = this.parseExpression(); + if (!expr) { + throw new Error("Expected expression inside indexing or function call"); + } + args.push(expr); + if (this.match("SYMBOL", ";")) { + this.position++; + } + } + + const closingSymbol = this.tokens[this.position].value; + this.position++; // Consume closing bracket or brace + + return { + type: closingSymbol === "]" ? "FunctionCall" : "Indexing", + name: identifier.value, + args + }; + } + + private parseOperatorOrUnary(): ASTNode { + const operator = this.tokens[this.position]; + this.position++; + + if (this.isUnaryOperator(operator.value)) { + const operand = this.parseExpression(); + return { + type: "UnaryExpression", + operator: operator.value, + operand + }; + } + + const left = this.parseExpression(); + const right = this.parseExpression(); + + if (this.isNonCommutative(operator.value)) { + return { + type: "BinaryExpression", + operator: operator.value, + left: right, + right: left + }; + } + + return { + type: "BinaryExpression", + operator: operator.value, + left, + right + }; + } + + private isUnaryOperator(op: string): boolean { + return ["!", "#", "_", "$"].includes(op); + } + + private isNonCommutative(op: string): boolean { + return ["-", "%", "!", "^"].includes(op); + } +} \ No newline at end of file diff --git a/src/types/token.ts b/src/types/token.ts new file mode 100644 index 0000000..c784daa --- /dev/null +++ b/src/types/token.ts @@ -0,0 +1,10 @@ +export class Token { + constructor(public type: string, public value: string) {} +} + +export type TokenType = "IDENTIFIER" | "NUMBER" | "OPERATOR" | "SYMBOL" | "STRING"; + +export interface ASTNode { + type: string; + [key: string]: any; +} \ No newline 
at end of file