From dcc6d354a21c484b0a83d1947ace2d180d3ce432 Mon Sep 17 00:00:00 2001 From: Natuie Date: Fri, 4 Apr 2025 22:58:12 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=9D=E5=A7=8B=E5=8C=96=20Vety=20?= =?UTF-8?q?=E9=A1=B9=E7=9B=AE=E5=B9=B6=E5=AE=9E=E7=8E=B0=E5=9F=BA=E7=A1=80?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加虚拟机、解析器、AST 打印等核心模块 - 实现基本的语法解析和虚拟机执行简单语句 - 添加原生函数支持和简单的错误处理 - 创建测试框架和示例代码 - 编写项目文档和 README 文件 --- .gitignore | 5 + CMakeLists.txt | 14 + README.md | 116 ++- demo/annotate.vt | 43 + demo/array.vt | 5 + demo/assignment.vt | 3 + demo/control_stmt.vt | 15 + demo/float.vt | 3 + demo/lang.ast | Bin 0 -> 74466 bytes demo/lexer_test.vt | 50 ++ demo/loop.vt | 23 + demo/main.vt | 47 ++ demo/map.vt | 20 + demo/operators.vt | 64 ++ demo/pi.vt | 26 + demo/test_example.vt | 42 + demo/try-catch.vt | 11 + doc/00_guide.md | 132 +++ doc/01_basic_syntax.md | 137 ++++ doc/02_control_flow.md | 131 +++ doc/03_functions_and_modules.md | 142 ++++ doc/04_composite_types.md | 180 ++++ lib/io.vt | 1 + parser/CMakeLists.txt | 24 + parser/ast.c | 40 + parser/ast.h | 146 ++++ parser/ast_printer.c | 94 +++ parser/ast_printer.h | 24 + parser/error.c | 97 +++ parser/error.h | 15 + parser/lexer.c | 556 +++++++++++++ parser/lexer.h | 132 +++ parser/parser.c | 1365 +++++++++++++++++++++++++++++++ parser/parser.h | 70 ++ test/Makefile | 37 + test/main.vt | 22 + test/test.c | 92 +++ utils/CMakeLists.txt | 11 + utils/file.c | 68 ++ utils/file.h | 12 + vm/CMakeLists.txt | 24 + vm/main.c | 153 ++++ vm/native.c | 54 ++ vm/native.h | 5 + vm/vm.c | 618 ++++++++++++++ vm/vm.h | 219 +++++ 46 files changed, 5086 insertions(+), 2 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 demo/annotate.vt create mode 100644 demo/array.vt create mode 100644 demo/assignment.vt create mode 100644 demo/control_stmt.vt create mode 100644 demo/float.vt create mode 100644 demo/lang.ast create mode 100644 demo/lexer_test.vt 
create mode 100644 demo/loop.vt create mode 100644 demo/main.vt create mode 100644 demo/map.vt create mode 100644 demo/operators.vt create mode 100644 demo/pi.vt create mode 100644 demo/test_example.vt create mode 100644 demo/try-catch.vt create mode 100644 doc/00_guide.md create mode 100644 doc/01_basic_syntax.md create mode 100644 doc/02_control_flow.md create mode 100644 doc/03_functions_and_modules.md create mode 100644 doc/04_composite_types.md create mode 100644 lib/io.vt create mode 100644 parser/CMakeLists.txt create mode 100644 parser/ast.c create mode 100644 parser/ast.h create mode 100644 parser/ast_printer.c create mode 100644 parser/ast_printer.h create mode 100644 parser/error.c create mode 100644 parser/error.h create mode 100644 parser/lexer.c create mode 100644 parser/lexer.h create mode 100644 parser/parser.c create mode 100644 parser/parser.h create mode 100644 test/Makefile create mode 100644 test/main.vt create mode 100644 test/test.c create mode 100644 utils/CMakeLists.txt create mode 100644 utils/file.c create mode 100644 utils/file.h create mode 100644 vm/CMakeLists.txt create mode 100644 vm/main.c create mode 100644 vm/native.c create mode 100644 vm/native.h create mode 100644 vm/vm.c create mode 100644 vm/vm.h diff --git a/.gitignore b/.gitignore index c6127b3..d38f71b 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,8 @@ modules.order Module.symvers Mkfile.old dkms.conf +build/ +.history/ +cmake-build-debug/ +.idea/ +.vscode/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..04ce9bb --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.28) +project(vety C) + +set(CMAKE_C_STANDARD 11) + +# 设置输出目录 +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +# 添加子目录 +add_subdirectory(utils) +add_subdirectory(parser) 
+add_subdirectory(vm) \ No newline at end of file diff --git a/README.md b/README.md index 3243ec3..f46ae15 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,114 @@ -# vety-language -vety +# Vety 编程语言 + +Vety 是一个现代化的静态类型编程语言,专注于简洁性、安全性和高性能。它结合了多种现代编程语言的优秀特性,提供了优雅的语法和强大的类型系统。 + +## 主要特性 + +- **静态类型系统**:提供强大的类型检查和类型推导 +- **现代化语法**:简洁直观的语法设计 +- **函数注解**:支持 `@test`、`@debug` 等注解 +- **变量声明**:支持 `let` 和 `const` 声明 +- **复杂数据类型**:支持数组、映射等数据结构 +- **表达式语法**:支持条件表达式、三元运算符等 +- **错误处理**:内置的异常处理机制 +- **内存安全**:自动内存管理 +上面你就看看就行了,基本都没实现。 + +## 注意 +现在只是暂存代码和测试,现在啥也没完成。 + +## 测试 +``` bash +cmake --build build --target vety +./build/vety +``` + +## 项目结构 +``` +Vety +├── demo # 示例代码 +├── doc # 文档 +├── lib # vt库文件 +├── parser # 语法分析器 +├── test # 测试 +├── utils # 工具 +├── vm # 虚拟机 +├── CMakeLists.txt # cmake文件 +├── README.md # 项目说明 +``` + +## 语法示例 + +### 基础语法 + +```vety +// 变量声明 +let a: i32 = 1 +const b: i32 = 2 + +// 数组 +let arr: i32[] = [64, 34, 25, 12, 22, 11, 90] + +// Map对象 +let m: any = { + a: 6, + b: 9, + c: { + d: 9 + } +} + +// 函数定义 +func add(a: i32, b: i32): i32 { + return a + b +} +``` + +### 控制流 + +```vety +// if 语句 +if (condition) { + // ... +} else if (condition2) { + // ... +} else { + // ... +} + +// for 循环 +for (let i: i32 = 0; i < n; i++) { + // ... +} + +// while 循环 +while (condition) { + // ... 
+} +``` + +### 错误处理 + +```vety +try { + // 可能抛出异常的代码 +} catch (e) { + // 处理异常 +} +``` + +## 安装 + +目前 Vety 正在积极开发中,安装步骤将在稳定版本发布后提供。 + +## 使用文档 + +详细的语言规范和 API 文档正在编写中。 + +## 贡献 + +Vety是一个开源项目,我们欢迎社区贡献。如果你发现了bug或有改进建议,请提交issue或pull request。 + +## 许可证 + +MIT License diff --git a/demo/annotate.vt b/demo/annotate.vt new file mode 100644 index 0000000..dfda6b8 --- /dev/null +++ b/demo/annotate.vt @@ -0,0 +1,43 @@ +// 测试基础注解 +@Test +func testFunction() { + // 函数体 +} + +// 测试带参数的注解 +@Author("John Doe") +@Version(1.35) +let projectVersion = 4; + +@Config(key="debug_mode", enabled=true) +let debugSettings = { + logLevel: "verbose", + timeout: 5000 +}; + +// 测试嵌套注解 +@Deprecated("Use newMethod instead") +func oldMethod() { + // 已弃用方法 +} + +// 测试参数化注解组合 +@Metrics(track=true, category="performance") +@Retry(maxAttempts=3) +func networkCall() { + // 网络调用逻辑 +} + +// 测试无参数注解 +@ReadOnly +var configData = [1, 2, 3]; + +// 测试复杂参数结构 +@DatabaseConnection( + host="localhost", + port=5432, + options={ssl: true, poolSize: 5} +) +func connectToDatabase() { + // 数据库连接逻辑 +} \ No newline at end of file diff --git a/demo/array.vt b/demo/array.vt new file mode 100644 index 0000000..0dc9c22 --- /dev/null +++ b/demo/array.vt @@ -0,0 +1,5 @@ +// 数组字面量示例 +let arr = [1, 2, 3, 4, 5]; +let arr1: int32[] = [1, 2, 3, 4, 5]; +let arr2: i32[8] = [1, 2, 3, 4, 5,6,6,8]; +let numbers: array = [1, 2, 3, 4, 5] diff --git a/demo/assignment.vt b/demo/assignment.vt new file mode 100644 index 0000000..bca92c4 --- /dev/null +++ b/demo/assignment.vt @@ -0,0 +1,3 @@ +let b:i32 = 6 + 5; + +let c:i32 = 1 - 2 +3 +(1/8); \ No newline at end of file diff --git a/demo/control_stmt.vt b/demo/control_stmt.vt new file mode 100644 index 0000000..2fe0772 --- /dev/null +++ b/demo/control_stmt.vt @@ -0,0 +1,15 @@ +if (a >= 6 and a <= 10) { + print("a is between 6 and 10"); +} else if (a >= 11 && a <= 15) { + print("a is between 11 and 15"); +} else if (a >= 16 && a <= 20) { + print("a is between 16 and 20"); +} else { + print("a 
is not between 6 and 20"); +} + +if (a == 6 or a == 10) { + print("a is 6 or 10"); +} else { + print("a is not 6 or 10"); +} \ No newline at end of file diff --git a/demo/float.vt b/demo/float.vt new file mode 100644 index 0000000..5f1b85a --- /dev/null +++ b/demo/float.vt @@ -0,0 +1,3 @@ +let f = 6.555; +let f1 = 6444848484.5554484897874; +let f2 = 58555.48484 +55.444 - 7.88 *44 - 4 /9.22 *(5.666+3) diff --git a/demo/lang.ast b/demo/lang.ast new file mode 100644 index 0000000000000000000000000000000000000000..1ee2c0943e7b16bbc616896b5482a9a7a4227252 GIT binary patch literal 74466 zcmds=OOqYPafNSq=?Jatr5Dn|vaAP5y!cRvir@nfDUl!p7)sX02ml00NCdzDL5bGZ zKg$2kcAd`orn;;9bXMQ#OFA3~vgQuZC!dv>l~rB$fByUI@aXXM;l<(c;pyS?!w(Oi zm7l&Wzx!qR`K9vb<-@1t)04xq^3(nDZ=RN)UX-6+l;6H|_)8goaQOT3Z{yFKhhLYU z{^4KrySEN^4`=1m+2MKlw?fo!|M8y><rr! zo#6e``M^Oc=E0C-0ec?tk5|-ykCC0UHEeJ zc$yJzmr$f3w3p$*&HDrYw+Bvc6>e@8`Gq5G$_lp*-xLi$EI&OidVYE#<*SL59~bFD zllO{l-aps;PY%-1)x#_1X{39*Jo8b}>hCV-UA30@{XnVOLEOt;I zP(%+)`V6bQK5#ts0<6Kfh9Z96Dq4JT_@Zd=`M^~VCfMlYr$-kQohG3J{*p$8`}>0Ry!cl2r#Ct@ z+@jC5A(ShJ>&DJJ-;Qv5Yw$SD$R%A|DUsz`nc13ELC1M8p5W%yS4H+`hd&h^Xim*_ z5&r682;9nQ%bZ>xcnp-|=Zt3QmBZ^sm&mwe;g~a57GtN={SuBm<(;Cbr$hW#9{5S& zXelDMsCq}eGAR@4 zbJft5;;ZZN3VEQnrlYHe*E-Qa&9wBWSoMD4Us3wg5#QS>!T;MOHhxrQ@Y39UQD)pX z6a8o=FY71cyz2dKL3~mEM3wUOxfE;pA6`E!{`m2X)39D-$F}A`uZ(aB(#;_n+&TR5 zoQvzj?Ctdd(40?yS^VqsqT5HMwt7@-7MzNP(;|E_B6;T3!hc@;;#tWK_;bjTQG$Zg z$c;fGnO@;H(}=A8q;U9b$m*dFQOLLiA8vgVzfr0d{i)dV%m$asm8fbg&k@fqMP4F! 
zPgX{fB^*WYcZ-&k@s#bNLTA4Bptt&(=X_N^zbw(tRihxZBYbBb`N_E_J}nabwPX>^ zc~x@s@s$@#XofTt-H=w}) z_ChL#w>5LA^61v*S4KSTfp>L;mq%Wt&E{O_Op$IH<9L2K|6ea&eVLkb-Tvqp9+vz& zYR<6zgJOTJBch9-DA0B>*SrmCj97L#;$9mfNvkcMbt9P0qg83D!VC{wtF0Ubbd_a0 zt?`s)zZ|ODDb4;W1bLi})b-q3M_&zb9UUD*0Uh}{aieq<71_cLqS3Dt(49`Se%V_m z&xYI_{ng0O*M@b{Ryy#`s(L9divRiA)cIrkdprgT(@sWk?bj|Xn)?hDH_yH5BTvzdWv8!ipY(aD z9`BbbZT}t8cCH}(`o5ex`EG>*f}5|ZgrgP~it{YAsCv^^c0$lz!1~On9Ie-Nd!a?H z2{b1k-jegiI!cl{o%f@~4wHDBuVO%9#0J@{4YcDtNj-BHH>+;6OF!S`4o{*c_r3(q z*q6@ATH)h@s+v=C@WrS`kfvRK0G*7nmNKjKyO&>X+gI?`q6QLgd--MXwwGTDt-m+p zC5^q_Ks(MawR3SfzYJ}!?X={6N!=RzD9$JQ=j-c3R$t$h>GN=5NW2X#(;MFAJ*4`q z>g;Kz=jruoWqtp!%Cnqho|b*WCuLtV&b_F9T-z-Kjr?-2bfcDWBYUZ;1Jkd>kaIoj z#Qw|Wy&>l0UT85V1I^Xk{k|1*GSFIY0EH1-c;AXSDKtgesc(V8hz&ei%*j9_zizAZ zwU&Kc_UoSxJJhO5_O1BU;X5uL>Pct4s^?`-e=vA>Pj@k&HIFaqG~v1Ef~|bd`ezqB z`CdGhukyL|L&Q&UEOP5b$9*XG9-ctR=iw7VwZM zvz9RWtLh%KyoV2u$m~&naiO=|qd;W@-TOpc<>Vd)DkJ*!=yhe#->=J2BUXpi-2Pw7 zqu1}t7y8XtFQ9JGYhE#enoAxvOGwT<+>!~RkEK%u>d4(2_7R?(BpbBVLe-tPy)2~zf@MK|W(dR2!s zaN#^d=is7GBxH5Gwt9%`CkyM^tSW;qHzIkqH>h3o$0dIJa9F|UB!%9c(VHOhOJ*gO z5-+~@g^q3(P2VlOGp%iI6_n6)e;?M*=^r0{e1WG^Y};(-afuc32*1j5x<+p28=O?= ziEAwl?C5i|>``^>YAA>6bwVVGS<>sx(COF7Rhu=yYR4X()DGX(v?~tYG{Q*k{#`XN zmLpB9NBg`LX)_H@BaS*fTWti-F7ib8Xe%oGgMNGj3eT=FsP?pui9xc#o-rtFx_=A` zKHF6|T6JUQEfItIbkNekl4~(2s+hpvKL&-aw|Zi~PEHYnl%EurgZFF)1RYOfke=Nu z2F1z5Q^p{jxY}wX53y}!%Vmt(Nee)6*-3uM0*g1>1T;i-%t z^tK#B)M?Zzyf2EtiCZO2>)CjNH{*{P4xcV=1X_Pzu*L0<7|IqgGpmKxSIzM5vPEe5 zxO~y~Y%8s^c2qKb)&_H{)ladG(TZMvn&}Xp+Yt-(djWZ4$s5Fr=3nS>OV{_a((8Rz zdNTh%cKtpZs^hh+fZo?{Z+Nxrf!-MYsM;1@YiE3LeNRlY)U8}VaIx#NLRZv<;bK;= zG}nu_UN1FH{SW!-@82k#zFz*Tr}Y$VrZW?I>ryL%A1`&?7s{menfIrI=ETR1i&@dm zKt5v!k5+equ3Jlh#+akly*r)Lo$47BH|FS71A5bB8#+{TDpiL3d=aQ_%&EoxdDI8J z6@sJ2@1eRL4{ns15%J)q(HyEB(srJJzI9}NW!&5-@hRpsJZ$eaw7m7veyvb`z5Ic0 zeukx`!)NIE4v-|Qbh2&-xQspcpzCd&zBvNwx`o~-TGFY3iqDQL+_fXPJ_=RumpArt z+w0vgm7N*=f!`aO_2I2pcjRweWYH}Zitc>*b~p)luT&{<$Jo9nl$2rH-YA+|&vffm zI2bO&KNxoRbS5v(C3VWlgHr 
zhRBO&&QGGi&AO#jA-h=vg0s^Xg9nO(wUq=jcukgXOUEX5FQ*lItJ!r~)a&y*ikURE z;=fn+WOdF|lC8Zfie&3C?^=m@;z}ox-nhV=cRk>5UB>8qxe5>rXR)t~2R<%V(`m-X z!+l_J!fdU7*e9j#byKc=boj-2b_z~g--5$+DdSy8SIdCmq*VOSeO}dy!gD(7lTRz3 zSBBhmbC6TM5nQ&oaJT$scXSEbXIejBdL#Sp;5&3z*7F=PE#c@b8{NLOe4CJ>?%qJ_Kk*DMJZIg}+vlFI zSRdXWxoT@a1v#9KrZY@5{MkZe0Ml7fT}79Hz+gllj=!l5!$Vx|1(OjzoelA~w!-0g zH^Osf!?%W+iv^*!w*_l_2nzcDw6=g>XD?OA?P?0ydDhuyJr^mDXhu)@0Xw%(Jl%Gv zd7cp-m+uvUhhDD%%}G3-F#tr{`!&8g0HQ# z_o%2B`kkL>fw%QFNZe^QugzM&ah6u)p?Ao}lJ=}7GwZ%raCPt6v%}Aelz$j@Ky};Y zkIrR|RdDOCeKN}mmsvyWbo0F;aoi83TWDp6Ky5ifkJz*ShL_;6?cVoF7^fIqh5^M{ zH6;ABXk9m(gmgXoYHQMcRy6iy(cBns{@xOtt$VHNJ6Z5p`Q~rf24ZHt^|xW+DyxZ+ z(a&wpUx$>J$DV9kc|1D(f#S?$1UYZvsN`It^8I9-9^ zX3#f-r$0ZRJ3?|&5gE9PY3XhcJwNY#q7O!R0;#KC>R95pi_9Ms5^oqFP?l$*pn+ z@|~vlt_9Hx7%^T8+Z(%!nrZEU$0a_;;P*6MMW~bek6>P7!qiH9}v~RrST`PEWUE^43^zyk)^UJFkWc|b8 zz2XTU4DX6-eZgH4>e^?SX03ZM{#GbqwQI^&hN=M9VHJ~8we(MO-{HFJF) z1(PweexT)dDS*caZniGetqA&-&!^=-_15+MjZyn5QU>*U9v*Zw;ygq(OplK!cc>54 zekTEM6^jEQ`7=5#M?UuJyGScuwk~RMo$aZv#ijFOuo$sTmN}6nwg6;nK)$80$2Su^!wA~o19{*Qg#zc&d=8=vK6 z^BrzzGxGG(oAEk(<@!22&xpQ#uB`8N3%5Tav=lOJ2|bHlU@}%MxNdXwn$_RvsZznM zS{Z-WIw&pz@N_%E<=dr_zFF$&-!=U(&W^dBn8T~@JabbRXNuhFuSMc>l~+-4sp7{A zps-a3Ev0pDfXY@mpn}GIOsAi#wcz%;82stn4X*pEcerH)H8O|#Yq!hUW!(fft`Eem ze3==(W#2^KQ>eA%{`)3ygDiN|`=u+P7<{MnC#G^piuS4Xr&Z&fm7Yhe6QzT!j;LHL z-<_=QfdVb_=nU&YS*L@QM@ek$=D}wZG#42v%2>;~t?~V$tGh+Rx=*3XC=Z?XRFG<6O5$ClO+FxHGXa(rvGYHPRqbnnXtPb2FjquzoM&CXo#5p8k z%)430v!s!@O_=WmRL9ow0E*MfgJEU}A&ZM!v=Tk!cHZOPxnANtRiL!?MxrIX=xx@R z>#AtmC!r65(@Fl$9tgQk)U}NFPsNebF&g1y(fo{3tOMOVY$XF{dO}I{sM84uj7aEo z6thW)ZZ?6$h>U(V#NMtjou0sgyJb1o_98%F1j)%6^FxUH$rh zN(b?noeKDy`)%;po1w5uCU;b$F1~=`#?0p2YAz~UIfo_^cyA9cP}QdB-*!FVaXrdK zr_p%;7|yrZr%=Dpc{dp2let7aRcqtZKf-YyjXuUWOWqQ_OD7T9g)6V!$gH8C{08(K zFESE%F5C9=H^!S+$lx%dJuh#pErY*#rHOp=iaS;h%CwmWI4|Pun~LPD2N$(AZr^8% zjGtvWv{7*?ZE0Y-+CYcx2sciyT4l?BmgZ=S$Y=Otp7?K!8qc2PY6ncX-Hti;1~B1s z+EEB;_1?<59^uIC)jinDQJ zKT#>@Wx4@>?TzwnS_aX_5x(CcaNE@`R$7iD^0YjTEZrt>cUXl)m#L-ctht1vj5Ll% 
zeTYy%JMP`JweArZ?TE&lU+WHq2j!-NSkDK)+01Kcz_+(*RMvFyvJQyj^?rFjENY{$ z^s#Y1o}Sk{tz77?&RfO#-lstW#Y;6t?X4-%`IdML=T84{4ED0zy-_NYJ61OElvOk> zZ`I6Vf_`5-!+=C+Ww|NCft7w=M`ru!R@VLunRiOygazhVl1eP zXw#wNOM%|n1m0ZMP&RbAoM%1dZ%X`pP)?{lD!pf&X^Q?&ym#Ajf;^zs88)p5;_19D zpW{R&`)v2C^EoZ;Y(FW7G)U27cirQ#K9fLg$6T7Y zsTXbaMyQq!pEIe5hoQUP*$sL-BAxfJSha?9+oL)j)}E>YZ>v?{x961Au{6*Tw0it& z`?vnI@m}`yw_d?hJHjJ+p2q)pGh%Dhh6bJp_B0~|c5f{LW;?=vmLY=LW#<;uk<3~4 zPv`L|s9tuCwN$rd5b@AoO+w3!=-bbCGD^^OHMGO$_h=a3aT!Kb`lAZy?TA!)#tC|E zE!yH{pOLc1kNmmJj7t0WxwK@YkoH&`wsiT7)Lz?D`dec>bg~gXoc76-p@n1XzKjd3 z;kcms3@*0TQvKB3c~tUu&_6a_qig2#Jf91H`?oE$@}bSX{JsxOc!0LE;6bUztoN<;L$Pt78D&-kz?b z<9=W1FQ3z^eH%1??A&u4@cU}>Wlq9x(Z^o7Z%%#P4tv#Z&}H0ehvV0qP;?`9@H2w% znb1tk`_PU~@9#6sjQv91aoqa2mm9R>E6>PO;itX}*GrFo2iu-^(8`P;Gbv22a8f5fd!d&iEJqh9|A{d|A( zH|U=#zoV6{_Bmaiha>FcXAt>b8ajQ1gRe|e6GrX4d)F`t5~Q^nzyEBra1Pr=vs z)&H8-n!!t3S)t*qYF3Y=eHOid;cDKoKMTjz-r$VSs?)Kbo_W`9jXEkWf?}aD()`pr zC|Ul5FK_QxOaCn@`rzyGh`MvTBIftu)yY~G3op6lY-iNuB zS)MoV5A*!;xj~(b>WSgEOLzFA@(#b=Jd zc_>(naO>vP^tAbLG_Z=dYMeF?blv^6u3_+;hFhMaepttB_2G0ZELTl{(T(#?3=~B-WX(dIXITD{hO`K_jy-*j0aHtz~s29A+!IYKLrVY{YZ{Sc$)4h1pWS-wEX)8`W{2go8Gl#yGo zt-TF2mw9w>on=-0g%R!@I(`%A#DKH1hoL(#bi0t=pVFPRs%PaBFO@$pm-E-!<#<+p z(k(=Le|LlGhgOg1X}#9dlg_h1aJmRdG&_ajs$(?j-^|z3J}zS!xB+K+ql996Z;b+y zlWY2YH(De0<#;5`-opHC@q;glr^K2mf3qKcmnByO!w=Dw8$oiC*YCLz>Q=dZ2G$N| zL~-wJ@QfChrQJ5GGkNtc?AxR@v8X6rX+{T1ar*Y}L73mjN?bW41=s zCYoAuBbF0Mg zH}Cxgp7So`Y{zvgt7?$1^wyOQ-Nh?3Au3xrhZerzqZc=1ge<*vP>YY|r+G_x@01#f za}`15bQ!x0h7N)Z!7;en;5V1b=ki}$2qgOBeYo1<8OCFMEQitCD zKGPldYRNu%z1^a_&;8o$&G(K{`}>yivUZR4l9pQgIny?GcZIgM&asx7`-~2J_TqLg z@i9NS#PjR1S^!EnS19$CUewY@Yd_!xy)lf(cR#Y>$Y@Qw9Bt<%)K&xTe>YeU8-)( zzV|+Bh1^;Mow7}%RLxS+=%X+g~Q_l`!s#$z;5d(_*#xSYkd)(bQ0-4 zu%$a~*19)f*10r|L3&nMV|@)XM^OywdlvhF;iRjlu!NhRcK{c!&F4^1xPN}{a&Vt{ z2Iy6PNZ$WmOeT5so`WAUEYi6VzH+H+;}CZk_GpfWweP!wx78}}+w*?=u{6*T zw45czbNsUR*TCym?}113JdOX+{m-&D^0p(M2=+811op9!4O;M@We8F0k8hy65y_lo z|1?wp5j{fA%g!@6fpVTf;;o3-jnawg^*R9jY((FFzLQZxRjkqS7B#oODQ) 
zXPltdyEb$5_&wJ2epG$vyT89CeCJyCo;6ZsBT{>9lMzGPTVs4ipTCC>r+qSIaB*zS z2Q@e1dyWgTL~yaSmU2}2eWmW-ov1#Te~U1@BrZ6P)z-YPNEON4W@u--aFb%6)KIsju5%Yh??5Hda6AvxDynXOyzP zM-zTV#3|zNhrH9A(aZim)6Cc}E?Rry%9#@Ih%|99`$0Vj;T{t^1wJFm~VaQQvx zpDMq@$5#8CF3-ae_VF`_d@l{1KElCQrYYh|aCNGA4L6KvE6@MI*WU3C`rcZ^yEsXE z`mTkWPp6Hy*dz0jalDH?{O>B>c?&V0LPL9d2%2ffOjFFK;ObOyxaA6e&gWC`^?mif z#`o7yOSF|0vMiqlQQ!agQ)oKLsdpUJ1q&qCW5Le#6vOlsNIdO#!FfLK?bjV?Fr9VK z%yjj_Q>`9bAAzIw)d=5W)|&}Hb^V5K&vmU8R&P}R<~H4lf;wk?oj7#4He+J<1C3-6 z53V!)s)E-JUmu^KI{n5sqQ`fgomRiW_kQ|S6(933koVHBDu(>TB(o#a@AKg-?lR94 z+zBdKFC3n;`5o^|>@~mtcJ6gT?e#i1-4;KC@2&IO*JnQ+alXB~g$y1)>Oa2Au22%Z?)**cG)*6WF)vVDKQqPu;+c&`v$^+sCvy$9LzNR4j24aeVV(4V_od}q6u zJL+cl&mFLHo!&UlUxB~<&4Ota2g+6tgHNwxZemq^hyb|aR|G_BgaH%hg%a!KMO zfqExfGJH#i&s${bw=U@Ih;(_qiG01Y4CI@K56WE-H_J|M>se75Z_U-A=l10YS>A*ew7rI+yyu2Sn6={3?d0TzITr2CeinP{S z3uapRs(in(Isq?AWgq|N#epev}n!Yu$Z=KiTW;c7hSJow8mOsx9KQDFG zAIkW=eERh8qoL{w>bbSilGC}~JJ@fnx}Brohs_$67qs)`WSb#*^#a6V-fq z)@r$NwrQiN(6?6AeqHtQkO6|@t+gZC$g8Ko-#V|W*Sd~C^j?LY$5^;sj;X4edaE3e zoW*KJp(@jURd0+zYF8f!Yx~JdP#w>$mj#t9(t~<-c(2UCk4go1udLqA4j-2Plufs2 z6)f*fafga9ou`5!8=oECDO|Pd$iDhJv+$!qy1>eMcJMTfHt}p5(TEA{2*$GSMvbwp z!pJ0?`*3_4`8F!>?R+^fGYz)d;4|nxy2V;GsARqjsvFT*R$qVe@W1}^l{i^p`D*)Kl+`XIwmonxBR7yzZJ?riu-qzAiIp O*=&NZC0XjX7yK_2D4VJP literal 0 HcmV?d00001 diff --git a/demo/lexer_test.vt b/demo/lexer_test.vt new file mode 100644 index 0000000..23a5413 --- /dev/null +++ b/demo/lexer_test.vt @@ -0,0 +1,50 @@ +// 测试多进制数字解析 +let binary = 0b1010 // 二进制数字 +let octal = 0755 // 八进制数字 +let hex = 0xFF // 十六进制数字 + +// 测试浮点数和科学计数法 +let float_num = 3.14159 +let scientific = 1.23e-4 +let big_num = 6.022E23 + +// 测试注解语法 +@deprecated +func old_function() { + return 0 +} + +// 测试函数调用和命名参数 +func test_function(name: string, age: i32) { + pri32("Name: ", name) + pri32("Age: ", age) +} + +// 测试函数调用 +test_function(name= "Alice", age= 25) + +// 测试字符串字面量 +let message = "Hello, World!" 
+ +// 测试布尔值 +let is_active = true; +let is_done = false; + +// 测试数组和映射 +let numbers = [1, 2, 3, 4, 5] +let user = { + "name": "Bob", + "age": 30, + "scores": [85, 92, 78] +} + +// 测试注释 +/* 这是一个 + 多行注释 + 测试 */ + +// 测试运算符 +let a = 10 +let b = 20 +let result = (a + b) * 2 +let compare = a <= b && b >= 15 \ No newline at end of file diff --git a/demo/loop.vt b/demo/loop.vt new file mode 100644 index 0000000..268a260 --- /dev/null +++ b/demo/loop.vt @@ -0,0 +1,23 @@ +let a:i32 = 4; +let b:i32 = 3; +let c:i32 = 56; + +for (let i:i32 = 0; i < 10; i++) { + pri32(i); +} + +while (true) { + pri32("infinite loop"); +} + +while (a > 0 and (b > 0 or c > 0)) { + pri32("infinite loop"); +} + +for (let i:i32 = 0; i < 10; i++) { + if (i == 5) { + break; + } else if (i == 3) { + continue; + } +} \ No newline at end of file diff --git a/demo/main.vt b/demo/main.vt new file mode 100644 index 0000000..2bab948 --- /dev/null +++ b/demo/main.vt @@ -0,0 +1,47 @@ + + + +let a: i32 = 1 +const b: i32 = 2 +let m:any = { + a:6, + b:9, + c: { + d:9 + } +} +let a1:i32[] = [1,2,2,3] + +func test(msg: i32): i32 { + print("a is 1") + return 0 +} + +@test(debug=6) +func add(a: i32, b: i32):i32 { + return a + b; +} + +// 冒泡排序 +func bubbleSort(arr: array, n: i32): void { + for(let i:i32 = 0; i < n-1; i++) { + for (let j:i32 = 0; j < n-i-1; j++) { + if (arr[j] > arr[j+1]) { + let temp = arr[j]; + arr[j] = arr[j+1]; + arr[j+1] = temp; + } + } + } +} + +let arr: i32[] = [64, 34, 25, 12, 22, 11, 90]; +let b1:i32 = 4, c:i32 = 5; + +@test(debug=6) +@debug(a=1,4) +func add1(a: i32, b: i32[]):i32[] { + return a + b; +} +print("hi"); + diff --git a/demo/map.vt b/demo/map.vt new file mode 100644 index 0000000..68bcc61 --- /dev/null +++ b/demo/map.vt @@ -0,0 +1,20 @@ +// map字面量示例 +let map = { + name: "John", + age: 30, + isStudent: false +}; + +let map1 = { + b: "a", + w: { + w:2, + p:3 + } +} + +let map2 = { + a: { b:{ c: { d: 9 } } } +} + +let map3 = {a:1,w:3,} diff --git a/demo/operators.vt 
b/demo/operators.vt new file mode 100644 index 0000000..cf84006 --- /dev/null +++ b/demo/operators.vt @@ -0,0 +1,64 @@ +// 位运算符示例 +let a = 5 // 二进制: 0101 +let b = 3 // 二进制: 0011 + +// 按位与运算 +let and_result = a & b // 结果: 1 (0001) + +// 按位或运算 +let or_result = a | b // 结果: 7 (0111) + +// 按位异或运算 +let xor_result = a ^ b // 结果: 6 (0110) + +// 按位取反运算 +let not_result = ~a // 结果: -6 + +// 左移运算 +let shl_result = a << 1 // 结果: 10 (1010) + +// 右移运算 +let shr_result = a >> 1 // 结果: 2 (0010) + +// 自增自减运算符 +let i = 0 +i++ // i = 1 +i-- // i = 0 +++i // i = 1 +--i // i = 0 + +// 复合赋值运算符 +let x = 10 +x += 5 // x = 15 +x -= 3 // x = 12 +x *= 2 // x = 24 +x /= 4 // x = 6 +x %= 4 // x = 2 + +// 位运算复合赋值运算符 +let y = 12 // 二进制: 1100 +y &= 10 // y = 8 (1000) +y |= 5 // y = 13 (1101) +y ^= 9 // y = 4 (0100) +y <<= 2 // y = 16 (10000) +y >>= 1 // y = 8 (1000) + +// 三元表达式示例 +let age = 20 +let canVote = age >= 18 ? "可以投票" : "不能投票" + +let score = 85 +let result = score >= 90 ? "优秀" : (score >= 60 ? "及格" : "不及格") + +// 打印结果 +print("按位与结果: " + and_result) +print("按位或结果: " + or_result) +print("按位异或结果: " + xor_result) +print("按位取反结果: " + not_result) +print("左移结果: " + shl_result) +print("右移结果: " + shr_result) +print("最终i的值: " + i) +print("最终x的值: " + x) +print("位运算复合赋值后y的值: " + y) +print("投票权: " + canVote) +print("考试结果: " + result) \ No newline at end of file diff --git a/demo/pi.vt b/demo/pi.vt new file mode 100644 index 0000000..831735d --- /dev/null +++ b/demo/pi.vt @@ -0,0 +1,26 @@ +func calculate_pi(terms: i32): f64 { + let sum: f64 = 0; + for (let i: i32 = 0; i < terms; i++) { + let term: f64 = 1.0 / (2 * i + 1); + if (i % 2 == 0) { + sum = sum + term; + } else { + sum = sum - term; + } + if (i % 100000 == 0) { + + print(term); + } + } + return sum * 4; // 确保乘以4得到正确π值 +} + +func main(): i32 { + let terms: i32 = 1000000; + let pi: f64 = calculate_pi(terms); + + let output: string = "Calculated π: " + pi.toString() + " (using " + terms.toString() + " terms)"; + print(output); + + return 0; +} \ 
No newline at end of file diff --git a/demo/test_example.vt b/demo/test_example.vt new file mode 100644 index 0000000..277ad22 --- /dev/null +++ b/demo/test_example.vt @@ -0,0 +1,42 @@ +// 测试程序 + +// 简单函数定义 +func add(i32: a, i32: b):i32 { + return a + b; +} + +// 冒泡排序 +func bubbleSort(arr: array, n: i32): void { + for(let i32:i = 0; i < n-1; i++) { + for (let j32:j = 0; j < n-i-1; j++) { + if (arr[j] > arr[j+1]) { + let temp = arr[j]; + arr[j] = arr[j+1]; + arr[j+1] = temp; + } + } + } +} +// 主函数 +func main():i32 { + let x:i32 = 10; + let y:i32 = 20; + let result = add(x, y); + + // 测试表达式 + let z:i32 = (x + y) * 2; + + // 测试条件语句 + if (z > 50) { + z = z - 10; + } else { + z = z + 10; + } + + println("Hello, World!"); + println("Result:", result); + + print(bubbleSort(arr, n)); + + return z; +} \ No newline at end of file diff --git a/demo/try-catch.vt b/demo/try-catch.vt new file mode 100644 index 0000000..f4b543e --- /dev/null +++ b/demo/try-catch.vt @@ -0,0 +1,11 @@ + +// try-catch结构示例 +try { + let result = 10 / 0; + println(result); + throw 9; +} catch (e) { + println("捕获到异常: " + e); +} + +try {} catch(e) {} \ No newline at end of file diff --git a/doc/00_guide.md b/doc/00_guide.md new file mode 100644 index 0000000..8f6684a --- /dev/null +++ b/doc/00_guide.md @@ -0,0 +1,132 @@ +# Vety语言入门指南 + +## 简介 + +Vety是一门现代化的编程语言,旨在提供简洁、高效且安全的编程体验。它结合了静态类型系统的安全性和动态语言的灵活性,使开发者能够快速构建可靠的应用程序。 + +## 设计理念 + +- **简洁性**:语法简单直观,减少不必要的复杂性 +- **类型安全**:强大的静态类型系统,在编译时捕获潜在错误 +- **性能优先**:高效的运行时性能,适合构建各类应用 +- **原生集成**:良好的原生函数支持,易于与现有系统集成 +- **模块化**:强大的模块系统,支持代码复用和组织 + +## 快速开始 + +### 1. 基本语法 + +```vety +// 变量声明 +let message: string = "Hello, Vety!" +let number: i32 = 42 + +// 函数定义 +func add(a: i32, b: i32): i32 { + return a + b +} + +// 条件语句 +if (number > 0) { + print("Positive number") +} else { + print("Non-positive number") +} + +// 循环 +for (i: i32 = 0; i < 5; i++) { + print(i) +} +``` + +### 2. 
类型系统 + +Vety提供了丰富的内置类型: + +- **基本类型**: + - `i32`:32位整数 + - `i64`:64位整数 + - `f64`:64位浮点数 + - `bool`:布尔值 + - `string`:字符串 + - `void`:无返回值 + +- **复合类型**: + - `array`:数组 + - `map`:映射 + - 自定义结构体 + +### 3. 错误处理 + +Vety使用try-catch机制处理错误: + +```vety +try { + let result = some_risky_operation() +} catch(e) { + print("操作失败") +} +``` + +### 4. 模块系统 + +```vety +// 导入标准库模块 +import io +import math + +// 使用模块功能 +let random_number = math.random(1, 100) +io.print("随机数:" + random_number) +``` + +### 5. 内置函数 + +Vety提供了一系列实用的内置函数: + +- `print()`:输出信息 +- `read_line()`:读取用户输入 +- `len()`:获取集合长度 +- `type_of()`:获取值的类型 + +## 最佳实践 + +1. **命名规范** + - 变量和函数使用小写字母和下划线 + - 类型名使用大驼峰命名法 + +2. **代码组织** + - 相关功能放在同一模块中 + - 适当使用注释说明代码功能 + +3. **错误处理** + - 合理使用try-catch处理异常 + - 提供有意义的错误信息 + +4. **性能优化** + - 避免不必要的内存分配 + - 合理使用循环和递归 + +## 下一步 + +- 阅读更详细的[语法指南](01_basic_syntax.md) +- 了解[控制流](02_control_flow.md) +- 学习[函数和模块](03_functions_and_modules.md) +- 探索[复合类型](04_composite_types.md) + +## 示例项目 + +查看`demo`目录中的示例代码,了解更多Vety语言的实际应用: + +- `main.vt`:基本语法示例 +- `array.vt`:数组操作示例 +- `pi.vt`:函数使用示例 +- `try-catch.vt`:错误处理示例 + +## 贡献 + +Vety是一个开源项目,我们欢迎社区贡献。如果你发现了bug或有改进建议,请提交issue或pull request。 + +## 许可证 + +Vety使用MIT许可证,详细信息请查看LICENSE文件。 \ No newline at end of file diff --git a/doc/01_basic_syntax.md b/doc/01_basic_syntax.md new file mode 100644 index 0000000..b618b31 --- /dev/null +++ b/doc/01_basic_syntax.md @@ -0,0 +1,137 @@ +# Vety语言基础语法 + +## 1. 变量声明 + +Vety语言使用`let`关键字声明变量,使用`const`关键字声明常量。 + +```vety +// 变量声明 +let name: string = "Vety" +let age: i32 = 1 + +// 常量声明 +const PI: f64 = 3.14159 +``` + +## 2. 
基本数据类型 + +### 2.1 整数类型 + +Vety提供了有符号和无符号整数类型: + +- 有符号整数: + - `i8`: 8位有符号整数 (-128 到 127) + - `i16`: 16位有符号整数 (-32,768 到 32,767) + - `i32`: 32位有符号整数 (-2,147,483,648 到 2,147,483,647) + - `i64`: 64位有符号整数 + +- 无符号整数: + - `u8`: 8位无符号整数 (0 到 255) + - `u16`: 16位无符号整数 (0 到 65,535) + - `u32`: 32位无符号整数 (0 到 4,294,967,295) + - `u64`: 64位无符号整数 + +```vety +let age: i32 = 25 +let distance: u64 = 1000000 +``` + +### 2.2 浮点数类型 + +- `f32`: 32位浮点数 +- `f64`: 64位浮点数 + +```vety +let pi: f32 = 3.14159 +let e: f64 = 2.71828 +``` + +### 2.3 布尔类型 + +`bool`类型表示布尔值,可以是`true`或`false`。 + +```vety +let is_valid: bool = true +let has_error: bool = false +``` + +### 2.4 字符串类型 + +`string`类型用于表示文本数据。字符串使用双引号(")包围。 + +```vety +let message: string = "Hello, Vety!" +let name: string = "John" +``` + +字符串支持转义字符: +- `\n`: 换行 +- `\t`: 制表符 +- `\r`: 回车 +- `\"`: 双引号 +- `\\`: 反斜杠 + +### 2.5 void类型 + +`void`类型表示没有返回值的函数的返回类型。 + +```vety +func print_message(): void { + // 函数体 +} +``` + +## 3. 类型推断 + +Vety支持类型推断,当变量的类型可以从初始值推断出来时,可以省略类型注解: + +```vety +let name = "Vety" // 推断为string类型 +let age = 25 // 推断为i32类型 +let is_valid = true // 推断为bool类型 +``` + +## 4. 基本运算符 + +### 4.1 算术运算符 + +- `+`: 加法 +- `-`: 减法 +- `*`: 乘法 +- `/`: 除法 +- `%`: 取模 + +### 4.2 比较运算符 + +- `==`: 等于 +- `!=`: 不等于 +- `<`: 小于 +- `>`: 大于 +- `<=`: 小于等于 +- `>=`: 大于等于 + +### 4.3 逻辑运算符 + +- `&&`: 逻辑与 +- `||`: 逻辑或 +- `!`: 逻辑非 + +### 4.4 位运算符 + +- `&`: 按位与 +- `|`: 按位或 +- `^`: 按位异或 +- `~`: 按位取反 + +## 5. 注释 + +Vety支持两种注释方式: + +```vety +// 单行注释 + +/* +多行注释 +可以跨越多行 +*/ +``` \ No newline at end of file diff --git a/doc/02_control_flow.md b/doc/02_control_flow.md new file mode 100644 index 0000000..e455d6c --- /dev/null +++ b/doc/02_control_flow.md @@ -0,0 +1,131 @@ +# Vety语言控制流程 + +## 1. 
条件语句 + +### 1.1 if-else语句 + +Vety使用`if`和`else`关键字进行条件控制: + +```vety +let score: i32 = 85 + +if (score >= 90) { + print("优秀") +} else if (score >= 80) { + print("良好") +} else if (score >= 60) { + print("及格") +} else { + print("不及格") +} +``` + +条件表达式必须是布尔类型,Vety不会进行隐式的真值转换。 + +### 1.2 条件表达式的组合 + +可以使用逻辑运算符组合多个条件: + +```vety +let age: i32 = 25 +let has_ticket: bool = true + +if (age >= 18 && has_ticket) { + print("允许入场") +} else { + print("不允许入场") +} +``` + +## 2. 循环语句 + +### 2.1 while循环 + +`while`循环在条件为真时重复执行代码块: + +```vety +let count: i32 = 0 + +while (count < 5) { + print(count) + count = count + 1 +} +``` + +### 2.2 for循环 + +Vety的`for`循环支持遍历数组和范围: + +```vety +// 遍历数组 +let numbers: array = [1, 2, 3, 4, 5] + +for (let i: i32 = 0; i < numbers.len; i++) { + print(numbers[i]) +} +``` + +## 3. 循环控制 + +### 3.1 break语句 + +使用`break`语句可以提前退出循环: + +```vety +let i: i32 = 0 +while (true) { + if (i >= 5) { + break + } + print(i) + i = i + 1 +} +``` + +### 3.2 continue语句 + +使用`continue`语句可以跳过当前循环的剩余部分,直接进入下一次循环: + +```vety +for (let i: i32 = 0; i < 10; i++) { + if (i % 2 == 0) { + continue // 跳过偶数 + } + print(i) // 只打印奇数 +} +``` + +## 4. 错误处理 + +### 4.1 try-catch语句 + +Vety使用`try`和`catch`进行错误处理: + +```vety +try { + // 可能产生错误的代码 + let result = dangerous_operation() +} catch(e) { + // 错误处理代码 + print("操作失败") +} +``` + +### 4.2 错误传播 + +函数可以使用`throw`抛出错误,错误会传播给调用者,由调用者通过`try-catch`处理: + +```vety +func divide(a: i32, b: i32): i32 { + if (b == 0) { + throw "除数不能为零" + } + return a / b +} + +try { + let result = divide(10, 0) +} catch(e) { + print("除法运算失败") +} +``` \ No newline at end of file diff --git a/doc/03_functions_and_modules.md b/doc/03_functions_and_modules.md new file mode 100644 index 0000000..7982f11 --- /dev/null +++ b/doc/03_functions_and_modules.md @@ -0,0 +1,142 @@ +# Vety语言函数和模块 + +## 1. 
函数定义 + +### 1.1 基本函数定义 + +使用`func`关键字定义函数: + +```vety +func add(a: i32, b: i32): i32 { + return a + b +} + +// 无返回值的函数 +func print_message(msg: string): void { + print(msg) +} +``` + +### 1.2 函数参数 + +函数参数必须指定类型: + +```vety +func greet(name: string, age: i32): string { + return "Hello, " + name + "! You are " + age + " years old." +} +``` + +### 1.3 返回值 + +- 函数必须指定返回值类型 +- 使用`return`语句返回值 +- 如果函数不需要返回值,使用`void`类型 + +```vety +func calculate_area(width: f64, height: f64): f64 { + return width * height +} +``` + +## 2. 原生函数 + +使用`native`关键字声明原生函数,这些函数由底层实现: + +```vety +native func print(message: string): void +native func read_line(): string +``` + +## 3. 模块系统 + +### 3.1 导入模块 + +使用`import`关键字导入其他模块: + +```vety +import io // 导入标准库的io模块 +import math // 导入数学模块 +``` + +### 3.2 模块别名 + +可以使用`as`关键字为导入的模块指定别名: + +```vety +import module_name as alias_name +``` + +## 4. 函数调用 + +### 4.1 基本调用 + +```vety +let result = add(5, 3) +print("Hello, World!") +``` + +### 4.2 模块函数调用 + +```vety +let current_time = time.now() // 调用模块中的函数 +let random_number = math.random(1, 100) +``` + +## 5. 注解 + +使用`@`符号添加函数注解: + +```vety +@deprecated +func old_function(): void { + // 已废弃的函数 +} + +@test +func test_feature(): void { + // 测试函数 +} +``` + +## 6. 错误处理 + +函数可以抛出错误,调用者需要处理这些错误: + +```vety +func divide(a: f64, b: f64): f64 { + if (b == 0) { + throw "Division by zero" + } + return a / b +} + +try { + let result = divide(10.0, 0.0) +} catch(e) { + print("除法运算失败") +} +``` + +## 7. 
最佳实践 + +- 函数名使用小写字母和下划线 +- 函数应该只做一件事情 +- 参数数量不宜过多 +- 适当添加注释说明函数功能 +- 处理所有可能的错误情况 + +```vety +// 好的函数示例 +func calculate_average(numbers: array<f64>): f64 { + if (numbers.length == 0) { + throw "Empty array" + } + + let sum: f64 = 0.0 + for num in numbers { + sum = sum + num + } + return sum / numbers.length +} +``` \ No newline at end of file diff --git a/doc/04_composite_types.md b/doc/04_composite_types.md new file mode 100644 index 0000000..e77f1fa --- /dev/null +++ b/doc/04_composite_types.md @@ -0,0 +1,180 @@ +# Vety语言复合数据类型 + +## 1. 数组(Array) + +### 1.1 数组声明 + +数组是相同类型元素的有序集合,使用方括号`[]`表示: + +```vety +// 显式类型声明 +let numbers: array = [1, 2, 3, 4, 5] + +// 空数组 +let empty_array: array = [] + +// 类型推断 +let fruits = ["apple", "banana", "orange"] +``` + +### 1.2 数组访问 + +使用索引访问数组元素(索引从0开始): + +```vety +let numbers = [10, 20, 30, 40, 50] + +// 访问元素 +let first = numbers[0] // 10 +let second = numbers[1] // 20 + +// 修改元素 +numbers[2] = 35 // [10, 20, 35, 40, 50] +``` + +### 1.3 数组操作 + +```vety +// 获取数组长度 +let length = numbers.length + +// 遍历数组 +for num in numbers { + print(num) +} + +// 使用索引遍历 +for (let i = 0; i < numbers.length; i++) { + print(numbers[i]) +} +``` + +## 2. 映射(Map) + +### 2.1 映射声明 + +映射是键值对的集合,键和值可以是不同类型: + +```vety +// 显式类型声明 +let scores: map = { + "Alice": 95, + "Bob": 87, + "Charlie": 92 +} + +// 空映射 +let empty_map: map = {} + +// 类型推断 +let config = { + "debug": true, + "port": 8080, + "host": "localhost" +} +``` + +### 2.2 映射访问 + +使用键访问映射中的值: + +```vety +let scores = {"Alice": 95, "Bob": 87} + +// 访问值 +let alice_score = scores["Alice"] // 95 + +// 修改值 +scores["Bob"] = 90 // {"Alice": 95, "Bob": 90} + +// 添加新键值对 +scores["Charlie"] = 88 // {"Alice": 95, "Bob": 90, "Charlie": 88} +``` + +### 2.3 映射操作 + +```vety +// 检查键是否存在 +if "Alice" in scores { + print("Found Alice's score") +} + +// 遍历映射 +for key in scores { + let value = scores[key] + print(key + ": " + value) +} +``` + +## 3. 
复合类型嵌套 + +数组和映射可以相互嵌套,创建更复杂的数据结构: + +### 3.1 数组的数组 + +```vety +// 二维数组 +let matrix: array<array<i32>> = [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9] +] + +// 访问嵌套元素 +let value = matrix[1][2] // 6 +``` + +### 3.2 映射的数组 + +```vety +// 用户列表 +let users: array<map<string, string>> = [ + {"name": "Alice", "email": "alice@example.com"}, + {"name": "Bob", "email": "bob@example.com"} +] + +// 访问嵌套数据 +let first_user_email = users[0]["email"] +``` + +### 3.3 映射的映射 + +```vety +// 嵌套配置 +let config: map<string, map<string, string>> = { + "database": { + "host": "localhost", + "port": "5432", + "name": "mydb" + }, + "server": { + "host": "0.0.0.0", + "port": "8080" + } +} + +// 访问嵌套值 +let db_host = config["database"]["host"] +``` + +## 4. 最佳实践 + +- 选择合适的数据结构 +- 注意类型一致性 +- 处理边界情况 +- 合理使用嵌套 + +```vety +// 好的实践示例 +func process_scores(scores: map): f64 { + if scores.length == 0 { + return 0.0 + } + + let total: i32 = 0 + for name in scores { + total = total + scores[name] + } + return total / scores.length +} +``` \ No newline at end of file diff --git a/lib/io.vt b/lib/io.vt new file mode 100644 index 0000000..a32d8fb --- /dev/null +++ b/lib/io.vt @@ -0,0 +1 @@ +native func print(msg: string); \ No newline at end of file diff --git a/parser/CMakeLists.txt b/parser/CMakeLists.txt new file mode 100644 index 0000000..20008ef --- /dev/null +++ b/parser/CMakeLists.txt @@ -0,0 +1,24 @@ +# 添加解析器库(动态库) +add_library(vety_parser + lexer.c + lexer.h + ast.c + ast.h + parser.c + parser.h + error.c + error.h + ast_printer.c + ast_printer.h +) + +# 设置包含路径 +target_include_directories(vety_parser PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/utils +) + +# 添加链接库 +target_link_libraries(vety_parser + vety_utils +) \ No newline at end of file diff --git a/parser/ast.c b/parser/ast.c new file mode 100644 index 0000000..6d1bfe5 --- /dev/null +++ b/parser/ast.c @@ -0,0 +1,40 @@ +// +// Created by Natuie on 2025/3/22. 
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ast.h"
+
+// Allocate a node of the given type with every other field zeroed/NULL; exits on allocation failure.
+ASTNode *create_node(NodeType type) {
+    ASTNode *node = malloc(sizeof(ASTNode));
+    if (node == NULL) {
+        fprintf(stderr, "Memory allocation error\n");
+        exit(1);
+    }
+    *node = (ASTNode){.type = type, .op_type = OP_UNKNOWN};
+    return node;
+}
+
+// Append child to parent's children array (grown by one slot); returns a by-value copy of *parent.
+ASTNode add_child(ASTNode *parent, ASTNode *child) {
+    ASTNode **grown = realloc(parent->children, (parent->children_count + 1) * sizeof(ASTNode *));
+    if (grown == NULL) {
+        fprintf(stderr, "Memory allocation error\n");
+        exit(1);
+    }
+    parent->children = grown;
+    parent->children[parent->children_count++] = child;
+    return *parent;
+}
+
+// Field setters: source position (line/column) and value (the pointer is stored as-is, not copied).
+void set_node_position(ASTNode *node, int line, int column) {
+    node->line = line;
+    node->column = column;
+}
+
+void set_node_value(ASTNode *node, char *value) {
+    node->value = value;
+}
\ No newline at end of file
diff --git a/parser/ast.h b/parser/ast.h
new file mode 100644
index 0000000..38d83f8
--- /dev/null
+++ b/parser/ast.h
@@ -0,0 +1,146 @@
+//
+// Created by Natuie on 2025/3/22.
+// + +#ifndef VETY_AST_H +#define VETY_AST_H + +// 节点类型枚举 +typedef enum { + // 程序结构 + NODE_PROGRAM, // 程序根节点 + NODE_BLOCK, // 代码块 + NODE_EMPTY_BLOCK, // 空代码块 + + // 声明 + NODE_VAR_DECL, // 变量声明 + NODE_VAR_DECL_LIST, // 多变量声明列表 + NODE_FUNC_DECL, // 函数声明 + NODE_PARAM_LIST, // 参数列表 + NODE_PARAM, // 单个参数 + + // 语句 + NODE_IF_STMT, // if语句 + NODE_ELSE_IF, // else if分支 + NODE_ELSE_BLOCK, // else分支 + NODE_WHILE_STMT, // while循环语句 + NODE_FOR_STMT, // for循环语句 + NODE_FOR_INIT, // for循环初始化部分 + NODE_FOR_CONDITION, // for循环条件部分 + NODE_FOR_UPDATE, // for循环更新部分 + NODE_BREAK_STMT, // break语句 + NODE_CONTINUE_STMT, // continue语句 + NODE_RETURN_STMT, // return语句 + NODE_EXPR_STMT, // 表达式语句 + NODE_IMPORT_STMT, // import语句 + NODE_IMPORT_PATH, // import路径 + NODE_IMPORT_ALIAS, // import别名 + NODE_REQUEST_STMT, // 请求语句 + + // 表达式 + NODE_BINARY_EXPR, // 二元表达式 + NODE_UNARY_EXPR, // 一元表达式 + NODE_POSTFIX_EXPR, // 后缀表达式 + NODE_CALL_EXPR, // 函数调用 + NODE_MEMBER_EXPR, // 成员访问表达式 + NODE_INDEX_EXPR, // 索引表达式 + NODE_TERNARY_EXPR, // 三元表达式 + + // 标识符和字面量 + NODE_IDENTIFIER, // 标识符 + NODE_INT_LITERAL, // 整数字面量 + NODE_FLOAT_LITERAL, // 浮点数字面量 + NODE_STRING_LITERAL, // 字符串字面量 + NODE_BOOL_LITERAL, // 布尔字面量 + + // 复合类型 + NODE_ARRAY_TYPE, // 数组类型 + NODE_ARRAY_LITERAL, // 数组字面量 + NODE_ARRAY_ITEM, // 数组元素 + NODE_MAP_LITERAL, // map字面量 + NODE_MAP_ENTRY, // map条目 + + // 类型系统 + NODE_TYPE_IDENTIFIER, // 类型标识符 + NODE_TYPE_GENERIC, // 泛型类型 (如Array) + NODE_GENERIC_PARAMS, // 泛型参数 + NODE_TYPE_UNION, // 联合类型 + NODE_CAST_EXPR, // 类型转换 + + // 注解系统 + NODE_ANNOTATION, // 注解 + NODE_ANNOTATIONS, // 注解列表 + NODE_POS_ARG, // 位置参数 + NODE_NAMED_ARG, // 命名参数 + + // 错误处理 + NODE_TRY_STMT, // try语句 + NODE_CATCH_BLOCK, // catch块 + NODE_THROW_STMT, // throw语句 + NODE_ERROR_NODE, // 错误节点 +} NodeType; + +// 操作符类型枚举 +typedef enum { + OP_UNKNOWN, + // 算术运算符 + OP_ADD, // + + OP_SUB, // - + OP_MUL, // * + OP_DIV, // / + OP_MOD, // % + + // 逻辑运算符 + OP_AND, // and + OP_OR, // or + OP_NOT, // not + + // 比较运算符 + OP_EQ, // == + OP_NE, // != + OP_LT, 
// < + OP_GT, // > + OP_LE, // <= + OP_GE, // >= + + // 位运算符 + OP_BITAND, // & + OP_BITOR, // | + OP_BITXOR, // ^ + OP_BITNOT, // ~ + OP_SHL, // << + OP_SHR, // >> + + // 赋值运算符 + OP_ASSIGN, // = + OP_PLUS_ASSIGN, // += + OP_MINUS_ASSIGN, // -= + OP_MUL_ASSIGN, // *= + OP_DIV_ASSIGN, // /= + OP_MOD_ASSIGN, // %= + OP_BITAND_ASSIGN, // &= + OP_BITOR_ASSIGN, // |= + OP_BITXOR_ASSIGN, // ^= + OP_SHL_ASSIGN, // <<= + OP_SHR_ASSIGN, // >>= + + // 三元运算符 + OP_TERNARY, // ? : +} OperatorType; + +typedef struct ASTNode{ + int line; // 行号 + int column; // 列号 + int children_count; // 子节点数量 + char *value; // 值 + NodeType type; + OperatorType op_type; // 操作符类型 + struct ASTNode **children; +} ASTNode; + +ASTNode *create_node(NodeType type); + +ASTNode add_child(ASTNode *parent, ASTNode *child); + +void set_node_position(ASTNode *node, int line, int column); +#endif //VETY_AST_H diff --git a/parser/ast_printer.c b/parser/ast_printer.c new file mode 100644 index 0000000..fc30279 --- /dev/null +++ b/parser/ast_printer.c @@ -0,0 +1,94 @@ +#include "ast_printer.h" +#include "parser.h" +#include +#include + +static void print_indent(int level, int is_last) { + for(int i=0; itype) { + case NODE_PROGRAM: print_node("PROGRAM", node->value, level, is_last); break; + case NODE_VAR_DECL: print_node("VAR_DECL", node->value, level, is_last); break; + case NODE_FUNC_DECL: print_node("FUNC_DECL", node->value, level, is_last); break; + case NODE_PARAM: print_node("PARAM", node->value, level, is_last); break; + case NODE_BLOCK: print_node("BLOCK", node->value, level, is_last); break; + case NODE_IF_STMT: print_node("IF_STMT", node->value, level, is_last); break; + case NODE_WHILE_STMT: print_node("WHILE_STMT", node->value, level, is_last); break; + case NODE_FOR_STMT: print_node("FOR_STMT", node->value, level, is_last); break; + case NODE_BREAK_STMT: print_node("BREAK_STMT", node->value, level, is_last); break; + case NODE_CONTINUE_STMT: print_node("CONTINUE_STMT", node->value, level, is_last); 
break; + case NODE_RETURN_STMT: print_node("RETURN_STMT", node->value, level, is_last); break; + case NODE_EXPR_STMT: print_node("EXPR_STMT", node->value, level, is_last); break; + case NODE_IMPORT_STMT: print_node("IMPORT_STMT", node->value, level, is_last); break; + case NODE_REQUEST_STMT: print_node("REQUEST_STMT", node->value, level, is_last); break; + case NODE_BINARY_EXPR: print_node("BINARY_EXPR", node->value, level, is_last); break; + case NODE_UNARY_EXPR: print_node("UNARY_EXPR", node->value, level, is_last); break; + case NODE_POSTFIX_EXPR: print_node("POSTFIX_EXPR", node->value, level, is_last); break; + case NODE_CALL_EXPR: print_node("CALL_EXPR", node->value, level, is_last); break; + case NODE_MEMBER_EXPR: print_node("MEMBER_EXPR", node->value, level, is_last); break; + case NODE_IDENTIFIER: print_node("IDENTIFIER", node->value, level, is_last); break; + case NODE_INT_LITERAL: print_node("INT_LITERAL", node->value, level, is_last); break; + case NODE_FLOAT_LITERAL: print_node("FLOAT_LITERAL", node->value, level, is_last); break; + case NODE_STRING_LITERAL: print_node("STRING_LITERAL", node->value, level, is_last); break; + case NODE_BOOL_LITERAL: print_node("BOOL_LITERAL", node->value, level, is_last); break; + case NODE_ARRAY_LITERAL: print_node("ARRAY_LITERAL", node->value, level, is_last); break; + case NODE_MAP_LITERAL: print_node("MAP_LITERAL", node->value, level, is_last); break; + case NODE_INDEX_EXPR: print_node("INDEX_EXPR", node->value, level, is_last); break; + case NODE_ANNOTATION: print_node("ANNOTATION", node->value, level, is_last); break; + case NODE_ANNOTATIONS: print_node("ANNOTATIONS", node->value, level, is_last); break; + case NODE_NAMED_ARG: print_node("NAMED_ARG", node->value, level, is_last); break; + case NODE_TRY_STMT: print_node("TRY_STMT", node->value, level, is_last); break; + case NODE_CATCH_BLOCK: print_node("CATCH_BLOCK", node->value, level, is_last); break; + case NODE_MAP_ENTRY: print_node("MAP_ENTRY", node->value, 
level, is_last); break; + case NODE_THROW_STMT: print_node("THROW_STMT", node->value, level, is_last); break; + case NODE_TYPE_IDENTIFIER: print_node("TYPE_IDENTIFIER", node->value, level, is_last); break; + case NODE_ELSE_IF: print_node("ELSE_IF", node->value, level, is_last); break; + case NODE_ELSE_BLOCK: print_node("ELSE_BLOCK", node->value, level, is_last); break; + case NODE_FOR_INIT: print_node("FOR_INIT", node->value, level, is_last); break; + case NODE_FOR_CONDITION: print_node("FOR_CONDITION", node->value, level, is_last); break; + case NODE_FOR_UPDATE: print_node("FOR_UPDATE", node->value, level, is_last); break; + case NODE_VAR_DECL_LIST: print_node("VAR_DECL_LIST", node->value, level, is_last); break; + case NODE_PARAM_LIST: print_node("PARAM_LIST", node->value, level, is_last); break; + case NODE_ARRAY_ITEM: print_node("ARRAY_ITEM", node->value, level, is_last); break; + case NODE_POS_ARG: print_node("POS_ARG", node->value, level, is_last); break; + case NODE_EMPTY_BLOCK: print_node("EMPTY_BLOCK", node->value, level, is_last); break; + case NODE_ERROR_NODE: print_node("ERROR_NODE", node->value, level, is_last); break; + case NODE_TYPE_GENERIC: print_node("TYPE_GENERIC", node->value, level, is_last); break; + case NODE_TYPE_UNION: print_node("TYPE_UNION", node->value, level, is_last); break; + case NODE_IMPORT_PATH: print_node("IMPORT_PATH", node->value, level, is_last); break; + case NODE_IMPORT_ALIAS: print_node("IMPORT_ALIAS", node->value, level, is_last); break; + case NODE_CAST_EXPR: print_node("CAST_EXPR", node->value, level, is_last); break; + case NODE_GENERIC_PARAMS: print_node("GENERIC_PARAMS", node->value, level, is_last); break; + case NODE_TERNARY_EXPR: print_node("TERNARY_EXPR", node->value, level, is_last); break; + case NODE_ARRAY_TYPE: print_node("ARRAY_TYPE", node->value, level, is_last); break; + default: + print_node("UNKNOWN_NODE", node->value, level, is_last); + break; + } + + printf(COLOR_COMMENT " (%d:%d)" COLOR_RESET "\n", 
/**
 * Return a copy of one line of `source`.
 *
 * @param source       NUL-terminated text; may be NULL.
 * @param line_number  1-based line to extract.
 * @return A malloc'ed, NUL-terminated copy of the line without its trailing
 *         '\n' (the caller must free() it), or NULL when `source` is NULL,
 *         `line_number` is not positive, the text has fewer lines than
 *         requested, or allocation fails.
 *
 * The (empty) line that follows a final '\n' counts as a line, so an error
 * reported at end-of-file yields "" rather than some unrelated line.
 */
static char* get_line_at(const char* source, int line_number) {
    if (source == NULL || line_number <= 0) return NULL;

    // Walk forward one newline at a time until we stand on the first
    // character of the requested line.
    const char* line_start = source;
    for (int current_line = 1; current_line < line_number; current_line++) {
        const char* newline = strchr(line_start, '\n');
        if (newline == NULL) return NULL;   // text ends before that line
        line_start = newline + 1;
    }

    // The line runs up to (not including) the next '\n' or the terminator.
    size_t length = strcspn(line_start, "\n");
    char* line = (char*)malloc(length + 1);
    if (line == NULL) return NULL;

    memcpy(line, line_start, length);
    line[length] = '\0';
    return line;
}
!= '\0' && *p != '\n') p++; + + // 复制行内容 + int length = p - line_start; + char* line = (char*)malloc(length + 1); + if (line == NULL) return NULL; + + strncpy(line, line_start, length); + line[length] = '\0'; + + return line; +} + +// 在指定位置打印错误指示箭头 +static void print_error_indicator(int column) { + // 打印错误指示箭头 + fprintf(stderr, " | " ANSI_COLOR_RED); + for (int i = 1; i < column; i++) { + fprintf(stderr, "~"); + } + // 打印错误指示箭头 + fprintf(stderr, "^" ANSI_COLOR_RESET "\n"); +} + +// 在当前token位置报告错误 +void parser_error_at_current(Parser* parser, const char* message) { + parser_error_at(parser, &parser->current_token, message); + exit(0); +} + +void parser_error_at(Parser* parser, Token* token, const char* message) { + if (parser->had_error) return; + // 添加错误到errors数组 + parser->error_count++; + parser->errors = realloc(parser->errors, sizeof(*parser->errors) * parser->error_count); + parser->errors[parser->error_count - 1].message = strdup(message); + parser->errors[parser->error_count - 1].line = token->line; + parser->errors[parser->error_count - 1].column = token->column; + + // 打印错误位置和消息 + fprintf(stderr, ANSI_COLOR_CYAN "%s" ANSI_COLOR_RESET ":" + ANSI_COLOR_YELLOW "%d:%d" ANSI_COLOR_RESET ": " + ANSI_COLOR_RED "Syntax error: %s" ANSI_COLOR_RESET "\n", + parser->filename, token->line, token->column, message); + + char* source = read_file(parser->filename); + if (source != NULL) { + char* line = get_line_at(source, token->line); + if (line != NULL) { + fprintf(stderr, "%4d | %s\n", token->line, line); + print_error_indicator(token->column); + free(line); + } + free(source); + } +} \ No newline at end of file diff --git a/parser/error.h b/parser/error.h new file mode 100644 index 0000000..bae8d3a --- /dev/null +++ b/parser/error.h @@ -0,0 +1,15 @@ +#ifndef VETY_ERROR_H +#define VETY_ERROR_H + +#include "parser.h" + +// 在当前token位置报告错误 +void parser_error_at_current(Parser* parser, const char* message); + +// 在上一个token位置报告错误 +void parser_error(Parser* parser, const 
char* message); + +// 在指定token位置报告错误 +void parser_error_at(Parser* parser, Token* token, const char* message); + +#endif // VETY_ERROR_H \ No newline at end of file diff --git a/parser/lexer.c b/parser/lexer.c new file mode 100644 index 0000000..b821d62 --- /dev/null +++ b/parser/lexer.c @@ -0,0 +1,556 @@ +// +// Created by Natuie on 2025/3/22. +// + +#include +#include +#include +#include +#include "lexer.h" + +void lexer_init(Lexer *lexer, char *source) { + lexer->source = source; + lexer->current_pos = 0; + lexer->line = 1; + lexer->column = 1; +} + +void lexer_free(Lexer *lexer) { + free(lexer->source); +} + +// 跳过空白字符 +void skip_whitespace_and_comments(Lexer *lexer) { + while (1) { + char c = lexer->source[lexer->current_pos]; + if (c == ' ' || c == '\t' || c == '\r') { + lexer->current_pos++; + lexer->column++; + } else if (c == '\n') { + lexer->current_pos++; + lexer->line++; + lexer->column = 1; + } else if (c == '/' && lexer->source[lexer->current_pos+1] == '/') { + lexer->current_pos += 2; // 跳过"//" + lexer->column += 2; + while (lexer->source[lexer->current_pos] != '\n' && + lexer->source[lexer->current_pos] != '\0') { + lexer->current_pos++; + lexer->column++; + } + // 处理换行符 + if (lexer->source[lexer->current_pos] == '\n') { + lexer->current_pos++; + lexer->line++; + lexer->column = 1; + } else { + // 文件末尾 + break; + } + } else if (c == '/' && lexer->source[lexer->current_pos+1] == '*') { + lexer->current_pos += 2; // 跳过"/*" + lexer->column += 2; + int in_comment = 1; + while (in_comment && lexer->source[lexer->current_pos] != '\0') { + c = lexer->source[lexer->current_pos]; + if (c == '\n') { + lexer->line++; + lexer->column = 1; + } else if (c == '*' && lexer->source[lexer->current_pos+1] == '/') { + // 结束注释 + lexer->current_pos += 2; + lexer->column += 2; + in_comment = 0; + } else { + lexer->column++; + } + lexer->current_pos++; + } + } else { + break; + } + } +} + +// 读取标识符 +Token read_identifier(Lexer *lexer) { + Token token; + int pos = 0; + 
token.type = TOKEN_IDENTIFIER; + token.line = lexer->line; + token.column = lexer->column; + + while (isalnum(lexer->source[lexer->current_pos]) || + (lexer->source[lexer->current_pos] == '_')) { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + token.value[pos] = '\0'; + + // 关键字识别 + if (strcmp(token.value, "func") == 0) token.type = TOKEN_FUNC; + else if (strcmp(token.value, "let") == 0) token.type = TOKEN_LET; + else if (strcmp(token.value, "const") == 0) token.type = TOKEN_CONST; + else if (strcmp(token.value, "i8") == 0) token.type = TOKEN_TYPE_I8; + else if (strcmp(token.value, "i16") == 0) token.type = TOKEN_TYPE_I16; + else if (strcmp(token.value, "i32") == 0) token.type = TOKEN_TYPE_I32; + else if (strcmp(token.value, "i64") == 0) token.type = TOKEN_TYPE_I64; + else if (strcmp(token.value, "u8") == 0) token.type = TOKEN_TYPE_U8; + else if (strcmp(token.value, "u16") == 0) token.type = TOKEN_TYPE_U16; + else if (strcmp(token.value, "u32") == 0) token.type = TOKEN_TYPE_U32; + else if (strcmp(token.value, "u64") == 0) token.type = TOKEN_TYPE_U64; + else if (strcmp(token.value, "f32") == 0) token.type = TOKEN_TYPE_F32; + else if (strcmp(token.value, "f64") == 0) token.type = TOKEN_TYPE_F64; + else if (strcmp(token.value, "void") == 0) token.type = TOKEN_TYPE_VOID; + else if (strcmp(token.value, "any") == 0) token.type = TOKEN_TYPE_ANY; + else if (strcmp(token.value, "int") == 0) token.type = TOKEN_INT; + else if (strcmp(token.value, "float") == 0) token.type = TOKEN_FLOAT; + else if (strcmp(token.value, "string") == 0) token.type = TOKEN_STRING; + else if (strcmp(token.value, "bool") == 0) token.type = TOKEN_BOOL; + else if (strcmp(token.value, "array") == 0) token.type = TOKEN_ARRAY; + else if (strcmp(token.value, "map") == 0) token.type = TOKEN_MAP; + else if (strcmp(token.value, "true") == 0) token.type = TOKEN_TRUE; + else if (strcmp(token.value, "false") == 0) token.type = 
TOKEN_FALSE; + else if (strcmp(token.value, "if") == 0) token.type = TOKEN_IF; + else if (strcmp(token.value, "else") == 0) token.type = TOKEN_ELSE; + else if (strcmp(token.value, "while") == 0) token.type = TOKEN_WHILE; + else if (strcmp(token.value, "for") == 0) token.type = TOKEN_FOR; + else if (strcmp(token.value, "break") == 0) token.type = TOKEN_BREAK; + else if (strcmp(token.value, "continue") == 0) token.type = TOKEN_CONTINUE; + else if (strcmp(token.value, "return") == 0) token.type = TOKEN_RETURN; + else if (strcmp(token.value, "import") == 0) token.type = TOKEN_IMPORT; + else if (strcmp(token.value, "as") == 0) token.type = TOKEN_AS; + else if (strcmp(token.value, "and") == 0) token.type = TOKEN_AND; + else if (strcmp(token.value, "or") == 0) token.type = TOKEN_OR; + else if (strcmp(token.value, "in") == 0) token.type = TOKEN_IN; + else if (strcmp(token.value, "native") == 0) token.type = TOKEN_NATIVE; + else if (strcmp(token.value, "try") == 0) token.type = TOKEN_TRY; + else if (strcmp(token.value, "catch") == 0) token.type = TOKEN_CATCH; + else if (strcmp(token.value, "throw") == 0) token.type = TOKEN_THROW; + else token.type = TOKEN_IDENTIFIER; + return token; +} + +// 读取数字 +Token read_number(Lexer *lexer) { + Token token; + int pos = 0; + int has_dot = 0; + int has_exp = 0; + int base = 10; // 默认十进制 + token.line = lexer->line; + token.column = lexer->column; + + // 检查是否是特殊进制数 + if (lexer->source[lexer->current_pos] == '0') { + char next = lexer->source[lexer->current_pos + 1]; + if (next == 'b' || next == 'B') { // 二进制 + base = 2; + token.value[pos++] = '0'; + token.value[pos++] = next; + lexer->current_pos += 2; + lexer->column += 2; + // 读取二进制数字 + while (lexer->source[lexer->current_pos] == '0' || + lexer->source[lexer->current_pos] == '1') { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + if (pos <= 2) { // 只有前缀没有数字 + fprintf(stderr, "Error: Invalid binary number at 
line %d, column %d\n", + lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + } else if (next == 'x' || next == 'X') { // 十六进制 + base = 16; + token.value[pos++] = '0'; + token.value[pos++] = next; + lexer->current_pos += 2; + lexer->column += 2; + // 读取十六进制数字 + while (isxdigit(lexer->source[lexer->current_pos])) { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + if (pos <= 2) { // 只有前缀没有数字 + fprintf(stderr, "Error: Invalid hexadecimal number at line %d, column %d\n", + lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + } else { // 八进制 + base = 8; + token.value[pos++] = '0'; + lexer->current_pos++; // 跳过0 + lexer->column++; + // 读取后续的八进制数字 + while (lexer->source[lexer->current_pos] >= '0' && + lexer->source[lexer->current_pos] <= '7') { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + // 检查是否后续字符是数字但超出八进制范围 + if (isdigit(lexer->source[lexer->current_pos])) { + fprintf(stderr, "Error: Invalid octal number at line %d, column %d\n", + lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + } + } + + // 如果不是特殊进制,按照十进制处理 + if (base == 10) { + // 读取整数部分 + while (isdigit(lexer->source[lexer->current_pos])) { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + + // 处理小数点和小数部分 + if (lexer->source[lexer->current_pos] == '.') { + has_dot = 1; + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + + // 检查小数点后是否有数字 + if (!isdigit(lexer->source[lexer->current_pos])) { + fprintf(stderr, "Error: Expected digit after decimal point at line %d, column %d\n", + lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + + // 读取小数部分 + while (isdigit(lexer->source[lexer->current_pos])) { 
+ if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + } + + // 处理科学计数法 + if (lexer->source[lexer->current_pos] == 'e' || lexer->source[lexer->current_pos] == 'E') { + has_exp = 1; + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + + // 处理指数的符号 + if (lexer->source[lexer->current_pos] == '+' || lexer->source[lexer->current_pos] == '-') { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + + // 读取指数部分 + if (!isdigit(lexer->source[lexer->current_pos])) { + // 错误处理:科学计数法后面必须有数字 + fprintf(stderr, "Error: Invalid scientific notation at line %d, column %d\n", + lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + + while (isdigit(lexer->source[lexer->current_pos])) { + if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + } + } + token.value[pos] = '\0'; + token.type = (has_dot || has_exp) ? 
TOKEN_FLOAT_LITERAL : TOKEN_INT_LITERAL; + return token; +} + +// 读取字符串 +Token read_string(Lexer *lexer) { + Token token; + int pos = 0; + token.type = TOKEN_STRING_LITERAL; + token.line = lexer->line; + token.column = lexer->column; + + lexer->current_pos++; // 跳过引号 + lexer->column++; + + while (lexer->source[lexer->current_pos] != '"') { + if (lexer->source[lexer->current_pos] == '\0') { + token.type = TOKEN_ERROR; + strcpy(token.value, "Unterminated string literal"); + return token; + } + + if (lexer->source[lexer->current_pos] == '\\') { + lexer->current_pos++; + lexer->column++; + + if (pos >= 255) continue; + + switch (lexer->source[lexer->current_pos]) { + case 'n': token.value[pos++] = '\n'; break; + case 't': token.value[pos++] = '\t'; break; + case 'r': token.value[pos++] = '\r'; break; + case '\"': token.value[pos++] = '\"'; break; + case '\\': token.value[pos++] = '\\'; break; + default: + fprintf(stderr, "Error: Invalid escape sequence '\\%c' at line %d, column %d\n", + lexer->source[lexer->current_pos], lexer->line, lexer->column); + token.type = TOKEN_ERROR; + return token; + } + } else if (pos < 255) { + token.value[pos++] = lexer->source[lexer->current_pos]; + } + lexer->current_pos++; + lexer->column++; + } + token.value[pos] = '\0'; + if (lexer->source[lexer->current_pos] == '"') { + lexer->current_pos++; + lexer->column++; + } else { + token.type = TOKEN_ERROR; + strcpy(token.value, "Unterminated string literal"); + } + return token; +} + +Token lexer_next_token(Lexer *lexer) { + skip_whitespace_and_comments(lexer); + char c = lexer->source[lexer->current_pos]; + + Token token; + token.line = lexer->line; + token.column = lexer->column; + token.value[0] = '\0'; + + if (c == '\0') { + token.type = TOKEN_EOF; + return token; + } else if (isalpha(c) || (c == '_')) { + return read_identifier(lexer); + } else if (isdigit(c)) { + return read_number(lexer); + } else if (c == '"') { + return read_string(lexer); + } else { + token.value[0] = c; + 
token.value[1] = '\0'; + lexer->current_pos++; + lexer->column++; + + // 处理双字符和三字符操作符 + char next_c = lexer->source[lexer->current_pos]; + char next_next_c = (next_c != '\0') ? lexer->source[lexer->current_pos + 1] : '\0'; + + // 处理三字符操作符 + if (c == '<' && next_c == '<' && next_next_c == '=') { + token.value[1] = '<'; + token.value[2] = '='; + token.value[3] = '\0'; + lexer->current_pos += 2; + lexer->column += 2; + token.type = TOKEN_SHL_ASSIGN; + return token; + } else if (c == '>' && next_c == '>' && next_next_c == '=') { + token.value[1] = '>'; + token.value[2] = '='; + token.value[3] = '\0'; + lexer->current_pos += 2; + lexer->column += 2; + token.type = TOKEN_SHR_ASSIGN; + return token; + } + + // 处理双字符操作符 + if (c == '=' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_EQ_EQ; + return token; + } else if (c == '!' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_NE; + return token; + } else if (c == '<' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_LE; + return token; + } else if (c == '>' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_GE; + return token; + } else if (c == '+' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_PLUS_ASSIGN; + return token; + } else if (c == '-' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_MINUS_ASSIGN; + return token; + } else if (c == '*' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_MUL_ASSIGN; + return token; + } else if (c == '/' && 
next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_DIV_ASSIGN; + return token; + } else if (c == '%' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_MOD_ASSIGN; + return token; + } else if (c == '&' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_BITAND_ASSIGN; + return token; + } else if (c == '|' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_BITOR_ASSIGN; + return token; + } else if (c == '^' && next_c == '=') { + token.value[1] = '='; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_BITXOR_ASSIGN; + return token; + } else if (c == '<' && next_c == '<') { + token.value[1] = '<'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_SHL; + return token; + } else if (c == '>' && next_c == '>') { + token.value[1] = '>'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_SHR; + return token; + } else if (c == '+' && next_c == '+') { + token.value[1] = '+'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_PLUS_PLUS; + return token; + } else if (c == '-' && next_c == '-') { + token.value[1] = '-'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_MINUS_MINUS; + return token; + } else if (c == '&' && next_c == '&') { + token.value[1] = '&'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_AND; + return token; + } else if (c == '|' && next_c == '|') { + token.value[1] = '|'; + token.value[2] = '\0'; + lexer->current_pos++; + lexer->column++; + token.type = TOKEN_OR; + return token; + } + + switch (c) { + case ':': 
token.type = TOKEN_COLON; break; + case '=': token.type = TOKEN_EQ; break; + case '(': token.type = TOKEN_LPAREN; break; + case ')': token.type = TOKEN_RPAREN; break; + case '{': token.type = TOKEN_LBRACE; break; + case '}': token.type = TOKEN_RBRACE; break; + case '[': token.type = TOKEN_LBRACKET; break; + case ']': token.type = TOKEN_RBRACKET; break; + case ',': token.type = TOKEN_COMMA; break; + case ';': token.type = TOKEN_SEMICOLON; break; + case '+': token.type = TOKEN_PLUS; break; + case '-': token.type = TOKEN_MINUS; break; + case '*': token.type = TOKEN_STAR; break; + case '/': token.type = TOKEN_SLASH; break; + case '.': token.type = TOKEN_DOT; break; + case '<': token.type = TOKEN_LT; break; + case '>': token.type = TOKEN_GT; break; + case '!': token.type = TOKEN_BANG; break; + case '|': token.type = TOKEN_BITOR; break; + case '&': token.type = TOKEN_BITAND; break; + case '^': token.type = TOKEN_BITXOR; break; + case '%': token.type = TOKEN_MOD; break; + case '@': token.type = TOKEN_AT; break; + case '~': token.type = TOKEN_TILDE; break; + case '?': token.type = TOKEN_QUESTION; break; + + default: + token.type = TOKEN_ERROR; + sprintf(token.value, "Unexpected character: %c", c); + } + return token; + } +} \ No newline at end of file diff --git a/parser/lexer.h b/parser/lexer.h new file mode 100644 index 0000000..f8f6960 --- /dev/null +++ b/parser/lexer.h @@ -0,0 +1,132 @@ +// +// Created by Natuie on 2025/3/22. 
// Kinds of token produced by the lexer.
//
// Several enumerators are never assigned by the lexer code in lexer.c
// (TOKEN_KEYWORD, TOKEN_ASSIGN, TOKEN_MUL, TOKEN_DIV, TOKEN_NOT, TOKEN_NULL,
// TOKEN_BOOL_LITERAL); they are marked below.
typedef enum {
    TOKEN_EOF,              // end of input
    TOKEN_IDENTIFIER,       // identifier
    TOKEN_INT,              // the word "int" (integer literals are TOKEN_INT_LITERAL)
    TOKEN_FLOAT,            // the word "float" (float literals are TOKEN_FLOAT_LITERAL)
    TOKEN_STRING,           // the word "string" (string literals are TOKEN_STRING_LITERAL)
    TOKEN_KEYWORD,          // keyword (not produced by the lexer)
    TOKEN_ASSIGN,           // assignment (not produced; a single '=' is TOKEN_EQ)
    TOKEN_PLUS,             // +
    TOKEN_MINUS,            // -
    TOKEN_MUL,              // multiplication (not produced; '*' is TOKEN_STAR)
    TOKEN_DIV,              // division (not produced; '/' is TOKEN_SLASH)
    TOKEN_MOD,              // %
    TOKEN_PLUS_PLUS,        // ++
    TOKEN_MINUS_MINUS,      // --
    TOKEN_PLUS_ASSIGN,      // +=
    TOKEN_MINUS_ASSIGN,     // -=
    TOKEN_MUL_ASSIGN,       // *=
    TOKEN_DIV_ASSIGN,       // /=
    TOKEN_MOD_ASSIGN,       // %=
    TOKEN_LPAREN,           // (
    TOKEN_RPAREN,           // )
    TOKEN_LBRACE,           // {
    TOKEN_RBRACE,           // }
    TOKEN_LBRACKET,         // [
    TOKEN_RBRACKET,         // ]
    TOKEN_COMMA,            // ,
    TOKEN_SEMICOLON,        // ;
    TOKEN_COLON,            // :
    TOKEN_DOT,              // .
    TOKEN_QUESTION,         // ?
    TOKEN_LT,               // <
    TOKEN_GT,               // >
    TOKEN_LE,               // <=
    TOKEN_GE,               // >=
    TOKEN_EQ,               // a single '='
    TOKEN_EQ_EQ,            // ==
    TOKEN_NE,               // !=
    TOKEN_AND,              // logical and: the word "and" or "&&"
    TOKEN_OR,               // logical or: the word "or" or "||"
    TOKEN_NOT,              // logical not (not produced; '!' is TOKEN_BANG)
    TOKEN_BITAND,           // &
    TOKEN_BITOR,            // |
    TOKEN_BITXOR,           // ^
    TOKEN_TILDE,            // ~
    TOKEN_SHL,              // <<
    TOKEN_SHR,              // >>
    TOKEN_BITAND_ASSIGN,    // &=
    TOKEN_BITOR_ASSIGN,     // |=
    TOKEN_BITXOR_ASSIGN,    // ^=
    TOKEN_SHL_ASSIGN,       // <<=
    TOKEN_SHR_ASSIGN,       // >>=
    TOKEN_IF,               // keyword if
    TOKEN_ELSE,             // keyword else
    TOKEN_WHILE,            // keyword while
    TOKEN_FOR,              // keyword for
    TOKEN_RETURN,           // keyword return
    TOKEN_BREAK,            // keyword break
    TOKEN_CONTINUE,         // keyword continue
    TOKEN_LET,              // keyword let
    TOKEN_CONST,            // keyword const
    TOKEN_TRUE,             // keyword true
    TOKEN_FALSE,            // keyword false
    TOKEN_NULL,             // keyword null (not recognised by read_identifier)
    TOKEN_IMPORT,           // keyword import
    TOKEN_AS,               // keyword as
    TOKEN_IN,               // keyword in
    TOKEN_NATIVE,           // keyword native
    TOKEN_FUNC,             // keyword func
    TOKEN_BOOL,             // keyword bool
    TOKEN_MAP,              // keyword map
    TOKEN_ARRAY,            // keyword array

    // Type keywords
    TOKEN_TYPE_I8,          // i8
    TOKEN_TYPE_I16,         // i16
    TOKEN_TYPE_I32,         // i32
    TOKEN_TYPE_I64,         // i64
    TOKEN_TYPE_U8,          // u8
    TOKEN_TYPE_U16,         // u16
    TOKEN_TYPE_U32,         // u32
    TOKEN_TYPE_U64,         // u64
    TOKEN_TYPE_F32,         // f32
    TOKEN_TYPE_F64,         // f64
    TOKEN_TYPE_ANY,         // any
    TOKEN_TYPE_VOID,        // void

    TOKEN_INT_LITERAL,      // integer literal
    TOKEN_FLOAT_LITERAL,    // floating-point literal
    TOKEN_STRING_LITERAL,   // string literal
    TOKEN_BOOL_LITERAL,     // boolean literal (not produced; see TOKEN_TRUE / TOKEN_FALSE)
    TOKEN_STAR,             // *
    TOKEN_SLASH,            // /
    TOKEN_BANG,             // !
    TOKEN_AT,               // @ (annotation marker)
    TOKEN_TRY,              // keyword try
    TOKEN_CATCH,            // keyword catch
    TOKEN_ERROR,            // lexing error
    TOKEN_THROW,            // keyword throw
} TokenType;

// One lexed token.
typedef struct {
    TokenType type;
    char value[256];    // token text, NUL-terminated; the readers keep at most 255 characters
    int line;           // line on which the token starts
    int column;         // column at which the token starts
} Token;

// Lexer state over one NUL-terminated source buffer.
typedef struct {
    char *source;       // source text; released by lexer_free()
    int length;         // NOTE(review): not set by lexer_init() and not read in lexer.c
    int line;           // current line, starting at 1
    int column;         // current column, starting at 1
    int current_pos;    // index into `source` of the next unread character
} Lexer;

// Advance past whitespace and comments.
void skip_whitespace_and_comments(Lexer *lexer);
// Lex an identifier or keyword at the current position.
Token read_identifier(Lexer *lexer);
// Lex a numeric literal at the current position.
Token read_number(Lexer *lexer);
// Lex a string literal whose opening quote is at the current position.
Token read_string(Lexer *lexer);
// Point the lexer at `source` and reset position to line 1, column 1.
void lexer_init(Lexer *lexer, char *source);
// Free the source buffer held by the lexer.
void lexer_free(Lexer *lexer);
// Return the next token.
Token lexer_next_token(Lexer *lexer);
// NOTE(review): declared here, but no definition appears in lexer.c — confirm
// it exists before calling it.
Token lexer_back_token(Lexer *lexer);
// u64类型 + TOKEN_TYPE_F32, // f32类型 + TOKEN_TYPE_F64, // f64类型 + TOKEN_TYPE_ANY, // any类型 + TOKEN_TYPE_VOID, // void类型 + + TOKEN_INT_LITERAL, // 整数字面量 + TOKEN_FLOAT_LITERAL, // 浮点数字面量 + TOKEN_STRING_LITERAL, // 字符串字面量 + TOKEN_BOOL_LITERAL, // 布尔字面量 + TOKEN_STAR, // 星号 + TOKEN_SLASH, // 斜杠 + TOKEN_BANG, // 感叹号 + TOKEN_AT, // 注解符号 + TOKEN_TRY, // try关键字 + TOKEN_CATCH, // catch关键字 + TOKEN_ERROR, // 错误 + TOKEN_THROW, // throw关键字 +} TokenType; + +typedef struct { + TokenType type; + char value[256]; + int line; + int column; +} Token; + +typedef struct { + char *source; + int length; + int line; + int column; + int current_pos; +} Lexer; + +void skip_whitespace_and_comments(Lexer *lexer); +Token read_identifier(Lexer *lexer); +Token read_number(Lexer *lexer); +Token read_string(Lexer *lexer); +void lexer_init(Lexer *lexer, char *source); +void lexer_free(Lexer *lexer); +Token lexer_next_token(Lexer *lexer); +Token lexer_back_token(Lexer *lexer); +#endif //VETY_LEXER_H diff --git a/parser/parser.c b/parser/parser.c new file mode 100644 index 0000000..74289a5 --- /dev/null +++ b/parser/parser.c @@ -0,0 +1,1365 @@ +#include "parser.h" +#include "ast_printer.h" +#include +#include +#include + +char* token_type_to_string(TokenType type) { + switch (type) { + case TOKEN_EOF: return "eof"; + case TOKEN_IDENTIFIER: return "identifier"; + case TOKEN_INT: return "int"; + case TOKEN_FLOAT: return "float"; + case TOKEN_STRING: return "string"; + case TOKEN_KEYWORD: return "keyword"; + case TOKEN_ASSIGN: return "="; + case TOKEN_PLUS: return "+"; + case TOKEN_MINUS: return "-"; + case TOKEN_MUL: return "*"; + case TOKEN_DIV: return "/"; + case TOKEN_MOD: return "%"; + case TOKEN_PLUS_PLUS: return "++"; + case TOKEN_MINUS_MINUS: return "--"; + case TOKEN_LPAREN: return "("; + case TOKEN_RPAREN: return ")"; + case TOKEN_LBRACE: return "{"; + case TOKEN_RBRACE: return "}"; + case TOKEN_LBRACKET: return "["; + case TOKEN_RBRACKET: return "]"; + case TOKEN_COMMA: return ","; + case 
TOKEN_SEMICOLON: return ";"; + case TOKEN_COLON: return ":"; + case TOKEN_DOT: return "."; + case TOKEN_LT: return "<"; + case TOKEN_GT: return ">"; + case TOKEN_LE: return "<="; + case TOKEN_GE: return ">="; + case TOKEN_EQ: return "="; + case TOKEN_EQ_EQ: return "=="; + case TOKEN_NE: return "!="; + case TOKEN_AND: return "and"; + case TOKEN_OR: return "or"; + case TOKEN_NOT: return "not"; + case TOKEN_BITAND: return "&"; + case TOKEN_BITOR: return "|"; + case TOKEN_BITXOR: return "^"; + case TOKEN_TILDE: return "~"; + case TOKEN_SHL: return "<<"; + case TOKEN_SHR: return ">>"; + case TOKEN_IF: return "if"; + case TOKEN_ELSE: return "else"; + case TOKEN_WHILE: return "while"; + case TOKEN_FOR: return "for"; + case TOKEN_RETURN: return "return"; + case TOKEN_BREAK: return "break"; + case TOKEN_CONTINUE: return "continue"; + case TOKEN_LET: return "let"; + case TOKEN_CONST: return "const"; + case TOKEN_TRUE: return "true"; + case TOKEN_FALSE: return "false"; + case TOKEN_NULL: return "null"; + case TOKEN_IMPORT: return "import"; + case TOKEN_AS: return "as"; + case TOKEN_IN: return "in"; + case TOKEN_NATIVE: return "native"; + case TOKEN_FUNC: return "func"; + case TOKEN_BOOL: return "bool"; + case TOKEN_MAP: return "map"; + case TOKEN_ARRAY: return "array"; + case TOKEN_INT_LITERAL: return "int_literal"; + case TOKEN_FLOAT_LITERAL: return "float_literal"; + case TOKEN_STRING_LITERAL: return "string_literal"; + case TOKEN_BOOL_LITERAL: return "bool_literal"; + case TOKEN_STAR: return "*"; + case TOKEN_SLASH: return "/"; + case TOKEN_BANG: return "!"; + case TOKEN_AT: return "@"; + case TOKEN_THROW: return "throw"; + case TOKEN_ERROR: return "error"; + default: return "unknown" ; + } +} + +void parser_init(Parser* parser, Lexer* lexer, char* filename) +{ + parser->lexer = lexer; + parser->filename = filename; + parser->had_error = 0; + parser->error_count = 0; + parser->errors = NULL; + parser->current_token = lexer_next_token(lexer); +} + +void parser_free(Parser* 
parser) { + free(parser->filename); + + // 释放错误信息数组 + if (parser->errors) { + for (int i = 0; i < parser->error_count; i++) { + free(parser->errors[i].message); + } + free(parser->errors); + } +} + +Token consume_token(Parser* parser) { + Token previous_token = parser->current_token; + parser->current_token = lexer_next_token(parser->lexer); + return previous_token; +} + +int match_token(Parser* parser, TokenType type) { + return parser->current_token.type == type; +} + +Token expect_token(Parser* parser, TokenType type) { + if (!match_token(parser, type)) { + char message[256]; + snprintf(message, sizeof(message), "Expected '%s', got '%s'", + token_type_to_string(type), + token_type_to_string(parser->current_token.type)); + parser_error_at_current(parser, message); + } + return consume_token(parser); +} + + +// 解析函数 +/* + * func func_name(p: int): int {} + */ + +ASTNode* parse_function_declaration(Parser* parser) { + ASTNode* node = create_node(NODE_FUNC_DECL); + set_node_position(node, parser->lexer->line, parser->lexer->column); + + // 处理native修饰符 + if (match_token(parser, TOKEN_NATIVE)) { + consume_token(parser); + node->value = strdup("native"); + } else { + node->value = NULL; + } + + // 解析func关键字 + expect_token(parser, TOKEN_FUNC); + + // 解析函数名 + Token identifier = consume_token(parser); + ASTNode* id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(node, id_node); + + // 解析参数列表 + expect_token(parser, TOKEN_LPAREN); + ASTNode* params_node = create_node(NODE_PARAM_LIST); + set_node_position(params_node, parser->lexer->line, parser->lexer->column); + while (!match_token(parser, TOKEN_RPAREN)) { + // 创建单个参数节点 + ASTNode* param_node = create_node(NODE_PARAM); + set_node_position(param_node, parser->lexer->line, parser->lexer->column); + + // 解析参数名 + Token param = consume_token(parser); + ASTNode* id_node = create_node(NODE_IDENTIFIER); + 
set_node_position(id_node, param.line, param.column); + id_node->value = strdup(param.value); + add_child(param_node, id_node); + + // 处理类型 + if (match_token(parser, TOKEN_COLON)) { + consume_token(parser); + Token type = consume_token(parser); + ASTNode* type_node = create_node(NODE_TYPE_IDENTIFIER); + type_node->value = strdup(type.value); + set_node_position(type_node, type.line, type.column); + + // 处理数组类型 + while (match_token(parser, TOKEN_LBRACKET)) { + consume_token(parser); // 消费左中括号 + type_node->type = NODE_ARRAY_TYPE; + + // 解析数组大小(如果有) + if (!match_token(parser, TOKEN_RBRACKET)) { + Token size_token = consume_token(parser); + ASTNode* size_node = create_node(NODE_INT_LITERAL); + size_node->value = strdup(size_token.value); + set_node_position(size_node, size_token.line, size_token.column); + add_child(type_node, size_node); + } + + expect_token(parser, TOKEN_RBRACKET); + } + + add_child(param_node, type_node); + } + + // 将参数节点添加到参数列表 + add_child(params_node, param_node); + + // 处理逗号 + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); + } else { + break; + } + } + expect_token(parser, TOKEN_RPAREN); + add_child(node, params_node); + + // 处理返回值类型 + if (match_token(parser, TOKEN_COLON)) { + consume_token(parser); + Token return_type = consume_token(parser); + ASTNode* return_type_node = create_node(NODE_TYPE_IDENTIFIER); + return_type_node->value = strdup(return_type.value); + set_node_position(return_type_node, return_type.line, return_type.column); + + // 处理数组类型 + while (match_token(parser, TOKEN_LBRACKET)) { + consume_token(parser); // 消费左中括号 + return_type_node->type = NODE_ARRAY_TYPE; // 标记为数组类型 + + // 解析数组大小(如果有) + if (!match_token(parser, TOKEN_RBRACKET)) { + Token size_token = consume_token(parser); + ASTNode* size_node = create_node(NODE_INT_LITERAL); + size_node->value = strdup(size_token.value); + set_node_position(size_node, size_token.line, size_token.column); + add_child(return_type_node, size_node); + } + + expect_token(parser, 
TOKEN_RBRACKET); + } + + add_child(node, return_type_node); + } + + // 处理分号或函数体 + if (match_token(parser, TOKEN_SEMICOLON)) { + consume_token(parser); + } else { + // 解析函数体 + ASTNode* body = parse_block(parser); + if (body != NULL) { + add_child(node, body); + } + } + + return node; +} +// 解析变量申明 +// let/const var: int/空 = exp +ASTNode* parse_var_declaration(Parser* parser) { + // 记录声明类型(let/const) + char* decl_type; + if (match_token(parser, TOKEN_LET)) { + consume_token(parser); + decl_type = strdup("let"); + } else if (match_token(parser, TOKEN_CONST)) { + consume_token(parser); + decl_type = strdup("const"); + } else { + parser_error_at_current(parser, "Expected let or const"); + return NULL; + } + + // 创建第一个变量声明节点 + ASTNode* first_var = create_node(NODE_VAR_DECL); + set_node_position(first_var, parser->lexer->line, parser->lexer->column); + first_var->value = decl_type; + + // 获取标识符 + Token identifier = consume_token(parser); + ASTNode* id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(first_var, id_node); + + // 处理类型 + ASTNode* type_node = NULL; + if (match_token(parser, TOKEN_COLON)) { + consume_token(parser); // 消费冒号 + // 解析类型标识符 + Token type = consume_token(parser); + type_node = create_node(NODE_TYPE_IDENTIFIER); + type_node->value = strdup(type.value); + set_node_position(type_node, type.line, type.column); + + // 处理数组类型 + while (match_token(parser, TOKEN_LBRACKET)) { + consume_token(parser); // 消费左中括号 + type_node->type = NODE_ARRAY_TYPE; // 标记为数组类型 + + // 解析数组大小(如果有) + if (!match_token(parser, TOKEN_RBRACKET)) { + Token size_token = consume_token(parser); + ASTNode* size_node = create_node(NODE_INT_LITERAL); + size_node->value = strdup(size_token.value); + set_node_position(size_node, size_token.line, size_token.column); + add_child(type_node, size_node); + } + + expect_token(parser, TOKEN_RBRACKET); + } + + // 处理泛型类型参数 + if 
(match_token(parser, TOKEN_LT)) { + consume_token(parser); // 消费 < + ASTNode* generic_params = create_node(NODE_GENERIC_PARAMS); + set_node_position(generic_params, parser->lexer->line, parser->lexer->column); + + do { + Token param = consume_token(parser); + ASTNode* param_node = create_node(NODE_TYPE_IDENTIFIER); + param_node->value = strdup(param.value); + set_node_position(param_node, param.line, param.column); + add_child(generic_params, param_node); + + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); + } + } while (!match_token(parser, TOKEN_GT)); + + expect_token(parser, TOKEN_GT); + add_child(type_node, generic_params); + } + + add_child(first_var, type_node); + } + + // 处理值 + if (match_token(parser, TOKEN_EQ)) { + consume_token(parser); // 消费等号 + + // 解析初始化表达式 + ASTNode* init_expr = parse_expression(parser); + if (init_expr != NULL) { + add_child(first_var, init_expr); + + // 如果没有显式类型注解,根据初始化表达式推断类型 + if (type_node == NULL) { + type_node = create_node(NODE_TYPE_IDENTIFIER); + set_node_position(type_node, init_expr->line, init_expr->column); + + // 根据表达式类型推断变量类型 + switch (init_expr->type) { + case NODE_INT_LITERAL: + type_node->value = strdup("i32"); // 默认整数类型为i32 + break; + case NODE_FLOAT_LITERAL: + type_node->value = strdup("f64"); // 默认浮点类型为f64 + break; + case NODE_STRING_LITERAL: + type_node->value = strdup("string"); + break; + case NODE_BOOL_LITERAL: + type_node->value = strdup("bool"); + break; + case NODE_ARRAY_LITERAL: + type_node->value = strdup("array"); + break; + case NODE_MAP_LITERAL: + type_node->value = strdup("map"); + break; + default: + type_node->value = strdup("any"); + } + add_child(first_var, type_node); + } + } + } + + // 检查是否有更多变量声明 + if (!match_token(parser, TOKEN_COMMA)) { + // 单变量声明,直接返回 + if (match_token(parser, TOKEN_SEMICOLON)) { + consume_token(parser); + } + return first_var; + } + + // 多变量声明,创建列表节点 + ASTNode* list_node = create_node(NODE_VAR_DECL_LIST); + set_node_position(list_node, parser->lexer->line, 
parser->lexer->column); + list_node->value = decl_type; + add_child(list_node, first_var); + + // 处理后续变量 + while (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); // 消费逗号 + + ASTNode* var_node = create_node(NODE_VAR_DECL); + set_node_position(var_node, parser->lexer->line, parser->lexer->column); + var_node->value = strdup(decl_type); + + // 获取标识符 + identifier = consume_token(parser); + id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(var_node, id_node); + + // 处理类型 + if (match_token(parser, TOKEN_COLON)) { + consume_token(parser); // 消费冒号 + Token type = consume_token(parser); + ASTNode* type_node = create_node(NODE_TYPE_IDENTIFIER); + type_node->value = strdup(type.value); + set_node_position(type_node, type.line, type.column); + + // 处理数组维度 + if (match_token(parser, TOKEN_LBRACKET)) { + consume_token(parser); // 消费左中括号 + + // 解析数组大小 + if (!match_token(parser, TOKEN_RBRACKET)) { + Token size_token = consume_token(parser); + ASTNode* size_node = create_node(NODE_INT_LITERAL); + size_node->value = strdup(size_token.value); + set_node_position(size_node, size_token.line, size_token.column); + add_child(type_node, size_node); + } + + expect_token(parser, TOKEN_RBRACKET); + } + + add_child(var_node, type_node); + } + + // 处理值 + if (match_token(parser, TOKEN_EQ)) { + consume_token(parser); // 消费等号 + ASTNode* init_expr = parse_expression(parser); + if (init_expr != NULL) { + add_child(var_node, init_expr); + } + } + + add_child(list_node, var_node); + } + + // 可选择的分号 + if (match_token(parser, TOKEN_SEMICOLON)) { + consume_token(parser); + } + return list_node; +} + +// 解析键值对字面量 +/* + * {a:6} + */ +ASTNode* parse_key_value_literal(Parser* parser) { + ASTNode* node = create_node(NODE_MAP_LITERAL); + set_node_position(node, parser->lexer->line, parser->lexer->column); + // 解析左大括号 + expect_token(parser, TOKEN_LBRACE); + // 解析键值对 + do { + // 创建键值对节点 + 
ASTNode* entry_node = create_node(NODE_MAP_ENTRY); + set_node_position(entry_node, parser->lexer->line, parser->lexer->column); + + // 解析键 + Token key = consume_token(parser); + ASTNode* key_node = create_node(NODE_IDENTIFIER); + set_node_position(key_node, key.line, key.column); + key_node->value = strdup(key.value); + add_child(entry_node, key_node); + + // 解析冒号 + expect_token(parser, TOKEN_COLON); + + // 解析值 + ASTNode* value_node = parse_expression(parser); + add_child(entry_node, value_node); + + // 将键值对节点添加到主节点 + add_child(node, entry_node); + + // 处理逗号 + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); // 消费逗号 + } + } while (!match_token(parser, TOKEN_RBRACE)); + expect_token(parser, TOKEN_RBRACE); + return node; +} + + +// 解析数组字面量 +/* + * [1, 2, 3] + */ +ASTNode* parse_array_literal(Parser* parser) { + ASTNode* node = create_node(NODE_ARRAY_LITERAL); + set_node_position(node, parser->lexer->line, parser->lexer->column); + // 解析左中括号 + expect_token(parser, TOKEN_LBRACKET); + + // 解析元素 + do { + // 创建数组元素节点 + //ASTNode* item_node = create_node(NODE_ARRAY_ITEM); + //set_node_position(item_node, parser->lexer->line, parser->lexer->column); + + // 解析元素值 + ASTNode* value_node = parse_expression(parser); + add_child(node, value_node); + + // 将元素节点添加到数组 + //add_child(node, item_node); + + // 处理逗号 + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); // 消费逗号 + } + } while (!match_token(parser, TOKEN_RBRACKET)); + expect_token(parser, TOKEN_RBRACKET); + return node; +} + +// 解析try-catch语句 +/* + * try { + * // code + * } catch (e) { + * // handler + * } + */ +// 解析try-catch结构 +ASTNode* parse_try_catch_statement(Parser* parser) { + ASTNode* node = create_node(NODE_TRY_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + + // 解析try块 + expect_token(parser, TOKEN_TRY); + ASTNode* try_block = parse_block(parser); + add_child(node, try_block); + + // 解析catch块 + expect_token(parser, TOKEN_CATCH); + expect_token(parser, 
TOKEN_LPAREN); + + // 解析异常变量 + Token exception_var = expect_token(parser, TOKEN_IDENTIFIER); + ASTNode* var_node = create_node(NODE_IDENTIFIER); + set_node_position(var_node, exception_var.line, exception_var.column); + var_node->value = strdup(exception_var.value); + + // 创建catch块节点 + ASTNode* catch_node = create_node(NODE_CATCH_BLOCK); + set_node_position(catch_node, parser->lexer->line, parser->lexer->column); + add_child(catch_node, var_node); + + expect_token(parser, TOKEN_RPAREN); + ASTNode* catch_block = parse_block(parser); + add_child(catch_node, catch_block); + add_child(node, catch_node); + return node; +} + +ASTNode* parse_block(Parser* parser) { + ASTNode* node = create_node(NODE_BLOCK); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_LBRACE); + while (!match_token(parser, TOKEN_RBRACE) && !match_token(parser, TOKEN_EOF)) { + ASTNode* stmt = parse_statement(parser); + if (stmt != NULL) { + add_child(node, stmt); + } + + } + expect_token(parser, TOKEN_RBRACE); + return node; +} + +// 解析if语句 +/* + * if (a >= 6 or b >= 7) {...} else if (a > 7) {...} else {...} + * 等价于 + * if (a >= 6 or b >= 7) {...} else { + * if (a > 7) {...} else {...} + * } + */ +ASTNode* parse_if_statement(Parser* parser) { + ASTNode* node = create_node(NODE_IF_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_IF); + expect_token(parser, TOKEN_LPAREN); + + // 解析条件表达式 + ASTNode* cond_expr = parse_expression(parser); + add_child(node, cond_expr); + expect_token(parser, TOKEN_RPAREN); + + // 解析if体 + if (match_token(parser, TOKEN_LBRACE)) { + ASTNode* if_body = parse_block(parser); + add_child(node, if_body); + } + + // 处理else if和else + while (match_token(parser, TOKEN_ELSE)) { + consume_token(parser); // 消费else + + if (match_token(parser, TOKEN_IF)) { + // 处理else if分支 + consume_token(parser); // 消费if + ASTNode* else_if_node = create_node(NODE_ELSE_IF); + 
set_node_position(else_if_node, parser->lexer->line, parser->lexer->column); + + expect_token(parser, TOKEN_LPAREN); + ASTNode* else_if_cond = parse_expression(parser); + add_child(else_if_node, else_if_cond); + expect_token(parser, TOKEN_RPAREN); + + if (match_token(parser, TOKEN_LBRACE)) { + ASTNode* else_if_body = parse_block(parser); + add_child(else_if_node, else_if_body); + } + + add_child(node, else_if_node); + } else if (match_token(parser, TOKEN_LBRACE)) { + // 处理else分支 + ASTNode* else_node = create_node(NODE_ELSE_BLOCK); + set_node_position(else_node, parser->lexer->line, parser->lexer->column); + ASTNode* else_body = parse_block(parser); + add_child(else_node, else_body); + add_child(node, else_node); + break; + } + } + return node; +} + +// 解析for语句 +/* + * for (let i:int = 0, j:int = 0; i < 10; i++) {...} + */ +ASTNode* parse_for_statement(Parser* parser) { + ASTNode* node = create_node(NODE_FOR_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + + expect_token(parser, TOKEN_FOR); + expect_token(parser, TOKEN_LPAREN); + + // 解析初始化语句 + ASTNode* init_list = create_node(NODE_FOR_INIT); + set_node_position(init_list, parser->lexer->line, parser->lexer->column); + + ASTNode* init_stmt = parse_statement(parser); + add_child(init_list, init_stmt); + add_child(node, init_list); + + // 解析条件表达式 + ASTNode* cond_node = create_node(NODE_FOR_CONDITION); + set_node_position(cond_node, parser->lexer->line, parser->lexer->column); + if (!match_token(parser, TOKEN_SEMICOLON)) { + ASTNode* cond_expr = parse_expression(parser); + add_child(cond_node, cond_expr); + } else { + ASTNode* empty_expr = create_node(NODE_EXPR_STMT); + set_node_position(empty_expr, parser->lexer->line, parser->lexer->column); + add_child(cond_node, empty_expr); + } + add_child(node, cond_node); + + // 解析更新语句 + ASTNode* update_node = create_node(NODE_FOR_UPDATE); + set_node_position(update_node, parser->lexer->line, parser->lexer->column); + if (!match_token(parser, 
TOKEN_RPAREN)) { + ASTNode* update_stmt = parse_expression(parser); + add_child(update_node, update_stmt); + } else { + ASTNode* empty_stmt = create_node(NODE_EXPR_STMT); + set_node_position(empty_stmt, parser->lexer->line, parser->lexer->column); + add_child(update_node, empty_stmt); + } + add_child(node, update_node); + + expect_token(parser, TOKEN_RPAREN); + + // 解析循环体 + ASTNode* body = parse_block(parser); + add_child(node, body); + return node; +} + +// 解析while语句 +ASTNode* parse_while_statement(Parser* parser) { + ASTNode* node = create_node(NODE_WHILE_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + + expect_token(parser, TOKEN_WHILE); + expect_token(parser, TOKEN_LPAREN); + + // 解析条件表达式 + ASTNode* cond_expr = parse_expression(parser); + add_child(node, cond_expr); + expect_token(parser, TOKEN_RPAREN); + + // 解析循环体 + ASTNode* body = parse_block(parser); + add_child(node, body); + return node; +} + +ASTNode* parse_expression_statement(Parser* parser) { + //ASTNode* node = create_node(NODE_EXPR_STMT); + //set_node_position(node, parser->lexer->line, parser->lexer->column); + + ASTNode* expr = parse_expression(parser); + set_node_position(expr, parser->lexer->line, parser->lexer->column); + //add_child(node, expr); + + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return expr; +} + +// 解析return语句 +ASTNode* parse_return_statement(Parser* parser) { + ASTNode* node = create_node(NODE_RETURN_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_RETURN); + // 如果有返回值 + if (!match_token(parser, TOKEN_SEMICOLON)) add_child(node, parse_expression(parser)); + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return node; +} + +ASTNode* parse_break_statement(Parser* parser) { + ASTNode* node = create_node(NODE_BREAK_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_BREAK); + if 
(match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return node; +} + +ASTNode* parse_continue_statement(Parser* parser) { + ASTNode* node = create_node(NODE_CONTINUE_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_CONTINUE); + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return node; +} + +ASTNode* parse_import_statement(Parser* parser) { + ASTNode* node = create_node(NODE_IMPORT_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + expect_token(parser, TOKEN_IMPORT); + + // 解析导入路径 + ASTNode* path_node = create_node(NODE_IMPORT_PATH); + set_node_position(path_node, parser->lexer->line, parser->lexer->column); + + // 解析第一个标识符 + Token identifier = consume_token(parser); + ASTNode* id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(path_node, id_node); + + // 解析后续的路径部分 + while (match_token(parser, TOKEN_DOT)) { + consume_token(parser); // 消费点号 + + // 处理连续的点号(..) 
+ if (match_token(parser, TOKEN_DOT)) { + consume_token(parser); // 消费第二个点号 + ASTNode* parent_dir_node = create_node(NODE_IMPORT_PATH); + set_node_position(parent_dir_node, parser->lexer->line, parser->lexer->column); + add_child(path_node, parent_dir_node); + } else { + // 解析常规路径部分 + identifier = consume_token(parser); + id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(path_node, id_node); + } + } + + add_child(node, path_node); + + // 处理别名(as) + if (match_token(parser, TOKEN_AS)) { + consume_token(parser); // 消费as关键字 + Token alias = consume_token(parser); + ASTNode* alias_node = create_node(NODE_IMPORT_ALIAS); + set_node_position(alias_node, alias.line, alias.column); + alias_node->value = strdup(alias.value); + add_child(node, alias_node); + } + + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return node; +} + + +// 解析注解 +/* + * @name 或 @name(param1, param2, ...) 或 @name(key1=value1, key2=value2, ...) 
+ */ +ASTNode* parse_annotation(Parser* parser) { + ASTNode* node = create_node(NODE_ANNOTATION); + set_node_position(node, parser->lexer->line, parser->lexer->column); + + // 解析@符号 + expect_token(parser, TOKEN_AT); + + // 解析注解名称 + Token name = expect_token(parser, TOKEN_IDENTIFIER); + node->value = strdup(name.value); + + // 解析可选的参数列表 + if (match_token(parser, TOKEN_LPAREN)) { + consume_token(parser); // 消费左括号 + + // 解析参数 + while (!match_token(parser, TOKEN_RPAREN)) { + // 检查是否是命名参数 (name=value形式) + if (match_token(parser, TOKEN_IDENTIFIER)) { + Token identifier = parser->current_token; + consume_token(parser); + + // 检查是否有等号 + if (match_token(parser, TOKEN_EQ)) { + consume_token(parser); // 消费等号 + + // 创建命名参数节点 + ASTNode* named_arg = create_node(NODE_NAMED_ARG); + set_node_position(named_arg, identifier.line, identifier.column); + + // 添加参数名 + ASTNode* name_node = create_node(NODE_IDENTIFIER); + set_node_position(name_node, identifier.line, identifier.column); + name_node->value = strdup(identifier.value); + add_child(named_arg, name_node); + + // 解析参数值 + ASTNode* value = parse_expression_with_precedence(parser, PREC_NONE); + add_child(named_arg, value); + + // 将命名参数添加到注解 + add_child(node, named_arg); + } else { + // 不是命名参数,回退并按普通参数处理 + parser->lexer->current_pos--; // 回退pos + parser->lexer->column = parser->lexer->column - strlen(identifier.value); + parser->current_token = identifier; // 回退token + ASTNode* param = parse_expression(parser); + add_child(node, param); + } + } else { + // 普通参数 + ASTNode* pos_arg = create_node(NODE_POS_ARG); + set_node_position(pos_arg, parser->lexer->line, parser->lexer->column); + + // 解析参数值 + ASTNode* value = parse_expression(parser); + add_child(pos_arg, value); + + // 将位置参数添加到注解节点 + add_child(node, pos_arg); + } + + // 处理逗号 + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); // 消费逗号 + } else { + break; + } + } + + expect_token(parser, TOKEN_RPAREN); // 消费右括号 + } + + return node; +} + +// 解析语句 +ASTNode* 
parse_statement(Parser* parser) { + // 处理注解(支持多个注解) + ASTNode* annotations = create_node(NODE_ANNOTATIONS); + set_node_position(annotations, parser->lexer->line, parser->lexer->column); + + // 循环处理所有连续的注解 + while (match_token(parser, TOKEN_AT)) { + ASTNode* current_annotation = parse_annotation(parser); + add_child(annotations, current_annotation); + } + + // 解析实际语句 + ASTNode* stmt = NULL; + + if (match_token(parser, TOKEN_LET) || match_token(parser, TOKEN_CONST)) { + stmt = parse_var_declaration(parser); + } else if (match_token(parser, TOKEN_FUNC) || match_token(parser, TOKEN_NATIVE)) { + stmt = parse_function_declaration(parser); + } else if (match_token(parser, TOKEN_IF)) { + stmt = parse_if_statement(parser); + } else if (match_token(parser, TOKEN_FOR)) { + stmt = parse_for_statement(parser); + } else if (match_token(parser, TOKEN_WHILE)) { + stmt = parse_while_statement(parser); + } else if (match_token(parser, TOKEN_RETURN)) { + stmt = parse_return_statement(parser); + } else if (match_token(parser, TOKEN_BREAK)) { + stmt = parse_break_statement(parser); + } else if (match_token(parser, TOKEN_CONTINUE)) { + stmt = parse_continue_statement(parser); + } else if (match_token(parser, TOKEN_IMPORT)) { + stmt = parse_import_statement(parser); + } else if (match_token(parser, TOKEN_TRY)) { + stmt = parse_try_catch_statement(parser); + } else if (match_token(parser, TOKEN_THROW)) { + stmt = parse_throw_statement(parser); + } else { + // 表达式语句 + stmt = parse_expression_statement(parser); + if (match_token(parser, TOKEN_SEMICOLON)) { + consume_token(parser); // 消费分号 + } + } + + // 如果有注解,将注解附加到语句上 + if (annotations->children_count > 0 && stmt != NULL) { + // 将注解作为语句的第一个子节点 + // 先保存原有的子节点 + int original_count = stmt->children_count; + ASTNode** original_children = malloc(sizeof(ASTNode*) * original_count); + for (int i = 0; i < original_count; i++) { + original_children[i] = stmt->children[i]; + } + + // 清空子节点数组 + stmt->children_count = 0; + + // 添加注解作为第一个子节点 + 
add_child(stmt, annotations); + + // 重新添加原有的子节点 + for (int i = 0; i < original_count; i++) { + add_child(stmt, original_children[i]); + } + + // 释放临时数组 + free(original_children); + } else { + // 如果没有注解,释放annotations节点 + free(annotations); + } + + return stmt; +} + +ASTNode* parse_throw_statement(Parser* parser) { + ASTNode* node = create_node(NODE_THROW_STMT); + set_node_position(node, parser->lexer->line, parser->lexer->column); + consume_token(parser); + // 解析throw后面的表达式 + ASTNode* expr = parse_expression(parser); + add_child(node, expr); + + // 可选的分号 + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + + return node; +} + +// 解析表达式(处理逻辑运算符) +// 获取运算符的优先级 +int get_precedence(TokenType type) { + switch (type) { + case TOKEN_EQ: return PREC_ASSIGNMENT; + case TOKEN_PLUS_ASSIGN: + case TOKEN_MINUS_ASSIGN: + case TOKEN_MUL_ASSIGN: + case TOKEN_DIV_ASSIGN: + case TOKEN_MOD_ASSIGN: + case TOKEN_BITAND_ASSIGN: + case TOKEN_BITOR_ASSIGN: + case TOKEN_BITXOR_ASSIGN: + case TOKEN_SHL_ASSIGN: + case TOKEN_SHR_ASSIGN: return PREC_ASSIGNMENT; + case TOKEN_QUESTION: return PREC_TERNARY; + case TOKEN_OR: return PREC_OR; + case TOKEN_AND: return PREC_AND; + case TOKEN_EQ_EQ: + case TOKEN_NE: return PREC_EQUALITY; + case TOKEN_LT: + case TOKEN_GT: + case TOKEN_LE: + case TOKEN_GE: return PREC_COMPARISON; + case TOKEN_BITAND: + case TOKEN_BITOR: + case TOKEN_BITXOR: return PREC_BITWISE; + case TOKEN_SHL: + case TOKEN_SHR: return PREC_SHIFT; + case TOKEN_PLUS: + case TOKEN_MINUS: return PREC_TERM; + case TOKEN_STAR: + case TOKEN_SLASH: + case TOKEN_MOD: return PREC_FACTOR; + case TOKEN_LPAREN: + case TOKEN_DOT: + case TOKEN_LBRACKET: return PREC_CALL; // 函数调用优先级 + case TOKEN_AS: return PREC_CAST; // 类型转换优先级 + default: return PREC_NONE; + } +} + +// 解析一元表达式 +ASTNode* parse_unary(Parser* parser) { + // 处理前缀运算符(一元运算符) + if (match_token(parser, TOKEN_MINUS) || match_token(parser, TOKEN_NOT) || + match_token(parser, TOKEN_BANG) || match_token(parser, TOKEN_PLUS_PLUS) || 
+ match_token(parser, TOKEN_MINUS_MINUS) || match_token(parser, TOKEN_TILDE)) { + Token op = consume_token(parser); + ASTNode* operand = parse_unary(parser); // 递归处理嵌套的一元表达式 + + ASTNode* unary = create_node(NODE_UNARY_EXPR); + set_node_position(unary, op.line, op.column); + unary->value = strdup(op.value); + add_child(unary, operand); + + return unary; + } + + return parse_primary(parser); +} + +// 解析表达式 +ASTNode* parse_expression(Parser* parser) { + return parse_expression_with_precedence(parser, PREC_NONE); +} + +// 根据优先级解析表达式 +ASTNode* parse_expression_with_precedence(Parser* parser, int precedence) { + ASTNode* left = parse_unary(parser); + + // 处理后缀表达式(自增、自减) + while (match_token(parser, TOKEN_PLUS_PLUS) || match_token(parser, TOKEN_MINUS_MINUS)) { + Token op = consume_token(parser); + ASTNode* postfix = create_node(NODE_POSTFIX_EXPR); + set_node_position(postfix, op.line, op.column); + postfix->value = strdup(op.value); + add_child(postfix, left); + left = postfix; + } + + while (precedence < get_precedence(parser->current_token.type)) { + TokenType op_type = parser->current_token.type; + Token op = consume_token(parser); + + // 处理类型转换 + if (op_type == TOKEN_AS) { + ASTNode* cast = create_node(NODE_CAST_EXPR); + set_node_position(cast, op.line, op.column); + add_child(cast, left); + + // 解析目标类型 + Token type_token = consume_token(parser); + ASTNode* type_node = create_node(NODE_TYPE_IDENTIFIER); + set_node_position(type_node, type_token.line, type_token.column); + type_node->value = strdup(type_token.value); + add_child(cast, type_node); + + left = cast; + continue; + } + + // 处理三元运算符 + if (op_type == TOKEN_QUESTION) { + // 创建三元表达式节点 + ASTNode* ternary = create_node(NODE_TERNARY_EXPR); + set_node_position(ternary, op.line, op.column); + ternary->op_type = OP_TERNARY; + + // 添加条件表达式 + add_child(ternary, left); + + // 解析真值表达式 + ASTNode* true_expr = parse_expression_with_precedence(parser, PREC_NONE); + add_child(ternary, true_expr); + + // 解析冒号 + 
expect_token(parser, TOKEN_COLON); + + // 解析假值表达式 + ASTNode* false_expr = parse_expression_with_precedence(parser, PREC_TERNARY); + add_child(ternary, false_expr); + + left = ternary; + continue; + } + + // 处理函数调用、成员访问和索引访问(从左到右结合) + if (op_type == TOKEN_LPAREN || op_type == TOKEN_DOT || op_type == TOKEN_LBRACKET) { + if (op_type == TOKEN_LPAREN) { + ASTNode* call = create_node(NODE_CALL_EXPR); + set_node_position(call, op.line, op.column); + add_child(call, left); + + if (!match_token(parser, TOKEN_RPAREN)) { + // 创建参数列表节点 + ASTNode* params_node = create_node(NODE_PARAM_LIST); + set_node_position(params_node, parser->lexer->line, parser->lexer->column); + add_child(call, params_node); + + while (!match_token(parser, TOKEN_RPAREN)) { + if (match_token(parser, TOKEN_IDENTIFIER)) { + Token identifier = parser->current_token; + consume_token(parser); + + // 检查是否有等号(命名参数) + if (match_token(parser, TOKEN_EQ)) { + consume_token(parser); + + ASTNode* named_arg = create_node(NODE_NAMED_ARG); + set_node_position(named_arg, identifier.line, identifier.column); + + // 添加参数名节点 + ASTNode* name_node = create_node(NODE_IDENTIFIER); + name_node->value = strdup(identifier.value); + set_node_position(name_node, identifier.line, identifier.column); + add_child(named_arg, name_node); + + // 解析并添加参数值 + ASTNode* value = parse_expression_with_precedence(parser, PREC_NONE); + add_child(named_arg, value); + + add_child(params_node, named_arg); + } else { + // 不是命名参数,回退并按普通参数处理 + parser->lexer->current_pos--; + parser->lexer->column = parser->lexer->column - strlen(identifier.value); + parser->current_token = identifier; + + ASTNode* arg = parse_expression_with_precedence(parser, PREC_NONE); + add_child(params_node, arg); + } + } else { + // 普通参数 + ASTNode* arg = parse_expression_with_precedence(parser, PREC_NONE); + add_child(params_node, arg); + } + + if (match_token(parser, TOKEN_COMMA)) { + consume_token(parser); + } else { + break; + } + } + } + expect_token(parser, TOKEN_RPAREN); + 
left = call; + } + else if (op_type == TOKEN_DOT) { + ASTNode* member = create_node(NODE_MEMBER_EXPR); + set_node_position(member, op.line, op.column); + add_child(member, left); + + Token identifier = expect_token(parser, TOKEN_IDENTIFIER); + ASTNode* id_node = create_node(NODE_IDENTIFIER); + set_node_position(id_node, identifier.line, identifier.column); + id_node->value = strdup(identifier.value); + add_child(member, id_node); + + left = member; + } + else { // TOKEN_LBRACKET + ASTNode* index = create_node(NODE_INDEX_EXPR); + set_node_position(index, op.line, op.column); + add_child(index, left); + + ASTNode* expr = parse_expression_with_precedence(parser, PREC_NONE); + add_child(index, expr); + + expect_token(parser, TOKEN_RBRACKET); + left = index; + } + } + // 处理二元运算符(从左到右结合) + else { + int new_precedence = get_precedence(op_type); + // 对于相同优先级的运算符,增加一个微小的优先级来确保左结合性 + ASTNode* right = parse_expression_with_precedence(parser, new_precedence + 1); + + // 创建二元表达式节点 + ASTNode* binary = create_node(NODE_BINARY_EXPR); + set_node_position(binary, op.line, op.column); + + // 设置操作符类型 + switch (op_type) { + // 算术运算符 + case TOKEN_PLUS: + binary->op_type = OP_ADD; + break; + case TOKEN_MINUS: + binary->op_type = OP_SUB; + break; + case TOKEN_STAR: + binary->op_type = OP_MUL; + break; + case TOKEN_SLASH: + binary->op_type = OP_DIV; + break; + case TOKEN_MOD: + binary->op_type = OP_MOD; + break; + + // 逻辑运算符 + case TOKEN_AND: + binary->op_type = OP_AND; + break; + case TOKEN_OR: + binary->op_type = OP_OR; + break; + + // 比较运算符 + case TOKEN_EQ_EQ: + binary->op_type = OP_EQ; + break; + case TOKEN_NE: + binary->op_type = OP_NE; + break; + case TOKEN_LT: + binary->op_type = OP_LT; + break; + case TOKEN_GT: + binary->op_type = OP_GT; + break; + case TOKEN_LE: + binary->op_type = OP_LE; + break; + case TOKEN_GE: + binary->op_type = OP_GE; + break; + + // 位运算符 + case TOKEN_BITAND: + binary->op_type = OP_BITAND; + break; + case TOKEN_BITOR: + binary->op_type = OP_BITOR; + break; + 
case TOKEN_BITXOR: + binary->op_type = OP_BITXOR; + break; + case TOKEN_SHL: + binary->op_type = OP_SHL; + break; + case TOKEN_SHR: + binary->op_type = OP_SHR; + break; + + // 赋值运算符 + case TOKEN_EQ: + binary->op_type = OP_ASSIGN; + break; + case TOKEN_PLUS_ASSIGN: + binary->op_type = OP_PLUS_ASSIGN; + break; + case TOKEN_MINUS_ASSIGN: + binary->op_type = OP_MINUS_ASSIGN; + break; + case TOKEN_MUL_ASSIGN: + binary->op_type = OP_MUL_ASSIGN; + break; + case TOKEN_DIV_ASSIGN: + binary->op_type = OP_DIV_ASSIGN; + break; + case TOKEN_MOD_ASSIGN: + binary->op_type = OP_MOD_ASSIGN; + break; + case TOKEN_BITAND_ASSIGN: + binary->op_type = OP_BITAND_ASSIGN; + break; + case TOKEN_BITOR_ASSIGN: + binary->op_type = OP_BITOR_ASSIGN; + break; + case TOKEN_BITXOR_ASSIGN: + binary->op_type = OP_BITXOR_ASSIGN; + break; + case TOKEN_SHL_ASSIGN: + binary->op_type = OP_SHL_ASSIGN; + break; + case TOKEN_SHR_ASSIGN: + binary->op_type = OP_SHR_ASSIGN; + break; + default: + parser_error_at_current(parser, "Unknown binary operator"); + return NULL; + } + + binary->value = strdup(op.value); + add_child(binary, left); + add_child(binary, right); + left = binary; + } + } + if (match_token(parser, TOKEN_SEMICOLON)) consume_token(parser); + return left; +} + +ASTNode* parse_primary(Parser* parser) { + ASTNode *node = NULL; + if (match_token(parser, TOKEN_INT_LITERAL)) { + Token token = consume_token(parser); + node = create_node(NODE_INT_LITERAL); + set_node_position(node, token.line, token.column); + node->value = strdup(token.value); + } else if (match_token(parser, TOKEN_FLOAT_LITERAL)) { + Token token = consume_token(parser); + node = create_node(NODE_FLOAT_LITERAL); + set_node_position(node, token.line, token.column); + node->value = strdup(token.value); + } else if (match_token(parser, TOKEN_STRING_LITERAL)) { + Token token = consume_token(parser); + node = create_node(NODE_STRING_LITERAL); + set_node_position(node, token.line, token.column); + node->value = strdup(token.value); + } else 
if (match_token(parser, TOKEN_IDENTIFIER)) { + Token token = consume_token(parser); + node = create_node(NODE_IDENTIFIER); + set_node_position(node, token.line, token.column); + node->value = strdup(token.value); + } else if (match_token(parser, TOKEN_TRUE) || match_token(parser, TOKEN_FALSE)) { + Token token = consume_token(parser); + node = create_node(NODE_BOOL_LITERAL); + set_node_position(node, token.line, token.column); + node->value = strdup(token.value); + } else if (match_token(parser, TOKEN_LPAREN)) { + consume_token(parser); + ASTNode* expr = parse_expression(parser); + expect_token(parser, TOKEN_RPAREN); + return expr; + } else if (match_token(parser, TOKEN_LBRACKET)) { + return parse_array_literal(parser); + } else if (match_token(parser, TOKEN_LBRACE)) { + return parse_key_value_literal(parser); + } else { + char error_message[256]; + snprintf(error_message, sizeof(error_message), "unexpected token '%s'", + token_type_to_string(parser->current_token.type)); + parser_error_at_current(parser, error_message); + } + return node; +} + +// 解析程序 +ASTNode* parse_program(Parser* parser) { + ASTNode* program = create_node(NODE_PROGRAM); + set_node_position(program, parser->lexer->line, parser->lexer->column); + while (!match_token(parser, TOKEN_EOF)) { + ASTNode *stmt = parse_statement(parser); + if (stmt != NULL) { + add_child(program, stmt); + } + } + return program; +} \ No newline at end of file diff --git a/parser/parser.h b/parser/parser.h new file mode 100644 index 0000000..25fac52 --- /dev/null +++ b/parser/parser.h @@ -0,0 +1,70 @@ +// +// Created by Natuie on 2025/3/22. 
+// + +#ifndef VETY_PARSER_H +#define VETY_PARSER_H + +#include "lexer.h" +#include "ast.h" + +// 运算符优先级枚举 +typedef enum { + PREC_NONE, // 无优先级 + PREC_ASSIGNMENT, // =, +=, etc + PREC_TERNARY, // ?: + PREC_OR, // || + PREC_AND, // && + PREC_EQUALITY, // ==, != + PREC_COMPARISON, // <, >, <=, >= + PREC_BITWISE, // |, &, ^ + PREC_SHIFT, // <<, >> + PREC_TERM, // +, - + PREC_FACTOR, // *, /, % + PREC_UNARY, // !, -, ~ + PREC_CALL, // (), [], . + PREC_CAST, // as + PREC_PRIMARY // 基本表达式 +} OperatorPrecedence; + +typedef struct { + Lexer *lexer; + Token current_token; + Token previous_token; + char *filename; + int had_error; // 标记是否发生错误 + int error_count; // 错误计数 + struct { + char* message; + int line; + int column; + } *errors; // 错误信息数组 +} Parser; + +void parser_init(Parser* parser, Lexer* lexer, char* filename); +void parser_free(Parser* parser); +Token consume_token(Parser* parser); +int match_token(Parser* parser, TokenType type); +Token expect_token(Parser* parser, TokenType type); +ASTNode* parse_statement(Parser* parser); +int get_precedence(TokenType type); +ASTNode* parse_unary(Parser* parser); +ASTNode* parse_expression(Parser* parser); +ASTNode* parse_expression_with_precedence(Parser* parser, int precedence); + +ASTNode* parse_primary(Parser* parser); +ASTNode* parse_block(Parser* parser); +ASTNode* parse_program(Parser* parser); +ASTNode* parse_var_declaration(Parser* parser); +ASTNode* parse_function_declaration(Parser* parser); +ASTNode* parse_import_statement(Parser* parser); +ASTNode* parse_break_statement(Parser* parser); +ASTNode* parse_continue_statement(Parser* parser); +ASTNode* parse_expression_statement(Parser* parser); +ASTNode* parse_while_statement(Parser* parser); +ASTNode* parse_for_statement(Parser* parser); +ASTNode* parse_if_statement(Parser* parser); +ASTNode* parse_return_statement(Parser* parser); +ASTNode* parse_throw_statement(Parser* parser); +ASTNode* parse_annotation(Parser* parser); +#endif //VETY_PARSER_H diff --git 
a/test/Makefile b/test/Makefile new file mode 100644 index 0000000..1f03084 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,37 @@ +CC = gcc +CFLAGS = -Wall -Wextra -g -I../parser -I../utils + +OBJ_DIR = ../build/test +TARGET = $(OBJ_DIR)/test +SRCS = test.c ../parser/parser.c ../parser/lexer.c ../parser/ast.c ../parser/ast_printer.c ../parser/error.c ../utils/file.c ../utils/log.c + +# 将源文件转换为目标文件列表 +OBJS = $(SRCS:%.c=$(OBJ_DIR)/%.o) + +# 创建目录的函数 +create_dirs := $(shell if not exist "$(OBJ_DIR)" mkdir "$(OBJ_DIR)" && \ + if not exist "$(OBJ_DIR)\..\parser" mkdir "$(OBJ_DIR)\..\parser" && \ + if not exist "$(OBJ_DIR)\..\utils" mkdir "$(OBJ_DIR)\..\utils") + + +# 默认目标 +all: $(TARGET) + +# 链接目标文件生成可执行文件 +$(TARGET): $(OBJS) + $(CC) $(OBJS) -o $(TARGET) + +# 编译源文件为目标文件的规则 +$(OBJ_DIR)/%.o: %.c + $(CC) $(CFLAGS) -c $< -o $@ + +# 清理规则 +clean: + del /F /Q $(OBJ_DIR)\*.o + del /F /Q $(TARGET).exe + +# 运行测试 +test: $(TARGET) + $(TARGET) + +.PHONY: all clean test \ No newline at end of file diff --git a/test/main.vt b/test/main.vt new file mode 100644 index 0000000..da9dc9f --- /dev/null +++ b/test/main.vt @@ -0,0 +1,22 @@ +import io +// 主函数 +func main():i32 { + print("hi"); + return 0; +} + +let arr: int[6] = [1, 2, 3]; +let dict: map = { + "a": 1, + "b": 2, + "c": 3 +}; +let a: int = 1; +printf(a); +let x = data as i8; +let y = ((a + b) as i32); +printf(66 as string); +printf((data as string)); +printfss(5 + data as string); +printfss(data + string2); +//printfss(data as string); \ No newline at end of file diff --git a/test/test.c b/test/test.c new file mode 100644 index 0000000..3ef13f2 --- /dev/null +++ b/test/test.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include +#include "../parser/parser.h" +#include "../parser/ast_printer.h" +#include "../utils/file.h" + + +#define DEMO_DIR "../demo/" +#define VT_EXT ".vt" +#define PASS_SYMBOL "✓" // Unicode checkmark + +// Function to test a single .vt file +void test_single_file(const char* filename, int isPT) { + 
printf("Testing file: %s \n", filename); + + // 初始化词法分析器和语法分析器 + FILE* file = fopen(filename, "r"); + if (!file) { + printf("Error opening file: %s\n", filename); + return; + } + + char* source = read_file(filename); + + Lexer lexer; + lexer_init(&lexer, source); + + Parser parser; + parser_init(&parser, &lexer, (char*)filename); + + // 解析整个文件 + ASTNode* ast = parse_program(&parser); + printf(" => "); + if (parser.had_error) { + printf("✗\n"); + } else { + printf("%s\n", PASS_SYMBOL); + if (isPT) ast_pretty_print(ast); + } + // 清理资源 + fclose(file); +} + +// Function to test all .vt files in demo directory +void test_all_demo_files() { + DIR *dir; + struct dirent *entry; + + dir = opendir(DEMO_DIR); + if (dir == NULL) { + perror("Error opening demo directory"); + return; + } + + printf("Testing all .vt files in demo directory:\n"); + + while ((entry = readdir(dir)) != NULL) { + char path[256]; + snprintf(path, sizeof(path), "%s%s", DEMO_DIR, entry->d_name); + + struct stat file_stat; + if (stat(path, &file_stat) == 0 && S_ISREG(file_stat.st_mode)) { + char *ext = strrchr(entry->d_name, '.'); + if (ext && strcmp(ext, VT_EXT) == 0) { + char* filepath = malloc(strlen(DEMO_DIR) + strlen(entry->d_name) + 1); + if (filepath == NULL) { + perror("Memory allocation failed"); + continue; + } + strcpy(filepath, DEMO_DIR); + strcat(filepath, entry->d_name); + test_single_file(filepath, 0); + } + } + } + + closedir(dir); +} + +int main() { + system("chcp 65001"); + test_all_demo_files(); + test_single_file("main.vt", 1); + //test_single_file("../demo/operators.vt", 1); + // Example of testing single file + // test_single_file("example.vt"); + + return 0; +} diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt new file mode 100644 index 0000000..29d37a6 --- /dev/null +++ b/utils/CMakeLists.txt @@ -0,0 +1,11 @@ +# 添加工具库 +add_library(vety_utils STATIC + file.c + file.h + ../vm/main.c +) + +# 设置包含路径 +target_include_directories(vety_utils PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} +) \ 
/**
 * Read a whole file into a freshly allocated, NUL-terminated buffer.
 *
 * The file is opened in text mode, so the number of bytes stored can be
 * smaller than the on-disk size on platforms that translate line endings;
 * the terminator is placed after the bytes actually read.
 *
 * @param filename  Path of the file to read.
 * @return          malloc'ed buffer owned by the caller (release with
 *                  free), or NULL if the name is NULL, the file cannot be
 *                  opened or measured, reading fails, or memory runs out.
 */
char* read_file(const char* filename) {
    if (!filename) {
        fprintf(stderr, "Failed to read file: %s\n", "(null)");
        return NULL;
    }

    FILE* file = fopen(filename, "r");
    if (!file) {
        fprintf(stderr, "Failed to read file: %s\n", filename);
        return NULL;
    }

    /* Measure the file. ftell reports -1 on failure (for example when the
     * path is not a seekable regular file); that must not reach malloc. */
    long file_size = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        file_size = ftell(file);
    }
    if (file_size < 0) {
        fprintf(stderr, "Failed to read file: %s\n", filename);
        fclose(file);
        return NULL;
    }
    rewind(file);

    char* buffer = (char*)malloc((size_t)file_size + 1);
    if (!buffer) {
        fprintf(stderr, "内存分配失败\n");
        fclose(file);
        return NULL;
    }

    size_t read_size = fread(buffer, 1, (size_t)file_size, file);
    if (ferror(file)) {
        fprintf(stderr, "Failed to read file: %s\n", filename);
        free(buffer);
        fclose(file);
        return NULL;
    }
    buffer[read_size] = '\0';

    fclose(file);
    return buffer;
}

/**
 * Report whether `filename` can be opened for reading.
 *
 * @return true when fopen succeeds; false otherwise (including NULL input).
 */
bool file_exists(const char* filename) {
    if (!filename) {
        return false;
    }
    FILE* file = fopen(filename, "r");
    if (!file) {
        return false;
    }
    fclose(file);
    return true;
}

/**
 * Join two path fragments with exactly one '/' between them when `path1`
 * does not already end in one.
 *
 * No normalisation of "./" or "../" is performed. An empty `path1` yields a
 * copy of `path2`, so a relative name is never turned into "/name".
 *
 * @return malloc'ed string owned by the caller, or NULL when either argument
 *         is NULL or allocation fails.
 */
char* build_path(const char* path1, const char* path2) {
    if (!path1 || !path2) {
        return NULL;
    }

    size_t len1 = strlen(path1);
    size_t len2 = strlen(path2);

    /* +2: one optional separator and the terminating NUL. */
    char* result = (char*)malloc(len1 + len2 + 2);
    if (!result) {
        fprintf(stderr, "内存分配失败\n");
        return NULL;
    }

    memcpy(result, path1, len1);
    size_t used = len1;
    /* len1 > 0 keeps path1[len1 - 1] in bounds for an empty prefix. */
    if (len1 > 0 && path1[len1 - 1] != '/') {
        result[used++] = '/';
    }
    memcpy(result + used, path2, len2 + 1);  /* copies the NUL as well */

    return result;
}
+// + +#ifndef VETY_FILE_H +#define VETY_FILE_H +#include +char* read_file(const char* filename); + +bool file_exists(const char* filename); + +#endif //VETY_FILE_H diff --git a/vm/CMakeLists.txt b/vm/CMakeLists.txt new file mode 100644 index 0000000..3970a7a --- /dev/null +++ b/vm/CMakeLists.txt @@ -0,0 +1,24 @@ +# 添加虚拟机库 +add_library(vety_vm + vm.c + vm.h + native.c + native.h +) + +# 添加可执行文件 +add_executable(vety + main.c +) + +# 设置包含路径 +target_include_directories(vety PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_SOURCE_DIR}/utils +) + +# 添加链接库 +target_link_libraries(vety + vety_vm + vety_utils +) \ No newline at end of file diff --git a/vm/main.c b/vm/main.c new file mode 100644 index 0000000..d74ca36 --- /dev/null +++ b/vm/main.c @@ -0,0 +1,153 @@ +#include "vm.h" +#include "native.h" +#include "file.h" +#include +#include +#include +#include + +int main() { + system("chcp 65001"); + char* filepath = "../main.vt"; + // 创建虚拟机实例 + VM* vm = vm_create(); + if (!vm) { + fprintf(stderr, "Failed to create VM\n"); + return 1; + } + + // 注册原生函数 + vm_register_native_function(vm, "print", native_print_const); + + // 定义一个使用嵌套if语句的程序 + uint8_t code[] = { + // 外层if: 比较R0和R1 + OP_LOAD_CONST, R0, 0, // R0 = 10 + OP_LOAD_CONST, R1, 1, // R1 = 20 + OP_CMP_NE, R2, R0, R1, // R2 = R0 != R1 + OP_JMP_IF_FALSE, R2, 0, 50, // 如果R2为假,跳转到外层else分支 + + // 外层if的then分支 + OP_MOV_IMM, R3, 2, + OP_NATIVE_CALL, 0, 1, R3, + + // 内层if: 比较R0是否小于R1 + OP_CMP_LT, R4, R0, R1, + OP_JMP_IF_FALSE, R4, 0, 39, + OP_MOV_IMM, R3, 4, + OP_NATIVE_CALL, 0, 1, R3, + OP_JMP, 0, 45, // 跳过内层else分支 + + // 内层else分支 + OP_MOV_IMM, R3, 5, + OP_NATIVE_CALL, 0, 1, R3, + OP_END_IF, // 内层if结束 + + OP_JMP, 0, 58, // 跳过外层else分支 + + // 外层else分支 + OP_MOV_IMM, R3, 3, + OP_NATIVE_CALL, 0, 1, R3, + OP_END_IF, // 外层if结束 + + OP_HALT // 程序结束 + }; + + Program program = { + .code_size = sizeof(code), + .constants.entries = (Constant[]){ + {.type = TYPE_INT64, .value = {.int64_val = 10}}, // 常量0: 10 + {.type = TYPE_INT64, .value = 
{.int64_val = 20}}, // 常量1: 20 + {.type = TYPE_STRING, .value = {.string_val = {.value = "Numbers are equal\n", .len = 18}}}, + {.type = TYPE_STRING, .value = {.string_val = {.value = "Numbers are not equal\n", .len = 22}}}, + {.type = TYPE_STRING, .value = {.string_val = {.value = "First number is smaller\n", .len = 22}}} + }, + .constants.count = 5, + .constants.capacity = 10 + }; + + // 加载程序到虚拟机 + program.code = code; + vm_load(vm, &program); + + // 执行程序 + vm_eval(vm); + + if (0) return 0; //是否打印 + // 打印字节码 + + print_bytecode(program); + // 在vm_eval调用之前添加以下代码 + printf("-------------------------------------------\n\n"); + + + // 检查执行是否出错 + if (vm->error != VM_SUCCESS) { + fprintf(stderr, "执行错误: %s\n", vm_get_error_message(vm)); + fprintf(stderr, "指令位置: %lu\n", vm->error_ip); + fprintf(stderr, "操作码: 0x%02X\n", vm->error_opcode); + vm_destroy(vm); + return 1; + } + + printf("-------------------------------------------\n"); + // 打印常量池 + printf("常量池:\n"); + printf("索引 | 类型 | 值\n"); + printf("-------------------------------------------\n"); + for (int i = 0; i < program.constants.count; i++) { + Constant constant = program.constants.entries[i]; + switch (constant.type) { + case TYPE_INT64: + printf("%d | i64 | %lld \n", i, constant.value.int64_val); + break; + case TYPE_STRING: + printf("%d | string | \"%s\" \n", i, constant.value.string_val.value); + break; + default: + break; + } + } + printf("-------------------------------------------\n\n"); + // 栈 + printf("栈:\n"); + printf("索引 | 值\n"); + printf("-------------------------------------------\n"); + for (int i = 0; i < vm->sp; i++) { + printf("%d | %llu \n", i, vm->stack[i]); + } + if (vm->sp == 0) printf("空栈\n"); + printf("-------------------------------------------\n\n"); + + // 打印寄存器 + printf("寄存器:\n"); + printf("索引 | 值\n"); + printf("-------------------------------------------\n"); + for (int i = 0; i < 5; i++) { + printf("%d | %llu \n", i, vm->registers[i]); + } + 
printf("-------------------------------------------\n\n"); + // 打印函数表 + printf("函数表:\n"); + printf("索引 | 地址 | 局部变量个数 | 参数个数 \n"); + printf("------------------------------------------------------------------\n"); + for (int i = 0; i < vm->function_count; i++) { + Function function = vm->functions[i]; + printf("%d(%s) | %llu | %d | %d \n", i, function.name, function.code_offset, function.local_count, function.param_count); + } + + // 打印原生函数表 + printf("-------------------------------------------\n\n"); + printf("原生函数表:\n"); + printf("索引 | 函数名\n"); + printf("-------------------------------------------\n"); + for (int i = 0; i < vm->native_function_count; i++) { + printf("%d | %s \n", i, vm->native_functions[i].name); + } + printf("-------------------------------------------\n\n"); + + + vm_destroy(vm); + + return 0; +} diff --git a/vm/native.c b/vm/native.c new file mode 100644 index 0000000..62419fb --- /dev/null +++ b/vm/native.c @@ -0,0 +1,54 @@ +#include "vm.h" +#include "native.h" +#ifdef _WIN32 +#include +#else +#include +#endif + +Value native_print_const(VM* vm, Value* argv, uint8_t argc) { + if (argc != 1) { + fprintf(stderr, "print_const requires exactly 1 argument, got %d\n", argc); + return (Value){.int64_val = -1}; + } + uint64_t idx = argv[0].int64_val; + if (!vm->program || idx >= vm->program->constants.count) { + fprintf(stderr, "Invalid constant index or no program loaded\n"); + return (Value){.int64_val = -1}; + } + Constant c = vm->program->constants.entries[idx]; + switch (c.type) { + case TYPE_BOOL: + print(c.value.bool_val ? 
"true" : "false"); + break; + case TYPE_CHAR: + print(&c.value.char_val); + break; + case TYPE_FLOAT32: + printf("%f", c.value.float32_val); + break; + case TYPE_FLOAT64: + printf("%f", c.value.float64_val); + break; + case TYPE_INT16: + printf("%d", c.value.int16_val); + break; + case TYPE_STRING: + print(c.value.string_val.value); + break; + default: + printf("%d\n", c.value.int64_val); + break; + } + return (Value) {.int32_val = 0}; +} + +void print(const char *str) { +#ifdef _WIN32 + HANDLE hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD written; + WriteFile(hConsole, str, strlen(str), &written, NULL); +#else + write(1, str, strlen(str)); +#endif +} \ No newline at end of file diff --git a/vm/native.h b/vm/native.h new file mode 100644 index 0000000..f0e82b0 --- /dev/null +++ b/vm/native.h @@ -0,0 +1,5 @@ +#ifndef NATIVE_H +#define NATIVE_H +Value native_print_const(VM* vm, Value* argv, uint8_t argc); +void print(const char *str); +#endif // NATIVE_H \ No newline at end of file diff --git a/vm/vm.c b/vm/vm.c new file mode 100644 index 0000000..fb02f4e --- /dev/null +++ b/vm/vm.c @@ -0,0 +1,618 @@ +#include "vm.h" +#include +#include +#include +#include + +const char* vm_error_to_string(VMError error) { + switch (error) { + case VM_SUCCESS: return "No error"; + case VM_ERROR_STACK_OVERFLOW: return "Stack overflow"; + case VM_ERROR_STACK_UNDERFLOW: return "Stack underflow"; + case VM_ERROR_INVALID_OPCODE: return "Invalid opcode"; + case VM_ERROR_INVALID_REGISTER: return "Invalid register access"; + case VM_ERROR_INVALID_CONSTANT: return "Invalid constant index"; + case VM_ERROR_INVALID_JUMP: return "Invalid jump address"; + case VM_ERROR_INVALID_NATIVE_FUNCTION: return "Invalid native function"; + case VM_ERROR_MEMORY_ALLOCATION: return "Memory allocation failed"; + case VM_ERROR_DIVISION_BY_ZERO: return "Division by zero"; + case VM_ERROR_INVALID_FRAME: return "Invalid frame operation"; + default: return "Unknown error"; + } +} + +void vm_set_error(VM* vm, 
VMError error, const char* message, uint64_t ip, uint8_t opcode) { + if (!vm) return; + vm->error = error; + vm->error_ip = ip; + vm->error_opcode = opcode; + + // 格式化错误消息,包含位置和操作码信息 + char full_message[256]; + snprintf(full_message, sizeof(full_message), + "[IP: %lu, Opcode: 0x%02X] %s", + ip, opcode, message ? message : vm_error_to_string(error)); + + strncpy(vm->error_message, full_message, sizeof(vm->error_message) - 1); + vm->error_message[sizeof(vm->error_message) - 1] = '\0'; +} + +VMError vm_get_error(VM* vm) { + return vm ? vm->error : VM_ERROR_INVALID_REGISTER; +} + +const char* vm_get_error_message(VM* vm) { + return vm ? vm->error_message : "Invalid VM instance"; +} + +VM* vm_create() { + VM* vm = (VM*)malloc(sizeof(VM)); + if (!vm) return NULL; + + // 初始化寄存器 + memset(vm->registers, 0, sizeof(uint64_t) * REGISTER_COUNT); + + // 初始化栈 + vm->sp = 0; + vm->ip = 0; + vm->frame_count = 0; + vm->frames = NULL; + + // 初始化程序 + vm->program = NULL; + + // 初始化全局变量表 + vm->global_count = 0; + memset(vm->globals, 0, sizeof(GlobalVar) * GLOBAL_VAR_COUNT); + + // 初始化函数表 + vm->native_functions = malloc(sizeof(NativeFunction) * 10); // 增加初始容量 + vm->native_function_count = 0; + vm->functions = NULL; + vm->function_count = 0; + + // 初始化错误状态 + vm->error = VM_SUCCESS; + vm->error_ip = 0; + vm->error_opcode = 0; + memset(vm->error_message, 0, sizeof(vm->error_message)); + + vm->block_count = 0; + + return vm; +} + +void vm_destroy(VM* vm) { + if (!vm) return; + + // 释放程序内存 + if (vm->program) { + if (vm->program->constants.entries) { + // 释放常量池中的字符串 + for (uint64_t i = 0; i < vm->program->constants.count; i++) { + if (vm->program->constants.entries[i].type == TYPE_STRING) { + //free(vm->program->constants.entries[i].value.string_val.value); + } + } + //free(vm->program->constants.entries); + } + //free(vm->program); + } + + // 释放全局变量表中的字符串 + for (size_t i = 0; i < vm->global_count; i++) { + free(vm->globals[i].name); + if (vm->globals[i].type == TYPE_STRING) { + 
free(vm->globals[i].value.string_val.value); + } + } + + // 释放函数表 + if (vm->native_functions) { + for (size_t i = 0; i < vm->native_function_count; i++) { + free(vm->native_functions[i].name); + } + free(vm->native_functions); + } + + if (vm->functions) { + for (size_t i = 0; i < vm->function_count; i++) { + free(vm->functions[i].name); + } + free(vm->functions); + } + + // 释放栈帧 + free(vm->frames); + free(vm); +} + +void vm_set_register(VM* vm, uint64_t index, uint64_t value) { + if (!vm || index >= REGISTER_COUNT) { + // 无效的寄存器访问 + fprintf(stderr, "Invalid register access\n"); + return; + } + vm->registers[index] = value; +} + +uint64_t vm_get_register(VM* vm, uint64_t index) { + if (!vm || index >= REGISTER_COUNT) { + // 无效的寄存器访问 + fprintf(stderr, "Invalid register access\n"); + return 0; + } + return vm->registers[index]; +} + +void vm_push(VM* vm, uint64_t value) { + if (!vm || vm->sp >= STACK_SIZE) { + // 栈溢出处理 + fprintf(stderr, "Stack overflow\n"); + return; + } + vm->stack[vm->sp++] = value; +} + +uint64_t vm_pop(VM* vm) { + if (!vm || vm->sp == 0) { + // 栈下溢处理 + fprintf(stderr, "Stack underflow\n"); + return 0; + } + return vm->stack[--vm->sp]; +} + +uint64_t vm_peek(VM* vm, uint64_t offset) { + if (!vm || vm->sp <= offset) { + // 无效的栈访问 + fprintf(stderr, "Invalid stack access\n"); + return 0; + } + return vm->stack[vm->sp - 1 - offset]; +} + +int vm_register_native_function(VM* vm, const char* name, Value (*func)(VM* vm, Value* args, uint8_t argc)) { + if (!vm || !name || !func) { + fprintf(stderr, "Invalid parameters for native function registration\n"); + return -1; + } + + // 检查函数是否已经注册 + for (size_t i = 0; i < vm->native_function_count; i++) { + if (strcmp(vm->native_functions[i].name, name) == 0) { + // 函数已存在,更新函数指针 + vm->native_functions[i].func = func; + return i; + } + } + + // 如果需要,扩展原生函数表 + if (vm->native_function_count % 10 == 0 && vm->native_function_count > 0) { + size_t new_size = (vm->native_function_count + 10) * sizeof(NativeFunction); + 
NativeFunction* new_functions = realloc(vm->native_functions, new_size); + if (!new_functions) { + fprintf(stderr, "Failed to allocate memory for native functions\n"); + return -1; + } + vm->native_functions = new_functions; + } + + // 添加新函数 + size_t idx = vm->native_function_count; + vm->native_functions[idx].name = strdup(name); + vm->native_functions[idx].func = func; + vm->native_function_count++; + + return idx; +} + +void vm_load(VM* vm, Program *program) { + if (!vm || !program) return; + vm->program = program; + + // 重置虚拟机状态 + vm->ip = 0; + vm->sp = 0; + vm->frame_count = 0; + + // 初始化全局变量表 + vm->global_count = 0; + memset(vm->globals, 0, sizeof(GlobalVar) * GLOBAL_VAR_COUNT); + + // 初始化函数表 + vm->functions = NULL; + vm->function_count = 0; +} + + +void vm_eval(VM* vm) { + vm->error = VM_SUCCESS; + if (!vm || !vm->program) { + vm_set_error(vm, VM_ERROR_INVALID_REGISTER, "Invalid VM or no program loaded", 0, 0); + return; + } + + uint8_t dst_reg = 0; + uint8_t src_reg1 = 0; + uint8_t src_reg2 = 0; + uint8_t cond_reg = 0; + + while (vm->ip < vm->program->code_size) { + uint8_t opcode = vm->program->code[vm->ip++]; + + switch (opcode) { + case OP_LOAD_CONST: { + uint8_t dst_reg = vm->program->code[vm->ip++]; + uint8_t const_idx = vm->program->code[vm->ip++]; + if (const_idx >= vm->program->constants.count) { + vm_set_error(vm, VM_ERROR_INVALID_CONSTANT, + "Invalid constant index", + vm->ip - 3, opcode); + return; + } + Constant constant = vm->program->constants.entries[const_idx]; + switch (constant.type) { + case TYPE_INT8: + vm->registers[dst_reg] = constant.value.int8_val; + break; + case TYPE_INT16: + vm->registers[dst_reg] = constant.value.int16_val; + break; + case TYPE_INT32: + vm->registers[dst_reg] = constant.value.int32_val; + break; + case TYPE_INT64: + vm->registers[dst_reg] = constant.value.int64_val; + break; + case TYPE_BOOL: + vm->registers[dst_reg] = constant.value.bool_val; + break; + case TYPE_CHAR: + vm->registers[dst_reg] = 
constant.value.char_val; + break; + default: + vm_set_error(vm, VM_ERROR_INVALID_CONSTANT, + "Unsupported constant type", + vm->ip - 3, opcode); + return; + } + break; + } + case OP_GLOBAL_LOAD: { + uint8_t dst_reg = vm->program->code[vm->ip++]; + uint8_t global_idx = vm->program->code[vm->ip++]; + if (global_idx >= vm->global_count) { + fprintf(stderr, "Invalid global variable index: %d\n", global_idx); + return; + } + vm->registers[dst_reg] = vm->globals[global_idx].value.int32_val; + break; + } + case OP_GLOBAL_STORE: { + uint8_t src_reg = vm->program->code[vm->ip++]; + uint8_t global_idx = vm->program->code[vm->ip++]; + if (global_idx >= vm->global_count) { + fprintf(stderr, "Invalid global variable index: %d\n", global_idx); + return; + } + vm->globals[global_idx].value.int32_val = vm->registers[src_reg]; + break; + } + case OP_LOAD_LOCAL: { + if (vm->frame_count == 0) { + fprintf(stderr, "No active frame for local variable access\n"); + return; + } + uint8_t dst_reg = vm->program->code[vm->ip++]; + uint8_t local_idx = vm->program->code[vm->ip++]; + Frame* frame = &vm->frames[vm->frame_count - 1]; + vm->registers[dst_reg] = frame->locals[local_idx]; + break; + } + case OP_STORE_LOCAL: { + if (vm->frame_count == 0) { + fprintf(stderr, "No active frame for local variable access\n"); + return; + } + uint8_t src_reg = vm->program->code[vm->ip++]; + uint8_t local_idx = vm->program->code[vm->ip++]; + Frame* frame = &vm->frames[vm->frame_count - 1]; + frame->locals[local_idx] = vm->registers[src_reg]; + break; + } + case OP_FUNC_DEF: { + uint8_t func_name_idx = vm->program->code[vm->ip++]; + uint8_t param_count = vm->program->code[vm->ip++]; + uint8_t local_count = vm->program->code[vm->ip++]; + uint16_t instruction_count = (vm->program->code[vm->ip] << 8) | vm->program->code[vm->ip + 1]; + vm->ip += 2; // Skip instruction count bytes + + // 扩展函数表 + size_t new_size = (vm->function_count + 1) * sizeof(Function); + Function* new_functions = realloc(vm->functions, 
new_size); + if (!new_functions) { + fprintf(stderr, "Failed to allocate memory for function\n"); + return; + } + vm->functions = new_functions; + + Function* func = &vm->functions[vm->function_count++]; + func->name = strdup(vm->program->constants.entries[func_name_idx].value.string_val.value); + func->code_offset = vm->ip; + func->param_count = param_count; + func->local_count = local_count; + func->instructions_count = instruction_count; + + // 跳过函数体 + vm->ip += instruction_count; + break; + } + case OP_CALL: { + uint8_t func_idx = vm->program->code[vm->ip++]; + if (func_idx >= vm->function_count) { + fprintf(stderr, "Invalid function index: %d\n", func_idx); + return; + } + + Function* func = &vm->functions[func_idx]; + + // 创建新的栈帧 + vm->frames = realloc(vm->frames, (vm->frame_count + 1) * sizeof(Frame)); + if (!vm->frames) { + fprintf(stderr, "Failed to allocate memory for frame\n"); + return; + } + + Frame* frame = &vm->frames[vm->frame_count++]; + frame->ip = vm->ip; + frame->locals = calloc(func->local_count, sizeof(uint64_t)); + frame->stack = calloc(STACK_FRAME_SIZE, sizeof(uint64_t)); + frame->sp = 0; + + // 保存当前IP并跳转到函数代码 + vm->ip = func->code_offset; + break; + } + + case OP_RETURN: { + if (vm->frame_count == 0) { + fprintf(stderr, "No frame to return from\n"); + return; + } + + // 恢复前一个栈帧 + Frame* frame = &vm->frames[--vm->frame_count]; + vm->ip = frame->ip; + + // 释放栈帧资源 + free(frame->locals); + free(frame->stack); + break; + } + case OP_ADD: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] + vm->registers[src_reg2]; + break; + } + case OP_JMP: { + uint16_t offset = (vm->program->code[vm->ip] << 8) | vm->program->code[vm->ip + 1]; + if (offset >= vm->program->code_size) { + vm_set_error(vm, VM_ERROR_INVALID_JUMP, + "Invalid jump address", + vm->ip, opcode); + return; + } + vm->ip = offset; + vm->ip += 2; // 
跳过偏移量字节 + break; + } + case OP_JMP_IF_FALSE: { + cond_reg = vm->program->code[vm->ip++]; + uint16_t offset = (vm->program->code[vm->ip] << 8) | vm->program->code[vm->ip + 1]; + vm->ip += 2; + + if (!vm->registers[cond_reg]) { + // 条件为假时,跳转到指定位置 + if (offset >= vm->program->code_size) { + vm_set_error(vm, VM_ERROR_INVALID_JUMP, + "Invalid jump address", + vm->ip - 3, opcode); + return; + } + vm->ip = offset; + + // 跳过此块中的所有嵌套if,直到找到匹配的END_IF + vm->block_count++; + } + break; + } + + case OP_END_IF: { + // 处理if块的结束 + if (vm->block_count > 0) { + vm->block_count--; + } + break; + } + + case OP_JMP_IF_TRUE: { + cond_reg = vm->program->code[vm->ip++]; + uint16_t offset = (vm->program->code[vm->ip] << 8) | vm->program->code[vm->ip + 1]; + if (vm->registers[cond_reg]) { + vm->ip = offset; + } else { + vm->ip += 2; + } + break; + } + case OP_CMP_EQ: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] == vm->registers[src_reg2]; + break; + } + case OP_CMP_NE: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] != vm->registers[src_reg2]; + break; + } + case OP_CMP_LT: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] < vm->registers[src_reg2]; + break; + } + case OP_CMP_LE: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] <= vm->registers[src_reg2]; + break; + } + case OP_CMP_GT: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = 
vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] > vm->registers[src_reg2]; + break; + } + case OP_CMP_GE: { + dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg1 = vm->program->code[vm->ip++]; + uint8_t src_reg2 = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg1] >= vm->registers[src_reg2]; + break; + } + case OP_NATIVE_CALL: { + uint8_t func_idx = vm->program->code[vm->ip++]; + uint8_t argc = vm->program->code[vm->ip++]; + + if (func_idx >= vm->native_function_count) { + vm_set_error(vm, VM_ERROR_INVALID_NATIVE_FUNCTION, + "Invalid native function index", + vm->ip - 2, opcode); + return; + } + + // 准备参数数组 + Value args[argc]; + for (int i = 0; i < argc; i++) { + uint8_t arg_reg = vm->program->code[vm->ip++]; + args[i].int64_val = vm->registers[arg_reg]; + } + + // 调用原生函数 + NativeFunction* func = &vm->native_functions[func_idx]; + Value result = func->func(vm, args, argc); + vm->registers[R0] = result.int64_val; + break; + } + case OP_HALT: + return; + case OP_MOV: { + uint8_t dst_reg = vm->program->code[vm->ip++]; + uint8_t src_reg = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = vm->registers[src_reg]; + break; + } + case OP_MOV_IMM: { + uint8_t dst_reg = vm->program->code[vm->ip++]; + uint8_t imm = vm->program->code[vm->ip++]; + vm->registers[dst_reg] = imm; + break; + } + default: + vm_set_error(vm, VM_ERROR_INVALID_OPCODE, + "Unknown instruction", + vm->ip - 1, opcode); + return; + } + + // 检查每条指令执行后是否有错误 + if (vm->error != VM_SUCCESS) { + return; + } + } +} + +void print_bytecode(Program program) { + printf("字节码指令:\n"); + printf("地址 | 操作码 | 操作数\n"); + printf("-------------------------------------------\n"); + int indent = 0; + for (size_t i = 0; i < program.code_size;) { + for (int j = 0; j < indent; j++) printf(" "); + printf("0x%04d | ", i); + + uint8_t opcode = program.code[i++]; + switch (opcode) { + case OP_BLOCK_START: + printf("BLOCK_START\n"); + indent++; + break; + case 
OP_BLOCK_END: + indent--; + printf("BLOCK_END\n"); + break; + case OP_LOAD_CONST: + printf("LOAD_CONST R%d, %d\n", program.code[i], program.code[i+1]); + i += 2; + break; + case OP_MOV: + printf("MOV R%d, %d\n", program.code[i], program.code[i+1]); + i += 2; + break; + case OP_MOV_IMM: + printf("MOV_IMM R%d, %d\n", program.code[i], program.code[i+1]); + i += 2; + break; + case OP_ADD: + printf("ADD R%d, R%d, R%d\n", + program.code[i], program.code[i+1], program.code[i+2]); + i += 3; + break; + case OP_CMP_EQ: + printf("CMP_EQ R%d, R%d, R%d\n", + program.code[i], program.code[i+1], program.code[i+2]); + i += 3; + break; + case OP_JMP_IF_FALSE: + printf("JMP_IF_FALSE"); + for (int j = 0; j < indent; j++) printf(" "); + printf(" R%d, %d, %d\n", + program.code[i], program.code[i+1], program.code[i+2]); + i += 3; + indent++; // 增加缩进层级 + break; + case OP_JMP: + printf("JMP %d, %d\n", program.code[i], program.code[i+1]); + i += 2; + break; + case OP_NATIVE_CALL: + printf("NATIVE_CALL %d, %d, R%d\n", + program.code[i], program.code[i+1], program.code[i+2]); + i += 3; + break; + case OP_HALT: + printf("HALT\n"); + break; + case OP_END_IF: + indent--; // 减少缩进层级 + printf("END_IF"); + for (int j = 0; j < indent; j++) printf(" "); + printf("\n"); + break; + default: + printf("UNKNOWN 0x%02X\n", opcode); + i++; + break; + } + } +} diff --git a/vm/vm.h b/vm/vm.h new file mode 100644 index 0000000..1460c95 --- /dev/null +++ b/vm/vm.h @@ -0,0 +1,219 @@ +#ifndef VETY_VM_H +#define VETY_VM_H + +#include +#include +#include + + +#define MEMORY_SIZE 1024 * 1024 // 虚拟机内存大小 +#define REGISTER_COUNT 64 // 虚拟机寄存器数量 +#define STACK_SIZE 64 // 虚拟机栈大小 +#define STACK_FRAME_SIZE 64 // 虚拟机栈帧大小 +#define STACK_LOCAL_SIZE 64 +#define CONSTANT_POOL_SIZE 64 // 虚拟机常量池大小 +#define GLOBAL_VAR_COUNT 64 // 虚拟机全局变量数量 + + enum { + OP_PUSH, + OP_POP, + OP_MOV, // 移动寄存器到寄存器 + OP_MOV_IMM, // 移动立即数到寄存器 + OP_LOAD_CONST, // 加载常量到寄存器 + OP_STORE_CONST, // 存储寄存器值到常量池 + OP_GLOBAL_LOAD, // 加载全局变量到寄存器 + OP_GLOBAL_STORE, 
// 存储寄存器值到全局变量 + OP_LOAD_LOCAL, // 加载局部变量到寄存器 + OP_STORE_LOCAL, // 存储寄存器值到局部变量 + + OP_FUNC_DEF, // 函数定义 + OP_CALL, // 调用函数 + OP_NATIVE_CALL, // 调用本地函数 + OP_RETURN, // 函数返回 + OP_ADD, // 加法运算 + OP_SUB, // 减法运算 + OP_MUL, // 乘法运算 + OP_DIV, // 除法运算 + OP_JMP, // 无条件跳转 + OP_JMP_IF_FALSE, // 条件跳转(如果为假) + OP_JMP_IF_TRUE, // 条件跳转(如果为真) + OP_CMP_EQ, // 比较相等 + OP_CMP_NE, // 比较不相等 + OP_CMP_LT, // 比较小于 + OP_CMP_LE, // 比较小于等于 + OP_CMP_GT, // 比较大于 + OP_CMP_GE, // 比较大于等于 + OP_BLOCK_START, // 块开始 + OP_BLOCK_END, // 块结束 + OP_END_IF, // 结束if语句 + OP_HALT, // 停止执行 + OP_IF_START, // if块开始 + OP_ELSE, // else块 + OP_IF_END // if块结束 +}; + +typedef enum { + TYPE_INT8, + TYPE_INT16, + TYPE_INT32, + TYPE_INT64, + TYPE_FLOAT32, + TYPE_FLOAT64, + TYPE_STRING, + TYPE_BOOL, + TYPE_CHAR, + TYPE_OBJECT, +} VarType; + +typedef union { + int8_t int8_val; + int16_t int16_val; + int32_t int32_val; + int64_t int64_val; + float float32_val; + double float64_val; + uint8_t bool_val; + char char_val; + void* object_val; + struct { + uint64_t len; + char* value; + } string_val; + struct { + uint64_t code_offset; + uint16_t* arity; + } func_val; +} Value; + + +typedef struct { + VarType type; + Value value; +} Constant; + +typedef struct { + Constant* entries; + uint64_t count; + uint64_t capacity; +} ConstantPool; + +typedef struct Frame { + uint64_t ip; // 指令指针 + uint64_t* locals; + uint64_t *stack; + uint64_t sp; + struct Frame* parent; // 父作用域 + uint8_t scope_depth; // 作用域深度 +} Frame; + +typedef struct { + char* name; + Value value; + VarType type; +} GlobalVar; + + +typedef struct VM VM; + +typedef struct { + char* name; + Value (*func)(VM*, Value* args, uint8_t argc); +} NativeFunction; + +typedef struct { + char* name; + uint64_t code_offset; + uint8_t param_count; + uint8_t local_count; + uint8_t instructions_count; +} Function; + +typedef struct { + uint8_t* code; + size_t code_size; + ConstantPool constants; + size_t constants_size; +} Program; + +enum {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, 
R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31}; + +// 错误类型定义 +typedef enum { + VM_SUCCESS = 0, + VM_ERROR_STACK_OVERFLOW, + VM_ERROR_STACK_UNDERFLOW, + VM_ERROR_INVALID_OPCODE, + VM_ERROR_INVALID_REGISTER, + VM_ERROR_INVALID_CONSTANT, + VM_ERROR_INVALID_JUMP, + VM_ERROR_INVALID_NATIVE_FUNCTION, + VM_ERROR_MEMORY_ALLOCATION, + VM_ERROR_DIVISION_BY_ZERO, + VM_ERROR_INVALID_FRAME, +} VMError; + +// 修改虚拟机结构体 +typedef struct BlockInfo { + uint64_t start_ip; + uint64_t end_ip; +} BlockInfo; + +typedef struct VM { + Program* program; + ConstantPool constants; + uint64_t registers[REGISTER_COUNT]; + size_t frame_count; + Frame* frames; + uint64_t stack[STACK_SIZE]; + uint64_t sp; + uint64_t ip; + + // 全局变量表 + GlobalVar globals[GLOBAL_VAR_COUNT]; + size_t global_count; + + // 本地函数表 + NativeFunction* native_functions; + size_t native_function_count; + + // 用户定义函数表 + Function* functions; + size_t function_count; + + // 错误处理 + VMError error; + char error_message[256]; + uint64_t error_ip; // 错误发生的指令位置 + uint8_t error_opcode; // 错误发生时的操作码 + + // 块追踪 + BlockInfo block_stack[STACK_SIZE]; + uint64_t block_count; + + // if语句跟踪 + uint64_t if_stack[256]; // 存储if语句开始位置 + uint64_t else_stack[256]; // 存储else语句位置 + int if_sp; // if栈指针 +} VM; + + + +// 虚拟机创建 +VM *vm_create(); +void vm_destroy(VM *vm); +void vm_load(VM* vm, Program *program); +void vm_eval(VM *vm); + +// 栈 +void vm_push(VM *vm, uint64_t value); +uint64_t vm_pop(VM *vm); +uint64_t vm_peek(VM *vm, uint64_t offset); +int vm_register_native_function(VM* vm, const char* name, Value (*func)(VM*, Value* args, uint8_t argc)); // 更新函数声明 + +// 错误处理函数 +const char* vm_error_to_string(VMError error); +void vm_set_error(VM* vm, VMError error, const char* message, uint64_t ip, uint8_t opcode); +VMError vm_get_error(VM* vm); +const char* vm_get_error_message(VM* vm); +void print_bytecode(Program program); +#endif //VETY_VM_H