From 5d227bcd0055d02e1d49a3dcd27e80a756923d5b Mon Sep 17 00:00:00 2001 From: Malte Voos Date: Sun, 23 Jun 2024 23:31:59 +0200 Subject: split code into smaller libraries and make a better repl --- src/parser/Eff.ml | 1 + src/parser/Grammar.mly | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser/Lexer.mll | 37 +++++++++++++++++++++++ src/parser/Parser.ml | 13 ++++++++ src/parser/dune | 13 ++++++++ 5 files changed, 144 insertions(+) create mode 100644 src/parser/Eff.ml create mode 100644 src/parser/Grammar.mly create mode 100644 src/parser/Lexer.mll create mode 100644 src/parser/Parser.ml create mode 100644 src/parser/dune (limited to 'src/parser') diff --git a/src/parser/Eff.ml b/src/parser/Eff.ml new file mode 100644 index 0000000..45887b8 --- /dev/null +++ b/src/parser/Eff.ml @@ -0,0 +1 @@ +include Algaeff.Reader.Make (struct type nonrec t = Asai.Range.source end) \ No newline at end of file diff --git a/src/parser/Grammar.mly b/src/parser/Grammar.mly new file mode 100644 index 0000000..bb98029 --- /dev/null +++ b/src/parser/Grammar.mly @@ -0,0 +1,80 @@ +%{ +open Ast +%} + +%token IDENT +%token LPR RPR +%token LBR RBR +%token ARROW ASTERISK BACKSLASH COLON DOUBLE_COLON COMMA DOT FATARROW UNDERSCORE +%token BOOL TRUE FALSE BOOL_ELIM AT +%token FST SND +%token TYPE +%token EOF + +%nonassoc DOUBLE_COLON +%right ARROW +%right ASTERISK +%left IDENT LPR APP BACKSLASH FST SND TYPE BOOL TRUE FALSE BOOL_ELIM + +%start start_expr + +%% + +%inline +locate(X): e = X { Asai.Range.locate_lex ~source:(Eff.read()) $loc e } + +raw_ident: id = separated_nonempty_list(DOT, IDENT) { id } + +%inline +ident: id = locate(raw_ident) { id } + +raw_local_name: + | name = IDENT { Some name } + | UNDERSCORE { None } + +local_name: name = locate(raw_local_name) { name } + +arg: LPR; name = local_name; COLON; tp = expr; RPR + { (name, tp) } + +raw_expr: + | name = raw_ident + { Var name } + | LPR; e = raw_expr; RPR + { e } + | tm = expr; DOUBLE_COLON; tp = expr + { Check (tm, tp) } + + | a = arg; ARROW; b = expr + { Pi (a, b) } + | BACKSLASH; a = local_name; ARROW; b = expr + { Lam (a, b) } + | a = expr; b = expr %prec APP + { App (a, b) } + + | a = arg; ASTERISK; b = expr + { Sg (a, b) } + | LPR; a = expr; COMMA; b = expr; RPR + { Pair (a, b) } + | FST; p = expr + { Fst p } + | SND; p = expr + { Snd p } + + | TYPE + { Type } + + | BOOL + { Bool } + | TRUE + { True } + | FALSE + { False } + | BOOL_ELIM; scrut = expr; AT; motive_var = local_name; ARROW; motive_body = expr; + LBR; TRUE; FATARROW; true_case = expr; COMMA; FALSE; FATARROW; false_case = expr; RBR + { BoolElim { motive_var; motive_body; true_case; false_case; scrut } } + +%inline +expr: e = locate(raw_expr) { e } + +start_expr: e = expr; EOF { e } diff --git a/src/parser/Lexer.mll b/src/parser/Lexer.mll new file mode 100644 index 0000000..708bb64 --- /dev/null +++ b/src/parser/Lexer.mll @@ -0,0 +1,37 @@ +{ +open Grammar + +exception IllegalCharacter of char +} + +let whitespace = [' ' '\t' '\r' '\n']+ +let letter = ['a'-'z' 'A'-'Z'] +let ident = letter+ + +rule token = + parse + | whitespace { token lexbuf } + | "(" { LPR } + | ")" { RPR } + | "[" { LBR } + | "]" { RBR } + | "->" { ARROW } + | "*" { ASTERISK } + | "\\" { BACKSLASH } + | "::" { DOUBLE_COLON } + | ":" { COLON } + | "," { COMMA } + | "." { DOT } + | "=>" { FATARROW } + | "_" { UNDERSCORE } + | "at" { AT } + | "fst" { FST } + | "snd" { SND } + | "type" { TYPE } + | "bool" { BOOL } + | "true" { TRUE } + | "false" { FALSE } + | "bool-elim" { BOOL_ELIM } + | ident { IDENT (Lexing.lexeme lexbuf) } + | eof { EOF } + | (_ as illegal_char) { raise (IllegalCharacter illegal_char) } diff --git a/src/parser/Parser.ml b/src/parser/Parser.ml new file mode 100644 index 0000000..4593d82 --- /dev/null +++ b/src/parser/Parser.ml @@ -0,0 +1,13 @@ +let parse_expr (s : string) : Ast.expr = + let lexbuf = Lexing.from_string s in + let string_source : Asai.Range.string_source = { + title = None; + content = s; + } in + let source = `String string_source in + Eff.run ~env:source @@ fun () -> + try Grammar.start_expr Lexer.token lexbuf with + | Lexer.IllegalCharacter illegal_char -> + Reporter.illegal_character ~loc:(Asai.Range.of_lexbuf ~source lexbuf) illegal_char + | Grammar.Error -> + Reporter.syntax_error ~loc:(Asai.Range.of_lexbuf ~source lexbuf) diff --git a/src/parser/dune b/src/parser/dune new file mode 100644 index 0000000..fd4aae7 --- /dev/null +++ b/src/parser/dune @@ -0,0 +1,13 @@ +(menhir + (explain true) + (modules Grammar)) + +(ocamllex Lexer) + +(library + (name Parser) + (public_name toytt.parser) + (libraries + asai + toytt.ast + toytt.reporter)) -- cgit 1.4.1