(* This file is hereby released in the public domain. -- Victor Nicollet, 2008 *) module type TOKENS = sig type token type error val string_of_error : error -> string val eof : token end module RecursiveDescentParser = functor (Tokens : TOKENS) -> struct type position = int type failure_cause = | FailStart of Tokens.error | FailEnd of Tokens.error * position | FailInternal type failure = position * failure_cause list let rec _unique = function | [] -> [] | h ::t -> if List.mem h t then _unique t else h :: _unique t let pretty_print (p,f) = "Token "^string_of_int p^" expected : " ^ String.concat " , " (_unique (List.map (function | FailInternal -> "Internal error "^string_of_int p | FailStart s -> Tokens.string_of_error s | FailEnd (s,i) -> Tokens.string_of_error s^" to close "^string_of_int i) f)) type expect = Tokens.token -> bool type ctx = { tokens : Tokens.token list ; position : position ; best : failure } type 'a rule = ctx -> 'a * ctx let next (ctx:ctx) = match ctx.tokens with | [] -> Tokens.eof | h :: _ -> h let shift (ctx:ctx) = match ctx.tokens with | [] -> ctx | _ :: t -> { ctx with tokens = t ; position = ctx.position + 1 } exception ParseError of failure let stream_of_list lst = { tokens = lst ; position = 0 ; best = (-1,[]) } let add_failure (failure:failure) (ctx:ctx) = if fst failure < fst ctx.best then ctx else if fst failure > fst ctx.best then { ctx with best = failure } else { ctx with best = fst failure, snd failure @ snd ctx.best } let fail fail (ctx:ctx) = raise (ParseError (add_failure (ctx.position,[FailStart fail]) ctx) .best) let fail_close fail (old:ctx) (ctx:ctx) = raise (ParseError (add_failure (ctx.position,[FailEnd (fail,old.position)]) ctx) .best) let expect (t,e) (ctx:ctx) = if t (next ctx) then shift ctx else fail e ctx let expect_close (t,e) old ctx = if t (next ctx) then shift ctx else fail_close e old ctx let between (o,oe) (r:'a rule) (c,ce) = (fun ctx -> let s = expect (o,oe) ctx in let d, s = r s in let s = expect_close (c,ce) ctx s in d, s : 'a rule) let one_of (list:'a rule list) = let rec aux lst ctx = match lst with | [] -> raise (ParseError ctx.best) | r :: t -> try r ctx with ParseError fail -> aux t (add_failure fail ctx) in (aux list : 'a rule) let many (rule : 'a rule) = let rec aux ctx = try let d, ctx = rule ctx in let l, ctx = aux ctx in d :: l, ctx with ParseError fail -> [], add_failure fail ctx in (aux : 'a list rule) type 'a wc = 'a option ref let get (w:'a wc) (r:'a rule) (ctx:ctx) = let d, ctx = r ctx in w := Some d; ctx exception NotBound of int let tuple1 f = let r1 = (ref None : 'a wc) in let ctx = f r1 in match !r1 with None -> raise (NotBound 1) | Some x -> x, (ctx:ctx) let tuple2 f = let r1 = (ref None : 'a wc) in let r2 = (ref None : 'b wc) in let ctx = f (r1,r2) in let x1 = match !r1 with | None -> raise (NotBound 1) | Some x -> x and x2 = match !r2 with | None -> raise (NotBound 2) | Some x -> x in (x1,x2), (ctx:ctx) let tuple3 f = let r1 = (ref None : 'a wc) in let r2 = (ref None : 'b wc) in let r3 = (ref None : 'c wc) in let ctx = f (r1,r2,r3) in let x1 = match !r1 with | None -> raise (NotBound 1) | Some x -> x and x2 = match !r2 with | None -> raise (NotBound 2) | Some x -> x and x3 = match !r3 with | None -> raise (NotBound 3) | Some x -> x in (x1,x2,x3), (ctx:ctx) end module Tokens = struct type token = | Eof | Value of int | Plus | Open | Close type error = string let string_of_error e = e let eof = Eof end module MyParser = RecursiveDescentParser(Tokens);; type expr = Int of int | Add of expr * expr let rec print = function | Int i -> print_int i | Add (a,b) -> print_string "("; print a; print_string "+"; print b; print_string ")" let (>>) x f = f x let token_plus = ((=) Tokens.Plus, "'+'") let token_open = ((=) Tokens.Open, "'('") let token_close = ((=) Tokens.Close, "')'") let token_eof = ((=) Tokens.Eof, "") let rec ruleValue ctx = match MyParser.next ctx with | Tokens.Value i -> Int i, MyParser.shift ctx | _ -> MyParser.fail "" ctx and ruleAdd ctx = let (a,b) , ctx = MyParser.tuple2 (fun (a,b) -> ctx >> MyParser.get a ruleValue >> MyParser.expect token_plus >> MyParser.get b ruleExpr) in Add (a,b), ctx and (ruleExpr : expr MyParser.rule) = fun ctx -> MyParser.one_of [ ruleAdd ; ruleValue ; MyParser.between token_open ruleExpr token_close ] ctx and ruleProgram ctx = let l, ctx = ruleExpr ctx in let _ = MyParser.expect token_eof ctx in l let _ = try let e = ruleProgram (MyParser.stream_of_list [ Tokens.Value 10; Tokens.Plus; Tokens.Open; Tokens.Value 20; Tokens.Plus; Tokens.Value 10; Tokens.Close; ]) in print e; print_newline () with MyParser.ParseError e -> print_endline (MyParser.pretty_print e)