164 lines
4.1 KiB
OCaml
164 lines
4.1 KiB
OCaml
(*
|
|
* uTop_lexer.mll
|
|
* --------------
|
|
* Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
|
|
* Licence : BSD3
|
|
*
|
|
* This file is a part of utop.
|
|
*)
|
|
|
|
(* Lexer for the OCaml language. *)
|
|
|
|
{
|
|
open UTop_token
|
|
}
|
|
|
|
let uchar = ['\x00' - '\x7f'] | _ [ '\x80' - '\xbf' ]*
|
|
|
|
let blank = [' ' '\009' '\012']
|
|
let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
|
|
let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
|
|
let identchar =
|
|
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
|
|
let ident = (lowercase|uppercase) identchar*
|
|
let locname = ident
|
|
let not_star_symbolchar =
|
|
['$' '!' '%' '&' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~' '\\']
|
|
let symbolchar = '*' | not_star_symbolchar
|
|
let quotchar =
|
|
['!' '%' '&' '+' '-' '.' '/' ':' '=' '?' '@' '^' '|' '~' '\\' '*']
|
|
let hexa_char = ['0'-'9' 'A'-'F' 'a'-'f']
|
|
let decimal_literal =
|
|
['0'-'9'] ['0'-'9' '_']*
|
|
let hex_literal =
|
|
'0' ['x' 'X'] hexa_char ['0'-'9' 'A'-'F' 'a'-'f' '_']*
|
|
let oct_literal =
|
|
'0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
|
|
let bin_literal =
|
|
'0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
|
|
let int_literal =
|
|
decimal_literal | hex_literal | oct_literal | bin_literal
|
|
let float_literal =
|
|
['0'-'9'] ['0'-'9' '_']*
|
|
('.' ['0'-'9' '_']* )?
|
|
(['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
|
|
|
|
let safe_delimchars = ['%' '&' '/' '@' '^']
|
|
|
|
let delimchars = safe_delimchars | ['|' '<' '>' ':' '=' '.']
|
|
|
|
let left_delims = ['(' '[' '{']
|
|
let right_delims = [')' ']' '}']
|
|
|
|
let left_delimitor =
|
|
left_delims delimchars* safe_delimchars (delimchars|left_delims)*
|
|
| '(' (['|' ':'] delimchars*)?
|
|
| '[' ['|' ':']?
|
|
| ['[' '{'] delimchars* '<'
|
|
| '{' (['|' ':'] delimchars*)?
|
|
|
|
let right_delimitor =
|
|
(delimchars|right_delims)* safe_delimchars (delimchars|right_delims)* right_delims
|
|
| (delimchars* ['|' ':'])? ')'
|
|
| ['|' ':']? ']'
|
|
| '>' delimchars* [']' '}']
|
|
| (delimchars* ['|' ':'])? '}'
|
|
|
|
rule token = parse
|
|
| ('\n' | blank)+
|
|
{ Blanks }
|
|
| "true"
|
|
{ Constant }
|
|
| "false"
|
|
{ Constant }
|
|
| lowercase identchar*
|
|
{ Lident }
|
|
| uppercase identchar*
|
|
{ Uident }
|
|
| int_literal "l"
|
|
{ Constant }
|
|
| int_literal "L"
|
|
{ Constant }
|
|
| int_literal "n"
|
|
{ Constant }
|
|
| int_literal
|
|
{ Constant }
|
|
| float_literal
|
|
{ Constant }
|
|
| '"'
|
|
{ String (string lexbuf) }
|
|
| "'" [^'\'' '\\'] "'"
|
|
{ Char }
|
|
| "'\\" ['\\' '"' 'n' 't' 'b' 'r' ' ' '\'' 'x' '0'-'9'] eof
|
|
{ Char }
|
|
| "'\\" ['\\' '"' 'n' 't' 'b' 'r' ' ' '\''] "'"
|
|
{ Char }
|
|
| "'\\" (['0'-'9'] ['0'-'9'] | 'x' hexa_char) eof
|
|
{ Char }
|
|
| "'\\" (['0'-'9'] ['0'-'9'] ['0'-'9'] | 'x' hexa_char hexa_char) eof
|
|
{ Char }
|
|
| "'\\" (['0'-'9'] ['0'-'9'] ['0'-'9'] | 'x' hexa_char hexa_char) "'"
|
|
{ Char }
|
|
| "'\\" uchar
|
|
{ Error }
|
|
| "(**"
|
|
{ Doc (comment 0 lexbuf) }
|
|
| "(*"
|
|
{ Comment (comment 0 lexbuf) }
|
|
| '<' (':' ident)? ('@' locname)? '<'
|
|
{ Quotation (quotation lexbuf) }
|
|
| ( "#" | "`" | "'" | "," | "." | ".." | ":" | "::"
|
|
| ":=" | ":>" | ";" | ";;" | "_"
|
|
| left_delimitor | right_delimitor )
|
|
{ Symbol }
|
|
| ['~' '?' '!' '=' '<' '>' '|' '&' '@' '^' '+' '-' '*' '/' '%' '\\' '$'] symbolchar*
|
|
{ Symbol }
|
|
| uchar
|
|
{ Error }
|
|
| eof
|
|
{ raise End_of_file }
|
|
|
|
and comment depth = parse
|
|
| "(*"
|
|
{ comment (depth + 1) lexbuf }
|
|
| "*)"
|
|
{ if depth > 0 then comment (depth - 1) lexbuf else true }
|
|
| uchar
|
|
{ comment depth lexbuf }
|
|
| eof
|
|
{ false }
|
|
|
|
and string = parse
|
|
| '"'
|
|
{ true }
|
|
| "\\\""
|
|
{ string lexbuf }
|
|
| uchar
|
|
{ string lexbuf }
|
|
| eof
|
|
{ false }
|
|
|
|
and quotation = parse
|
|
| ">>"
|
|
{ true }
|
|
| uchar
|
|
{ quotation lexbuf }
|
|
| eof
|
|
{ false }
|
|
|
|
{
|
|
let lex_string str =
|
|
let lexbuf = Lexing.from_string str in
|
|
let rec loop idx ofs_a =
|
|
match try Some (token lexbuf) with End_of_file -> None with
|
|
| Some token ->
|
|
let ofs_b = Lexing.lexeme_end lexbuf in
|
|
let src = String.sub str ofs_a (ofs_b - ofs_a) in
|
|
let idx' = idx + Zed_utf8.length src in
|
|
(token, idx, idx', src) :: loop idx' ofs_b
|
|
| None ->
|
|
[]
|
|
in
|
|
loop 0 0
|
|
}
|