UTop_lexer: support combined glyph

This commit is contained in:
ZAN DoYe 2019-04-27 16:21:36 +08:00
parent fa6f1ee90f
commit dd1f834985
1 changed files with 70 additions and 26 deletions

View File

@ -115,13 +115,13 @@ rule tokens syntax context idx acc = parse
tokens syntax context loc.idx2 ((tok, loc) :: acc) lexbuf }
| '"'
{ let ofs = lexeme_start lexbuf in
let idx2, terminated = string (idx + 1) lexbuf in
let idx2, terminated = string (idx + 1) false lexbuf in
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
tokens syntax context idx2 ((String (1, terminated), loc) :: acc) lexbuf }
| '{' (lowercase* as tag) '|'
{ let ofs = lexeme_start lexbuf in
let delim_len = String.length tag + 2 in
let idx2, terminated = quoted_string (idx + delim_len) tag lexbuf in
let idx2, terminated = quoted_string (idx + delim_len) tag false lexbuf in
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
tokens syntax context idx2 ((String (delim_len, terminated), loc) :: acc) lexbuf }
| "'" [^'\'' '\\'] "'"
@ -143,12 +143,12 @@ rule tokens syntax context idx acc = parse
tokens syntax context loc.idx2 ((Comment (Comment_doc, true), loc) :: acc) lexbuf }
| "(**"
{ let ofs = lexeme_start lexbuf in
let idx2, terminated = comment (idx + 3) 0 lexbuf in
let idx2, terminated = comment (idx + 3) 0 false lexbuf in
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
tokens syntax context idx2 ((Comment (Comment_doc, terminated), loc) :: acc) lexbuf }
| "(*"
{ let ofs = lexeme_start lexbuf in
let idx2, terminated = comment (idx + 2) 0 lexbuf in
let idx2, terminated = comment (idx + 2) 0 false lexbuf in
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
tokens syntax context idx2 ((Comment (Comment_reg, terminated), loc) :: acc) lexbuf }
| ""
@ -180,7 +180,7 @@ and symbol syntax context idx acc = parse
and camlp4_toplevel syntax context idx acc = parse
| '<' (':' ident)? ('@' lident)? '<'
{ let ofs = lexeme_start lexbuf in
let idx2, items, terminated = quotation syntax 0 idx (idx + lexeme_size lexbuf) (lexeme_start lexbuf) lexbuf in
let idx2, items, terminated = quotation syntax 0 idx (idx + lexeme_size lexbuf) (lexeme_start lexbuf) false lexbuf in
let ofs2 = lexeme_end lexbuf in
tokens syntax context idx2
((Quotation (items, terminated), mkloc idx idx2 ofs ofs2) :: acc)
@ -194,56 +194,89 @@ and camlp4_antiquot syntax context idx acc = parse
| ""
{ camlp4_toplevel syntax context idx acc lexbuf }
and comment idx depth = parse
and comment idx depth combining= parse
| "(*"
{ comment (idx + 2) (depth + 1) lexbuf }
{ comment (idx + 2) (depth + 1) false lexbuf }
| "*)"
{ if depth = 0 then
(idx + 2, true)
else
comment (idx + 2) (depth - 1) lexbuf }
comment (idx + 2) (depth - 1) false lexbuf }
| '"'
{ let idx, terminated = string (idx + 1) lexbuf in
{ let idx, terminated = string (idx + 1) false lexbuf in
if terminated then
comment idx depth lexbuf
comment idx depth false lexbuf
else
(idx, false) }
| uchar
{ comment (idx + 1) depth lexbuf }
| uchar as uchar
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
if not combining then
if Zed_char.is_combining_mark uChar then
comment (idx + 1) depth false lexbuf
else
comment (idx + 1) depth true lexbuf
else
if Zed_char.is_combining_mark uChar then
comment idx depth true lexbuf
else
comment (idx + 1) depth true lexbuf
}
| eof
{ (idx, false) }
and string idx = parse
and string idx combining= parse
| '"'
{ (idx + 1, true) }
| "\\\""
{ string (idx + 2) lexbuf }
| uchar
{ string (idx + 1) lexbuf }
{ string (idx + 2) false lexbuf }
| uchar as uchar
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
if not combining then
if Zed_char.is_combining_mark uChar then
string (idx + 1) false lexbuf
else
string (idx + 1) true lexbuf
else
if Zed_char.is_combining_mark uChar then
string idx true lexbuf
else
string (idx + 1) true lexbuf
}
| eof
{ (idx, false) }
and quoted_string idx tag = parse
and quoted_string idx tag combining= parse
| '|' (lowercase* as tag2) '}'
{ let idx = idx + 2 + String.length tag2 in
if tag = tag2 then
(idx, true)
else
quoted_string idx tag lexbuf }
quoted_string idx tag false lexbuf }
| eof
{ (idx, false) }
| uchar
{ quoted_string (idx + 1) tag lexbuf }
| uchar as uchar
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
if not combining then
if Zed_char.is_combining_mark uChar then
quoted_string (idx + 1) tag false lexbuf
else
quoted_string (idx + 1) tag true lexbuf
else
if Zed_char.is_combining_mark uChar then
quoted_string idx tag true lexbuf
else
quoted_string (idx + 1) tag true lexbuf
}
and quotation syntax depth idx1 idx2 ofs1 = parse
and quotation syntax depth idx1 idx2 ofs1 combining= parse
| '<' (':' ident)? ('@' lident)? '<'
{ quotation syntax (depth + 1) idx1 (idx2 + lexeme_size lexbuf) ofs1 lexbuf }
{ quotation syntax (depth + 1) idx1 (idx2 + lexeme_size lexbuf) ofs1 false lexbuf }
| ">>"
{ if depth = 0 then
let loc = mkloc idx1 (idx2 + 2) ofs1 (lexeme_end lexbuf) in
(idx2 + 2, [(Quot_data, loc)], true)
else
quotation syntax (depth - 1) idx1 (idx2 + 2) ofs1 lexbuf }
quotation syntax (depth - 1) idx1 (idx2 + 2) ofs1 false lexbuf }
| '$'
{ let quot_data_loc =
if idx1 = idx2 then
@ -262,15 +295,26 @@ and quotation syntax depth idx1 idx2 ofs1 = parse
} in
let ofs = lexeme_end lexbuf in
let loc = mkloc opening_loc.idx1 idx opening_loc.ofs2 ofs in
let idx, quot_items, terminated = quotation syntax depth idx idx ofs lexbuf in
let idx, quot_items, terminated = quotation syntax depth idx idx ofs false lexbuf in
let quot_items = (Quot_anti anti, loc) :: quot_items in
match quot_data_loc with
| Some loc ->
(idx, (Quot_data, loc) :: quot_items, terminated)
| None ->
(idx, quot_items, terminated) }
| uchar
{ quotation syntax depth idx1 (idx2 + 1) ofs1 lexbuf }
| uchar as uchar
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
if not combining then
if Zed_char.is_combining_mark uChar then
quotation syntax depth idx1 (idx2 + 1) ofs1 false lexbuf
else
quotation syntax depth idx1 (idx2 + 1) ofs1 true lexbuf
else
if Zed_char.is_combining_mark uChar then
quotation syntax depth idx1 idx2 ofs1 true lexbuf
else
quotation syntax depth idx1 (idx2 + 1) ofs1 true lexbuf
}
| eof
{ if idx1 = idx2 then
(idx2, [], false)