UTop_lexer: support combined glyph
This commit is contained in:
parent
fa6f1ee90f
commit
dd1f834985
|
@ -115,13 +115,13 @@ rule tokens syntax context idx acc = parse
|
||||||
tokens syntax context loc.idx2 ((tok, loc) :: acc) lexbuf }
|
tokens syntax context loc.idx2 ((tok, loc) :: acc) lexbuf }
|
||||||
| '"'
|
| '"'
|
||||||
{ let ofs = lexeme_start lexbuf in
|
{ let ofs = lexeme_start lexbuf in
|
||||||
let idx2, terminated = string (idx + 1) lexbuf in
|
let idx2, terminated = string (idx + 1) false lexbuf in
|
||||||
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
||||||
tokens syntax context idx2 ((String (1, terminated), loc) :: acc) lexbuf }
|
tokens syntax context idx2 ((String (1, terminated), loc) :: acc) lexbuf }
|
||||||
| '{' (lowercase* as tag) '|'
|
| '{' (lowercase* as tag) '|'
|
||||||
{ let ofs = lexeme_start lexbuf in
|
{ let ofs = lexeme_start lexbuf in
|
||||||
let delim_len = String.length tag + 2 in
|
let delim_len = String.length tag + 2 in
|
||||||
let idx2, terminated = quoted_string (idx + delim_len) tag lexbuf in
|
let idx2, terminated = quoted_string (idx + delim_len) tag false lexbuf in
|
||||||
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
||||||
tokens syntax context idx2 ((String (delim_len, terminated), loc) :: acc) lexbuf }
|
tokens syntax context idx2 ((String (delim_len, terminated), loc) :: acc) lexbuf }
|
||||||
| "'" [^'\'' '\\'] "'"
|
| "'" [^'\'' '\\'] "'"
|
||||||
|
@ -143,12 +143,12 @@ rule tokens syntax context idx acc = parse
|
||||||
tokens syntax context loc.idx2 ((Comment (Comment_doc, true), loc) :: acc) lexbuf }
|
tokens syntax context loc.idx2 ((Comment (Comment_doc, true), loc) :: acc) lexbuf }
|
||||||
| "(**"
|
| "(**"
|
||||||
{ let ofs = lexeme_start lexbuf in
|
{ let ofs = lexeme_start lexbuf in
|
||||||
let idx2, terminated = comment (idx + 3) 0 lexbuf in
|
let idx2, terminated = comment (idx + 3) 0 false lexbuf in
|
||||||
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
||||||
tokens syntax context idx2 ((Comment (Comment_doc, terminated), loc) :: acc) lexbuf }
|
tokens syntax context idx2 ((Comment (Comment_doc, terminated), loc) :: acc) lexbuf }
|
||||||
| "(*"
|
| "(*"
|
||||||
{ let ofs = lexeme_start lexbuf in
|
{ let ofs = lexeme_start lexbuf in
|
||||||
let idx2, terminated = comment (idx + 2) 0 lexbuf in
|
let idx2, terminated = comment (idx + 2) 0 false lexbuf in
|
||||||
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in
|
||||||
tokens syntax context idx2 ((Comment (Comment_reg, terminated), loc) :: acc) lexbuf }
|
tokens syntax context idx2 ((Comment (Comment_reg, terminated), loc) :: acc) lexbuf }
|
||||||
| ""
|
| ""
|
||||||
|
@ -180,7 +180,7 @@ and symbol syntax context idx acc = parse
|
||||||
and camlp4_toplevel syntax context idx acc = parse
|
and camlp4_toplevel syntax context idx acc = parse
|
||||||
| '<' (':' ident)? ('@' lident)? '<'
|
| '<' (':' ident)? ('@' lident)? '<'
|
||||||
{ let ofs = lexeme_start lexbuf in
|
{ let ofs = lexeme_start lexbuf in
|
||||||
let idx2, items, terminated = quotation syntax 0 idx (idx + lexeme_size lexbuf) (lexeme_start lexbuf) lexbuf in
|
let idx2, items, terminated = quotation syntax 0 idx (idx + lexeme_size lexbuf) (lexeme_start lexbuf) false lexbuf in
|
||||||
let ofs2 = lexeme_end lexbuf in
|
let ofs2 = lexeme_end lexbuf in
|
||||||
tokens syntax context idx2
|
tokens syntax context idx2
|
||||||
((Quotation (items, terminated), mkloc idx idx2 ofs ofs2) :: acc)
|
((Quotation (items, terminated), mkloc idx idx2 ofs ofs2) :: acc)
|
||||||
|
@ -194,56 +194,89 @@ and camlp4_antiquot syntax context idx acc = parse
|
||||||
| ""
|
| ""
|
||||||
{ camlp4_toplevel syntax context idx acc lexbuf }
|
{ camlp4_toplevel syntax context idx acc lexbuf }
|
||||||
|
|
||||||
and comment idx depth = parse
|
and comment idx depth combining= parse
|
||||||
| "(*"
|
| "(*"
|
||||||
{ comment (idx + 2) (depth + 1) lexbuf }
|
{ comment (idx + 2) (depth + 1) false lexbuf }
|
||||||
| "*)"
|
| "*)"
|
||||||
{ if depth = 0 then
|
{ if depth = 0 then
|
||||||
(idx + 2, true)
|
(idx + 2, true)
|
||||||
else
|
else
|
||||||
comment (idx + 2) (depth - 1) lexbuf }
|
comment (idx + 2) (depth - 1) false lexbuf }
|
||||||
| '"'
|
| '"'
|
||||||
{ let idx, terminated = string (idx + 1) lexbuf in
|
{ let idx, terminated = string (idx + 1) false lexbuf in
|
||||||
if terminated then
|
if terminated then
|
||||||
comment idx depth lexbuf
|
comment idx depth false lexbuf
|
||||||
else
|
else
|
||||||
(idx, false) }
|
(idx, false) }
|
||||||
| uchar
|
| uchar as uchar
|
||||||
{ comment (idx + 1) depth lexbuf }
|
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
|
||||||
|
if not combining then
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
comment (idx + 1) depth false lexbuf
|
||||||
|
else
|
||||||
|
comment (idx + 1) depth true lexbuf
|
||||||
|
else
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
comment idx depth true lexbuf
|
||||||
|
else
|
||||||
|
comment (idx + 1) depth true lexbuf
|
||||||
|
}
|
||||||
| eof
|
| eof
|
||||||
{ (idx, false) }
|
{ (idx, false) }
|
||||||
|
|
||||||
and string idx = parse
|
and string idx combining= parse
|
||||||
| '"'
|
| '"'
|
||||||
{ (idx + 1, true) }
|
{ (idx + 1, true) }
|
||||||
| "\\\""
|
| "\\\""
|
||||||
{ string (idx + 2) lexbuf }
|
{ string (idx + 2) false lexbuf }
|
||||||
| uchar
|
| uchar as uchar
|
||||||
{ string (idx + 1) lexbuf }
|
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
|
||||||
|
if not combining then
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
string (idx + 1) false lexbuf
|
||||||
|
else
|
||||||
|
string (idx + 1) true lexbuf
|
||||||
|
else
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
string idx true lexbuf
|
||||||
|
else
|
||||||
|
string (idx + 1) true lexbuf
|
||||||
|
}
|
||||||
| eof
|
| eof
|
||||||
{ (idx, false) }
|
{ (idx, false) }
|
||||||
|
|
||||||
and quoted_string idx tag = parse
|
and quoted_string idx tag combining= parse
|
||||||
| '|' (lowercase* as tag2) '}'
|
| '|' (lowercase* as tag2) '}'
|
||||||
{ let idx = idx + 2 + String.length tag2 in
|
{ let idx = idx + 2 + String.length tag2 in
|
||||||
if tag = tag2 then
|
if tag = tag2 then
|
||||||
(idx, true)
|
(idx, true)
|
||||||
else
|
else
|
||||||
quoted_string idx tag lexbuf }
|
quoted_string idx tag false lexbuf }
|
||||||
| eof
|
| eof
|
||||||
{ (idx, false) }
|
{ (idx, false) }
|
||||||
| uchar
|
| uchar as uchar
|
||||||
{ quoted_string (idx + 1) tag lexbuf }
|
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
|
||||||
|
if not combining then
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
quoted_string (idx + 1) tag false lexbuf
|
||||||
|
else
|
||||||
|
quoted_string (idx + 1) tag true lexbuf
|
||||||
|
else
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
quoted_string idx tag true lexbuf
|
||||||
|
else
|
||||||
|
quoted_string (idx + 1) tag true lexbuf
|
||||||
|
}
|
||||||
|
|
||||||
and quotation syntax depth idx1 idx2 ofs1 = parse
|
and quotation syntax depth idx1 idx2 ofs1 combining= parse
|
||||||
| '<' (':' ident)? ('@' lident)? '<'
|
| '<' (':' ident)? ('@' lident)? '<'
|
||||||
{ quotation syntax (depth + 1) idx1 (idx2 + lexeme_size lexbuf) ofs1 lexbuf }
|
{ quotation syntax (depth + 1) idx1 (idx2 + lexeme_size lexbuf) ofs1 false lexbuf }
|
||||||
| ">>"
|
| ">>"
|
||||||
{ if depth = 0 then
|
{ if depth = 0 then
|
||||||
let loc = mkloc idx1 (idx2 + 2) ofs1 (lexeme_end lexbuf) in
|
let loc = mkloc idx1 (idx2 + 2) ofs1 (lexeme_end lexbuf) in
|
||||||
(idx2 + 2, [(Quot_data, loc)], true)
|
(idx2 + 2, [(Quot_data, loc)], true)
|
||||||
else
|
else
|
||||||
quotation syntax (depth - 1) idx1 (idx2 + 2) ofs1 lexbuf }
|
quotation syntax (depth - 1) idx1 (idx2 + 2) ofs1 false lexbuf }
|
||||||
| '$'
|
| '$'
|
||||||
{ let quot_data_loc =
|
{ let quot_data_loc =
|
||||||
if idx1 = idx2 then
|
if idx1 = idx2 then
|
||||||
|
@ -262,15 +295,26 @@ and quotation syntax depth idx1 idx2 ofs1 = parse
|
||||||
} in
|
} in
|
||||||
let ofs = lexeme_end lexbuf in
|
let ofs = lexeme_end lexbuf in
|
||||||
let loc = mkloc opening_loc.idx1 idx opening_loc.ofs2 ofs in
|
let loc = mkloc opening_loc.idx1 idx opening_loc.ofs2 ofs in
|
||||||
let idx, quot_items, terminated = quotation syntax depth idx idx ofs lexbuf in
|
let idx, quot_items, terminated = quotation syntax depth idx idx ofs false lexbuf in
|
||||||
let quot_items = (Quot_anti anti, loc) :: quot_items in
|
let quot_items = (Quot_anti anti, loc) :: quot_items in
|
||||||
match quot_data_loc with
|
match quot_data_loc with
|
||||||
| Some loc ->
|
| Some loc ->
|
||||||
(idx, (Quot_data, loc) :: quot_items, terminated)
|
(idx, (Quot_data, loc) :: quot_items, terminated)
|
||||||
| None ->
|
| None ->
|
||||||
(idx, quot_items, terminated) }
|
(idx, quot_items, terminated) }
|
||||||
| uchar
|
| uchar as uchar
|
||||||
{ quotation syntax depth idx1 (idx2 + 1) ofs1 lexbuf }
|
{ let uChar= Zed_utf8.unsafe_extract uchar 0 in
|
||||||
|
if not combining then
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
quotation syntax depth idx1 (idx2 + 1) ofs1 false lexbuf
|
||||||
|
else
|
||||||
|
quotation syntax depth idx1 (idx2 + 1) ofs1 true lexbuf
|
||||||
|
else
|
||||||
|
if Zed_char.is_combining_mark uChar then
|
||||||
|
quotation syntax depth idx1 idx2 ofs1 true lexbuf
|
||||||
|
else
|
||||||
|
quotation syntax depth idx1 (idx2 + 1) ofs1 true lexbuf
|
||||||
|
}
|
||||||
| eof
|
| eof
|
||||||
{ if idx1 = idx2 then
|
{ if idx1 = idx2 then
|
||||||
(idx2, [], false)
|
(idx2, [], false)
|
||||||
|
|
Loading…
Reference in New Issue