diff --git a/src/camlp4/uTop_camlp4.ml b/src/camlp4/uTop_camlp4.ml index 7453e16..b189c3e 100644 --- a/src/camlp4/uTop_camlp4.ml +++ b/src/camlp4/uTop_camlp4.ml @@ -65,7 +65,6 @@ let parse_toplevel_phrase str eos_is_error = UTop.Error (locs, msg) let () = - UTop.set_camlp4 true; UTop.parse_toplevel_phrase := parse_toplevel_phrase; (* Force camlp4 to display its welcome message. *) try diff --git a/src/lib/uTop.ml b/src/lib/uTop.ml index 742d644..be5c76f 100644 --- a/src/lib/uTop.ml +++ b/src/lib/uTop.ml @@ -59,7 +59,12 @@ let make_variable ?eq x = let signal, set = S.create ?eq x in (signal, (fun () -> S.value signal), set) -let camlp4, get_camlp4, set_camlp4 = make_variable true +type syntax = + | Normal + | Camlp4o + | Camlp4r + +let syntax, get_syntax, set_syntax = make_variable Normal let phrase_terminator, get_phrase_terminator, set_phrase_terminator = make_variable ";;" let auto_run_lwt, get_auto_run_lwt, set_auto_run_lwt = make_variable true @@ -455,6 +460,7 @@ let () = "camlp4o" (Toploop.Directive_none (fun () -> + set_syntax Camlp4o; set_phrase_terminator ";;"; try Topfind.syntax "camlp4o"; @@ -467,6 +473,7 @@ let () = "camlp4r" (Toploop.Directive_none (fun () -> + set_syntax Camlp4r; set_phrase_terminator ";"; try Topfind.syntax "camlp4r"; diff --git a/src/lib/uTop.mli b/src/lib/uTop.mli index 68120f1..8a061a5 100644 --- a/src/lib/uTop.mli +++ b/src/lib/uTop.mli @@ -29,16 +29,25 @@ type ui = Console | Emacs val get_ui : unit -> ui (** Returns the user interface in use. *) -val camlp4 : bool signal - (** [true] if the lexer should recognize camlp4 quotations. This - variable is automatically set to [true] when you type [#camlp4o] - or [#camlp4r]. *) +(** Syntax. *) +type syntax = + | Normal + (** No camlp4. *) + | Camlp4o + (** Camlp4, original syntax. *) + | Camlp4r + (** Camlp4, revised syntax. *) -val get_camlp4 : unit -> bool - (** Returns the current value of {!camlp4}. *) +val syntax : syntax signal + (** The syntax in use. If it is {!Camlp4o} or {!Camlp4r} quotations + are recognized. It is modified when you type [#camlp4o] or + [#camlp4r]. *) -val set_camlp4 : bool -> unit - (** Modifies {!camlp4}. *) +val get_syntax : unit -> syntax + (** Returns the current value of {!syntax}. *) + +val set_syntax : syntax -> unit + (** Modifies {!syntax}. *) val phrase_terminator : string signal (** The phrase terminator. It is ";;" by default and ";" when you diff --git a/src/lib/uTop_complete.ml b/src/lib/uTop_complete.ml index 6d88433..ca60d14 100644 --- a/src/lib/uTop_complete.ml +++ b/src/lib/uTop_complete.ml @@ -60,9 +60,9 @@ type value_or_field = Value | Field let parse_longident tokens = let rec loop acc tokens = match tokens with - | (Symbol, _, _, ".") :: (Uident, _, _, id) :: tokens -> + | (Symbol ".", _) :: (Uident id, _) :: tokens -> loop (id :: acc) tokens - | (Symbol, _, _, ".") :: (Lident, _, _, id) :: tokens -> + | (Symbol ".", _) :: (Lident id, _) :: tokens -> (Field, match acc with | [] -> None @@ -74,18 +74,18 @@ let parse_longident tokens = | l -> Some (longident_of_list l)) in match tokens with - | ((Comment false | Doc false | String false | Quotation false), _, _, _) :: _ -> + | ((Comment (_, false) | String false | Quotation (_, false)), _) :: _ -> (* An unterminated command, string, or quotation. *) None - | ((Uident | Lident), start, _, id) :: tokens -> + | ((Uident id | Lident id), { idx1 = start }) :: tokens -> (* An identifier. *) let kind, path = loop [] tokens in Some (kind, path, start, id) - | (Blanks, _, stop, _) :: tokens -> + | (Blanks, { idx2 = stop }) :: tokens -> (* Some blanks at the end. *) let kind, path = loop [] tokens in Some (kind, path, stop, "") - | (_, _, stop, _) :: _ -> + | (_, { idx2 = stop }) :: _ -> (* Otherwise complete after the last token. *) let kind, path = loop [] tokens in Some (kind, path, stop, "") @@ -98,7 +98,7 @@ let parse_method tokens = longidentifier. *) let rec loop_uidents acc tokens = match tokens with - | (Symbol, _, _, ".") :: (Uident, _, _, id) :: tokens -> + | (Symbol ".", _) :: (Uident id, _) :: tokens -> loop_uidents (id :: acc) tokens | _ -> longident_of_list acc @@ -106,21 +106,21 @@ let parse_method tokens = (* Collect [m1#m2# ... #mp] *) let rec loop_methods acc tokens = match tokens with - | (Lident, _, _, meth) :: (Symbol, _, _, "#") :: tokens -> + | (Lident meth, _) :: (Symbol "#", _) :: tokens -> loop_methods (meth :: acc) tokens - | (Lident, _, _, id) :: tokens -> + | (Lident id, _) :: tokens -> Some (loop_uidents [id] tokens, acc) | _ -> None in match tokens with - | (Lident, start, _, meth) :: (Symbol, _, _, "#") :: tokens -> begin + | (Lident meth, { idx1 = start }) :: (Symbol "#", _) :: tokens -> begin match loop_methods [] tokens with | None -> None | Some (path, meths) -> Some (path, meths, start, meth) end - | (Symbol, _, stop, "#") :: tokens - | (Blanks, _, stop, _) :: (Symbol, _, _, "#") :: tokens -> begin + | (Symbol "#", { idx2 = stop }) :: tokens + | (Blanks, { idx2 = stop }) :: (Symbol "#", _) :: tokens -> begin match loop_methods [] tokens with | None -> None | Some (path, meths) -> Some (path, meths, stop, "") @@ -140,21 +140,21 @@ let parse_label tokens = (* Collect [M1.M2. ... .Mn] *) let rec loop_uidents acc_uidents acc_methods tokens = match tokens with - | (Lident, _, _, "new") :: _ -> + | (Lident "new", _) :: _ -> Some (New, longident_of_list acc_uidents, acc_methods) - | ((Lident | Uident), _, _, id) :: _ when String_set.mem id !UTop.keywords -> + | ((Lident id | Uident id), _) :: _ when String_set.mem id !UTop.keywords -> Some (Fun, longident_of_list acc_uidents, acc_methods) - | (Symbol, _, _, ".") :: (Uident, _, _, id) :: tokens -> + | (Symbol ".", _) :: (Uident id, _) :: tokens -> loop_uidents (id :: acc_uidents) acc_methods tokens - | (Symbol, _, _, ("~" | "?" | ":" | "." | "#" | "!" | "`")) :: tokens -> + | (Symbol ("~" | "?" | ":" | "." | "#" | "!" | "`"), _) :: tokens -> search tokens - | (Symbol, _, _, ")") :: tokens -> + | (Symbol ")", _) :: tokens -> skip tokens "(" [] - | (Symbol, _, _, "}") :: tokens -> + | (Symbol "}", _) :: tokens -> skip tokens "{" [] - | (Symbol, _, _, "]") :: tokens -> + | (Symbol "]", _) :: tokens -> skip tokens "[" [] - | (Symbol, _, _, _) :: _ -> + | (Symbol _, _) :: _ -> Some (Fun, longident_of_list acc_uidents, acc_methods) | [] -> Some (Fun, longident_of_list acc_uidents, acc_methods) @@ -162,21 +162,21 @@ let parse_label tokens = search tokens and loop_methods acc tokens = match tokens with - | ((Lident | Uident), _, _, id) :: _ when String_set.mem id !UTop.keywords -> + | ((Lident id | Uident id), _) :: _ when String_set.mem id !UTop.keywords -> None - | (Symbol, _, _, ("~" | "?" | ":" | "." | "#" | "!" | "`")) :: tokens -> + | (Symbol ("~" | "?" | ":" | "." | "#" | "!" | "`"), _) :: tokens -> search tokens - | (Symbol, _, _, ")") :: tokens -> + | (Symbol ")", _) :: tokens -> skip tokens "(" [] - | (Symbol, _, _, "}") :: tokens -> + | (Symbol "}", _) :: tokens -> skip tokens "{" [] - | (Symbol, _, _, "]") :: tokens -> + | (Symbol "]", _) :: tokens -> skip tokens "[" [] - | (Symbol, _, _, _) :: _ -> + | (Symbol _, _) :: _ -> None - | (Lident, _, _, id) :: (Symbol, _, _, "#") :: tokens -> + | (Lident id, _) :: (Symbol "#", _) :: tokens -> loop_methods (id :: acc) tokens - | (Lident, _, _, id) :: tokens -> + | (Lident id, _) :: tokens -> loop_uidents [id] acc tokens | [] -> None @@ -184,21 +184,21 @@ let parse_label tokens = search tokens and search tokens = match tokens with - | ((Lident | Uident), _, _, id) :: _ when String_set.mem id !UTop.keywords -> + | ((Lident id | Uident id), _) :: _ when String_set.mem id !UTop.keywords -> None - | (Symbol, _, _, ("~" | "?" | ":" | "." | "#" | "!" | "`")) :: tokens -> + | (Symbol ("~" | "?" | ":" | "." | "#" | "!" | "`"), _) :: tokens -> search tokens - | (Symbol, _, _, ")") :: tokens -> + | (Symbol ")", _) :: tokens -> skip tokens "(" [] - | (Symbol, _, _, "}") :: tokens -> + | (Symbol "}", _) :: tokens -> skip tokens "{" [] - | (Symbol, _, _, "]") :: tokens -> + | (Symbol "]", _) :: tokens -> skip tokens "[" [] - | (Symbol, _, _, _) :: _ -> + | (Symbol _, _) :: _ -> None - | (Lident, _, _, id) :: (Symbol, _, _, "#") :: tokens -> + | (Lident id, _) :: (Symbol "#", _) :: tokens -> loop_methods [id] tokens - | (Lident, _, _, id) :: tokens -> + | (Lident id, _) :: tokens -> loop_uidents [id] [] tokens | _ :: tokens -> search tokens @@ -206,16 +206,16 @@ let parse_label tokens = None and skip tokens top stack = match tokens with - | (Symbol, _, _, symbol) :: tokens when symbol = top -> begin + | (Symbol symbol, _) :: tokens when symbol = top -> begin match stack with | [] -> search tokens | top :: stack -> skip tokens top stack end - | (Symbol, _, _, ")") :: tokens -> + | (Symbol ")", _) :: tokens -> skip tokens "(" (top :: stack) - | (Symbol, _, _, "}") :: tokens -> + | (Symbol "}", _) :: tokens -> skip tokens "{" (top :: stack) - | (Symbol, _, _, "]") :: tokens -> + | (Symbol "]", _) :: tokens -> skip tokens "[" (top :: stack) | _ :: tokens -> skip tokens top stack @@ -223,22 +223,22 @@ let parse_label tokens = None in match tokens with - | (Lident, start, _, label) :: (Symbol, _, _, "~") :: tokens -> begin + | (Lident label, { idx1 = start }) :: (Symbol "~", _) :: tokens -> begin match search tokens with | None -> None | Some (kind, id, meths) -> Some (kind, id, meths, Required, start, label) end - | (Symbol, _, stop, "~") :: tokens -> begin + | (Symbol "~", { idx2 = stop }) :: tokens -> begin match search tokens with | None -> None | Some (kind, id, meths) -> Some (kind, id, meths, Required, stop, "") end - | (Lident, start, _, label) :: (Symbol, _, _, "?") :: tokens -> begin + | (Lident label, { idx1 = start }) :: (Symbol "?", _) :: tokens -> begin match search tokens with | None -> None | Some (kind, id, meths) -> Some (kind, id, meths, Optional, start, label) end - | (Symbol, _, stop, "?") :: tokens -> begin + | (Symbol "?", { idx2 = stop }) :: tokens -> begin match search tokens with | None -> None | Some (kind, id, meths) -> Some (kind, id, meths, Optional, stop, "") @@ -250,13 +250,13 @@ let parse_label tokens = | Directive listing | +-----------------------------------------------------------------+ *) -let list_directives () = +let list_directives phrase_terminator = String_map.bindings (Hashtbl.fold (fun dir kind map -> let suffix = match kind with - | Toploop.Directive_none _ -> ";;" + | Toploop.Directive_none _ -> phrase_terminator | Toploop.Directive_string _ -> " \"" | Toploop.Directive_bool _ | Toploop.Directive_int _ | Toploop.Directive_ident _ -> " " in @@ -634,50 +634,79 @@ let labels_of_newclass longident = labels_of_type String_map.empty type_expr (* +-----------------------------------------------------------------+ - | Filtering | + | Tokens processing | +-----------------------------------------------------------------+ *) (* Filter blanks and comments except for the last token. *) let rec filter tokens = match tokens with | [] -> [] - | [((Blanks | Comment true | Doc true), start, stop, src)] -> [(Blanks, start, stop, src)] - | ((Blanks | Comment true | Doc true), _, _, _) :: rest -> filter rest + | [((Blanks | Comment (_, true)), loc)] -> [(Blanks, loc)] + | ((Blanks | Comment (_, true)), _) :: rest -> filter rest | x :: rest -> x :: filter rest +(* Reverse and filter blanks and comments except for the last + token. *) +let rec rev_filter acc tokens = + match tokens with + | [] -> acc + | [((Blanks | Comment (_, true)), loc)] -> (Blanks, loc) :: acc + | ((Blanks | Comment (_, true)), _) :: rest -> rev_filter acc rest + | x :: rest -> rev_filter (x :: acc) rest + +(* Find the current context. *) +let rec find_context tokens = function + | [] -> + Some (rev_filter [] tokens) + | [(Quotation (items, false), _)] -> + find_context_in_quotation items + | _ :: rest -> + find_context tokens rest + +and find_context_in_quotation = function + | [] -> + None + | [(Quot_anti { a_closing = None; a_contents = tokens }, _)] -> + find_context tokens tokens + | _ :: rest -> + find_context_in_quotation rest + (* +-----------------------------------------------------------------+ | Completion | +-----------------------------------------------------------------+ *) -let complete str = - let tokens = UTop_lexer.lex_string ~camlp4:(UTop.get_camlp4 ()) str in +let complete ~syntax ~phrase_terminator ~input = + let tokens = UTop_lexer.lex_string syntax input in (* Filter blanks and comments. *) let tokens = filter tokens in match tokens with (* Completion on directive names. *) - | [(Symbol, _, stop, "#")] - | [(Symbol, _, _, "#"); (Blanks, _, stop, _)] -> - (stop, list_directives ()) - | [(Symbol, _, _, "#"); ((Lident | Uident), start, _, src)] -> - (start, lookup_assoc src (list_directives ())) + | [(Symbol "#", { idx2 = stop })] + | [(Symbol "#", _); (Blanks, { idx2 = stop })] -> + (stop, list_directives phrase_terminator) + | [(Symbol "#", _); ((Lident src | Uident src), { idx1 = start })] -> + (start, lookup_assoc src (list_directives phrase_terminator)) (* Complete with ";;" when possible. *) - | [(Symbol, _, _, "#"); ((Lident | Uident), _, _, _); (String true, _, stop, _)] - | [(Symbol, _, _, "#"); ((Lident | Uident), _, _, _); (String true, _, _, _); (Blanks, _, stop, _)] -> - (stop, [(";;", "")]) - | [(Symbol, _, _, "#"); ((Lident | Uident), _, _, _); (String true, _, _, _); (Symbol, start, _, ";")] -> - (start, [(";;", "")]) + | [(Symbol "#", _); ((Lident _ | Uident _), _); (String true, { idx2 = stop })] + | [(Symbol "#", _); ((Lident _ | Uident _), _); (String true, _); (Blanks, { idx2 = stop })] -> + (stop, [(phrase_terminator, "")]) + | [(Symbol "#", _); ((Lident _ | Uident _), _); (String true, _); (Symbol sym, { idx1 = start })] -> + if Zed_utf8.starts_with phrase_terminator sym then + (start, [(phrase_terminator, "")]) + else + (0, []) (* Completion on #require. *) - | [(Symbol, _, _, "#"); (Lident, _, _, "require"); (String false, start, stop, str)] -> - let pkg = String.sub str 1 (String.length str - 1) in + | [(Symbol "#", _); (Lident "require", _); (String false, loc)] -> + let pkg = String.sub input (loc.ofs1 + 1) (String.length input - loc.ofs1 - 1) in let pkgs = lookup pkg (Fl_package_base.list_packages ()) in - (start + 1, List.map (fun pkg -> (pkg, "\";;")) (List.sort compare pkgs)) + (loc.idx1 + 1, List.map (fun pkg -> (pkg, "\"" ^ phrase_terminator)) (List.sort compare pkgs)) (* Completion on #load. *) - | [(Symbol, _, _, "#"); (Lident, _, _, "load"); (String false, start, stop, str)] -> - let file = String.sub str 1 (String.length str - 1) in + | [(Symbol "#", _); (Lident "load", _); (String false, loc)] -> + let file = String.sub input (loc.ofs1 + 1) (String.length input - loc.ofs1 - 1) in let filter name = Filename.check_suffix name ".cma" || Filename.check_suffix name ".cmo" in let map = if Filename.is_relative file then @@ -692,12 +721,12 @@ let complete str = let list = String_map.bindings map in let name = basename file in let result = lookup_assoc name list in - (stop - Zed_utf8.length name, - List.map (function (w, Directory) -> (w, "") | (w, File) -> (w, "\";;")) result) + (loc.idx2 - Zed_utf8.length name, + List.map (function (w, Directory) -> (w, "") | (w, File) -> (w, "\"" ^ phrase_terminator)) result) (* Completion on #use. *) - | [(Symbol, _, _, "#"); (Lident, _, _, "use"); (String false, start, stop, str)] -> - let file = String.sub str 1 (String.length str - 1) in + | [(Symbol "#", _); (Lident "use", _); (String false, loc)] -> + let file = String.sub input (loc.ofs1 + 1) (String.length input - loc.ofs1 - 1) in let filter name = match try Some (String.rindex name '.') with Not_found -> None with | None -> @@ -719,28 +748,28 @@ let complete str = let list = String_map.bindings map in let name = basename file in let result = lookup_assoc name list in - (stop - Zed_utf8.length name, - List.map (function (w, Directory) -> (w, "") | (w, File) -> (w, "\";;")) result) + (loc.idx2 - Zed_utf8.length name, + List.map (function (w, Directory) -> (w, "") | (w, File) -> (w, "\"" ^ phrase_terminator)) result) (* Completion on #directory and #cd. *) - | [(Symbol, _, _, "#"); (Lident, _, _, ("cd" | "directory")); (String false, start, stop, str)] -> - let file = String.sub str 1 (String.length str - 1) in + | [(Symbol "#", _); (Lident ("cd" | "directory"), _); (String false, loc)] -> + let file = String.sub input (loc.ofs1 + 1) (String.length input - loc.ofs1 - 1) in let list = list_directories (Filename.dirname file) in let name = basename file in let result = lookup name list in - (stop - Zed_utf8.length name, List.map (function dir -> (dir, "")) result) + (loc.idx2 - Zed_utf8.length name, List.map (function dir -> (dir, "")) result) (* Generic completion on directives. *) - | [(Symbol, _, _, "#"); ((Lident | Uident), _, _, dir); (Blanks, _, stop, _)] -> + | [(Symbol "#", _); ((Lident dir | Uident dir), _); (Blanks, { idx2 = stop })] -> (stop, match try Some (Hashtbl.find Toploop.directive_table dir) with Not_found -> None with - | Some (Toploop.Directive_none _) -> [(";;", "")] + | Some (Toploop.Directive_none _) -> [(phrase_terminator, "")] | Some (Toploop.Directive_string _) -> [(" \"", "")] - | Some (Toploop.Directive_bool _) -> [("true", ";;"); ("false", ";;")] + | Some (Toploop.Directive_bool _) -> [("true", phrase_terminator); ("false", phrase_terminator)] | Some (Toploop.Directive_int _) -> [] | Some (Toploop.Directive_ident _) -> List.map (fun w -> (w, "")) (String_set.elements (Lazy.force !global_names)) | None -> []) - | (Symbol, _, _, "#") :: ((Lident | Uident), _, _, dir) :: tokens -> begin + | (Symbol "#", _) :: ((Lident dir | Uident dir), _) :: tokens -> begin match try Some (Hashtbl.find Toploop.directive_table dir) with Not_found -> None with | Some (Toploop.Directive_none _) -> (0, []) @@ -748,8 +777,8 @@ let complete str = (0, []) | Some (Toploop.Directive_bool _) -> begin match tokens with - | [(Lident, start, _, id)] -> - (start, lookup_assoc id [("true", ";;"); ("false", ";;")]) + | [(Lident id, { idx1 = start })] -> + (start, lookup_assoc id [("true", phrase_terminator); ("false", phrase_terminator)]) | _ -> (0, []) end @@ -769,32 +798,35 @@ let complete str = end (* Completion on identifiers. *) - | [] -> - (0, List.map (fun w -> (w, "")) (String_set.elements (String_set.union !UTop.keywords (Lazy.force !global_names)))) | _ -> - let tokens = List.rev tokens in - match parse_method tokens with - | Some (longident, meths, start, meth) -> - (start, List.map (fun w -> (w, "")) (lookup meth (methods_of_object longident meths))) + match find_context tokens tokens with | None -> - match parse_label tokens with - | Some (Fun, longident, meths, Optional, start, label) -> - (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (List.filter (function (w, Optional) -> true | (w, Required) -> false) (labels_of_function longident meths)))) - | Some (Fun, longident, meths, Required, start, label) -> - (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (labels_of_function longident meths))) - | Some (New, longident, meths, Optional, start, label) -> - (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (List.filter (function (w, Optional) -> true | (w, Required) -> false) (labels_of_newclass longident)))) - | Some (New, longident, meths, Required, start, label) -> - (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (labels_of_newclass longident))) + (0, []) + | Some [] -> + (0, List.map (fun w -> (w, "")) (String_set.elements (String_set.union !UTop.keywords (Lazy.force !global_names)))) + | Some tokens -> + match parse_method tokens with + | Some (longident, meths, start, meth) -> + (start, List.map (fun w -> (w, "")) (lookup meth (methods_of_object longident meths))) | None -> - match parse_longident tokens with + match parse_label tokens with + | Some (Fun, longident, meths, Optional, start, label) -> + (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (List.filter (function (w, Optional) -> true | (w, Required) -> false) (labels_of_function longident meths)))) + | Some (Fun, longident, meths, Required, start, label) -> + (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (labels_of_function longident meths))) + | Some (New, longident, meths, Optional, start, label) -> + (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (List.filter (function (w, Optional) -> true | (w, Required) -> false) (labels_of_newclass longident)))) + | Some (New, longident, meths, Required, start, label) -> + (start, List.map (fun (w, kind) -> (w, ":")) (lookup_assoc label (labels_of_newclass longident))) | None -> - (0, []) - | Some (Value, None, start, id) -> - (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (String_set.union !UTop.keywords (Lazy.force !global_names))))) - | Some (Value, Some longident, start, id) -> - (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (names_of_module longident)))) - | Some (Field, None, start, id) -> - (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (Lazy.force !global_fields)))) - | Some (Field, Some longident, start, id) -> - (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (fields_of_module longident)))) + match parse_longident tokens with + | None -> + (0, []) + | Some (Value, None, start, id) -> + (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (String_set.union !UTop.keywords (Lazy.force !global_names))))) + | Some (Value, Some longident, start, id) -> + (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (names_of_module longident)))) + | Some (Field, None, start, id) -> + (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (Lazy.force !global_fields)))) + | Some (Field, Some longident, start, id) -> + (start, List.map (fun w -> (w, "")) (lookup id (String_set.elements (fields_of_module longident)))) diff --git a/src/lib/uTop_complete.mli b/src/lib/uTop_complete.mli index 828b62b..f1604ea 100644 --- a/src/lib/uTop_complete.mli +++ b/src/lib/uTop_complete.mli @@ -9,9 +9,10 @@ (** OCaml completion. *) -val complete : string -> int * (string * string) list - (** [complete str] returns the start of the completed word in [str] - and the list of possible completions with their suffixes. *) +val complete : syntax : UTop.syntax -> phrase_terminator : string -> input : string -> int * (string * string) list + (** [complete ~syntax ~phrase_terminator ~input] returns the start + of the completed word in [input] and the list of possible + completions with their suffixes. *) val reset : unit -> unit (** Reset global cache. It must be called before each interactive diff --git a/src/lib/uTop_lexer.mli b/src/lib/uTop_lexer.mli index 534581d..c8de756 100644 --- a/src/lib/uTop_lexer.mli +++ b/src/lib/uTop_lexer.mli @@ -7,9 +7,6 @@ * This file is a part of utop. *) -val lex_string : ?camlp4 : bool -> string -> (UTop_token.t * int * int * string) list - (** [lex_string str] returns all the tokens contained in [str]. It - returns a list of [(token, start_index, stop_index, - contents)]. Indexes are in unicode characters. - - If [camlp4] is [true] then quotations are parsed. *) +val lex_string : UTop.syntax -> string -> (UTop_token.t * UTop_token.location) list + (** [lex_string syntax str] returns all the tokens contained in + [str]. *) diff --git a/src/lib/uTop_lexer.mll b/src/lib/uTop_lexer.mll index c3a3979..85eaafe 100644 --- a/src/lib/uTop_lexer.mll +++ b/src/lib/uTop_lexer.mll @@ -10,7 +10,40 @@ (* Lexer for the OCaml language. *) { + open Lexing open UTop_token + + (* Return the size in bytes. *) + let lexeme_size lexbuf = + lexeme_end lexbuf - lexeme_start lexbuf + + let mkloc idx1 idx2 ofs1 ofs2 = { + idx1 = idx1; + idx2 = idx2; + ofs1 = ofs1; + ofs2 = ofs2; + } + + (* Only for ascii-only lexemes. *) + let lexeme_loc idx lexbuf = + let ofs1 = lexeme_start lexbuf and ofs2 = lexeme_end lexbuf in + { + idx1 = idx; + idx2 = idx + (ofs2 - ofs1); + ofs1 = ofs1; + ofs2 = ofs2; + } + + let merge_loc l1 l2 = { + idx1 = l1.idx1; + idx2 = l2.idx2; + ofs1 = l1.ofs1; + ofs2 = l2.ofs2; + } + + type context = + | Toplevel + | Antiquot } let uchar = ['\x00' - '\x7f'] | _ [ '\x80' - '\xbf' ]* @@ -42,51 +75,98 @@ let float_literal = let symbolchar = ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~'] -rule token fallback = parse +rule tokens syntax context idx = parse + | eof + { (idx, None, []) } | ('\n' | blank)+ - { Blanks } - | "true" - { Constant } - | "false" - { Constant } + { let loc = lexeme_loc idx lexbuf in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Blanks, loc) :: l) } | lident - { Lident } + { let src = lexeme lexbuf in + let loc = lexeme_loc idx lexbuf in + let tok = + match syntax, src with + | (UTop.Normal | UTop.Camlp4o), ("true" | "false") -> + Constant src + | _ -> + Lident src + in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (tok, loc) :: l) } | uident - { Uident } + { let src = lexeme lexbuf in + let loc = lexeme_loc idx lexbuf in + let tok = + match syntax, src with + | UTop.Camlp4r, "True" -> + Constant "true" + | UTop.Camlp4r, "False" -> + Constant "false" + | _ -> + Uident src + in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (tok, loc) :: l) } | int_literal "l" - { Constant } | int_literal "L" - { Constant } | int_literal "n" - { Constant } | int_literal - { Constant } | float_literal - { Constant } + { let loc = lexeme_loc idx lexbuf in + let tok = Constant (lexeme lexbuf) in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (tok, loc) :: l) } | '"' - { String (string lexbuf) } + { let ofs = lexeme_start lexbuf in + let idx2, terminated = string (idx + 1) lexbuf in + let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in + let idx, res, l = tokens syntax context idx2 lexbuf in + (idx, res, (String terminated, loc) :: l) } | "'" [^'\'' '\\'] "'" - { Char } | "'\\" ['\\' '"' 'n' 't' 'b' 'r' ' ' '\'' 'x' '0'-'9'] eof - { Char } | "'\\" ['\\' '"' 'n' 't' 'b' 'r' ' ' '\''] "'" - { Char } | "'\\" (['0'-'9'] ['0'-'9'] | 'x' hexa_char) eof - { Char } | "'\\" (['0'-'9'] ['0'-'9'] ['0'-'9'] | 'x' hexa_char hexa_char) eof - { Char } | "'\\" (['0'-'9'] ['0'-'9'] ['0'-'9'] | 'x' hexa_char hexa_char) "'" - { Char } + { let loc = lexeme_loc idx lexbuf in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Char, loc) :: l) } | "'\\" uchar - { Error } + { let loc = mkloc idx (idx + 3) (lexeme_start lexbuf) (lexeme_end lexbuf) in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Error, loc) :: l) } + | "(*)" + { let loc = lexeme_loc idx lexbuf in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Comment (Comment_reg, true), loc) :: l) } + | "(**)" + { let loc = lexeme_loc idx lexbuf in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Comment (Comment_doc, true), loc) :: l) } | "(**" - { Doc (comment 0 lexbuf) } + { let ofs = lexeme_start lexbuf in + let idx2, terminated = comment (idx + 3) 0 lexbuf in + let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in + let idx, res, l = tokens syntax context idx2 lexbuf in + (idx, res, (Comment (Comment_doc, terminated), loc) :: l) } | "(*" - { Comment (comment 0 lexbuf) } + { let ofs = lexeme_start lexbuf in + let idx2, terminated = comment (idx + 2) 0 lexbuf in + let loc = mkloc idx idx2 ofs (lexeme_end lexbuf) in + let idx, res, l = tokens syntax context idx2 lexbuf in + (idx, res, (Comment (Comment_reg, terminated), loc) :: l) } | "" - { fallback lexbuf } + { if syntax = UTop.Normal then + symbol syntax context idx lexbuf + else + match context with + | Toplevel -> + camlp4_toplevel syntax context idx lexbuf + | Antiquot -> + camlp4_antiquot syntax context idx lexbuf } -and token_fallback = parse +and symbol syntax context idx = parse | "(" | ")" | "[" | "]" | "{" | "}" @@ -95,71 +175,114 @@ and token_fallback = parse | "," | ";" | ";;" | symbolchar+ - { Symbol } + { let loc = lexeme_loc idx lexbuf in + let tok = Symbol (lexeme lexbuf) in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (tok, loc) :: l) } | uchar - { Error } - | eof - { raise End_of_file } + { let loc = mkloc idx (idx + 1) (lexeme_start lexbuf) (lexeme_end lexbuf) in + let idx, res, l = tokens syntax context loc.idx2 lexbuf in + (idx, res, (Error, loc) :: l) } -and token_fallback_camlp4 = parse +and camlp4_toplevel syntax context idx = parse | '<' (':' ident)? ('@' lident)? '<' - { Quotation (quotation lexbuf) } + { let ofs = lexeme_start lexbuf in + let idx2, items, terminated = quotation syntax 0 idx (idx + lexeme_size lexbuf) (lexeme_start lexbuf) lexbuf in + let ofs2 = lexeme_end lexbuf in + let idx3, res, l = tokens syntax context idx2 lexbuf in + (idx3, res, (Quotation (items, terminated), mkloc idx idx2 ofs ofs2) :: l) } | "" - { token_fallback lexbuf } + { symbol syntax context idx lexbuf } -and comment depth = parse +and camlp4_antiquot syntax context idx = parse + | '$' + { (idx + 1, Some (lexeme_loc idx lexbuf), []) } + | "" + { camlp4_toplevel syntax context idx lexbuf } + +and comment idx depth = parse | "(*" - { comment (depth + 1) lexbuf } + { comment (idx + 2) (depth + 1) lexbuf } | "*)" - { if depth > 0 then comment (depth - 1) lexbuf else true } + { if depth = 0 then + (idx + 2, true) + else + comment (idx + 2) (depth - 1) lexbuf } | '"' - { string lexbuf && comment depth lexbuf } + { let idx, terminated = string (idx + 1) lexbuf in + if terminated then + comment idx depth lexbuf + else + (idx, false) } | uchar - { comment depth lexbuf } + { comment (idx + 1) depth lexbuf } | eof - { false } + { (idx, false) } -and string = parse +and string idx = parse | '"' - { true } + { (idx + 1, true) } | "\\\"" - { string lexbuf } + { string (idx + 2) lexbuf } | uchar - { string lexbuf } + { string (idx + 1) lexbuf } | eof - { false } + { (idx, false) } -and quotation = parse +and quotation syntax depth idx1 idx2 ofs1 = parse + | '<' (':' ident)? ('@' lident)? '<' + { quotation syntax (depth + 1) idx1 (idx2 + lexeme_size lexbuf) ofs1 lexbuf } | ">>" - { true } + { if depth = 0 then + let loc = mkloc idx1 (idx2 + 2) ofs1 (lexeme_end lexbuf) in + (idx2 + 2, [(Quot_data, loc)], true) + else + quotation syntax (depth - 1) idx1 (idx2 + 2) ofs1 lexbuf } | '$' - { antiquotation lexbuf } + { let quot_data_loc = + if idx1 = idx2 then + None + else + Some (mkloc idx1 idx2 ofs1 (lexeme_start lexbuf)) + in + let opening_loc = lexeme_loc idx2 lexbuf in + let idx, name = quotation_name (idx2 + 1) lexbuf in + let idx, closing_loc, items = tokens syntax Antiquot idx lexbuf in + let anti = { + a_opening = opening_loc; + a_closing = closing_loc; + a_name = name; + a_contents = items; + } in + let ofs = lexeme_end lexbuf in + let loc = mkloc opening_loc.idx1 idx opening_loc.ofs2 ofs in + let idx, quot_items, terminated = quotation syntax depth idx idx ofs lexbuf in + let quot_items = (Quot_anti anti, loc) :: quot_items in + match quot_data_loc with + | Some loc -> + (idx, (Quot_data, loc) :: quot_items, terminated) + | None -> + (idx, quot_items, terminated) } | uchar - { quotation lexbuf } + { quotation syntax depth idx1 (idx2 + 1) ofs1 lexbuf } | eof - { false } + { if idx1 = idx2 then + (idx2, [], false) + else + let loc = mkloc idx1 idx2 ofs1 (lexeme_end lexbuf) in + (idx2, [(Quot_data, loc)], false) } -and antiquotation = parse - | '$' - { quotation lexbuf } - | eof - { false } +and quotation_name idx = parse + | '`'? (identchar*|['.' '!']+) ':' + { let len = lexeme_size lexbuf in + let ofs = lexeme_start lexbuf in + (idx + len, Some (mkloc idx (idx + len - 1) ofs (ofs + len - 1), + mkloc (idx + len - 1) (idx + len) (ofs + len - 1) (ofs + len))) } | "" - { ignore (token token_fallback_camlp4 lexbuf); antiquotation lexbuf } + { (idx, None) } { - let lex_string ?(camlp4=false) str = - let fallback = if camlp4 then token_fallback_camlp4 else token_fallback in - let lexbuf = Lexing.from_string str in - let rec loop idx ofs_a = - match try Some (token fallback lexbuf) with End_of_file -> None with - | Some token -> - let ofs_b = Lexing.lexeme_end lexbuf in - let src = String.sub str ofs_a (ofs_b - ofs_a) in - let idx' = idx + Zed_utf8.length src in - (token, idx, idx', src) :: loop idx' ofs_b - | None -> - [] - in - loop 0 0 + let lex_string syntax str = + let _, _, items = tokens syntax Toplevel 0 (Lexing.from_string str) in + items } diff --git a/src/lib/uTop_main.ml b/src/lib/uTop_main.ml index 8abdf4c..e65a4f0 100644 --- a/src/lib/uTop_main.ml +++ b/src/lib/uTop_main.ml @@ -122,13 +122,13 @@ class read_phrase ~term = object(self) let styled, position = super#stylise last in (* Syntax highlighting *) - let stylise start stop token_style = - for i = start to stop - 1 do + let stylise loc token_style = + for i = loc.idx1 to loc.idx2 - 1 do let ch, style = styled.(i) in styled.(i) <- (ch, LTerm_style.merge token_style style) done in - UTop_styles.stylise stylise (UTop_lexer.lex_string ~camlp4:(UTop.get_camlp4 ()) (LTerm_text.to_string styled)); + UTop_styles.stylise stylise (UTop_lexer.lex_string (UTop.get_syntax ()) (LTerm_text.to_string styled)); if not last then (* Parenthesis matching. *) @@ -151,7 +151,12 @@ class read_phrase ~term = object(self) (styled, position) method completion = - let pos, words = UTop_complete.complete (Zed_rope.to_string self#input_prev) in + let pos, words = + UTop_complete.complete + ~syntax:(UTop.get_syntax ()) + ~phrase_terminator:(UTop.get_phrase_terminator ()) + ~input:(Zed_rope.to_string self#input_prev) + in self#set_completion pos words initializer @@ -207,13 +212,13 @@ let print_out_phrase term printer pp out_phrase = Format.pp_print_flush pp (); let string = fix_string (Buffer.contents buffer) in let styled = LTerm_text.of_string string in - let stylise start stop token_style = - for i = start to stop - 1 do + let stylise loc token_style = + for i = loc.idx1 to loc.idx2 - 1 do let ch, style = styled.(i) in styled.(i) <- (ch, LTerm_style.merge token_style style) done in - UTop_styles.stylise stylise (UTop_lexer.lex_string string); + UTop_styles.stylise stylise (UTop_lexer.lex_string (UTop.get_syntax ()) string); Lwt_main.run (LTerm.fprints term styled) (* +-----------------------------------------------------------------+ @@ -579,7 +584,12 @@ module Emacs(M : sig end) = struct loop () | Some ("complete", _) -> let input = read_data () in - let start, words = UTop_complete.complete input in + let start, words = + UTop_complete.complete + ~syntax:(UTop.get_syntax ()) + ~phrase_terminator:(UTop.get_phrase_terminator ()) + ~input + in let words = List.map fst words in let prefix = LTerm_read_line.common_prefix words in let index = String.length input - start in diff --git a/src/lib/uTop_styles.ml b/src/lib/uTop_styles.ml index 6b18ee6..cb7ace4 100644 --- a/src/lib/uTop_styles.ml +++ b/src/lib/uTop_styles.ml @@ -91,209 +91,103 @@ let load () = | Unix.Unix_error (error, func, arg) -> Lwt_log.error_f "cannot load styles from %S: %s: %s" fn func (Unix.error_message error) +let rec stylise_filter_layout stylise tokens = + match tokens with + | [] -> + [] + | (Comment (Comment_reg, _), loc) :: tokens -> + stylise loc styles.style_comment; + stylise_filter_layout stylise tokens + | (Comment (Comment_doc, _), loc) :: tokens -> + stylise loc styles.style_doc; + stylise_filter_layout stylise tokens + | (Blanks, loc) :: tokens -> + stylise loc styles.style_blanks; + stylise_filter_layout stylise tokens + | x :: tokens -> + x :: stylise_filter_layout stylise tokens + +let rec stylise_rec stylise tokens = + match tokens with + | [] -> + () + | (Symbol _, loc) :: tokens -> + stylise loc styles.style_symbol; + stylise_rec stylise tokens + | (Lident id, loc) :: tokens -> + stylise loc + (if String_set.mem id !UTop.keywords then + styles.style_keyword + else + styles.style_ident); + stylise_rec stylise tokens + | (Uident id, loc) :: tokens when String_set.mem id !UTop.keywords -> + stylise loc styles.style_keyword; + stylise_rec stylise tokens + | (Uident id, loc1) :: (Symbol ".", loc2) :: tokens -> + stylise loc1 styles.style_module; + stylise loc2 styles.style_symbol; + stylise_rec stylise tokens + | (Uident id, loc) :: tokens -> + stylise loc styles.style_ident; + stylise_rec stylise tokens + | (Constant _, loc) :: tokens -> + stylise loc styles.style_constant; + stylise_rec stylise tokens + | (Char, loc) :: tokens -> + stylise loc styles.style_char; + stylise_rec stylise tokens + | (String _, loc) :: tokens -> + stylise loc styles.style_string; + stylise_rec stylise tokens + | (Quotation (items, _), _) :: tokens -> + stylise_quotation_items stylise items; + stylise_rec stylise tokens + | (Error, loc) :: tokens -> + stylise loc styles.style_error; + stylise_rec stylise tokens + | ((Comment _ | Blanks), _) :: _ -> + assert false + +and stylise_quotation_items stylise items = + match items with + | [] -> + () + | (Quot_data, loc) :: items -> + stylise loc styles.style_quotation; + stylise_quotation_items stylise items + | (Quot_anti anti, _) :: items -> + stylise anti.a_opening styles.style_symbol; + (match anti.a_name with + | None -> + () + | Some (loc1, loc2) -> + stylise loc1 styles.style_module; + stylise loc2 styles.style_symbol); + let tokens = stylise_filter_layout stylise anti.a_contents in + stylise_rec stylise tokens; + (match anti.a_closing with + | None -> + () + | Some loc -> + stylise loc styles.style_symbol); + stylise_quotation_items stylise items + let stylise stylise tokens = - let rec loop tokens = - match tokens with - | [] -> - () - | (token, start, stop, src) :: rest -> - match token with - | Symbol -> - stylise start stop styles.style_symbol; - loop rest - | Lident -> - stylise start stop - (if String_set.mem src !UTop.keywords then - styles.style_keyword - else - styles.style_ident); - loop rest - | Uident -> - if String_set.mem src !UTop.keywords then begin - stylise start stop styles.style_keyword; - loop rest - end else - loop_after_uident start stop rest - | Constant -> - stylise start stop styles.style_constant; - loop rest - | Char -> - stylise start stop styles.style_char; - loop rest - | String _ -> - stylise start stop styles.style_string; - loop rest - | Quotation _ -> - stylise start stop styles.style_quotation; - loop rest - | Comment _ -> - stylise start stop styles.style_comment; - loop rest - | Doc _ -> - stylise start stop styles.style_doc; - loop rest - | Blanks -> - stylise start stop styles.style_blanks; - loop rest - | Error -> - stylise start stop styles.style_error; - loop rest - and loop_after_uident uid_start uid_stop tokens = - match tokens with - | [] -> - () - | (token, start, stop, src) :: rest -> - match token with - | Symbol -> - if src = "." then - stylise uid_start uid_stop styles.style_module - else - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_symbol; - loop rest - | Lident -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop - (if String_set.mem src !UTop.keywords then - styles.style_keyword - else - styles.style_ident); - loop rest - | Uident -> - stylise uid_start uid_stop styles.style_ident; - if String_set.mem src !UTop.keywords then begin - stylise start stop styles.style_keyword; - loop rest - end else - loop_after_uident start stop rest - | Constant -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_constant; - loop rest - | Char -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_char; - loop rest - | String _ -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_string; - loop rest - | Quotation _ -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_quotation; - loop rest - | Comment _ -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_comment; - loop_after_uident uid_start uid_stop rest - | Doc _ -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_doc; - loop_after_uident uid_start uid_stop rest - | Blanks -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_blanks; - loop_after_uident uid_start uid_stop rest - | Error -> - stylise uid_start uid_stop styles.style_ident; - stylise start stop styles.style_error; - loop rest - and loop_sharp tokens = - match tokens with - | [] -> - () - | (token, start, stop, src) :: rest -> - match token with - | Symbol -> - if src = "#" then begin - stylise start stop styles.style_directive; - loop_directive rest - end else begin - stylise start stop styles.style_symbol; - loop rest - end - | Lident -> - stylise start stop - (if String_set.mem src !UTop.keywords then - styles.style_keyword - else - styles.style_ident); - loop rest - | Uident -> - if String_set.mem src !UTop.keywords then begin - stylise start stop styles.style_keyword; - loop rest - end else - loop_after_uident start stop rest - | Constant -> - stylise start stop styles.style_constant; - loop rest - | Char -> - stylise start stop styles.style_char; - loop rest - | String _ -> - stylise start stop styles.style_string; - loop rest - | Quotation _ -> - stylise start stop styles.style_quotation; - loop rest - | Comment _ -> - stylise start stop styles.style_comment; - loop_sharp rest - | Doc _ -> - stylise start stop styles.style_doc; - loop_sharp rest - | Blanks -> - stylise start stop styles.style_blanks; - loop_sharp rest - | Error -> - stylise start stop styles.style_error; - loop rest - and loop_directive tokens = - match tokens with - | [] -> - () - | (token, start, stop, src) :: rest -> - match token with - | Symbol -> - stylise start stop styles.style_symbol; - loop rest - | Lident -> - stylise start stop - (if String_set.mem src !UTop.keywords then - styles.style_keyword - else - styles.style_directive); - loop rest - | Uident -> - if String_set.mem src !UTop.keywords then begin - stylise start stop styles.style_keyword; - loop rest - end else - loop_after_uident start stop rest - | Constant -> - stylise start stop styles.style_constant; - loop rest - | Char -> - stylise start stop styles.style_char; - loop rest - | String _ -> - stylise start stop styles.style_string; - loop rest - | Quotation _ -> - stylise start stop styles.style_quotation; - loop rest - | Comment _ -> - stylise start stop styles.style_comment; - loop_directive rest - | Doc _ -> - stylise start stop styles.style_doc; - loop_directive rest - | Blanks -> - stylise start stop styles.style_blanks; - loop_directive rest - | Error -> - stylise start stop styles.style_error; - loop rest - in - loop_sharp tokens + let tokens = stylise_filter_layout stylise tokens in + match tokens with + | (Symbol "#", loc) :: tokens -> begin + stylise loc styles.style_directive; + match tokens with + | ((Lident id | Uident id), loc) :: tokens -> + stylise loc + (if String_set.mem id !UTop.keywords then + styles.style_keyword + else + styles.style_directive); + stylise_rec stylise tokens + | tokens -> + stylise_rec stylise tokens + end + | tokens -> + stylise_rec stylise tokens diff --git a/src/lib/uTop_styles.mli b/src/lib/uTop_styles.mli index 1d512dc..7bc2bb8 100644 --- a/src/lib/uTop_styles.mli +++ b/src/lib/uTop_styles.mli @@ -37,6 +37,6 @@ val styles : styles val load : unit -> unit Lwt.t (** Load resources into [styles]. *) -val stylise : (int -> int -> LTerm_style.t -> unit) -> (UTop_token.t * int * int * string) list -> unit - (** [stylise apply tokens] calls [apply] on all tokens boundary with +val stylise : (UTop_token.location -> LTerm_style.t -> unit) -> (UTop_token.t * UTop_token.location) list -> unit + (** [stylise apply tokens] calls [apply] on all token locations with the associated style. *) diff --git a/src/lib/uTop_token.ml b/src/lib/uTop_token.ml index e22dcc4..b077c26 100644 --- a/src/lib/uTop_token.ml +++ b/src/lib/uTop_token.ml @@ -7,17 +7,55 @@ * This file is a part of utop. *) -(** Type of tokens. Tokens with a boolean parameter takes as argument - wheter the token is terminated or not. *) +(** Tokens. + + The type of tokens is semi-structured: parentheses construct and + quotations are nested and others tokens are flat list. *) + +(** Locations in the source string, which is encoded in UTF-8. *) +type location = { + idx1 : int; + (** Start position in unicode characters. *) + idx2 : int; + (** Stop position in unicode characters. *) + ofs1 : int; + (** Start position in bytes. *) + ofs2 : int; + (** Stop position in bytes. *) +} + type t = - | Symbol - | Lident - | Uident - | Constant + | Symbol of string + | Lident of string + | Uident of string + | Constant of string | Char | String of bool - | Quotation of bool - | Comment of bool - | Doc of bool + (** [String terminated]. *) + | Comment of comment_kind * bool + (** [Comment (kind, terminated)]. *) | Blanks | Error + | Quotation of (quotation_item * location) list * bool + (** [Quotation (items, terminated)]. *) + +and comment_kind = + | Comment_reg + (** Regular comment. *) + | Comment_doc + (** Documentation comment. *) + +and quotation_item = + | Quot_data + | Quot_anti of antiquotation + +and antiquotation = { + a_opening : location; + (** Location of the opening [$]. *) + a_closing : location option; + (** Location of the closing [$]. *) + a_name : (location * location) option; + (** Location of the name and colon if any. *) + a_contents : (t * location) list; + (** Contents of the location. *) +}