ppx_unicode/ppx/ppx_unicode.ml

62 lines
2.2 KiB
OCaml
Raw Normal View History

2024-04-25 04:14:52 +00:00
open Ppxlib
let has_attr name attrs =
List.find_opt (fun attr -> attr.attr_name.txt = name) attrs
|> Option.is_some
let decode_single_uchar_opt str =
if String.is_valid_utf_8 str then
let res = String.get_utf_8_uchar str 0 in
if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then
Some (Uchar.utf_decode_uchar res |> Uchar.to_int)
else
None
else
None
let nfc_mapper = object
inherit Ast_traverse.map as super
method! expression =
fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } ->
if has_attr "uchar" pexp_attributes then
let pexp_attributes =
List.filter (fun attr -> attr.attr_name.txt <> "uchar") pexp_attributes in
match pexp_desc with
| Pexp_constant (Pconst_string (value, loc, _)) ->
begin match decode_single_uchar_opt value with
| Some cp ->
let cp = Pconst_integer (Int.to_string cp, None) in
let cp = {
pexp_desc = Pexp_constant cp;
pexp_loc; pexp_loc_stack; pexp_attributes
} in
[%expr Uchar.of_int [%e cp]]
| None ->
let err_ext =
Pexp_extension begin Location.error_extensionf ~loc:loc
"[@uchar] string must consist of a single valid utf-8 codepoint" end
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes }
end
| _ ->
let err_ext =
Pexp_extension begin Location.error_extensionf ~loc:pexp_loc
"[@uchar] attribute must be on a string constant" end
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes }
else
super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes }
method! constant = function
| Pconst_string (value, loc, delim) ->
if String.is_valid_utf_8 value then
Pconst_string ((Uunf_string.normalize_utf_8 `NFC value), loc, delim)
else
Location.raise_errorf ~loc "invalid utf-8 in string literal"
| const -> super#constant const
end
let () =
Driver.register_transformation
~impl:nfc_mapper#structure
"ppx_unicode"