open Ppxlib let has_attr name attrs = List.find_opt (fun attr -> attr.attr_name.txt = name) attrs |> Option.is_some let decode_single_uchar_opt str = if String.is_valid_utf_8 str then let res = String.get_utf_8_uchar str 0 in if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then Some (Uchar.utf_decode_uchar res |> Uchar.to_int) else None else None let nfc_mapper = object inherit Ast_traverse.map as super method! expression = fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } -> if has_attr "uchar" pexp_attributes then let pexp_attributes = List.filter (fun attr -> attr.attr_name.txt <> "uchar") pexp_attributes in match pexp_desc with | Pexp_constant (Pconst_string (value, loc, _)) -> begin match decode_single_uchar_opt value with | Some cp -> let cp = Pconst_integer (Int.to_string cp, None) in let cp = { pexp_desc = Pexp_constant cp; pexp_loc; pexp_loc_stack; pexp_attributes } in [%expr Uchar.of_int [%e cp]] | None -> let err_ext = Pexp_extension begin Location.error_extensionf ~loc:loc "[@uchar] string must consist of a single valid utf-8 codepoint" end in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes } end | _ -> let err_ext = Pexp_extension begin Location.error_extensionf ~loc:pexp_loc "[@uchar] attribute must be on a string constant" end in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes } else super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } method! constant = function | Pconst_string (value, loc, delim) -> if String.is_valid_utf_8 value then Pconst_string ((Uunf_string.normalize_utf_8 `NFC value), loc, delim) else Location.raise_errorf ~loc "invalid utf-8 in string literal" | const -> super#constant const end let () = Driver.register_transformation ~impl:nfc_mapper#structure "ppx_unicode"