open Ppxlib let attr_filter attr = let n = attr.attr_name.txt in n = "uchar" || n = "unicode.uchar" || n = "bytes" || n = "unicode.bytes" || n = "utf8" || n = "unicode.utf8" let attr_uchar_filter attr = let n = attr.attr_name.txt in n = "uchar" || n = "unicode.uchar" let attr_bytes_filter attr = let n = attr.attr_name.txt in n = "bytes" || n = "unicode.bytes" let decode_single_uchar_opt str = if String.is_valid_utf_8 str then let res = String.get_utf_8_uchar str 0 in if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then Some (Uchar.utf_decode_uchar res |> Uchar.to_int |> Int.to_string) else None else None let utf8_attr_mapper = object inherit Ast_traverse.map as super method! expression = fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } -> match pexp_desc with | Pexp_constant (Pconst_string (value, str_loc, delim)) -> begin let loc = pexp_loc in let attrs = pexp_attributes in let pexp_attributes = List.filter (fun attr -> not @@ attr_filter attr) attrs in let make_expr pexp_desc = { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } in let make_err msg = make_expr @@ Pexp_extension (Location.error_extensionf ~loc msg) in if List.find_opt attr_uchar_filter attrs |> Option.is_some then match decode_single_uchar_opt value with | Some value -> let value = Pexp_constant (Pconst_integer (value, None)) in let value = { pexp_desc = value; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in { [%expr Uchar.of_int [%e value]] with pexp_attributes } | None -> make_err "[@uchar] must be a single utf-8 codepoint literal" else if List.find_opt attr_bytes_filter attrs |> Option.is_some then let value_expr = Pexp_constant (Pconst_string (value, str_loc, delim)) in let value_expr = { pexp_desc = value_expr; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in { [%expr Bytes.of_string [%e value_expr]] with pexp_attributes } else if String.is_valid_utf_8 value then let value = Uunf_string.normalize_utf_8 `NFC value in make_expr @@ Pexp_constant (Pconst_string (value, str_loc, delim)) else make_err "string literal is not valid utf-8" end | _ -> super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } end let () = Driver.register_transformation ~impl:utf8_attr_mapper#structure "unicode"