refactor, add bytes

This commit is contained in:
xenia 2024-04-25 02:07:52 -04:00
parent 2759b0a004
commit e727a180ef
2 changed files with 51 additions and 41 deletions

View File

@ -1,61 +1,67 @@
open Ppxlib open Ppxlib
let has_attr name attrs = let attr_filter attr =
List.find_opt (fun attr -> attr.attr_name.txt = name) attrs let n = attr.attr_name.txt in
|> Option.is_some n = "uchar" || n = "unicode.uchar" || n = "bytes" || n = "unicode.bytes"
|| n = "utf8" || n = "unicode.utf8"
let attr_uchar_filter attr =
let n = attr.attr_name.txt in
n = "uchar" || n = "unicode.uchar"
let attr_bytes_filter attr =
let n = attr.attr_name.txt in
n = "bytes" || n = "unicode.bytes"
let decode_single_uchar_opt str = let decode_single_uchar_opt str =
if String.is_valid_utf_8 str then if String.is_valid_utf_8 str then
let res = String.get_utf_8_uchar str 0 in let res = String.get_utf_8_uchar str 0 in
if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then
Some (Uchar.utf_decode_uchar res |> Uchar.to_int) Some (Uchar.utf_decode_uchar res |> Uchar.to_int |> Int.to_string)
else else
None None
else else
None None
let nfc_mapper = object let utf8_attr_mapper = object
inherit Ast_traverse.map as super inherit Ast_traverse.map as super
method! expression = method! expression =
fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } -> fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } ->
if has_attr "uchar" pexp_attributes then
let pexp_attributes =
List.filter (fun attr -> attr.attr_name.txt <> "uchar") pexp_attributes in
match pexp_desc with match pexp_desc with
| Pexp_constant (Pconst_string (value, loc, _)) -> | Pexp_constant (Pconst_string (value, str_loc, delim)) ->
begin match decode_single_uchar_opt value with begin
| Some cp -> let loc = pexp_loc in
let cp = Pconst_integer (Int.to_string cp, None) in let attrs = pexp_attributes in
let cp = { let pexp_attributes = List.filter (fun attr -> not @@ attr_filter attr) attrs in
pexp_desc = Pexp_constant cp; let make_expr pexp_desc =
pexp_loc; pexp_loc_stack; pexp_attributes { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } in
} in let make_err msg =
[%expr Uchar.of_int [%e cp]] make_expr @@ Pexp_extension (Location.error_extensionf ~loc msg) in
if List.find_opt attr_uchar_filter attrs |> Option.is_some then
match decode_single_uchar_opt value with
| Some value ->
let value = Pexp_constant (Pconst_integer (value, None)) in
let value = { pexp_desc = value; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in
{ [%expr Uchar.of_int [%e value]] with pexp_attributes }
| None -> | None ->
let err_ext = make_err "[@uchar] must be a single utf-8 codepoint literal"
Pexp_extension begin Location.error_extensionf ~loc:loc else if List.find_opt attr_bytes_filter attrs |> Option.is_some then
"[@uchar] string must consist of a single valid utf-8 codepoint" end let value_expr = Pexp_constant (Pconst_string (value, str_loc, delim)) in
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes } let value_expr =
end { pexp_desc = value_expr; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in
| _ -> { [%expr Bytes.of_string [%e value_expr]] with pexp_attributes }
let err_ext = else if String.is_valid_utf_8 value then
Pexp_extension begin Location.error_extensionf ~loc:pexp_loc let value = Uunf_string.normalize_utf_8 `NFC value in
"[@uchar] attribute must be on a string constant" end make_expr @@ Pexp_constant (Pconst_string (value, str_loc, delim))
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes }
else else
super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } make_err "string literal is not valid utf-8"
end
| _ -> super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes }
end
method! constant = function
| Pconst_string (value, loc, delim) ->
if String.is_valid_utf_8 value then
Pconst_string ((Uunf_string.normalize_utf_8 `NFC value), loc, delim)
else
Location.raise_errorf ~loc "invalid utf-8 in string literal"
| const -> super#constant const
end
let () = let () =
Driver.register_transformation Driver.register_transformation
~impl:nfc_mapper#structure ~impl:utf8_attr_mapper#structure
"ppx_unicode" "unicode"

View File

@ -1,3 +1,7 @@
let () = let () =
let a = "å"[@uchar] in let a = "å"[@uchar] in
ignore a let b = Buffer.create 16 in
Buffer.add_utf_8_uchar b a;
Buffer.add_utf_8_uchar b a;
let str = Buffer.to_bytes b |> String.of_bytes in
Printf.printf "meow meow: %s\n%!" str