refactor, add bytes
This commit is contained in:
parent
2759b0a004
commit
e727a180ef
|
@ -1,61 +1,67 @@
|
||||||
open Ppxlib
|
open Ppxlib
|
||||||
|
|
||||||
let has_attr name attrs =
|
let attr_filter attr =
|
||||||
List.find_opt (fun attr -> attr.attr_name.txt = name) attrs
|
let n = attr.attr_name.txt in
|
||||||
|> Option.is_some
|
n = "uchar" || n = "unicode.uchar" || n = "bytes" || n = "unicode.bytes"
|
||||||
|
|| n = "utf8" || n = "unicode.utf8"
|
||||||
|
|
||||||
|
let attr_uchar_filter attr =
|
||||||
|
let n = attr.attr_name.txt in
|
||||||
|
n = "uchar" || n = "unicode.uchar"
|
||||||
|
|
||||||
|
let attr_bytes_filter attr =
|
||||||
|
let n = attr.attr_name.txt in
|
||||||
|
n = "bytes" || n = "unicode.bytes"
|
||||||
|
|
||||||
let decode_single_uchar_opt str =
|
let decode_single_uchar_opt str =
|
||||||
if String.is_valid_utf_8 str then
|
if String.is_valid_utf_8 str then
|
||||||
let res = String.get_utf_8_uchar str 0 in
|
let res = String.get_utf_8_uchar str 0 in
|
||||||
if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then
|
if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then
|
||||||
Some (Uchar.utf_decode_uchar res |> Uchar.to_int)
|
Some (Uchar.utf_decode_uchar res |> Uchar.to_int |> Int.to_string)
|
||||||
else
|
else
|
||||||
None
|
None
|
||||||
else
|
else
|
||||||
None
|
None
|
||||||
|
|
||||||
let nfc_mapper = object
|
let utf8_attr_mapper = object
|
||||||
inherit Ast_traverse.map as super
|
inherit Ast_traverse.map as super
|
||||||
|
|
||||||
method! expression =
|
method! expression =
|
||||||
fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } ->
|
fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } ->
|
||||||
if has_attr "uchar" pexp_attributes then
|
|
||||||
let pexp_attributes =
|
|
||||||
List.filter (fun attr -> attr.attr_name.txt <> "uchar") pexp_attributes in
|
|
||||||
match pexp_desc with
|
match pexp_desc with
|
||||||
| Pexp_constant (Pconst_string (value, loc, _)) ->
|
| Pexp_constant (Pconst_string (value, str_loc, delim)) ->
|
||||||
begin match decode_single_uchar_opt value with
|
begin
|
||||||
| Some cp ->
|
let loc = pexp_loc in
|
||||||
let cp = Pconst_integer (Int.to_string cp, None) in
|
let attrs = pexp_attributes in
|
||||||
let cp = {
|
let pexp_attributes = List.filter (fun attr -> not @@ attr_filter attr) attrs in
|
||||||
pexp_desc = Pexp_constant cp;
|
let make_expr pexp_desc =
|
||||||
pexp_loc; pexp_loc_stack; pexp_attributes
|
{ pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } in
|
||||||
} in
|
let make_err msg =
|
||||||
[%expr Uchar.of_int [%e cp]]
|
make_expr @@ Pexp_extension (Location.error_extensionf ~loc msg) in
|
||||||
|
if List.find_opt attr_uchar_filter attrs |> Option.is_some then
|
||||||
|
match decode_single_uchar_opt value with
|
||||||
|
| Some value ->
|
||||||
|
let value = Pexp_constant (Pconst_integer (value, None)) in
|
||||||
|
let value = { pexp_desc = value; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in
|
||||||
|
{ [%expr Uchar.of_int [%e value]] with pexp_attributes }
|
||||||
| None ->
|
| None ->
|
||||||
let err_ext =
|
make_err "[@uchar] must be a single utf-8 codepoint literal"
|
||||||
Pexp_extension begin Location.error_extensionf ~loc:loc
|
else if List.find_opt attr_bytes_filter attrs |> Option.is_some then
|
||||||
"[@uchar] string must consist of a single valid utf-8 codepoint" end
|
let value_expr = Pexp_constant (Pconst_string (value, str_loc, delim)) in
|
||||||
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes }
|
let value_expr =
|
||||||
end
|
{ pexp_desc = value_expr; pexp_loc; pexp_loc_stack; pexp_attributes = [] } in
|
||||||
| _ ->
|
{ [%expr Bytes.of_string [%e value_expr]] with pexp_attributes }
|
||||||
let err_ext =
|
else if String.is_valid_utf_8 value then
|
||||||
Pexp_extension begin Location.error_extensionf ~loc:pexp_loc
|
let value = Uunf_string.normalize_utf_8 `NFC value in
|
||||||
"[@uchar] attribute must be on a string constant" end
|
make_expr @@ Pexp_constant (Pconst_string (value, str_loc, delim))
|
||||||
in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes }
|
|
||||||
else
|
else
|
||||||
super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes }
|
make_err "string literal is not valid utf-8"
|
||||||
|
end
|
||||||
|
| _ -> super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes }
|
||||||
|
end
|
||||||
|
|
||||||
method! constant = function
|
|
||||||
| Pconst_string (value, loc, delim) ->
|
|
||||||
if String.is_valid_utf_8 value then
|
|
||||||
Pconst_string ((Uunf_string.normalize_utf_8 `NFC value), loc, delim)
|
|
||||||
else
|
|
||||||
Location.raise_errorf ~loc "invalid utf-8 in string literal"
|
|
||||||
| const -> super#constant const
|
|
||||||
end
|
|
||||||
|
|
||||||
let () =
|
let () =
|
||||||
Driver.register_transformation
|
Driver.register_transformation
|
||||||
~impl:nfc_mapper#structure
|
~impl:utf8_attr_mapper#structure
|
||||||
"ppx_unicode"
|
"unicode"
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
let () =
|
let () =
|
||||||
let a = "å"[@uchar] in
|
let a = "å"[@uchar] in
|
||||||
ignore a
|
let b = Buffer.create 16 in
|
||||||
|
Buffer.add_utf_8_uchar b a;
|
||||||
|
Buffer.add_utf_8_uchar b a;
|
||||||
|
let str = Buffer.to_bytes b |> String.of_bytes in
|
||||||
|
Printf.printf "meow meow: %s\n%!" str
|
||||||
|
|
Loading…
Reference in New Issue