commit 2759b0a00499477f1f122712e4e6d332d370e3b3 Author: xenia Date: Thu Apr 25 00:14:52 2024 -0400 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e867f84 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/_build +/result diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..d607736 --- /dev/null +++ b/dune-project @@ -0,0 +1,26 @@ +(lang dune 3.15) + +(name ppx_unicode) + +(generate_opam_files true) + +(source + (github username/reponame)) + +(authors "Author Name") + +(maintainers "Maintainer Name") + +(license LICENSE) + +(documentation https://url/to/documentation) + +(package + (name ppx_unicode) + (synopsis "A short synopsis") + (description "A longer description") + (depends ocaml dune) + (tags + (topics "to describe" your project))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/ppx/dune b/ppx/dune new file mode 100644 index 0000000..fe17f3f --- /dev/null +++ b/ppx/dune @@ -0,0 +1,8 @@ +(library + (name ppx_unicode) + (public_name ppx_unicode) + (synopsis "ppx rewriters for unicode normalization") + (preprocess + (pps ppxlib.metaquot)) + (libraries ppxlib uunf) + (kind ppx_deriver)) diff --git a/ppx/ppx_unicode.ml b/ppx/ppx_unicode.ml new file mode 100644 index 0000000..8fb0c0d --- /dev/null +++ b/ppx/ppx_unicode.ml @@ -0,0 +1,61 @@ +open Ppxlib + +let has_attr name attrs = + List.find_opt (fun attr -> attr.attr_name.txt = name) attrs + |> Option.is_some + +let decode_single_uchar_opt str = + if String.is_valid_utf_8 str then + let res = String.get_utf_8_uchar str 0 in + if (Uchar.utf_decode_is_valid res) && (Uchar.utf_decode_length res == String.length str) then + Some (Uchar.utf_decode_uchar res |> Uchar.to_int) + else + None + else + None + +let nfc_mapper = object + inherit Ast_traverse.map as super + + method! expression = + fun { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } -> + if has_attr "uchar" pexp_attributes then + let pexp_attributes = + List.filter (fun attr -> attr.attr_name.txt <> "uchar") pexp_attributes in + match pexp_desc with + | Pexp_constant (Pconst_string (value, loc, _)) -> + begin match decode_single_uchar_opt value with + | Some cp -> + let cp = Pconst_integer (Int.to_string cp, None) in + let cp = { + pexp_desc = Pexp_constant cp; + pexp_loc; pexp_loc_stack; pexp_attributes + } in + [%expr Uchar.of_int [%e cp]] + | None -> + let err_ext = + Pexp_extension begin Location.error_extensionf ~loc:loc + "[@uchar] string must consist of a single valid utf-8 codepoint" end + in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes } + end + | _ -> + let err_ext = + Pexp_extension begin Location.error_extensionf ~loc:pexp_loc + "[@uchar] attribute must be on a string constant" end + in { pexp_desc = err_ext; pexp_loc; pexp_loc_stack; pexp_attributes } + else + super#expression { pexp_desc; pexp_loc; pexp_loc_stack; pexp_attributes } + + method! constant = function + | Pconst_string (value, loc, delim) -> + if String.is_valid_utf_8 value then + Pconst_string ((Uunf_string.normalize_utf_8 `NFC value), loc, delim) + else + Location.raise_errorf ~loc "invalid utf-8 in string literal" + | const -> super#constant const +end + +let () = + Driver.register_transformation + ~impl:nfc_mapper#structure + "ppx_unicode" diff --git a/ppx_unicode.opam b/ppx_unicode.opam new file mode 100644 index 0000000..d500377 --- /dev/null +++ b/ppx_unicode.opam @@ -0,0 +1,31 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short synopsis" +description: "A longer description" +maintainer: ["Maintainer Name"] +authors: ["Author Name"] +license: "LICENSE" +tags: ["topics" "to describe" "your" "project"] +homepage: "https://github.com/username/reponame" +doc: "https://url/to/documentation" +bug-reports: "https://github.com/username/reponame/issues" +depends: [ + "ocaml" + "dune" {>= "3.15"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/username/reponame.git" diff --git a/test/dune b/test/dune new file mode 100644 index 0000000..6534eca --- /dev/null +++ b/test/dune @@ -0,0 +1,3 @@ +(test + (name test_ppx_unicode) + (preprocess (pps ppx_unicode))) diff --git a/test/test_ppx_unicode.ml b/test/test_ppx_unicode.ml new file mode 100644 index 0000000..923edc6 --- /dev/null +++ b/test/test_ppx_unicode.ml @@ -0,0 +1,3 @@ +let () = + let a = "å"[@uchar] in + ignore a