Uucp.Case
Case properties, mappings and foldings.
These properties can implement Unicode's default case detection, case conversion and caseless equality over Unicode text, see the examples.
val is_case_ignorable : Uchar.t -> bool
is_case_ignorable u is true iff u has the Case_Ignorable derived property.
These character mapping functions return `Self whenever a character maps to itself.
module Map : sig ... end
Case mappings.
module Fold : sig ... end
Case folding.
module Nfkc_fold : sig ... end
NFKC case folding.
These examples use Uutf to fold over the characters of UTF-8 encoded OCaml strings and to UTF-8 encode mapped characters in an OCaml Buffer.t value.
The value cmap_utf_8 cmap s is the UTF-8 encoded string resulting from applying the character map cmap to every character of the UTF-8 encoded string s.
(* [cmap_utf_8 cmap s] folds over the characters of the UTF-8 encoded
   string [s], applies [cmap] to each of them and returns the UTF-8
   encoding of the results.

   [cmap u] must return [`Self] when [u] maps to itself, or [`Uchars us]
   when [u] maps to the character sequence [us]. Malformed input is
   replaced by [Uutf.u_rep] before [cmap] is applied. *)
let cmap_utf_8 cmap s =
  (* Initial capacity only; the buffer grows if the mapping expands more. *)
  let b = Buffer.create (String.length s * 2) in
  (* Folder given to [Uutf.String.fold_utf_8]; its first two arguments are
     ignored. It never calls itself, so it is not declared [rec]. *)
  let add_map _ _ u =
    let u = match u with `Malformed _ -> Uutf.u_rep | `Uchar u -> u in
    match cmap u with
    | `Self -> Uutf.Buffer.add_utf_8 b u
    | `Uchars us -> List.iter (Uutf.Buffer.add_utf_8 b) us
  in
  Uutf.String.fold_utf_8 add_map () s;
  Buffer.contents b
Using the function cmap_utf_8, Unicode's default case conversions can be implemented with:
(* [lowercase_utf_8 s] is the result of running [cmap_utf_8] over [s] with
   the character map [Uucp.Case.Map.to_lower]. *)
let lowercase_utf_8 s = s |> cmap_utf_8 Uucp.Case.Map.to_lower
(* [uppercase_utf_8 s] is the result of running [cmap_utf_8] over [s] with
   the character map [Uucp.Case.Map.to_upper]. *)
let uppercase_utf_8 s = s |> cmap_utf_8 Uucp.Case.Map.to_upper
However strictly speaking lowercase_utf_8 is not conformant as it doesn't handle the context sensitive mapping of capital sigma U+03A3 to final sigma U+03C2.
Note that applying Unicode's default case algorithms to a normalized string does not preserve its normalization form.
These examples use Uunf to normalize character sequences.
Unicode canonical caseless matching (D145) is defined by normalizing to NFD, applying the Case_Folding mapping, normalizing again to NFD and testing the result for binary equality:
(* [canonical_caseless_key s] is a UTF-8 encoded key computed from the UTF-8
   encoded string [s] in three chained steps: the decoded characters go
   through a first NFD normalizer, each normalized character is mapped with
   [Uucp.Case.Fold.fold], and the mapped characters go through a second NFD
   normalizer whose output is UTF-8 encoded into the result.
   Malformed input is replaced by [Uutf.u_rep]. *)
let canonical_caseless_key s =
  let out = Buffer.create (String.length s * 2) in
  (* Second stage: renormalize folded characters and encode them to [out].
     After each character handed back by the normalizer, it is polled again
     with [`Await] until it has nothing more to give. *)
  let nfd_out = Uunf.create `NFD in
  let rec emit v =
    match Uunf.add nfd_out v with
    | `Uchar u -> Uutf.Buffer.add_utf_8 out u; emit `Await
    | `Await | `End -> ()
  in
  let emit_uchar u = emit (`Uchar u) in
  (* First stage: normalize the decoded input, fold each normalized
     character and pass the folding on to the second stage. *)
  let nfd_in = Uunf.create `NFD in
  let rec feed v =
    match Uunf.add nfd_in v with
    | `Await | `End -> ()
    | `Uchar u ->
        (match Uucp.Case.Fold.fold u with
         | `Self -> emit_uchar u
         | `Uchars us -> List.iter emit_uchar us);
        feed `Await
  in
  (* Folder for [Uutf.String.fold_utf_8]; its first two arguments are
     ignored. *)
  let on_decode _ _ = function
  | `Uchar _ as u -> feed u
  | `Malformed _ -> feed (`Uchar Uutf.u_rep)
  in
  Uutf.String.fold_utf_8 on_decode () s;
  (* Signal end of input to the first stage, then to the second. *)
  feed `End;
  emit `End;
  Buffer.contents out
(* [canonical_caseless_eq s0 s1] is [true] iff [s0] and [s1] have equal
   [canonical_caseless_key] keys. *)
let canonical_caseless_eq s0 s1 =
  let k0 = canonical_caseless_key s0 in
  let k1 = canonical_caseless_key s1 in
  String.equal k0 k1
Unicode's caseless matching for identifiers (D147, see also UAX 31) is defined by normalizing to NFD, applying the NFKC_Casefold mapping and test the result for binary equality:
(* [id_caseless_key s] is a UTF-8 encoded key computed from the UTF-8
   encoded string [s]: the decoded characters go through an NFD normalizer
   and each normalized character is mapped with [Uucp.Case.Nfkc_fold.fold]
   before being UTF-8 encoded into the result.
   Malformed input is replaced by [Uutf.u_rep].
   NOTE(review): nothing is normalized after the mapping step; confirm
   against the definition of toNFKC_Casefold if that matters to callers. *)
let id_caseless_key s =
  let out = Buffer.create (String.length s * 3) in
  let put = Uutf.Buffer.add_utf_8 out in
  let nfd = Uunf.create `NFD in
  (* After each character handed back by the normalizer, it is polled again
     with [`Await] until it has nothing more to give. *)
  let rec feed v =
    match Uunf.add nfd v with
    | `Await | `End -> ()
    | `Uchar u ->
        (match Uucp.Case.Nfkc_fold.fold u with
         | `Self -> put u
         | `Uchars us -> List.iter put us);
        feed `Await
  in
  (* Folder for [Uutf.String.fold_utf_8]; its first two arguments are
     ignored. *)
  let on_decode _ _ = function
  | `Uchar _ as u -> feed u
  | `Malformed _ -> feed (`Uchar Uutf.u_rep)
  in
  Uutf.String.fold_utf_8 on_decode () s;
  feed `End;
  Buffer.contents out
(* [id_caseless_eq s0 s1] is [true] iff [s0] and [s1] have equal
   [id_caseless_key] keys. *)
let id_caseless_eq s0 s1 =
  let k0 = id_caseless_key s0 in
  let k1 = id_caseless_key s1 in
  String.equal k0 k1