Module Uutf.String
Fold over the characters of UTF encoded OCaml string
values.
Encoding guess
val encoding_guess : string -> [ `UTF_8 | `UTF_16BE | `UTF_16LE ] * bool
encoding_guess s is the encoding guessed for s, coupled with true iff there's an initial BOM.
String folders
Note. Initial BOMs are also folded over.
type 'a folder
= 'a -> int -> [ `Uchar of Stdlib.Uchar.t | `Malformed of string ] -> 'a
The type for character folders. The integer is the index in the string where the `Uchar or `Malformed starts.
val fold_utf_8 : ?pos:int -> ?len:int -> 'a folder -> 'a -> string -> 'a
fold_utf_8 ?pos ?len f a s is
f (... (f (f a pos u_0) j_1 u_1) ...) j_n u_n
where u_i, j_i are the characters and their start positions in the UTF-8 encoded substring of s starting at pos and len long. The default value for pos is 0 and for len is String.length s - pos.
val fold_utf_16be : ?pos:int -> ?len:int -> 'a folder -> 'a -> string -> 'a
fold_utf_16be ?pos ?len f a s is
f (... (f (f a pos u_0) j_1 u_1) ...) j_n u_n
where u_i, j_i are the characters and their start positions in the UTF-16BE encoded substring of s starting at pos and len long. The default value for pos is 0 and for len is String.length s - pos.
val fold_utf_16le : ?pos:int -> ?len:int -> 'a folder -> 'a -> string -> 'a
fold_utf_16le ?pos ?len f a s is
f (... (f (f a pos u_0) j_1 u_1) ...) j_n u_n
where u_i, j_i are the characters and their start positions in the UTF-16LE encoded substring of s starting at pos and len long. The default value for pos is 0 and for len is String.length s - pos.