Go to the first, previous, next, last section, table of contents.


The Pcre module

Exceptions

Gets raised when the regular expression is malformed

exception BadPattern of string * int

Gets raised when the C-library exhibits undefined behaviour

exception InternalError of string

Compilation and runtime flags and their conversion functions

type icflag (* Internal representation of compilation flags *)
and  irflag (* Internal representation of runtime flags *)

Compilation flags

and cflag =
  [ `CASELESS       (* Case insensitive matching *)
  | `MULTILINE      (* '^' and '$' match before/after newlines,
                       not just at the beginning/end of a string *)
  | `DOTALL         (* '.' matches all characters (newlines, too) *)
  | `EXTENDED       (* Ignores whitespace and PERL-comments. Behaves
                       like the '/x'-option in PERL *)
  | `ANCHORED       (* Pattern matches only at start of string *)
  | `DOLLAR_ENDONLY (* '$' in pattern matches only at end of string *)
  | `EXTRA          (* Reserved for future extensions of PCRE *)
  | `UNGREEDY ]     (* Quantifiers not greedy anymore, only
                       if followed by '?' *)

val cflags : cflag list -> icflag

cflags cflag_list converts a list of compilation flags to their internal representation

val cflag_list : icflag -> cflag list

cflag_list cflags converts internal representation of compilation flags to a list

Runtime flags

type rflag =
  [ `ANCHORED   (* Treats pattern as if it were anchored *)
  | `NOTBOL     (* Beginning of string is not treated as beginning of line *)
  | `NOTEOL     (* End of string is not treated as end of line *)
  | `NOTEMPTY ] (* Empty strings are not considered to be a valid match *)

val rflags : rflag list -> irflag

rflags rflag_list converts a list of runtime flags to their internal representation

val rflag_list : irflag -> rflag list

rflag_list rflags converts internal representation of runtime flags to a list

Information on patterns

Information on matching of "first chars" in patterns

type firstchar_info =
  [ `Char of char  (* Fixed first character *)
  | `Start_only    (* Pattern matches at beginning and end of newlines *)
  | `ANCHORED ]    (* Pattern is anchored *)

Information on the study status of patterns

type study_stat =
  [ `Not_studied (* Pattern has not yet been studied *)
  | `Studied     (* Pattern has been studied successfully *)
  | `Optimal ]   (* Pattern could not be improved by studying *)
type regexp (* Compiled regular expressions *)

options regexp returns compilation flags of regexp.

external options : regexp -> icflag = "pcre_options_wrapper"

size regexp returns memory size of regexp

external size : regexp -> int = "pcre_size_wrapper"

capturecount regexp returns number of capturing subpatterns in regexp.

external capturecount : regexp -> int = "pcre_capturecount_wrapper"

backrefmax regexp returns number of highest backreference in regexp.

external backrefmax : regexp -> int = "pcre_backrefmax_wrapper"

firstchar regexp returns firstchar info on regexp.

external firstchar : regexp -> firstchar_info = "pcre_firstchar_wrapper"

firsttable regexp returns Some 256-bit (32-byte) fixed set table in the form of a string for regexp if available, None otherwise.

external firsttable : regexp -> string option = "pcre_firsttable_wrapper"

lastliteral regexp returns Some last matching character of regexp if available, None otherwise.

external lastliteral : regexp -> char option = "pcre_lastliteral_wrapper"

study_stat regexp returns study status of regexp.

external study_stat :
  regexp -> study_stat = "pcre_study_stat_wrapper" "noalloc"

Compilation of patterns

For detailed documentation on how you can specify PERL-style regular expressions (=patterns), please consult PERL-manuals or the man-page of PCRE!

type chtables (* Alternative set of char tables for pattern matching *)

external maketables : unit -> chtables = "pcre_maketables_wrapper"

Generates new set of char tables for the current locale

val regexp :
  ?study: bool ->        (* Default: true *)
  ?iflags: icflag ->     (* Default: no extra flags *)
  ?flags: cflag list ->  (* Default: not considered *)
  ?chtables: chtables -> (* Default: builtin char tables *)
  string -> regexp

regexp ?study ?iflags ?flags ?chtables pattern compiles pattern with flags when given, with iflags otherwise, and with char tables chtables. If study is true, then the resulting regular expression will be studied.

val quote : string -> string

quote str returns the quoted string of str

Matching of patterns and subpattern extraction

type substrings (* Information on substrings after pattern matching *)

val num_of_subs : substrings -> int

num_of_subs substrings returns number of strings in substrings (whole match inclusive).

val get_substring : substrings -> int -> string

get_substring substrings n returns the nth substring (0 is whole match) of substrings or the empty string if the corresponding subpattern did not capture a substring. Raises Invalid_argument if n is not in the range of the number of substrings.

val get_substring_ofs : substrings -> int -> int * int

get_substring_ofs substrings n returns the offset tuple of the nth substring of substrings (0 is whole match). Raises Invalid_argument if n is not in the range of the number of substrings and Not_found if the corresponding subpattern did not capture a substring.

val get_substrings : substrings -> string array

get_substrings substrings returns the array of substrings in substrings (whole match on index 0). If a subpattern did not capture a substring, the empty string is returned in the corresponding position instead.

val pcre_exec :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  string -> int array

pcre_exec ?iflags ?flags ?rex ?pat ?pos subj returns an array of offsets that describe the position of matched subpatterns in the string subj starting at position pos with pattern pat when given, regular expression rex otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Not_found if pattern does not match.

val exec :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  string -> substrings

exec ?iflags ?flags ?rex ?pat ?pos subj returns substring information on string subj starting at position pos with pattern pat when given, regular expression rex otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Not_found if pattern does not match.

val exec_all :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  string -> substrings array

exec_all ?iflags ?flags ?rex ?pat ?pos subj returns an array of substring information of all matching substrings in string subj starting at position pos with pattern pat when given, regular expression rex otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Not_found if pattern does not match.

val next_match :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  substrings -> substrings

next_match ?iflags ?flags ?rex ?pat ?pos substrs returns substring information on the match that follows the last match denoted by substrs, jumping over pos characters (also backwards!), using pattern pat when given, regular expression rex otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Not_found if pattern does not match and Invalid_argument if pos would let matching start outside of the subject string.

val extract :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  string -> string array

extract ?iflags ?flags ?rex ?pat ?pos subj returns the array of substrings that match subj starting at position pos, using pattern pat when given, regular expression rex otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Not_found if pattern does not match.

val pmatch :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  string -> bool

pmatch ?iflags ?flags ?rex ?pat ?pos subj returns true if subj is matched by pattern pat when given, regular expression rex otherwise, starting at position pos. Uses flags when given, the precompiled iflags otherwise.

String substitution

Information on substitution patterns

type substitution

val subst : string -> substitution

subst str converts the string str representing a substitution pattern to the internal representation

The contents of the substitution string str can be normal text mixed with any of the following (mostly as in PERL):

$0-9+ - a "$" immediately followed by an arbitrary number. "$0" stands for the name of the executable, any other number for the n-th backreference.
$& - the whole matched pattern
$` - the text before the match
$' - the text after the match
$+ - the last group that matched
$$ - a single "$"
$! - Delimiter which does not appear in the substitution. Can be used to separate "$0-9+" from an immediately following other number.

val replace :
  ?iflags: irflag ->       (* Default: no extra flags *)
  ?flags: rflag list ->    (* Default: not considered *)
  ?rex: regexp ->          (* Default: matches whitespace "\s+" *)
  ?pat: string ->          (* Default: not considered *)
  ?pos: int ->             (* Default: 0 *)
  ?itempl: substitution -> (* Default: empty string *)
  ?templ: string ->        (* Default: not considered *)
  string -> string

replace ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ subj replaces all substrings of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the substitution string templ when given, itempl otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Failure if there are backreferences to nonexistent subpatterns.

val qreplace :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  ?templ: string ->     (* Default: empty string *)
  string -> string

qreplace ?iflags ?flags ?rex ?pat ?pos ?templ subj replaces all substrings of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the string templ. Uses flags when given, the precompiled iflags otherwise.

val substitute :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  subst: (string -> string) -> string -> string

substitute ?iflags ?flags ?rex ?pat ?pos ~subst subj replaces all substrings of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the result of function subst applied to the match. Uses flags when given, the precompiled iflags otherwise.

val replace_first :
  ?iflags: irflag ->       (* Default: no extra flags *)
  ?flags: rflag list ->    (* Default: not considered *)
  ?rex: regexp ->          (* Default: matches whitespace "\s+" *)
  ?pat: string ->          (* Default: not considered *)
  ?pos: int ->             (* Default: 0 *)
  ?itempl: substitution -> (* Default: empty string *)
  ?templ: string ->        (* Default: not considered *)
  string -> string

replace_first ?iflags ?flags ?rex ?pat ?pos ?itempl ?templ subj replaces the first substring of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the substitution string templ when given, itempl otherwise. Uses flags when given, the precompiled iflags otherwise. Raises Failure if there are backreferences to nonexistent subpatterns.

val qreplace_first :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  ?templ: string ->     (* Default: empty string *)
  string -> string

qreplace_first ?iflags ?flags ?rex ?pat ?pos ?templ subj replaces the first substring of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the string templ. Uses flags when given, the precompiled iflags otherwise.

val substitute_first :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  subst: (string -> string) -> string -> string

substitute_first ?iflags ?flags ?rex ?pat ?pos ~subst subj replaces the first substring of subj matching pattern pat when given, regular expression rex otherwise, starting at position pos with the result of function subst applied to the match. Uses flags when given, the precompiled iflags otherwise.

Splitting

Splitting compatible with PERL

val split :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  ?max: int ->          (* Default: 0 *)
  string -> string list

split ?iflags ?flags ?rex ?pat ?pos ?max subj splits subj into a list of at most max strings, using as delimiter pattern pat when given, regular expression rex otherwise, starting at position pos. Uses flags when given, the precompiled iflags otherwise. If max is zero, trailing empty fields are stripped. If it is negative, it is treated as arbitrarily large. If neither pat nor rex are specified, leading whitespace will be stripped! Should behave exactly as in PERL.

type split_result = Text of string        (* Text part of split string *)
                  | Delim of string       (* Delimiter part of split
                                             string *)
                  | Group of int * string (* Subgroup of matched delimiter
                                             (subgroup_nr, subgroup_str) *)
                  | NoGroup               (* Unmatched subgroup *)

val full_split :
  ?iflags: irflag ->    (* Default: no extra flags *)
  ?flags: rflag list -> (* Default: not considered *)
  ?rex: regexp ->       (* Default: matches whitespace "\s+" *)
  ?pat: string ->       (* Default: not considered *)
  ?pos: int ->          (* Default: 0 *)
  ?max: int ->          (* Default: 0 *)
  string -> split_result list

full_split ?iflags ?flags ?rex ?pat ?pos ?max subj splits subj into a list of at most max elements of type "split_result", using as delimiter pattern pat when given, regular expression rex otherwise, starting at position pos. Uses flags when given, the precompiled iflags otherwise. If max is zero, trailing empty fields are stripped. If it is negative, it is treated as arbitrarily large. Should behave exactly as in PERL.

Version information

val version : string  (* Version of the PCRE-C-library *)

Additional convenience functions useful in combination with this library

val foreach_line :
  ?ic: in_channel -> (* Default: stdin *)
  (string -> unit) -> unit

foreach_line ?ic f applies f to each line in the input channel ic until the end-of-file is reached

val foreach_file : string list -> (string -> in_channel -> unit) -> unit

foreach_file filenames f opens each file in the list filenames for input and applies f to each filename and the corresponding channel. Channels are closed after each operation (even when exceptions occur - they get reraised afterwards!).

UNSAFE STUFF - USE WITH CAUTION!

external unsafe_pcre_exec :
  irflag -> regexp -> int -> string ->
  int -> int array -> unit = "pcre_exec_wrapper_bc" "pcre_exec_wrapper"

unsafe_pcre_exec flags rex pos subject subgroup_offsets offset_vector. You should read the C-source to know what happens. If you do not understand it - don't use this function!

val make_ovector : regexp -> int * int array

make_ovector regexp calculates the tuple (subgroups2, ovector) which is the number of subgroup offsets and the offset array


Go to the first, previous, next, last section, table of contents.