Skip to content

Instantly share code, notes, and snippets.

@dbuenzli
Last active June 20, 2025 00:45
Show Gist options
  • Save dbuenzli/e395cad463b41db96c74b94af79c81c6 to your computer and use it in GitHub Desktop.
Save dbuenzli/e395cad463b41db96c74b94af79c81c6 to your computer and use it in GitHub Desktop.
Two way string search
#!/bin/sh
# Usage:
# ./bench.sh [--search-first] [--worst-case]
# Build
rm -rf _build
mkdir _build
cp test.ml _build/
ocamlopt -o _build/test _build/test.ml
# Bench
declare -a sizes=(1 2 4 8 16 64 128 258)
for size in "${sizes[@]}"
do
hyperfine \
--parameter-list func \
--use-naive,--use-two-way \
"_build/test --bench $size $* {func}"
done
(*---------------------------------------------------------------------------
Copyright (c) 2025 Daniel C. Bünzli. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)
(** {1:design_notes Design notes}
{ul
{- We implement string search with the
{{:https://doi.org/10.1145/116825.116845}two way algorithm}. It
has a good worse case complexity and the result of
pre-preprocessing is only two integers (versus allocating tables
for Knuth-Morris-Pratt or Boyer-Moore).}
{- We raise on invalid position, we could also return [None]
or make the functions behave like the identity (if applicable). But
this is more in the spirit of the current [String] module and it's
also unclear which one is best.}
{- In `rfind_sub`, the [start] argument has a different semantics from
the argument [r*_from] functions. The advantage of the definition
used below is that it coincides with the mental model of indices
which is the one you want to work with.}}
{- The [start] argument remains however a position (vs index)
so that definitions on empty arguments are consistent and well
defined without fuss.}} *)
(** String search and replace functions. *)
module type API = sig
val includes : affix:string -> string -> bool
(** [includes ~affix s] is [true] if and only if [s] has [affix] as
a substring.
@since X.XX *)
val find_sub : ?start:int -> sub:string -> string -> int option
(** [find_sub ~start ~sub s] is the start position (if any) of the
first occurence of [sub] in [s] after or at position [start]
(which includes index [start] if it exists, defaults to [0]).
Note if you need to search for [sub] multiple times in [s] use
{!find_all_sub} it is more efficient.
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
val rfind_sub : ?start:int -> sub:string -> string -> int option
(** [rfind_sub ~start ~sub s] is the start position (if any) of the
first occurences of [sub] in [s] before or at position [start]
(which includes index [start] if it exists, defaults to
[String.length s]).
Note if you need to search for [sub] multiple times in [s] use
{!rfind_all_sub} it is more efficient.
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
val find_all_sub :
?start:int -> (int -> 'acc -> 'acc) -> sub:string -> string -> 'acc -> 'acc
(** [find_all_sub ~start f ~sub s acc], starting with [acc], folds [f] over
all non-overlapping starting positions of [sub] in [s] after or at
position [start] (which includes index [start] if it exists, defaults
to [0]). This is [acc] if [sub] could not be found in [s].
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
val rfind_all_sub :
?start:int -> (int -> 'acc -> 'acc) -> sub:string -> string -> 'acc -> 'acc
(** [rfind_all_sub ~start f ~sub s acc], starting with [acc], folds
[f] over all non-overlapping starting positions of [sub] in [s]
before or at position [start] (which includes index [start] if
it exists, defaults to [String.length s]). This is [acc] if
[sub] could not be found in [s].
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
val replace_first : ?start:int -> sub:string -> by:string -> string -> string
(** [replace_first ~start ~sub ~by s] replaces in [s] the first occurence
of [sub] at or after position [start] (defaults to [0]) by [by].
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
val replace_all : ?start:int -> sub:string -> by:string -> string -> string
(** [replace_all ~start ~sub ~by] replaces in [s] all
non-overlapping occurences of [sub] at or after position [start]
(default to [0]) by [by].
@raise Invalid_argument if [start] is not a valid position of [s].
@since X.XX *)
end
open String (* Make it easier to prepare a PR against upstream *)
let invalid_start ~start len =
let i = Int.to_string in
invalid_arg @@ concat "" ["start: "; i start; "not in range [0;"; i len; "]"]
(** Implement API with naive string search *)
module Naive = struct
let is_sub ~sub s j =
let sublen = length sub in
let i = ref 0 in
while !i < sublen && Char.equal (get s (j + !i)) (get sub !i)
do incr i done;
!i = sublen
let primitive_find_sub ~start ~sub s =
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let smax = slen - length sub in
let j = ref start in
while !j <= smax && not (is_sub ~sub s !j) do incr j done;
if !j <= smax then !j else -1
let primitive_rfind_sub ~start ~sub s =
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let smax = length s - length sub in
let j = ref (if start > smax then smax else start) in
while !j >= 0 && not (is_sub ~sub s !j) do decr j done;
if !j >= 0 then !j else -1
let includes ~affix s = primitive_find_sub ~start:0 ~sub:affix s <> -1
let find_sub ?(start = 0) ~sub s =
match primitive_find_sub ~start ~sub s with -1 -> None | i -> Some i
let rfind_sub ?start ~sub s =
let start = match start with None -> length s | Some s -> s in
match primitive_rfind_sub ~start ~sub s with -1 -> None | i -> Some i
let find_all_sub ?(start = 0) f ~sub s acc =
let rec loop f acc sub s ~start ~slen =
if start > slen then acc else
match primitive_find_sub ~start ~sub s with
| -1 -> acc
| i ->
let acc = f i acc in
let start = i + Int.max (length sub) 1 in
loop f acc sub s ~start ~slen
in
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
loop f acc sub s ~start ~slen
let rfind_all_sub ?start f ~sub s acc =
let rec loop f acc sub s ~start ~slen =
if start < 0 then acc else
match primitive_rfind_sub ~start ~sub s with
| -1 -> acc
| i ->
let start = i - Int.max (length sub) 1 in
loop f (f i acc) sub s ~start ~slen
in
let slen = length s in
let start = match start with None -> length s | Some s -> s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
loop f acc sub s ~start ~slen
let replace_first ?(start = 0) ~sub:needle ~by s =
match primitive_find_sub ~start ~sub:needle s with
| -1 -> s
| i ->
let rest_first = i + length needle in
let rest_len = length s - i - length needle in
concat by [sub s 0 i; sub s rest_first rest_len]
let replace_all ?start ~sub:needle ~by s =
let chunk_first = ref 0 in
let add_chunk i acc =
let last_chunk = sub s !chunk_first (i - !chunk_first) in
chunk_first := i + length needle; last_chunk :: acc
in
match find_all_sub ?start add_chunk ~sub:needle s [] with
| [] -> s
| chunks ->
let last_chunk = sub s !chunk_first (length s - !chunk_first) in
concat by (List.rev (last_chunk :: chunks))
end
(** Implement API with two way string search *)
module Two_way = struct
(* See https://doi.org/10.1145/116825.116845 or
http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 *)
let find_maximal_suffix_and_period ~sub =
let sublen = length sub in
let i = ref (-1) and j = ref 0 and k = ref 1 and p = ref 1 in
let[@inline] maximal_suffix ~order =
while (!j + !k < sublen) do
let c = order * Char.compare (get sub (!j + !k)) (get sub (!i + !k)) in
if c < 0 then (j := !j + !k; k := 1; p := !j - !i) else
if c > 0 then (i := !j; j := !i + 1; k := 1; p := 1) else (* c = 0 *)
if !k = !p then (j := !j + !p; k := 1) else incr k
done;
in
(maximal_suffix[@inlined]) ~order:1;
let l0 = !i and p0 = !p in
i := -1; j := 0; k := 1; p := 1;
(maximal_suffix[@inlined]) ~order:(-1);
let l1 = !i and p1 = !p in
if l0 > l1 then (l0, p0) else (l1, p1)
let periodic_sub ~sub ~sub_lp:(l, p) =
let i = ref 0 in
while !i <= l && Char.equal (get sub !i) (get sub (!i + p))
do incr i done;
!i > l
let primitive_find_sub ~start ~sub ~sub_lp:(l, p as sub_lp) s =
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let sublen = length sub in
let smax = slen - sublen in
let j = ref start in
try
if periodic_sub ~sub ~sub_lp then begin
let memory = ref (-1) in
while (!j <= smax) do
let i = ref (1 + Int.max l !memory) in
while (!i < sublen && Char.equal (get sub !i) (get s (!i + !j)))
do incr i done;
if !i < sublen then (j := !j + (!i - l); memory := -1) else
begin
i := l;
while (!i > !memory && Char.equal (get sub !i) (get s (!i + !j)))
do decr i done;
if !i <= !memory then raise_notrace Exit else
(j := !j + p; memory := sublen - p - 1)
end
done;
-1
end else begin
let p = 1 + Int.max (l + 1) (sublen - l - 1) in
while (!j <= smax) do
let i = ref (l + 1) in
while (!i < sublen && Char.equal (get sub !i) (get s (!i + !j)))
do incr i done;
if !i < sublen then (j := !j + (!i - l)) else
begin
i := l;
while (!i >= 0 && Char.equal (get sub !i) (get s (!i + !j)))
do decr i done;
if !i < 0 then raise_notrace Exit else (j := !j + p)
end
done;
-1
end
with Exit -> !j
let primitive_rfind_sub ~start ~sub ~sub_lp:(l, p as sub_lp) s =
(* Note this is the same as above except for the assignement
and test logic on [j] where we move from right to left. *)
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let sublen = length sub in
let smax = slen - sublen in
let j = ref (if start > smax then smax else start) in
try
if periodic_sub ~sub ~sub_lp then begin
let memory = ref (-1) in
while (!j >= 0) do
let i = ref (1 + Int.max l !memory) in
while (!i < sublen && Char.equal (get sub !i) (get s (!i + !j)))
do incr i done;
if !i < sublen then (j := !j - (!i - l); memory := -1) else
begin
i := l;
while (!i > !memory && Char.equal (get sub !i) (get s (!i + !j)))
do decr i done;
if !i <= !memory then raise_notrace Exit else
(j := !j - p; memory := sublen - p - 1)
end
done;
-1
end else begin
let p = 1 + Int.max (l + 1) (sublen - l - 1) in
while (!j >= 0) do
let i = ref (l + 1) in
while (!i < sublen && Char.equal (get sub !i) (get s (!i + !j)))
do incr i done;
if !i < sublen then (j := !j - (!i - l)) else
begin
i := l;
while (!i >= 0 && Char.equal (get sub !i) (get s (!i + !j)))
do decr i done;
if !i < 0 then raise_notrace Exit else (j := !j - p)
end
done;
-1
end
with Exit -> !j
let includes ~affix:sub s =
let sub_lp = find_maximal_suffix_and_period ~sub in
primitive_find_sub ~start:0 ~sub ~sub_lp s <> -1
let find_sub ?(start = 0) ~sub s =
let sub_lp = find_maximal_suffix_and_period ~sub in
match primitive_find_sub ~start ~sub_lp ~sub s with
| -1 -> None | i -> Some i
let rfind_sub ?start ~sub s =
let start = match start with None -> length s | Some s -> s in
let sub_lp = find_maximal_suffix_and_period ~sub in
match primitive_rfind_sub ~start ~sub_lp ~sub s with
| -1 -> None | i -> Some i
let find_all_sub ?(start = 0) f ~sub s acc =
let rec loop f acc sub sub_lp s ~start ~slen =
if start > slen then acc else
match primitive_find_sub ~start ~sub ~sub_lp s with
| -1 -> acc
| i ->
let acc = f i acc in
let start = i + length sub in
let start = if start = i then start + 1 else start in
loop f acc sub sub_lp s ~start ~slen
in
let slen = length s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let sub_lp = find_maximal_suffix_and_period ~sub in
loop f acc sub sub_lp s ~start ~slen
let rfind_all_sub ?start f ~sub s acc =
let rec loop f acc sub sub_lp s ~start ~slen =
if start < 0 then acc else
match primitive_rfind_sub ~start ~sub ~sub_lp s with
| -1 -> acc
| i ->
let start = i - Int.max (length sub) 1 in
loop f (f i acc) sub sub_lp s ~start ~slen
in
let slen = length s in
let start = match start with None -> length s | Some s -> s in
if not (0 <= start && start <= slen) then invalid_start ~start slen else
let sub_lp = find_maximal_suffix_and_period ~sub in
loop f acc sub sub_lp s ~start ~slen
let replace_first ?(start = 0) ~sub:needle ~by s =
let sub_lp = find_maximal_suffix_and_period ~sub:needle in
match primitive_find_sub ~start ~sub:needle ~sub_lp s with
| -1 -> s
| i ->
let rest_first = i + length needle in
let rest_len = length s - i - length needle in
concat by [sub s 0 i; sub s rest_first rest_len]
let replace_all ?start ~sub:needle ~by s =
let chunk_first = ref 0 in
let add_chunk i acc =
let acc = sub s !chunk_first (i - !chunk_first) :: acc in
chunk_first := i + length needle; acc
in
match find_all_sub ?start add_chunk ~sub:needle s [] with
| [] -> s
| chunks ->
let chunks = sub s !chunk_first (length s - !chunk_first) :: chunks in
concat by (List.rev chunks)
end
(* Tests *)
let is_invalid_arg f = try (f (); assert false) with Invalid_argument _ -> ()
let test_find_sub (module String : API) =
is_invalid_arg (fun () -> String.find_sub ~start:(-1) ~sub:"" "");
assert (String.find_sub ~start:0 ~sub:"" "" = Some 0);
is_invalid_arg (fun () -> String.find_sub ~start:1 ~sub:"" "");
assert (String.find_sub ~start:0 ~sub:"" "ab" = Some 0);
assert (String.find_sub ~start:1 ~sub:"" "ab" = Some 1);
assert (String.find_sub ~start:2 ~sub:"" "ab" = Some 2);
is_invalid_arg (fun () -> String.find_sub ~start:3 ~sub:"" "ab");
assert (String.find_sub ~start:0 ~sub:"a" "" = None);
assert (String.find_sub ~start:0 ~sub:"a" "a" = Some 0);
assert (String.find_sub ~start:1 ~sub:"a" "a" = None);
assert (String.find_sub ~start:0 ~sub:"a" "ba" = Some 1);
assert (String.find_sub ~start:1 ~sub:"a" "ba" = Some 1);
assert (String.find_sub ~start:2 ~sub:"a" "ba" = None);
assert (String.find_sub ~start:0 ~sub:"ab" "" = None);
assert (String.find_sub ~start:0 ~sub:"ab" "ab" = Some 0);
assert (String.find_sub ~start:0 ~sub:"ab" "aab" = Some 1);
assert (String.find_sub ~start:1 ~sub:"ab" "aab" = Some 1);
assert (String.find_sub ~start:2 ~sub:"ab" "aab" = None);
assert (String.find_sub ~start:3 ~sub:"ab" "aab" = None);
is_invalid_arg (fun () -> String.find_sub ~start:(-1) ~sub:"abaa" "aba");
assert (String.find_sub ~start:0 ~sub:"abaa" "aba" = None);
assert (String.find_sub ~start:2 ~sub:"abaa" "aba" = None);
assert (String.find_sub ~start:3 ~sub:"abaa" "aba" = None);
is_invalid_arg (fun () -> String.find_sub ~start:4 ~sub:"abaa" "aba");
assert (String.find_sub ~start:0 ~sub:"aba" "ababa" = Some 0);
assert (String.find_sub ~start:1 ~sub:"aba" "ababa" = Some 2);
assert (String.find_sub ~start:2 ~sub:"aba" "ababa" = Some 2);
assert (String.find_sub ~start:3 ~sub:"aba" "ababa" = None);
assert (String.find_sub ~start:4 ~sub:"aba" "ababa" = None);
assert (String.find_sub ~start:5 ~sub:"aba" "ababa" = None);
()
let test_rfind_sub (module String : API) =
is_invalid_arg (fun () -> String.rfind_sub ~start:(-1) ~sub:"" "");
assert (String.rfind_sub ~start:0 ~sub:"" "" = Some 0);
is_invalid_arg (fun () -> String.rfind_sub ~start:1 ~sub:"" "");
assert (String.rfind_sub ~start:0 ~sub:"" "ab" = Some 0);
assert (String.rfind_sub ~start:1 ~sub:"" "ab" = Some 1);
assert (String.rfind_sub ~start:2 ~sub:"" "ab" = Some 2);
is_invalid_arg (fun () -> String.rfind_sub ~start:3 ~sub:"" "ab");
assert (String.rfind_sub ~start:0 ~sub:"a" "" = None);
assert (String.rfind_sub ~start:0 ~sub:"a" "a" = Some 0);
assert (String.rfind_sub ~start:1 ~sub:"a" "a" = Some 0);
assert (String.rfind_sub ~start:0 ~sub:"a" "ba" = None);
assert (String.rfind_sub ~start:1 ~sub:"a" "ba" = Some 1);
assert (String.rfind_sub ~start:2 ~sub:"a" "ba" = Some 1);
assert (String.rfind_sub ~start:0 ~sub:"ab" "" = None);
assert (String.rfind_sub ~start:0 ~sub:"ab" "ab" = Some 0);
assert (String.rfind_sub ~start:0 ~sub:"ab" "aab" = None);
assert (String.rfind_sub ~start:1 ~sub:"ab" "aab" = Some 1);
assert (String.rfind_sub ~start:2 ~sub:"ab" "aab" = Some 1);
assert (String.rfind_sub ~start:3 ~sub:"ab" "aab" = Some 1);
is_invalid_arg (fun () -> String.rfind_sub ~start:(-1) ~sub:"abaa" "aba");
assert (String.rfind_sub ~start:0 ~sub:"abaa" "aba" = None);
assert (String.rfind_sub ~start:2 ~sub:"abaa" "aba" = None);
assert (String.rfind_sub ~start:3 ~sub:"abaa" "aba" = None);
is_invalid_arg (fun () -> String.rfind_sub ~start:4 ~sub:"abaa" "aba");
assert (String.rfind_sub ~start:0 ~sub:"aba" "ababa" = Some 0);
assert (String.rfind_sub ~start:1 ~sub:"aba" "ababa" = Some 0);
assert (String.rfind_sub ~start:2 ~sub:"aba" "ababa" = Some 2);
assert (String.rfind_sub ~start:3 ~sub:"aba" "ababa" = Some 2);
assert (String.rfind_sub ~start:4 ~sub:"aba" "ababa" = Some 2);
assert (String.rfind_sub ~start:5 ~sub:"aba" "ababa" = Some 2);
()
let test_find_all_sub (module String : API) =
let test ~sub s occs =
assert (List.rev (String.find_all_sub List.cons ~sub s []) = occs)
in
test ~sub:"" "" [0];
test ~sub:"" "ab" [0;1;2];
test ~sub:"a" "" [];
test ~sub:"a" "a" [0];
test ~sub:"a" "ba" [1];
test ~sub:"ab" "" [];
test ~sub:"ab" "ab" [0];
test ~sub:"ab" "aab" [1];
test ~sub:"abaa" "aba" [];
test ~sub:"abaa" "aba" [];
test ~sub:"aba" "ababa" [0];
test ~sub:"aba" "babababab" [1;5];
()
let test_rfind_all_sub (module String : API) =
let test ~sub s occs =
assert (List.rev (String.rfind_all_sub List.cons ~sub s []) = occs)
in
test ~sub:"" "" [0];
test ~sub:"" "ab" [2;1;0];
test ~sub:"a" "" [];
test ~sub:"a" "a" [0];
test ~sub:"a" "ba" [1];
test ~sub:"ab" "" [];
test ~sub:"ab" "ab" [0];
test ~sub:"ab" "aab" [1];
test ~sub:"abaa" "aba" [];
test ~sub:"abaa" "aba" [];
test ~sub:"aba" "ababa" [2];
test ~sub:"aba" "babababab" [5;1];
()
let test_includes (module String : API) =
assert (String.includes ~affix:"" "" = true);
assert (String.includes ~affix:"" "a" = true);
assert (String.includes ~affix:"" "ab" = true);
assert (String.includes ~affix:"a" "" = false);
assert (String.includes ~affix:"a" "a" = true);
assert (String.includes ~affix:"a" "ab" = true);
assert (String.includes ~affix:"a" "ba" = true);
assert (String.includes ~affix:"a" "bab" = true);
assert (String.includes ~affix:"ab" "" = false);
assert (String.includes ~affix:"ab" "a" = false);
assert (String.includes ~affix:"ab" "ab" = true);
assert (String.includes ~affix:"ab" "aab" = true);
assert (String.includes ~affix:"aab" "ab" = false);
assert (String.includes ~affix:"ab" "aba" = true);
assert (String.includes ~affix:"ab" "aaba" = true);
()
let test_replace_first (module String : API) =
assert (String.replace_first ~sub:"" ~by:"" "" = "");
assert (String.replace_first ~sub:"" ~by:"a" "" = "a");
assert (String.replace_first ~sub:"" ~by:"a" "123" = "a123");
assert (String.replace_first ~start:1 ~sub:"" ~by:"a" "123" = "1a23");
assert (String.replace_first ~start:2 ~sub:"" ~by:"a" "123" = "12a3");
assert (String.replace_first ~start:3 ~sub:"" ~by:"a" "123" = "123a");
assert (String.replace_first ~sub:"1" ~by:"" "123" = "23");
assert (String.replace_first ~sub:"3" ~by:"" "123" = "12");
assert (String.replace_first ~sub:"1" ~by:"" "1" = "");
assert (String.replace_first ~sub:"12" ~by:"z" "123" = "z3");
assert (String.replace_first ~start:2 ~sub:"" ~by:"z" "123" = "12z3");
assert (String.replace_first ~start:3 ~sub:"" ~by:"z" "123" = "123z");
assert (String.replace_first ~start:3 ~sub:"a" ~by:"z" "123" = "123");
()
let test_replace_all (module String : API) =
assert (String.replace_all ~sub:"" ~by:"" "" = "");
assert (String.replace_all ~sub:"" ~by:"" "1" = "1");
assert (String.replace_all ~sub:"" ~by:"" "12" = "12");
assert (String.replace_all ~sub:"" ~by:"a" "" = "a");
assert (String.replace_all ~sub:"" ~by:"a" "1" = "a1a");
assert (String.replace_all ~sub:"" ~by:"a" "12" = "a1a2a");
assert (String.replace_all ~sub:"" ~by:"a" "123" = "a1a2a3a");
assert (String.replace_all ~start:0 ~sub:"" ~by:"a" "123" = "a1a2a3a");
assert (String.replace_all ~start:1 ~sub:"" ~by:"a" "123" = "1a2a3a");
assert (String.replace_all ~start:2 ~sub:"" ~by:"a" "123" = "12a3a");
assert (String.replace_all ~start:3 ~sub:"" ~by:"a" "123" = "123a");
assert (String.replace_all ~sub:"1" ~by:"" "121" = "2");
assert (String.replace_all ~sub:"1" ~by:"3" "121" = "323");
assert (String.replace_all ~sub:"1" ~by:"" "1" = "");
assert (String.replace_all ~sub:"12" ~by:"a" "123" = "a3");
assert (String.replace_all ~sub:"12" ~by:"a" "123112" = "a31a");
assert (String.replace_all ~start:1 ~sub:"12" ~by:"a" "123112" = "1231a");
()
let test api =
test_find_sub api;
test_rfind_sub api;
test_find_all_sub api;
test_rfind_all_sub api;
test_includes api;
test_replace_first api;
test_replace_all api;
Printf.eprintf "All test passed.\n";
()
(* Benchmarking *)
let random_ab_string n = String.init n (fun _ -> Char.chr (0x61 + Random.int 2))
let worst_ab_string n = (* a^(n-1)b *)
String.init n (fun i -> if i = n - 1 then 'b' else 'a')
let benchmark (module String : API) ~search_first ~worst_case needle_size =
let () = Random.init 45 in
let haystack_size = 4 * 1024 in
for i = 1 to 1024 do
let needle, haystack =
if worst_case
then worst_ab_string needle_size, Stdlib.String.make haystack_size 'a'
else random_ab_string needle_size, random_ab_string haystack_size
in
let by = "" in
Sys.opaque_identity @@
if search_first
then (ignore (String.replace_first ~sub:needle ~by haystack))
else (ignore (String.replace_all ~sub:needle ~by haystack))
done
let run bench impl ~search_first ~worst_case =
let api = match impl with
| `Naive -> (module Naive : API)
| `Two_way -> (module Two_way : API)
in
match bench with
| None -> test api; 0
| Some needle_size -> benchmark api ~search_first ~worst_case needle_size; 0
(* Main *)
let main () =
let bench = ref None in
let impl = ref `Two_way in
let search_first = ref false in
let worst_case = ref false in
let args =
[ "--bench", Arg.Int (fun needle_size -> bench := Some needle_size),
"<n> Test peformance for searching with a needle of given needle size";
"--use-naive", Arg.Unit (fun () -> impl := `Naive),
"Use naive search";
"--use-two-way", Arg.Unit (fun () -> impl := `Two_way),
"Use two way search";
"--search-first", Arg.Set search_first,
"Only search first occurence";
"--worst-case", Arg.Set worst_case,
"Test on worst case string search"; ]
in
Arg.parse args (fun v -> raise (Arg.Bad "Unknown positional argument")) "";
run !bench !impl ~search_first:!search_first ~worst_case:!worst_case
let () = if !Sys.interactive then () else exit (main ())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment