Regular Expression Matching Back to the Win32 Shootout
Back to dada's perl lab

[The Original Shootout]   [NEWS]   [FAQ]   [Methodology]   [Platform Details]   [Acknowledgements]   [Scorecard]  
All Source For Regular Expression Matching
regexmatch.awka
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    n = (ARGV[1] < 1) ? 1 : ARGV[1];
    delete ARGV;
}

{ phones[p++] = $0; }

END {
    for (i=0; i<n; i++) {
    for (j=0; j<p; j++) {
        line = phones[j];
        if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
        m1 = substr(line, RSTART, RLENGTH);
        num = ""
        if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (substr(m1, RSTART-1, 1) == "(") {
            break;
            }
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = "(" parts[1] ") " parts[2] "-" parts[3];
            }
        } else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = parts[1] " " parts[2] "-" parts[3];
            }
        }
        if (i == (n-1)) {
            count++;
            printf("%d: %s\n", count, num);
        }
        }
    }
    }
}
regexmatch.cygperl
#!/usr/local/bin/perl 
# $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $
# http://www.bagley.org/~doug/shootout/

use strict;

my $re = qr{
    (?: ^ | [^\d\(])        # must be preceeded by non-digit
    ( \( )?            # match 1: possible initial left paren
    (\d\d\d)            # match 2: area code is 3 digits
    (?(1) \) )            # if match1 then match right paren
    [ ]                # area code followed by one space
    (\d\d\d)            # match 3: prefix of 3 digits
    [ -]            # separator is either space or dash
    (\d\d\d\d)            # match 4: last 4 digits
    \D                # must be followed by a non-digit
}x;

my $NUM = $ARGV[0];
$NUM = 1 if ($NUM < 1);

my @phones = <STDIN>;
my $count = 0;
my $num;
while ($NUM--) {
    foreach (@phones) {
    if (/$re/o) {
        $num = "($2) $3-$4";
        if (0 == $NUM) {
        $count++;
        print "$count: $num\n";
        }
    }
    }
}
regexmatch.erlang
%%% -*- mode: erlang -*-
%%% $Id: regexmatch.erlang,v 1.7 2001/01/06 22:35:23 doug Exp $
%%% http://www.bagley.org/~doug/shootout/

-module(regexmatch).
-export([main/0, main/1]).

%% get the program argument, which is how many test iterations to run
main() -> main(['1']).
main([Arg]) ->
    Num = list_to_integer(atom_to_list(Arg)),
    {ok, Re} = regexp:parse(
         "(^|[^0-9\\(])"        % preceeding non-digit or bol
         "("                % area code
         "\\([0-9][0-9][0-9]\\)"    % is either 3 digits in parens
         "|"                % or
         "[0-9][0-9][0-9]"        % just 3 digits
         ")"                % end of area code
         " "                % area code is followed by one space
         "[0-9][0-9][0-9]"        % exchange is 3 digits
         "[ -]"                % separator is either space or dash
         "[0-9][0-9][0-9][0-9]"        % last 4 digits
         "($|[^0-9])"            % must be followed by a non-digit
        ),
    Plist = readlines(),
    test(Num, Re, Plist),
    halt(0).


test(1, Regexp, Plist) -> 
    % display output on last iteration
    Nums = match_phones(Regexp, Plist),
    print_phones(1, Nums),
    true;
test(N, Regexp, Plist) ->
    match_phones(Regexp, Plist),
    test(N-1, Regexp, Plist).


print_phones(Count, [H|T]) ->
    [A,E,N] = H,
    % A,E,N is a list of the matching sub-expressions, which are:
    % Areacode (3 digits), Exchange (3 digits), Number (4 digits)
    io:fwrite("~w: (~s) ~s-~s~n", [Count, A,E,N]),
    print_phones(Count+1, T);
print_phones(_, []) ->
    true.


match_phones(Regexp, List) ->
    mapfilter(
      fun(String) ->
          case regexp:matches(String, Regexp) of
          {match, []}      -> false;
          {match, Matches} -> parse_phone(String, Matches);
          _                -> false
          end
      end,
      List).


parse_phone(Str, [H|T]) ->
    {Start, Len} = H,
    % Numstr is something that looks like a complete phone #
    Numstr = string:substr(Str, Start, Len),
    case regexp:matches(Numstr, "[0-9][0-9][0-9][0-9]*") of
    {match, []}      -> false;
    {match, Matches} ->
        lists:map(fun({Offset, Length}) ->
                  string:substr(Numstr, Offset, Length) end,
              Matches);
    _                -> false
    end;
parse_phone(Str, []) -> [].


mapfilter(Fun, [H|T]) ->
    case Fun(H) of
    false -> mapfilter(Fun, T);
    New   -> [New | mapfilter(Fun, T)]
    end;
mapfilter(_, []) -> [].


readlines() ->
    Port = open_port({fd, 0, 1}, [eof, {line, 512}]),
    readlines_from_stream([], Port).

readlines_from_stream(Lines, Port) ->
    receive
    {Port, eof} ->
        lists:reverse(Lines);
    {Port, {_, {_, Line}}} ->
        readlines_from_stream([Line|Lines], Port)
    end.
regexmatch.gawk
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    n = (ARGV[1] < 1) ? 1 : ARGV[1];
    delete ARGV;
}

{ phones[p++] = $0; }

END {
    for (i=0; i<n; i++) {
    for (j=0; j<p; j++) {
        line = phones[j];
        if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
        m1 = substr(line, RSTART, RLENGTH);
        num = ""
        if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (substr(m1, RSTART-1, 1) == "(") {
            break;
            }
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = "(" parts[1] ") " parts[2] "-" parts[3];
            }
        } else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = parts[1] " " parts[2] "-" parts[3];
            }
        }
        if (i == (n-1)) {
            count++;
            printf("%d: %s\n", count, num);
        }
        }
    }
    }
}
regexmatch.gcc
/* -*- mode: c -*-
 * $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <pcre.h>
#include <string.h>

#define MAXLINES   100
#define MAXLINELEN 132

char *pattern = 
"(?:^|[^\\d\\(])"        
"(\\()?"            
"(\\d\\d\\d)"            
"(?(1)\\))"            
"[ ]"                
"(\\d\\d\\d)"            
"[ -]"                
"(\\d\\d\\d\\d)"        
"\\D"                
;


int
main(int argc, char *argv[]) {
    int NUM = ((argc == 2) ? atoi(argv[1]) : 1);
    int count;
    char *cptr = "";
    char **phones;
    pcre *re;
    int erroffset;
    const char *errptr;
    int n, lines = 0;
    char num[256];
    int i, j, k, matchlen;
    char *matchoffset;
    int nmatches;
    int *ovec, ovecsize;
    pcre_extra *study;

    phones = (char **)malloc(MAXLINES * sizeof(char *));
    if (!phones) {
    fprintf(stderr, "malloc for phones array failed\n");
    exit(1);
    }
    lines = 0;
    while (cptr) {
    phones[lines] = (char *)malloc(MAXLINELEN);
    if (!phones[lines]) {
        fprintf(stderr, "malloc to hold line #%d failed\n", lines);
        exit(1);
    }
    cptr = fgets(phones[lines], MAXLINELEN, stdin);
    lines++;
    if (lines > MAXLINES) {
        fprintf(stderr, "MAXLINES is too small\n");
        exit(1);
    }
    }

    re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL);
    if (!re) {
    fprintf(stderr, "can't open compile regexp\n");
    exit(1);
    }

    study = pcre_study(re, 0, &errptr);

    if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) {
    fprintf(stderr, "pcre_fullinfo failed\n");
    exit(1);
    }
    nmatches++;            

    ovecsize = sizeof(int) * nmatches * 3;
    ovec = (int *)malloc(ovecsize);
    if (!ovec) {
    fprintf(stderr, "malloc for ovec array failed\n");
    exit(1);
    }

    count = 0;
    while (NUM--) {
    for (i=0; i<lines; i++) {
        n = pcre_exec(re, study,
              phones[i], strlen(phones[i]), 0,
              0, ovec, ovecsize);
        if (n == nmatches) {
        
        k = 2*2;    
        
        j = 0;
        num[j++] = '(';
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        num[j++] = ')';
        
        num[j++] = ' ';
        
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        
        num[j++] = '-';
        
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        
        num[j] = 0;
        if (0 == NUM) {
            count++;
            printf("%d: %s\n", count, num);
        }
        }
    }
    }

    for (i=0; i<MAXLINES; i++) {
    free(phones[i]);
    }
    free(phones);
    free(ovec);

    return(0);
}

regexmatch.gforth
\ -*- mode: forth -*-
\ $Id: regexmatch.gforth,v 1.1 2001/05/26 15:59:44 doug Exp $
\ http://www.bagley.org/~doug/shootout/

\ from Anton Ertl:
\ this uses the Gray parser generator, which is probably too big a
\ cannon for this problem (it also needs a lot of setup code).
\ Writing a recursive descent parser by hand is probably both smaller
\ and faster in this case.


0. argc @ 1- arg >number 2drop drop constant NUM

warnings off \ Gray is a little wordy

require gray.fs

: slurp-fid { fid -- addr u }
    0 0 begin 
    dup 1024 + dup >r extend-mem 
    rot r@ fid read-file throw 
    r> 2dup =
    while 
    2drop
    repeat
    - + dup >r resize throw r> ;

: bit-equiv 
    \ w3=~w1^w2
    invert xor ;

: set-complement 
    empty ['] bit-equiv binary-set-operation ;

variable input \ pointer to next character to be scanned
variable end-input \ pointer to end of input
-1 constant eof-char

: start 
    input @ ;

: end 
    input @ over - ;

: get-input 
    start end-input @ = if
    eof-char
    else
    start c@
    endif ;

256 max-member
s" scan failed" exception constant scanfail

: ?nextchar 
    0= scanfail and throw
    1 chars input +! ;
    
: testchar? 
    get-input member? ;
' testchar? test-vector !

: .. 
 
 empty copy-set
 swap 1+ rot do
  i over add-member
 loop ;

: ` 
    \ creates anonymous terminal for the character c )
    char singleton ['] ?nextchar make-terminal ;

char 0 char 9 .. dup  ' ?nextchar  terminal digit
set-complement        ' ?nextchar  terminal nondigit
bl singleton          ' ?nextchar  terminal lspace

2variable areacode
2variable exchange
2variable last4

)
<- area-code

 || area-code ))
   lspace {{ start }} digit digit digit {{ end exchange 2! }}
   )
   {{ start }} digit digit digit digit {{ end last4 2! }}
   nondigit
)) <- telnum 

telnum parser scan-telnum 

: scan-for-nondigit 
    begin
    count  >r
    r@ '0 < r@ '9 > or  r> '( <>  and
    over end-input @ u>= or
    until ;

variable count  0 count !

: scanfile 
    over + end-input !
    begin 
    dup input !
    ['] scan-telnum catch
    dup dup scanfail <> and throw
    if 
        scan-for-nondigit
    else
        1 count +! count @ 1 u.r ." : "
        ."  " exchange 2@ type ." -" last4 2@ type
        cr
        end-input @ over - #lf scan drop \ skip rest of line
    endif
    dup end-input @ u>=
    until
    drop ;

: mainloop 
    ['] 2drop [is] type
    NUM 1 +do
    2dup scanfile
    loop
    [']  [is] type
    scanfile ;
    
stdin slurp-fid mainloop bye

regexmatch.guile
#!/usr/local/bin/guile \
-e main -s
!#

;;; $Id: regexmatch.guile,v 1.6 2001/06/29 23:12:37 doug Exp $
;;; http://www.bagley.org/~doug/shootout/

(use-modules (ice-9 format))
(use-modules (ice-9 regex))

(define regexp
  (string-append
   "(^|[^0-9\\(])"            ; (1) preceeding non-digit or bol
   "("                    ; (2) area code
   "\\(([0-9][0-9][0-9])\\)"        ; (3) is either 3 digits in parens
   "|"                    ; or
   "([0-9][0-9][0-9])"            ; (4) just 3 digits
   ")"                    ; end of area code
   " "                    ; area code is followed by one space
   "([0-9][0-9][0-9])"            ; (5) exchange is 3 digits
   "[ -]"                ; separator is either space or dash
   "([0-9][0-9][0-9][0-9])"        ; (6) last 4 digits
   "([^0-9]|$)"                ; must be followed by a non-digit
   ))


(define (main args)
  (let ((n (or (and (= (length args) 2) (string->;number (cadr args))) 1))
    (phonelines '())
    (rx (make-regexp regexp))
    (count 0))
    (let loop ((line (read-line)))
      (cond ((eof-object? line) #f)
        (else
         (set! phonelines (append phonelines (list line)))
         (loop (read-line)))))
    (while (>; n 0)
       (set! n (- n 1))
       (let loop ((phones phonelines)
              (count 0))
         (if (null? phones)
         count
         (let ((match (regexp-exec rx (car phones))))
           (if match
               (let* ((area (if (match:start match 3)
                    (match:substring match 3)
                    (match:substring match 4)))
                  (exch (match:substring match 5))
                  (numb (match:substring match 6))
                  (num (string-append "(" area ") " exch "-" numb)))
             (set! count (+ count 1))
             (if (= 0 n)
                  (display (format "~D: ~a\n" count num)))))
           (loop (cdr phones) count)))))))

regexmatch.ici
// $Id: regexmatch.ici,v 1.0 2003/01/03 12:07:00 dada Exp $
// http://dada.perl.it/shootout
//
// contributed by Tim Long

n := argv[1] ? int(argv[1]) : 1;
lines = gettokens(stdin, '\n', "");
j = 0;
while (--n)
{
    forall (l in lines)
    {
        a = l ~~~ #^[^\d(]*(?:\((\d\d\d)\)|(\d\d\d)) (\d\d\d)[ -](\d\d\d\d)(?:\D|$)#;
        if (n == 1 && a)
            printf("%d: (%s%s) %s-%s\n", ++j, a[0], a[1], a[2], a[3]);
    }
}

/*
 * The second last line of the test data is "foo (213 222-2222 bar", which
 * by my reading of the spec should match. But it is in the "shouldn't match"
 * section and no other programs matches it. So the above regexp is tailored
 * not to match it too.
 */
regexmatch.java
// $Id: regexmatch.java,v 1.2 2003/10/13 16:50:21 tomk Exp $
// http://www.bagley.org/~doug/shootout/
// contributed by Tom Kludy

import java.io.*;
import java.util.*;
import java.util.regex.*;

public class regexmatch {

  public static void main(String args[])
  throws IOException, PatternSyntaxException {

    int n = (args.length > 0) ? Integer.parseInt(args[0]) : 1;
    LinkedList lines = new LinkedList();
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    String line;
    while( (line = in.readLine()) != null )
      lines.addLast(line);
    in.close();

    Pattern pattern = Pattern.compile(
      "(?:^|[^\\d\\(])"+           // must be preceeded by non-digit
      "(?:\\((\\d\\d\\d)\\)|(\\d\\d\\d))"+// area code is 3 digits (match 1&2)
      "[ ]"+                       // area code followed by one space
      "(\\d\\d\\d)"+               // match 3: prefix of 3 digits
      "[ -]"+                      // separator is either space or dash
      "(\\d\\d\\d\\d)"+            // match 4: last 4 digits
      "(?:\\D|$)"                  // must be followed by a non-digit
      );

    int count = 0;
    while(--n >= 0) {
      for (ListIterator li = lines.listIterator(); li.hasNext();) {
        Matcher matcher = pattern.matcher((String)li.next());
        if (matcher.find()) {
          StringBuffer num = new StringBuffer("(");
          String areaCode = matcher.group(1);
          if ( areaCode == null )
            areaCode = matcher.group(2);
          num.append(areaCode).append(") ").append(matcher.group(3))
             .append("-").append(matcher.group(4));
          if ( n == 0 )
            System.out.println(++count + ": " + num);
        }
      }
    }
  }
}

regexmatch.lua
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- implemented by: Roberto Ierusalimschy

text = read("*a")

-- I added the following line and slightly modified the pattern 
-- match below so that the program will reject the case:
-- 1(111) 111-1111 (due to preceeding digit)
-- (Doug)
text = gsub(gsub(text, "^", "   "), "\n", "\n   ")

N = tonumber((arg and arg[1])) or 1
count = 0
while N > 0 do
  gsub(text,"%D(%D)(%d%d%d)(%)?) (%d%d%d)[- ](%d+)",
    function (A,area,B,exch,digits)
      if (A == '(') == (B == ')') and strlen(digits) == 4 then
        local tel = "("..area..") "..exch.."-"..digits
        if N == 1 then
          count = count+1
          write(count, ": ", tel, "\n")
        end
      end
    end)
  N = N-1
end
regexmatch.lua5
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- contributed by Roberto Ierusalimschy

local text = io.read("*a")

-- make sure text does not start with a number
text = "\n" .. text

-- pattern is: not a digit, optional (, 3 digits, optional ),
-- space, 3 digits, space or hyphen, 4 digits, not a digit
local pattern = "%D(%(?)(%d%d%d)(%)?) (%d%d%d)[- ](%d%d%d%d)%f[%D]"

local N = tonumber((arg and arg[1])) or 1
local count = 0
for i=N,1,-1 do
  for open,area,close,exch,digits in string.gfind(text, pattern) do
      if (open == '(') == (close == ')') then
        local tel = "("..area..") "..exch.."-"..digits
        if i == 1 then
          count = count+1
          io.write(count, ": ", tel, "\n")
        end
      end
    end
end

regexmatch.mawk
# $Id: regexmatch.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    n = (ARGV[1] < 1) ? 1 : ARGV[1];
    delete ARGV;
}

{ phones[p++] = $0; }

END {
    for (i=0; i<n; i++) {
    for (j=0; j<p; j++) {
        line = phones[j];
        if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
        m1 = substr(line, RSTART, RLENGTH);
        num = ""
        if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (substr(m1, RSTART-1, 1) == "(") {
            break;
            }
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = "(" parts[1] ") " parts[2] "-" parts[3];
            }
        } else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
            if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
            num = parts[1] " " parts[2] "-" parts[3];
            }
        }
        if (i == (n-1)) {
            count++;
            printf("%d: %s\n", count, num);
        }
        }
    }
    }
}
regexmatch.ocaml
(*
 * $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 * from: Markus Mottl
 *)

open Pcre

let rex =
  regexp ~flags:[`EXTENDED]
    "(?: ^ | [^\d\(])     # must be preceeded by non-digit
     (\(\d\d\d\)|\d\d\d)  # match 1: area code
     [ ]                  # area code followed by one space
     \d\d\d               # prefix of 3 digits
     [ -]                 # separator is either space or dash
     \d\d\d\d             # last 4 digits
     (?: \D|$)            # must be followed by a non-digit (or EOL)"

let phones =
  let lines = ref [] in
  foreach_line (fun line -> lines := line :: !lines);
  List.rev !lines

let check_phone irflags ar cnt must_print line =
  try
    unsafe_pcre_exec irflags rex 0 line 4 ar;
    let num = String.copy "(...) ...-...." in
    let pos = Array.unsafe_get ar 2 in
    let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in
    let pos = pos + ofs in
    String.unsafe_blit line pos num 1 3;
    let pos = pos + ofs + 4 in
    String.unsafe_blit line pos num 6 3;
    String.unsafe_blit line (pos + 4) num 10 4;
    if must_print then Printf.printf "%d: %s\n" !cnt num;
    incr cnt
  with Not_found -> ()

let _ =
  let n =
    try int_of_string Sys.argv.(1)
    with Invalid_argument _ -> 1 in
  for i = 2 to n do
    List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones
  done;
  List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
regexmatch.ocamlb
(*
 * $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 * from: Markus Mottl
 *)

open Pcre

let rex =
  regexp ~flags:[`EXTENDED]
    "(?: ^ | [^\d\(])     # must be preceeded by non-digit
     (\(\d\d\d\)|\d\d\d)  # match 1: area code
     [ ]                  # area code followed by one space
     \d\d\d               # prefix of 3 digits
     [ -]                 # separator is either space or dash
     \d\d\d\d             # last 4 digits
     (?: \D|$)            # must be followed by a non-digit (or EOL)"

let phones =
  let lines = ref [] in
  foreach_line (fun line -> lines := line :: !lines);
  List.rev !lines

let check_phone irflags ar cnt must_print line =
  try
    unsafe_pcre_exec irflags rex 0 line 4 ar;
    let num = String.copy "(...) ...-...." in
    let pos = Array.unsafe_get ar 2 in
    let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in
    let pos = pos + ofs in
    String.unsafe_blit line pos num 1 3;
    let pos = pos + ofs + 4 in
    String.unsafe_blit line pos num 6 3;
    String.unsafe_blit line (pos + 4) num 10 4;
    if must_print then Printf.printf "%d: %s\n" !cnt num;
    incr cnt
  with Not_found -> ()

let _ =
  let n =
    try int_of_string Sys.argv.(1)
    with Invalid_argument _ -> 1 in
  for i = 2 to n do
    List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones
  done;
  List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
regexmatch.perl
#!/usr/local/bin/perl 
# $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $
# http://www.bagley.org/~doug/shootout/

use strict;

my $re = qr{
    (?: ^ | [^\d\(])        # must be preceeded by non-digit
    ( \( )?            # match 1: possible initial left paren
    (\d\d\d)            # match 2: area code is 3 digits
    (?(1) \) )            # if match1 then match right paren
    [ ]                # area code followed by one space
    (\d\d\d)            # match 3: prefix of 3 digits
    [ -]            # separator is either space or dash
    (\d\d\d\d)            # match 4: last 4 digits
    \D                # must be followed by a non-digit
}x;

my $NUM = $ARGV[0];
$NUM = 1 if ($NUM < 1);

my @phones = <STDIN>;
my $count = 0;
my $num;
while ($NUM--) {
    foreach (@phones) {
    if (/$re/o) {
        $num = "($2) $3-$4";
        if (0 == $NUM) {
        $count++;
        print "$count: $num\n";
        }
    }
    }
}
regexmatch.pike
#!/usr/local/bin/pike// -*- mode: pike -*-
// $Id: regexmatch.pike,v 1.2 2000/12/05 16:04:06 doug Exp $
// http://www.bagley.org/~doug/shootout/
// from: Fredrik Noring

constant area = "([0-9][0-9][0-9]|\\([0-9][0-9][0-9]\\))";
constant exch = "([0-9][0-9][0-9])";
constant last = "([0-9][0-9][0-9][0-9])";

void main(int argc, array(string) argv)
{
    Regexp r = Regexp("^[^0-9\\(]*"+area+" "+exch+"[ -]"+last+"[^0-9]*$");
    array(string) phones = Stdio.stdin->read()/"\n";
    int n = (int)argv[-1];
    int count = 0;
      
    while(n--)
    foreach(phones, string phone)
        if(array(string) parts = r->split(phone))
        if(n == 0)
            if(parts[0][0] == '(')
            write("%d: %s %s-%s\n", ++count, @parts);
            else
            write("%d: (%s) %s-%s\n", ++count, @parts);
}
regexmatch.poplisp
;;; -*- mode: lisp -*-
;;; $Id: regexmatch.cmucl,v 1.1 2001/06/13 19:45:20 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; from Jochen Schmidt

(proclaim '(optimize (speed 3)(safety 0)(space 0)(debug 0)(compilation-speed 0)))
(setf ext:*bytes-consed-between-gcs* 5000000)
(use-package :meta)
(eval-when (compile load eval)
(meta:enable-meta-syntax)
(deftype digit () '(member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(deftype non-digit () '(not (member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\( #\) ))))

(defun parse-tel (input)
  (meta:with-string-meta (buffer input)
    (let (last-result)
      (declare (type (or null simple-base-string) last-result))
      (labels ((skip-non-digits (&aux d)
                     (meta:match $[@(non-digit d)]))
               (digit-triplet (&aux (old-index index) d (result (make-array 3 :element-type 'base-char)))
                     (declare (type simple-base-string result))
                     (or (meta:match [@(digit d) !(setf (schar result 0) d)
                                      @(digit d) !(setf (schar result 1) d)
                                      @(digit d) !(setf (schar result 2) d)
                                      !(setf last-result result)])
                         (progn (setf index old-index) nil)))
               (digit-4tupel (&aux (old-index index) d (result (make-array 4 :element-type 'base-char)))
                     (declare (type simple-base-string result))
                     (or (meta:match [@(digit d) !(setf (schar result 0) d)
                                      @(digit d) !(setf (schar result 1) d)
                                      @(digit d) !(setf (schar result 2) d)
                                      @(digit d) !(setf (schar result 3) d)
                                      !(setf last-result result)])
                         (progn (setf index old-index) nil)))
              (telephone-nr (&aux area-code exchange d)
                    (declare (type (or null simple-base-string) area-code exchange))
                    (and (meta:match [!(skip-non-digits)
                                        {[#\( !(digit-triplet) #\)] !(digit-triplet)} !(setf area-code last-result)
                                        #\space !(digit-triplet) !(setf exchange last-result)
                                        {#\space #\-} !(digit-4tupel) {@(non-digit d) !(= index end)}])
                                      (values area-code exchange last-result))))
            (telephone-nr)))))

  (let ((n (parse-integer (or (car pop11::poparglist) "1")))
        (input (loop for line = (read-line *standard-input* nil 'eof)
                     until (eq line 'eof) collect line)))
    (loop for i of-type fixnum from 1 below n do
      (loop for line of-type simple-base-string in input
        do (parse-tel line)))
    (loop with i of-type fixnum = 0
          for line of-type string in input
          do (multiple-value-bind (area-code exchange rest) (parse-tel line)
               (when area-code
                 (format t "~A: (~A) ~A-~A~%" (incf i) area-code exchange rest)))))
regexmatch.python
#!/usr/pkg/bin/python
# $Id: regexmatch.python,v 1.9 2001/02/05 19:02:37 doug Exp $
# http://www.bagley.org/~doug/shootout/

import sys, re

def main():
    NUM = int(sys.argv[1])
    if NUM < 1:
        NUM = 1

    phones = sys.stdin.readlines()

    rx = re.compile(
        r'(?:^|[^\d\(])'
        r'(?:\((\d\d\d)\)|(\d\d\d))'
        r'[ ]'
        r'(\d\d\d)'
        r'[ -]'
        r'(\d\d\d\d)'
        r'\D'
        )

    findIt = rx.search
    count = 0
    for i in xrange(0,NUM):
        for line in phones:
            m = findIt(line)
            if m:
                g = m.group
                num = "(" + (g(1) or g(2)) + ") " + g(3) + "-" + g(4)
                if 0 == i:
                    count = count + 1
                    print "%d: %s" % (count, num)

main()
regexmatch.ruby
#!/usr/local/bin/ruby
# -*- mode: ruby -*-
# $Id: regexmatch.ruby,v 1.11 2001/07/02 04:26:31 doug Exp $
# http://www.bagley.org/~doug/shootout/

re = Regexp.new(
    '(?:^|[^\d\(])' +            # must be preceeded by non-digit
    '(?:\((\d\d\d)\)|(\d\d\d))' +    # match 1 or 2: area code is 3 digits
    '[ ]' +                # area code followed by one space
    '(\d\d\d)' +            # match 3: prefix of 3 digits
    '[ -]' +                # separator is either space or dash
    '(\d\d\d\d)' +            # match 4: last 4 digits
    '\D'                # must be followed by a non-digit
)

NUM = Integer(ARGV[0] || 1)

phones = STDIN.readlines

count = m = line = iter = 0
for iter in 1..NUM
    for line in phones
    if m = re.match(line)
        m1 = m[1];
        if m1 == "" 
            m1 = m[2];
        end
        num = '(' + m1 + ') ' + m[3] + '-' + m[4];
        if iter == NUM
        count += 1
        puts "#{count}: #{num}"
        end
    end
    end
end
regexmatch.smlnj
(* -*- mode: sml -*-
 * $Id: regexmatch.smlnj,v 1.1 2001/07/10 13:04:21 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 * from Stephen Weeks
 * "ported" to SML/NJ
 * with help from Daniel Wang
 *)

structure Test : sig
    val main : (string * string list) -> OS.Process.status
end = struct


fun ++ (r: int ref): int =
   let
      val n = 1 + !r
      val _ = r := n
   in n
   end

structure Int =
   struct
      open Int

      type t = int
     
      fun exists (start: t, stop: t, f: t -> bool): bool =
     let
        fun loop i = i < stop andalso (f i orelse loop (i + 1))
     in
        loop start
     end

      fun forall (start, stop, f) = not (exists (start, stop, not o f))

      fun fold (start: t, stop: t, a: 'a, f: t * 'a -> 'a): 'a =
     let
        fun loop (i: t, a: 'a) =
           if i >= stop
          then a
           else loop (i + 1, f (i, a))
     in loop (start, a)
     end
           
      fun for (start: t, stop: t, f: t -> unit): unit =
     let
        fun loop i =
           if i >= stop
          then ()
           else (f i; loop (i + 1))
     in
        loop start
     end
   end

structure Array2 =
   struct
      open Array2

      type 'a t = 'a array

      val new = array

      fun tabulate (r, c, f) = Array2.tabulate RowMajor (r, c, f)

      fun foreachi (a, f) =
     appi RowMajor f {base = a, row = 0, col = 0,
              nrows = NONE, ncols = NONE}
   end

structure Vector =
   struct
      open Vector

      fun exists (v, f) =
     Int.exists (0, length v, fn i => f (sub (v, i)))
      fun foreach (v, f) = app f v
      fun foreachi (v, f) = appi f (v, 0, NONE)
      fun new (n, x) = tabulate (n, fn _ => x)
      fun new1 x = new (1, x)
   end

structure List =
   struct
      open List

      fun foreach (l, f) = app f l

      fun fold (l, b, f) = foldl f b l

      fun appendRev (l1, l2) = fold (l1, l2, op ::)

      fun push (r, x) = r := x :: !r

      fun keepAll (l, f) = filter f l

      fun peek (l, f) = find f l

      fun insert (l, x, op <=) =
     let
        fun loop (l, ac) =
           case l of
          [] => appendRev (ac, [x])
        | x' :: l' =>
             if x <= x'
            then appendRev (ac, x :: l)
             else loop (l', x' :: ac)
     in loop (l, [])
     end
   end

structure Array =
   struct
      open Array

      val new = array

      fun modify (a, f) = Array.modify f a

      fun foreachi (a, f) = appi f (a, 0, NONE)
     
      fun indices (a: bool array): int vector =
     let
        val n = Array.length a
        val numTrue =
           let
          fun loop (i, count) =
             if i = n
            then count
             else loop (i + 1,
                if Array.sub (a, i)
                   then count + 1
                else count)
           in loop (0, 0)
           end
        val next = ref 0
        fun loop i =
           if Array.sub (a, i)
          then (next := i + 1; i)
           else loop (i + 1)
     in Vector.tabulate (numTrue, fn _ => loop (!next))
     end
   end

structure Char =
   struct
      open Char
     
      val fromInt = chr
      val toInt = ord
   end

structure String =
   struct
      open String

      type t = string
     
      fun contains (s: t, c: char): bool =
     Int.exists (0, size s, fn i => c = sub (s, i))
   end

val numChars: int = 128

structure Regexp =
   struct
      datatype t =
     AnchorStart
       | CharSet of char -> bool
       | Or of t list
       | Seq of t list
       | Star of t
   end

structure Stack:
   sig
      type 'a t

      val clear: 'a t -> unit
      val exists: 'a t * ('a -> bool) -> bool
      val foreach: 'a t * ('a -> unit) -> unit
      val new: int * 'a -> 'a t
      val push: 'a t * 'a -> unit
   end =
   struct
      datatype 'a t = T of {elts: 'a array,
                size: int ref}

      fun new (size: int, dummy: 'a): 'a t =
     T {elts = Array.new (size, dummy),
        size = ref 0}

      fun push (T {elts, size}, x) =
     let
        val n = !size
        val _ = Array.update (elts, n, x)
        val _ = size := n + 1
     in ()
     end

      fun exists (T {elts, size, ...}, f) =
     Int.exists (0, !size, fn i => f (Array.sub (elts, i)))
     
      fun foreach (T {elts, size}, f) =
     Int.for (0, !size, fn i => f (Array.sub (elts, i)))

      fun clear (T {size, ...}) = size := 0
   end


structure NFA:
   sig
      (* The states in an NFA are indexed from 0 to n-1, where n is the number
       * of states.
       *)
      type state = int

      (* State i is final iff Array.sub (final, i).
       * The outgoing states from state i on input char c are given by
       * Array2.sub (next, i, Char.ord c).
       * anchorStarts is sorted in increasing order of state index.
       *)
      datatype t = T of {anchorStarts: state list,
             final: bool array,
             seen: bool array, 
             stack1: int Stack.t, 
             stack2: int Stack.t, 
             start: state,
             next: state vector Array2.t}
     
      val fromRegexp: Regexp.t -> t
      val match: {nfa: t,
          string: string,
          startPos: int,
          anchorStart: bool} -> int option
      val numStates: t -> int
   end =
   struct
      type state = int
      datatype t = T of {anchorStarts: state list,
             final: bool array,
             seen: bool array,
             stack1: int Stack.t,
             stack2: int Stack.t,
             start: state,
             next: state vector Array2.t}

      fun numStates (T {next, ...}) = Array2.nRows next


      (* Simulating an NFA with two stacks and a bit vector, as in Algorithm 3.4
       * (page 126) of Compilers: Principles, Techniques, and Tools by Aho,
       * Sethi, and Ullman.
       *)
      fun match {anchorStart: bool,
         nfa as T {anchorStarts, final, seen, stack1, stack2, start,
               next},
         startPos,
         string = s}: int option =
     let
        val numStates = numStates nfa
        val n = String.size s
        val _ = Array.modify (seen, fn _ => false)
        fun loop (current: state Stack.t,
              nextStates: state Stack.t,
              i: int,
              last: int option): int option =
           let
          val last = 
             if Stack.exists (current, fn s => Array.sub (final, s))
            then SOME i
             else last
           in
          if numStates = 0 orelse i = n
             then (Stack.clear stack1
               ; Stack.clear stack2
               ; last)
          else
             let
            val _ = Array.modify (seen, fn _ => false)
            val c = Char.toInt (String.sub (s, i))
            val _ =
               Stack.foreach (current, fn s =>
                      Vector.foreach
                      (Array2.sub (next, s, c),
                       fn s' =>
                       if Array.sub (seen, s')
                          then ()
                       else (Array.update (seen, s', true)
                         ; Stack.push (nextStates, s'))))
            val _ = Stack.clear current
             in loop (nextStates, current, i + 1, last)
             end
           end
        val _ = Stack.push (stack1, start)
        val _ =
           if anchorStart
          then List.foreach (anchorStarts, fn s =>
                     Stack.push (stack1, s))
           else ()
     in
        loop (stack1, stack2, startPos, NONE)
     end

      (* This conversion from a regular expression to an NFA is based on
       * Section 3.9 (pages 134 -- 140) of Compilers: Principles, Techniques,
       * and Tools by Aho, Sethi, and Ullman.
       *
       * It creates one NFA state for each CharSet (called a "position") that is
       * in the regexp.  There is also one extra state for the start state.
       * It adds edges as in rules 1 and 2 (page 138) for the followpos function.
       *)
      fun fromRegexp (r: Regexp.t): t =
     let
        fun loop (r, ac) =
           let open Regexp
           in case r of
          AnchorStart => ac + 1
        | CharSet _ => ac + 1
        | Or rs => List.fold (rs, ac, loop)
        | Seq rs => List.fold (rs, ac, loop)
        | Star r => loop (r, ac)
           end
        val numPos = loop (r, 0)
        val numStates = numPos + 1
        val start = numPos
        val posCounter = ref ~1
        val follow = Array2.new (numStates, numStates, false)
        val posChars = Array2.tabulate (numPos, numChars, fn _ => false)
        local
           datatype t = T of bool vector 
        in
           fun contains (T v, s) = Vector.sub (v, s)
           val empty: t = T (Vector.new (numPos, false))
           fun union (T v, T v'): t =
          T (Vector.tabulate (numPos, fn i =>
                      Vector.sub (v, i)
                      orelse Vector.sub (v', i)))
           fun singleton (i: int): t =
          T (Vector.tabulate (numPos, fn j => i = j))
           fun foreach (T v, f) =
          Vector.foreachi (v, fn (i, b) => if b then f i else ())
        end
        val anchorStarts = ref []
        
        fun loop r =
           case r of
          Regexp.AnchorStart =>
             let
            val i = ++ posCounter
            val _ = List.push (anchorStarts, i)
            val first = singleton i
             in
            {first = first,
             last = first,
             nullable = false}
             end
        | Regexp.CharSet f =>
             let
            val i = ++ posCounter
            val _ =
               Int.for (0, numChars, fn c =>
                    if f (Char.chr c)
                       then Array2.update (posChars, i, c, true)
                    else ())
            val first = singleton i
             in {first = first,
             last = first,
             nullable = false}
             end
        | Regexp.Or rs =>
             List.fold
             (rs, {first = empty,
               last = empty,
               nullable = false},
              fn (r, {first = f, last = l, nullable = n}) =>
              let
             val {first = f', last = l', nullable = n'} =
                loop r
              in
             {first = union (f, f'),
              last = union (l, l'),
              nullable = n orelse n'}
              end)
        | Regexp.Seq rs =>
             List.fold
             (rs, {first = empty,
               last = empty,
               nullable = true},
              fn (r, {first = f, last = l, nullable = n}) =>
              let
             val {first = f', last = l', nullable = n'} =
                loop r
             val _ =
                foreach
                (l, fn s =>
                 foreach
                 (f', fn s' => Array2.update (follow, s, s', true)))
              in
             {first = if n then union (f, f') else f,
              last = if n' then union (l, l') else l',
              nullable = n andalso n'}
              end)
        | Regexp.Star r =>
             let
            val {first = f, last = l, nullable = n} = loop r
            val _ =
               foreach
               (l, fn s =>
                foreach
                (f, fn s' => Array2.update (follow, s, s', true)))
             in
            {first = f, last = l, nullable = true}
             end
        val {first, last, nullable} = loop r
        (* Any anchor starts in first should be anchor starts.
         * This also reverses anchorStarts so they are in order.
         *)
        val anchorStarts =
           List.fold (!anchorStarts, [], fn (s, ac) =>
              if contains (first, s) then s :: ac else ac)
        
        val _ = foreach (first, fn i =>
                 Array2.update (follow, start, i, true))
        
        val final = Array.array (numStates, false)
        val _ = foreach (last, fn i => Array.update (final, i, true))
        
        val _ = if nullable then Array.update (final, start, true) else ()
        
        val a = Array.new (numStates, false)
        val next =
           Array2.tabulate
           (numStates, numChars, fn (i, c) =>
        let
           val _ =
              Int.for (0, numStates, fn j => Array.update (a, j, false))
           val _ =
              Int.for
              (0, numPos, fn j =>
               if Array2.sub (follow, i, j)
              andalso Array2.sub (posChars, j, c)
              then Array.update (a, j, true)
               else ())
        in Array.indices a
        end)
     in
        T {anchorStarts = anchorStarts,
           final = final,
           next = next,
           seen = Array.new (numStates, false),
           stack1 = Stack.new (numStates, ~1),
           stack2 = Stack.new (numStates, ~1),
           start = start}
     end
      
   end

structure DFA:
   sig
      type t

      val fromNFA: NFA.t -> t
      val match: {dfa: t,
          string: string,
          startPos: int,
          anchorStart: bool} -> int option
      val minimize: t -> t
   end =
   struct
      (* The states in a DFA are indexed from 0 to n-1, where n is the number
       * of states.
       *)
      type state = int
     
      (* State i is final iff Array.sub (final, i).
       * The outgoing state from state i on input char c is
       * Array2.sub (next, i, Char.ord c).
       *)
      datatype t = T of {anchorStart: state,
             dead: bool array,
             final: bool array,
             next: state Array2.t,
             start: state}

      fun numStates (T {next, ...}): int = Array2.nRows next


      fun match {dfa as T {anchorStart = ancSt, dead, final, start, next},
         string as s,
         startPos: int,
         anchorStart: bool}: int option =
     let
        val n = String.size s
        fun loop (i: int, state: int, last: int option): int option =
           let
          val last =
             if Array.sub (final, state)
            then SOME i
             else last
           in
          if Array.sub (dead, state) orelse i = n
             then last
          else loop (i + 1,
                 Array2.sub (next, state,
                     Char.toInt (String.sub (s, i))),
                 last)
           end
     in loop (startPos,
          if anchorStart then ancSt else start,
             NONE)
     end

      fun dead (numStates, final, next) =
     Array.tabulate
     (numStates, fn i =>
      not (Array.sub (final, i))
      andalso Int.forall (0, numChars, fn c =>
                  i = Array2.sub (next, i, c)))
     
      (* This DFA minimization algorithm is based on algorithm 3.6 (page 142)
       * of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and
       * Ullman. 
       *
       * It maintains an array, r, that stores for each state s the
       * representative of the class to which s belongs.
       * It repeatedly refines an equivalence relation, represented by a list
       * of classes, where each class is a list of states (i.e. ints).
       *)
      fun minimize (dfa as T {anchorStart, final, start, next, ...}): t =
     let
        val numStates = numStates dfa
        type class = int list
        type classes = class list
        val repCounter = ref ~1
        val change = ref false
        fun newRep () = (change := true; ++ repCounter)
        val finRep = newRep ()
        val nonfinRep = newRep ()
        val r = Array.tabulate (numStates, fn i =>
                    if Array.sub (final, i)
                       then finRep
                    else nonfinRep)
        fun rep s = Array.sub (r, s)
        fun trans (s, c) = rep (Array2.sub (next, s, c))
        fun refine (class: class, ac: classes): classes =
           let
          val r =
             List.fold
             (class, [], fn (state, classes) =>
              let
             fun loop (classes, ac) =
                case classes of
                   [] =>
                  (case ac of
                      [] => [{class = [state],
                          old = state}]
                    | _ => 
                     let
                        val s = newRep ()
                        val _ = Array.update (r, state, s)
                     in {class = [state],
                         old = state} :: ac
                     end)
                 | (z as {class, old}) :: classes =>
                  if Int.forall
                     (0, numChars, fn c =>
                      trans (old, c) = trans (state, c))
                     then
                    (Array.update (r, state, rep old)
                     ; {class = state :: class,
                        old = old} :: (List.appendRev
                               (classes, ac)))
                  else loop (classes, z :: ac)
              in loop (classes, [])
              end)
           in List.fold (r, ac, fn ({class, ...}, ac) =>
                 case class of
                [_] => ac
                  | _ => class :: ac)
           end
        fun refineAll (classes: classes): unit =
           case classes of
          [] => ()
        | _ =>
             let
            val _ = change := false
            val classes =
               List.fold (classes, [], fn (class, ac) =>
                      case class of
                     [_] => ac
                       | _ => refine (class, ac))
             in if !change
               then refineAll classes
            else ()
             end
        val (fin, nonfin) =
           Int.fold (0, numStates, ([], []), fn (i, (f, n)) =>
             if Array.sub (final, i)
                then (i :: f, n)
             else (f, i :: n))
        val _ = refineAll [fin, nonfin]
        val numStates' = 1 + !repCounter
        
        val reached = Array.new (numStates', false)
        fun visit (s: int ): unit =
           let
          val s' = rep s
           in
          if Array.sub (reached, s')
             then ()
          else (Array.update (reached, s', true)
            ; Int.for (0, numChars, fn c =>
                   visit (Array2.sub (next, s, c))))
           end
        val _ = visit start
        val _ = visit anchorStart
        
        val c = ref ~1
        val newR = Array.tabulate (numStates', fn s =>
                       if Array.sub (reached, s)
                      then ++ c
                       else ~1)
        val numStates' = 1 + !c
        val _ = Array.modify (r, fn s => Array.sub (newR, s))
        val next' = Array2.new (numStates', numChars, ~1)
        val _ =
           Array2.foreachi
           (next, fn (s, c, s') =>
        Array2.update (next', rep s, c, rep s'))
        val final' = Array.array (numStates', false)
        val _ =
           Array.foreachi
           (final, fn (i, b) =>
        if b then Array.update (final', rep i, true) else ())
     in T {anchorStart = rep anchorStart,
           dead = dead (numStates', final', next'),
           final = final',
           start = rep start,
           next = next'}
     end

      (* This is the usual "subset construction", as in algorithm 3.2 (page 118)
       * of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and
       * Ullman.
       *
       * It associates each (reachable) set of states in the nfa with a single
       * state in the DFA.
       *)
      fun fromNFA (nfa as NFA.T {anchorStarts, final, start, next, ...}) =
     let
        
        type states = state vector
        val counter = ref ~1
        type work = {states: states,
             state: int,
             out: int vector option ref}
        val cache: work list ref = ref []
        val todo: work list ref = ref []
        fun statesToState (ss: states): int =
           case List.peek (!cache, fn {states, ...} => ss = states) of
          NONE =>
             let
            val state = ++ counter
            val work = {out = ref NONE,
                    state = state,
                    states = ss}
            val _ = List.push (cache, work)
            val _ = List.push (todo, work)
             in
            state
             end
        | SOME {state, ...} => state
        local
           val seen = Array.array (NFA.numStates nfa, false)
        in
           fun loop () =
          case !todo of
             [] => ()
           | {states, out, ...} :: rest =>
            (todo := rest
             ; out := (SOME
                   (Vector.tabulate
                    (numChars, fn c =>
                     let
                    val _ =
                       Array.modify (seen, fn _ => false)
                    val _ = 
                       Vector.foreach
                       (states, fn s =>
                        Vector.foreach
                        (Array2.sub (next, s, c), fn s' =>
                         Array.update (seen, s', true)))
                     in statesToState (Array.indices seen)
                     end)))
             ; loop ())
        end
        
        val start' = statesToState (Vector.new1 start)
        val anchorStart' =
           statesToState
           (Vector.fromList (List.insert (anchorStarts, start, op <=)))
        val _ = loop ()
        val numStates = 1 + !counter
        val next' = Array2.new (numStates, numChars, ~1)
        val final' = Array.new (numStates, false)
        val _ =
           List.foreach
           (!cache, fn {states, state = i, out, ...}: work =>
        let
           val _ =
              Vector.foreachi
              (valOf (! out), fn (c, j) =>
               Array2.update (next', i, c, j))
           val _ =
              if Vector.exists (states, fn s => Array.sub (final, s))
             then Array.update (final', i, true)
              else ()
        in ()
        end)
        val dead' = dead (numStates, final', next')
     in T {anchorStart = anchorStart',
           dead = dead',
           final = final',
           start = start',
           next = next'}
     end

   end

structure Regexp:
   sig
      structure Compiled:
     sig
        type t

        
        val find: t * string -> {start: int, length: int} option
     end

      type t

      val anchorStart: t
      val any: t
      val char: char -> t
      val compileDFA: t -> Compiled.t
      val compileNFA: t -> Compiled.t
      val digit: t
      val nonDigit: t
      val notOneOf: string -> t
      val oneOf: string -> t
      val or: t list -> t
      val seq: t list -> t
      val star: t -> t
   end =
   struct
      open Regexp

      val anchorStart = AnchorStart
      val isChar = CharSet
      fun isNotChar f = isChar (not o f)
      fun char c = isChar (fn c' => c = c')
      val or = Or
      val seq = Seq
      val star = Star

      val any = isChar (fn _ => true)

      fun oneOf s = isChar (fn c => String.contains (s, c))
      fun notOneOf s = isNotChar (fn c => String.contains (s, c))
      val digs = "0123456789"
      val digit = oneOf digs
      val nonDigit = notOneOf digs

      val empty = Or [] 
      val emptyString = Seq [] 

     
      structure Compiled =
     struct
        datatype t =
           DFA of DFA.t
         | NFA of NFA.t

        fun find (c: t, s: string) =
           let
          val n = String.size s
          fun loop (i: int, anchorStart: bool) =
             if i = n
            then NONE
             else
            let
               val res =
                  case c of
                 DFA dfa =>
                    DFA.match {dfa = dfa,
                           string = s,
                           startPos = i,
                           anchorStart = anchorStart}
                   | NFA nfa =>
                    NFA.match {nfa = nfa,
                           string = s,
                           startPos = i,
                           anchorStart = anchorStart}
            in
               case res of
                  NONE => loop (i + 1, false)
                | SOME finish => SOME {start = i,
                           length = finish - i}
            end
           in loop (0, true)
           end
     end

      fun compileDFA r =
     Compiled.DFA (DFA.minimize (DFA.fromNFA (NFA.fromRegexp r)))
      fun compileNFA r =
     Compiled.NFA (NFA.fromRegexp r)
   end

local
   open Regexp
in
   val d = digit
   val eol = char #"#"
   val space = oneOf " \t"
   val r =
      seq [or [anchorStart, notOneOf "0123456789("],
       or [seq [char #"(", d, d, d, char #")"],
           seq [d, d, d]],
       char #" ",
       d, d, d,
       oneOf " -",
       d, d, d, d,
       or [eol, nonDigit]]

   val comp = Regexp.compileDFA r
end

fun incr (r: int ref) = r := !r + 1

val ins = TextIO.stdIn



fun printl [] = print "\n" | printl(h::t) = ( print h ; printl t )


   
local
   val form = "(...) ...-...."
   val a = CharArray.tabulate (String.size form, fn i =>
                   String.sub (form, i))
in
   fun checkPhone (mustPrint: bool, cnt: int ref, line: string) =
      case Regexp.Compiled.find (comp, line) of
     NONE => ()
       | SOME {start = pos, ...} =>
        let
           fun blit (src, dst, length) =
          let
             val stop = src + length
             fun loop (src, dst) =
            if src = stop
               then ()
            else (CharArray.update (a, dst,
                        String.sub (line, src))
                  ; loop (src + 1, dst + 1))
          in
             loop (src, dst)
          end
           val (o1, o2, o3) =
          if #"(" = String.sub (line, pos)
             then (1, 6, 10)
          else if #"(" = String.sub (line, pos + 1)
              then (2, 7, 11)
               else if Char.isDigit (String.sub (line, pos))
                   then (0, 4, 8)
                else (1, 5, 9)
           val _ = blit (pos + o1, 1, 3)
           val _ = blit (pos + o2, 6, 3)
           val _ = blit (pos + o3, 10, 4)
           val _ =
          if mustPrint
             then printl [Int.toString (!cnt), ": ",
                  CharArray.extract (a, 0, NONE)]
          else ()
           val _ = incr cnt
        in
           ()
        end
end

fun doit (phones,mustPrint: bool): unit =
   let val cnt = ref 1
   in List.foreach (phones, fn line => checkPhone (mustPrint, cnt, line))
   end

fun atoi s = case Int.fromString s of SOME num => num | NONE => 0
   
fun main (name, args) =
   let
      val n = atoi (hd (args @ ["1"]))
      val phones = 
    let
      fun loop lines =
        case TextIO.inputLine ins of
          "" => rev lines
        | line => loop (line :: lines)
    in loop []
    end
      val _ = Int.for (1, n, fn _ => doit (phones,false))
      val _ = doit (phones,true)
   in OS.Process.success
   end
end
val _ = SMLofNJ.exportFn("regexmatch", Test.main);

regexmatch.tcl
#!/usr/local/bin/tclsh
# $Id: regexmatch.tcl,v 1.9 2001/03/15 17:01:52 doug Exp $
# http://www.bagley.org/~doug/shootout/
# from: Miguel Sofer, with modifications by Kristoffer Lawson

proc main {} {
    global argv
    set NUM [lindex $argv 0]
    if {$NUM < 1} {
        set NUM 1
    }

    set phones [split [read stdin] "\n"]
    set count 0    
    set rExp {(?:^|[^\d(])(\(\d{3}\)|\d{3}) (\d{3})[ -](\d{4})($|[^\d])}

    while {$NUM > 0} {
    incr NUM -1
        foreach phone $phones {
            if {[regexp $rExp $phone match area exch num]} {
                if {! $NUM} {
                    incr count 1
            puts "$count: ([string trim $area () ]) $exch-$num"
                }
            }
        }
    }
}

main
regexmatch.vbscript
Set regEx = new RegExp
regEx.Pattern = "^[^\d\(]*(\(\d\d\d\)|\d\d\d) (\d\d\d)[- ](\d\d\d\d)(\D|$)"
regEx.Global = True

phonesBlob = WScript.Stdin.ReadAll()
phones = Split(phonesBlob, Chr(10))

N = WScript.Arguments(0)
If N < 1 Then N = 1

While N > 0
    For Each line in phones
        Set Matches = regEx.Execute(line)
        If Matches.Count > 0 Then
            ' WSCript.Echo "[" & Matches.Count & "]" & line
            tel1 = Matches(0).Submatches(0)
            If Left(tel1, 1) = "(" Then
                tel1 = Mid(tel1, 2, Len(tel1)-2)
            End If
            tel2 = Matches(0).Submatches(1)
            tel3 = Matches(0).Submatches(2)
            num = "(" & tel1 & ") " & tel2 & "-" & tel3
            If N = 1 Then
                Count = Count + 1
                WScript.Echo Count & ": " & num
            End If
        Else
            ' WScript.Echo "nomatch: " & line
        End If
    Next
    N = N - 1
Wend
regexmatch.vc
/* -*- mode: c -*-
 * $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <pcre.h>
#include <string.h>

#define MAXLINES   100
#define MAXLINELEN 132

char *pattern = 
"(?:^|[^\\d\\(])"        
"(\\()?"            
"(\\d\\d\\d)"            
"(?(1)\\))"            
"[ ]"                
"(\\d\\d\\d)"            
"[ -]"                
"(\\d\\d\\d\\d)"        
"\\D"                
;


int
main(int argc, char *argv[]) {
    int NUM = ((argc == 2) ? atoi(argv[1]) : 1);
    int count;
    char *cptr = "";
    char **phones;
    pcre *re;
    int erroffset;
    const char *errptr;
    int n, lines = 0;
    char num[256];
    int i, j, k, matchlen;
    char *matchoffset;
    int nmatches;
    int *ovec, ovecsize;
    pcre_extra *study;

    phones = (char **)malloc(MAXLINES * sizeof(char *));
    if (!phones) {
    fprintf(stderr, "malloc for phones array failed\n");
    exit(1);
    }
    lines = 0;
    while (cptr) {
    phones[lines] = (char *)malloc(MAXLINELEN);
    if (!phones[lines]) {
        fprintf(stderr, "malloc to hold line #%d failed\n", lines);
        exit(1);
    }
    cptr = fgets(phones[lines], MAXLINELEN, stdin);
    lines++;
    if (lines > MAXLINES) {
        fprintf(stderr, "MAXLINES is too small\n");
        exit(1);
    }
    }

    re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL);
    if (!re) {
    fprintf(stderr, "can't open compile regexp\n");
    exit(1);
    }

    study = pcre_study(re, 0, &errptr);

    if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) {
    fprintf(stderr, "pcre_fullinfo failed\n");
    exit(1);
    }
    nmatches++;            

    ovecsize = sizeof(int) * nmatches * 3;
    ovec = (int *)malloc(ovecsize);
    if (!ovec) {
    fprintf(stderr, "malloc for ovec array failed\n");
    exit(1);
    }

    count = 0;
    while (NUM--) {
    for (i=0; i<lines; i++) {
        n = pcre_exec(re, study,
              phones[i], strlen(phones[i]), 0,
              0, ovec, ovecsize);
        if (n == nmatches) {
        
        k = 2*2;    
        
        j = 0;
        num[j++] = '(';
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        num[j++] = ')';
        
        num[j++] = ' ';
        
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        
        num[j++] = '-';
        
        matchoffset = phones[i] + ovec[k];
        matchlen = ovec[k+1] - ovec[k];
        strncpy(num+j, matchoffset, matchlen);
        j += matchlen; k += 2;
        
        num[j] = 0;
        if (0 == NUM) {
            count++;
            printf("%d: %s\n", count, num);
        }
        }
    }
    }

    for (i=0; i<MAXLINES; i++) {
    free(phones[i]);
    }
    free(phones);
    free(ovec);

    return(0);
}

regexmatch.vc++
// -*- mode: c++ -*-
// $Id: regexmatch.g++,v 1.2 2001/06/20 03:20:03 doug Exp $
// http://www.bagley.org/~doug/shootout/
// From Bill Lear


#include <iostream>
#include <zopyra/regx>

using namespace std;

typedef pair<const char*, const char*> span;

int main(int ac, char* av[]) {
    zopyra::regx re(
    "(?x)              # set extended flag for embedded comment fun\n"
        "(?:^|[^\\d(])    # must be preceded by non-digit\n"
        "([(])?           # match 1: possible initial left paren\n"
        "(\\d{3})         # match 2: area code is 3 digits\n"
        "(?(1)[)])        # if match1 then match right paren\n"
        "[ ]              # area code followed by one space\n"
        "(\\d{3})         # match 3: prefix of 3 digits\n"
        "[- ]             # separator is either space or dash\n"
        "(\\d{4})         # match 4: last 4 digits\n"
        "(?:\\D|\\b)      # followed by non-digit or break\n"
    );
    
    string line;
    vector<span> lines;
    while (getline(cin, line)) {
        char* phone = new char[line.size()];
        copy(line.begin(), line.end(), phone);
        lines.push_back(span(phone, phone + line.size()));
    }

    size_t ITER = (ac == 2 ? (atoi(av[1]) < 1 ? 1 : atoi(av[1])): 1);

    char num[13];
    num[0] = '(';
    num[4] = ')';
    num[5] = ' ';
    num[9] = '-';
    size_t count = 0;
    while (ITER--) {
        vector<span>::iterator end = lines.end();
        for (vector<span>::iterator i = lines.begin(); i != end; ++i) {
            zopyra::regx::iterator p = re.find(i->first, i->second);
            if (p++ != re.end()) {
                char* num_p = &num[1];
                ++p;
                copy(p->first, p->second, num_p);

                num_p = &num[6];
                ++p;
                copy(p->first, p->second, num_p);

                num_p = &num[10];
                ++p;
                copy(p->first, p->second, num_p);

                if (!ITER) {
                    cout << ++count << ": ";
                    copy(num, num + 14, ostream_iterator<char>(cout));
                    cout << '\n';
                }
            }
        }
    }
}