Regular Expression Matching

Regular Expression Matching

Back to the Win32 Shootout
Back to dada's perl lab

[The Original Shootout] [NEWS] [FAQ] [Methodology] [Platform Details] [Acknowledgements] [Scorecard]

All Source For Regular Expression Matching
regexmatch.awka
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { n = (ARGV[1] < 1) ? 1 : ARGV[1]; delete ARGV; } { phones[p++] = $0; } END { for (i=0; i<n; i++) { for (j=0; j<p; j++) { line = phones[j]; if (match(line, /(^\|[^0-9])($[0-9][0-9][0-9]$\|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($\|[^0-9])/)) { m1 = substr(line, RSTART, RLENGTH); num = "" if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (substr(m1, RSTART-1, 1) == "(") { break; } if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = "(" parts[1] ") " parts[2] "-" parts[3]; } } else if (match(m1, /$[0-9][0-9][0-9]$ [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = parts[1] " " parts[2] "-" parts[3]; } } if (i == (n-1)) { count++; printf("%d: %s\n", count, num); } } } } }
regexmatch.cygperl
#!/usr/local/bin/perl # $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $ # http://www.bagley.org/~doug/shootout/ use strict; my $re = qr{ (?: ^ \| [^\d$]) # must be preceeded by non-digit ( \( )? # match 1: possible initial left paren (\d\d\d) # match 2: area code is 3 digits (?(1) $ ) # if match1 then match right paren [ ] # area code followed by one space (\d\d\d) # match 3: prefix of 3 digits [ -] # separator is either space or dash (\d\d\d\d) # match 4: last 4 digits \D # must be followed by a non-digit }x; my $NUM = $ARGV[0]; $NUM = 1 if ($NUM < 1); my @phones = <STDIN>; my $count = 0; my $num; while ($NUM--) { foreach (@phones) { if (/$re/o) { $num = "($2) $3-$4"; if (0 == $NUM) { $count++; print "$count: $num\n"; } } } }
regexmatch.erlang
%%% -- mode: erlang -- %%% $Id: regexmatch.erlang,v 1.7 2001/01/06 22:35:23 doug Exp $ %%% http://www.bagley.org/~doug/shootout/ -module(regexmatch). -export([main/0, main/1]). %% get the program argument, which is how many test iterations to run main() -> main(['1']). main([Arg]) -> Num = list_to_integer(atom_to_list(Arg)), {ok, Re} = regexp:parse( "(^\|[^0-9\$])" % preceeding non-digit or bol "(" % area code "\\([0-9][0-9][0-9]\$" % is either 3 digits in parens "\|" % or "[0-9][0-9][0-9]" % just 3 digits ")" % end of area code " " % area code is followed by one space "[0-9][0-9][0-9]" % exchange is 3 digits "[ -]" % separator is either space or dash "[0-9][0-9][0-9][0-9]" % last 4 digits "($\|[^0-9])" % must be followed by a non-digit ), Plist = readlines(), test(Num, Re, Plist), halt(0). test(1, Regexp, Plist) -> % display output on last iteration Nums = match_phones(Regexp, Plist), print_phones(1, Nums), true; test(N, Regexp, Plist) -> match_phones(Regexp, Plist), test(N-1, Regexp, Plist). print_phones(Count, [H\|T]) -> [A,E,N] = H, % A,E,N is a list of the matching sub-expressions, which are: % Areacode (3 digits), Exchange (3 digits), Number (4 digits) io:fwrite("~w: (~s) ~s-~s~n", [Count, A,E,N]), print_phones(Count+1, T); print_phones(_, []) -> true. match_phones(Regexp, List) -> mapfilter( fun(String) -> case regexp:matches(String, Regexp) of {match, []} -> false; {match, Matches} -> parse_phone(String, Matches); _ -> false end end, List). parse_phone(Str, [H\|T]) -> {Start, Len} = H, % Numstr is something that looks like a complete phone # Numstr = string:substr(Str, Start, Len), case regexp:matches(Numstr, "[0-9][0-9][0-9][0-9]*") of {match, []} -> false; {match, Matches} -> lists:map(fun({Offset, Length}) -> string:substr(Numstr, Offset, Length) end, Matches); _ -> false end; parse_phone(Str, []) -> []. mapfilter(Fun, [H\|T]) -> case Fun(H) of false -> mapfilter(Fun, T); New -> [New \| mapfilter(Fun, T)] end; mapfilter(_, []) -> []. readlines() -> Port = open_port({fd, 0, 1}, [eof, {line, 512}]), readlines_from_stream([], Port). readlines_from_stream(Lines, Port) -> receive {Port, eof} -> lists:reverse(Lines); {Port, {_, {_, Line}}} -> readlines_from_stream([Line\|Lines], Port) end.
regexmatch.gawk
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { n = (ARGV[1] < 1) ? 1 : ARGV[1]; delete ARGV; } { phones[p++] = $0; } END { for (i=0; i<n; i++) { for (j=0; j<p; j++) { line = phones[j]; if (match(line, /(^\|[^0-9])($[0-9][0-9][0-9]$\|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($\|[^0-9])/)) { m1 = substr(line, RSTART, RLENGTH); num = "" if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (substr(m1, RSTART-1, 1) == "(") { break; } if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = "(" parts[1] ") " parts[2] "-" parts[3]; } } else if (match(m1, /$[0-9][0-9][0-9]$ [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = parts[1] " " parts[2] "-" parts[3]; } } if (i == (n-1)) { count++; printf("%d: %s\n", count, num); } } } } }
regexmatch.gcc
/* -- mode: c -- * $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdio.h> #include <pcre.h> #include <string.h> #define MAXLINES 100 #define MAXLINELEN 132 char* pattern = "(?:^\|[^\\d\$])" "(\\()?" "(\\d\\d\\d)" "(?(1)\$)" "[ ]" "(\\d\\d\\d)" "[ -]" "(\\d\\d\\d\\d)" "\\D" ; int* main(int argc, char argv[]) { int* NUM = ((argc == 2) ? atoi(argv[1]) : 1); int count; char cptr = ""; char* *phones; pcre re; int erroffset; const char errptr; int* n, lines = 0; char num[256]; int i, j, k, matchlen; char matchoffset; int* nmatches; int ovec, ovecsize; pcre_extra study; phones = (char *)malloc(MAXLINES sizeof(char )); if (!phones) { fprintf(stderr, "malloc for* phones array failed\n"); exit(1); } lines = 0; while (cptr) { phones[lines] = (char )malloc(MAXLINELEN); if (!phones[lines]) { fprintf(stderr, "malloc to hold line #%d failed\n", lines); exit(1); } cptr = fgets(phones[lines], MAXLINELEN, stdin); lines++; if (lines > MAXLINES) { fprintf(stderr, "MAXLINES is too small\n"); exit(1); } } re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL); if (!re) { fprintf(stderr, "can't open compile regexp\n"); exit(1); } study = pcre_study(re, 0, &errptr); if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) { fprintf(stderr, "pcre_fullinfo failed\n"); exit(1); } nmatches++; ovecsize = sizeof(int) nmatches * 3; ovec = (int )malloc(ovecsize); if (!ovec) { fprintf(stderr, "malloc for* ovec array failed\n"); exit(1); } count = 0; while (NUM--) { for (i=0; i<lines; i++) { n = pcre_exec(re, study, phones[i], strlen(phones[i]), 0, 0, ovec, ovecsize); if (n == nmatches) { k = 22; j = 0; num[j++] = '('; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j++] = ')'; num[j++] = ' '; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j++] = '-'; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j] = 0; if (0 == NUM) { count++; printf("%d: %s\n", count, num); } } } } for* (i=0; i<MAXLINES; i++) { free(phones[i]); } free(phones); free(ovec); return(0); }
regexmatch.gforth
\ -- mode: forth -- \ $Id: regexmatch.gforth,v 1.1 2001/05/26 15:59:44 doug Exp $ \ http://www.bagley.org/~doug/shootout/ \ from Anton Ertl: \ this uses the Gray parser generator, which is probably too big a \ cannon for this problem (it also needs a lot of setup code). \ Writing a recursive descent parser by hand is probably both smaller \ and faster in this case. 0. argc @ 1- arg >number 2drop drop constant NUM warnings off \ Gray is a little wordy require gray.fs : slurp-fid { fid -- addr u } 0 0 begin dup 1024 + dup >r extend-mem rot r@ fid read-file throw r> 2dup = while 2drop repeat - + dup >r resize throw r> ; : bit-equiv \ w3=~w1^w2 invert xor ; : set-complement empty ['] bit-equiv binary-set-operation ; variable input \ pointer to next character to be scanned variable end-input \ pointer to end of input -1 constant eof-char : start input @ ; : end input @ over - ; : get-input start end-input @ = if eof-char else start c@ endif ; 256 max-member s" scan failed" exception constant scanfail : ?nextchar 0= scanfail and throw 1 chars input +! ; : testchar? get-input member? ; ' testchar? test-vector ! : .. empty copy-set swap 1+ rot do i over add-member loop ; : ` \ creates anonymous terminal for the character c ) char singleton ['] ?nextchar make-terminal ; char 0 char 9 .. dup ' ?nextchar terminal digit set-complement ' ?nextchar terminal nondigit bl singleton ' ?nextchar terminal lspace 2variable areacode 2variable exchange 2variable last4 ) <- area-code \|\| area-code )) lspace {{ start }} digit digit digit {{ end exchange 2! }} ) {{ start }} digit digit digit digit {{ end last4 2! }} nondigit )) <- telnum telnum parser scan-telnum : scan-for-nondigit begin count >r r@ '0 < r@ '9 > or r> '( <> and over end-input @ u>= or until ; variable count 0 count ! : scanfile over + end-input ! begin dup input ! ['] scan-telnum catch dup dup scanfail <> and throw if scan-for-nondigit else 1 count +! count @ 1 u.r ." : " ." " exchange 2@ type ." -" last4 2@ type cr end-input @ over - #lf scan drop \ skip rest of line endif dup end-input @ u>= until drop ; : mainloop ['] 2drop [is] type NUM 1 +do 2dup scanfile loop ['] [is] type scanfile ; stdin slurp-fid mainloop bye
regexmatch.guile
#!/usr/local/bin/guile \ -e main -s !# ;;; $Id: regexmatch.guile,v 1.6 2001/06/29 23:12:37 doug Exp $ ;;; http://www.bagley.org/~doug/shootout/ (use-modules (ice-9 format)) (use-modules (ice-9 regex)) (define regexp (string-append "(^\|[^0-9\$])" ; (1) preceeding non-digit or bol "(" ; (2) area code "\\(([0-9][0-9][0-9])\$" ; (3) is either 3 digits in parens "\|" ; or "([0-9][0-9][0-9])" ; (4) just 3 digits ")" ; end of area code " " ; area code is followed by one space "([0-9][0-9][0-9])" ; (5) exchange is 3 digits "[ -]" ; separator is either space or dash "([0-9][0-9][0-9][0-9])" ; (6) last 4 digits "([^0-9]\|$)" ; must be followed by a non-digit )) (define (main args) (let ((n (or (and (= (length args) 2) (string->;number (cadr args))) 1)) (phonelines '()) (rx (make-regexp regexp)) (count 0)) (let loop ((line (read-line))) (cond ((eof-object? line) #f) (else (set! phonelines (append phonelines (list line))) (loop (read-line))))) (while (>; n 0) (set! n (- n 1)) (let loop ((phones phonelines) (count 0)) (if (null? phones) count (let ((match (regexp-exec rx (car phones)))) (if match (let* ((area (if (match:start match 3) (match:substring match 3) (match:substring match 4))) (exch (match:substring match 5)) (numb (match:substring match 6)) (num (string-append "(" area ") " exch "-" numb))) (set! count (+ count 1)) (if (= 0 n) (display (format "~D: ~a\n" count num))))) (loop (cdr phones) count)))))))
regexmatch.ici
// $Id: regexmatch.ici,v 1.0 2003/01/03 12:07:00 dada Exp $ // http://dada.perl.it/shootout // // contributed by Tim Long n := argv[1] ? int(argv[1]) : 1; lines = gettokens(stdin, '\n', ""); j = 0; while (--n) { forall (l in lines) { a = l ~~~ #^[^\d(](?:$(\d\d\d)$\|(\d\d\d)) (\d\d\d)[ -](\d\d\d\d)(?:\D\|$)#; if (n == 1 && a) printf("%d: (%s%s) %s-%s\n", ++j, a[0], a[1], a[2], a[3]); } } / * The second last line of the test data is "foo (213 222-2222 bar", which * by my reading of the spec should match. But it is in the "shouldn't match" * section and no other programs matches it. So the above regexp is tailored * not to match it too. */
regexmatch.java
// $Id: regexmatch.java,v 1.2 2003/10/13 16:50:21 tomk Exp $ // http://www.bagley.org/~doug/shootout/ // contributed by Tom Kludy import java.io.; import* java.util.; import* java.util.regex.; public* class regexmatch { public static void main(String args[]) throws IOException, PatternSyntaxException { int n = (args.length > 0) ? Integer.parseInt(args[0]) : 1; LinkedList lines = new LinkedList(); BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); String line; while( (line = in.readLine()) != null ) lines.addLast(line); in.close(); Pattern pattern = Pattern.compile( "(?:^\|[^\\d\$])"+ // must be preceeded by non-digit "(?:\\((\\d\\d\\d)\$\|(\\d\\d\\d))"+// area code is 3 digits (match 1&2) "[ ]"+ // area code followed by one space "(\\d\\d\\d)"+ // match 3: prefix of 3 digits "[ -]"+ // separator is either space or dash "(\\d\\d\\d\\d)"+ // match 4: last 4 digits "(?:\\D\|$)" // must be followed by a non-digit ); int count = 0; while(--n >= 0) { for (ListIterator li = lines.listIterator(); li.hasNext();) { Matcher matcher = pattern.matcher((String)li.next()); if (matcher.find()) { StringBuffer num = new StringBuffer("("); String areaCode = matcher.group(1); if ( areaCode == null ) areaCode = matcher.group(2); num.append(areaCode).append(") ").append(matcher.group(3)) .append("-").append(matcher.group(4)); if ( n == 0 ) System.out.println(++count + ": " + num); } } } } }
regexmatch.lua
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- implemented by: Roberto Ierusalimschy text = read("a") -- I added the following line and slightly modified the pattern -- match below so that the program will reject the case: -- 1(111) 111-1111 (due to preceeding digit) -- (Doug) text = gsub(gsub(text, "^", " "), "\n", "\n ") N = tonumber((arg and* arg[1])) or 1 count = 0 while N > 0 do gsub(text,"%D(%D)(%d%d%d)(%)?) (%d%d%d)[- ](%d+)", function (A,area,B,exch,digits) if (A == '(') == (B == ')') and strlen(digits) == 4 then local tel = "("..area..") "..exch.."-"..digits if N == 1 then count = count+1 write(count, ": ", tel, "\n") end end end) N = N-1 end
regexmatch.lua5
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- contributed by Roberto Ierusalimschy local text = io.read("a") -- make sure text does not start with a number text = "\n" .. text -- pattern is: not a digit, optional (, 3 digits, optional ), -- space, 3 digits, space or hyphen, 4 digits, not a digit local* pattern = "%D(%(?)(%d%d%d)(%)?) (%d%d%d)[- ](%d%d%d%d)%f[%D]" local N = tonumber((arg and arg[1])) or 1 local count = 0 for i=N,1,-1 do for open,area,close,exch,digits in string.gfind(text, pattern) do if (open == '(') == (close == ')') then local tel = "("..area..") "..exch.."-"..digits if i == 1 then count = count+1 io.write(count, ": ", tel, "\n") end end end end
regexmatch.mawk
# $Id: regexmatch.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { n = (ARGV[1] < 1) ? 1 : ARGV[1]; delete ARGV; } { phones[p++] = $0; } END { for (i=0; i<n; i++) { for (j=0; j<p; j++) { line = phones[j]; if (match(line, /(^\|[^0-9])($[0-9][0-9][0-9]$\|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($\|[^0-9])/)) { m1 = substr(line, RSTART, RLENGTH); num = "" if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (substr(m1, RSTART-1, 1) == "(") { break; } if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = "(" parts[1] ") " parts[2] "-" parts[3]; } } else if (match(m1, /$[0-9][0-9][0-9]$ [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) { if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) { num = parts[1] " " parts[2] "-" parts[3]; } } if (i == (n-1)) { count++; printf("%d: %s\n", count, num); } } } } }
regexmatch.ocaml
(* * $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $ * http://www.bagley.org/~doug/shootout/ * from: Markus Mottl ) open Pcre let* rex = regexp ~flags:[`EXTENDED] "(?: ^ \| [^\d$]) # must be preceeded by non-digit (\(\d\d\d$\|\d\d\d) # match 1: area code [ ] # area code followed by one space \d\d\d # prefix of 3 digits [ -] # separator is either space or dash \d\d\d\d # last 4 digits (?: \D\|$) # must be followed by a non-digit (or EOL)" let phones = let lines = ref [] in foreach_line (fun line -> lines := line :: !lines); List.rev !lines let check_phone irflags ar cnt must_print line = try unsafe_pcre_exec irflags rex 0 line 4 ar; let num = String.copy "(...) ...-...." in let pos = Array.unsafe_get ar 2 in let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in let pos = pos + ofs in String.unsafe_blit line pos num 1 3; let pos = pos + ofs + 4 in String.unsafe_blit line pos num 6 3; String.unsafe_blit line (pos + 4) num 10 4; if must_print then Printf.printf "%d: %s\n" !cnt num; incr cnt with Not_found -> () let _ = let n = try int_of_string Sys.argv.(1) with Invalid_argument _ -> 1 in for i = 2 to n do List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones done; List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
regexmatch.ocamlb
(* * $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $ * http://www.bagley.org/~doug/shootout/ * from: Markus Mottl ) open Pcre let* rex = regexp ~flags:[`EXTENDED] "(?: ^ \| [^\d$]) # must be preceeded by non-digit (\(\d\d\d$\|\d\d\d) # match 1: area code [ ] # area code followed by one space \d\d\d # prefix of 3 digits [ -] # separator is either space or dash \d\d\d\d # last 4 digits (?: \D\|$) # must be followed by a non-digit (or EOL)" let phones = let lines = ref [] in foreach_line (fun line -> lines := line :: !lines); List.rev !lines let check_phone irflags ar cnt must_print line = try unsafe_pcre_exec irflags rex 0 line 4 ar; let num = String.copy "(...) ...-...." in let pos = Array.unsafe_get ar 2 in let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in let pos = pos + ofs in String.unsafe_blit line pos num 1 3; let pos = pos + ofs + 4 in String.unsafe_blit line pos num 6 3; String.unsafe_blit line (pos + 4) num 10 4; if must_print then Printf.printf "%d: %s\n" !cnt num; incr cnt with Not_found -> () let _ = let n = try int_of_string Sys.argv.(1) with Invalid_argument _ -> 1 in for i = 2 to n do List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones done; List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
regexmatch.perl
#!/usr/local/bin/perl # $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $ # http://www.bagley.org/~doug/shootout/ use strict; my $re = qr{ (?: ^ \| [^\d$]) # must be preceeded by non-digit ( \( )? # match 1: possible initial left paren (\d\d\d) # match 2: area code is 3 digits (?(1) $ ) # if match1 then match right paren [ ] # area code followed by one space (\d\d\d) # match 3: prefix of 3 digits [ -] # separator is either space or dash (\d\d\d\d) # match 4: last 4 digits \D # must be followed by a non-digit }x; my $NUM = $ARGV[0]; $NUM = 1 if ($NUM < 1); my @phones = <STDIN>; my $count = 0; my $num; while ($NUM--) { foreach (@phones) { if (/$re/o) { $num = "($2) $3-$4"; if (0 == $NUM) { $count++; print "$count: $num\n"; } } } }
regexmatch.pike
#!/usr/local/bin/pike// -- mode: pike -- // $Id: regexmatch.pike,v 1.2 2000/12/05 16:04:06 doug Exp $ // http://www.bagley.org/~doug/shootout/ // from: Fredrik Noring constant area = "([0-9][0-9][0-9]\|\$[0-9][0-9][0-9]\$)"; constant exch = "([0-9][0-9][0-9])"; constant last = "([0-9][0-9][0-9][0-9])"; void main(int argc, array(string) argv) { Regexp r = Regexp("^[^0-9\\(]"+area+" "+exch+"[ -]"+last+"[^0-9]$"); array(string) phones = Stdio.stdin->read()/"\n"; int n = (int)argv[-1]; int count = 0; while(n--) foreach(phones, string phone) if(array(string) parts = r->split(phone)) if(n == 0) if(parts[0][0] == '(') write("%d: %s %s-%s\n", ++count, @parts); else write("%d: (%s) %s-%s\n", ++count, @parts); }
regexmatch.poplisp
;;; -- mode: lisp -- ;;; $Id: regexmatch.cmucl,v 1.1 2001/06/13 19:45:20 doug Exp $ ;;; http://www.bagley.org/~doug/shootout/ ;;; from Jochen Schmidt (proclaim '(optimize (speed 3)(safety 0)(space 0)(debug 0)(compilation-speed 0))) (setf ext:bytes-consed-between-gcs 5000000) (use-package :meta) (eval-when (compile load eval) (meta:enable-meta-syntax) (deftype digit () '(member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (deftype non-digit () '(not (member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #$ #$ )))) (defun parse-tel (input) (meta:with-string-meta (buffer input) (let (last-result) (declare (type (or null simple-base-string) last-result)) (labels ((skip-non-digits (&aux d) (meta:match $[@(non-digit d)])) (digit-triplet (&aux (old-index index) d (result (make-array 3 :element-type 'base-char))) (declare (type simple-base-string result)) (or (meta:match [@(digit d) !(setf (schar result 0) d) @(digit d) !(setf (schar result 1) d) @(digit d) !(setf (schar result 2) d) !(setf last-result result)]) (progn (setf index old-index) nil))) (digit-4tupel (&aux (old-index index) d (result (make-array 4 :element-type 'base-char))) (declare (type simple-base-string result)) (or (meta:match [@(digit d) !(setf (schar result 0) d) @(digit d) !(setf (schar result 1) d) @(digit d) !(setf (schar result 2) d) @(digit d) !(setf (schar result 3) d) !(setf last-result result)]) (progn (setf index old-index) nil))) (telephone-nr (&aux area-code exchange d) (declare (type (or null simple-base-string) area-code exchange)) (and (meta:match [!(skip-non-digits) {[#$ !(digit-triplet) #$] !(digit-triplet)} !(setf area-code last-result) #\space !(digit-triplet) !(setf exchange last-result) {#\space #\-} !(digit-4tupel) {@(non-digit d) !(= index end)}]) (values area-code exchange last-result)))) (telephone-nr))))) (let ((n (parse-integer (or (car pop11::poparglist) "1"))) (input (loop for line = (read-line standard-input nil 'eof) until (eq line 'eof) collect line))) (loop for i of-type fixnum from 1 below n do (loop for line of-type simple-base-string in input do (parse-tel line))) (loop with i of-type fixnum = 0 for line of-type string in input do (multiple-value-bind (area-code exchange rest) (parse-tel line) (when area-code (format t "~A: (~A) ~A-~A~%" (incf i) area-code exchange rest)))))
regexmatch.python
#!/usr/pkg/bin/python # $Id: regexmatch.python,v 1.9 2001/02/05 19:02:37 doug Exp $ # http://www.bagley.org/~doug/shootout/ import sys, re def main(): NUM = int(sys.argv[1]) if NUM < 1: NUM = 1 phones = sys.stdin.readlines() rx = re.compile( r'(?:^\|[^\d$])' r'(?:\((\d\d\d)$\|(\d\d\d))' r'[ ]' r'(\d\d\d)' r'[ -]' r'(\d\d\d\d)' r'\D' ) findIt = rx.search count = 0 for i in xrange(0,NUM): for line in phones: m = findIt(line) if m: g = m.group num = "(" + (g(1) or g(2)) + ") " + g(3) + "-" + g(4) if 0 == i: count = count + 1 print "%d: %s" % (count, num) main()
regexmatch.ruby
#!/usr/local/bin/ruby # -- mode: ruby -- # $Id: regexmatch.ruby,v 1.11 2001/07/02 04:26:31 doug Exp $ # http://www.bagley.org/~doug/shootout/ re = Regexp.new( '(?:^\|[^\d$])' + # must be preceeded by non-digit '(?:\((\d\d\d)$\|(\d\d\d))' + # match 1 or 2: area code is 3 digits '[ ]' + # area code followed by one space '(\d\d\d)' + # match 3: prefix of 3 digits '[ -]' + # separator is either space or dash '(\d\d\d\d)' + # match 4: last 4 digits '\D' # must be followed by a non-digit ) NUM = Integer(ARGV[0] \|\| 1) phones = STDIN.readlines count = m = line = iter = 0 for iter in 1..NUM for line in phones if m = re.match(line) m1 = m[1]; if m1 == "" m1 = m[2]; end num = '(' + m1 + ') ' + m[3] + '-' + m[4]; if iter == NUM count += 1 puts "#{count}: #{num}" end end end end
regexmatch.smlnj
(* -- mode: sml -- * $Id: regexmatch.smlnj,v 1.1 2001/07/10 13:04:21 doug Exp $ * http://www.bagley.org/~doug/shootout/ * from Stephen Weeks * "ported" to SML/NJ * with help from Daniel Wang ) structure* Test : sig val main : (string * string list) -> OS.Process.status end = struct fun ++ (r: int ref): int = let val n = 1 + !r val _ = r := n in n end structure Int = struct open Int type t = int fun exists (start: t, stop: t, f: t -> bool): bool = let fun loop i = i < stop andalso (f i orelse loop (i + 1)) in loop start end fun forall (start, stop, f) = not (exists (start, stop, not o f)) fun fold (start: t, stop: t, a: 'a, f: t * 'a -> 'a): 'a = let fun loop (i: t, a: 'a) = if i >= stop then a else loop (i + 1, f (i, a)) in loop (start, a) end fun for (start: t, stop: t, f: t -> unit): unit = let fun loop i = if i >= stop then () else (f i; loop (i + 1)) in loop start end end structure Array2 = struct open Array2 type 'a t = 'a array val new = array fun tabulate (r, c, f) = Array2.tabulate RowMajor (r, c, f) fun foreachi (a, f) = appi RowMajor f {base = a, row = 0, col = 0, nrows = NONE, ncols = NONE} end structure Vector = struct open Vector fun exists (v, f) = Int.exists (0, length v, fn i => f (sub (v, i))) fun foreach (v, f) = app f v fun foreachi (v, f) = appi f (v, 0, NONE) fun new (n, x) = tabulate (n, fn _ => x) fun new1 x = new (1, x) end structure List = struct open List fun foreach (l, f) = app f l fun fold (l, b, f) = foldl f b l fun appendRev (l1, l2) = fold (l1, l2, op ::) fun push (r, x) = r := x :: !r fun keepAll (l, f) = filter f l fun peek (l, f) = find f l fun insert (l, x, op <=) = let fun loop (l, ac) = case l of [] => appendRev (ac, [x]) \| x' :: l' => if x <= x' then appendRev (ac, x :: l) else loop (l', x' :: ac) in loop (l, []) end end structure Array = struct open Array val new = array fun modify (a, f) = Array.modify f a fun foreachi (a, f) = appi f (a, 0, NONE) fun indices (a: bool array): int vector = let val n = Array.length a val numTrue = let fun loop (i, count) = if i = n then count else loop (i + 1, if Array.sub (a, i) then count + 1 else count) in loop (0, 0) end val next = ref 0 fun loop i = if Array.sub (a, i) then (next := i + 1; i) else loop (i + 1) in Vector.tabulate (numTrue, fn _ => loop (!next)) end end structure Char = struct open Char val fromInt = chr val toInt = ord end structure String = struct open String type t = string fun contains (s: t, c: char): bool = Int.exists (0, size s, fn i => c = sub (s, i)) end val numChars: int = 128 structure Regexp = struct datatype t = AnchorStart \| CharSet of char -> bool \| Or of t list \| Seq of t list \| Star of t end structure Stack: sig type 'a t val clear: 'a t -> unit val exists: 'a t * ('a -> bool) -> bool val foreach: 'a t * ('a -> unit) -> unit val new: int * 'a -> 'a t val push: 'a t * 'a -> unit end = struct datatype 'a t = T of {elts: 'a array, size: int ref} fun new (size: int, dummy: 'a): 'a t = T {elts = Array.new (size, dummy), size = ref 0} fun push (T {elts, size}, x) = let val n = !size val _ = Array.update (elts, n, x) val _ = size := n + 1 in () end fun exists (T {elts, size, ...}, f) = Int.exists (0, !size, fn i => f (Array.sub (elts, i))) fun foreach (T {elts, size}, f) = Int.for (0, !size, fn i => f (Array.sub (elts, i))) fun clear (T {size, ...}) = size := 0 end structure NFA: sig (* The states in an NFA are indexed from 0 to n-1, where n is the number * of states. ) type state = int ( State i is final iff Array.sub (final, i). * The outgoing states from state i on input char c are given by * Array2.sub (next, i, Char.ord c). * anchorStarts is sorted in increasing order of state index. ) datatype t = T of {anchorStarts: state list, final: bool array, seen: bool array, stack1: int Stack.t, stack2: int Stack.t, start: state, next: state vector Array2.t} val* fromRegexp: Regexp.t -> t val match: {nfa: t, string: string, startPos: int, anchorStart: bool} -> int option val numStates: t -> int end = struct type state = int datatype t = T of {anchorStarts: state list, final: bool array, seen: bool array, stack1: int Stack.t, stack2: int Stack.t, start: state, next: state vector Array2.t} fun numStates (T {next, ...}) = Array2.nRows next (* Simulating an NFA with two stacks and a bit vector, as in Algorithm 3.4 * (page 126) of Compilers: Principles, Techniques, and Tools by Aho, * Sethi, and Ullman. ) fun* match {anchorStart: bool, nfa as T {anchorStarts, final, seen, stack1, stack2, start, next}, startPos, string = s}: int option = let val numStates = numStates nfa val n = String.size s val _ = Array.modify (seen, fn _ => false) fun loop (current: state Stack.t, nextStates: state Stack.t, i: int, last: int option): int option = let val last = if Stack.exists (current, fn s => Array.sub (final, s)) then SOME i else last in if numStates = 0 orelse i = n then (Stack.clear stack1 ; Stack.clear stack2 ; last) else let val _ = Array.modify (seen, fn _ => false) val c = Char.toInt (String.sub (s, i)) val _ = Stack.foreach (current, fn s => Vector.foreach (Array2.sub (next, s, c), fn s' => if Array.sub (seen, s') then () else (Array.update (seen, s', true) ; Stack.push (nextStates, s')))) val _ = Stack.clear current in loop (nextStates, current, i + 1, last) end end val _ = Stack.push (stack1, start) val _ = if anchorStart then List.foreach (anchorStarts, fn s => Stack.push (stack1, s)) else () in loop (stack1, stack2, startPos, NONE) end (* This conversion from a regular expression to an NFA is based on * Section 3.9 (pages 134 -- 140) of Compilers: Principles, Techniques, * and Tools by Aho, Sethi, and Ullman. * * It creates one NFA state for each CharSet (called a "position") that is * in the regexp. There is also one extra state for the start state. * It adds edges as in rules 1 and 2 (page 138) for the followpos function. ) fun* fromRegexp (r: Regexp.t): t = let fun loop (r, ac) = let open Regexp in case r of AnchorStart => ac + 1 \| CharSet _ => ac + 1 \| Or rs => List.fold (rs, ac, loop) \| Seq rs => List.fold (rs, ac, loop) \| Star r => loop (r, ac) end val numPos = loop (r, 0) val numStates = numPos + 1 val start = numPos val posCounter = ref ~1 val follow = Array2.new (numStates, numStates, false) val posChars = Array2.tabulate (numPos, numChars, fn _ => false) local datatype t = T of bool vector in fun contains (T v, s) = Vector.sub (v, s) val empty: t = T (Vector.new (numPos, false)) fun union (T v, T v'): t = T (Vector.tabulate (numPos, fn i => Vector.sub (v, i) orelse Vector.sub (v', i))) fun singleton (i: int): t = T (Vector.tabulate (numPos, fn j => i = j)) fun foreach (T v, f) = Vector.foreachi (v, fn (i, b) => if b then f i else ()) end val anchorStarts = ref [] fun loop r = case r of Regexp.AnchorStart => let val i = ++ posCounter val _ = List.push (anchorStarts, i) val first = singleton i in {first = first, last = first, nullable = false} end \| Regexp.CharSet f => let val i = ++ posCounter val _ = Int.for (0, numChars, fn c => if f (Char.chr c) then Array2.update (posChars, i, c, true) else ()) val first = singleton i in {first = first, last = first, nullable = false} end \| Regexp.Or rs => List.fold (rs, {first = empty, last = empty, nullable = false}, fn (r, {first = f, last = l, nullable = n}) => let val {first = f', last = l', nullable = n'} = loop r in {first = union (f, f'), last = union (l, l'), nullable = n orelse n'} end) \| Regexp.Seq rs => List.fold (rs, {first = empty, last = empty, nullable = true}, fn (r, {first = f, last = l, nullable = n}) => let val {first = f', last = l', nullable = n'} = loop r val _ = foreach (l, fn s => foreach (f', fn s' => Array2.update (follow, s, s', true))) in {first = if n then union (f, f') else f, last = if n' then union (l, l') else l', nullable = n andalso n'} end) \| Regexp.Star r => let val {first = f, last = l, nullable = n} = loop r val _ = foreach (l, fn s => foreach (f, fn s' => Array2.update (follow, s, s', true))) in {first = f, last = l, nullable = true} end val {first, last, nullable} = loop r (* Any anchor starts in first should be anchor starts. * This also reverses anchorStarts so they are in order. ) val* anchorStarts = List.fold (!anchorStarts, [], fn (s, ac) => if contains (first, s) then s :: ac else ac) val _ = foreach (first, fn i => Array2.update (follow, start, i, true)) val final = Array.array (numStates, false) val _ = foreach (last, fn i => Array.update (final, i, true)) val _ = if nullable then Array.update (final, start, true) else () val a = Array.new (numStates, false) val next = Array2.tabulate (numStates, numChars, fn (i, c) => let val _ = Int.for (0, numStates, fn j => Array.update (a, j, false)) val _ = Int.for (0, numPos, fn j => if Array2.sub (follow, i, j) andalso Array2.sub (posChars, j, c) then Array.update (a, j, true) else ()) in Array.indices a end) in T {anchorStarts = anchorStarts, final = final, next = next, seen = Array.new (numStates, false), stack1 = Stack.new (numStates, ~1), stack2 = Stack.new (numStates, ~1), start = start} end end structure DFA: sig type t val fromNFA: NFA.t -> t val match: {dfa: t, string: string, startPos: int, anchorStart: bool} -> int option val minimize: t -> t end = struct (* The states in a DFA are indexed from 0 to n-1, where n is the number * of states. ) type state = int ( State i is final iff Array.sub (final, i). * The outgoing state from state i on input char c is * Array2.sub (next, i, Char.ord c). ) datatype t = T of {anchorStart: state, dead: bool array, final: bool array, next: state Array2.t, start: state} fun* numStates (T {next, ...}): int = Array2.nRows next fun match {dfa as T {anchorStart = ancSt, dead, final, start, next}, string as s, startPos: int, anchorStart: bool}: int option = let val n = String.size s fun loop (i: int, state: int, last: int option): int option = let val last = if Array.sub (final, state) then SOME i else last in if Array.sub (dead, state) orelse i = n then last else loop (i + 1, Array2.sub (next, state, Char.toInt (String.sub (s, i))), last) end in loop (startPos, if anchorStart then ancSt else start, NONE) end fun dead (numStates, final, next) = Array.tabulate (numStates, fn i => not (Array.sub (final, i)) andalso Int.forall (0, numChars, fn c => i = Array2.sub (next, i, c))) (* This DFA minimization algorithm is based on algorithm 3.6 (page 142) * of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and * Ullman. * * It maintains an array, r, that stores for each state s the * representative of the class to which s belongs. * It repeatedly refines an equivalence relation, represented by a list * of classes, where each class is a list of states (i.e. ints). ) fun* minimize (dfa as T {anchorStart, final, start, next, ...}): t = let val numStates = numStates dfa type class = int list type classes = class list val repCounter = ref ~1 val change = ref false fun newRep () = (change := true; ++ repCounter) val finRep = newRep () val nonfinRep = newRep () val r = Array.tabulate (numStates, fn i => if Array.sub (final, i) then finRep else nonfinRep) fun rep s = Array.sub (r, s) fun trans (s, c) = rep (Array2.sub (next, s, c)) fun refine (class: class, ac: classes): classes = let val r = List.fold (class, [], fn (state, classes) => let fun loop (classes, ac) = case classes of [] => (case ac of [] => [{class = [state], old = state}] \| _ => let val s = newRep () val _ = Array.update (r, state, s) in {class = [state], old = state} :: ac end) \| (z as {class, old}) :: classes => if Int.forall (0, numChars, fn c => trans (old, c) = trans (state, c)) then (Array.update (r, state, rep old) ; {class = state :: class, old = old} :: (List.appendRev (classes, ac))) else loop (classes, z :: ac) in loop (classes, []) end) in List.fold (r, ac, fn ({class, ...}, ac) => case class of [_] => ac \| _ => class :: ac) end fun refineAll (classes: classes): unit = case classes of [] => () \| _ => let val _ = change := false val classes = List.fold (classes, [], fn (class, ac) => case class of [_] => ac \| _ => refine (class, ac)) in if !change then refineAll classes else () end val (fin, nonfin) = Int.fold (0, numStates, ([], []), fn (i, (f, n)) => if Array.sub (final, i) then (i :: f, n) else (f, i :: n)) val _ = refineAll [fin, nonfin] val numStates' = 1 + !repCounter val reached = Array.new (numStates', false) fun visit (s: int ): unit = let val s' = rep s in if Array.sub (reached, s') then () else (Array.update (reached, s', true) ; Int.for (0, numChars, fn c => visit (Array2.sub (next, s, c)))) end val _ = visit start val _ = visit anchorStart val c = ref ~1 val newR = Array.tabulate (numStates', fn s => if Array.sub (reached, s) then ++ c else ~1) val numStates' = 1 + !c val _ = Array.modify (r, fn s => Array.sub (newR, s)) val next' = Array2.new (numStates', numChars, ~1) val _ = Array2.foreachi (next, fn (s, c, s') => Array2.update (next', rep s, c, rep s')) val final' = Array.array (numStates', false) val _ = Array.foreachi (final, fn (i, b) => if b then Array.update (final', rep i, true) else ()) in T {anchorStart = rep anchorStart, dead = dead (numStates', final', next'), final = final', start = rep start, next = next'} end (* This is the usual "subset construction", as in algorithm 3.2 (page 118) * of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and * Ullman. * * It associates each (reachable) set of states in the nfa with a single * state in the DFA. ) fun* fromNFA (nfa as NFA.T {anchorStarts, final, start, next, ...}) = let type states = state vector val counter = ref ~1 type work = {states: states, state: int, out: int vector option ref} val cache: work list ref = ref [] val todo: work list ref = ref [] fun statesToState (ss: states): int = case List.peek (!cache, fn {states, ...} => ss = states) of NONE => let val state = ++ counter val work = {out = ref NONE, state = state, states = ss} val _ = List.push (cache, work) val _ = List.push (todo, work) in state end \| SOME {state, ...} => state local val seen = Array.array (NFA.numStates nfa, false) in fun loop () = case !todo of [] => () \| {states, out, ...} :: rest => (todo := rest ; out := (SOME (Vector.tabulate (numChars, fn c => let val _ = Array.modify (seen, fn _ => false) val _ = Vector.foreach (states, fn s => Vector.foreach (Array2.sub (next, s, c), fn s' => Array.update (seen, s', true))) in statesToState (Array.indices seen) end))) ; loop ()) end val start' = statesToState (Vector.new1 start) val anchorStart' = statesToState (Vector.fromList (List.insert (anchorStarts, start, op <=))) val _ = loop () val numStates = 1 + !counter val next' = Array2.new (numStates, numChars, ~1) val final' = Array.new (numStates, false) val _ = List.foreach (!cache, fn {states, state = i, out, ...}: work => let val _ = Vector.foreachi (valOf (! out), fn (c, j) => Array2.update (next', i, c, j)) val _ = if Vector.exists (states, fn s => Array.sub (final, s)) then Array.update (final', i, true) else () in () end) val dead' = dead (numStates, final', next') in T {anchorStart = anchorStart', dead = dead', final = final', start = start', next = next'} end end structure Regexp: sig structure Compiled: sig type t val find: t * string -> {start: int, length: int} option end type t val anchorStart: t val any: t val char: char -> t val compileDFA: t -> Compiled.t val compileNFA: t -> Compiled.t val digit: t val nonDigit: t val notOneOf: string -> t val oneOf: string -> t val or: t list -> t val seq: t list -> t val star: t -> t end = struct open Regexp val anchorStart = AnchorStart val isChar = CharSet fun isNotChar f = isChar (not o f) fun char c = isChar (fn c' => c = c') val or = Or val seq = Seq val star = Star val any = isChar (fn _ => true) fun oneOf s = isChar (fn c => String.contains (s, c)) fun notOneOf s = isNotChar (fn c => String.contains (s, c)) val digs = "0123456789" val digit = oneOf digs val nonDigit = notOneOf digs val empty = Or [] val emptyString = Seq [] structure Compiled = struct datatype t = DFA of DFA.t \| NFA of NFA.t fun find (c: t, s: string) = let val n = String.size s fun loop (i: int, anchorStart: bool) = if i = n then NONE else let val res = case c of DFA dfa => DFA.match {dfa = dfa, string = s, startPos = i, anchorStart = anchorStart} \| NFA nfa => NFA.match {nfa = nfa, string = s, startPos = i, anchorStart = anchorStart} in case res of NONE => loop (i + 1, false) \| SOME finish => SOME {start = i, length = finish - i} end in loop (0, true) end end fun compileDFA r = Compiled.DFA (DFA.minimize (DFA.fromNFA (NFA.fromRegexp r))) fun compileNFA r = Compiled.NFA (NFA.fromRegexp r) end local open Regexp in val d = digit val eol = char #"#" val space = oneOf " \t" val r = seq [or [anchorStart, notOneOf "0123456789("], or [seq [char #"(", d, d, d, char #")"], seq [d, d, d]], char #" ", d, d, d, oneOf " -", d, d, d, d, or [eol, nonDigit]] val comp = Regexp.compileDFA r end fun incr (r: int ref) = r := !r + 1 val ins = TextIO.stdIn fun printl [] = print "\n" \| printl(h::t) = ( print h ; printl t ) local val form = "(...) ...-...." val a = CharArray.tabulate (String.size form, fn i => String.sub (form, i)) in fun checkPhone (mustPrint: bool, cnt: int ref, line: string) = case Regexp.Compiled.find (comp, line) of NONE => () \| SOME {start = pos, ...} => let fun blit (src, dst, length) = let val stop = src + length fun loop (src, dst) = if src = stop then () else (CharArray.update (a, dst, String.sub (line, src)) ; loop (src + 1, dst + 1)) in loop (src, dst) end val (o1, o2, o3) = if #"(" = String.sub (line, pos) then (1, 6, 10) else if #"(" = String.sub (line, pos + 1) then (2, 7, 11) else if Char.isDigit (String.sub (line, pos)) then (0, 4, 8) else (1, 5, 9) val _ = blit (pos + o1, 1, 3) val _ = blit (pos + o2, 6, 3) val _ = blit (pos + o3, 10, 4) val _ = if mustPrint then printl [Int.toString (!cnt), ": ", CharArray.extract (a, 0, NONE)] else () val _ = incr cnt in () end end fun doit (phones,mustPrint: bool): unit = let val cnt = ref 1 in List.foreach (phones, fn line => checkPhone (mustPrint, cnt, line)) end fun atoi s = case Int.fromString s of SOME num => num \| NONE => 0 fun main (name, args) = let val n = atoi (hd (args @ ["1"])) val phones = let fun loop lines = case TextIO.inputLine ins of "" => rev lines \| line => loop (line :: lines) in loop [] end val _ = Int.for (1, n, fn _ => doit (phones,false)) val _ = doit (phones,true) in OS.Process.success end end val _ = SMLofNJ.exportFn("regexmatch", Test.main);
regexmatch.tcl
#!/usr/local/bin/tclsh # $Id: regexmatch.tcl,v 1.9 2001/03/15 17:01:52 doug Exp $ # http://www.bagley.org/~doug/shootout/ # from: Miguel Sofer, with modifications by Kristoffer Lawson proc main {} { global argv set NUM [lindex $argv 0] if {$NUM < 1} { set NUM 1 } set phones [split [read stdin] "\n"] set count 0 set rExp {(?:^\|[^\d(])($\d{3}$\|\d{3}) (\d{3})[ -](\d{4})($\|[^\d])} while {$NUM > 0} { incr NUM -1 foreach phone $phones { if {[regexp $rExp $phone match area exch num]} { if {! $NUM} { incr count 1 puts "$count: ([string trim $area () ]) $exch-$num" } } } } } main
regexmatch.vbscript
Set regEx = new RegExp regEx.Pattern = "^[^\d$](\(\d\d\d$\|\d\d\d) (\d\d\d)[- ](\d\d\d\d)(\D\|$)" regEx.Global = True* phonesBlob = WScript.Stdin.ReadAll() phones = Split(phonesBlob, Chr(10)) N = WScript.Arguments(0) If N < 1 Then N = 1 While N > 0 For Each line in phones Set Matches = regEx.Execute(line) If Matches.Count > 0 Then ' WSCript.Echo "[" & Matches.Count & "]" & line tel1 = Matches(0).Submatches(0) If Left(tel1, 1) = "(" Then tel1 = Mid(tel1, 2, Len(tel1)-2) End If tel2 = Matches(0).Submatches(1) tel3 = Matches(0).Submatches(2) num = "(" & tel1 & ") " & tel2 & "-" & tel3 If N = 1 Then Count = Count + 1 WScript.Echo Count & ": " & num End If Else ' WScript.Echo "nomatch: " & line End If Next N = N - 1 Wend
regexmatch.vc
/* -- mode: c -- * $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdio.h> #include <pcre.h> #include <string.h> #define MAXLINES 100 #define MAXLINELEN 132 char* pattern = "(?:^\|[^\\d\$])" "(\\()?" "(\\d\\d\\d)" "(?(1)\$)" "[ ]" "(\\d\\d\\d)" "[ -]" "(\\d\\d\\d\\d)" "\\D" ; int* main(int argc, char argv[]) { int* NUM = ((argc == 2) ? atoi(argv[1]) : 1); int count; char cptr = ""; char* *phones; pcre re; int erroffset; const char errptr; int* n, lines = 0; char num[256]; int i, j, k, matchlen; char matchoffset; int* nmatches; int ovec, ovecsize; pcre_extra study; phones = (char *)malloc(MAXLINES sizeof(char )); if (!phones) { fprintf(stderr, "malloc for* phones array failed\n"); exit(1); } lines = 0; while (cptr) { phones[lines] = (char )malloc(MAXLINELEN); if (!phones[lines]) { fprintf(stderr, "malloc to hold line #%d failed\n", lines); exit(1); } cptr = fgets(phones[lines], MAXLINELEN, stdin); lines++; if (lines > MAXLINES) { fprintf(stderr, "MAXLINES is too small\n"); exit(1); } } re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL); if (!re) { fprintf(stderr, "can't open compile regexp\n"); exit(1); } study = pcre_study(re, 0, &errptr); if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) { fprintf(stderr, "pcre_fullinfo failed\n"); exit(1); } nmatches++; ovecsize = sizeof(int) nmatches * 3; ovec = (int )malloc(ovecsize); if (!ovec) { fprintf(stderr, "malloc for* ovec array failed\n"); exit(1); } count = 0; while (NUM--) { for (i=0; i<lines; i++) { n = pcre_exec(re, study, phones[i], strlen(phones[i]), 0, 0, ovec, ovecsize); if (n == nmatches) { k = 22; j = 0; num[j++] = '('; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j++] = ')'; num[j++] = ' '; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j++] = '-'; matchoffset = phones[i] + ovec[k]; matchlen = ovec[k+1] - ovec[k]; strncpy(num+j, matchoffset, matchlen); j += matchlen; k += 2; num[j] = 0; if (0 == NUM) { count++; printf("%d: %s\n", count, num); } } } } for* (i=0; i<MAXLINES; i++) { free(phones[i]); } free(phones); free(ovec); return(0); }
regexmatch.vc++
// -- mode: c++ -- // $Id: regexmatch.g++,v 1.2 2001/06/20 03:20:03 doug Exp $ // http://www.bagley.org/~doug/shootout/ // From Bill Lear #include <iostream> #include <zopyra/regx> using namespace std; typedef pair<const char, const char> span; int main(int ac, char* av[]) { zopyra::regx re( "(?x) # set extended flag for embedded comment fun\n" "(?:^\|[^\\d(]) # must be preceded by non-digit\n" "([(])? # match 1: possible initial left paren\n" "(\\d{3}) # match 2: area code is 3 digits\n" "(?(1)[)]) # if match1 then match right paren\n" "[ ] # area code followed by one space\n" "(\\d{3}) # match 3: prefix of 3 digits\n" "[- ] # separator is either space or dash\n" "(\\d{4}) # match 4: last 4 digits\n" "(?:\\D\|\\b) # followed by non-digit or break\n" ); string line; vector<span> lines; while (getline(cin, line)) { char* phone = new char[line.size()]; copy(line.begin(), line.end(), phone); lines.push_back(span(phone, phone + line.size())); } size_t ITER = (ac == 2 ? (atoi(av[1]) < 1 ? 1 : atoi(av[1])): 1); char num[13]; num[0] = '('; num[4] = ')'; num[5] = ' '; num[9] = '-'; size_t count = 0; while (ITER--) { vector<span>::iterator end = lines.end(); for (vector<span>::iterator i = lines.begin(); i != end; ++i) { zopyra::regx::iterator p = re.find(i->first, i->second); if (p++ != re.end()) { char* num_p = &num[1]; ++p; copy(p->first, p->second, num_p); num_p = &num[6]; ++p; copy(p->first, p->second, num_p); num_p = &num[10]; ++p; copy(p->first, p->second, num_p); if (!ITER) { cout << ++count << ": "; copy(num, num + 14, ostream_iterator<char**>(cout)); cout << '\n'; } } } } }