|
|
Regular Expression Matching |
Back to the Win32 Shootout Back to dada's perl lab |
| All Source For Regular Expression Matching |
|---|
| regexmatch.awka |
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
n = (ARGV[1] < 1) ? 1 : ARGV[1];
delete ARGV;
}
{ phones[p++] = $0; }
END {
for (i=0; i<n; i++) {
for (j=0; j<p; j++) {
line = phones[j];
if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
m1 = substr(line, RSTART, RLENGTH);
num = ""
if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (substr(m1, RSTART-1, 1) == "(") {
break;
}
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = "(" parts[1] ") " parts[2] "-" parts[3];
}
} else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = parts[1] " " parts[2] "-" parts[3];
}
}
if (i == (n-1)) {
count++;
printf("%d: %s\n", count, num);
}
}
}
}
}
|
| regexmatch.cygperl |
#!/usr/local/bin/perl
# $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $
# http://www.bagley.org/~doug/shootout/
use strict;
my $re = qr{
(?: ^ | [^\d\(]) # must be preceeded by non-digit
( \( )? # match 1: possible initial left paren
(\d\d\d) # match 2: area code is 3 digits
(?(1) \) ) # if match1 then match right paren
[ ] # area code followed by one space
(\d\d\d) # match 3: prefix of 3 digits
[ -] # separator is either space or dash
(\d\d\d\d) # match 4: last 4 digits
\D # must be followed by a non-digit
}x;
my $NUM = $ARGV[0];
$NUM = 1 if ($NUM < 1);
my @phones = <STDIN>;
my $count = 0;
my $num;
while ($NUM--) {
foreach (@phones) {
if (/$re/o) {
$num = "($2) $3-$4";
if (0 == $NUM) {
$count++;
print "$count: $num\n";
}
}
}
}
|
| regexmatch.erlang |
%%% -*- mode: erlang -*-
%%% $Id: regexmatch.erlang,v 1.7 2001/01/06 22:35:23 doug Exp $
%%% http://www.bagley.org/~doug/shootout/
-module(regexmatch).
-export([main/0, main/1]).
%% get the program argument, which is how many test iterations to run
main() -> main(['1']).
main([Arg]) ->
Num = list_to_integer(atom_to_list(Arg)),
{ok, Re} = regexp:parse(
"(^|[^0-9\\(])" % preceeding non-digit or bol
"(" % area code
"\\([0-9][0-9][0-9]\\)" % is either 3 digits in parens
"|" % or
"[0-9][0-9][0-9]" % just 3 digits
")" % end of area code
" " % area code is followed by one space
"[0-9][0-9][0-9]" % exchange is 3 digits
"[ -]" % separator is either space or dash
"[0-9][0-9][0-9][0-9]" % last 4 digits
"($|[^0-9])" % must be followed by a non-digit
),
Plist = readlines(),
test(Num, Re, Plist),
halt(0).
test(1, Regexp, Plist) ->
% display output on last iteration
Nums = match_phones(Regexp, Plist),
print_phones(1, Nums),
true;
test(N, Regexp, Plist) ->
match_phones(Regexp, Plist),
test(N-1, Regexp, Plist).
print_phones(Count, [H|T]) ->
[A,E,N] = H,
% A,E,N is a list of the matching sub-expressions, which are:
% Areacode (3 digits), Exchange (3 digits), Number (4 digits)
io:fwrite("~w: (~s) ~s-~s~n", [Count, A,E,N]),
print_phones(Count+1, T);
print_phones(_, []) ->
true.
match_phones(Regexp, List) ->
mapfilter(
fun(String) ->
case regexp:matches(String, Regexp) of
{match, []} -> false;
{match, Matches} -> parse_phone(String, Matches);
_ -> false
end
end,
List).
parse_phone(Str, [H|T]) ->
{Start, Len} = H,
% Numstr is something that looks like a complete phone #
Numstr = string:substr(Str, Start, Len),
case regexp:matches(Numstr, "[0-9][0-9][0-9][0-9]*") of
{match, []} -> false;
{match, Matches} ->
lists:map(fun({Offset, Length}) ->
string:substr(Numstr, Offset, Length) end,
Matches);
_ -> false
end;
parse_phone(Str, []) -> [].
mapfilter(Fun, [H|T]) ->
case Fun(H) of
false -> mapfilter(Fun, T);
New -> [New | mapfilter(Fun, T)]
end;
mapfilter(_, []) -> [].
readlines() ->
Port = open_port({fd, 0, 1}, [eof, {line, 512}]),
readlines_from_stream([], Port).
readlines_from_stream(Lines, Port) ->
receive
{Port, eof} ->
lists:reverse(Lines);
{Port, {_, {_, Line}}} ->
readlines_from_stream([Line|Lines], Port)
end.
|
| regexmatch.gawk |
# $Id: regexmatch.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
n = (ARGV[1] < 1) ? 1 : ARGV[1];
delete ARGV;
}
{ phones[p++] = $0; }
END {
for (i=0; i<n; i++) {
for (j=0; j<p; j++) {
line = phones[j];
if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
m1 = substr(line, RSTART, RLENGTH);
num = ""
if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (substr(m1, RSTART-1, 1) == "(") {
break;
}
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = "(" parts[1] ") " parts[2] "-" parts[3];
}
} else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = parts[1] " " parts[2] "-" parts[3];
}
}
if (i == (n-1)) {
count++;
printf("%d: %s\n", count, num);
}
}
}
}
}
|
| regexmatch.gcc |
/* -*- mode: c -*-
* $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <pcre.h>
#include <string.h>
#define MAXLINES 100
#define MAXLINELEN 132
char *pattern =
"(?:^|[^\\d\\(])"
"(\\()?"
"(\\d\\d\\d)"
"(?(1)\\))"
"[ ]"
"(\\d\\d\\d)"
"[ -]"
"(\\d\\d\\d\\d)"
"\\D"
;
int
main(int argc, char *argv[]) {
int NUM = ((argc == 2) ? atoi(argv[1]) : 1);
int count;
char *cptr = "";
char **phones;
pcre *re;
int erroffset;
const char *errptr;
int n, lines = 0;
char num[256];
int i, j, k, matchlen;
char *matchoffset;
int nmatches;
int *ovec, ovecsize;
pcre_extra *study;
phones = (char **)malloc(MAXLINES * sizeof(char *));
if (!phones) {
fprintf(stderr, "malloc for phones array failed\n");
exit(1);
}
lines = 0;
while (cptr) {
phones[lines] = (char *)malloc(MAXLINELEN);
if (!phones[lines]) {
fprintf(stderr, "malloc to hold line #%d failed\n", lines);
exit(1);
}
cptr = fgets(phones[lines], MAXLINELEN, stdin);
lines++;
if (lines > MAXLINES) {
fprintf(stderr, "MAXLINES is too small\n");
exit(1);
}
}
re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL);
if (!re) {
fprintf(stderr, "can't open compile regexp\n");
exit(1);
}
study = pcre_study(re, 0, &errptr);
if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) {
fprintf(stderr, "pcre_fullinfo failed\n");
exit(1);
}
nmatches++;
ovecsize = sizeof(int) * nmatches * 3;
ovec = (int *)malloc(ovecsize);
if (!ovec) {
fprintf(stderr, "malloc for ovec array failed\n");
exit(1);
}
count = 0;
while (NUM--) {
for (i=0; i<lines; i++) {
n = pcre_exec(re, study,
phones[i], strlen(phones[i]), 0,
0, ovec, ovecsize);
if (n == nmatches) {
k = 2*2;
j = 0;
num[j++] = '(';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j++] = ')';
num[j++] = ' ';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j++] = '-';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j] = 0;
if (0 == NUM) {
count++;
printf("%d: %s\n", count, num);
}
}
}
}
for (i=0; i<MAXLINES; i++) {
free(phones[i]);
}
free(phones);
free(ovec);
return(0);
}
|
| regexmatch.gforth |
\ -*- mode: forth -*-
\ $Id: regexmatch.gforth,v 1.1 2001/05/26 15:59:44 doug Exp $
\ http://www.bagley.org/~doug/shootout/
\ from Anton Ertl:
\ this uses the Gray parser generator, which is probably too big a
\ cannon for this problem (it also needs a lot of setup code).
\ Writing a recursive descent parser by hand is probably both smaller
\ and faster in this case.
0. argc @ 1- arg >number 2drop drop constant NUM
warnings off \ Gray is a little wordy
require gray.fs
: slurp-fid { fid -- addr u }
0 0 begin
dup 1024 + dup >r extend-mem
rot r@ fid read-file throw
r> 2dup =
while
2drop
repeat
- + dup >r resize throw r> ;
: bit-equiv
\ w3=~w1^w2
invert xor ;
: set-complement
empty ['] bit-equiv binary-set-operation ;
variable input \ pointer to next character to be scanned
variable end-input \ pointer to end of input
-1 constant eof-char
: start
input @ ;
: end
input @ over - ;
: get-input
start end-input @ = if
eof-char
else
start c@
endif ;
256 max-member
s" scan failed" exception constant scanfail
: ?nextchar
0= scanfail and throw
1 chars input +! ;
: testchar?
get-input member? ;
' testchar? test-vector !
: ..
empty copy-set
swap 1+ rot do
i over add-member
loop ;
: `
\ creates anonymous terminal for the character c )
char singleton ['] ?nextchar make-terminal ;
char 0 char 9 .. dup ' ?nextchar terminal digit
set-complement ' ?nextchar terminal nondigit
bl singleton ' ?nextchar terminal lspace
2variable areacode
2variable exchange
2variable last4
)
<- area-code
|| area-code ))
lspace {{ start }} digit digit digit {{ end exchange 2! }}
)
{{ start }} digit digit digit digit {{ end last4 2! }}
nondigit
)) <- telnum
telnum parser scan-telnum
: scan-for-nondigit
begin
count >r
r@ '0 < r@ '9 > or r> '( <> and
over end-input @ u>= or
until ;
variable count 0 count !
: scanfile
over + end-input !
begin
dup input !
['] scan-telnum catch
dup dup scanfail <> and throw
if
scan-for-nondigit
else
1 count +! count @ 1 u.r ." : "
." " exchange 2@ type ." -" last4 2@ type
cr
end-input @ over - #lf scan drop \ skip rest of line
endif
dup end-input @ u>=
until
drop ;
: mainloop
['] 2drop [is] type
NUM 1 +do
2dup scanfile
loop
['] [is] type
scanfile ;
stdin slurp-fid mainloop bye
|
| regexmatch.guile |
#!/usr/local/bin/guile \
-e main -s
!#
;;; $Id: regexmatch.guile,v 1.6 2001/06/29 23:12:37 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
(use-modules (ice-9 format))
(use-modules (ice-9 regex))
(define regexp
(string-append
"(^|[^0-9\\(])" ; (1) preceeding non-digit or bol
"(" ; (2) area code
"\\(([0-9][0-9][0-9])\\)" ; (3) is either 3 digits in parens
"|" ; or
"([0-9][0-9][0-9])" ; (4) just 3 digits
")" ; end of area code
" " ; area code is followed by one space
"([0-9][0-9][0-9])" ; (5) exchange is 3 digits
"[ -]" ; separator is either space or dash
"([0-9][0-9][0-9][0-9])" ; (6) last 4 digits
"([^0-9]|$)" ; must be followed by a non-digit
))
(define (main args)
(let ((n (or (and (= (length args) 2) (string->number (cadr args))) 1))
(phonelines '())
(rx (make-regexp regexp))
(count 0))
(let loop ((line (read-line)))
(cond ((eof-object? line) #f)
(else
(set! phonelines (append phonelines (list line)))
(loop (read-line)))))
(while (> n 0)
(set! n (- n 1))
(let loop ((phones phonelines)
(count 0))
(if (null? phones)
count
(let ((match (regexp-exec rx (car phones))))
(if match
(let* ((area (if (match:start match 3)
(match:substring match 3)
(match:substring match 4)))
(exch (match:substring match 5))
(numb (match:substring match 6))
(num (string-append "(" area ") " exch "-" numb)))
(set! count (+ count 1))
(if (= 0 n)
(display (format "~D: ~a\n" count num)))))
(loop (cdr phones) count)))))))
|
| regexmatch.ici |
// $Id: regexmatch.ici,v 1.0 2003/01/03 12:07:00 dada Exp $
// http://dada.perl.it/shootout
//
// contributed by Tim Long
n := argv[1] ? int(argv[1]) : 1;
lines = gettokens(stdin, '\n', "");
j = 0;
while (--n)
{
forall (l in lines)
{
a = l ~~~ #^[^\d(]*(?:\((\d\d\d)\)|(\d\d\d)) (\d\d\d)[ -](\d\d\d\d)(?:\D|$)#;
if (n == 1 && a)
printf("%d: (%s%s) %s-%s\n", ++j, a[0], a[1], a[2], a[3]);
}
}
/*
* The second last line of the test data is "foo (213 222-2222 bar", which
* by my reading of the spec should match. But it is in the "shouldn't match"
* section and no other programs matches it. So the above regexp is tailored
* not to match it too.
*/
|
| regexmatch.java |
// $Id: regexmatch.java,v 1.2 2003/10/13 16:50:21 tomk Exp $
// http://www.bagley.org/~doug/shootout/
// contributed by Tom Kludy
import java.io.*;
import java.util.*;
import java.util.regex.*;
public class regexmatch {
public static void main(String args[])
throws IOException, PatternSyntaxException {
int n = (args.length > 0) ? Integer.parseInt(args[0]) : 1;
LinkedList lines = new LinkedList();
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
String line;
while( (line = in.readLine()) != null )
lines.addLast(line);
in.close();
Pattern pattern = Pattern.compile(
"(?:^|[^\\d\\(])"+ // must be preceeded by non-digit
"(?:\\((\\d\\d\\d)\\)|(\\d\\d\\d))"+// area code is 3 digits (match 1&2)
"[ ]"+ // area code followed by one space
"(\\d\\d\\d)"+ // match 3: prefix of 3 digits
"[ -]"+ // separator is either space or dash
"(\\d\\d\\d\\d)"+ // match 4: last 4 digits
"(?:\\D|$)" // must be followed by a non-digit
);
int count = 0;
while(--n >= 0) {
for (ListIterator li = lines.listIterator(); li.hasNext();) {
Matcher matcher = pattern.matcher((String)li.next());
if (matcher.find()) {
StringBuffer num = new StringBuffer("(");
String areaCode = matcher.group(1);
if ( areaCode == null )
areaCode = matcher.group(2);
num.append(areaCode).append(") ").append(matcher.group(3))
.append("-").append(matcher.group(4));
if ( n == 0 )
System.out.println(++count + ": " + num);
}
}
}
}
}
|
| regexmatch.lua |
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- implemented by: Roberto Ierusalimschy
text = read("*a")
-- I added the following line and slightly modified the pattern
-- match below so that the program will reject the case:
-- 1(111) 111-1111 (due to preceeding digit)
-- (Doug)
text = gsub(gsub(text, "^", " "), "\n", "\n ")
N = tonumber((arg and arg[1])) or 1
count = 0
while N > 0 do
gsub(text,"%D(%D)(%d%d%d)(%)?) (%d%d%d)[- ](%d+)",
function (A,area,B,exch,digits)
if (A == '(') == (B == ')') and strlen(digits) == 4 then
local tel = "("..area..") "..exch.."-"..digits
if N == 1 then
count = count+1
write(count, ": ", tel, "\n")
end
end
end)
N = N-1
end
|
| regexmatch.lua5 |
-- $Id: regexmatch.lua,v 1.4 2000/12/09 20:07:45 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- contributed by Roberto Ierusalimschy
local text = io.read("*a")
-- make sure text does not start with a number
text = "\n" .. text
-- pattern is: not a digit, optional (, 3 digits, optional ),
-- space, 3 digits, space or hyphen, 4 digits, not a digit
local pattern = "%D(%(?)(%d%d%d)(%)?) (%d%d%d)[- ](%d%d%d%d)%f[%D]"
local N = tonumber((arg and arg[1])) or 1
local count = 0
for i=N,1,-1 do
for open,area,close,exch,digits in string.gfind(text, pattern) do
if (open == '(') == (close == ')') then
local tel = "("..area..") "..exch.."-"..digits
if i == 1 then
count = count+1
io.write(count, ": ", tel, "\n")
end
end
end
end
|
| regexmatch.mawk |
# $Id: regexmatch.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
n = (ARGV[1] < 1) ? 1 : ARGV[1];
delete ARGV;
}
{ phones[p++] = $0; }
END {
for (i=0; i<n; i++) {
for (j=0; j<p; j++) {
line = phones[j];
if (match(line, /(^|[^0-9])(\([0-9][0-9][0-9]\)|[0-9][0-9][0-9]) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]($|[^0-9])/)) {
m1 = substr(line, RSTART, RLENGTH);
num = ""
if (match(m1, /[0-9][0-9][0-9] [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (substr(m1, RSTART-1, 1) == "(") {
break;
}
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = "(" parts[1] ") " parts[2] "-" parts[3];
}
} else if (match(m1, /\([0-9][0-9][0-9]\) [0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9]/)) {
if (x = split(substr(m1, RSTART, RLENGTH), parts, /[ -]/)) {
num = parts[1] " " parts[2] "-" parts[3];
}
}
if (i == (n-1)) {
count++;
printf("%d: %s\n", count, num);
}
}
}
}
}
|
| regexmatch.ocaml |
(*
* $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $
* http://www.bagley.org/~doug/shootout/
* from: Markus Mottl
*)
open Pcre
let rex =
regexp ~flags:[`EXTENDED]
"(?: ^ | [^\d\(]) # must be preceeded by non-digit
(\(\d\d\d\)|\d\d\d) # match 1: area code
[ ] # area code followed by one space
\d\d\d # prefix of 3 digits
[ -] # separator is either space or dash
\d\d\d\d # last 4 digits
(?: \D|$) # must be followed by a non-digit (or EOL)"
let phones =
let lines = ref [] in
foreach_line (fun line -> lines := line :: !lines);
List.rev !lines
let check_phone irflags ar cnt must_print line =
try
unsafe_pcre_exec irflags rex 0 line 4 ar;
let num = String.copy "(...) ...-...." in
let pos = Array.unsafe_get ar 2 in
let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in
let pos = pos + ofs in
String.unsafe_blit line pos num 1 3;
let pos = pos + ofs + 4 in
String.unsafe_blit line pos num 6 3;
String.unsafe_blit line (pos + 4) num 10 4;
if must_print then Printf.printf "%d: %s\n" !cnt num;
incr cnt
with Not_found -> ()
let _ =
let n =
try int_of_string Sys.argv.(1)
with Invalid_argument _ -> 1 in
for i = 2 to n do
List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones
done;
List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
|
| regexmatch.ocamlb |
(*
* $Id: regexmatch.ocaml,v 1.4 2001/01/08 13:19:11 doug Exp $
* http://www.bagley.org/~doug/shootout/
* from: Markus Mottl
*)
open Pcre
let rex =
regexp ~flags:[`EXTENDED]
"(?: ^ | [^\d\(]) # must be preceeded by non-digit
(\(\d\d\d\)|\d\d\d) # match 1: area code
[ ] # area code followed by one space
\d\d\d # prefix of 3 digits
[ -] # separator is either space or dash
\d\d\d\d # last 4 digits
(?: \D|$) # must be followed by a non-digit (or EOL)"
let phones =
let lines = ref [] in
foreach_line (fun line -> lines := line :: !lines);
List.rev !lines
let check_phone irflags ar cnt must_print line =
try
unsafe_pcre_exec irflags rex 0 line 4 ar;
let num = String.copy "(...) ...-...." in
let pos = Array.unsafe_get ar 2 in
let ofs = if String.unsafe_get line pos = '(' then 1 else 0 in
let pos = pos + ofs in
String.unsafe_blit line pos num 1 3;
let pos = pos + ofs + 4 in
String.unsafe_blit line pos num 6 3;
String.unsafe_blit line (pos + 4) num 10 4;
if must_print then Printf.printf "%d: %s\n" !cnt num;
incr cnt
with Not_found -> ()
let _ =
let n =
try int_of_string Sys.argv.(1)
with Invalid_argument _ -> 1 in
for i = 2 to n do
List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) false) phones
done;
List.iter (check_phone (rflags []) (Array.create 6 0) (ref 1) true) phones
|
| regexmatch.perl |
#!/usr/local/bin/perl
# $Id: regexmatch.perl,v 1.5 2000/10/07 08:41:43 doug Exp $
# http://www.bagley.org/~doug/shootout/
use strict;
my $re = qr{
(?: ^ | [^\d\(]) # must be preceeded by non-digit
( \( )? # match 1: possible initial left paren
(\d\d\d) # match 2: area code is 3 digits
(?(1) \) ) # if match1 then match right paren
[ ] # area code followed by one space
(\d\d\d) # match 3: prefix of 3 digits
[ -] # separator is either space or dash
(\d\d\d\d) # match 4: last 4 digits
\D # must be followed by a non-digit
}x;
my $NUM = $ARGV[0];
$NUM = 1 if ($NUM < 1);
my @phones = <STDIN>;
my $count = 0;
my $num;
while ($NUM--) {
foreach (@phones) {
if (/$re/o) {
$num = "($2) $3-$4";
if (0 == $NUM) {
$count++;
print "$count: $num\n";
}
}
}
}
|
| regexmatch.pike |
#!/usr/local/bin/pike// -*- mode: pike -*-
// $Id: regexmatch.pike,v 1.2 2000/12/05 16:04:06 doug Exp $
// http://www.bagley.org/~doug/shootout/
// from: Fredrik Noring
constant area = "([0-9][0-9][0-9]|\\([0-9][0-9][0-9]\\))";
constant exch = "([0-9][0-9][0-9])";
constant last = "([0-9][0-9][0-9][0-9])";
void main(int argc, array(string) argv)
{
Regexp r = Regexp("^[^0-9\\(]*"+area+" "+exch+"[ -]"+last+"[^0-9]*$");
array(string) phones = Stdio.stdin->read()/"\n";
int n = (int)argv[-1];
int count = 0;
while(n--)
foreach(phones, string phone)
if(array(string) parts = r->split(phone))
if(n == 0)
if(parts[0][0] == '(')
write("%d: %s %s-%s\n", ++count, @parts);
else
write("%d: (%s) %s-%s\n", ++count, @parts);
}
|
| regexmatch.poplisp |
;;; -*- mode: lisp -*-
;;; $Id: regexmatch.cmucl,v 1.1 2001/06/13 19:45:20 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; from Jochen Schmidt
(proclaim '(optimize (speed 3)(safety 0)(space 0)(debug 0)(compilation-speed 0)))
(setf ext:*bytes-consed-between-gcs* 5000000)
(use-package :meta)
(eval-when (compile load eval)
(meta:enable-meta-syntax)
(deftype digit () '(member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
(deftype non-digit () '(not (member #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\( #\) ))))
(defun parse-tel (input)
(meta:with-string-meta (buffer input)
(let (last-result)
(declare (type (or null simple-base-string) last-result))
(labels ((skip-non-digits (&aux d)
(meta:match $[@(non-digit d)]))
(digit-triplet (&aux (old-index index) d (result (make-array 3 :element-type 'base-char)))
(declare (type simple-base-string result))
(or (meta:match [@(digit d) !(setf (schar result 0) d)
@(digit d) !(setf (schar result 1) d)
@(digit d) !(setf (schar result 2) d)
!(setf last-result result)])
(progn (setf index old-index) nil)))
(digit-4tupel (&aux (old-index index) d (result (make-array 4 :element-type 'base-char)))
(declare (type simple-base-string result))
(or (meta:match [@(digit d) !(setf (schar result 0) d)
@(digit d) !(setf (schar result 1) d)
@(digit d) !(setf (schar result 2) d)
@(digit d) !(setf (schar result 3) d)
!(setf last-result result)])
(progn (setf index old-index) nil)))
(telephone-nr (&aux area-code exchange d)
(declare (type (or null simple-base-string) area-code exchange))
(and (meta:match [!(skip-non-digits)
{[#\( !(digit-triplet) #\)] !(digit-triplet)} !(setf area-code last-result)
#\space !(digit-triplet) !(setf exchange last-result)
{#\space #\-} !(digit-4tupel) {@(non-digit d) !(= index end)}])
(values area-code exchange last-result))))
(telephone-nr)))))
(let ((n (parse-integer (or (car pop11::poparglist) "1")))
(input (loop for line = (read-line *standard-input* nil 'eof)
until (eq line 'eof) collect line)))
(loop for i of-type fixnum from 1 below n do
(loop for line of-type simple-base-string in input
do (parse-tel line)))
(loop with i of-type fixnum = 0
for line of-type string in input
do (multiple-value-bind (area-code exchange rest) (parse-tel line)
(when area-code
(format t "~A: (~A) ~A-~A~%" (incf i) area-code exchange rest)))))
|
| regexmatch.python |
#!/usr/pkg/bin/python
# $Id: regexmatch.python,v 1.9 2001/02/05 19:02:37 doug Exp $
# http://www.bagley.org/~doug/shootout/
import sys, re
def main():
NUM = int(sys.argv[1])
if NUM < 1:
NUM = 1
phones = sys.stdin.readlines()
rx = re.compile(
r'(?:^|[^\d\(])'
r'(?:\((\d\d\d)\)|(\d\d\d))'
r'[ ]'
r'(\d\d\d)'
r'[ -]'
r'(\d\d\d\d)'
r'\D'
)
findIt = rx.search
count = 0
for i in xrange(0,NUM):
for line in phones:
m = findIt(line)
if m:
g = m.group
num = "(" + (g(1) or g(2)) + ") " + g(3) + "-" + g(4)
if 0 == i:
count = count + 1
print "%d: %s" % (count, num)
main()
|
| regexmatch.ruby |
#!/usr/local/bin/ruby
# -*- mode: ruby -*-
# $Id: regexmatch.ruby,v 1.11 2001/07/02 04:26:31 doug Exp $
# http://www.bagley.org/~doug/shootout/
re = Regexp.new(
'(?:^|[^\d\(])' + # must be preceeded by non-digit
'(?:\((\d\d\d)\)|(\d\d\d))' + # match 1 or 2: area code is 3 digits
'[ ]' + # area code followed by one space
'(\d\d\d)' + # match 3: prefix of 3 digits
'[ -]' + # separator is either space or dash
'(\d\d\d\d)' + # match 4: last 4 digits
'\D' # must be followed by a non-digit
)
NUM = Integer(ARGV[0] || 1)
phones = STDIN.readlines
count = m = line = iter = 0
for iter in 1..NUM
for line in phones
if m = re.match(line)
m1 = m[1];
if m1 == ""
m1 = m[2];
end
num = '(' + m1 + ') ' + m[3] + '-' + m[4];
if iter == NUM
count += 1
puts "#{count}: #{num}"
end
end
end
end
|
| regexmatch.smlnj |
(* -*- mode: sml -*-
* $Id: regexmatch.smlnj,v 1.1 2001/07/10 13:04:21 doug Exp $
* http://www.bagley.org/~doug/shootout/
* from Stephen Weeks
* "ported" to SML/NJ
* with help from Daniel Wang
*)
structure Test : sig
val main : (string * string list) -> OS.Process.status
end = struct
fun ++ (r: int ref): int =
let
val n = 1 + !r
val _ = r := n
in n
end
structure Int =
struct
open Int
type t = int
fun exists (start: t, stop: t, f: t -> bool): bool =
let
fun loop i = i < stop andalso (f i orelse loop (i + 1))
in
loop start
end
fun forall (start, stop, f) = not (exists (start, stop, not o f))
fun fold (start: t, stop: t, a: 'a, f: t * 'a -> 'a): 'a =
let
fun loop (i: t, a: 'a) =
if i >= stop
then a
else loop (i + 1, f (i, a))
in loop (start, a)
end
fun for (start: t, stop: t, f: t -> unit): unit =
let
fun loop i =
if i >= stop
then ()
else (f i; loop (i + 1))
in
loop start
end
end
structure Array2 =
struct
open Array2
type 'a t = 'a array
val new = array
fun tabulate (r, c, f) = Array2.tabulate RowMajor (r, c, f)
fun foreachi (a, f) =
appi RowMajor f {base = a, row = 0, col = 0,
nrows = NONE, ncols = NONE}
end
structure Vector =
struct
open Vector
fun exists (v, f) =
Int.exists (0, length v, fn i => f (sub (v, i)))
fun foreach (v, f) = app f v
fun foreachi (v, f) = appi f (v, 0, NONE)
fun new (n, x) = tabulate (n, fn _ => x)
fun new1 x = new (1, x)
end
structure List =
struct
open List
fun foreach (l, f) = app f l
fun fold (l, b, f) = foldl f b l
fun appendRev (l1, l2) = fold (l1, l2, op ::)
fun push (r, x) = r := x :: !r
fun keepAll (l, f) = filter f l
fun peek (l, f) = find f l
fun insert (l, x, op <=) =
let
fun loop (l, ac) =
case l of
[] => appendRev (ac, [x])
| x' :: l' =>
if x <= x'
then appendRev (ac, x :: l)
else loop (l', x' :: ac)
in loop (l, [])
end
end
structure Array =
struct
open Array
val new = array
fun modify (a, f) = Array.modify f a
fun foreachi (a, f) = appi f (a, 0, NONE)
fun indices (a: bool array): int vector =
let
val n = Array.length a
val numTrue =
let
fun loop (i, count) =
if i = n
then count
else loop (i + 1,
if Array.sub (a, i)
then count + 1
else count)
in loop (0, 0)
end
val next = ref 0
fun loop i =
if Array.sub (a, i)
then (next := i + 1; i)
else loop (i + 1)
in Vector.tabulate (numTrue, fn _ => loop (!next))
end
end
structure Char =
struct
open Char
val fromInt = chr
val toInt = ord
end
structure String =
struct
open String
type t = string
fun contains (s: t, c: char): bool =
Int.exists (0, size s, fn i => c = sub (s, i))
end
val numChars: int = 128
structure Regexp =
struct
datatype t =
AnchorStart
| CharSet of char -> bool
| Or of t list
| Seq of t list
| Star of t
end
structure Stack:
sig
type 'a t
val clear: 'a t -> unit
val exists: 'a t * ('a -> bool) -> bool
val foreach: 'a t * ('a -> unit) -> unit
val new: int * 'a -> 'a t
val push: 'a t * 'a -> unit
end =
struct
datatype 'a t = T of {elts: 'a array,
size: int ref}
fun new (size: int, dummy: 'a): 'a t =
T {elts = Array.new (size, dummy),
size = ref 0}
fun push (T {elts, size}, x) =
let
val n = !size
val _ = Array.update (elts, n, x)
val _ = size := n + 1
in ()
end
fun exists (T {elts, size, ...}, f) =
Int.exists (0, !size, fn i => f (Array.sub (elts, i)))
fun foreach (T {elts, size}, f) =
Int.for (0, !size, fn i => f (Array.sub (elts, i)))
fun clear (T {size, ...}) = size := 0
end
structure NFA:
sig
(* The states in an NFA are indexed from 0 to n-1, where n is the number
* of states.
*)
type state = int
(* State i is final iff Array.sub (final, i).
* The outgoing states from state i on input char c are given by
* Array2.sub (next, i, Char.ord c).
* anchorStarts is sorted in increasing order of state index.
*)
datatype t = T of {anchorStarts: state list,
final: bool array,
seen: bool array,
stack1: int Stack.t,
stack2: int Stack.t,
start: state,
next: state vector Array2.t}
val fromRegexp: Regexp.t -> t
val match: {nfa: t,
string: string,
startPos: int,
anchorStart: bool} -> int option
val numStates: t -> int
end =
struct
type state = int
datatype t = T of {anchorStarts: state list,
final: bool array,
seen: bool array,
stack1: int Stack.t,
stack2: int Stack.t,
start: state,
next: state vector Array2.t}
fun numStates (T {next, ...}) = Array2.nRows next
(* Simulating an NFA with two stacks and a bit vector, as in Algorithm 3.4
* (page 126) of Compilers: Principles, Techniques, and Tools by Aho,
* Sethi, and Ullman.
*)
fun match {anchorStart: bool,
nfa as T {anchorStarts, final, seen, stack1, stack2, start,
next},
startPos,
string = s}: int option =
let
val numStates = numStates nfa
val n = String.size s
val _ = Array.modify (seen, fn _ => false)
fun loop (current: state Stack.t,
nextStates: state Stack.t,
i: int,
last: int option): int option =
let
val last =
if Stack.exists (current, fn s => Array.sub (final, s))
then SOME i
else last
in
if numStates = 0 orelse i = n
then (Stack.clear stack1
; Stack.clear stack2
; last)
else
let
val _ = Array.modify (seen, fn _ => false)
val c = Char.toInt (String.sub (s, i))
val _ =
Stack.foreach (current, fn s =>
Vector.foreach
(Array2.sub (next, s, c),
fn s' =>
if Array.sub (seen, s')
then ()
else (Array.update (seen, s', true)
; Stack.push (nextStates, s'))))
val _ = Stack.clear current
in loop (nextStates, current, i + 1, last)
end
end
val _ = Stack.push (stack1, start)
val _ =
if anchorStart
then List.foreach (anchorStarts, fn s =>
Stack.push (stack1, s))
else ()
in
loop (stack1, stack2, startPos, NONE)
end
(* This conversion from a regular expression to an NFA is based on
* Section 3.9 (pages 134 -- 140) of Compilers: Principles, Techniques,
* and Tools by Aho, Sethi, and Ullman.
*
* It creates one NFA state for each CharSet (called a "position") that is
* in the regexp. There is also one extra state for the start state.
* It adds edges as in rules 1 and 2 (page 138) for the followpos function.
*)
fun fromRegexp (r: Regexp.t): t =
let
fun loop (r, ac) =
let open Regexp
in case r of
AnchorStart => ac + 1
| CharSet _ => ac + 1
| Or rs => List.fold (rs, ac, loop)
| Seq rs => List.fold (rs, ac, loop)
| Star r => loop (r, ac)
end
val numPos = loop (r, 0)
val numStates = numPos + 1
val start = numPos
val posCounter = ref ~1
val follow = Array2.new (numStates, numStates, false)
val posChars = Array2.tabulate (numPos, numChars, fn _ => false)
local
datatype t = T of bool vector
in
fun contains (T v, s) = Vector.sub (v, s)
val empty: t = T (Vector.new (numPos, false))
fun union (T v, T v'): t =
T (Vector.tabulate (numPos, fn i =>
Vector.sub (v, i)
orelse Vector.sub (v', i)))
fun singleton (i: int): t =
T (Vector.tabulate (numPos, fn j => i = j))
fun foreach (T v, f) =
Vector.foreachi (v, fn (i, b) => if b then f i else ())
end
val anchorStarts = ref []
fun loop r =
case r of
Regexp.AnchorStart =>
let
val i = ++ posCounter
val _ = List.push (anchorStarts, i)
val first = singleton i
in
{first = first,
last = first,
nullable = false}
end
| Regexp.CharSet f =>
let
val i = ++ posCounter
val _ =
Int.for (0, numChars, fn c =>
if f (Char.chr c)
then Array2.update (posChars, i, c, true)
else ())
val first = singleton i
in {first = first,
last = first,
nullable = false}
end
| Regexp.Or rs =>
List.fold
(rs, {first = empty,
last = empty,
nullable = false},
fn (r, {first = f, last = l, nullable = n}) =>
let
val {first = f', last = l', nullable = n'} =
loop r
in
{first = union (f, f'),
last = union (l, l'),
nullable = n orelse n'}
end)
| Regexp.Seq rs =>
List.fold
(rs, {first = empty,
last = empty,
nullable = true},
fn (r, {first = f, last = l, nullable = n}) =>
let
val {first = f', last = l', nullable = n'} =
loop r
val _ =
foreach
(l, fn s =>
foreach
(f', fn s' => Array2.update (follow, s, s', true)))
in
{first = if n then union (f, f') else f,
last = if n' then union (l, l') else l',
nullable = n andalso n'}
end)
| Regexp.Star r =>
let
val {first = f, last = l, nullable = n} = loop r
val _ =
foreach
(l, fn s =>
foreach
(f, fn s' => Array2.update (follow, s, s', true)))
in
{first = f, last = l, nullable = true}
end
val {first, last, nullable} = loop r
(* Any anchor starts in first should be anchor starts.
* This also reverses anchorStarts so they are in order.
*)
val anchorStarts =
List.fold (!anchorStarts, [], fn (s, ac) =>
if contains (first, s) then s :: ac else ac)
val _ = foreach (first, fn i =>
Array2.update (follow, start, i, true))
val final = Array.array (numStates, false)
val _ = foreach (last, fn i => Array.update (final, i, true))
val _ = if nullable then Array.update (final, start, true) else ()
val a = Array.new (numStates, false)
val next =
Array2.tabulate
(numStates, numChars, fn (i, c) =>
let
val _ =
Int.for (0, numStates, fn j => Array.update (a, j, false))
val _ =
Int.for
(0, numPos, fn j =>
if Array2.sub (follow, i, j)
andalso Array2.sub (posChars, j, c)
then Array.update (a, j, true)
else ())
in Array.indices a
end)
in
T {anchorStarts = anchorStarts,
final = final,
next = next,
seen = Array.new (numStates, false),
stack1 = Stack.new (numStates, ~1),
stack2 = Stack.new (numStates, ~1),
start = start}
end
end
structure DFA:
sig
type t
val fromNFA: NFA.t -> t
val match: {dfa: t,
string: string,
startPos: int,
anchorStart: bool} -> int option
val minimize: t -> t
end =
struct
(* The states in a DFA are indexed from 0 to n-1, where n is the number
* of states.
*)
type state = int
(* State i is final iff Array.sub (final, i).
* The outgoing state from state i on input char c is
* Array2.sub (next, i, Char.ord c).
*)
datatype t = T of {anchorStart: state,
dead: bool array,
final: bool array,
next: state Array2.t,
start: state}
fun numStates (T {next, ...}): int = Array2.nRows next
fun match {dfa as T {anchorStart = ancSt, dead, final, start, next},
string as s,
startPos: int,
anchorStart: bool}: int option =
let
val n = String.size s
fun loop (i: int, state: int, last: int option): int option =
let
val last =
if Array.sub (final, state)
then SOME i
else last
in
if Array.sub (dead, state) orelse i = n
then last
else loop (i + 1,
Array2.sub (next, state,
Char.toInt (String.sub (s, i))),
last)
end
in loop (startPos,
if anchorStart then ancSt else start,
NONE)
end
fun dead (numStates, final, next) =
Array.tabulate
(numStates, fn i =>
not (Array.sub (final, i))
andalso Int.forall (0, numChars, fn c =>
i = Array2.sub (next, i, c)))
(* This DFA minimization algorithm is based on algorithm 3.6 (page 142)
* of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and
* Ullman.
*
* It maintains an array, r, that stores for each state s the
* representative of the class to which s belongs.
* It repeatedly refines an equivalence relation, represented by a list
* of classes, where each class is a list of states (i.e. ints).
*)
fun minimize (dfa as T {anchorStart, final, start, next, ...}): t =
let
val numStates = numStates dfa
type class = int list
type classes = class list
val repCounter = ref ~1
val change = ref false
fun newRep () = (change := true; ++ repCounter)
val finRep = newRep ()
val nonfinRep = newRep ()
val r = Array.tabulate (numStates, fn i =>
if Array.sub (final, i)
then finRep
else nonfinRep)
fun rep s = Array.sub (r, s)
fun trans (s, c) = rep (Array2.sub (next, s, c))
fun refine (class: class, ac: classes): classes =
let
val r =
List.fold
(class, [], fn (state, classes) =>
let
fun loop (classes, ac) =
case classes of
[] =>
(case ac of
[] => [{class = [state],
old = state}]
| _ =>
let
val s = newRep ()
val _ = Array.update (r, state, s)
in {class = [state],
old = state} :: ac
end)
| (z as {class, old}) :: classes =>
if Int.forall
(0, numChars, fn c =>
trans (old, c) = trans (state, c))
then
(Array.update (r, state, rep old)
; {class = state :: class,
old = old} :: (List.appendRev
(classes, ac)))
else loop (classes, z :: ac)
in loop (classes, [])
end)
in List.fold (r, ac, fn ({class, ...}, ac) =>
case class of
[_] => ac
| _ => class :: ac)
end
fun refineAll (classes: classes): unit =
case classes of
[] => ()
| _ =>
let
val _ = change := false
val classes =
List.fold (classes, [], fn (class, ac) =>
case class of
[_] => ac
| _ => refine (class, ac))
in if !change
then refineAll classes
else ()
end
val (fin, nonfin) =
Int.fold (0, numStates, ([], []), fn (i, (f, n)) =>
if Array.sub (final, i)
then (i :: f, n)
else (f, i :: n))
val _ = refineAll [fin, nonfin]
val numStates' = 1 + !repCounter
val reached = Array.new (numStates', false)
fun visit (s: int ): unit =
let
val s' = rep s
in
if Array.sub (reached, s')
then ()
else (Array.update (reached, s', true)
; Int.for (0, numChars, fn c =>
visit (Array2.sub (next, s, c))))
end
val _ = visit start
val _ = visit anchorStart
val c = ref ~1
val newR = Array.tabulate (numStates', fn s =>
if Array.sub (reached, s)
then ++ c
else ~1)
val numStates' = 1 + !c
val _ = Array.modify (r, fn s => Array.sub (newR, s))
val next' = Array2.new (numStates', numChars, ~1)
val _ =
Array2.foreachi
(next, fn (s, c, s') =>
Array2.update (next', rep s, c, rep s'))
val final' = Array.array (numStates', false)
val _ =
Array.foreachi
(final, fn (i, b) =>
if b then Array.update (final', rep i, true) else ())
in T {anchorStart = rep anchorStart,
dead = dead (numStates', final', next'),
final = final',
start = rep start,
next = next'}
end
(* This is the usual "subset construction", as in algorithm 3.2 (page 118)
* of Compilers: Principles, Techniques, and Tools by Aho, Sethi, and
* Ullman.
*
* It associates each (reachable) set of states in the nfa with a single
* state in the DFA.
*)
fun fromNFA (nfa as NFA.T {anchorStarts, final, start, next, ...}) =
let
type states = state vector
val counter = ref ~1
type work = {states: states,
state: int,
out: int vector option ref}
val cache: work list ref = ref []
val todo: work list ref = ref []
fun statesToState (ss: states): int =
case List.peek (!cache, fn {states, ...} => ss = states) of
NONE =>
let
val state = ++ counter
val work = {out = ref NONE,
state = state,
states = ss}
val _ = List.push (cache, work)
val _ = List.push (todo, work)
in
state
end
| SOME {state, ...} => state
local
val seen = Array.array (NFA.numStates nfa, false)
in
fun loop () =
case !todo of
[] => ()
| {states, out, ...} :: rest =>
(todo := rest
; out := (SOME
(Vector.tabulate
(numChars, fn c =>
let
val _ =
Array.modify (seen, fn _ => false)
val _ =
Vector.foreach
(states, fn s =>
Vector.foreach
(Array2.sub (next, s, c), fn s' =>
Array.update (seen, s', true)))
in statesToState (Array.indices seen)
end)))
; loop ())
end
val start' = statesToState (Vector.new1 start)
val anchorStart' =
statesToState
(Vector.fromList (List.insert (anchorStarts, start, op <=)))
val _ = loop ()
val numStates = 1 + !counter
val next' = Array2.new (numStates, numChars, ~1)
val final' = Array.new (numStates, false)
val _ =
List.foreach
(!cache, fn {states, state = i, out, ...}: work =>
let
val _ =
Vector.foreachi
(valOf (! out), fn (c, j) =>
Array2.update (next', i, c, j))
val _ =
if Vector.exists (states, fn s => Array.sub (final, s))
then Array.update (final', i, true)
else ()
in ()
end)
val dead' = dead (numStates, final', next')
in T {anchorStart = anchorStart',
dead = dead',
final = final',
start = start',
next = next'}
end
end
structure Regexp:
sig
structure Compiled:
sig
type t
val find: t * string -> {start: int, length: int} option
end
type t
val anchorStart: t
val any: t
val char: char -> t
val compileDFA: t -> Compiled.t
val compileNFA: t -> Compiled.t
val digit: t
val nonDigit: t
val notOneOf: string -> t
val oneOf: string -> t
val or: t list -> t
val seq: t list -> t
val star: t -> t
end =
struct
open Regexp
val anchorStart = AnchorStart
val isChar = CharSet
fun isNotChar f = isChar (not o f)
fun char c = isChar (fn c' => c = c')
val or = Or
val seq = Seq
val star = Star
val any = isChar (fn _ => true)
fun oneOf s = isChar (fn c => String.contains (s, c))
fun notOneOf s = isNotChar (fn c => String.contains (s, c))
val digs = "0123456789"
val digit = oneOf digs
val nonDigit = notOneOf digs
val empty = Or []
val emptyString = Seq []
structure Compiled =
struct
datatype t =
DFA of DFA.t
| NFA of NFA.t
fun find (c: t, s: string) =
let
val n = String.size s
fun loop (i: int, anchorStart: bool) =
if i = n
then NONE
else
let
val res =
case c of
DFA dfa =>
DFA.match {dfa = dfa,
string = s,
startPos = i,
anchorStart = anchorStart}
| NFA nfa =>
NFA.match {nfa = nfa,
string = s,
startPos = i,
anchorStart = anchorStart}
in
case res of
NONE => loop (i + 1, false)
| SOME finish => SOME {start = i,
length = finish - i}
end
in loop (0, true)
end
end
fun compileDFA r =
Compiled.DFA (DFA.minimize (DFA.fromNFA (NFA.fromRegexp r)))
fun compileNFA r =
Compiled.NFA (NFA.fromRegexp r)
end
local
open Regexp
in
val d = digit
val eol = char #"#"
val space = oneOf " \t"
val r =
seq [or [anchorStart, notOneOf "0123456789("],
or [seq [char #"(", d, d, d, char #")"],
seq [d, d, d]],
char #" ",
d, d, d,
oneOf " -",
d, d, d, d,
or [eol, nonDigit]]
val comp = Regexp.compileDFA r
end
fun incr (r: int ref) = r := !r + 1
val ins = TextIO.stdIn
fun printl [] = print "\n" | printl(h::t) = ( print h ; printl t )
local
val form = "(...) ...-...."
val a = CharArray.tabulate (String.size form, fn i =>
String.sub (form, i))
in
fun checkPhone (mustPrint: bool, cnt: int ref, line: string) =
case Regexp.Compiled.find (comp, line) of
NONE => ()
| SOME {start = pos, ...} =>
let
fun blit (src, dst, length) =
let
val stop = src + length
fun loop (src, dst) =
if src = stop
then ()
else (CharArray.update (a, dst,
String.sub (line, src))
; loop (src + 1, dst + 1))
in
loop (src, dst)
end
val (o1, o2, o3) =
if #"(" = String.sub (line, pos)
then (1, 6, 10)
else if #"(" = String.sub (line, pos + 1)
then (2, 7, 11)
else if Char.isDigit (String.sub (line, pos))
then (0, 4, 8)
else (1, 5, 9)
val _ = blit (pos + o1, 1, 3)
val _ = blit (pos + o2, 6, 3)
val _ = blit (pos + o3, 10, 4)
val _ =
if mustPrint
then printl [Int.toString (!cnt), ": ",
CharArray.extract (a, 0, NONE)]
else ()
val _ = incr cnt
in
()
end
end
fun doit (phones,mustPrint: bool): unit =
let val cnt = ref 1
in List.foreach (phones, fn line => checkPhone (mustPrint, cnt, line))
end
fun atoi s = case Int.fromString s of SOME num => num | NONE => 0
fun main (name, args) =
let
val n = atoi (hd (args @ ["1"]))
val phones =
let
fun loop lines =
case TextIO.inputLine ins of
"" => rev lines
| line => loop (line :: lines)
in loop []
end
val _ = Int.for (1, n, fn _ => doit (phones,false))
val _ = doit (phones,true)
in OS.Process.success
end
end
val _ = SMLofNJ.exportFn("regexmatch", Test.main);
|
| regexmatch.tcl |
#!/usr/local/bin/tclsh
# $Id: regexmatch.tcl,v 1.9 2001/03/15 17:01:52 doug Exp $
# http://www.bagley.org/~doug/shootout/
# from: Miguel Sofer, with modifications by Kristoffer Lawson
proc main {} {
global argv
set NUM [lindex $argv 0]
if {$NUM < 1} {
set NUM 1
}
set phones [split [read stdin] "\n"]
set count 0
set rExp {(?:^|[^\d(])(\(\d{3}\)|\d{3}) (\d{3})[ -](\d{4})($|[^\d])}
while {$NUM > 0} {
incr NUM -1
foreach phone $phones {
if {[regexp $rExp $phone match area exch num]} {
if {! $NUM} {
incr count 1
puts "$count: ([string trim $area () ]) $exch-$num"
}
}
}
}
}
main
|
| regexmatch.vbscript |
Set regEx = new RegExp
regEx.Pattern = "^[^\d\(]*(\(\d\d\d\)|\d\d\d) (\d\d\d)[- ](\d\d\d\d)(\D|$)"
regEx.Global = True
phonesBlob = WScript.Stdin.ReadAll()
phones = Split(phonesBlob, Chr(10))
N = WScript.Arguments(0)
If N < 1 Then N = 1
While N > 0
For Each line in phones
Set Matches = regEx.Execute(line)
If Matches.Count > 0 Then
' WSCript.Echo "[" & Matches.Count & "]" & line
tel1 = Matches(0).Submatches(0)
If Left(tel1, 1) = "(" Then
tel1 = Mid(tel1, 2, Len(tel1)-2)
End If
tel2 = Matches(0).Submatches(1)
tel3 = Matches(0).Submatches(2)
num = "(" & tel1 & ") " & tel2 & "-" & tel3
If N = 1 Then
Count = Count + 1
WScript.Echo Count & ": " & num
End If
Else
' WScript.Echo "nomatch: " & line
End If
Next
N = N - 1
Wend
|
| regexmatch.vc |
/* -*- mode: c -*-
* $Id: regexmatch.gcc,v 1.4 2000/12/24 05:43:53 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <pcre.h>
#include <string.h>
#define MAXLINES 100
#define MAXLINELEN 132
char *pattern =
"(?:^|[^\\d\\(])"
"(\\()?"
"(\\d\\d\\d)"
"(?(1)\\))"
"[ ]"
"(\\d\\d\\d)"
"[ -]"
"(\\d\\d\\d\\d)"
"\\D"
;
int
main(int argc, char *argv[]) {
int NUM = ((argc == 2) ? atoi(argv[1]) : 1);
int count;
char *cptr = "";
char **phones;
pcre *re;
int erroffset;
const char *errptr;
int n, lines = 0;
char num[256];
int i, j, k, matchlen;
char *matchoffset;
int nmatches;
int *ovec, ovecsize;
pcre_extra *study;
phones = (char **)malloc(MAXLINES * sizeof(char *));
if (!phones) {
fprintf(stderr, "malloc for phones array failed\n");
exit(1);
}
lines = 0;
while (cptr) {
phones[lines] = (char *)malloc(MAXLINELEN);
if (!phones[lines]) {
fprintf(stderr, "malloc to hold line #%d failed\n", lines);
exit(1);
}
cptr = fgets(phones[lines], MAXLINELEN, stdin);
lines++;
if (lines > MAXLINES) {
fprintf(stderr, "MAXLINES is too small\n");
exit(1);
}
}
re = pcre_compile(pattern, 0, &errptr, &erroffset, NULL);
if (!re) {
fprintf(stderr, "can't open compile regexp\n");
exit(1);
}
study = pcre_study(re, 0, &errptr);
if (pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &nmatches) != 0) {
fprintf(stderr, "pcre_fullinfo failed\n");
exit(1);
}
nmatches++;
ovecsize = sizeof(int) * nmatches * 3;
ovec = (int *)malloc(ovecsize);
if (!ovec) {
fprintf(stderr, "malloc for ovec array failed\n");
exit(1);
}
count = 0;
while (NUM--) {
for (i=0; i<lines; i++) {
n = pcre_exec(re, study,
phones[i], strlen(phones[i]), 0,
0, ovec, ovecsize);
if (n == nmatches) {
k = 2*2;
j = 0;
num[j++] = '(';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j++] = ')';
num[j++] = ' ';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j++] = '-';
matchoffset = phones[i] + ovec[k];
matchlen = ovec[k+1] - ovec[k];
strncpy(num+j, matchoffset, matchlen);
j += matchlen; k += 2;
num[j] = 0;
if (0 == NUM) {
count++;
printf("%d: %s\n", count, num);
}
}
}
}
for (i=0; i<MAXLINES; i++) {
free(phones[i]);
}
free(phones);
free(ovec);
return(0);
}
|
| regexmatch.vc++ |
// -*- mode: c++ -*-
// $Id: regexmatch.g++,v 1.2 2001/06/20 03:20:03 doug Exp $
// http://www.bagley.org/~doug/shootout/
// From Bill Lear
#include <iostream>
#include <zopyra/regx>
using namespace std;
typedef pair<const char*, const char*> span;
int main(int ac, char* av[]) {
zopyra::regx re(
"(?x) # set extended flag for embedded comment fun\n"
"(?:^|[^\\d(]) # must be preceded by non-digit\n"
"([(])? # match 1: possible initial left paren\n"
"(\\d{3}) # match 2: area code is 3 digits\n"
"(?(1)[)]) # if match1 then match right paren\n"
"[ ] # area code followed by one space\n"
"(\\d{3}) # match 3: prefix of 3 digits\n"
"[- ] # separator is either space or dash\n"
"(\\d{4}) # match 4: last 4 digits\n"
"(?:\\D|\\b) # followed by non-digit or break\n"
);
string line;
vector<span> lines;
while (getline(cin, line)) {
char* phone = new char[line.size()];
copy(line.begin(), line.end(), phone);
lines.push_back(span(phone, phone + line.size()));
}
size_t ITER = (ac == 2 ? (atoi(av[1]) < 1 ? 1 : atoi(av[1])): 1);
char num[13];
num[0] = '(';
num[4] = ')';
num[5] = ' ';
num[9] = '-';
size_t count = 0;
while (ITER--) {
vector<span>::iterator end = lines.end();
for (vector<span>::iterator i = lines.begin(); i != end; ++i) {
zopyra::regx::iterator p = re.find(i->first, i->second);
if (p++ != re.end()) {
char* num_p = &num[1];
++p;
copy(p->first, p->second, num_p);
num_p = &num[6];
++p;
copy(p->first, p->second, num_p);
num_p = &num[10];
++p;
copy(p->first, p->second, num_p);
if (!ITER) {
cout << ++count << ": ";
copy(num, num + 14, ostream_iterator<char>(cout));
cout << '\n';
}
}
}
}
}
|