All Source For Word Frequency Count |
wordfreq.awka |
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
delete ARGV;
FS = "[^A-Za-z][^A-Za-z]*";
}
{
for (i=1; i<=NF; i++) {
freq[tolower($(i))]++;
}
}
END {
# gawk doesn't have a builtin sort routine
# so we have to pipe through the shell sort program
sort = "/bin/sort -nr"
for (word in freq) {
if (word) {
printf "%7d\t%s\n", freq[word], word | sort
}
}
close(sort)
}
|
wordfreq.bcc |
/* -*- mode: c -*-
* $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#define inline
#include "simple_hash.h"
typedef int (*comparator)(const void *, const void *);
int cmp_hash(struct ht_node **a, struct ht_node **b) {
int val = (*b)->val - (*a)->val;
return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}
int main() {
int bufsize = 80;
char *buf = (char *)malloc(bufsize + 1);
char c;
int i = 0;
struct ht_ht *ht = ht_create(75000);
struct ht_node **sort_array, **sort_tmp, *node;
while ((c = getchar()) > 0) {
if (isalpha(c)) {
buf[i++] = tolower(c);
if (i == bufsize) {
bufsize *= 2;
buf = realloc(buf, bufsize + 1);
}
} else {
if (i > 0) {
buf[i] = '\0';
++(ht_find_new(ht, buf)->val);
i = 0;
}
}
}
free(buf);
sort_array = sort_tmp =
malloc(sizeof(struct ht_node *) * ht_count(ht));
for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;
qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
(comparator)cmp_hash);
for (i=0; i<ht_count(ht); i++)
printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
ht_destroy(ht);
return(0);
}
|
wordfreq.cygperl |
#!/usr/local/bin/perl
# $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/
# Tony Bowden suggested using tr versus lc and split(/[^a-z]/)
use strict;
my %count = ();
while (read(STDIN, $_, 4095) and $_ .= <STDIN>) {
tr/A-Za-z/ /cs;
++$count{$_} foreach split(' ', lc $_);
}
my @lines = ();
my ($w, $c);
push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count));
print sort { $b cmp $a } @lines;
|
wordfreq.delphi |
program wordfreq;
uses
SysUtils, Classes, Windows,
simpleHash in 'simpleHash.pas';
const
cWhiteSpace = [#0..#255]-['a'..'z','A'..'Z'];
BUF_SIZE = 4096;
var handle: THandle;
buf: array[1..BUF_SIZE+1] of char;
s,w: string;
words: TStringHash;
forSort: TList;
numRead,i,wStart: cardinal;
p: PHashEntry;
procedure addWord(const s: string);
var w: string; p: PHashEntry;
begin
w:=lowercase(s);
p:=words.get(w);
if p<>nil then
inc(p^.Value)
else
words.add(w,1);
end;
function sortFunc(Item1, Item2: Pointer): Integer;
begin
with PHashEntry(Item1)^ do begin
if Value<PHashEntry(Item2)^.Value then
result:=1
else if Value>PHashEntry(Item2)^.Value then
result:=-1
else begin // values equal
if Key=PHashEntry(Item2)^.Key then
result:=0
else if Key<PHashEntry(Item2)^.Key then
result:=1
else
result:=-1;
end;
end;
end;
begin
handle:=GetStdHandle(STD_INPUT_HANDLE);
words:=TStringHash.Create;
s:='';
buf[BUF_SIZE+1]:=#0;
numRead:=fileRead(handle,buf,BUF_SIZE);
while numRead>0 do begin
i:=1;
if (s<>'')and(buf[1] in cWhiteSpace) then begin
addWord(s); s:='';
end;
repeat
while (buf[i] in cWhiteSpace)and(i<=numRead) do
inc(i);
if i>numRead then
break; // repeat
wStart:=i;
repeat
inc(i);
until (buf[i] in cWhiteSpace);
setString(w,PChar(addr(buf[wStart])),i-wStart);
if s<>'' then begin
w:=s+w; s:='';
end;
if i<=numRead then
addWord(w)
else // word exceeding buffer
s:=w;
until (i>numRead);
numRead:=fileRead(handle,buf,BUF_SIZE);
end;
if (s<>'') then
addWord(s);
forSort:=TList.Create;
forSort.Capacity:=words.Count;
p:=words.getFirst;
while p<>nil do begin
forSort.Add(p);
p:=words.getNext;
end;
forSort.Sort(sortFunc);
for i:=0 to forSort.Count-1 do begin
p:=PHashEntry(forSort.Items[i]);
write(format('%7d'#9'%s'#13#10,[p^.Value,p^.Key]));
end;
words.Free; forSort.Free;
end.
|
wordfreq.erlang |
%%% -*- mode: erlang -*-
%%% $Id: wordfreq.erlang,v 1.8 2000/12/31 22:54:17 doug Exp $
%%% http://www.bagley.org/~doug/shootout/
%% Use ETS tables (Erlang's associative store).
-module(wordfreq).
-export([main/0, main/1]).
%% ignore program argument
main() -> main(['1']).
main(Args) ->
OutL = fun ({Word, Count}) ->
io:format("~7w\t~s~n", [Count, Word])
end,
lists:foreach(OutL, sortedfreqs()),
halt(0).
% sort the results, descending
sortedfreqs() ->
Port = open_port({fd, 0, 1}, [eof, {line, 512}]),
WordCountList = count_words_from_stream(ets:new(freqtab, [ordered_set]), Port),
lists:reverse(lists:keysort(2, WordCountList)).
count_words_from_stream(Table, Port) ->
receive
{Port, eof} ->
ets:delete(Table, ''),
ets:tab2list(Table);
{Port, {_, {_, Line}}} ->
count_words([], Line, Table),
count_words_from_stream(Table, Port)
end.
count_word(Word, Table) ->
WordAtom = list_to_atom(Word),
case (catch ets:update_counter(Table, WordAtom, 1)) of
{'EXIT', {badarg, _}} ->
ets:insert(Table, {WordAtom, 1});
_ ->
true
end.
% count_words(Word_Accumulator, Line_of_Chars, Table)
count_words([], [], Table) -> true;
count_words(Word, [], Table) ->
count_word(Word, Table);
count_words(Word, [H|T], Table) when H >= $a, H=< $z ->
NewWord = lists:append(Word, [H]),
count_words(NewWord, T, Table);
count_words(Word, [H|T], Table) when H >= $A, H=< $Z ->
NewWord = lists:append(Word, [(H - $A) + $a]),
count_words(NewWord, T, Table);
% we hit a non-word character so count previous word and continue
count_words(Word, [H|T], Table) ->
count_word(Word, Table),
count_words([], T, Table).
|
wordfreq.gawk |
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
delete ARGV;
FS = "[^A-Za-z][^A-Za-z]*";
}
{
for (i=1; i<=NF; i++) {
freq[tolower($(i))]++;
}
}
END {
# gawk doesn't have a builtin sort routine
# so we have to pipe through the shell sort program
sort = "/bin/sort -nr"
for (word in freq) {
if (word) {
printf "%7d\t%s\n", freq[word], word | sort
}
}
close(sort)
}
|
wordfreq.gcc |
/* -*- mode: c -*-
* $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"
typedef int (*comparator)(const void *, const void *);
int cmp_hash(struct ht_node **a, struct ht_node **b) {
int val = (*b)->val - (*a)->val;
return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}
int main() {
int bufsize = 80;
char *buf = (char *)malloc(bufsize + 1);
char c;
int i = 0;
struct ht_ht *ht = ht_create(75000);
struct ht_node **sort_array, **sort_tmp, *node;
while ((c = getchar()) > 0) {
if (isalpha(c)) {
buf[i++] = tolower(c);
if (i == bufsize) {
bufsize *= 2;
buf = realloc(buf, bufsize + 1);
}
} else {
if (i > 0) {
buf[i] = '\0';
++(ht_find_new(ht, buf)->val);
i = 0;
}
}
}
free(buf);
sort_array = sort_tmp =
malloc(sizeof(struct ht_node *) * ht_count(ht));
for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;
qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
(comparator)cmp_hash);
for (i=0; i<ht_count(ht); i++)
printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
ht_destroy(ht);
return(0);
}
|
wordfreq.gforth |
\ -*- mode: forth -*-
\ $Id: wordfreq.gforth,v 1.1 2001/05/30 17:49:08 doug Exp $
\ http://www.bagley.org/~doug/shootout/
\ from Anton Ertl:
wordlist constant word-counts
create word-pointers 10000 cells allot
variable endwp word-pointers endwp !
1024 constant max-line
create line max-line 2 + allot
struct
cell% field wf-count
cell% 2* field wf-name
end-struct wf%
: count-word
2dup word-counts search-wordlist if
1 swap >body +! 2drop
else
nextname get-current word-counts set-current create set-current
here endwp @ tuck ! cell+ endwp !
1 , last @ name>string 2,
endif ;
: no-letter?
dup 'a < swap 'z > or ;
: process-word
2dup u< if
over - count-word
else
2drop
endif ;
: process-line
bounds 2dup ?do
i c@ $20 or dup i c!
no-letter? if
i process-word i 1+
endif
loop
swap process-word ;
: process-file
>r begin
line max-line r@ read-line throw
while
line swap process-line
repeat
rdrop ;
: output
endwp @ word-pointers ?do
i @ dup wf-count @ 7 .r #tab emit wf-name 2@ type cr
cell +loop ;
: wf<
over wf-count @ over wf-count @ 2dup = if
2drop >r wf-name 2@ r> wf-name 2@ compare 0>
else
u> nip nip
endif ;
cell- -1 cells ,
: partition
\ partition array addr1 u1 into all elements less than pivot and all
\ others, addr1 u2 and addr3 u3 are the two partitions.
\ lessthan-xt ( elemptr1 elemptr2 -- f ) compares the two elements
{ lessthan-xt }
over @ { pivot }
begin
2dup u<
while
begin
pivot over @ lessthan-xt execute
while
cell-
repeat
swap over @ over !
begin
2dup u>
while
pivot over @ lessthan-xt execute 0=
while
cell+
repeat then
swap over @ over !
repeat
drop pivot over ! ;
: sort1 recursive
>r begin
2dup u<
while
2dup r@ partition
rot over cell- r@ sort1
cell+ swap
repeat
rdrop 2drop ;
stdin process-file word-pointers endwp @ cell- ' wf< sort1 output bye
|
wordfreq.ghc |
-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- from Julian Assange
-- compile with:
-- ghc -O -package data wordfreq.hs -o wordfreq
module Main where
import List(sortBy)
import Char(toLower,isLower)
import FiniteMap(fmToList,emptyFM,addToFM_C)
main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower
where
pretty l = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l]
where
pad n s = replicate (n - length s) ' ' ++ s
sort = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of
EQ -> compare w1 w0
x -> x)
makemap = addFM emptyFM
where addFM fm [] = fm
addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs
cwords s = case dropWhile (not . isLower) s of
"" -> []
s' -> w : (cwords s'')
where (w, s'') = span isLower s'
lower = map toLower
|
wordfreq.guile |
#!/usr/local/bin/guile \
-e main -s
!#
;;; $Id: wordfreq.guile,v 1.2 2001/06/29 23:12:37 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; from Brad Knotwell
(use-modules (ice-9 string-fun) (ice-9 common-list))
(use-modules (ice-9 format))
(define my-hash (make-hash-table 4000))
(define (print-sorted-hash)
(define (display-elt elt)
(display (format "~7D\t~a\n" (car elt) (cdr elt))))
(map display-elt
(sort-list (hash-fold (lambda (x y z) (cons (cons y x) z)) '() my-hash)
(lambda (x y) (or (> (car x) (car y))
(and (= (car x) (car y))
(string>=? (cdr x) (cdr y))))))))
(define (load-hash x . tl)
(define (do-entry entry)
(let ((entry-val (hash-ref my-hash entry)))
(hash-set! my-hash entry (if entry-val (1+ entry-val) 1))))
(map do-entry (remove-if (lambda (x) (string=? x "")) (cons x tl))))
(define (main args)
(do ((line (read-line) (read-line)))
((eof-object? line) (print-sorted-hash))
(separate-fields-discarding-char
#\space
(list->string (map (lambda (x) (if (char-alphabetic? x) x #\space))
(string->list (string-downcase line))))
load-hash)))
|
wordfreq.ici |
// $Id: wordfreq.ici,v 1.0 2003/01/03 12:23:00 dada Exp $
// http://dada.perl.it/shootout
//
// contributed by Tim Long
static counts = struct();
static
tolower(s)
{
s = explode(s);
forall (c, i in s)
{
if (c >= 'A' && c <= 'Z')
s[i] += 'a' - 'A';
}
return implode(s);
}
while (l = getline())
{
forall (w in smash(l, #\w+#, "\\&"))
{
if (w ~ #[A-Z]#)
w = tolower(w);
if (counts[w] == NULL)
counts[w] = 1;
else
++counts[w];
}
}
out = array();
forall (c, w in counts)
push(out, sprintf("%7d\t%s\n", c, w));
sort(out, [func(a, b){return a > b ? -1 : a < b;}]);
put(implode(out));
|
wordfreq.icon |
# -*- mode: icon -*-
# $Id: wordfreq.icon,v 1.1 2000/12/25 06:58:04 doug Exp $
# http://www.bagley.org/~doug/shootout/
procedure main(argv)
local wcount, buckets, w, c, pair, wl
wcount := table(0)
every wcount[words()] +:= 1
buckets := table()
every w := key(wcount) do {
c := wcount[w]
/ buckets[c] := list()
push(buckets[c], w)
}
buckets := sort(buckets,1)
while pair := pull(buckets) do {
c := pair[1]
wl := sort(pair[2],1)
while (write(right(c,7), "\t", pull(wl)))
}
end
procedure words()
local line, word
while line := read() do {
line := map(line)
line ? while tab(upto(&letters)) do {
word := tab(many(&letters))
suspend(word)
}
}
end
|
wordfreq.java |
// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $
// http://www.bagley.org/~doug/shootout/
// Collection class code is from my friend Phil Chu, Thanks Phil!
import java.io.*;
import java.util.*;
import java.text.*;
class Counter {
int count = 1;
}
public class wordfreq {
public static void main(String[] args) {
wf();
}
public static String padleft(String s,int n,char c) {
int len = s.length();
if( len>=n ) return s;
char[] buf = new char[n];
for( int i=0;i<n-len;i++ ) buf[i]=c;
s.getChars(0,len,buf,n-len);
return new String(buf);
}
public static void wf() {
HashMap map = new HashMap();
try {
Reader r = new BufferedReader(new InputStreamReader(System.in));
StreamTokenizer st = new StreamTokenizer(r);
st.lowerCaseMode(true);
st.whitespaceChars( 0, 64 );
st.wordChars(65, 90);
st.whitespaceChars( 91, 96 );
st.wordChars(97, 122);
st.whitespaceChars( 123, 255 );
int tt = st.nextToken();
while (tt != StreamTokenizer.TT_EOF) {
if (tt == StreamTokenizer.TT_WORD) {
if (map.containsKey(st.sval)) {
((Counter)map.get(st.sval)).count++;
} else {
map.put(st.sval, new Counter());
}
}
tt = st.nextToken();
}
} catch (IOException e) {
System.err.println(e);
return;
}
Collection entries = map.entrySet();
// flatten the entries set into a vector for sorting
Vector rev_wf = new Vector(entries);
// Sort the vector according to its value
Collections.sort(rev_wf, new Comparator() {
public int compare(Object o1, Object o2) {
// First sort by frequency
int c = ((Counter)((Map.Entry)o2).getValue()).count - ((Counter)((Map.Entry)o1).getValue()).count;
if (c == 0) { // Second sort by lexicographical order
c = ((String)((Map.Entry)o2).getKey()).compareTo((String)((Map.Entry)o1).getKey());
}
return c;
}
}
);
Iterator it = rev_wf.iterator();
Map.Entry ent;
String word;
int count;
while(it.hasNext()) {
ent = (Map.Entry)it.next();
word = ((String)ent.getKey());
count = ((Counter)ent.getValue()).count;
System.out.println(padleft(Integer.toString(count),7,' ') + "\t" + word);
}
}
}
|
wordfreq.lcc |
/* -*- mode: c -*-
* $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"
typedef int (*comparator)(const void *, const void *);
int cmp_hash(struct ht_node **a, struct ht_node **b) {
int val = (*b)->val - (*a)->val;
return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}
int main() {
int bufsize = 80;
char *buf = (char *)malloc(bufsize + 1);
char c;
int i = 0;
struct ht_ht *ht = ht_create(75000);
struct ht_node **sort_array, **sort_tmp, *node;
while ((c = getchar()) > 0) {
if (isalpha(c)) {
buf[i++] = tolower(c);
if (i == bufsize) {
bufsize *= 2;
buf = realloc(buf, bufsize + 1);
}
} else {
if (i > 0) {
buf[i] = '\0';
++(ht_find_new(ht, buf)->val);
i = 0;
}
}
}
free(buf);
sort_array = sort_tmp =
malloc(sizeof(struct ht_node *) * ht_count(ht));
for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;
qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
(comparator)cmp_hash);
for (i=0; i<ht_count(ht); i++)
printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
ht_destroy(ht);
return(0);
}
|
wordfreq.lua |
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- implemented by: Roberto Ierusalimschy
-- this version reads 4K chunks of input at a time
local words = {} -- list of all words (for sorting)
local count = {} -- count occurrences of each word
BUFSIZE = 2^12
while 1 do
local lines, rest = read(BUFSIZE, "*l")
if lines == nil then break end
lines = lines..(rest or '') -- ensures whole lines
gsub(strlower(lines), "(%l+)", function (w)
local cw = %count[w]
if cw == nil then -- first occurrence?
cw = 0
tinsert(%words, w)
end
%count[w] = cw + 1
end)
end
sort(words, function (a,b)
return %count[a] > %count[b] or
(%count[a] == %count[b] and a > b)
end)
for i=1,getn(words) do
local w = words[i]
write(format("%7d\t%s\n", count[w], w))
end
|
wordfreq.lua5 |
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- contributed by Roberto Ierusalimschy
-- this version reads 4K chunks of input at a time
local words = {} -- list of all words (for sorting)
local count = {} -- count occurrences of each word
local BUFSIZE = 2^12
while true do
local lines, rest = io.read(BUFSIZE, "*l")
if lines == nil then break end
lines = lines..(rest or '') -- ensures whole lines
for w in string.gfind(string.lower(lines), "(%l+)") do
local cw = count[w]
if not cw then -- first occurrence?
cw = 0
table.insert(words, w)
end
count[w] = cw + 1
end
end
table.sort(words, function (a,b)
return count[a] > count[b] or (count[a] == count[b] and a > b)
end)
for i=1,table.getn(words) do
local w = words[i]
io.write(string.format("%7d\t%s\n", count[w], w))
end
|
wordfreq.mawk |
# $Id: wordfreq.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
BEGIN {
delete ARGV;
FS = "[^A-Za-z][^A-Za-z]*";
}
{
for (i=1; i<=NF; i++) {
freq[tolower($(i))]++;
}
}
END {
# gawk doesn't have a builtin sort routine
# so we have to pipe through the shell sort program
sort = "/bin/sort -nr"
for (word in freq) {
if (word) {
printf "%7d\t%s\n", freq[word], word | sort
}
}
close(sort)
}
|
wordfreq.mercury |
% ---------------------------------------------------------------------------- %
% wordfreq.m
% Ralph Becket <rbeck@microsoft.com>
% Tue Jan 9 15:56:12 GMT 2001
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix
%
% ---------------------------------------------------------------------------- %
:- module mytest.
:- interface.
:- import_module io.
:- pred main(io__state, io__state).
:- mode main(di, uo) is det.
:- implementation.
:- import_module string, int, list, require, std_util, char, map.
main -->
count_word_freqs(map__init, FreqMap),
{ X = list__sort(list__map(func(K-V) = V-K, map__to_assoc_list(FreqMap))) },
{ Y = list__reverse(X) },
io__write_list(Y, "\n", write_freq_word),
io__nl.
:- pred count_word_freqs(map(string,int),map(string,int),io__state,io__state).
:- mode count_word_freqs(in, out, di, uo) is det.
count_word_freqs(FreqMap0, FreqMap) -->
io__read_line_as_string(Result),
(
{ Result = ok(Line) },
{ Words = string__words(isnt(char__is_alpha),string__to_lower(Line)) },
{ Update =
( func(S, FM) =
( if map__search(FM, S, N)
then FM ^ elem(S) := N + 1
else FM ^ elem(S) := 1
)
)
},
{ FreqMap1 = list__foldl(Update, Words, FreqMap0) },
count_word_freqs(FreqMap1, FreqMap)
;
{ Result = eof },
{ FreqMap = FreqMap0 }
;
{ Result = error(ErrNo) },
{ error(io__error_message(ErrNo)) }
).
:- pred write_freq_word(pair(int, string), io__state, io__state).
:- mode write_freq_word(in, di, uo) is det.
write_freq_word(Freq - Word) -->
io__format("%7d\t%s", [i(Freq), s(Word)]).
|
wordfreq.mingw32 |
/* -*- mode: c -*-
* $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"
typedef int (*comparator)(const void *, const void *);
int cmp_hash(struct ht_node **a, struct ht_node **b) {
int val = (*b)->val - (*a)->val;
return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}
int main() {
int bufsize = 80;
char *buf = (char *)malloc(bufsize + 1);
char c;
int i = 0;
struct ht_ht *ht = ht_create(75000);
struct ht_node **sort_array, **sort_tmp, *node;
while ((c = getchar()) > 0) {
if (isalpha(c)) {
buf[i++] = tolower(c);
if (i == bufsize) {
bufsize *= 2;
buf = realloc(buf, bufsize + 1);
}
} else {
if (i > 0) {
buf[i] = '\0';
++(ht_find_new(ht, buf)->val);
i = 0;
}
}
}
free(buf);
sort_array = sort_tmp =
malloc(sizeof(struct ht_node *) * ht_count(ht));
for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;
qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
(comparator)cmp_hash);
for (i=0; i<ht_count(ht); i++)
printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
ht_destroy(ht);
return(0);
}
|
wordfreq.ocaml |
(*
* $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $
* http://www.bagley.org/~doug/shootout/
*)
let max = 4096
let buf = String.create max
let count = Hashtbl.create 75000
let wbuf = Buffer.create 64
let rec scan_words i n inword =
if i < n then
let c = buf.[i] in
if 'a' <= c && c <= 'z' then begin
Buffer.add_char wbuf c;
scan_words (i+1) n true
end
else if 'A' <= c && c <= 'Z' then begin
Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32));
scan_words (i+1) n true
end
else if inword then begin
let word = Buffer.contents wbuf in
begin try incr (Hashtbl.find count word)
with Not_found -> Hashtbl.add count word (ref 1) end;
Buffer.clear wbuf;
scan_words (i+1) n false
end else
scan_words (i+1) n false
else
let nread = input stdin buf 0 max in
if nread = 0 then () else scan_words 0 nread inword
let _ =
scan_words 0 0 false;
let out_lines = ref [] in
let to_list l w c =
l := (Printf.sprintf "%7d\t%s" !c w) :: !l in
Hashtbl.iter (to_list out_lines) count;
List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)
|
wordfreq.ocamlb |
(*
* $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $
* http://www.bagley.org/~doug/shootout/
*)
let max = 4096
let buf = String.create max
let count = Hashtbl.create 75000
let wbuf = Buffer.create 64
let rec scan_words i n inword =
if i < n then
let c = buf.[i] in
if 'a' <= c && c <= 'z' then begin
Buffer.add_char wbuf c;
scan_words (i+1) n true
end
else if 'A' <= c && c <= 'Z' then begin
Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32));
scan_words (i+1) n true
end
else if inword then begin
let word = Buffer.contents wbuf in
begin try incr (Hashtbl.find count word)
with Not_found -> Hashtbl.add count word (ref 1) end;
Buffer.clear wbuf;
scan_words (i+1) n false
end else
scan_words (i+1) n false
else
let nread = input stdin buf 0 max in
if nread = 0 then () else scan_words 0 nread inword
let _ =
scan_words 0 0 false;
let out_lines = ref [] in
let to_list l w c =
l := (Printf.sprintf "%7d\t%s" !c w) :: !l in
Hashtbl.iter (to_list out_lines) count;
List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)
|
wordfreq.perl |
#!/usr/local/bin/perl
# $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/
# Tony Bowden suggested using tr versus lc and split(/[^a-z]/)
use strict;
my %count = ();
while (read(STDIN, $_, 4095) and $_ .= <STDIN>) {
tr/A-Za-z/ /cs;
++$count{$_} foreach split(' ', lc $_);
}
my @lines = ();
my ($w, $c);
push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count));
print sort { $b cmp $a } @lines;
|
wordfreq.pike |
#!/usr/local/bin/pike// -*- mode: pike -*-
// $Id: wordfreq.pike,v 1.3 2000/12/05 16:04:07 doug Exp $
// http://www.bagley.org/~doug/shootout/
// from: Fredrik Noring
void main()
{
mapping(string:int) dictionary = ([]);
string buffer = "";
array(string) f = filter(map(enumerate(128),
lambda(int i)
{
return !('A' <= i && i <= 'Z' ||
'a' <= i && i <= 'z' ||
i == ' ') &&
sprintf("%c", i);
}), `!=, 0);
array(string) t = allocate(sizeof(f), " ");
for(;;)
{
string data =
buffer + replace(lower_case(Stdio.stdin.read(4096)), f, t);
if(!sizeof(data))
break;
array(string) words = data/" ";
if(1 < sizeof(words) && sizeof(words[-1]))
buffer = words[-1],
words = words[..sizeof(words)-2];
else
buffer = "";
foreach(words, string word)
dictionary[word]++;
}
m_delete(dictionary, "");
mapping(int:array(string)) revdictionary = ([]);
array(string) words = indices(dictionary);
array(int) freqs = values(dictionary);
for(int i = 0; i < sizeof(dictionary); i++)
revdictionary[freqs[i]] += ({ words[i] });
freqs = sort(indices(revdictionary));
for(int i = sizeof(freqs)-1; 0 <= i; i--)
{
int freq = freqs[i];
words = sort(revdictionary[freq]);
for(int j = sizeof(words)-1; 0 <= j; j--)
write("%7d\t%s\n", freq, words[j]);
}
}
|
wordfreq.pliant |
# $Id: wordfreq.pliant,v 1.1 2002/03/18 14:07:00 dada Exp $
# http://dada.perl.it/shootout/
module "/pliant/language/unsafe.pli"
module "/pliant/language/context.pli"
module "/pliant/language/stream.pli"
module "/pliant/language/stream/pipe.pli"
function heapsort n ra
arg Int n ; arg_rw Array:Str ra
var Str rra
var Int i
var Int j
var Int l := (n\2) + 1
var Int ir := n
part heapsort_loop
if l>1
l := l - 1
rra := ra:l
else
rra := ra:ir
ra:ir := ra:1
ir := ir - 1
if ir=1
ra:1 := rra
leave heapsort_loop
i := l
j := l*2
while j<=ir
if j<ir and ra:j > ra:(j+1)
j := j + 1
if rra > ra:j
ra:i := ra:j
i := j
j := j + i
else
j := ir + 1
ra:i := rra
restart heapsort_loop
gvar Stream stdin
gvar Str line := ""
gvar Str l
gvar Str word
gvar Address Buf
gvar Int ReadCount
gvar CBool ok
gvar Int ReadSize := 4096
gvar Int eolpos
gvar Int i
gvar Int nl := 0
gvar Int nw := 0
gvar Int nc := 0
gvar Char ch
gvar Int j
gvar Array:Str lines
gvar (Dictionary Str Int) count
gvar Pointer:Int v
gvar Str k
stdin open "handle:0" in
while not stdin:atend
line := stdin readline
i := 0
while i<line:len
ch := line:i
if ch>="a" and ch<="z" or ch>="A" and ch<="Z"
j := i+1
part find_word
if j<line:len
ch := line:j
if ch>="a" and ch<="z" or ch>="A" and ch<="Z"
j += 1
restart find_word
word := lower (line i j-i)
if (count exists word)
count:word += 1
else
count insert word 1
i += j-i
else
i += 1
lines += ""
v :> count first
k := count key v
while addressof:v<>null
if addressof:(count first k)<>null
l := (string count:k)
l := (repeat 7-l:len " ")+l
line := l+"[tab]"+k+"[lf]"
lines += line
# console k "=" count:k eol
v :> count next v
if addressof:v<>null
k := count key v
heapsort lines:size-1 lines
for i 0 lines:size-1
console lines:i
|
wordfreq.poplisp |
;;; -*- mode: lisp -*-
;;; $Id: wordfreq.cmucl,v 1.2 2001/06/05 22:23:23 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; From Bulent Murtezaoglu, based on code from Andrew McDowell
;;; The Block/Buffering I/O code is adapted from material posted to comp.lang.lisp
;;; by Thomas Kirk <tk@research.att.com>.
;;; Archived here: http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/lang/lisp/code/io/fast_io/fast_io.txt
;;;
(declaim (optimize (speed 3) (debug 0) (safety 0) (space 0) (compilation-speed 0)))
(let* ((start 0)
(current 0)
(end 0)
(eof nil)
(inputbuffer (make-array 4096 :element-type '(unsigned-byte 8))))
(declare (type (simple-array (unsigned-byte 8)) inputbuffer)
(fixnum start end)
(inline code-char)
(type (unsigned-byte 8) current))
(labels
((get-char () ;;don't do this at home works through side effect unless eof
(when (= start end)
(setf start 0)
(setf end (read-sequence inputbuffer *standard-input*))
(when (zerop end)
(setf eof t)
(return-from get-char nil)))
(setf current (aref inputbuffer start))
(incf start))
(word-constituent-p () ;; side effects current rets t if word-constituent
(or (and (>= current 97) (<= current 122))
(and (>= current 65) (<= current 90)
(incf current 32))))
(reader (buffer)
(declare (type simple-base-string buffer))
(let* ((size (length buffer))
(fill-ptr 0))
(declare (type (integer 0 1000) size fill-ptr))
(loop while (and (or (get-char) (return-from reader buffer))
(not (word-constituent-p))))
(loop
(when (= fill-ptr size)
(let ((new-buffer
(make-string (the fixnum
(setf size (the fixnum (+ fill-ptr 5)))))))
(setf (subseq (the simple-base-string new-buffer) 0 fill-ptr)
buffer)
(setf buffer new-buffer)))
(setf (schar buffer fill-ptr) (code-char current))
(incf fill-ptr)
(if (get-char)
(if (not (word-constituent-p))
(return))
(return)))
(lisp::shrink-vector buffer fill-ptr))))
(let* ((h (make-hash-table :test 'equal :size 3380 :rehash-threshold 1.0))
(buffer (make-string 10)))
(declare (inline gethash maphash))
(declare (type simple-base-string buffer))
(loop
(setf buffer (reader buffer))
(if (not eof)
(if (= 1 (the fixnum (incf (the fixnum (gethash buffer h 0)))))
(setf buffer (make-string 10)));; only cons if used
(return)))
;; hast table => list
(let ((l '()))
(maphash #'(lambda (key val)
(push (cons key val) l))
h)
;; sort the list
(setf l (sort l #'(lambda (v1 v2)
(if (> (the fixnum (cdr v1)) (the fixnum (cdr v2)))
t
(if (= (the fixnum (cdr v1)) (the fixnum (cdr v2)))
(string-lessp (car v2) (car v1))
nil)))))
;; output the list of pairs
(mapcar #'(lambda (p)
(format t "~7D ~A~&" (cdr p) (car p)))
l)))))
|
wordfreq.python |
#!/usr/local/bin/python
# $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
#
# adapted from Bill Lear's original python word frequency counter
#
# Joel Rosdahl suggested using translate table to speed up
# word splitting. That change alone sped this program up by
# at least a factor of 3.
#
# with further speedups from Mark Baker
import sys
def main():
count = {}
i_r = map(chr, range(256))
trans = [' '] * 256
o_a, o_z = ord('a'), (ord('z')+1)
trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z]
trans[o_a:o_z] = i_r[o_a:o_z]
trans = ''.join(trans)
rl = sys.stdin.readlines
lines = rl(4095)
while lines:
for line in lines:
for word in line.translate(trans).split():
try:
count[word] += 1
except KeyError:
count[word] = 1
lines = rl(4095)
l = zip(count.values(), count.keys())
l.sort()
l.reverse()
print '\n'.join(["%7s\t%s" % (count, word) for (count, word) in l])
main()
|
wordfreq.rexx |
count. = 0
ws = ''
transtab = XRANGE('A', 'Z') || XRANGE('a', 'z')
transin = XRANGE('A', 'Z')
transout = XRANGE('a', 'z')
notword = XRANGE('00'x, D2C(C2D('a')-1)) || XRANGE(D2C(C2D('z')+1), 'FF'x)
DO UNTIL LINES() = 0
PARSE LINEIN L
L = TRANSLATE(TRANSLATE(L, transout, transin), '', notword, ' ')
DO i = 1 TO WORDS(L)
w = STRIP(WORD(L, i))
count.w = count.w + 1
IF count.w = 1 THEN ws = ws w
END
END
STREAM('rexx_tmp', 'C', 'CREATE')
DO i = 1 to WORDS(ws)
w = WORD(ws, i)
IF count.w > 0 THEN DO
line = format(count.w, 7) || '09'x || w
CALL LINEOUT('rexx_tmp', line)
END
END
'c:\cygwin\bin\sort -nr rexx_tmp >rexx_sorted'
DO WHILE LINES('rexx_sorted') > 0
SAY LINEIN('rexx_sorted')
END
/*
STREAM('rexx_tmp', 'C', 'CLOSE')
STREAM('rexx_sorted', 'C', 'CLOSE')
'del rexx_tmp'
'del rexx_sorted'
*/
|
wordfreq.ruby |
#!/usr/local/bin/ruby
# -*- mode: ruby -*-
# $Id: wordfreq.ruby,v 1.9 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/
freq = Hash.new(0)
loop {
data = (STDIN.read(4095) or break) << (STDIN.gets || "")
for word in data.downcase.tr_s('^A-Za-z',' ').split(' ')
freq[word] += 1
end
}
freq.delete("")
lines = Array.new
freq.each{|w,c| lines << sprintf("%7d\t%s\n", c, w) }
print lines.sort.reverse
|
wordfreq.slang |
% $Id: wordfreq.slang,v 1.0 2003/01/03 14:43:00 dada Exp $
% http://dada.perl.it/shootout/
%
% contributed by John E. Davis
define main()
{
variable count = Assoc_Type[Int_Type, 0];
foreach (stdin) using ("line")
{
variable line = ();
foreach (strtok (strlow (line), "^a-z"))
{
variable word = ();
count [word] += 1;
}
}
variable words = assoc_get_keys (count);
variable freqs = assoc_get_values (count);
variable i = array_sort (array_map (String_Type, &sprintf, "%12d", freqs)
+ words);
i = i[[-1:0:-1]];
foreach (i)
{
i = ();
()=fprintf (stdout, "%7d\t%s\n", freqs[i], words[i]);
}
}
main();
|
wordfreq.smlnj |
(* -*- mode: sml -*-
* $Id: wordfreq.smlnj,v 1.3 2001/07/09 00:25:29 doug Exp $
* http://www.bagley.org/~doug/shootout/
* from Stephen Weeks
*)
fun for (start, stop, f) =
let
fun loop i =
if i > stop
then ()
else (f i; loop (i + 1))
in
loop start
end
fun incr r = r := 1 + !r
val sub = Array.sub
val update = Array.update
signature HASH_SET =
sig
type 'a t
val foreach: 'a t * ('a -> unit) -> unit
(* lookupOrInsert (s, h, p, f) looks in the set s for an entry with hash h
* satisfying predicate p. If the entry is there, it is returned.
* Otherwise, the function f is called to create a new entry, which is
* inserted and returned.
*)
val lookupOrInsert: 'a t * word * ('a -> bool) * (unit -> 'a) -> 'a
val new: {hash: 'a -> word} -> 'a t
val size: 'a t -> int
end
structure HashSet: HASH_SET =
struct
datatype 'a t =
T of {buckets: 'a list array ref,
hash: 'a -> word,
mask: word ref,
numItems: int ref}
val initialSize: int = 65536
val initialMask: word = Word.fromInt initialSize - 0w1
fun 'a new {hash}: 'a t =
T {buckets = ref (Array.array (initialSize, [])),
hash = hash,
numItems = ref 0,
mask = ref initialMask}
fun size (T {numItems, ...}) = !numItems
fun numBuckets (T {buckets, ...}) = Array.length (!buckets)
fun index (w: word, mask: word): int =
Word.toInt (Word.andb (w, mask))
fun resize (T {buckets, hash, mask, ...}, size: int, newMask: word): unit =
let
val newBuckets = Array.array (size, [])
in Array.app (fn r =>
List.app (fn a =>
let val j = index (hash a, newMask)
in Array.update
(newBuckets, j,
a :: Array.sub (newBuckets, j))
end) r) (!buckets)
; buckets := newBuckets
; mask := newMask
end
fun maybeGrow (s as T {buckets, mask, numItems, ...}): unit =
let
val n = Array.length (!buckets)
in if !numItems * 4 > n
then resize (s,
n * 2,
Word.orb (0w1, Word.<< (!mask, 0w1)))
else ()
end
fun peekGen (T {buckets = ref buckets, mask, ...}, w, p, no, yes) =
let
val j = index (w, !mask)
val b = Array.sub (buckets, j)
in case List.find p b of
NONE => no (j, b)
| SOME a => yes a
end
fun lookupOrInsert (table as T {buckets, numItems, ...}, w, p, f) =
let
fun no (j, b) =
let val a = f ()
val _ = incr numItems
val _ = Array.update (!buckets, j, a :: b)
val _ = maybeGrow table
in a
end
in peekGen (table, w, p, no, fn x => x)
end
fun foreach (T {buckets, ...}, f) =
Array.app (fn r => List.app f r) (!buckets)
end
structure Buffer:
sig
type t
val add: t * Word8.word -> unit
val clear: t -> unit
val contents: t -> string
val new: int -> t
end =
struct
datatype t = T of {elts: Word8Array.array ref,
size: int ref}
fun contents (T {elts, size, ...}) =
Byte.bytesToString (Word8Array.extract (!elts, 0, SOME (!size)))
fun clear (T {size, ...}) = size := 0
fun new (bufSize) =
T {elts = ref (Word8Array.array (bufSize, 0w0)),
size = ref 0}
fun add (T {elts, size}, x) =
let
val s = !size
val _ = size := s + 1
val a = !elts
val n = Word8Array.length a
in
if s = n
then
let
val a' =
Word8Array.tabulate
(2 * n, fn i =>
if i < n then Word8Array.sub (a, i) else 0w0)
val _ = elts := a'
val _ = Word8Array.update (a', s, x)
in ()
end
else Word8Array.update (a, s, x)
end
end
structure Quicksort:
sig
val quicksort: 'a array * ('a * 'a -> bool) -> unit
end =
struct
fun assert (s, f: unit -> bool) =
if true orelse f ()
then ()
else raise Fail (concat ["assert: ", s])
fun forall (low, high, f) =
let
fun loop i = i > high orelse (f i andalso loop (i + 1))
in
loop low
end
fun fold (l, u, state, f) =
let
fun loop (i, state) =
if i > u
then state
else loop (i + 1, f (i, state))
in
loop (l, state)
end
fun 'a isSorted (a: 'a array,
lo: int,
hi: int,
op <= : 'a * 'a -> bool) =
let
fun loop (i, x) =
i > hi
orelse let
val y = sub (a, i)
in
x <= y andalso loop (i + 1, y)
end
in
lo >= hi orelse loop (lo + 1, sub (a, lo))
end
local
open Word
val seed = ref 0w13
in
fun rand () =
let
val res = 0w1664525 * !seed + 0w1013904223
val _ = seed := res
in
toIntX res
end
end
fun randInt (lo, hi) = lo + Int.mod (rand(), hi - lo + 1)
fun insertionSort (a: 'a array, op <= : 'a * 'a -> bool): unit =
let
fun x i = sub (a, i)
in
for (1, Array.length a - 1, fn i =>
let
val _ =
assert ("insertionSort1", fn () =>
isSorted (a, 0, i - 1, op <=))
val t = x i
fun sift (j: int) =
(assert ("insertionSort2", fn () =>
isSorted (a, 0, j - 1, op <=)
andalso isSorted (a, j + 1, i, op <=)
andalso forall (j + 1, i, fn k => t <= x k))
; if j > 0
then
let
val j' = j - 1
val z = x j'
in if t <= z
then (update (a, j, z);
sift j')
else j
end
else j)
val _ = update (a, sift i, t)
in ()
end)
end
fun 'a quicksort (a: 'a array, op <= : 'a * 'a -> bool): unit =
let
fun x i = Array.sub (a, i)
fun swap (i, j) =
let
val t = x i
val _ = update (a, i, x j)
val _ = update (a, j, t)
in ()
end
val cutoff = 20
fun qsort (l: int, u: int): unit =
if u - l > cutoff
then
let
val _ = swap (l, randInt (l, u))
val t = x l
val m =
fold
(l + 1, u, l, fn (i, m) =>
(assert
("qsort", fn () =>
forall (l + 1, m, fn k => x k <= t)
andalso forall (m + 1, i - 1, fn k => not (x k <= t)))
; if x i <= t
then (swap (m + 1, i)
; m + 1)
else m))
val _ = swap (l, m)
val _ = qsort (l, m - 1)
val _ = qsort (m + 1, u)
in ()
end
else ()
val max = Array.length a - 1
val _ = qsort (0, max)
val _ = insertionSort (a, op <=)
in
()
end
end
structure Test : sig
val main : (string * string list) -> OS.Process.status
end = struct
(* This hash function is taken from pages 56-57 of
* The Practice of Programming by Kernighan and Pike.
*)
fun hash (s: string): word =
let
val n = String.size s
fun loop (i, w) =
if i = n
then w
else Word.fromInt (Char.ord (String.sub (s, i))) + Word.* (w, 0w31)
in
loop (0, 0w0)
end
fun hash (s: string): word =
let
val n = String.size s
fun loop (i, w) =
if i = n
then w
else loop (i + 1,
Word.fromInt (Char.ord (String.sub (s, i)))
+ Word.* (w, 0w31))
in
loop (0, 0w0)
end
val max = 4096
val buf = Word8Array.array (max, 0w0)
val count: {hash: word,
word: string,
count: int ref} HashSet.t = HashSet.new {hash = #hash}
val wbuf = Buffer.new 64
val c2b = Byte.charToByte
fun scan_words (i, n, inword) =
if i < n
then
let
val c = Word8Array.sub (buf, i)
in
if c2b #"a" <= c andalso c <= c2b #"z"
then (Buffer.add (wbuf, c);
scan_words (i + 1, n, true))
else
if c2b #"A" <= c andalso c <= c2b #"Z"
then
(Buffer.add (wbuf, c + 0w32);
scan_words (i + 1, n, true))
else
if inword
then
let
val w = Buffer.contents wbuf
val h = hash w
in
incr (#count
(HashSet.lookupOrInsert
(count, h,
fn {hash, word, ...} =>
hash = h andalso word = w,
fn () => {hash = h, word = w, count = ref 0})));
Buffer.clear wbuf;
scan_words (i + 1, n, false)
end
else scan_words (i + 1, n, false)
end
else
let
val nread =
Posix.IO.readArr (Posix.FileSys.stdin,
{buf = buf, i = 0, sz = NONE})
in
if nread = 0
then ()
else scan_words (0, nread, inword)
end
fun printl [] = print "\n" | printl(h::t) = ( print h ; printl t )
fun rightJustify (s: string, width: int) =
let
val n = String.size s
in concat [CharVector.tabulate (width - n, fn _ => #" "), s]
end
fun main (name, args) =
let
val _ = scan_words (0, 0, false)
val a = Array.array (HashSet.size count, (0, ""))
val i = ref 0
val _ = HashSet.foreach (count, fn {word, count, ...} =>
(Array.update (a, !i, (!count, word)); incr i))
val _ = Quicksort.quicksort (a, fn ((c, w), (c', w')) =>
c > c' orelse c = c' andalso w >= w')
val _ = Array.app (fn (c, w) =>
printl [rightJustify (Int.toString c, 7), "\t", w]) a
in
OS.Process.success
end
end
val _ = SMLofNJ.exportFn("wordfreq", Test.main);
|
wordfreq.tcl |
#!/usr/local/bin/tclsh
# $Id: wordfreq.tcl,v 1.11 2001/05/17 00:49:34 doug Exp $
# http://www.bagley.org/~doug/shootout/
# with help from: Tom Wilkason and Branko Vesligaj
proc main {} {
while {1} {
set data [read stdin 4096]
if {[string equal $data {}]} {break}
if {[gets stdin extra] >= 0} {
append data $extra
}
regsub -all {[^[:alpha:]]+} $data { } line
foreach word [string tolower $line] {
if {[catch {incr count($word)}]} {
set count($word) 1
}
}
}
foreach {word cnt} [array get count] {
lappend lines [format "%7d\t%s" $cnt $word]
}
puts [join [lsort -decreasing $lines] "\n"]
}
main
|
wordfreq.vbscript |
<job>
<script language=JScript runat=server>
function Descending(a, b) {
return ((b > a) ? 1 : ((b < a) ? -1 : 0));
}
function SortVBArray(arrVBArray) {
return arrVBArray.toArray().sort(Descending).join('@');
}
</script>
<script language=VBScript>
Function SortArray(arrInput)
SortArray = Split(SortVBArray(arrInput), "@")
End Function
Set Count = CreateObject("Scripting.Dictionary")
Blob = WScript.StdIn.ReadAll
Lines = Split(Blob, vbCrLf)
For Each L in Lines
Line = Trim(LCase(L))
For B = 1 To Len(Line)
C = Asc(Mid(Line, B, 1))
If C <> Asc(" ") And (C < Asc("a") Or C > Asc("z")) Then
'WSCript.Echo(Line)
'WScript.Echo(String(B-1, " ") & "^")
Line = Left(Line, B-1) & " " & Mid(Line, B+1)
'WSCript.Echo(Line)
'WScript.Echo(String(B-1, " ") & "^")
End If
Next
Words = Split(Line, " ")
For Each Word in Words
If Word <> " " And Word <> "" Then
If Count.Exists(Word) Then
Count.Item(Word) = Count.Item(Word) + 1
Else
Count.Item(Word) = 1
End If
End If
Next
Next
K = Count.Keys
Redim Lines(Count.Count-1)
For A = 0 To Count.Count-1
N = CStr(Count.Item(K(A)))
If Len(N) < 7 Then N = String(7-Len(N), " ") & N
Lines(A) = N & Chr(9) & K(A)
Next
SortedLines = SortArray(Lines)
For A = LBound(SortedLines) To UBound(SortedLines)
WScript.Echo(SortedLines(A))
Next
</script>
</job>
|
wordfreq.vc |
/* -*- mode: c -*-
* $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
* http://www.bagley.org/~doug/shootout/
*/
#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.vc.h"
typedef int (*comparator)(const void *, const void *);
int cmp_hash(struct ht_node **a, struct ht_node **b) {
int val = (*b)->val - (*a)->val;
return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}
int main() {
int bufsize = 80;
char *buf = (char *)malloc(bufsize + 1);
char c;
int i = 0;
struct ht_ht *ht = ht_create(75000);
struct ht_node **sort_array, **sort_tmp, *node;
while ((c = getchar()) > 0) {
if (isalpha(c)) {
buf[i++] = tolower(c);
if (i == bufsize) {
bufsize *= 2;
buf = realloc(buf, bufsize + 1);
}
} else {
if (i > 0) {
buf[i] = '\0';
++(ht_find_new(ht, buf)->val);
i = 0;
}
}
}
free(buf);
sort_array = sort_tmp =
malloc(sizeof(struct ht_node *) * ht_count(ht));
for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;
qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
(comparator)cmp_hash);
for (i=0; i<ht_count(ht); i++)
printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i]));
ht_destroy(ht);
return(0);
}
|