Word Frequency Count Back to the Win32 Shootout
Back to dada's perl lab

[The Original Shootout]   [NEWS]   [FAQ]   [Methodology]   [Platform Details]   [Acknowledgements]   [Scorecard]  
All Source For Word Frequency Count
wordfreq.awka
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    delete ARGV;
    FS = "[^A-Za-z][^A-Za-z]*";
}
{
    for (i=1; i<=NF; i++) {
    freq[tolower($(i))]++;
    }
}
END {
    # gawk doesn't have a builtin sort routine
    # so we have to pipe through the shell sort program
    sort = "/bin/sort -nr"
    for (word in freq) {
    if (word) {
        printf "%7d\t%s\n", freq[word], word | sort
    }
    }
    close(sort)
}
wordfreq.bcc
/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#define inline
#include "simple_hash.h"

typedef int (*comparator)(const void *, const void *);

int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int val = (*b)->val - (*a)->val;
    return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}

int main() {
    int bufsize = 80;
    char *buf = (char *)malloc(bufsize + 1);
    char c;
    int i = 0;
    struct ht_ht *ht = ht_create(75000);
    struct ht_node **sort_array, **sort_tmp, *node;

    while ((c = getchar()) > 0) {
        if (isalpha(c)) {
            buf[i++] = tolower(c);
        if (i == bufsize) {
        bufsize *= 2;
        buf = realloc(buf, bufsize + 1);
        } 
        } else {
        if (i > 0) {
        buf[i] = '\0';
        ++(ht_find_new(ht, buf)->val);
        i = 0;
        }
        }
    }
    free(buf);

    sort_array = sort_tmp =
    malloc(sizeof(struct ht_node *) * ht_count(ht));

    for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;

    qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
      (comparator)cmp_hash);

    for (i=0; i<ht_count(ht); i++)
    printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); 

    ht_destroy(ht);
    return(0);
}
wordfreq.cygperl
#!/usr/local/bin/perl
# $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/

# Tony Bowden suggested using tr versus lc and split(/[^a-z]/)

use strict;

my %count = ();
while (read(STDIN, $_, 4095) and $_ .= <STDIN>) {
    tr/A-Za-z/ /cs;
    ++$count{$_} foreach split(' ', lc $_);
}

my @lines = ();
my ($w, $c);
push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count));
print sort { $b cmp $a } @lines;
wordfreq.delphi
program wordfreq;


uses
  SysUtils, Classes, Windows,
  simpleHash in 'simpleHash.pas';

const
  cWhiteSpace = [#0..#255]-['a'..'z','A'..'Z'];
  BUF_SIZE = 4096;

var handle: THandle;
    buf: array[1..BUF_SIZE+1] of char;
    s,w: string;
    words: TStringHash;
    forSort: TList;
    numRead,i,wStart: cardinal;
    p: PHashEntry;

  procedure addWord(const s: string);
  var w: string; p: PHashEntry;
  begin
    w:=lowercase(s);
    p:=words.get(w);
    if p<>nil then
      inc(p^.Value)
    else
      words.add(w,1);
  end;

  function sortFunc(Item1, Item2: Pointer): Integer;
  begin
    with PHashEntry(Item1)^ do begin
      if Value<PHashEntry(Item2)^.Value then
        result:=1
      else if Value>PHashEntry(Item2)^.Value then
        result:=-1
      else begin // values equal
        if Key=PHashEntry(Item2)^.Key then
          result:=0
        else if Key<PHashEntry(Item2)^.Key then
          result:=1
        else
          result:=-1;
      end;
    end;
  end;

begin
  handle:=GetStdHandle(STD_INPUT_HANDLE);
  words:=TStringHash.Create;

  s:='';
  buf[BUF_SIZE+1]:=#0;
  numRead:=fileRead(handle,buf,BUF_SIZE);
  while numRead>0 do begin
    i:=1;
    if (s<>'')and(buf[1] in cWhiteSpace) then begin
      addWord(s); s:='';
    end;
    repeat
      while (buf[i] in cWhiteSpace)and(i<=numRead) do
        inc(i);
      if i>numRead then
        break; // repeat
      wStart:=i;
      repeat
        inc(i);
      until (buf[i] in cWhiteSpace);
      setString(w,PChar(addr(buf[wStart])),i-wStart);
      if s<>'' then begin
        w:=s+w; s:='';
      end;
      if i<=numRead then
        addWord(w)
      else // word exceeding buffer
        s:=w;
    until (i>numRead);
    numRead:=fileRead(handle,buf,BUF_SIZE);
  end;
  if (s<>'') then
    addWord(s);

  forSort:=TList.Create;
  forSort.Capacity:=words.Count;
  p:=words.getFirst;
  while p<>nil do begin
    forSort.Add(p);
    p:=words.getNext;
  end;

  forSort.Sort(sortFunc);

  for i:=0 to forSort.Count-1 do begin
    p:=PHashEntry(forSort.Items[i]);
    write(format('%7d'#9'%s'#13#10,[p^.Value,p^.Key]));
  end;

  words.Free; forSort.Free;
end.
wordfreq.erlang
%%% -*- mode: erlang -*-
%%% $Id: wordfreq.erlang,v 1.8 2000/12/31 22:54:17 doug Exp $
%%% http://www.bagley.org/~doug/shootout/

%% Use ETS tables (Erlang's associative store).

-module(wordfreq).
-export([main/0, main/1]).

%% ignore program argument
main() -> main(['1']).
main(Args) ->
    OutL = fun ({Word, Count}) ->
        io:format("~7w\t~s~n", [Count, Word])
       end,
    lists:foreach(OutL, sortedfreqs()),
    halt(0).


% sort the results, descending
sortedfreqs() -> 
    Port = open_port({fd, 0, 1}, [eof, {line, 512}]),
    WordCountList = count_words_from_stream(ets:new(freqtab, [ordered_set]), Port),
    lists:reverse(lists:keysort(2, WordCountList)).


count_words_from_stream(Table, Port) ->
    receive
    {Port, eof} ->
        ets:delete(Table, ''),
        ets:tab2list(Table);
    {Port, {_, {_, Line}}} ->
        count_words([], Line, Table),
        count_words_from_stream(Table, Port)
    end.

count_word(Word, Table) ->
    WordAtom = list_to_atom(Word),
    case (catch ets:update_counter(Table, WordAtom, 1)) of
    {'EXIT', {badarg, _}} ->
        ets:insert(Table, {WordAtom, 1});
    _ ->
        true
    end.

% count_words(Word_Accumulator, Line_of_Chars, Table)
count_words([], [], Table) -> true;
count_words(Word, [], Table) ->
    count_word(Word, Table);
count_words(Word, [H|T], Table) when H >= $a, H=< $z ->
    NewWord = lists:append(Word, [H]),
    count_words(NewWord, T, Table);
count_words(Word, [H|T], Table) when H >= $A, H=< $Z ->
    NewWord = lists:append(Word, [(H - $A) + $a]),
    count_words(NewWord, T, Table);
% we hit a non-word character so count previous word and continue
count_words(Word, [H|T], Table) ->
    count_word(Word, Table),
    count_words([], T, Table).
wordfreq.gawk
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    delete ARGV;
    FS = "[^A-Za-z][^A-Za-z]*";
}
{
    for (i=1; i<=NF; i++) {
    freq[tolower($(i))]++;
    }
}
END {
    # gawk doesn't have a builtin sort routine
    # so we have to pipe through the shell sort program
    sort = "/bin/sort -nr"
    for (word in freq) {
    if (word) {
        printf "%7d\t%s\n", freq[word], word | sort
    }
    }
    close(sort)
}
wordfreq.gcc
/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"

typedef int (*comparator)(const void *, const void *);

int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int val = (*b)->val - (*a)->val;
    return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}

int main() {
    int bufsize = 80;
    char *buf = (char *)malloc(bufsize + 1);
    char c;
    int i = 0;
    struct ht_ht *ht = ht_create(75000);
    struct ht_node **sort_array, **sort_tmp, *node;

    while ((c = getchar()) > 0) {
        if (isalpha(c)) {
            buf[i++] = tolower(c);
        if (i == bufsize) {
        bufsize *= 2;
        buf = realloc(buf, bufsize + 1);
        } 
        } else {
        if (i > 0) {
        buf[i] = '\0';
        ++(ht_find_new(ht, buf)->val);
        i = 0;
        }
        }
    }
    free(buf);

    sort_array = sort_tmp =
    malloc(sizeof(struct ht_node *) * ht_count(ht));

    for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;

    qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
      (comparator)cmp_hash);

    for (i=0; i<ht_count(ht); i++)
    printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); 

    ht_destroy(ht);
    return(0);
}
wordfreq.gforth
\ -*- mode: forth -*-
\ $Id: wordfreq.gforth,v 1.1 2001/05/30 17:49:08 doug Exp $
\ http://www.bagley.org/~doug/shootout/
\ from Anton Ertl:

wordlist constant word-counts
create word-pointers 10000 cells allot
variable endwp word-pointers endwp !
1024 constant max-line
create line max-line 2 + allot

struct
    cell% field    wf-count
    cell% 2* field wf-name
end-struct wf%

: count-word 
    2dup word-counts search-wordlist if
    1 swap >body +! 2drop
    else
    nextname get-current word-counts set-current create set-current
    here endwp @ tuck ! cell+ endwp !
    1 , last @ name>string 2,
    endif ;

: no-letter? 
    dup 'a < swap 'z > or ; 

: process-word 
    2dup u< if
    over - count-word
    else
    2drop
    endif ;

: process-line 
    bounds 2dup ?do 
    i c@ $20 or dup i c! 
    no-letter? if 
        i process-word  i 1+ 
    endif
    loop 
    swap process-word ;

: process-file 
    >r begin
    line max-line r@ read-line throw
    while
    line swap process-line
    repeat
    rdrop ;

: output 
    endwp @ word-pointers ?do
    i @ dup wf-count @ 7 .r #tab emit wf-name 2@ type cr
    cell +loop ;

: wf< 
    over wf-count @ over wf-count @ 2dup = if
    2drop >r wf-name 2@ r> wf-name 2@ compare 0>
    else
    u> nip nip
    endif ;

 cell- -1 cells ,

: partition 
    \ partition array addr1 u1 into all elements less than pivot and all
    \ others, addr1 u2 and addr3 u3 are the two partitions.
    \ lessthan-xt ( elemptr1 elemptr2 -- f ) compares the two elements
    { lessthan-xt }
    over @ { pivot }
    begin 
    2dup u<
    while
    begin
        pivot over @ lessthan-xt execute
    while
        cell-
    repeat
    swap over @ over !
    begin 
        2dup u>
    while
        pivot over @ lessthan-xt execute 0=
    while
        cell+
    repeat then
    swap over @ over !
    repeat
    drop pivot over ! ;

: sort1  recursive
    >r begin
    2dup u<
    while
    2dup r@ partition 
    rot over cell- r@ sort1
    cell+ swap
    repeat
    rdrop 2drop ;

stdin process-file word-pointers endwp @ cell- ' wf< sort1 output bye
wordfreq.ghc
-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- from Julian Assange

-- compile with:
-- ghc -O -package data wordfreq.hs -o wordfreq

module Main where
import List(sortBy)
import Char(toLower,isLower)
import FiniteMap(fmToList,emptyFM,addToFM_C)

main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower
 where
       pretty l  = [pad 7 (show n) ++ "\t" ++ w | (w,n) <- l]
                   where
                         pad n s = replicate (n - length s) ' ' ++ s

       sort      = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of
                                               EQ -> compare w1 w0
                           x  -> x)

       makemap   = addFM emptyFM
               where addFM fm [] = fm
                 addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs

       cwords s  = case dropWhile (not . isLower) s of
                "" -> []
                        s' -> w : (cwords s'')
                              where (w, s'') = span isLower s' 

       lower     = map toLower
wordfreq.guile
#!/usr/local/bin/guile \
-e main -s
!#

;;; $Id: wordfreq.guile,v 1.2 2001/06/29 23:12:37 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; from Brad Knotwell

(use-modules (ice-9 string-fun) (ice-9 common-list))
(use-modules (ice-9 format))

(define my-hash (make-hash-table 4000))

(define (print-sorted-hash) 
  (define (display-elt elt)
    (display (format "~7D\t~a\n" (car elt) (cdr elt))))
  (map display-elt 
       (sort-list (hash-fold (lambda (x y z) (cons (cons y x) z))  '() my-hash)
          (lambda (x y) (or (>; (car x) (car y))
                    (and (= (car x) (car y))
                     (string>;=? (cdr x) (cdr y))))))))
(define (load-hash x . tl)
  (define (do-entry entry)
    (let ((entry-val (hash-ref my-hash entry)))
      (hash-set! my-hash entry (if entry-val (1+ entry-val) 1))))
  (map do-entry (remove-if (lambda (x) (string=? x "")) (cons x tl))))

(define (main args)
  (do ((line (read-line) (read-line)))
      ((eof-object? line) (print-sorted-hash))
    (separate-fields-discarding-char 
     #\space 
     (list->;string (map (lambda (x) (if (char-alphabetic? x) x #\space)) 
            (string->;list (string-downcase line)))) 
     load-hash)))
wordfreq.ici
// $Id: wordfreq.ici,v 1.0 2003/01/03 12:23:00 dada Exp $
// http://dada.perl.it/shootout
//
// contributed by Tim Long

static counts = struct();

static
tolower(s)
{
    s = explode(s);
    forall (c, i in s)
    {
        if (c >= 'A' && c <= 'Z')
            s[i] += 'a' - 'A';
    }
    return implode(s);
}

while (l = getline())
{
    forall (w in smash(l, #\w+#, "\\&"))
    {
        if (w ~ #[A-Z]#)
            w = tolower(w);
        if (counts[w] == NULL)
            counts[w] = 1;
        else
            ++counts[w];
    }
}

out = array();
forall (c, w in counts)
    push(out, sprintf("%7d\t%s\n", c, w));
sort(out, [func(a, b){return a > b ? -1 : a < b;}]);
put(implode(out));
wordfreq.icon
# -*- mode: icon -*-
# $Id: wordfreq.icon,v 1.1 2000/12/25 06:58:04 doug Exp $
# http://www.bagley.org/~doug/shootout/

procedure main(argv)
    local wcount, buckets, w, c, pair, wl

    wcount := table(0)
    every wcount[words()] +:= 1

    buckets := table()
    every w := key(wcount) do {
    c := wcount[w]
    / buckets[c] := list()
    push(buckets[c], w)
    }
    buckets := sort(buckets,1)
    while pair := pull(buckets) do {
     c := pair[1]
     wl := sort(pair[2],1)
    while (write(right(c,7), "\t", pull(wl)))
    }
end

procedure words()
    local line, word
    while line := read() do {
    line := map(line)
    line ? while tab(upto(&letters)) do {
        word := tab(many(&letters))
        suspend(word)
    }
    }
end
wordfreq.java
// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $
// http://www.bagley.org/~doug/shootout/
// Collection class code is from my friend Phil Chu, Thanks Phil!

import java.io.*;
import java.util.*;
import java.text.*;

class Counter {
    int count = 1; 
}

public class wordfreq {

    public static void main(String[] args) {
    wf();
    }

    public static String padleft(String s,int n,char c) {
        int len = s.length();
        if( len>=n ) return s;
        char[] buf = new char[n];
        for( int i=0;i<n-len;i++ ) buf[i]=c;
        s.getChars(0,len,buf,n-len);
        return new String(buf);
    }
  
    public static void wf() {
        HashMap map = new HashMap();
        try {
            Reader r = new BufferedReader(new InputStreamReader(System.in));
            StreamTokenizer st = new StreamTokenizer(r);
            st.lowerCaseMode(true);
            st.whitespaceChars( 0, 64 );
            st.wordChars(65, 90);
            st.whitespaceChars( 91, 96 );
            st.wordChars(97, 122);
            st.whitespaceChars( 123, 255 );
            int tt = st.nextToken();
            while (tt != StreamTokenizer.TT_EOF) {
                if (tt == StreamTokenizer.TT_WORD) {
                    if (map.containsKey(st.sval)) {
                        ((Counter)map.get(st.sval)).count++;
                    } else {
                        map.put(st.sval, new Counter());
                    }
                }
                tt = st.nextToken();
            }
        } catch (IOException e) {
            System.err.println(e);
            return;
        }

        Collection entries = map.entrySet();
    // flatten the entries set into a vector for sorting
    Vector rev_wf = new Vector(entries); 

        // Sort the vector according to its value
        Collections.sort(rev_wf, new Comparator() {
        public int compare(Object o1, Object o2) {
            // First sort by frequency
            int c = ((Counter)((Map.Entry)o2).getValue()).count - ((Counter)((Map.Entry)o1).getValue()).count;
            if (c == 0) { // Second sort by lexicographical order
            c = ((String)((Map.Entry)o2).getKey()).compareTo((String)((Map.Entry)o1).getKey());
            }
            return c;
        }
        }
             );

        Iterator it = rev_wf.iterator();
    Map.Entry ent;
    String word;
    int count;
        while(it.hasNext()) {
            ent = (Map.Entry)it.next();
        word = ((String)ent.getKey());
        count = ((Counter)ent.getValue()).count;
        System.out.println(padleft(Integer.toString(count),7,' ') + "\t" + word);
        }
    }
}
wordfreq.lcc
/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"

typedef int (*comparator)(const void *, const void *);

int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int val = (*b)->val - (*a)->val;
    return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}

int main() {
    int bufsize = 80;
    char *buf = (char *)malloc(bufsize + 1);
    char c;
    int i = 0;
    struct ht_ht *ht = ht_create(75000);
    struct ht_node **sort_array, **sort_tmp, *node;

    while ((c = getchar()) > 0) {
        if (isalpha(c)) {
            buf[i++] = tolower(c);
        if (i == bufsize) {
        bufsize *= 2;
        buf = realloc(buf, bufsize + 1);
        } 
        } else {
        if (i > 0) {
        buf[i] = '\0';
        ++(ht_find_new(ht, buf)->val);
        i = 0;
        }
        }
    }
    free(buf);

    sort_array = sort_tmp =
    malloc(sizeof(struct ht_node *) * ht_count(ht));

    for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;

    qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
      (comparator)cmp_hash);

    for (i=0; i<ht_count(ht); i++)
    printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); 

    ht_destroy(ht);
    return(0);
}
wordfreq.lua
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- implemented by: Roberto Ierusalimschy

-- this version reads 4K chunks of input at a time

local words = {}   -- list of all words (for sorting)
local count = {}   -- count occurrences of each word

BUFSIZE = 2^12

while 1 do
  local lines, rest = read(BUFSIZE, "*l")
  if lines == nil then break end
  lines = lines..(rest or '')    -- ensures whole lines
  gsub(strlower(lines), "(%l+)", function (w)
    local cw = %count[w]
    if cw == nil then     -- first occurrence?
      cw = 0
      tinsert(%words, w)
    end
    %count[w] = cw + 1
  end)
end

sort(words, function (a,b)
  return  %count[a] > %count[b]  or
         (%count[a] == %count[b] and a > b)
end)

for i=1,getn(words) do
  local w = words[i]
  write(format("%7d\t%s\n", count[w], w))
end
wordfreq.lua5
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $
-- http://www.bagley.org/~doug/shootout/
-- contributed by Roberto Ierusalimschy

-- this version reads 4K chunks of input at a time

local words = {}   -- list of all words (for sorting)
local count = {}   -- count occurrences of each word

local BUFSIZE = 2^12

while true do
  local lines, rest = io.read(BUFSIZE, "*l")
  if lines == nil then break end
  lines = lines..(rest or '')    -- ensures whole lines
  for w in string.gfind(string.lower(lines), "(%l+)") do
    local cw = count[w]
    if not cw then     -- first occurrence?
      cw = 0
      table.insert(words, w)
    end
    count[w] = cw + 1
  end
end

table.sort(words, function (a,b)
  return  count[a] > count[b]  or (count[a] == count[b] and a > b)
end)

for i=1,table.getn(words) do
  local w = words[i]
  io.write(string.format("%7d\t%s\n", count[w], w))
end



wordfreq.mawk
# $Id: wordfreq.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $
# http://www.bagley.org/~doug/shootout/

BEGIN {
    delete ARGV;
    FS = "[^A-Za-z][^A-Za-z]*";
}
{
    for (i=1; i<=NF; i++) {
    freq[tolower($(i))]++;
    }
}
END {
    # gawk doesn't have a builtin sort routine
    # so we have to pipe through the shell sort program
    sort = "/bin/sort -nr"
    for (word in freq) {
    if (word) {
        printf "%7d\t%s\n", freq[word], word | sort
    }
    }
    close(sort)
}
wordfreq.mercury
% ---------------------------------------------------------------------------- %
% wordfreq.m
% Ralph Becket <rbeck@microsoft.com>
% Tue Jan  9 15:56:12 GMT 2001
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix
%
% ---------------------------------------------------------------------------- %

:- module mytest.

:- interface.

:- import_module io.



:- pred main(io__state, io__state).
:- mode main(di, uo) is det.



:- implementation.



:- import_module string, int, list, require, std_util, char, map.



main -->
    count_word_freqs(map__init, FreqMap),
    { X = list__sort(list__map(func(K-V) = V-K, map__to_assoc_list(FreqMap))) },
    { Y = list__reverse(X) },
    io__write_list(Y, "\n", write_freq_word),
    io__nl.



:- pred count_word_freqs(map(string,int),map(string,int),io__state,io__state).
:- mode count_word_freqs(in, out, di, uo) is det.

count_word_freqs(FreqMap0, FreqMap) -->
    io__read_line_as_string(Result),
    (
        { Result = ok(Line) },
        { Words  = string__words(isnt(char__is_alpha),string__to_lower(Line)) },
        { Update =
            ( func(S, FM) = 
                ( if map__search(FM, S, N)
                  then FM ^ elem(S) := N + 1
                  else FM ^ elem(S) := 1
                )
            )
        },
        { FreqMap1 = list__foldl(Update, Words, FreqMap0) },
        count_word_freqs(FreqMap1, FreqMap)
    ;
        { Result = eof },
        { FreqMap = FreqMap0 }
    ;
        { Result = error(ErrNo) },
        { error(io__error_message(ErrNo)) }
    ).



:- pred write_freq_word(pair(int, string), io__state, io__state).
:- mode write_freq_word(in, di, uo) is det.

write_freq_word(Freq - Word) -->
    io__format("%7d\t%s", [i(Freq), s(Word)]).
wordfreq.mingw32
/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.h"

typedef int (*comparator)(const void *, const void *);

int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int val = (*b)->val - (*a)->val;
    return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}

int main() {
    int bufsize = 80;
    char *buf = (char *)malloc(bufsize + 1);
    char c;
    int i = 0;
    struct ht_ht *ht = ht_create(75000);
    struct ht_node **sort_array, **sort_tmp, *node;

    while ((c = getchar()) > 0) {
        if (isalpha(c)) {
            buf[i++] = tolower(c);
        if (i == bufsize) {
        bufsize *= 2;
        buf = realloc(buf, bufsize + 1);
        } 
        } else {
        if (i > 0) {
        buf[i] = '\0';
        ++(ht_find_new(ht, buf)->val);
        i = 0;
        }
        }
    }
    free(buf);

    sort_array = sort_tmp =
    malloc(sizeof(struct ht_node *) * ht_count(ht));

    for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;

    qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
      (comparator)cmp_hash);

    for (i=0; i<ht_count(ht); i++)
    printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); 

    ht_destroy(ht);
    return(0);
}
wordfreq.ocaml
(*
 * $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 *)

let max = 4096
let buf = String.create max
let count = Hashtbl.create 75000
let wbuf = Buffer.create 64

let rec scan_words i n inword =
  if i < n then
    let c = buf.[i] in
    if 'a' <= c && c <= 'z' then begin
      Buffer.add_char wbuf c;
      scan_words (i+1) n true
    end
    else if 'A' <= c && c <= 'Z' then begin
      Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32));
      scan_words (i+1) n true
    end
    else if inword then begin
      let word = Buffer.contents wbuf in
      begin try incr (Hashtbl.find count word)
      with Not_found -> Hashtbl.add count word (ref 1) end;
      Buffer.clear wbuf;
      scan_words (i+1) n false
    end else
      scan_words (i+1) n false
  else
    let nread = input stdin buf 0 max in
    if nread = 0 then () else scan_words 0 nread inword

let _ =
  scan_words 0 0 false;
  let out_lines = ref [] in
  let to_list l w c =
    l := (Printf.sprintf "%7d\t%s" !c w) :: !l in
  Hashtbl.iter (to_list out_lines) count;
  List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)

wordfreq.ocamlb
(*
 * $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 *)

let max = 4096
let buf = String.create max
let count = Hashtbl.create 75000
let wbuf = Buffer.create 64

let rec scan_words i n inword =
  if i < n then
    let c = buf.[i] in
    if 'a' <= c && c <= 'z' then begin
      Buffer.add_char wbuf c;
      scan_words (i+1) n true
    end
    else if 'A' <= c && c <= 'Z' then begin
      Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32));
      scan_words (i+1) n true
    end
    else if inword then begin
      let word = Buffer.contents wbuf in
      begin try incr (Hashtbl.find count word)
      with Not_found -> Hashtbl.add count word (ref 1) end;
      Buffer.clear wbuf;
      scan_words (i+1) n false
    end else
      scan_words (i+1) n false
  else
    let nread = input stdin buf 0 max in
    if nread = 0 then () else scan_words 0 nread inword

let _ =
  scan_words 0 0 false;
  let out_lines = ref [] in
  let to_list l w c =
    l := (Printf.sprintf "%7d\t%s" !c w) :: !l in
  Hashtbl.iter (to_list out_lines) count;
  List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)

wordfreq.perl
#!/usr/local/bin/perl
# $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/

# Tony Bowden suggested using tr versus lc and split(/[^a-z]/)

use strict;

my %count = ();
while (read(STDIN, $_, 4095) and $_ .= <STDIN>) {
    tr/A-Za-z/ /cs;
    ++$count{$_} foreach split(' ', lc $_);
}

my @lines = ();
my ($w, $c);
push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count));
print sort { $b cmp $a } @lines;
wordfreq.pike
#!/usr/local/bin/pike// -*- mode: pike -*-
// $Id: wordfreq.pike,v 1.3 2000/12/05 16:04:07 doug Exp $
// http://www.bagley.org/~doug/shootout/
// from: Fredrik Noring

void main()
{
    mapping(string:int) dictionary = ([]);
    string buffer = "";

    array(string) f = filter(map(enumerate(128),
                   lambda(int i)
                   {
                       return !('A' <= i && i <= 'Z' ||
                        'a' <= i && i <= 'z' ||
                        i == ' ') &&
                          sprintf("%c", i);
                   }), `!=, 0);
    array(string) t = allocate(sizeof(f), " ");

    for(;;)
    {
      string data =
          buffer + replace(lower_case(Stdio.stdin.read(4096)), f, t);
      
      if(!sizeof(data))
          break;
      
      array(string) words = data/" ";

      if(1 < sizeof(words) && sizeof(words[-1]))
          buffer = words[-1],
           words = words[..sizeof(words)-2];
      else
          buffer = "";

      foreach(words, string word)
          dictionary[word]++;
    }
    
    m_delete(dictionary, "");

    mapping(int:array(string)) revdictionary = ([]);
    array(string) words = indices(dictionary);
    array(int) freqs = values(dictionary);

    for(int i = 0; i < sizeof(dictionary); i++)
      revdictionary[freqs[i]] += ({ words[i] });

    freqs = sort(indices(revdictionary));
    for(int i = sizeof(freqs)-1; 0 <= i; i--)
    {
      int freq = freqs[i];
      words = sort(revdictionary[freq]);
      
      for(int j = sizeof(words)-1; 0 <= j; j--)
          write("%7d\t%s\n", freq, words[j]);
    }
}
wordfreq.pliant
# $Id: wordfreq.pliant,v 1.1 2002/03/18 14:07:00 dada Exp $
# http://dada.perl.it/shootout/

module "/pliant/language/unsafe.pli"
module "/pliant/language/context.pli"
module "/pliant/language/stream.pli"
module "/pliant/language/stream/pipe.pli"

function heapsort n ra
  arg Int n ; arg_rw Array:Str ra
  var Str rra
  var Int i
  var Int j
  var Int l := (n\2) + 1
  var Int ir := n
  
  part heapsort_loop
    if l>1
      l := l - 1
      rra := ra:l
    else
      rra := ra:ir
      ra:ir := ra:1
      ir := ir - 1
      if ir=1
        ra:1 := rra
        leave heapsort_loop
    i := l
    j := l*2

    while j<=ir
      if j<ir and ra:j > ra:(j+1)
        j := j + 1
      if rra > ra:j
        ra:i := ra:j
        i := j
        j := j + i
      else
        j := ir + 1
    ra:i := rra
    restart heapsort_loop

gvar Stream stdin

gvar Str line := ""
gvar Str l
gvar Str word
gvar Address Buf
gvar Int ReadCount
gvar CBool ok
gvar Int ReadSize := 4096
gvar Int eolpos
gvar Int i
gvar Int nl := 0
gvar Int nw := 0
gvar Int nc := 0
gvar Char ch
gvar Int j
gvar Array:Str lines

gvar (Dictionary Str Int) count
gvar Pointer:Int v
gvar Str k

stdin open "handle:0" in

while not stdin:atend
  line := stdin readline
  i := 0
  while i<line:len
    ch := line:i
    if ch>="a" and ch<="z" or ch>="A" and ch<="Z"
      j := i+1
      part find_word
        if j<line:len
          ch := line:j
          if ch>="a" and ch<="z" or ch>="A" and ch<="Z"
            j += 1              
            restart find_word          
      word := lower (line i j-i)
      if (count exists word)
        count:word += 1
      else
        count insert word 1        
      i += j-i
    else
      i += 1

lines += ""
v :> count first
k := count key v    
while addressof:v<>null
  if addressof:(count first k)<>null
    l := (string count:k)
    l := (repeat 7-l:len " ")+l
    line := l+"[tab]"+k+"[lf]"
    lines += line
    # console k "=" count:k eol
  v :> count next v
  if addressof:v<>null
    k := count key v

heapsort lines:size-1 lines
for i 0 lines:size-1
  console lines:i

wordfreq.poplisp
;;; -*- mode: lisp -*-
;;; $Id: wordfreq.cmucl,v 1.2 2001/06/05 22:23:23 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; From Bulent Murtezaoglu, based on code from Andrew McDowell

;;; The Block/Buffering I/O code is adapted from material posted to comp.lang.lisp
;;; by Thomas Kirk <tk@research.att.com>.
;;; Archived here: http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/lang/lisp/code/io/fast_io/fast_io.txt
;;;

(declaim (optimize (speed 3) (debug 0) (safety 0) (space 0) (compilation-speed 0)))


  (let* ((start 0)
     (current 0)
     (end 0)
     (eof nil)
     (inputbuffer (make-array 4096 :element-type '(unsigned-byte 8))))
    (declare (type (simple-array (unsigned-byte 8)) inputbuffer)
         (fixnum start end)
         (inline code-char)
         (type (unsigned-byte 8) current))
    (labels 
    ((get-char () ;;don't do this at home works through side effect unless eof
           (when (= start end)
             (setf start 0)
             (setf end (read-sequence inputbuffer *standard-input*))
             (when (zerop end)
               (setf eof t)
               (return-from get-char nil)))
           (setf current (aref inputbuffer start))
           (incf start))
    
      (word-constituent-p () ;; side effects current rets t if word-constituent
                 (or (and (>;= current 97) (<= current 122)) 
                   (and (>;= current 65) (<= current 90) 
                    (incf current 32))))
     (reader (buffer)
         (declare (type simple-base-string buffer))
         (let* ((size (length buffer))
            (fill-ptr 0))
           (declare (type (integer 0 1000)  size fill-ptr))
           (loop while (and (or (get-char) (return-from reader buffer))
                    (not (word-constituent-p))))
           (loop
             (when (= fill-ptr size)
               (let ((new-buffer
                  (make-string (the fixnum 
                         (setf size (the fixnum (+ fill-ptr 5)))))))
             (setf (subseq (the simple-base-string new-buffer) 0 fill-ptr)
                   buffer)
             (setf buffer new-buffer)))
             (setf (schar buffer fill-ptr) (code-char current))
             (incf fill-ptr)
             (if (get-char)
             (if (not (word-constituent-p))
                 (return))
               (return)))
           (lisp::shrink-vector buffer fill-ptr))))

      (let* ((h (make-hash-table :test 'equal :size 3380 :rehash-threshold 1.0))
         (buffer (make-string 10)))
    (declare (inline gethash maphash))
    (declare (type simple-base-string buffer))
    (loop
      (setf buffer (reader buffer))
      (if (not eof)
          (if (= 1 (the fixnum (incf (the fixnum (gethash buffer h 0)))))
          (setf buffer (make-string 10)));; only cons if used
        (return)))
    ;; hast table => list
    (let ((l '()))
      (maphash #'(lambda (key val)
               (push (cons key val) l))
           h)
      ;; sort the list
      (setf l (sort l #'(lambda (v1 v2)
                  (if (>; (the fixnum (cdr v1)) (the fixnum (cdr v2)))
                  t
                (if (= (the fixnum (cdr v1)) (the fixnum (cdr v2)))
                    (string-lessp (car v2) (car v1))
                  nil)))))
      
      ;; output the list of pairs 
      (mapcar #'(lambda (p)
              (format t "~7D    ~A~&" (cdr p) (car p))) 
          l)))))
wordfreq.python
#!/usr/local/bin/python
# $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $
# http://www.bagley.org/~doug/shootout/
#
# adapted from Bill Lear's original python word frequency counter
#
# Joel Rosdahl suggested using translate table to speed up
# word splitting.  That change alone sped this program up by
# at least a factor of 3.
#
# with further speedups from Mark Baker

import sys

def main():
    count = {}
    i_r = map(chr, range(256))

    trans = [' '] * 256
    o_a, o_z = ord('a'), (ord('z')+1)
    trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z]
    trans[o_a:o_z] = i_r[o_a:o_z]
    trans = ''.join(trans)

    rl = sys.stdin.readlines

    lines = rl(4095)
    while lines:
        for line in lines:
            for word in line.translate(trans).split():
                try:
                    count[word] += 1
                except KeyError:
                    count[word] = 1
        lines = rl(4095)

    l = zip(count.values(), count.keys())
    l.sort()
    l.reverse()

    print '\n'.join(["%7s\t%s" % (count, word) for (count, word) in l])

main()
wordfreq.rexx

count. = 0
ws = ''
transtab = XRANGE('A', 'Z') || XRANGE('a', 'z')
transin  = XRANGE('A', 'Z')
transout = XRANGE('a', 'z')
notword  = XRANGE('00'x, D2C(C2D('a')-1)) || XRANGE(D2C(C2D('z')+1), 'FF'x)
DO UNTIL LINES() = 0
    PARSE LINEIN L

    L = TRANSLATE(TRANSLATE(L, transout, transin), '', notword, ' ')

    DO i = 1 TO WORDS(L)
        w = STRIP(WORD(L, i))
        count.w = count.w + 1
        IF count.w = 1 THEN ws = ws w
    END    
END

STREAM('rexx_tmp', 'C', 'CREATE')

DO i = 1 to WORDS(ws)
    w = WORD(ws, i)
    IF count.w > 0 THEN DO
        line = format(count.w, 7) || '09'x || w
        CALL LINEOUT('rexx_tmp', line)
    END
END

'c:\cygwin\bin\sort -nr rexx_tmp >rexx_sorted'

DO WHILE LINES('rexx_sorted') > 0
    SAY LINEIN('rexx_sorted')
END

/*
STREAM('rexx_tmp', 'C', 'CLOSE')
STREAM('rexx_sorted', 'C', 'CLOSE')

'del rexx_tmp'
'del rexx_sorted'
*/
wordfreq.ruby
#!/usr/local/bin/ruby
# -*- mode: ruby -*-
# $Id: wordfreq.ruby,v 1.9 2001/05/16 23:46:40 doug Exp $
# http://www.bagley.org/~doug/shootout/

freq = Hash.new(0)
loop {
    data = (STDIN.read(4095) or break) << (STDIN.gets || "")
    for word in data.downcase.tr_s('^A-Za-z',' ').split(' ')
    freq[word] += 1
    end
}
freq.delete("")

lines = Array.new
freq.each{|w,c| lines << sprintf("%7d\t%s\n", c, w) }
print lines.sort.reverse
wordfreq.slang
% $Id: wordfreq.slang,v 1.0 2003/01/03 14:43:00 dada Exp $
% http://dada.perl.it/shootout/
%
% contributed by John E. Davis

define main()
{
   variable count = Assoc_Type[Int_Type, 0];
   foreach (stdin) using ("line")
     {
    variable line = ();
    foreach (strtok (strlow (line), "^a-z"))
      {
         variable word = ();
         count [word] += 1;
      }
     }
   
   variable words = assoc_get_keys (count);
   variable freqs = assoc_get_values (count);

   variable i = array_sort (array_map (String_Type, &sprintf, "%12d", freqs)
                + words);
   i = i[[-1:0:-1]];
   
   foreach (i)
     {
    i = ();
    ()=fprintf (stdout, "%7d\t%s\n", freqs[i], words[i]);
     }
}

main();
wordfreq.smlnj
(* -*- mode: sml -*-
 * $Id: wordfreq.smlnj,v 1.3 2001/07/09 00:25:29 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 * from Stephen Weeks
 *)

fun for (start, stop, f) =
   let
      fun loop i =
     if i > stop
        then ()
     else (f i; loop (i + 1))
   in
      loop start
   end
fun incr r = r := 1 + !r
val sub = Array.sub
val update = Array.update
   
signature HASH_SET =
   sig
      type 'a t

      val foreach: 'a t * ('a -> unit) -> unit
      (* lookupOrInsert (s, h, p, f)  looks in the set s for an entry with hash h
       * satisfying predicate p.  If the entry is there, it is returned.
       * Otherwise, the function f is called to create a new entry, which is
       * inserted and returned.
       *)
      val lookupOrInsert: 'a t * word * ('a -> bool) * (unit -> 'a) -> 'a
      val new: {hash: 'a -> word} -> 'a t
      val size: 'a t -> int
   end

structure HashSet: HASH_SET =
struct

datatype 'a t =
   T of {buckets: 'a list array ref,
     hash: 'a -> word,
     mask: word ref,
     numItems: int ref}

val initialSize: int = 65536
val initialMask: word = Word.fromInt initialSize - 0w1

fun 'a new {hash}: 'a t =
   T {buckets = ref (Array.array (initialSize, [])),
      hash = hash,
      numItems = ref 0,
      mask = ref initialMask}

fun size (T {numItems, ...}) = !numItems
fun numBuckets (T {buckets, ...}) = Array.length (!buckets)

fun index (w: word, mask: word): int =
   Word.toInt (Word.andb (w, mask))
   
fun resize (T {buckets, hash, mask, ...}, size: int, newMask: word): unit =
   let
      val newBuckets = Array.array (size, [])
   in Array.app (fn r =>
         List.app (fn a =>
                   let val j = index (hash a, newMask)
                   in Array.update
                  (newBuckets, j,
                   a :: Array.sub (newBuckets, j))
                   end) r) (!buckets)
      ; buckets := newBuckets
      ; mask := newMask
   end
              
fun maybeGrow (s as T {buckets, mask, numItems, ...}): unit =
   let
      val n = Array.length (!buckets)
   in if !numItems * 4 > n
     then resize (s,
              n * 2,
              
              Word.orb (0w1, Word.<< (!mask, 0w1)))
      else ()
   end

fun peekGen (T {buckets = ref buckets, mask, ...}, w, p, no, yes) =
   let
      val j = index (w, !mask)
      val b = Array.sub (buckets, j)
   in case List.find p b of
      NONE => no (j, b)
    | SOME a => yes a
   end

fun lookupOrInsert (table as T {buckets, numItems, ...}, w, p, f) =
   let
      fun no (j, b) =
     let val a = f ()
        val _ = incr numItems
        val _ = Array.update (!buckets, j, a :: b)
        val _ = maybeGrow table
     in a
     end
   in peekGen (table, w, p, no, fn x => x)
   end

fun foreach (T {buckets, ...}, f) =
   Array.app (fn r => List.app f r) (!buckets)

end

structure Buffer:
   sig
      type t

      val add: t * Word8.word -> unit
      val clear: t -> unit
      val contents: t -> string
      val new: int -> t
   end =
   struct
      datatype t = T of {elts: Word8Array.array ref,
             size: int ref}

      fun contents (T {elts, size, ...}) =
     Byte.bytesToString (Word8Array.extract (!elts, 0, SOME (!size)))

      fun clear (T {size, ...}) = size := 0

      fun new (bufSize) =
     T {elts = ref (Word8Array.array (bufSize, 0w0)),
        size = ref 0}

      fun add (T {elts, size}, x) =
     let
        val s = !size
        val _ = size := s + 1
        val a = !elts
        val n = Word8Array.length a
     in
        if s = n
           then
          let
             val a' =
            Word8Array.tabulate
            (2 * n, fn i =>
             if i < n then Word8Array.sub (a, i) else 0w0)
             val _ = elts := a'
             val _ = Word8Array.update (a', s, x)
          in ()
          end
        else Word8Array.update (a, s, x)
     end
   end

structure Quicksort:
   sig
      val quicksort: 'a array * ('a * 'a -> bool) -> unit
   end =
   struct
      fun assert (s, f: unit -> bool) =
     if true orelse f ()
        then ()
     else raise Fail (concat ["assert: ", s])

      fun forall (low, high, f) =
     let
        fun loop i = i > high orelse (f i andalso loop (i + 1))
     in
        loop low
     end

      fun fold (l, u, state, f) =
     let
        fun loop (i, state) =
           if i > u
          then state
           else loop (i + 1, f (i, state))
     in
        loop (l, state)
     end

      
      fun 'a isSorted (a: 'a array,
               lo: int,
               hi: int,
               op <= : 'a * 'a -> bool) =
     let
        fun loop (i, x) =
           i > hi
           orelse let
             val y = sub (a, i)
              in
             x <= y andalso loop (i + 1, y)
              end
     in
        lo >= hi orelse loop (lo + 1, sub (a, lo))
     end

      
      local
     open Word
     val seed = ref 0w13
      in
     fun rand () =
        let
           val res = 0w1664525 * !seed + 0w1013904223
           val _ = seed := res
        in
           toIntX res
        end
      end

      fun randInt (lo, hi) = lo + Int.mod (rand(), hi - lo + 1)

      
      fun insertionSort (a: 'a array, op <= : 'a * 'a -> bool): unit =
     let
        fun x i = sub (a, i)
     in
        for (1, Array.length a - 1, fn i =>
         let
            val _ =
               assert ("insertionSort1", fn () =>
                   isSorted (a, 0, i - 1, op <=))
            val t = x i
            fun sift (j: int) =
               (assert ("insertionSort2", fn () =>
                isSorted (a, 0, j - 1, op <=)
                andalso isSorted (a, j + 1, i, op <=)
                andalso forall (j + 1, i, fn k => t <= x k))
            ; if j > 0
                 then
                let
                   val j' = j - 1
                   val z = x j'
                in if t <= z
                      then (update (a, j, z);
                        sift j')
                   else j
                end
              else j)
            val _ = update (a, sift i, t)
         in ()
         end)
     end

      
      fun 'a quicksort (a: 'a array, op <= : 'a * 'a -> bool): unit =
     let
        fun x i = Array.sub (a, i)
        fun swap (i, j) =
           let
          val t = x i
          val _ = update (a, i, x j)
          val _ = update (a, j, t)
           in ()
           end
        val cutoff = 20
        fun qsort (l: int, u: int): unit =
           if u - l > cutoff
          then
             let
            val _ = swap (l, randInt (l, u))
            val t = x l
            val m =
               fold
               (l + 1, u, l, fn (i, m) =>
                (assert
                 ("qsort", fn () =>
                  forall (l + 1, m, fn k => x k <= t)
                  andalso forall (m + 1, i - 1, fn k => not (x k <= t)))
                 ; if x i <= t
                  then (swap (m + 1, i)
                    ; m + 1)
                   else m))
            val _ = swap (l, m)
            val _ = qsort (l, m - 1)
            val _ = qsort (m + 1, u)
             in ()
             end
           else ()
        val max = Array.length a - 1
        val _ = qsort (0, max)
        val _ = insertionSort (a, op <=)  
     in
        ()
     end
   end

structure Test : sig
    val main : (string * string list) -> OS.Process.status
end = struct

(* This hash function is taken from pages 56-57 of
 * The Practice of Programming by Kernighan and Pike.
 *)
fun hash (s: string): word =
   let
      val n = String.size s
      fun loop (i, w) =
     if i = n
        then w
     else Word.fromInt (Char.ord (String.sub (s, i))) + Word.* (w, 0w31)
   in
      loop (0, 0w0)
   end

fun hash (s: string): word =
   let
      val n = String.size s
      fun loop (i, w) =
     if i = n
        then w
     else loop (i + 1,
            Word.fromInt (Char.ord (String.sub (s, i)))
               + Word.* (w, 0w31))
   in
      loop (0, 0w0)
   end



val max = 4096
val buf = Word8Array.array (max, 0w0)
val count: {hash: word,
        word: string,
        count: int ref} HashSet.t = HashSet.new {hash = #hash}
val wbuf = Buffer.new 64

val c2b = Byte.charToByte
fun scan_words (i, n, inword) =
  if i < n
     then
    let
       val c = Word8Array.sub (buf, i)
    in
       if c2b #"a" <= c andalso c <= c2b #"z"
          then (Buffer.add (wbuf, c);
            scan_words (i + 1, n, true))
       else
          if c2b #"A" <= c andalso c <= c2b #"Z"
         then
            (Buffer.add (wbuf, c + 0w32);
             scan_words (i + 1, n, true))
          else
         if inword
            then 
               let
              val w = Buffer.contents wbuf
              val h = hash w
               in
              incr (#count
                (HashSet.lookupOrInsert
                 (count, h,
                  fn {hash, word, ...} =>
                  hash = h andalso word = w,
                  fn () => {hash = h, word = w, count = ref 0})));
              Buffer.clear wbuf;
              scan_words (i + 1, n, false)
               end
         else scan_words (i + 1, n, false)
    end
  else
     let
    val nread =
       Posix.IO.readArr (Posix.FileSys.stdin,
                 {buf = buf, i =  0, sz = NONE})
     in
    if nread = 0
       then ()
    else scan_words (0, nread, inword)
     end

fun printl [] = print "\n" | printl(h::t) = ( print h ; printl t )

fun rightJustify (s: string, width: int) =
   let
      val n = String.size s
   in concat [CharVector.tabulate (width - n, fn _ => #" "), s]
   end

fun main (name, args) =
   let
    val _ = scan_words (0, 0, false)
    val a = Array.array (HashSet.size count, (0, ""))
    val i = ref 0
    val _ = HashSet.foreach (count, fn {word, count, ...} =>
             (Array.update (a, !i, (!count, word)); incr i))
    val _ = Quicksort.quicksort (a, fn ((c, w), (c', w')) =>
                 c > c' orelse c = c' andalso w >= w')
    val _ = Array.app (fn (c, w) =>
           printl [rightJustify (Int.toString c, 7), "\t", w]) a
   in
      OS.Process.success
   end
end

val _ = SMLofNJ.exportFn("wordfreq", Test.main);
wordfreq.tcl
#!/usr/local/bin/tclsh
# $Id: wordfreq.tcl,v 1.11 2001/05/17 00:49:34 doug Exp $
# http://www.bagley.org/~doug/shootout/
# with help from: Tom Wilkason and Branko Vesligaj

proc main {} {
    while {1} {
    set data [read stdin 4096]
    if {[string equal $data {}]} {break}
    if {[gets stdin extra] >= 0} {
        append data $extra
    }
    regsub -all  {[^[:alpha:]]+} $data { } line
    foreach word [string tolower $line] {
        if {[catch {incr count($word)}]} {
        set count($word) 1
        }
    }
    }
    foreach {word cnt}  [array get count] {
    lappend lines [format "%7d\t%s" $cnt $word]
    }
    puts [join [lsort -decreasing $lines] "\n"]
}

main

wordfreq.vbscript
<job>

<script language=JScript runat=server>

    function Descending(a, b) {
        return ((b > a) ? 1 : ((b < a) ? -1 : 0));
    }

    function SortVBArray(arrVBArray) {
        return arrVBArray.toArray().sort(Descending).join('@');
    }
</script>

<script language=VBScript>

Function SortArray(arrInput)
    SortArray = Split(SortVBArray(arrInput), "@")
End Function

Set Count = CreateObject("Scripting.Dictionary")

Blob = WScript.StdIn.ReadAll

Lines = Split(Blob, vbCrLf)

For Each L in Lines
    Line = Trim(LCase(L))
    For B = 1 To Len(Line)
        C = Asc(Mid(Line, B, 1))
        If C <> Asc(" ") And (C < Asc("a") Or C > Asc("z")) Then
            'WSCript.Echo(Line)
            'WScript.Echo(String(B-1, " ") & "^")
            Line = Left(Line, B-1) & " " & Mid(Line, B+1)
            'WSCript.Echo(Line)
            'WScript.Echo(String(B-1, " ") & "^")
        End If
    Next

    Words = Split(Line, " ")
    For Each Word in Words
        If Word <> " " And Word <> "" Then 
            If Count.Exists(Word) Then
                Count.Item(Word) = Count.Item(Word) + 1
            Else
                Count.Item(Word) = 1
            End If
        End If
    Next    
Next

K = Count.Keys
Redim Lines(Count.Count-1)

For A = 0 To Count.Count-1
    N = CStr(Count.Item(K(A)))
    If Len(N) < 7 Then N = String(7-Len(N), " ") & N
    Lines(A) = N & Chr(9) & K(A)
Next

SortedLines = SortArray(Lines)
For A = LBound(SortedLines) To UBound(SortedLines)
    WScript.Echo(SortedLines(A))
Next
    
</script>
</job>
wordfreq.vc
/* -*- mode: c -*-
 * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $
 * http://www.bagley.org/~doug/shootout/
 */

#include <stdio.h>
#include <ctype.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "simple_hash.vc.h"

typedef int (*comparator)(const void *, const void *);

int cmp_hash(struct ht_node **a, struct ht_node **b) {
    int val = (*b)->val - (*a)->val;
    return((val == 0) ? strcmp((*b)->key, (*a)->key) : val);
}

int main() {
    int bufsize = 80;
    char *buf = (char *)malloc(bufsize + 1);
    char c;
    int i = 0;
    struct ht_ht *ht = ht_create(75000);
    struct ht_node **sort_array, **sort_tmp, *node;

    while ((c = getchar()) > 0) {
        if (isalpha(c)) {
            buf[i++] = tolower(c);
        if (i == bufsize) {
        bufsize *= 2;
        buf = realloc(buf, bufsize + 1);
        } 
        } else {
        if (i > 0) {
        buf[i] = '\0';
        ++(ht_find_new(ht, buf)->val);
        i = 0;
        }
        }
    }
    free(buf);

    sort_array = sort_tmp =
    malloc(sizeof(struct ht_node *) * ht_count(ht));

    for (node=ht_first(ht); (*sort_tmp++ = node) != 0; node=ht_next(ht)) ;

    qsort(sort_array, ht_count(ht), sizeof(struct ht_node *),
      (comparator)cmp_hash);

    for (i=0; i<ht_count(ht); i++)
    printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); 

    ht_destroy(ht);
    return(0);
}