Word Frequency Count

Word Frequency Count

Back to the Win32 Shootout
Back to dada's perl lab

[The Original Shootout] [NEWS] [FAQ] [Methodology] [Platform Details] [Acknowledgements] [Scorecard]

All Source For Word Frequency Count
wordfreq.awka
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { delete ARGV; FS = "[^A-Za-z][^A-Za-z]"; } { for* (i=1; i<=NF; i++) { freq[tolower($(i))]++; } } END { # gawk doesn't have a builtin sort routine # so we have to pipe through the shell sort program sort = "/bin/sort -nr" for (word in freq) { if (word) { printf "%7d\t%s\n", freq[word], word \| sort } } close(sort) }
wordfreq.bcc
/* -- mode: c -- * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #define inline #include "simple_hash.h" typedef* int (comparator)(const* void , const* void ); int* cmp_hash(struct ht_node a, struct ht_node b) { int val = (b)->val - (a)->val; return((val == 0) ? strcmp((b)->key, (a)->key) : val); } int main() { int bufsize = 80; char buf = (char* )malloc(bufsize + 1); char* c; int i = 0; struct ht_ht ht = ht_create(75000); struct* ht_node sort_array, sort_tmp, node; while* ((c = getchar()) > 0) { if (isalpha(c)) { buf[i++] = tolower(c); if (i == bufsize) { bufsize = 2; buf = realloc(buf, bufsize + 1); } } else* { if (i > 0) { buf[i] = '\0'; ++(ht_find_new(ht, buf)->val); i = 0; } } } free(buf); sort_array = sort_tmp = malloc(sizeof(struct ht_node ) ht_count(ht)); for (node=ht_first(ht); (sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct* ht_node ), (comparator)cmp_hash); for* (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); ht_destroy(ht); return(0); }
wordfreq.cygperl
#!/usr/local/bin/perl # $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $ # http://www.bagley.org/~doug/shootout/ # Tony Bowden suggested using tr versus lc and split(/[^a-z]/) use strict; my %count = (); while (read(STDIN, $_, 4095) and $_ .= <STDIN>) { tr/A-Za-z/ /cs; ++$count{$_} foreach split(' ', lc $_); } my @lines = (); my ($w, $c); push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count)); print sort { $b cmp $a } @lines;
wordfreq.delphi
program wordfreq; uses SysUtils, Classes, Windows, simpleHash in 'simpleHash.pas'; const cWhiteSpace = [#0..#255]-['a'..'z','A'..'Z']; BUF_SIZE = 4096; var handle: THandle; buf: array[1..BUF_SIZE+1] of char; s,w: string; words: TStringHash; forSort: TList; numRead,i,wStart: cardinal; p: PHashEntry; procedure addWord(const s: string); var w: string; p: PHashEntry; begin w:=lowercase(s); p:=words.get(w); if p<>nil then inc(p^.Value) else words.add(w,1); end; function sortFunc(Item1, Item2: Pointer): Integer; begin with PHashEntry(Item1)^ do begin if Value<PHashEntry(Item2)^.Value then result:=1 else if Value>PHashEntry(Item2)^.Value then result:=-1 else begin // values equal if Key=PHashEntry(Item2)^.Key then result:=0 else if Key<PHashEntry(Item2)^.Key then result:=1 else result:=-1; end; end; end; begin handle:=GetStdHandle(STD_INPUT_HANDLE); words:=TStringHash.Create; s:=''; buf[BUF_SIZE+1]:=#0; numRead:=fileRead(handle,buf,BUF_SIZE); while numRead>0 do begin i:=1; if (s<>'')and(buf[1] in cWhiteSpace) then begin addWord(s); s:=''; end; repeat while (buf[i] in cWhiteSpace)and(i<=numRead) do inc(i); if i>numRead then break; // repeat wStart:=i; repeat inc(i); until (buf[i] in cWhiteSpace); setString(w,PChar(addr(buf[wStart])),i-wStart); if s<>'' then begin w:=s+w; s:=''; end; if i<=numRead then addWord(w) else // word exceeding buffer s:=w; until (i>numRead); numRead:=fileRead(handle,buf,BUF_SIZE); end; if (s<>'') then addWord(s); forSort:=TList.Create; forSort.Capacity:=words.Count; p:=words.getFirst; while p<>nil do begin forSort.Add(p); p:=words.getNext; end; forSort.Sort(sortFunc); for i:=0 to forSort.Count-1 do begin p:=PHashEntry(forSort.Items[i]); write(format('%7d'#9'%s'#13#10,[p^.Value,p^.Key])); end; words.Free; forSort.Free; end.
wordfreq.erlang
%%% -- mode: erlang -- %%% $Id: wordfreq.erlang,v 1.8 2000/12/31 22:54:17 doug Exp $ %%% http://www.bagley.org/~doug/shootout/ %% Use ETS tables (Erlang's associative store). -module(wordfreq). -export([main/0, main/1]). %% ignore program argument main() -> main(['1']). main(Args) -> OutL = fun ({Word, Count}) -> io:format("~7w\t~s~n", [Count, Word]) end, lists:foreach(OutL, sortedfreqs()), halt(0). % sort the results, descending sortedfreqs() -> Port = open_port({fd, 0, 1}, [eof, {line, 512}]), WordCountList = count_words_from_stream(ets:new(freqtab, [ordered_set]), Port), lists:reverse(lists:keysort(2, WordCountList)). count_words_from_stream(Table, Port) -> receive {Port, eof} -> ets:delete(Table, ''), ets:tab2list(Table); {Port, {_, {_, Line}}} -> count_words([], Line, Table), count_words_from_stream(Table, Port) end. count_word(Word, Table) -> WordAtom = list_to_atom(Word), case (catch ets:update_counter(Table, WordAtom, 1)) of {'EXIT', {badarg, _}} -> ets:insert(Table, {WordAtom, 1}); _ -> true end. % count_words(Word_Accumulator, Line_of_Chars, Table) count_words([], [], Table) -> true; count_words(Word, [], Table) -> count_word(Word, Table); count_words(Word, [H\|T], Table) when H >= $a, H=< $z -> NewWord = lists:append(Word, [H]), count_words(NewWord, T, Table); count_words(Word, [H\|T], Table) when H >= $A, H=< $Z -> NewWord = lists:append(Word, [(H - $A) + $a]), count_words(NewWord, T, Table); % we hit a non-word character so count previous word and continue count_words(Word, [H\|T], Table) -> count_word(Word, Table), count_words([], T, Table).
wordfreq.gawk
# $Id: wordfreq.gawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { delete ARGV; FS = "[^A-Za-z][^A-Za-z]"; } { for* (i=1; i<=NF; i++) { freq[tolower($(i))]++; } } END { # gawk doesn't have a builtin sort routine # so we have to pipe through the shell sort program sort = "/bin/sort -nr" for (word in freq) { if (word) { printf "%7d\t%s\n", freq[word], word \| sort } } close(sort) }
wordfreq.gcc
/* -- mode: c -- * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "simple_hash.h" typedef* int (comparator)(const* void , const* void ); int* cmp_hash(struct ht_node a, struct ht_node b) { int val = (b)->val - (a)->val; return((val == 0) ? strcmp((b)->key, (a)->key) : val); } int main() { int bufsize = 80; char buf = (char* )malloc(bufsize + 1); char* c; int i = 0; struct ht_ht ht = ht_create(75000); struct* ht_node sort_array, sort_tmp, node; while* ((c = getchar()) > 0) { if (isalpha(c)) { buf[i++] = tolower(c); if (i == bufsize) { bufsize = 2; buf = realloc(buf, bufsize + 1); } } else* { if (i > 0) { buf[i] = '\0'; ++(ht_find_new(ht, buf)->val); i = 0; } } } free(buf); sort_array = sort_tmp = malloc(sizeof(struct ht_node ) ht_count(ht)); for (node=ht_first(ht); (sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct* ht_node ), (comparator)cmp_hash); for* (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); ht_destroy(ht); return(0); }
wordfreq.gforth
\ -- mode: forth -- \ $Id: wordfreq.gforth,v 1.1 2001/05/30 17:49:08 doug Exp $ \ http://www.bagley.org/~doug/shootout/ \ from Anton Ertl: wordlist constant word-counts create word-pointers 10000 cells allot variable endwp word-pointers endwp ! 1024 constant max-line create line max-line 2 + allot struct cell% field wf-count cell% 2* field wf-name end-struct wf% : count-word 2dup word-counts search-wordlist if 1 swap >body +! 2drop else nextname get-current word-counts set-current create set-current here endwp @ tuck ! cell+ endwp ! 1 , last @ name>string 2, endif ; : no-letter? dup 'a < swap 'z > or ; : process-word 2dup u< if over - count-word else 2drop endif ; : process-line bounds 2dup ?do i c@ $20 or dup i c! no-letter? if i process-word i 1+ endif loop swap process-word ; : process-file >r begin line max-line r@ read-line throw while line swap process-line repeat rdrop ; : output endwp @ word-pointers ?do i @ dup wf-count @ 7 .r #tab emit wf-name 2@ type cr cell +loop ; : wf< over wf-count @ over wf-count @ 2dup = if 2drop >r wf-name 2@ r> wf-name 2@ compare 0> else u> nip nip endif ; cell- -1 cells , : partition \ partition array addr1 u1 into all elements less than pivot and all \ others, addr1 u2 and addr3 u3 are the two partitions. \ lessthan-xt ( elemptr1 elemptr2 -- f ) compares the two elements { lessthan-xt } over @ { pivot } begin 2dup u< while begin pivot over @ lessthan-xt execute while cell- repeat swap over @ over ! begin 2dup u> while pivot over @ lessthan-xt execute 0= while cell+ repeat then swap over @ over ! repeat drop pivot over ! ; : sort1 recursive >r begin 2dup u< while 2dup r@ partition rot over cell- r@ sort1 cell+ swap repeat rdrop 2drop ; stdin process-file word-pointers endwp @ cell- ' wf< sort1 output bye
wordfreq.ghc
-- $Id: wordfreq.ghc,v 1.2 2001/02/27 04:04:35 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- from Julian Assange -- compile with: -- ghc -O -package data wordfreq.hs -o wordfreq module Main where import List(sortBy) import Char(toLower,isLower) import FiniteMap(fmToList,emptyFM,addToFM_C) main = interact $ unlines . pretty . sort . fmToList . makemap . cwords . lower where pretty l = [pad 7 (show n) ++ "\t" ++ w \| (w,n) <- l] where pad n s = replicate (n - length s) ' ' ++ s sort = sortBy (\(w0,n0) (w1,n1) -> case compare n1 n0 of EQ -> compare w1 w0 x -> x) makemap = addFM emptyFM where addFM fm [] = fm addFM fm (x:xs) = addFM (addToFM_C (+) fm x 1) xs cwords s = case dropWhile (not . isLower) s of "" -> [] s' -> w : (cwords s'') where (w, s'') = span isLower s' lower = map toLower
wordfreq.guile
#!/usr/local/bin/guile \ -e main -s !# ;;; $Id: wordfreq.guile,v 1.2 2001/06/29 23:12:37 doug Exp $ ;;; http://www.bagley.org/~doug/shootout/ ;;; from Brad Knotwell (use-modules (ice-9 string-fun) (ice-9 common-list)) (use-modules (ice-9 format)) (define my-hash (make-hash-table 4000)) (define (print-sorted-hash) (define (display-elt elt) (display (format "~7D\t~a\n" (car elt) (cdr elt)))) (map display-elt (sort-list (hash-fold (lambda (x y z) (cons (cons y x) z)) '() my-hash) (lambda (x y) (or (>; (car x) (car y)) (and (= (car x) (car y)) (string>;=? (cdr x) (cdr y)))))))) (define (load-hash x . tl) (define (do-entry entry) (let ((entry-val (hash-ref my-hash entry))) (hash-set! my-hash entry (if entry-val (1+ entry-val) 1)))) (map do-entry (remove-if (lambda (x) (string=? x "")) (cons x tl)))) (define (main args) (do ((line (read-line) (read-line))) ((eof-object? line) (print-sorted-hash)) (separate-fields-discarding-char #\space (list->;string (map (lambda (x) (if (char-alphabetic? x) x #\space)) (string->;list (string-downcase line)))) load-hash)))
wordfreq.ici
// $Id: wordfreq.ici,v 1.0 2003/01/03 12:23:00 dada Exp $ // http://dada.perl.it/shootout // // contributed by Tim Long static counts = struct(); static tolower(s) { s = explode(s); forall (c, i in s) { if (c >= 'A' && c <= 'Z') s[i] += 'a' - 'A'; } return implode(s); } while (l = getline()) { forall (w in smash(l, #\w+#, "\\&")) { if (w ~ #[A-Z]#) w = tolower(w); if (counts[w] == NULL) counts[w] = 1; else ++counts[w]; } } out = array(); forall (c, w in counts) push(out, sprintf("%7d\t%s\n", c, w)); sort(out, [func(a, b){return a > b ? -1 : a < b;}]); put(implode(out));
wordfreq.icon
# -- mode: icon -- # $Id: wordfreq.icon,v 1.1 2000/12/25 06:58:04 doug Exp $ # http://www.bagley.org/~doug/shootout/ procedure main(argv) local wcount, buckets, w, c, pair, wl wcount := table(0) every wcount[words()] +:= 1 buckets := table() every w := key(wcount) do { c := wcount[w] / buckets[c] := list() push(buckets[c], w) } buckets := sort(buckets,1) while pair := pull(buckets) do { c := pair[1] wl := sort(pair[2],1) while (write(right(c,7), "\t", pull(wl))) } end procedure words() local line, word while line := read() do { line := map(line) line ? while tab(upto(&letters)) do { word := tab(many(&letters)) suspend(word) } } end
wordfreq.java
// $Id: wordfreq.java,v 1.3 2000/12/17 21:40:53 doug Exp $ // http://www.bagley.org/~doug/shootout/ // Collection class code is from my friend Phil Chu, Thanks Phil! import java.io.; import* java.util.; import* java.text.; class* Counter { int count = 1; } public class wordfreq { public static void main(String[] args) { wf(); } public static String padleft(String s,int n,char c) { int len = s.length(); if( len>=n ) return s; char[] buf = new char[n]; for( int i=0;i<n-len;i++ ) buf[i]=c; s.getChars(0,len,buf,n-len); return new String(buf); } public static void wf() { HashMap map = new HashMap(); try { Reader r = new BufferedReader(new InputStreamReader(System.in)); StreamTokenizer st = new StreamTokenizer(r); st.lowerCaseMode(true); st.whitespaceChars( 0, 64 ); st.wordChars(65, 90); st.whitespaceChars( 91, 96 ); st.wordChars(97, 122); st.whitespaceChars( 123, 255 ); int tt = st.nextToken(); while (tt != StreamTokenizer.TT_EOF) { if (tt == StreamTokenizer.TT_WORD) { if (map.containsKey(st.sval)) { ((Counter)map.get(st.sval)).count++; } else { map.put(st.sval, new Counter()); } } tt = st.nextToken(); } } catch (IOException e) { System.err.println(e); return; } Collection entries = map.entrySet(); // flatten the entries set into a vector for sorting Vector rev_wf = new Vector(entries); // Sort the vector according to its value Collections.sort(rev_wf, new Comparator() { public int compare(Object o1, Object o2) { // First sort by frequency int c = ((Counter)((Map.Entry)o2).getValue()).count - ((Counter)((Map.Entry)o1).getValue()).count; if (c == 0) { // Second sort by lexicographical order c = ((String)((Map.Entry)o2).getKey()).compareTo((String)((Map.Entry)o1).getKey()); } return c; } } ); Iterator it = rev_wf.iterator(); Map.Entry ent; String word; int count; while(it.hasNext()) { ent = (Map.Entry)it.next(); word = ((String)ent.getKey()); count = ((Counter)ent.getValue()).count; System.out.println(padleft(Integer.toString(count),7,' ') + "\t" + word); } } }
wordfreq.lcc
/* -- mode: c -- * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "simple_hash.h" typedef* int (comparator)(const* void , const* void ); int* cmp_hash(struct ht_node a, struct ht_node b) { int val = (b)->val - (a)->val; return((val == 0) ? strcmp((b)->key, (a)->key) : val); } int main() { int bufsize = 80; char buf = (char* )malloc(bufsize + 1); char* c; int i = 0; struct ht_ht ht = ht_create(75000); struct* ht_node sort_array, sort_tmp, node; while* ((c = getchar()) > 0) { if (isalpha(c)) { buf[i++] = tolower(c); if (i == bufsize) { bufsize = 2; buf = realloc(buf, bufsize + 1); } } else* { if (i > 0) { buf[i] = '\0'; ++(ht_find_new(ht, buf)->val); i = 0; } } } free(buf); sort_array = sort_tmp = malloc(sizeof(struct ht_node ) ht_count(ht)); for (node=ht_first(ht); (sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct* ht_node ), (comparator)cmp_hash); for* (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); ht_destroy(ht); return(0); }
wordfreq.lua
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- implemented by: Roberto Ierusalimschy -- this version reads 4K chunks of input at a time local words = {} -- list of all words (for sorting) local count = {} -- count occurrences of each word BUFSIZE = 2^12 while 1 do local lines, rest = read(BUFSIZE, "l") if lines == nil* then break end lines = lines..(rest or '') -- ensures whole lines gsub(strlower(lines), "(%l+)", function (w) local cw = %count[w] if cw == nil then -- first occurrence? cw = 0 tinsert(%words, w) end %count[w] = cw + 1 end) end sort(words, function (a,b) return %count[a] > %count[b] or (%count[a] == %count[b] and a > b) end) for i=1,getn(words) do local w = words[i] write(format("%7d\t%s\n", count[w], w)) end
wordfreq.lua5
-- $Id: wordfreq.lua,v 1.3 2000/12/21 03:20:30 doug Exp $ -- http://www.bagley.org/~doug/shootout/ -- contributed by Roberto Ierusalimschy -- this version reads 4K chunks of input at a time local words = {} -- list of all words (for sorting) local count = {} -- count occurrences of each word local BUFSIZE = 2^12 while true do local lines, rest = io.read(BUFSIZE, "l") if lines == nil* then break end lines = lines..(rest or '') -- ensures whole lines for w in string.gfind(string.lower(lines), "(%l+)") do local cw = count[w] if not cw then -- first occurrence? cw = 0 table.insert(words, w) end count[w] = cw + 1 end end table.sort(words, function (a,b) return count[a] > count[b] or (count[a] == count[b] and a > b) end) for i=1,table.getn(words) do local w = words[i] io.write(string.format("%7d\t%s\n", count[w], w)) end
wordfreq.mawk
# $Id: wordfreq.mawk,v 1.2 2001/05/20 06:13:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ BEGIN { delete ARGV; FS = "[^A-Za-z][^A-Za-z]"; } { for* (i=1; i<=NF; i++) { freq[tolower($(i))]++; } } END { # gawk doesn't have a builtin sort routine # so we have to pipe through the shell sort program sort = "/bin/sort -nr" for (word in freq) { if (word) { printf "%7d\t%s\n", freq[word], word \| sort } } close(sort) }
wordfreq.mercury
% ---------------------------------------------------------------------------- % % wordfreq.m % Ralph Becket <rbeck@microsoft.com> % Tue Jan 9 15:56:12 GMT 2001 % vim: ts=4 sw=4 et tw=0 wm=0 ff=unix % % ---------------------------------------------------------------------------- % :- module mytest. :- interface. :- import_module io. :- pred main(io__state, io__state). :- mode main(di, uo) is det. :- implementation. :- import_module string, int, list, require, std_util, char, map. main --> count_word_freqs(map__init, FreqMap), { X = list__sort(list__map(func(K-V) = V-K, map__to_assoc_list(FreqMap))) }, { Y = list__reverse(X) }, io__write_list(Y, "\n", write_freq_word), io__nl. :- pred count_word_freqs(map(string,int),map(string,int),io__state,io__state). :- mode count_word_freqs(in, out, di, uo) is det. count_word_freqs(FreqMap0, FreqMap) --> io__read_line_as_string(Result), ( { Result = ok(Line) }, { Words = string__words(isnt(char__is_alpha),string__to_lower(Line)) }, { Update = ( func(S, FM) = ( if map__search(FM, S, N) then FM ^ elem(S) := N + 1 else FM ^ elem(S) := 1 ) ) }, { FreqMap1 = list__foldl(Update, Words, FreqMap0) }, count_word_freqs(FreqMap1, FreqMap) ; { Result = eof }, { FreqMap = FreqMap0 } ; { Result = error(ErrNo) }, { error(io__error_message(ErrNo)) } ). :- pred write_freq_word(pair(int, string), io__state, io__state). :- mode write_freq_word(in, di, uo) is det. write_freq_word(Freq - Word) --> io__format("%7d\t%s", [i(Freq), s(Word)]).
wordfreq.mingw32
/* -- mode: c -- * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "simple_hash.h" typedef* int (comparator)(const* void , const* void ); int* cmp_hash(struct ht_node a, struct ht_node b) { int val = (b)->val - (a)->val; return((val == 0) ? strcmp((b)->key, (a)->key) : val); } int main() { int bufsize = 80; char buf = (char* )malloc(bufsize + 1); char* c; int i = 0; struct ht_ht ht = ht_create(75000); struct* ht_node sort_array, sort_tmp, node; while* ((c = getchar()) > 0) { if (isalpha(c)) { buf[i++] = tolower(c); if (i == bufsize) { bufsize = 2; buf = realloc(buf, bufsize + 1); } } else* { if (i > 0) { buf[i] = '\0'; ++(ht_find_new(ht, buf)->val); i = 0; } } } free(buf); sort_array = sort_tmp = malloc(sizeof(struct ht_node ) ht_count(ht)); for (node=ht_first(ht); (sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct* ht_node ), (comparator)cmp_hash); for* (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); ht_destroy(ht); return(0); }
wordfreq.ocaml
(* * $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $ * http://www.bagley.org/~doug/shootout/ ) let* max = 4096 let buf = String.create max let count = Hashtbl.create 75000 let wbuf = Buffer.create 64 let rec scan_words i n inword = if i < n then let c = buf.[i] in if 'a' <= c && c <= 'z' then begin Buffer.add_char wbuf c; scan_words (i+1) n true end else if 'A' <= c && c <= 'Z' then begin Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32)); scan_words (i+1) n true end else if inword then begin let word = Buffer.contents wbuf in begin try incr (Hashtbl.find count word) with Not_found -> Hashtbl.add count word (ref 1) end; Buffer.clear wbuf; scan_words (i+1) n false end else scan_words (i+1) n false else let nread = input stdin buf 0 max in if nread = 0 then () else scan_words 0 nread inword let _ = scan_words 0 0 false; let out_lines = ref [] in let to_list l w c = l := (Printf.sprintf "%7d\t%s" !c w) :: !l in Hashtbl.iter (to_list out_lines) count; List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)
wordfreq.ocamlb
(* * $Id: wordfreq.ocaml,v 1.8 2001/05/26 01:48:48 doug Exp $ * http://www.bagley.org/~doug/shootout/ ) let* max = 4096 let buf = String.create max let count = Hashtbl.create 75000 let wbuf = Buffer.create 64 let rec scan_words i n inword = if i < n then let c = buf.[i] in if 'a' <= c && c <= 'z' then begin Buffer.add_char wbuf c; scan_words (i+1) n true end else if 'A' <= c && c <= 'Z' then begin Buffer.add_char wbuf (Char.unsafe_chr(Char.code c + 32)); scan_words (i+1) n true end else if inword then begin let word = Buffer.contents wbuf in begin try incr (Hashtbl.find count word) with Not_found -> Hashtbl.add count word (ref 1) end; Buffer.clear wbuf; scan_words (i+1) n false end else scan_words (i+1) n false else let nread = input stdin buf 0 max in if nread = 0 then () else scan_words 0 nread inword let _ = scan_words 0 0 false; let out_lines = ref [] in let to_list l w c = l := (Printf.sprintf "%7d\t%s" !c w) :: !l in Hashtbl.iter (to_list out_lines) count; List.iter print_endline (List.sort (fun a b -> compare b a) !out_lines)
wordfreq.perl
#!/usr/local/bin/perl # $Id: wordfreq.perl,v 1.13 2001/05/16 23:46:40 doug Exp $ # http://www.bagley.org/~doug/shootout/ # Tony Bowden suggested using tr versus lc and split(/[^a-z]/) use strict; my %count = (); while (read(STDIN, $_, 4095) and $_ .= <STDIN>) { tr/A-Za-z/ /cs; ++$count{$_} foreach split(' ', lc $_); } my @lines = (); my ($w, $c); push(@lines, sprintf("%7d\t%s\n", $c, $w)) while (($w, $c) = each(%count)); print sort { $b cmp $a } @lines;
wordfreq.pike
#!/usr/local/bin/pike// -- mode: pike -- // $Id: wordfreq.pike,v 1.3 2000/12/05 16:04:07 doug Exp $ // http://www.bagley.org/~doug/shootout/ // from: Fredrik Noring void main() { mapping(string:int) dictionary = ([]); string buffer = ""; array(string) f = filter(map(enumerate(128), lambda(int i) { return !('A' <= i && i <= 'Z' \|\| 'a' <= i && i <= 'z' \|\| i == ' ') && sprintf("%c", i); }), `!=, 0); array(string) t = allocate(sizeof(f), " "); for(;;) { string data = buffer + replace(lower_case(Stdio.stdin.read(4096)), f, t); if(!sizeof(data)) break; array(string) words = data/" "; if(1 < sizeof(words) && sizeof(words[-1])) buffer = words[-1], words = words[..sizeof(words)-2]; else buffer = ""; foreach(words, string word) dictionary[word]++; } m_delete(dictionary, ""); mapping(int:array(string)) revdictionary = ([]); array(string) words = indices(dictionary); array(int) freqs = values(dictionary); for(int i = 0; i < sizeof(dictionary); i++) revdictionary[freqs[i]] += ({ words[i] }); freqs = sort(indices(revdictionary)); for(int i = sizeof(freqs)-1; 0 <= i; i--) { int freq = freqs[i]; words = sort(revdictionary[freq]); for(int j = sizeof(words)-1; 0 <= j; j--) write("%7d\t%s\n", freq, words[j]); } }
wordfreq.pliant
# $Id: wordfreq.pliant,v 1.1 2002/03/18 14:07:00 dada Exp $ # http://dada.perl.it/shootout/ module "/pliant/language/unsafe.pli" module "/pliant/language/context.pli" module "/pliant/language/stream.pli" module "/pliant/language/stream/pipe.pli" function heapsort n ra arg Int n ; arg_rw Array:Str ra var Str rra var Int i var Int j var Int l := (n\2) + 1 var Int ir := n part heapsort_loop if l>1 l := l - 1 rra := ra:l else rra := ra:ir ra:ir := ra:1 ir := ir - 1 if ir=1 ra:1 := rra leave heapsort_loop i := l j := l2 while* j<=ir if j<ir and ra:j > ra:(j+1) j := j + 1 if rra > ra:j ra:i := ra:j i := j j := j + i else j := ir + 1 ra:i := rra restart heapsort_loop gvar Stream stdin gvar Str line := "" gvar Str l gvar Str word gvar Address Buf gvar Int ReadCount gvar CBool ok gvar Int ReadSize := 4096 gvar Int eolpos gvar Int i gvar Int nl := 0 gvar Int nw := 0 gvar Int nc := 0 gvar Char ch gvar Int j gvar Array:Str lines gvar (Dictionary Str Int) count gvar Pointer:Int v gvar Str k stdin open "handle:0" in while not stdin:atend line := stdin readline i := 0 while i<line:len ch := line:i if ch>="a" and ch<="z" or ch>="A" and ch<="Z" j := i+1 part find_word if j<line:len ch := line:j if ch>="a" and ch<="z" or ch>="A" and ch<="Z" j += 1 restart find_word word := lower (line i j-i) if (count exists word) count:word += 1 else count insert word 1 i += j-i else i += 1 lines += "" v :> count first k := count key v while addressof:v<>null if addressof:(count first k)<>null l := (string count:k) l := (repeat 7-l:len " ")+l line := l+"[tab]"+k+"[lf]" lines += line # console k "=" count:k eol v :> count next v if addressof:v<>null k := count key v heapsort lines:size-1 lines for i 0 lines:size-1 console lines:i
wordfreq.poplisp
;;; -- mode: lisp -- ;;; $Id: wordfreq.cmucl,v 1.2 2001/06/05 22:23:23 doug Exp $ ;;; http://www.bagley.org/~doug/shootout/ ;;; From Bulent Murtezaoglu, based on code from Andrew McDowell ;;; The Block/Buffering I/O code is adapted from material posted to comp.lang.lisp ;;; by Thomas Kirk <tk@research.att.com>. ;;; Archived here: http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/lang/lisp/code/io/fast_io/fast_io.txt ;;; (declaim (optimize (speed 3) (debug 0) (safety 0) (space 0) (compilation-speed 0))) (let* ((start 0) (current 0) (end 0) (eof nil) (inputbuffer (make-array 4096 :element-type '(unsigned-byte 8)))) (declare (type (simple-array (unsigned-byte 8)) inputbuffer) (fixnum start end) (inline code-char) (type (unsigned-byte 8) current)) (labels ((get-char () ;;don't do this at home works through side effect unless eof (when (= start end) (setf start 0) (setf end (read-sequence inputbuffer standard-input)) (when (zerop end) (setf eof t) (return-from get-char nil))) (setf current (aref inputbuffer start)) (incf start)) (word-constituent-p () ;; side effects current rets t if word-constituent (or (and (>;= current 97) (<= current 122)) (and (>;= current 65) (<= current 90) (incf current 32)))) (reader (buffer) (declare (type simple-base-string buffer)) (let* ((size (length buffer)) (fill-ptr 0)) (declare (type (integer 0 1000) size fill-ptr)) (loop while (and (or (get-char) (return-from reader buffer)) (not (word-constituent-p)))) (loop (when (= fill-ptr size) (let ((new-buffer (make-string (the fixnum (setf size (the fixnum (+ fill-ptr 5))))))) (setf (subseq (the simple-base-string new-buffer) 0 fill-ptr) buffer) (setf buffer new-buffer))) (setf (schar buffer fill-ptr) (code-char current)) (incf fill-ptr) (if (get-char) (if (not (word-constituent-p)) (return)) (return))) (lisp::shrink-vector buffer fill-ptr)))) (let* ((h (make-hash-table :test 'equal :size 3380 :rehash-threshold 1.0)) (buffer (make-string 10))) (declare (inline gethash maphash)) (declare (type simple-base-string buffer)) (loop (setf buffer (reader buffer)) (if (not eof) (if (= 1 (the fixnum (incf (the fixnum (gethash buffer h 0))))) (setf buffer (make-string 10)));; only cons if used (return))) ;; hast table => list (let ((l '())) (maphash #'(lambda (key val) (push (cons key val) l)) h) ;; sort the list (setf l (sort l #'(lambda (v1 v2) (if (>; (the fixnum (cdr v1)) (the fixnum (cdr v2))) t (if (= (the fixnum (cdr v1)) (the fixnum (cdr v2))) (string-lessp (car v2) (car v1)) nil))))) ;; output the list of pairs (mapcar #'(lambda (p) (format t "~7D ~A~&" (cdr p) (car p))) l)))))
wordfreq.python
#!/usr/local/bin/python # $Id: wordfreq.python,v 1.9 2001/05/11 17:44:00 doug Exp $ # http://www.bagley.org/~doug/shootout/ # # adapted from Bill Lear's original python word frequency counter # # Joel Rosdahl suggested using translate table to speed up # word splitting. That change alone sped this program up by # at least a factor of 3. # # with further speedups from Mark Baker import sys def main(): count = {} i_r = map(chr, range(256)) trans = [' '] * 256 o_a, o_z = ord('a'), (ord('z')+1) trans[ord('A'):(ord('Z')+1)] = i_r[o_a:o_z] trans[o_a:o_z] = i_r[o_a:o_z] trans = ''.join(trans) rl = sys.stdin.readlines lines = rl(4095) while lines: for line in lines: for word in line.translate(trans).split(): try: count[word] += 1 except KeyError: count[word] = 1 lines = rl(4095) l = zip(count.values(), count.keys()) l.sort() l.reverse() print '\n'.join(["%7s\t%s" % (count, word) for (count, word) in l]) main()
wordfreq.rexx
count. = 0 ws = '' transtab = XRANGE('A', 'Z') \|\| XRANGE('a', 'z') transin = XRANGE('A', 'Z') transout = XRANGE('a', 'z') notword = XRANGE('00'x, D2C(C2D('a')-1)) \|\| XRANGE(D2C(C2D('z')+1), 'FF'x) DO UNTIL LINES() = 0 PARSE LINEIN L L = TRANSLATE(TRANSLATE(L, transout, transin), '', notword, ' ') DO i = 1 TO WORDS(L) w = STRIP(WORD(L, i)) count.w = count.w + 1 IF count.w = 1 THEN ws = ws w END END STREAM('rexx_tmp', 'C', 'CREATE') DO i = 1 to WORDS(ws) w = WORD(ws, i) IF count.w > 0 THEN DO line = format(count.w, 7) \|\| '09'x \|\| w CALL LINEOUT('rexx_tmp', line) END END 'c:\cygwin\bin\sort -nr rexx_tmp >rexx_sorted' DO WHILE LINES('rexx_sorted') > 0 SAY LINEIN('rexx_sorted') END /* STREAM('rexx_tmp', 'C', 'CLOSE') STREAM('rexx_sorted', 'C', 'CLOSE') 'del rexx_tmp' 'del rexx_sorted' */
wordfreq.ruby
#!/usr/local/bin/ruby # -- mode: ruby -- # $Id: wordfreq.ruby,v 1.9 2001/05/16 23:46:40 doug Exp $ # http://www.bagley.org/~doug/shootout/ freq = Hash.new(0) loop { data = (STDIN.read(4095) or break) << (STDIN.gets \|\| "") for word in data.downcase.tr_s('^A-Za-z',' ').split(' ') freq[word] += 1 end } freq.delete("") lines = Array.new freq.each{\|w,c\| lines << sprintf("%7d\t%s\n", c, w) } print lines.sort.reverse
wordfreq.slang
% $Id: wordfreq.slang,v 1.0 2003/01/03 14:43:00 dada Exp $ % http://dada.perl.it/shootout/ % % contributed by John E. Davis define main() { variable count = Assoc_Type[Int_Type, 0]; foreach (stdin) using ("line") { variable line = (); foreach (strtok (strlow (line), "^a-z")) { variable word = (); count [word] += 1; } } variable words = assoc_get_keys (count); variable freqs = assoc_get_values (count); variable i = array_sort (array_map (String_Type, &sprintf, "%12d", freqs) + words); i = i[[-1:0:-1]]; foreach (i) { i = (); ()=fprintf (stdout, "%7d\t%s\n", freqs[i], words[i]); } } main();
wordfreq.smlnj
(* -- mode: sml -- * $Id: wordfreq.smlnj,v 1.3 2001/07/09 00:25:29 doug Exp $ * http://www.bagley.org/~doug/shootout/ * from Stephen Weeks ) fun* for (start, stop, f) = let fun loop i = if i > stop then () else (f i; loop (i + 1)) in loop start end fun incr r = r := 1 + !r val sub = Array.sub val update = Array.update signature HASH_SET = sig type 'a t val foreach: 'a t * ('a -> unit) -> unit (* lookupOrInsert (s, h, p, f) looks in the set s for an entry with hash h * satisfying predicate p. If the entry is there, it is returned. * Otherwise, the function f is called to create a new entry, which is * inserted and returned. ) val* lookupOrInsert: 'a t * word * ('a -> bool) * (unit -> 'a) -> 'a val new: {hash: 'a -> word} -> 'a t val size: 'a t -> int end structure HashSet: HASH_SET = struct datatype 'a t = T of {buckets: 'a list array ref, hash: 'a -> word, mask: word ref, numItems: int ref} val initialSize: int = 65536 val initialMask: word = Word.fromInt initialSize - 0w1 fun 'a new {hash}: 'a t = T {buckets = ref (Array.array (initialSize, [])), hash = hash, numItems = ref 0, mask = ref initialMask} fun size (T {numItems, ...}) = !numItems fun numBuckets (T {buckets, ...}) = Array.length (!buckets) fun index (w: word, mask: word): int = Word.toInt (Word.andb (w, mask)) fun resize (T {buckets, hash, mask, ...}, size: int, newMask: word): unit = let val newBuckets = Array.array (size, []) in Array.app (fn r => List.app (fn a => let val j = index (hash a, newMask) in Array.update (newBuckets, j, a :: Array.sub (newBuckets, j)) end) r) (!buckets) ; buckets := newBuckets ; mask := newMask end fun maybeGrow (s as T {buckets, mask, numItems, ...}): unit = let val n = Array.length (!buckets) in if !numItems * 4 > n then resize (s, n * 2, Word.orb (0w1, Word.<< (!mask, 0w1))) else () end fun peekGen (T {buckets = ref buckets, mask, ...}, w, p, no, yes) = let val j = index (w, !mask) val b = Array.sub (buckets, j) in case List.find p b of NONE => no (j, b) \| SOME a => yes a end fun lookupOrInsert (table as T {buckets, numItems, ...}, w, p, f) = let fun no (j, b) = let val a = f () val _ = incr numItems val _ = Array.update (!buckets, j, a :: b) val _ = maybeGrow table in a end in peekGen (table, w, p, no, fn x => x) end fun foreach (T {buckets, ...}, f) = Array.app (fn r => List.app f r) (!buckets) end structure Buffer: sig type t val add: t * Word8.word -> unit val clear: t -> unit val contents: t -> string val new: int -> t end = struct datatype t = T of {elts: Word8Array.array ref, size: int ref} fun contents (T {elts, size, ...}) = Byte.bytesToString (Word8Array.extract (!elts, 0, SOME (!size))) fun clear (T {size, ...}) = size := 0 fun new (bufSize) = T {elts = ref (Word8Array.array (bufSize, 0w0)), size = ref 0} fun add (T {elts, size}, x) = let val s = !size val _ = size := s + 1 val a = !elts val n = Word8Array.length a in if s = n then let val a' = Word8Array.tabulate (2 * n, fn i => if i < n then Word8Array.sub (a, i) else 0w0) val _ = elts := a' val _ = Word8Array.update (a', s, x) in () end else Word8Array.update (a, s, x) end end structure Quicksort: sig val quicksort: 'a array * ('a * 'a -> bool) -> unit end = struct fun assert (s, f: unit -> bool) = if true orelse f () then () else raise Fail (concat ["assert: ", s]) fun forall (low, high, f) = let fun loop i = i > high orelse (f i andalso loop (i + 1)) in loop low end fun fold (l, u, state, f) = let fun loop (i, state) = if i > u then state else loop (i + 1, f (i, state)) in loop (l, state) end fun 'a isSorted (a: 'a array, lo: int, hi: int, op <= : 'a * 'a -> bool) = let fun loop (i, x) = i > hi orelse let val y = sub (a, i) in x <= y andalso loop (i + 1, y) end in lo >= hi orelse loop (lo + 1, sub (a, lo)) end local open Word val seed = ref 0w13 in fun rand () = let val res = 0w1664525 * !seed + 0w1013904223 val _ = seed := res in toIntX res end end fun randInt (lo, hi) = lo + Int.mod (rand(), hi - lo + 1) fun insertionSort (a: 'a array, op <= : 'a * 'a -> bool): unit = let fun x i = sub (a, i) in for (1, Array.length a - 1, fn i => let val _ = assert ("insertionSort1", fn () => isSorted (a, 0, i - 1, op <=)) val t = x i fun sift (j: int) = (assert ("insertionSort2", fn () => isSorted (a, 0, j - 1, op <=) andalso isSorted (a, j + 1, i, op <=) andalso forall (j + 1, i, fn k => t <= x k)) ; if j > 0 then let val j' = j - 1 val z = x j' in if t <= z then (update (a, j, z); sift j') else j end else j) val _ = update (a, sift i, t) in () end) end fun 'a quicksort (a: 'a array, op <= : 'a * 'a -> bool): unit = let fun x i = Array.sub (a, i) fun swap (i, j) = let val t = x i val _ = update (a, i, x j) val _ = update (a, j, t) in () end val cutoff = 20 fun qsort (l: int, u: int): unit = if u - l > cutoff then let val _ = swap (l, randInt (l, u)) val t = x l val m = fold (l + 1, u, l, fn (i, m) => (assert ("qsort", fn () => forall (l + 1, m, fn k => x k <= t) andalso forall (m + 1, i - 1, fn k => not (x k <= t))) ; if x i <= t then (swap (m + 1, i) ; m + 1) else m)) val _ = swap (l, m) val _ = qsort (l, m - 1) val _ = qsort (m + 1, u) in () end else () val max = Array.length a - 1 val _ = qsort (0, max) val _ = insertionSort (a, op <=) in () end end structure Test : sig val main : (string * string list) -> OS.Process.status end = struct (* This hash function is taken from pages 56-57 of * The Practice of Programming by Kernighan and Pike. ) fun* hash (s: string): word = let val n = String.size s fun loop (i, w) = if i = n then w else Word.fromInt (Char.ord (String.sub (s, i))) + Word.* (w, 0w31) in loop (0, 0w0) end fun hash (s: string): word = let val n = String.size s fun loop (i, w) = if i = n then w else loop (i + 1, Word.fromInt (Char.ord (String.sub (s, i))) + Word.* (w, 0w31)) in loop (0, 0w0) end val max = 4096 val buf = Word8Array.array (max, 0w0) val count: {hash: word, word: string, count: int ref} HashSet.t = HashSet.new {hash = #hash} val wbuf = Buffer.new 64 val c2b = Byte.charToByte fun scan_words (i, n, inword) = if i < n then let val c = Word8Array.sub (buf, i) in if c2b #"a" <= c andalso c <= c2b #"z" then (Buffer.add (wbuf, c); scan_words (i + 1, n, true)) else if c2b #"A" <= c andalso c <= c2b #"Z" then (Buffer.add (wbuf, c + 0w32); scan_words (i + 1, n, true)) else if inword then let val w = Buffer.contents wbuf val h = hash w in incr (#count (HashSet.lookupOrInsert (count, h, fn {hash, word, ...} => hash = h andalso word = w, fn () => {hash = h, word = w, count = ref 0}))); Buffer.clear wbuf; scan_words (i + 1, n, false) end else scan_words (i + 1, n, false) end else let val nread = Posix.IO.readArr (Posix.FileSys.stdin, {buf = buf, i = 0, sz = NONE}) in if nread = 0 then () else scan_words (0, nread, inword) end fun printl [] = print "\n" \| printl(h::t) = ( print h ; printl t ) fun rightJustify (s: string, width: int) = let val n = String.size s in concat [CharVector.tabulate (width - n, fn _ => #" "), s] end fun main (name, args) = let val _ = scan_words (0, 0, false) val a = Array.array (HashSet.size count, (0, "")) val i = ref 0 val _ = HashSet.foreach (count, fn {word, count, ...} => (Array.update (a, !i, (!count, word)); incr i)) val _ = Quicksort.quicksort (a, fn ((c, w), (c', w')) => c > c' orelse c = c' andalso w >= w') val _ = Array.app (fn (c, w) => printl [rightJustify (Int.toString c, 7), "\t", w]) a in OS.Process.success end end val _ = SMLofNJ.exportFn("wordfreq", Test.main);
wordfreq.tcl
#!/usr/local/bin/tclsh # $Id: wordfreq.tcl,v 1.11 2001/05/17 00:49:34 doug Exp $ # http://www.bagley.org/~doug/shootout/ # with help from: Tom Wilkason and Branko Vesligaj proc main {} { while {1} { set data [read stdin 4096] if {[string equal $data {}]} {break} if {[gets stdin extra] >= 0} { append data $extra } regsub -all {[^[:alpha:]]+} $data { } line foreach word [string tolower $line] { if {[catch {incr count($word)}]} { set count($word) 1 } } } foreach {word cnt} [array get count] { lappend lines [format "%7d\t%s" $cnt $word] } puts [join [lsort -decreasing $lines] "\n"] } main
wordfreq.vbscript
<job> <script language=JScript runat=server> function Descending(a, b) { return ((b > a) ? 1 : ((b < a) ? -1 : 0)); } function SortVBArray(arrVBArray) { return arrVBArray.toArray().sort(Descending).join('@'); } </script> <script language=VBScript> Function SortArray(arrInput) SortArray = Split(SortVBArray(arrInput), "@") End Function Set Count = CreateObject("Scripting.Dictionary") Blob = WScript.StdIn.ReadAll Lines = Split(Blob, vbCrLf) For Each L in Lines Line = Trim(LCase(L)) For B = 1 To Len(Line) C = Asc(Mid(Line, B, 1)) If C <> Asc(" ") And (C < Asc("a") Or C > Asc("z")) Then 'WSCript.Echo(Line) 'WScript.Echo(String(B-1, " ") & "^") Line = Left(Line, B-1) & " " & Mid(Line, B+1) 'WSCript.Echo(Line) 'WScript.Echo(String(B-1, " ") & "^") End If Next Words = Split(Line, " ") For Each Word in Words If Word <> " " And Word <> "" Then If Count.Exists(Word) Then Count.Item(Word) = Count.Item(Word) + 1 Else Count.Item(Word) = 1 End If End If Next Next K = Count.Keys Redim Lines(Count.Count-1) For A = 0 To Count.Count-1 N = CStr(Count.Item(K(A))) If Len(N) < 7 Then N = String(7-Len(N), " ") & N Lines(A) = N & Chr(9) & K(A) Next SortedLines = SortArray(Lines) For A = LBound(SortedLines) To UBound(SortedLines) WScript.Echo(SortedLines(A)) Next </script> </job>
wordfreq.vc
/* -- mode: c -- * $Id: wordfreq.gcc,v 1.4 2001/01/07 19:24:51 doug Exp $ * http://www.bagley.org/~doug/shootout/ / #include <stdio.h> #include <ctype.h> #include <malloc.h> #include <stdlib.h> #include <string.h> #include "simple_hash.vc.h" typedef* int (comparator)(const* void , const* void ); int* cmp_hash(struct ht_node a, struct ht_node b) { int val = (b)->val - (a)->val; return((val == 0) ? strcmp((b)->key, (a)->key) : val); } int main() { int bufsize = 80; char buf = (char* )malloc(bufsize + 1); char* c; int i = 0; struct ht_ht ht = ht_create(75000); struct* ht_node sort_array, sort_tmp, node; while* ((c = getchar()) > 0) { if (isalpha(c)) { buf[i++] = tolower(c); if (i == bufsize) { bufsize = 2; buf = realloc(buf, bufsize + 1); } } else* { if (i > 0) { buf[i] = '\0'; ++(ht_find_new(ht, buf)->val); i = 0; } } } free(buf); sort_array = sort_tmp = malloc(sizeof(struct ht_node ) ht_count(ht)); for (node=ht_first(ht); (sort_tmp++ = node) != 0; node=ht_next(ht)) ; qsort(sort_array, ht_count(ht), sizeof(struct* ht_node ), (comparator)cmp_hash); for* (i=0; i<ht_count(ht); i++) printf("%7d\t%s\n", ht_val(sort_array[i]), ht_key(sort_array[i])); ht_destroy(ht); return(0); }