%%% -*- mode: erlang -*-
%%% $Id: wordfreq.erlang,v 1.8 2000/12/31 22:54:17 doug Exp $
%%% http://www.bagley.org/~doug/shootout/
%% Use ETS tables (Erlang's associative store).
-module(wordfreq).
-export([main/0, main/1]).
%% main() -> no return
%% Entry point for running without arguments: delegates to main/1 with a
%% placeholder argument list.
main() ->
    DefaultArgs = ['1'],
    main(DefaultArgs).
%% main(Args) -> no return
%% Prints one line per {Word, Count} pair returned by sortedfreqs/0 -- the
%% count right-justified in a 7-column field, a tab, then the word -- and
%% halts the emulator with exit status 0.
%% The argument is accepted for interface compatibility but is not used.
main(_Args) ->
    PrintEntry = fun({Word, Count}) ->
        io:format("~7w\t~s~n", [Count, Word])
    end,
    lists:foreach(PrintEntry, sortedfreqs()),
    halt(0).
%% sortedfreqs() -> [{Word, Count}]
%% Opens a line-oriented port on file descriptors 0 (in) and 1 (out), counts
%% the words read from it into a fresh ordered_set ETS table, and returns the
%% {Word, Count} pairs ordered by descending count.
sortedfreqs() ->
    Stdin = open_port({fd, 0, 1}, [eof, {line, 512}]),
    Freqs = ets:new(freqtab, [ordered_set]),
    Counted = count_words_from_stream(Freqs, Stdin),
    %% keysort/2 is stable and sorts ascending on the count; reversing the
    %% result yields descending counts (and reverses the order of ties too).
    ByCountAscending = lists:keysort(2, Counted),
    lists:reverse(ByCountAscending).
%% count_words_from_stream(Table, Port) -> [{Word, Count}]
%% Consumes line messages from Port until eof, counting every word into
%% Table, and returns the table contents as a list.
%%
%% Port is expected to have been opened with the `eof' and `{line, N}'
%% options. A line longer than N bytes is delivered as several `noeol'
%% chunks; the trailing run of letters of such a chunk may be the first half
%% of a word that continues in the next message, so it is carried over
%% instead of being counted on its own.
count_words_from_stream(Table, Port) ->
    count_words_from_stream(Table, Port, []).

%% Pending holds the raw characters of a possibly unfinished word left over
%% from the previous `noeol' chunk (always [] right after a complete line).
count_words_from_stream(Table, Port, Pending) ->
    receive
        {Port, eof} ->
            %% Input ended without a newline: whatever is pending is a word.
            count_words([], Pending, Table),
            %% Drop the bookkeeping entry for the empty word, if one exists.
            ets:delete(Table, ''),
            ets:tab2list(Table);
        {Port, {data, {eol, Line}}} ->
            count_words([], Pending ++ Line, Table),
            count_words_from_stream(Table, Port, []);
        {Port, {data, {noeol, Chunk}}} ->
            {Complete, Partial} = split_trailing_letters(Pending ++ Chunk),
            count_words([], Complete, Table),
            count_words_from_stream(Table, Port, Partial)
    end.

%% Splits Chars into {Head, Tail} where Tail is the longest suffix made only
%% of ASCII letters and Head is everything before it.
split_trailing_letters(Chars) ->
    {RevLetters, RevRest} =
        lists:splitwith(fun is_ascii_letter/1, lists:reverse(Chars)),
    {lists:reverse(RevRest), lists:reverse(RevLetters)}.

%% True for the characters that count_words/3 treats as part of a word.
is_ascii_letter(C) ->
    (C >= $a andalso C =< $z) orelse (C >= $A andalso C =< $Z).
%% count_word(Word, Table) -> true
%% Records one occurrence of Word (a string) in the ETS table Table.
%%
%% The word itself is used as the key. Converting input text to atoms is
%% avoided on purpose: atoms are never garbage-collected, so arbitrary input
%% could exhaust the atom table. Strings sort the same way as the equivalent
%% atoms and print identically with ~s.
%%
%% The empty word is ignored, so no entry is ever created for it.
count_word([], _Table) ->
    true;
count_word(Word, Table) ->
    %% The default object {Word, 0} is inserted first when the key is absent,
    %% so a new word ends up with a count of 1 without any exception handling.
    ets:update_counter(Table, Word, 1, {Word, 0}),
    true.
%% count_words(Word_Accumulator, Line_of_Chars, Table) -> true
%% Scans Line_of_Chars and counts every word in it via count_word/2.
%% A word is a maximal run of ASCII letters; upper-case letters are folded to
%% lower case. Word_Accumulator is the already-scanned (lower-case) beginning
%% of the current word, in reading order; pass [] to start on a word boundary.
count_words(Word, Line, Table) ->
    %% Internally the word is built in reverse so each letter is an O(1) cons
    %% instead of an O(length) append.
    scan_words(lists:reverse(Word), Line, Table).

%% scan_words(ReversedWord, Chars, Table)
scan_words(RevWord, [H | T], Table) when H >= $a, H =< $z ->
    scan_words([H | RevWord], T, Table);
scan_words(RevWord, [H | T], Table) when H >= $A, H =< $Z ->
    scan_words([(H - $A) + $a | RevWord], T, Table);
scan_words(RevWord, [_ | T], Table) ->
    %% A non-word character ends the current word (if any).
    flush_word(RevWord, Table),
    scan_words([], T, Table);
scan_words(RevWord, [], Table) ->
    flush_word(RevWord, Table).

%% Counts the accumulated word; an empty accumulator means there is no word
%% to count (e.g. two consecutive separators).
flush_word([], _Table) ->
    true;
flush_word(RevWord, Table) ->
    count_word(lists:reverse(RevWord), Table).