;;; -*- mode: lisp -*-
;;; $Id: wordfreq.cmucl,v 1.2 2001/06/05 22:23:23 doug Exp $
;;; http://www.bagley.org/~doug/shootout/
;;; From Bulent Murtezaoglu, based on code from Andrew McDowell
;;; The Block/Buffering I/O code is adapted from material posted to comp.lang.lisp
;;; by Thomas Kirk <tk@research.att.com>.
;;; Archived here: http://www.cs.cmu.edu/afs/cs/project/ai-repository/ai/lang/lisp/code/io/fast_io/fast_io.txt
;;;
(declaim (optimize (speed 3) (debug 0) (safety 0) (space 0) (compilation-speed 0)))
(let* ((start 0)
(current 0)
(end 0)
(eof nil)
(inputbuffer (make-array 4096 :element-type '(unsigned-byte 8))))
(declare (type (simple-array (unsigned-byte 8)) inputbuffer)
(fixnum start end)
(inline code-char)
(type (unsigned-byte 8) current))
(labels
((get-char () ;;don't do this at home works through side effect unless eof
(when (= start end)
(setf start 0)
(setf end (read-sequence inputbuffer *standard-input*))
(when (zerop end)
(setf eof t)
(return-from get-char nil)))
(setf current (aref inputbuffer start))
(incf start))
(word-constituent-p () ;; side effects current rets t if word-constituent
(or (and (>= current 97) (<= current 122))
(and (>= current 65) (<= current 90)
(incf current 32))))
(reader (buffer)
(declare (type simple-base-string buffer))
(let* ((size (length buffer))
(fill-ptr 0))
(declare (type (integer 0 1000) size fill-ptr))
(loop while (and (or (get-char) (return-from reader buffer))
(not (word-constituent-p))))
(loop
(when (= fill-ptr size)
(let ((new-buffer
(make-string (the fixnum
(setf size (the fixnum (+ fill-ptr 5)))))))
(setf (subseq (the simple-base-string new-buffer) 0 fill-ptr)
buffer)
(setf buffer new-buffer)))
(setf (schar buffer fill-ptr) (code-char current))
(incf fill-ptr)
(if (get-char)
(if (not (word-constituent-p))
(return))
(return)))
(lisp::shrink-vector buffer fill-ptr))))
(let* ((h (make-hash-table :test 'equal :size 3380 :rehash-threshold 1.0))
(buffer (make-string 10)))
(declare (inline gethash maphash))
(declare (type simple-base-string buffer))
(loop
(setf buffer (reader buffer))
(if (not eof)
(if (= 1 (the fixnum (incf (the fixnum (gethash buffer h 0)))))
(setf buffer (make-string 10)));; only cons if used
(return)))
;; hast table => list
(let ((l '()))
(maphash #'(lambda (key val)
(push (cons key val) l))
h)
;; sort the list
(setf l (sort l #'(lambda (v1 v2)
(if (> (the fixnum (cdr v1)) (the fixnum (cdr v2)))
t
(if (= (the fixnum (cdr v1)) (the fixnum (cdr v2)))
(string-lessp (car v2) (car v1))
nil)))))
;; output the list of pairs
(mapcar #'(lambda (p)
(format t "~7D ~A~&" (cdr p) (car p)))
l)))))