HaHaHa!(old)

2006-12-05

単語の頻度を調べる

単語の頻度を調べる - rubyco(るびこ)の日記

素朴な(つまり遅い^^;)実装.case insensitive になるようにしてある.

module Main (main) where

import Data.Char
import Data.List
import Data.Map hiding (map)

-- | A word whose identity ignores letter case.
--
-- The spelling is stored exactly as it was read; equality, ordering and
-- display all go through the lower-cased form.
newtype Keyword = K String

-- | Two keywords are equal exactly when they compare as 'EQ'.
instance Eq Keyword where
  a == b = compare a b == EQ

-- | Keywords are ordered by their lower-cased spelling.
instance Ord Keyword where
  compare (K lhs) (K rhs) = compare (lower lhs) (lower rhs)
    where
      lower = map toLower

-- | A keyword is displayed as its lower-cased spelling, without quotes.
instance Show Keyword where
  show (K str) = [toLower c | c <- str]

-- | Read text from standard input and print every distinct word together
-- with its number of occurrences, one @(word,count)@ pair per line.
--
-- Words are extracted with 'words'' and wrapped in 'K', so spellings that
-- differ only in letter case share a single counter and are printed in
-- lower case.  Lines are ordered by descending count ('cmp'); 'sortBy' is a
-- stable sort and 'toAscList' yields keys in ascending order, so words with
-- the same count stay in alphabetical order.
main :: IO ()
main = do
  text <- getContents
  -- 'fromListWith' builds the frequency table directly.  It replaces a
  -- @foldr@ over 'insertWith': an unqualified @foldr@ is ambiguous here,
  -- because newer versions of "Data.Map" export a @foldr@ of their own.
  let tally = fromListWith (+) [(K w, 1 :: Int) | w <- words' text]
  mapM_ print (sortBy cmp (toAscList tally))

-- | Split a string into its maximal runs of alphabetic characters.
--
-- Every character for which 'isAlpha' is false acts as a separator and is
-- discarded, so digits and punctuation never appear in the result.
words' :: String -> [String]
words' input =
  case dropWhile (not . isAlpha) input of
    []   -> []
    rest ->
      -- @rest@ starts with a letter, so @word@ is never empty here.
      let (word, remainder) = break (not . isAlpha) rest
      in  word : words' remainder

-- | Order @(keyword, count)@ pairs by descending count.
--
-- The keyword itself is ignored, so pairs with equal counts compare as 'EQ'.
cmp :: (Keyword, Int) -> (Keyword, Int) -> Ordering
cmp (_, countA) (_, countB) = flip compare countA countB

実行例

% cat hoge.txt
We wish you a Merry Christmas,
We wish you a Merry Christmas,
We wish you a Merry Christmas,
and A Happy New Year!
% runhaskell freqword.hs < hoge.txt
(a,4)
(christmas,3)
(merry,3)
(we,3)
(wish,3)
(you,3)
(and,1)
(happy,1)
(new,1)
(year,1)