2011年01月31日 月曜日
■ [メモ][GHC API]遅延評価を確認しる...
GHC APIを使ってデータ型を調べてみる。要はHaskellのデータ型をC的な感じで表示しているだけ...
ちょっと長い。あとGHCのライセンスは -> GHCのライセンス - Haskell卒業! - haskell...
{-# LANGUAGE BangPatterns, ViewPatterns, ScopedTypeVariables #-} {-# LANGUAGE MagicHash, UnboxedTuples #-} {-# LANGUAGE RecordWildCards, NamedFieldPuns #-} {-# OPTIONS_GHC -O2 -Wall -cpp #-} import GHC import GHC.Paths (libdir) import GHC.Exts import GHC.Arr (Array(..)) import HscTypes import Exception (assert) import MonadUtils import Control.Monad (forM) import ErrUtils import TcRnMonad import Linker (dataConInfoPtrToName) import PrelNames (pRELUDE) import Outputable import Util (ghciTablesNextToCode) import Constants (wORD_SIZE) import ByteCodeItbls (StgInfoTable) import qualified ByteCodeItbls as BCI (StgInfoTable(..)) import Foreign import Data.Array import MyLibrary.GHCi.Encoding.CP932 (useCP932) -- よくわからないけど、STGのクロージャー(?)の定義をそのままインポート... #define INVALID_OBJECT 0 #define CONSTR 1 #define CONSTR_1_0 2 #define CONSTR_0_1 3 #define CONSTR_2_0 4 #define CONSTR_1_1 5 #define CONSTR_0_2 6 #define CONSTR_STATIC 7 #define CONSTR_NOCAF_STATIC 8 #define FUN 9 #define FUN_1_0 10 #define FUN_0_1 11 #define FUN_2_0 12 #define FUN_1_1 13 #define FUN_0_2 14 #define FUN_STATIC 15 #define THUNK 16 #define THUNK_1_0 17 #define THUNK_0_1 18 #define THUNK_2_0 19 #define THUNK_1_1 20 #define THUNK_0_2 21 #define THUNK_STATIC 22 #define THUNK_SELECTOR 23 #define BCO 24 #define AP 25 #define PAP 26 #define AP_STACK 27 #define IND 28 #define IND_PERM 29 #define IND_STATIC 30 #define RET_BCO 31 #define RET_SMALL 32 #define RET_BIG 33 #define RET_DYN 34 #define RET_FUN 35 #define UPDATE_FRAME 36 #define CATCH_FRAME 37 #define STOP_FRAME 38 #define BLOCKING_QUEUE 39 #define BLACKHOLE 40 #define MVAR_CLEAN 41 #define MVAR_DIRTY 42 #define ARR_WORDS 43 #define MUT_ARR_PTRS_CLEAN 44 #define MUT_ARR_PTRS_DIRTY 45 #define MUT_ARR_PTRS_FROZEN0 46 #define MUT_ARR_PTRS_FROZEN 47 #define MUT_VAR_CLEAN 48 #define MUT_VAR_DIRTY 49 #define WEAK 50 #define PRIM 51 #define MUT_PRIM 52 #define TSO 53 #define TREC_CHUNK 54 #define ATOMICALLY_FRAME 55 #define CATCH_RETRY_FRAME 56 #define CATCH_STM_FRAME 57 #define WHITEHOLE 58 #define N_CLOSURE_TYPES 59 -- これ何というバッドノウハウ??? aP_CODE :: Int aP_CODE = AP pAP_CODE :: Int pAP_CODE = PAP #undef AP #undef PAP -- 色んなデータ型の内部情報を得る... data ClosureType = Constr | Fun -- 関数 | Thunk Int -- サンク | ThunkSelector | Blackhole -- 何よ、コレ... | AP -- 未評価の何か??? | PAP | Indirection Int | MutVar Int | MVar Int | Other Int deriving (Show, Eq, Ord) instance Outputable ClosureType where ppr = text . show data Closure = Closure{ tipe :: ClosureType, infoPtr :: Ptr (), infoTable :: StgInfoTable, ptrs :: Array Int HValue, nonPtrs :: [Word] } -- 任意のデータ型の情報を得るらしい... getClosureData :: a -> IO Closure getClosureData a = case unpackClosure# a of (# iptr, ptrs, nptrs #) -> do itbl <- peek $ if ghciTablesNextToCode then Ptr iptr else Ptr iptr `plusPtr` negate wORD_SIZE let tipe = readCType $ BCI.tipe itbl elements = fromIntegral $ BCI.ptrs itbl ptrsList = Array 0 (elements - 1) elements ptrs nptrs_data = [W# (indexWordArray# nptrs i) | I# i <- [0.. fromIntegral $ BCI.nptrs itbl]] assert (elements >= 0) $ return () ptrsList `seq` return (Closure tipe (Ptr iptr) itbl ptrsList nptrs_data) -- CのenumをHaskellの型に変換...ぽい... readCType :: Integral a => a -> ClosureType readCType i | i >= CONSTR && i <= CONSTR_NOCAF_STATIC = Constr | i >= FUN && i <= FUN_STATIC = Fun | i >= THUNK && i < THUNK_SELECTOR = Thunk i' | i == THUNK_SELECTOR = ThunkSelector | i == BLACKHOLE = Blackhole | i >= IND && i <= IND_STATIC = Indirection i' | i' == aP_CODE = AP | i == AP_STACK = AP | i' == pAP_CODE = PAP | i == MUT_VAR_CLEAN || i == MUT_VAR_DIRTY = MutVar i' | i == MVAR_CLEAN || i == MVAR_DIRTY = MVar i' | otherwise = Other i' where i' = fromIntegral i -- データコンストラクタ(?)かどうか...かな... isConstr :: ClosureType -> Bool isConstr Constr = True isConstr _ = False -- ううっ、よくわからぬ... isIndirection :: ClosureType -> Bool isIndirection (Indirection _) = True isIndirection _ = False -- これはサンクでしょ... isThunk :: ClosureType -> Bool isThunk (Thunk _) = True isThunk ThunkSelector = True isThunk AP = True isThunk _ = False -- あるデータ型の全フィールドが評価済みならTrueっぽい... isFullyEvaluated :: a -> IO Bool isFullyEvaluated a = do closure <- getClosureData a case tipe closure of Constr -> and <$> amapM isFullyEvaluated (ptrs closure) _ -> return False where amapM f (Array i0 i _ arr#) = forM [0 .. i - i0] $ \(I# i#) -> do case indexArray# arr# i# of (# e #) -> f e main :: IO () main = do _ <- useCP932 "" runGhc (Just libdir) $ do -- 適当に初期化... oldFlags@DynFlags{..} <- getSessionDynFlags let curFlags :: DynFlags curFlags = oldFlags{ hscTarget = HscInterpreted, ghcLink = LinkInMemory, optLevel = 2 } message :: (GhcMonad m) => SDoc -> m () message = liftIO . putMsg curFlags _ <- setSessionDynFlags curFlags -- エラーは適当に処理してちょ... defaultErrorHandler curFlags $ do -- コンテキスト(見えてるモジュールみたいな感じ)はPreludeだけ... setContext [] [(pRELUDE, Nothing)] -- コンパイルエラ-なら適当に処理してちょ... handleSourceError printExceptionAndWarnings $ do withSession $ \env -> do -- データ型の情報を出力する... let printClosureData :: (GhcMonad m) => a -> m () printClosureData x = do Closure{..} <- liftIO $ getClosureData x name <- ioMsgMaybe . liftIO . initTcPrintErrors env pRELUDE $ do if isConstr tipe then either (const (text "<no name>")) ppr <$> dataConInfoPtrToName infoPtr else return $ text "not available" message $ text "Name:" <+> name message $ text "ClosureType:" <+> ppr tipe message $ text "isConstr:" <+> ppr (isConstr tipe) message $ text "isIndirection:" <+> ppr (isIndirection tipe) message $ text "isThunk:" <+> ppr (isThunk tipe) liftIO (isFullyEvaluated x) >>= message . (text "isFullyEvaluated:" <+>) . ppr -- Intを調べるぉ... -- 文字列で与えられた式をコンパイル... exprX <- compileExpr "max 1 2 :: Int" -- 成功ならxに束縛する... let x = unsafeCoerce# exprX :: Int message $ text "\nIntだぉ..." message $ text "-> 評価前" printClosureData $ x message $ text "-> 評価後(by seq)" printClosureData $! x -- sumも調べるぉ... exprXS <- compileExpr "sum [1 .. 100] :: Int" let acc = unsafeCoerce# exprXS :: Int message $ text "\nsumだぉ..." message $ text "-> 評価前" printClosureData $ acc message $ text "-> 評価後(by seq)" printClosureData $! acc
実行結果...
$> :! ghc --make LazyEvaluation.hs [1 of 1] Compiling Main ( LazyEvaluation.hs, LazyEvaluation.o ) Linking LazyEvaluation.exe ... $> :! LazyEvaluation.exe Intだぉ... -> 評価前 Name: not available ClosureType: AP isConstr: False isIndirection: False isThunk: True isFullyEvaluated: False -> 評価後(by seq) Name: GHC.Types.I# ClosureType: Constr isConstr: True isIndirection: False isThunk: False isFullyEvaluated: True sumだぉ... -> 評価前 Name: not available ClosureType: AP isConstr: False isIndirection: False isThunk: True isFullyEvaluated: False -> 評価後(by seq) Name: GHC.Types.I# ClosureType: Constr isConstr: True isIndirection: False isThunk: False isFullyEvaluated: True
maxすら遅延する。恐るべき怠惰なHaskellタンでした...
評価情報は予想通りというかCの構造体で普通に管理されていることを確認しただけだった。X86アセンブラ以上のことは当然できないから、Haskellと言えども特にMagicはないよね...
冷静に思ったのだが、isFullyEvaluatedってやたらコストが高い気がする。こういう場合、普通なC/C++ならisFullyEvaluatedかどうかのフラグをオブジェクト毎に持たせるとかするのだがしてないっぽい???
コメント