Hatena::Grouphaskell

eagletmtの日記 このページをアンテナに追加

 | 

2010-08-23

FFI を利用する (4)

| 18:14 |  FFI を利用する (4) - eagletmtの日記 を含むブックマーク はてなブックマーク -  FFI を利用する (4) - eagletmtの日記  FFI を利用する (4) - eagletmtの日記 のブックマークコメント

最終的なコードと利用サンプル,そしてそれらをビルドする cabal ファイルを最後に載せておく.

libxml.cabal

Name: libxml
Version: 0.1
License: BSD3
Category: Text
Build-type: Simple
Cabal-version: >=1.6

Executable sample
  Main-is: Main.hs
  Other-Modules: LibXml
  Build-depends: base == 4.*, bytestring
  Build-tools: hsc2hs
  Hs-source-dirs: src
  Includes: wrapper.h
  Include-dirs: include
  C-sources: cbits/wrapper.c
  Extra-libraries: xml2

src/LibXml.hsc

{-# LANGUAGE ForeignFunctionInterface, EmptyDataDecls #-}
module LibXml (
    HtmlParserOption, recover, noError, noWarning, pedantic, noBlanks, noNet, noImplied, compact
  , XmlDoc, Node
  , parseHTML
  , xpath
  , textContent
  ) where
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>

import Control.Applicative ((<$>))
import Control.Exception (bracket)
import Data.Bits ((.|.))

import System.IO.Unsafe (unsafePerformIO)
import Foreign.Ptr (Ptr, nullPtr)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import Foreign.C.String (CString, withCString)
import Foreign.C.Types (CInt)
import Foreign.Concurrent (newForeignPtr)
import Foreign.Storable (peekByteOff)
import Foreign.Marshal.Array (peekArray)
import qualified Data.ByteString as B

data XmlDocTag
newtype XmlDoc = MkXmlDoc (ForeignPtr XmlDocTag)
data NodeTag
newtype Node = MkNode (Ptr NodeTag)
data NodeSetTag
data XPathContextTag
data XPathObjectTag

newtype HtmlParserOption = HtmlParserOption { unHtmlParserOption :: CInt }
#{enum HtmlParserOption, HtmlParserOption
  , recover = HTML_PARSE_RECOVER
  , noError = HTML_PARSE_NOERROR
  , noWarning = HTML_PARSE_NOWARNING
  , pedantic = HTML_PARSE_PEDANTIC
  , noBlanks = HTML_PARSE_NOBLANKS
  , noNet = HTML_PARSE_NONET
  , noImplied = HTML_PARSE_NOIMPLIED
  , compact = HTML_PARSE_COMPACT
  }

foldOpts :: [HtmlParserOption] -> CInt
foldOpts = foldl (\a -> (a .|. ) . unHtmlParserOption) 0

withMaybeCString :: Maybe String -> (CString -> IO a) -> IO a
withMaybeCString (Just str) act = withCString str act
withMaybeCString Nothing act = act nullPtr

parseHTML :: B.ByteString -> String -> Maybe String -> [HtmlParserOption] -> XmlDoc
parseHTML src url encoding opts = unsafePerformIO $ do
  ptr <- B.useAsCStringLen src $
          \(cstr,len) -> withMaybeCString encoding $
            \e -> withCString url $
              \u -> c_htmlReadMemory cstr (fromIntegral len) e u (foldOpts opts)
  fptr <- newForeignPtr ptr $ c_xmlFreeDoc ptr
  return $ MkXmlDoc fptr
foreign import ccall "htmlReadMemory" c_htmlReadMemory :: CString -> CInt -> CString -> CString -> CInt -> IO (Ptr XmlDocTag)
foreign import ccall "xmlFreeDoc" c_xmlFreeDoc :: Ptr XmlDocTag -> IO ()

xpath :: String -> XmlDoc -> Maybe ([Node],Int)
xpath s (MkXmlDoc docPtr) = unsafePerformIO $ do
  ctx <- withForeignPtr docPtr c_xmlXPathNewContext
  obj <- withCString s (`c_xmlXPathEvalExpression` ctx)
  if obj == nullPtr
    then return Nothing
    else do
      nodes <- (#peek xmlXPathObject, nodesetval) obj :: IO (Ptr NodeSetTag)
      if nodes == nullPtr
        then c_xmlXPathFreeObject obj >> return Nothing
        else do
          nr <- fromIntegral <$> ((#peek xmlNodeSet, nodeNr) nodes :: IO CInt)
          tab <- (#peek xmlNodeSet, nodeTab) nodes :: IO (Ptr (Ptr NodeTag))
          ns <- map MkNode <$> peekArray nr tab
          c_xmlXPathFreeObject obj
          return $ Just (ns,nr)
foreign import ccall "xmlXPathNewContext" c_xmlXPathNewContext :: Ptr XmlDocTag -> IO (Ptr XPathContextTag)
foreign import ccall "xmlXPathEvalExpression" c_xmlXPathEvalExpression :: CString -> Ptr XPathContextTag -> IO (Ptr XPathObjectTag)
foreign import ccall "xmlXPathFreeObject" c_xmlXPathFreeObject :: Ptr XPathObjectTag -> IO ()

textContent :: Node -> B.ByteString
textContent (MkNode ptr) = unsafePerformIO $
  bracket (c_xmlNodeGetContent ptr) c_xmlFree B.packCString
foreign import ccall "xmlNodeGetContent" c_xmlNodeGetContent :: Ptr NodeTag -> IO CString
foreign import ccall "xmlFree_wrap" c_xmlFree :: Ptr a -> IO ()

include/wrapper.h

#ifndef LIBXML_WRAPPER_H
#define LIBXML_WRAPPER_H
void xmlFree_wrap(void *mem);
#endif

cbits/wrapper.c

#include <libxml/globals.h>
#include "wrapper.h"

void xmlFree_wrap(void *mem)
{
  xmlFree(mem);
}

src/Main.hs

{-# LANGUAGE OverloadedStrings #-}
module Main where
import Control.Monad (forM_)
import System.Environment (getArgs)
import qualified Data.ByteString.Char8 as B8
import LibXml

main :: IO ()
main = do
  args <- getArgs
  forM_ args $ \path -> do
    str <- B8.readFile path
    let doc = parseHTML str "" (Just "UTF-8") [recover, noError, noWarning, noNet]
    case xpath "//a[@href]" doc of
      Nothing -> putStrLn $ path ++ ": failed"
      Just (ns,nr) -> putStrLn (path ++ ": " ++ show nr ++ " nodes") >> mapM_ (B8.putStrLn . B8.append "  " . textContent) ns

NourMaNourMa2013/03/29 15:14I just hope whoever wtiers these keeps writing more!

hmdicqgpubhmdicqgpub2013/03/30 14:30iW5Mb1 <a href="http://ygnnewkfwyhb.com/">ygnnewkfwyhb</a>

xpfxoyufxpfxoyuf2013/03/31 22:03Cz3Fh4 , [url=http://flknbtjovirt.com/]flknbtjovirt[/url], [link=http://kdoazlurskqz.com/]kdoazlurskqz[/link], http://fzjqgrfcplbs.com/

qvgopmulirqvgopmulir2013/04/01 10:49Ac343G , [url=http://csqammgdnmrq.com/]csqammgdnmrq[/url], [link=http://kbtjeonmbhzm.com/]kbtjeonmbhzm[/link], http://ntgiiffhvwpi.com/

トラックバック - http://haskell.g.hatena.ne.jp/eagletmt/20100823
 |