module Main where import System import IO import Control.Monad import Maybe import List main = getArgs >>= run >> return () {- filelist is a file that looks like -- FILENAME number_of_summary_sentences FILENAME number_of_summary_sentences ... -- alignmentfile is the GIZA++ alignment file sntmatchfile is a filename that contains offsets for document sentences (used because we don't align whole sentences to whole documents. this should look like -- docid1 docid2 docid3 ... docidn docidm docido ... ... -- where the nth line corresponds to the nth summary sentence and the jth number on the nth line corresponds to the real document position of the document word at offset j. filename is the name of the file from filelist -} run [filelist,alignmentfile,sntmatchfile,filename] = do l <- liftM lines (readFile filelist) let fl = map ((\ [a,b] -> (a, read b)) . words) l al <- liftM lines (readFile alignmentfile) let (start,len) = cntlines fl filename let rel = map parseAl $ takeThirds $ take (3*len) $ drop (3*start) al sd <- liftM (map (concat . read) . lines) (readFile sntmatchfile) generate sd rel generate :: [[Int]] -> [[[Int]]] -> IO () generate sd rel = mapM_ (uncurry generate') (zip sd rel) generate' :: [Int] -> [[Int]] -> IO () generate' s (_:r) = mapM_ (\j -> do mapM_ (\k -> putStr (show (s!!(k-1)) ++ " 2 ")) j putStrLn "") r takeThirds (_:_:x:xs) = x : takeThirds xs takeThirds _ = [] parseAl :: String -> [[Int]] parseAl [] = [] parseAl ('{':xs) = let bef = takeWhile (/='}') xs aft = drop (length bef + 1) xs in (map read $ words bef) : parseAl aft parseAl (_:xs) = parseAl xs cntlines :: [(String,Int)] -> String -> (Int,Int) cntlines l fn = (str,len) where str = sum $ map snd $ takeWhile ((/=fn) . fst) l len = snd $ head $ dropWhile ((/=fn) . fst) l