Last active
November 27, 2016 22:21
-
-
Save vaibhavsagar/179c13211f6519449f02b5920861cdc8 to your computer and use it in GitHub Desktop.
Reproduction of a different bug in pipes-zlib(?)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env stack | |
{- stack | |
--resolver lts-6.24 | |
runghc | |
--package attoparsec | |
--package pipes-bytestring | |
--package containers | |
--package pipes-zlib -} | |
{-# LANGUAGE OverloadedStrings #-} | |
module Main where | |
import Data.Attoparsec.ByteString | |
import Data.Bits | |
import Data.List (foldl') | |
import Control.Arrow (first) | |
import Control.Monad.Trans.State.Strict | |
import Data.ByteString.Base16 (encode, decode) | |
import Data.ByteString.Lazy (toStrict, fromStrict) | |
import Data.Maybe (fromJust) | |
import Prelude hiding (take) | |
import qualified Codec.Compression.Zlib as Z | |
import qualified Data.ByteString as B | |
import qualified Data.Map.Strict as M | |
import qualified Pipes.Attoparsec as PA | |
import qualified Pipes.ByteString as PB | |
import qualified Pipes.Zlib as PZ | |
import qualified Prelude | |
import qualified System.IO as SI | |
import Pipes | |
import GHC.Word (Word8) | |
main = do | |
(entries, problemP) <- indexPackFile "pack-b5a8b44f24f750bd14b76037ce9bfd7ad2bcf861.pack" | |
(header, ref, decompressedP, level) <- evalStateT getNextEntry problemP | |
(b, p) <- either ((,) "" . return) id <$> next decompressedP | |
-- I'm getting (Left (Right ())) from `next p` below | |
(Left (Right unit)) <- next p | |
print unit | |
indexPackFile path = do | |
handle <- PB.fromHandle <$> SI.openFile path SI.ReadMode | |
((start, count), entries) <- runStateT parsePackFileStart handle | |
loopEntries' entries start count M.empty | |
where parsePackFileStart = do | |
(Right (len, count)) <- fromJust <$> PA.parseL parsePackFileHeader | |
return (len, count) | |
loopEntries' | |
:: Producer B.ByteString IO a -- remaining packfile input | |
-> Int -- number of bytes read so far | |
-> Int -- number of entries remaining | |
-> SeparatedEntries -- map of offsets to bytestrings | |
-> IO (SeparatedEntries, Producer B.ByteString IO a) | |
loopEntries' producer offset remaining indexedMap = case remaining of | |
178 -> return (indexedMap, producer) | |
_ -> do | |
(header, ref, decompressedP, level) <- evalStateT getNextEntry producer | |
step <- next decompressedP | |
let (decompressed, eitherP) = either ((,) "" . return) id step | |
(output, producer') <- advanceToCompletion decompressed eitherP | |
let content = B.concat [header, ref, compressToLevel level output] | |
let indexedMap' = M.insert offset content indexedMap | |
let offset' = offset + B.length content | |
let remaining' = remaining - 1 | |
loopEntries' producer' offset' remaining' indexedMap' | |
getNextEntry = do | |
(Right tLen) <- fromJust <$> PA.parse parseTypeLen | |
baseRef <- case fst tLen of | |
OfsDeltaObject -> do | |
(Right offset) <- fromJust <$> PA.parse parseOffset | |
return $ encodeOffset offset | |
RefDeltaObject -> do | |
(Right ref) <- fromJust <$> PA.parse parseBinRef | |
return $ fst $ decode ref | |
_ -> return "" | |
decompressed <- PZ.decompress' PZ.defaultWindowBits <$> get | |
PB.drawByte | |
level <- getCompressionLevel . fromJust <$> PB.peekByte | |
return (uncurry encodeTypeLen tLen, baseRef, decompressed, level) | |
advanceToCompletion decompressed producer = next producer >>= \step -> | |
case step of | |
(Left (Left p)) -> return (decompressed, p) | |
(Right (d, p')) -> | |
first (B.append decompressed) <$> advanceToCompletion d p' | |
_ -> error "No idea how to handle Left (Right _)" | |
-- Copied verbatim from https://github.com/vaibhavsagar/duffer | |
type SeparatedEntries = M.Map Int B.ByteString | |
type Ref = B.ByteString | |
parseBinRef :: Parser Ref | |
parseBinRef = encode <$> take 20 | |
data PackObjectType | |
= UnusedPackObjectType0 | |
| CommitObject | |
| TreeObject | |
| BlobObject | |
| TagObject | |
| UnusedPackObjectType5 | |
| OfsDeltaObject | |
| RefDeltaObject | |
deriving (Enum, Eq, Show) | |
encodeTypeLen :: PackObjectType -> Int -> B.ByteString | |
encodeTypeLen packObjType len = let | |
(last4, rest) = packEntryLenList len | |
firstByte = (fromEnum packObjType `shiftL` 4) .|. last4 | |
firstByte' = if rest /= "" then setBit firstByte 7 else firstByte | |
in B.cons (fromIntegral firstByte') rest | |
parseTypeLen :: (Bits t, Integral t) => Parser (PackObjectType, t) | |
parseTypeLen = do | |
header <- anyWord8 | |
let packType = packObjectType header | |
let initial = fromIntegral $ header .&. 15 | |
size <- if testMSB header | |
then do | |
rest <- littleEndian <$> parseVarInt | |
return $ initial + (rest `shiftL` 4) | |
else | |
return initial | |
return (packType, size) | |
parseVarInt :: (Bits t, Integral t) => Parser [t] | |
parseVarInt = anyWord8 >>= \byte -> | |
let value = fromIntegral $ byte .&. 127 | |
more = testMSB byte | |
in (value:) <$> if more then parseVarInt else return [] | |
testMSB :: Bits t => t -> Bool | |
testMSB = flip testBit 7 | |
parseOffset :: (Bits t, Integral t) => Parser t | |
parseOffset = parseVarInt >>= \values -> | |
let len = length values - 1 | |
concatenated = bigEndian values | |
in return $ concatenated + if len > 0 | |
-- I think the addition reinstates the MSBs that are otherwise | |
-- used to indicate whether there is more of the variable length | |
-- integer to parse. | |
then sum $ map (\i -> 2^(7*i)) [1..len] | |
else 0 | |
littleEndian, bigEndian :: (Bits t, Integral t) => [t] -> t | |
littleEndian = foldr (\a b -> a + (b `shiftL` 7)) 0 | |
bigEndian = foldl' (\a b -> (a `shiftL` 7) + b) 0 | |
parsePackFileHeader :: Parser Int | |
parsePackFileHeader = | |
word8s (B.unpack "PACK") *> take 4 *> (fromBytes <$> take 4) | |
fromBytes :: (Bits t, Integral t) => B.ByteString -> t | |
fromBytes = B.foldl (\a b -> (a `shiftL` 8) + fromIntegral b) 0 | |
word8s :: [Word8] -> Parser [Word8] | |
word8s = mapM word8 | |
packEntryLenList :: Int -> (Int, B.ByteString) | |
packEntryLenList n = let | |
last4 = fromIntegral n .&. 15 | |
rest = fromIntegral n `shiftR` 4 :: Int | |
last4' = if rest > 0 | |
then setBit last4 7 | |
else last4 | |
restL = to7BitList rest | |
restL' = if restL /= [0] | |
then map fromIntegral $ head restL:map (`setBit` 7) (tail restL) | |
else [] | |
in (last4', B.pack $ reverse restL') | |
encodeOffset :: Int -> B.ByteString | |
encodeOffset n = let | |
noTerms = floor $ logBase 128 (fromIntegral n * (128 - 1) + 128) - 1 | |
remove = sum $ Prelude.take noTerms $ map (128^) [1..] | |
remainder = n - remove | |
varInt = to7BitList remainder | |
encodedInts = setMSBs $ leftPadZeros varInt (noTerms + 1) | |
in B.pack $ map fromIntegral encodedInts | |
leftPadZeros :: [Int] -> Int -> [Int] | |
leftPadZeros ints n | |
| length ints >= n = ints | |
| otherwise = leftPadZeros (0:ints) n | |
setMSBs :: [Int] -> [Int] | |
setMSBs ints = let | |
ints' = reverse ints | |
ints'' = head ints' : map (`setBit` 7) ( tail ints') | |
in reverse ints'' | |
toByteList, to7BitList :: (Bits t, Integral t) => t -> [t] | |
toByteList = toSomeBitList 8 | |
to7BitList = toSomeBitList 7 | |
toSomeBitList :: (Bits t, Integral t) => Int -> t -> [t] | |
toSomeBitList some n = reverse $ toSomeBitList' some n | |
where toSomeBitList' some n = case divMod n (bit some) of | |
(0, i) -> [fromIntegral i] | |
(x, y) -> fromIntegral y : toSomeBitList' some x | |
compressToLevel :: Z.CompressionLevel -> B.ByteString -> B.ByteString | |
compressToLevel level content = toStrict $ | |
Z.compressWith Z.defaultCompressParams | |
{ Z.compressLevel = level } | |
$ fromStrict content | |
getCompressionLevel :: Word8 -> Z.CompressionLevel | |
getCompressionLevel levelByte = case levelByte of | |
1 -> Z.bestSpeed | |
156 -> Z.defaultCompression | |
_ -> error "I can't make sense of this compression level" | |
packObjectType :: (Bits t, Integral t) => t -> PackObjectType | |
packObjectType header = toEnum . fromIntegral $ (header `shiftR` 4) .&. 7 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment