#ifndef BITSTRING_BIGENDIAN
module Data.BitString
#else
module Data.BitString.BigEndian
#endif
(
BitString
, empty
, bitString
, bitStringLazy
, unsafeBitString'
, take
, drop
, splitAt
, append
, concat
, toList
, fromList
, to01List
, from01List
, null
, length
, foldl'
, findSubstring
, realizeBitStringLazy
, realizeBitStringStrict
, realizeBitString'
#ifdef WITH_QUICKCHECK
, runAllTest
, BitChunk (..)
, BitString ( BitString )
, mypack
, prop_fromToList
, prop_toFromList
, prop_append
, prop_drop
, prop_take
, prop_dropChunk
, prop_takeChunk
, prop_realign
, prop_realizeChunk
, prop_realize
, prop_realizeLen
, prop_findSubstring1
, prop_findSubstring1a
, prop_findSubstring1b
, prop_findSubstring2
#endif
)
where
import Prelude hiding (take,drop,last,length,splitAt,concat,null,rem,init)
import Control.Monad
import Control.Applicative hiding ( empty )
import Data.Bits ()
import Data.Int ()
import Data.Word ()
import Data.Maybe
import qualified Data.List as List
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy as L
#ifdef WITH_QUICKCHECK
import Test.QuickCheck hiding ( (.&.) )
import qualified Data.ByteString.Char8 as BC
import Data.Char (ord)
#endif
import Foreign
import System.IO.Unsafe
flippedFoldM_ :: Monad m => a -> [b] -> (a -> b -> m a) -> m ()
flippedFoldM_ x ys f = foldM_ f x ys
flippedFoldM :: Monad m => a -> [b] -> (a -> b -> m a) -> m a
flippedFoldM x ys f = foldM f x ys
#ifdef BITSTRING_BIGENDIAN
byteReverseWord32 :: Word32 -> Word32
byteReverseWord32 w
= shiftR w 24
+ shiftR w 8 .&. 0x0000ff00
+ shiftL w 8 .&. 0x00ff0000
+ shiftL w 24
#endif // BITSTRING_BIGENDIAN
data BitChunk = BitChunk
{ bitChunkOffset :: !Int64
, bitChunkLength :: !Int64
, bitChunkData :: !ByteString
}
#ifdef WITH_QUICKCHECK
mypack :: String -> ByteString
mypack = B.pack . map c2w where
c2w = fromIntegral . ord
instance Show BitChunk where
show (BitChunk ofs len dat) = "BitChunk " ++ show ofs ++ " " ++ show len ++ " (mypack " ++ show dat ++ ")"
#else
instance Show BitChunk where
show chunk = "BitChunk <" ++ map f (bitChunkTo01List chunk) ++ ">" where
f 0 = '0'
f 1 = '1'
#endif // WITH_QUICKCHECK
emptyBitChunk :: BitChunk
emptyBitChunk = BitChunk 0 0 B.empty
unsafeBitChunk'
:: Int64
-> Int64
-> ByteString
-> BitChunk
unsafeBitChunk' ofs len dat = BitChunk ofs len dat where
bitChunk :: ByteString -> BitChunk
bitChunk bs = unsafeBitChunk' 0 (8 * fromIntegral (B.length bs)) bs
bitChunkDrop :: Int64 -> BitChunk -> BitChunk
bitChunkDrop k (BitChunk ofs len dat) = if k<len
then BitChunk (ofs+k) (lenk) dat
else emptyBitChunk
bitChunkTake :: Int64 -> BitChunk -> BitChunk
bitChunkTake k bc@(BitChunk ofs len dat)
| k==0 = emptyBitChunk
| k<=len = BitChunk ofs k dat
| otherwise = bc
splitBitChunkAt :: Int64 -> BitChunk -> (BitChunk,BitChunk)
splitBitChunkAt k b = (bitChunkTake k b, bitChunkDrop k b)
boolToWord8 :: Bool -> Word8
boolToWord8 bool = case bool of
True -> 1
False -> 0
word8ToBool :: Word8 -> Bool
word8ToBool w = (w/=0)
unsafeLookupBitChunk :: BitChunk -> Int64 -> Bool
unsafeLookupBitChunk chunk j = unsafeLookupBitChunk01 chunk j /= 0
unsafeLookupBitChunk01 :: BitChunk -> Int64 -> Word8
unsafeLookupBitChunk01 (BitChunk ofs len dat) j = bit where
(n,k) = divMod (ofs+j) 8
byte = B.index dat (fromIntegral n)
#ifndef BITSTRING_BIGENDIAN
bit = ((shiftR byte (fromIntegral k)) .&. 1)
#else
bit = ((shiftR byte (fromIntegral (7k))) .&. 1)
#endif
bitChunkToList :: BitChunk -> [Bool]
bitChunkToList chunk@(BitChunk ofs len dat) =
[ unsafeLookupBitChunk chunk k | k<-[0..len1] ]
bitChunkFromList :: [Bool] -> BitChunk
bitChunkFromList bits = BitChunk 0 (fromIntegral len) (B.pack bytes) where
(len,bytes) = worker bits
worker [] = ( 0, [] )
worker bits = ( len' + List.length this , byte:ys ) where
(this,rest) = List.splitAt 8 bits
#ifndef BITSTRING_BIGENDIAN
byte = List.foldl' (+) 0 $ zipWith shiftL (map boolToWord8 this) [0..7]
#else
byte = List.foldl' (+) 0 $ zipWith shiftL (map boolToWord8 this) [7,6..0]
#endif
(len' , ys) = worker rest
bitChunkTo01List :: BitChunk -> [Word8]
bitChunkTo01List chunk@(BitChunk ofs len dat) =
[ unsafeLookupBitChunk01 chunk k | k<-[0..len1] ]
bitChunkFrom01List :: [Word8] -> BitChunk
bitChunkFrom01List bits = BitChunk 0 (fromIntegral len) (B.pack bytes) where
(len,bytes) = worker bits
worker [] = ( 0, [] )
worker bits = ( len' + List.length this , byte:ys ) where
(this,rest) = List.splitAt 8 bits
#ifndef BITSTRING_BIGENDIAN
byte = List.foldl' (+) 0 $ zipWith shiftL (map (.&. 1) this) [0..7]
#else
byte = List.foldl' (+) 0 $ zipWith shiftL (map (.&. 1) this) [7,6..0]
#endif
(len' , ys) = worker rest
instance Eq BitChunk where
(==) x y = bitChunkToList x == bitChunkToList y
realignBitChunk :: BitChunk -> BitChunk
realignBitChunk (BitChunk ofs len dat) =
BitChunk 0 len $ case ofsFrac of
0 -> dat'
_ -> B.pack $ B.zipWith f dat' (B.snoc (B.tail dat') 0)
where
ofsFrac2 = 8 ofsFrac
#ifndef BITSTRING_BIGENDIAN
f b1 b2 = shiftR b1 (fromIntegral ofsFrac) + shiftL b2 (fromIntegral ofsFrac2)
#else
f b1 b2 = shiftL b1 (fromIntegral ofsFrac) + shiftR b2 (fromIntegral ofsFrac2)
#endif
dat' = B.drop (fromIntegral ofsInt) dat
(ofsInt, ofsFrac) = divMod ofs 8
realizeBitChunk :: BitChunk -> (ByteString, Maybe (Word8,Int))
realizeBitChunk orig = (whole, end) where
chunk@(BitChunk 0 len dat) = realignBitChunk orig
(n,k) = divMod len 8
whole = B.take (fromIntegral n) dat
end = case k of
0 -> Nothing
_ -> let w' = B.index dat (fromIntegral n)
#ifndef BITSTRING_BIGENDIAN
mask = 2^k 1 :: Word8
w = w' .&. mask
#else
kk = fromIntegral k :: Int
mask = shiftL (2^kk1) (8kk)
w = w' .&. mask
#endif
in Just (w, fromIntegral k)
unBitString :: BitString -> [BitChunk]
unBitString (BitString xs) = xs
newtype BitString = BitString [BitChunk]
#ifdef WITH_QUICKCHECK
deriving Show
#else
instance Show BitString where
show bits = "BitString <" ++ map f (to01List bits) ++ ">" where
f 0 = '0'
f 1 = '1'
f _ = error "BitString/show: impossible"
#endif
empty :: BitString
empty = BitString []
unsafeBitString'
:: Int64
-> Int64
-> ByteString
-> BitString
unsafeBitString' ofs len bs = BitString [unsafeBitChunk' ofs len bs]
bitString :: ByteString -> BitString
bitString bs = unsafeBitString' 0 (8 * fromIntegral (B.length bs)) bs
bitStringLazy :: L.ByteString -> BitString
bitStringLazy = concat . map bitString . L.toChunks
drop :: Int64 -> BitString -> BitString
drop k (BitString cs) = BitString (worker k cs) where
worker _ [] = []
worker k (BitChunk ofs len dat : cs) = if k < len
then BitChunk (ofs+k) (lenk) dat : cs
else worker (klen) cs
take :: Int64 -> BitString -> BitString
take k (BitString cs) = BitString (worker k cs) where
worker 0 _ = []
worker _ [] = []
worker k (c@(BitChunk ofs len dat) : cs) = if k <= len
then [ BitChunk ofs k dat ]
else c : worker (klen) cs
splitAt :: Int64 -> BitString -> (BitString,BitString)
splitAt k b = (take k b, drop k b)
append :: BitString -> BitString -> BitString
append (BitString chunks1) (BitString chunks2) = BitString (chunks1 ++ chunks2)
concat :: [BitString] -> BitString
concat xs = case xs of
[] -> empty
_ -> (BitString . List.concat . map unBitString) xs
toList :: BitString -> [Bool]
toList (BitString chunks) = List.concatMap bitChunkToList chunks
fromList :: [Bool] -> BitString
fromList digits = BitString [bitChunkFromList digits]
to01List :: BitString -> [Word8]
to01List (BitString chunks) = List.concatMap bitChunkTo01List chunks
from01List :: [Word8] -> BitString
from01List digits = BitString [bitChunkFrom01List digits]
length :: BitString -> Int64
length (BitString chunks) = List.foldl' (+) 0 (map bitChunkLength chunks)
null :: BitString -> Bool
null bits = (length bits == 0)
instance Eq BitString where
(==) = fallbackEqual
fallbackEqual :: BitString -> BitString -> Bool
fallbackEqual x y = (toList x == toList y)
foldl' :: (a -> Bool -> a) -> a -> BitString -> a
foldl' fun init bits = List.foldl' fun init (toList bits)
findSubstring
:: BitString
-> BitString
-> Maybe Int64
findSubstring = findSubstring32
findSubstring32
:: BitString
-> BitString
-> Maybe Int64
findSubstring32 small large =
unsafePerformIO $ do
withForeignPtr fptr_b_small $ \p'' -> do
let p' = (plusPtr p'' ofs_b_small) :: Ptr Word8
allocaArray (k+1) $ \q -> allocaArray (k+1) $ \p -> do
#ifndef BITSTRING_BIGENDIAN
let p8 = (castPtr :: Ptr Word32 -> Ptr Word8) p
forM_ [0..len_b_small1] $ \i -> do { x <- peekElemOff p' i ; pokeElemOff p8 i x }
peekElemOff p k >>= \x -> pokeElemOff p k (x .&. mask)
pokeElemOff q k 0
#else
pokeElemOff p k 0
let p8 = (castPtr :: Ptr Word32 -> Ptr Word8) p
forM_ [0..len_b_small1] $ \i -> do { x <- peekElemOff p' i ; pokeElemOff p8 i x }
forM_ [0..k] $ \j -> do { y <- peekElemOff p j ; pokeElemOff p j (byteReverseWord32 y) }
peekElemOff p k >>= \x -> pokeElemOff p k (x .&. mask)
pokeElemOff q k 0
#endif
worker p q 0 (to01List large)
where
m = length small
m32 = fromIntegral (mod m 32) :: Int
d32 = fromIntegral (div m 32) :: Int
hmm :: (Int, Word32, Int)
hmm@(k,mask,initShift) = case m32 of
#ifndef BITSTRING_BIGENDIAN
0 -> ( d32 1 , 0xffffffff , 31 )
_ -> ( d32 , 2^m32 1 , fromIntegral (mod (m1) 32) )
#else
0 -> ( d32 1 , 0xffffffff , 0 )
_ -> ( d32 , shiftL (2^m321) (32m32) , fromIntegral ( 32 mod m 32 ) )
#endif
b_small = realizeBitStringStrict small
(fptr_b_small, ofs_b_small, len_b_small) = B.toForeignPtr b_small
worker :: Ptr Word32 -> Ptr Word32 -> Int64 -> [Word8] -> IO (Maybe Int64)
worker !p !q !pos !bits = do
conds <- forM [0..k1] $ \j -> do { x <- peekElemOff p j ; y <- peekElemOff q j ; return (x==y) }
cond <- do { x <- peekElemOff p k ; y <- peekElemOff q k ; return (x .&. mask == y .&. mask) }
if and (cond:conds) && pos >= m
then return (Just (pos m))
else case bits of
[] -> return Nothing
(b:bs) -> do
#ifndef BITSTRING_BIGENDIAN
let init_cr = (fromIntegral b , initShift)
flippedFoldM_ init_cr [k,k1..0] $ \(c,r) j -> do
y <- peekElemOff q j
let cr' = ( y .&. 1 , 31 )
pokeElemOff q j (shiftR y 1 + shiftL c r)
return cr'
worker p q (pos+1) bs
#else
let init_cr = (fromIntegral b , initShift)
flippedFoldM_ init_cr [k,k1..0] $ \(c,r) j -> do
y <- peekElemOff q j
let cr' = ( shiftR y 31 , 0 )
pokeElemOff q j (shiftL y 1 + shiftL c r)
return cr'
worker p q (pos+1) bs
#endif
realizeBitString' :: BitString -> [ByteString]
realizeBitString' (BitString chunks) = worker Nothing chunks where
worker :: Maybe (Word8,Int) -> [BitChunk] -> [ByteString]
worker rem (b:bs) =
case rem of
Nothing ->
let (s, rem') = realizeBitChunk b
in s : worker rem' bs
Just (w,k) ->
if r >= q
then B.singleton t : s : worker rem' bs
else worker (Just (t, k+fromIntegral r)) bs
where
q = 8 fromIntegral k
r = bitChunkLength b
(x,y) = splitBitChunkAt q b
(s, rem') = realizeBitChunk y
#ifndef BITSTRING_BIGENDIAN
t = List.foldl' (+) w
$ zipWith shiftL (bitChunkTo01List x) [k..]
#else
u = 7k
t = List.foldl' (+) w
$ zipWith shiftL (bitChunkTo01List x) [u,u1..]
#endif
worker rem [] = case rem of
Nothing -> []
Just (w,_) -> [B.singleton w]
realizeBitStringLazy :: BitString -> L.ByteString
realizeBitStringLazy = L.fromChunks . realizeBitString'
realizeBitStringStrict :: BitString -> B.ByteString
realizeBitStringStrict = B.concat . realizeBitString'
#ifdef WITH_QUICKCHECK
newtype Size = Size Int64 deriving Show
newtype BoolList = BoolList [Bool] deriving Show
newtype SearchFor = SearchFor BitString deriving Show
instance Arbitrary Size where
arbitrary = Size <$> (fromIntegral :: Int -> Int64) <$> choose (0,64)
instance Arbitrary BoolList where
arbitrary = do
Size k <- arbitrary
BoolList <$> vector (fromIntegral k)
instance Arbitrary BitChunk where
arbitrary = do
k <- choose (0,24) :: Gen Int
l <- choose (0,15) :: Gen Int
BoolList list <- arbitrary
let bits1 = bitChunkDrop (fromIntegral k) $ bitChunkFromList list
len = bitChunkLength bits1
bits2 = bitChunkTake (max 0 $ len fromIntegral l) bits1
return bits2
instance Arbitrary SearchFor where
arbitrary = do
b <- arbitrary
let l = length b
if l >= 48 && l < 96
then return (SearchFor b)
else arbitrary
instance Arbitrary BitString where
arbitrary = do
k <- choose (0,7)
BitString <$> vector k
runAllTest :: IO ()
runAllTest = do
let mytest (text,prop) = do
print text
quickCheck prop
mytest ("fromToList" , prop_fromToList )
mytest ("toFromList" , prop_toFromList )
mytest ("append" , prop_append )
mytest ("drop" , prop_drop )
mytest ("take" , prop_take )
mytest ("dropChunk" , prop_dropChunk )
mytest ("takeChunk" , prop_takeChunk )
mytest ("realign" , prop_realign )
mytest ("realizeChunk" , prop_realizeChunk )
mytest ("realize" , prop_realize )
mytest ("realize" , prop_realize )
mytest ("realize" , prop_realize )
mytest ("realize" , prop_realize )
mytest ("realizeLen" , prop_realizeLen )
mytest ("findSubstring1" , prop_findSubstring1 )
mytest ("findSubstring1" , prop_findSubstring1 )
mytest ("findSubstring1" , prop_findSubstring1 )
mytest ("findSubstring1" , prop_findSubstring1 )
mytest ("findSubstring1a" , prop_findSubstring1a )
mytest ("findSubstring1b" , prop_findSubstring1b )
mytest ("findSubstring2" , prop_findSubstring2 )
prop_fromToList :: BitString -> Bool
prop_fromToList bits = fromList (toList bits) == bits
prop_toFromList :: BoolList -> Bool
prop_toFromList (BoolList list) = toList (fromList list) == list
prop_append :: [BitString] -> Bool
prop_append xs = toList (concat xs) == List.concat (map toList xs)
prop_drop :: Size -> BitString -> Bool
prop_drop (Size k) xs = toList (drop k xs) == List.drop (fromIntegral k) (toList xs)
prop_take :: Size -> BitString -> Bool
prop_take (Size k) xs = toList (take k xs) == List.take (fromIntegral k) (toList xs)
prop_dropChunk :: Size -> BitChunk -> Bool
prop_dropChunk (Size k) xs = bitChunkToList (bitChunkDrop k xs) == List.drop (fromIntegral k) (bitChunkToList xs)
prop_takeChunk :: Size -> BitChunk -> Bool
prop_takeChunk (Size k) xs = bitChunkToList (bitChunkTake k xs) == List.take (fromIntegral k) (bitChunkToList xs)
prop_realign :: BitChunk -> Bool
prop_realign chunk = realignBitChunk chunk == chunk
prop_realizeChunk :: BitChunk -> Bool
prop_realizeChunk chunk = append (bitString whole) (BitString [end]) == BitString [chunk] where
(whole,remain) = realizeBitChunk chunk
end = case remain of
Nothing -> emptyBitChunk
Just (w,k) -> BitChunk 0 (fromIntegral k) (B.singleton w)
prop_realize :: BitString -> Bool
prop_realize bits = let n = length bits in unsafeBitString' 0 n (realizeBitStringStrict bits) == bits
prop_realizeLen :: BitString -> Bool
prop_realizeLen bits = let n = length bits in div (n+7) 8 == (fromIntegral $ B.length $ realizeBitStringStrict bits)
prop_findSubstring1 :: SearchFor -> BitString -> BitString -> Bool
prop_findSubstring1 (SearchFor what) pre post = findSubstring what big == Just (length pre) where
big = concat [ pre , what , post ]
prop_findSubstring1a :: SearchFor -> BitString -> Bool
prop_findSubstring1a (SearchFor what) pre = findSubstring what big == Just (length pre) where
big = concat [ pre , what ]
prop_findSubstring1b :: SearchFor -> BitString -> Bool
prop_findSubstring1b (SearchFor what) post = findSubstring what big == Just 0 where
big = concat [ what , post ]
prop_findSubstring2 :: SearchFor -> BitString -> BitString -> Bool
prop_findSubstring2 (SearchFor what) pre post = findSubstring what big == Nothing where
big = concat [ pre , post ]
#endif // WITH_QUICKCHECK