{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, RecordWildCards,
    UnboxedTuples #-}

-- |
-- Module      :  Data.Attoparsec.Text.Buffer
-- Copyright   :  Bryan O'Sullivan 2007-2015
-- License     :  BSD3
--
-- Maintainer  :  bos@serpentine.com
-- Stability   :  experimental
-- Portability :  GHC
--
-- An immutable buffer that supports cheap appends.

-- A Buffer is divided into an immutable read-only zone, followed by a
-- mutable area that we've preallocated, but not yet written to.
--
-- We overallocate at the end of a Buffer so that we can cheaply
-- append.  Since a user of an existing Buffer cannot see past the end
-- of its immutable zone into the data that will change during an
-- append, this is safe.
--
-- Once we run out of space at the end of a Buffer, we do the usual
-- doubling of the buffer size.

module Data.Attoparsec.Text.Buffer
    (
      Buffer
    , buffer
    , unbuffer
    , unbufferAt
    , length
    , pappend
    , iter
    , iter_
    , substring
    , lengthCodeUnits
    , dropCodeUnits
    ) where

import Control.Exception (assert)
import Data.List (foldl1')
import Data.Monoid as Mon (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.Text ()
import Data.Text.Internal (Text(..))
#if MIN_VERSION_text(2,0,0)
import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader)
import Data.Text.Unsafe (iterArray, lengthWord8)
#else
import Data.Bits (shiftR)
import Data.Text.Internal.Encoding.Utf16 (chr2)
import Data.Text.Internal.Unsafe.Char (unsafeChr)
import Data.Text.Unsafe (lengthWord16)
#endif
import Data.Text.Unsafe (Iter(..))
import Foreign.Storable (sizeOf)
import GHC.Exts (Int(..), indexIntArray#, unsafeCoerce#, writeIntArray#)
import GHC.ST (ST(..), runST)
import Prelude hiding (length)
import qualified Data.Text.Array as A

-- If _cap is zero, this buffer is empty.
data Buffer = Buf {
      Buffer -> Array
_arr :: {-# UNPACK #-} !A.Array
    , Buffer -> Int
_off :: {-# UNPACK #-} !Int
    , Buffer -> Int
_len :: {-# UNPACK #-} !Int
    , Buffer -> Int
_cap :: {-# UNPACK #-} !Int
    , Buffer -> Int
_gen :: {-# UNPACK #-} !Int
    }

instance Show Buffer where
    showsPrec :: Int -> Buffer -> ShowS
showsPrec Int
p = Int -> Text -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (Text -> ShowS) -> (Buffer -> Text) -> Buffer -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Buffer -> Text
unbuffer

-- | The initial 'Buffer' has no mutable zone, so we can avoid all
-- copies in the (hopefully) common case of no further input being fed
-- to us.
buffer :: Text -> Buffer
buffer :: Text -> Buffer
buffer (Text Array
arr Int
off Int
len) = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr Int
off Int
len Int
len Int
0

unbuffer :: Buffer -> Text
unbuffer :: Buffer -> Text
unbuffer (Buf Array
arr Int
off Int
len Int
_ Int
_) = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
len

unbufferAt :: Int -> Buffer -> Text
unbufferAt :: Int -> Buffer -> Text
unbufferAt Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)

instance Semigroup Buffer where
    (Buf Array
_ Int
_ Int
_ Int
0 Int
_) <> :: Buffer -> Buffer -> Buffer
<> Buffer
b                     = Buffer
b
    Buffer
a               <> (Buf Array
_ Int
_ Int
_ Int
0 Int
_)       = Buffer
a
    Buffer
buf             <> (Buf Array
arr Int
off Int
len Int
_ Int
_) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len
    {-# INLINE (<>) #-}

instance Monoid Buffer where
    mempty :: Buffer
mempty = Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
A.empty Int
0 Int
0 Int
0 Int
0
    {-# INLINE mempty #-}

    mappend :: Buffer -> Buffer -> Buffer
mappend = Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>)

    mconcat :: [Buffer] -> Buffer
mconcat [] = Buffer
forall a. Monoid a => a
Mon.mempty
    mconcat [Buffer]
xs = (Buffer -> Buffer -> Buffer) -> [Buffer] -> Buffer
forall a. (a -> a -> a) -> [a] -> a
foldl1' Buffer -> Buffer -> Buffer
forall a. Semigroup a => a -> a -> a
(<>) [Buffer]
xs

pappend :: Buffer -> Text -> Buffer
pappend :: Buffer -> Text -> Buffer
pappend (Buf Array
_ Int
_ Int
_ Int
0 Int
_) Text
t      = Text -> Buffer
buffer Text
t
pappend Buffer
buf (Text Array
arr Int
off Int
len) = Buffer -> Array -> Int -> Int -> Buffer
append Buffer
buf Array
arr Int
off Int
len

append :: Buffer -> A.Array -> Int -> Int -> Buffer
append :: Buffer -> Array -> Int -> Int -> Buffer
append (Buf Array
arr0 Int
off0 Int
len0 Int
cap0 Int
gen0) !Array
arr1 !Int
off1 !Int
len1 = (forall s. ST s Buffer) -> Buffer
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s Buffer) -> Buffer)
-> (forall s. ST s Buffer) -> Buffer
forall a b. (a -> b) -> a -> b
$ do
#if MIN_VERSION_text(2,0,0)
  let woff :: Int
woff    = Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
0::Int)
#else
  let woff    = sizeOf (0::Int) `shiftR` 1
#endif
      newlen :: Int
newlen  = Int
len0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len1
      !gen :: Int
gen    = if Int
gen0 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int
0 else Array -> Int
readGen Array
arr0
  if Int
gen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
gen0 Bool -> Bool -> Bool
&& Int
newlen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
cap0
    then do
      let newgen :: Int
newgen = Int
gen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
      MArray s
marr <- Array -> ST s (MArray s)
forall s. Array -> ST s (MArray s)
unsafeThaw Array
arr0
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
#if MIN_VERSION_text(2,0,0)
      Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
A.copyI Int
len1 MArray s
marr (Int
off0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1
#else
      A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
#endif
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
off0 Int
newlen Int
cap0 Int
newgen)
    else do
      let newcap :: Int
newcap = Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
          newgen :: Int
newgen = Int
1
      MArray s
marr <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new (Int
newcap Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
woff)
      MArray s -> Int -> ST s ()
forall s. MArray s -> Int -> ST s ()
writeGen MArray s
marr Int
newgen
#if MIN_VERSION_text(2,0,0)
      Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
A.copyI Int
len0 MArray s
marr Int
woff Array
arr0 Int
off0
      Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
A.copyI Int
len1 MArray s
marr (Int
woffInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len0) Array
arr1 Int
off1
#else
      A.copyI marr woff arr0 off0 (woff+len0)
      A.copyI marr (woff+len0) arr1 off1 (woff+newlen)
#endif
      Array
arr2 <- MArray s -> ST s Array
forall s. MArray s -> ST s Array
A.unsafeFreeze MArray s
marr
      Buffer -> ST s Buffer
forall (m :: * -> *) a. Monad m => a -> m a
return (Array -> Int -> Int -> Int -> Int -> Buffer
Buf Array
arr2 Int
woff Int
newlen Int
newcap Int
newgen)

length :: Buffer -> Int
length :: Buffer -> Int
length (Buf Array
_ Int
_ Int
len Int
_ Int
_) = Int
len
{-# INLINE length #-}

substring :: Int -> Int -> Buffer -> Text
substring :: Int -> Int -> Buffer -> Text
substring Int
s Int
l (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) Int
l
{-# INLINE substring #-}

#if MIN_VERSION_text(2,0,0)

lengthCodeUnits :: Text -> Int
lengthCodeUnits :: Text -> Int
lengthCodeUnits = Text -> Int
lengthWord8

dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits Int
s (Buf Array
arr Int
off Int
len Int
_ Int
_) =
  Bool -> Text -> Text
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
len) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
  Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
s) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s)
{-# INLINE dropCodeUnits #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-8
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter :: Buffer -> Int -> Iter
iter (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i = Array -> Int -> Iter
iterArray Array
arr (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i)
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-8 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ :: Buffer -> Int -> Int
iter_ (Buf Array
arr Int
off Int
_ Int
_ Int
_) Int
i = Word8 -> Int
utf8LengthByLeader (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
unsafeThaw :: Array -> ST s (MArray s)
unsafeThaw (A.ByteArray ByteArray#
a) = STRep s (MArray s) -> ST s (MArray s)
forall s a. STRep s a -> ST s a
ST (STRep s (MArray s) -> ST s (MArray s))
-> STRep s (MArray s) -> ST s (MArray s)
forall a b. (a -> b) -> a -> b
$ \State# s
s# ->
                          (# State# s
s#, MutableByteArray# s -> MArray s
forall s. MutableByteArray# s -> MArray s
A.MutableByteArray (ByteArray# -> MutableByteArray# s
unsafeCoerce# ByteArray#
a) #)

readGen :: A.Array -> Int
readGen :: Array -> Int
readGen (A.ByteArray ByteArray#
a) = case ByteArray# -> Int# -> Int#
indexIntArray# ByteArray#
a Int#
0# of Int#
r# -> Int# -> Int
I# Int#
r#

writeGen :: A.MArray s -> Int -> ST s ()
writeGen :: MArray s -> Int -> ST s ()
writeGen (A.MutableByteArray MutableByteArray# s
a) (I# Int#
gen#) = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s0# ->
  case MutableByteArray# s -> Int# -> Int# -> State# s -> State# s
forall d.
MutableByteArray# d -> Int# -> Int# -> State# d -> State# d
writeIntArray# MutableByteArray# s
a Int#
0# Int#
gen# State# s
s0# of
    State# s
s1# -> (# State# s
s1#, () #)

#else

lengthCodeUnits :: Text -> Int
lengthCodeUnits = lengthWord16

dropCodeUnits :: Int -> Buffer -> Text
dropCodeUnits s (Buf arr off len _ _) =
  assert (s >= 0 && s <= len) $
  Text arr (off+s) (len-s)
{-# INLINE dropCodeUnits #-}

-- | /O(1)/ Iterate (unsafely) one step forwards through a UTF-16
-- array, returning the current character and the delta to add to give
-- the next offset to iterate at.
iter :: Buffer -> Int -> Iter
iter (Buf arr off _ _ _) i
    | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
    | otherwise                = Iter (chr2 m n) 2
  where m = A.unsafeIndex arr j
        n = A.unsafeIndex arr k
        j = off + i
        k = j + 1
{-# INLINE iter #-}

-- | /O(1)/ Iterate one step through a UTF-16 array, returning the
-- delta to add to give the next offset to iterate at.
iter_ :: Buffer -> Int -> Int
iter_ (Buf arr off _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
                                | otherwise                = 2
  where m = A.unsafeIndex arr (off+i)
{-# INLINE iter_ #-}

unsafeThaw :: A.Array -> ST s (A.MArray s)
unsafeThaw A.Array{..} = ST $ \s# ->
                          (# s#, A.MArray (unsafeCoerce# aBA) #)

readGen :: A.Array -> Int
readGen a = case indexIntArray# (A.aBA a) 0# of r# -> I# r#

writeGen :: A.MArray s -> Int -> ST s ()
writeGen a (I# gen#) = ST $ \s0# ->
  case writeIntArray# (A.maBA a) 0# gen# s0# of
    s1# -> (# s1#, () #)

#endif