{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash                  #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE TypeFamilies               #-}
{-# LANGUAGE UnboxedTuples              #-}
{-# LANGUAGE UnliftedFFITypes           #-}
{-# LANGUAGE Unsafe                     #-}
{-# LANGUAGE ViewPatterns               #-}

-- |
-- Module      : Data.Text.Short.Internal
-- Copyright   : © Herbert Valerio Riedel 2017
-- License     : BSD3
--
-- Maintainer  : hvr@gnu.org
-- Stability   : stable
--
-- Memory-efficient representation of Unicode text strings.
--
-- @since 0.1
module Data.Text.Short.Internal
    ( -- * The 'ShortText' type
      ShortText(..)

      -- * Basic operations
    , null
    , length
    , isAscii
    , splitAt
    , splitAtEnd
    , indexEndMaybe
    , indexMaybe
    , isPrefixOf
    , stripPrefix
    , isSuffixOf
    , stripSuffix

    , cons
    , snoc
    , uncons
    , unsnoc

    , findIndex
    , find
    , all

    , span
    , spanEnd
    , split

    , intersperse
    , intercalate
    , reverse
    , replicate

    , filter
    , dropAround

    , foldl
    , foldl'
    , foldr
    , foldl1
    , foldl1'
    , foldr1

      -- * Conversions
      -- ** 'Char'
    , singleton

      -- ** 'String'
    , Data.Text.Short.Internal.fromString
    , toString

      -- ** 'T.Text'
    , fromText
    , toText

      -- ** 'BS.ByteString'
    , fromShortByteString
    , fromShortByteStringUnsafe
    , toShortByteString

    , fromByteString
    , fromByteStringUnsafe
    , toByteString

    , toBuilder

      -- * misc
      -- ** For Haddock

    , BS.ByteString
    , T.Text
    , module Prelude

      -- ** Internals
    , isValidUtf8
    ) where

import           Control.DeepSeq                (NFData)
import           Control.Monad.ST               (stToIO)
import           Data.Binary
import           Data.Bits
import qualified Data.ByteString                as BS
import qualified Data.ByteString.Builder        as BB
import           Data.ByteString.Short          (ShortByteString)
import qualified Data.ByteString.Short          as BSS
import qualified Data.ByteString.Short.Internal as BSSI
import           Data.Char                      (ord)
import           Data.Data                      (Data(..),constrIndex, Constr,
                                                 mkConstr, DataType, mkDataType,
                                                 Fixity(Prefix))
import           Data.Hashable                  (Hashable)
import           Data.Typeable                  (Typeable)
import qualified Data.List                      as List
import           Data.Maybe                     (fromMaybe, isNothing)
import           Data.Semigroup
import qualified Data.String                    as S
import qualified Data.Text                      as T
import qualified Data.Text.Encoding             as T
import           Foreign.C
import           GHC.Base                       (assert, unsafeChr)
import qualified GHC.CString                    as GHC
import           GHC.Exts                       (Addr#, ByteArray#, Int (I#),
                                                 Int#, MutableByteArray#,
                                                 Ptr (..), RealWorld, Word (W#))
import qualified GHC.Exts
import qualified GHC.Foreign                    as GHC
import           GHC.IO.Encoding
import           GHC.ST
import           Prelude                        hiding (all, any, break, concat,
                                                 drop, dropWhile, filter, foldl,
                                                 foldl1, foldr, foldr1, head,
                                                 init, last, length, null,
                                                 replicate, reverse, span,
                                                 splitAt, tail, take, takeWhile)
import           System.IO.Unsafe
import           Text.Printf                    (PrintfArg, formatArg,
                                                 formatString)

import qualified PrimOps

-- | A compact representation of Unicode strings.
--
-- A 'ShortText' value is a sequence of Unicode scalar values, as defined in
-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35 §3.9, definition D76 of the Unicode 5.2 standard >;
-- This means that a 'ShortText' is a list of (scalar) Unicode code-points (i.e. code-points in the range @[U+00 .. U+D7FF] ∪ [U+E000 .. U+10FFFF]@).
--
-- This type relates to 'T.Text' as 'ShortByteString' relates to 'BS.ByteString' by providing a more compact type. Please consult the documentation of "Data.ByteString.Short" for more information.
--
-- Currently, a boxed unshared 'T.Text' has a memory footprint of 6 words (i.e. 48 bytes on 64-bit systems) plus 2 or 4 bytes per code-point (due to the internal UTF-16 representation). Each 'T.Text' value which can share its payload with another 'T.Text' requires only 4 words additionally. Unlike 'BS.ByteString', 'T.Text' use unpinned memory.
--
-- In comparison, the footprint of a boxed 'ShortText' is only 4 words (i.e. 32 bytes on 64-bit systems) plus 1, 2, 3, or 4 bytes per code-point (due to the internal UTF-8 representation).
-- It can be shown that for realistic data <http://utf8everywhere.org/#asian UTF-16 has a space overhead of 50% over UTF-8>.
--
-- __NOTE__: The `Typeable` instance isn't defined for GHC 7.8 (and older) prior to @text-short-0.1.3@
--
-- @since 0.1
newtype ShortText = ShortText ShortByteString
                  deriving (Eq ShortText
Eq ShortText
-> (Int -> ShortText -> Int)
-> (ShortText -> Int)
-> Hashable ShortText
Int -> ShortText -> Int
ShortText -> Int
forall a. Eq a -> (Int -> a -> Int) -> (a -> Int) -> Hashable a
hash :: ShortText -> Int
$chash :: ShortText -> Int
hashWithSalt :: Int -> ShortText -> Int
$chashWithSalt :: Int -> ShortText -> Int
$cp1Hashable :: Eq ShortText
Hashable,Semigroup ShortText
ShortText
Semigroup ShortText
-> ShortText
-> (ShortText -> ShortText -> ShortText)
-> ([ShortText] -> ShortText)
-> Monoid ShortText
[ShortText] -> ShortText
ShortText -> ShortText -> ShortText
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [ShortText] -> ShortText
$cmconcat :: [ShortText] -> ShortText
mappend :: ShortText -> ShortText -> ShortText
$cmappend :: ShortText -> ShortText -> ShortText
mempty :: ShortText
$cmempty :: ShortText
$cp1Monoid :: Semigroup ShortText
Monoid,ShortText -> ()
(ShortText -> ()) -> NFData ShortText
forall a. (a -> ()) -> NFData a
rnf :: ShortText -> ()
$crnf :: ShortText -> ()
NFData,b -> ShortText -> ShortText
NonEmpty ShortText -> ShortText
ShortText -> ShortText -> ShortText
(ShortText -> ShortText -> ShortText)
-> (NonEmpty ShortText -> ShortText)
-> (forall b. Integral b => b -> ShortText -> ShortText)
-> Semigroup ShortText
forall b. Integral b => b -> ShortText -> ShortText
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> ShortText -> ShortText
$cstimes :: forall b. Integral b => b -> ShortText -> ShortText
sconcat :: NonEmpty ShortText -> ShortText
$csconcat :: NonEmpty ShortText -> ShortText
<> :: ShortText -> ShortText -> ShortText
$c<> :: ShortText -> ShortText -> ShortText
Data.Semigroup.Semigroup,Typeable)

-- | It exposes a similar 'Data' instance abstraction as 'T.Text' (see
-- discussion referenced there for more details), preserving the
-- @[Char]@ data abstraction at the cost of inefficiency.
--
-- @since 0.1.3
instance Data ShortText where
  gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> ShortText -> c ShortText
gfoldl forall d b. Data d => c (d -> b) -> d -> c b
f forall g. g -> c g
z ShortText
txt = (String -> ShortText) -> c (String -> ShortText)
forall g. g -> c g
z String -> ShortText
fromString c (String -> ShortText) -> String -> c ShortText
forall d b. Data d => c (d -> b) -> d -> c b
`f` (ShortText -> String
toString ShortText
txt)
  toConstr :: ShortText -> Constr
toConstr ShortText
_ = Constr
packConstr
  gunfold :: (forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c ShortText
gunfold forall b r. Data b => c (b -> r) -> c r
k forall r. r -> c r
z Constr
c = case Constr -> Int
constrIndex Constr
c of
    Int
1 -> c (String -> ShortText) -> c ShortText
forall b r. Data b => c (b -> r) -> c r
k ((String -> ShortText) -> c (String -> ShortText)
forall r. r -> c r
z String -> ShortText
fromString)
    Int
_ -> String -> c ShortText
forall a. HasCallStack => String -> a
error String
"gunfold"
  dataTypeOf :: ShortText -> DataType
dataTypeOf ShortText
_ = DataType
shortTextDataType

packConstr :: Constr
packConstr :: Constr
packConstr = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
shortTextDataType String
"fromString" [] Fixity
Prefix

shortTextDataType :: DataType
shortTextDataType :: DataType
shortTextDataType = String -> [Constr] -> DataType
mkDataType String
"Data.Text.Short" [Constr
packConstr]

instance Eq ShortText where
  {-# INLINE (==) #-}
  == :: ShortText -> ShortText -> Bool
(==) ShortText
x ShortText
y
    | Int
lx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
ly  = Bool
False
    | Int
lx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==  Int
0  = Bool
True
    | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
0# Int#
n# of
                    Int#
0# -> Bool
True
                    Int#
_  -> Bool
False
    where
      !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
      !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

instance Ord ShortText where
  compare :: ShortText -> ShortText -> Ordering
compare ShortText
t1 ShortText
t2
    | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0  = Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Int
n1 Int
n2
    | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# ByteArray#
ba1# Int#
0# ByteArray#
ba2# Int#
0# Int#
n# of
        Int#
r# | Int# -> Int
I# Int#
r# Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 -> Ordering
LT
           | Int# -> Int
I# Int#
r# Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 -> Ordering
GT
           | Int
n1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2   -> Ordering
LT
           | Int
n1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n2   -> Ordering
GT
           | Bool
otherwise -> Ordering
EQ
    where
      ba1# :: ByteArray#
ba1# = ShortText -> ByteArray#
toByteArray# ShortText
t1
      ba2# :: ByteArray#
ba2# = ShortText -> ByteArray#
toByteArray# ShortText
t2
      !n1 :: Int
n1 = ShortText -> Int
toLength ShortText
t1
      !n2 :: Int
n2 = ShortText -> Int
toLength ShortText
t2
      !n :: Int
n@(I# Int#
n#) = Int
n1 Int -> Int -> Int
forall a. Ord a => a -> a -> a
`min` Int
n2

instance Show ShortText where
    showsPrec :: Int -> ShortText -> ShowS
showsPrec Int
p (ShortText ShortByteString
b) = Int -> String -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
p (TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
utf8 ShortByteString
b)
    show :: ShortText -> String
show (ShortText ShortByteString
b)        = ShowS
forall a. Show a => a -> String
show        (TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
utf8 ShortByteString
b)

instance Read ShortText where
    readsPrec :: Int -> ReadS ShortText
readsPrec Int
p = ((String, String) -> (ShortText, String))
-> [(String, String)] -> [(ShortText, String)]
forall a b. (a -> b) -> [a] -> [b]
map (\(String
x,String
s) -> (ShortByteString -> ShortText
ShortText (ShortByteString -> ShortText) -> ShortByteString -> ShortText
forall a b. (a -> b) -> a -> b
$ TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
utf8 String
x,String
s)) ([(String, String)] -> [(ShortText, String)])
-> (String -> [(String, String)]) -> ReadS ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> [(String, String)]
forall a. Read a => Int -> ReadS a
readsPrec Int
p

-- | @since 0.1.2
instance PrintfArg ShortText where
  formatArg :: ShortText -> FieldFormatter
formatArg ShortText
txt = String -> FieldFormatter
forall a. IsChar a => [a] -> FieldFormatter
formatString (String -> FieldFormatter) -> String -> FieldFormatter
forall a b. (a -> b) -> a -> b
$ ShortText -> String
toString ShortText
txt

-- | The 'Binary' encoding matches the one for 'T.Text'
#if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
    put :: ShortText -> Put
put = ShortByteString -> Put
forall t. Binary t => t -> Put
put (ShortByteString -> Put)
-> (ShortText -> ShortByteString) -> ShortText -> Put
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString
    get :: Get ShortText
get = do
        ShortByteString
sbs <- Get ShortByteString
forall t. Binary t => Get t
get
        case ShortByteString -> Maybe ShortText
fromShortByteString ShortByteString
sbs of
          Maybe ShortText
Nothing -> String -> Get ShortText
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Binary.get(ShortText): Invalid UTF-8 stream"
          Just ShortText
st -> ShortText -> Get ShortText
forall (m :: * -> *) a. Monad m => a -> m a
return ShortText
st
#else
-- fallback via 'ByteString' instance
instance Binary ShortText where
    put = put . toByteString
    get = do
        bs <- get
        case fromByteString bs of
          Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
          Just st -> return st
#endif

-- | \(\mathcal{O}(1)\) Test whether a 'ShortText' is empty.
--
-- >>> null ""
-- True
--
-- prop> null (singleton c) == False
--
-- prop> null t == (length t == 0)
--
-- @since 0.1
null :: ShortText -> Bool
null :: ShortText -> Bool
null = ShortByteString -> Bool
BSS.null (ShortByteString -> Bool)
-> (ShortText -> ShortByteString) -> ShortText -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | \(\mathcal{O}(n)\) Count the number of Unicode code-points in a 'ShortText'.
--
-- >>> length "abcd€"
-- 5
--
-- >>> length ""
-- 0
--
-- prop> length t >= 0
--
-- @since 0.1
length :: ShortText -> Int
length :: ShortText -> Int
length ShortText
st = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> Int) -> CSize -> Int
forall a b. (a -> b) -> a -> b
$ IO CSize -> CSize
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CSize
c_text_short_length (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st))

foreign import ccall unsafe "hs_text_short_length" c_text_short_length :: ByteArray# -> CSize -> IO CSize

-- | \(\mathcal{O}(n)\) Test whether 'ShortText' contains only ASCII code-points (i.e. only U+0000 through U+007F).
--
-- This is a more efficient version of @'all' 'Data.Char.isAscii'@.
--
-- >>> isAscii ""
-- True
--
-- >>> isAscii "abc\NUL"
-- True
--
-- >>> isAscii "abcd€"
-- False
--
-- prop> isAscii t == all (< '\x80') t
--
-- @since 0.1
isAscii :: ShortText -> Bool
isAscii :: ShortText -> Bool
isAscii ShortText
st = (CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
/= CInt
0) (CInt -> Bool) -> CInt -> Bool
forall a b. (a -> b) -> a -> b
$ IO CInt -> CInt
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CInt
c_text_short_is_ascii (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
sz)
  where
    sz :: CSize
sz = ShortText -> CSize
toCSize ShortText
st

foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: ByteArray# -> CSize -> IO CInt

-- | \(\mathcal{O}(n)\) Test whether /all/ code points in 'ShortText' satisfy a predicate.
--
-- >>> all (const False) ""
-- True
--
-- >>> all (> 'c') "abcdabcd"
-- False
--
-- >>> all (/= 'c') "abdabd"
-- True
--
-- @since 0.1.2
all :: (Char -> Bool) -> ShortText -> Bool
all :: (Char -> Bool) -> ShortText -> Bool
all Char -> Bool
p ShortText
st = Maybe B -> Bool
forall a. Maybe a -> Bool
isNothing ((Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (Int -> B
B Int
0))

-- | \(\mathcal{O}(n)\) Return the left-most codepoint in 'ShortText' that satisfies the given predicate.
--
-- >>> find (> 'b') "abcdabcd"
-- Just 'c'
--
-- >>> find (> 'b') "ababab"
-- Nothing
--
-- @since 0.1.2
find :: (Char -> Bool) -> ShortText -> Maybe Char
find :: (Char -> Bool) -> ShortText -> Maybe Char
find Char -> Bool
p ShortText
st = B -> Maybe Char
go B
0
  where
    go :: B -> Maybe Char
go !B
ofs
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = Maybe Char
forall a. Maybe a
Nothing
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> Maybe Char -> Maybe Char
`seq` B
ofs' B -> Maybe Char -> Maybe Char
`seq`
                        if Char -> Bool
p Char
c
                        then Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c
                        else B -> Maybe Char
go B
ofs'

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Return the index of the left-most codepoint in 'ShortText' that satisfies the given predicate.
--
-- >>> findIndex (> 'b') "abcdabcdef"
-- Just 2
--
-- >>> findIndex (> 'b') "ababab"
-- Nothing
--
-- prop> (indexMaybe t =<< findIndex p t) == find p t
--
-- @since 0.1.2
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex Char -> Bool
p ShortText
st = B -> Int -> Maybe Int
go B
0 Int
0
  where
    go :: B -> Int -> Maybe Int
go !B
ofs !Int
i
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = Maybe Int
forall a. Maybe a
Nothing
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> Maybe Int -> Maybe Int
`seq` B
ofs' B -> Maybe Int -> Maybe Int
`seq`
                        if Char -> Bool
p Char
c
                        then Int -> Maybe Int
forall a. a -> Maybe a
Just Int
i
                        else B -> Int -> Maybe Int
go B
ofs' (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

    !sz :: B
sz = ShortText -> B
toB ShortText
st


-- | \(\mathcal{O}(n)\) Splits a string into components delimited by separators,
-- where the predicate returns True for a separator element.  The
-- resulting components do not contain the separators.  Two adjacent
-- separators result in an empty component in the output.  eg.
--
-- >>> split (=='a') "aabbaca"
-- ["","","bb","c",""]
--
-- >>> split (=='a') ""
-- [""]
--
-- prop> intercalate (singleton c) (split (== c) t) = t
--
-- __NOTE__: 'split' never returns an empty list to match the semantics of its counterpart from "Data.Text".
--
-- @since 0.1.3
split :: (Char -> Bool) -> ShortText -> [ShortText]
split :: (Char -> Bool) -> ShortText -> [ShortText]
split Char -> Bool
p ShortText
st0 = B -> [ShortText]
go B
0
  where
    go :: B -> [ShortText]
go !B
ofs0 = case (Char -> Bool) -> ShortText -> B -> Maybe (B, B)
findOfs' Char -> Bool
p ShortText
st0 B
ofs0 of
      Just (B
ofs1,B
ofs2) -> ShortText -> B -> B -> ShortText
slice ShortText
st0 B
ofs0 (B
ofs1B -> B -> B
forall a. Num a => a -> a -> a
-B
ofs0) ShortText -> [ShortText] -> [ShortText]
forall a. a -> [a] -> [a]
: B -> [ShortText]
go B
ofs2
      Maybe (B, B)
Nothing
        | B
ofs0 B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0 -> ShortText
st0 ShortText -> [ShortText] -> [ShortText]
forall a. a -> [a] -> [a]
: []
        | Bool
otherwise -> ShortText -> B -> B -> ShortText
slice ShortText
st0 B
ofs0 (B
maxOfsB -> B -> B
forall a. Num a => a -> a -> a
-B
ofs0) ShortText -> [ShortText] -> [ShortText]
forall a. a -> [a] -> [a]
: []

    !maxOfs :: B
maxOfs = ShortText -> B
toB ShortText
st0

-- internal helper
{-# INLINE findOfs #-}
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs Char -> Bool
p ShortText
st = B -> Maybe B
go
  where
    go :: B -> Maybe B
    go :: B -> Maybe B
go !B
ofs | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz = Maybe B
forall a. Maybe a
Nothing
    go !B
ofs | Char -> Bool
p Char
c       = B -> Maybe B
forall a. a -> Maybe a
Just B
ofs
            | Bool
otherwise = B -> Maybe B
go B
ofs'
      where
        (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs

    !sz :: B
sz = ShortText -> B
toB ShortText
st

{-# INLINE findOfs' #-}
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B,B)
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B, B)
findOfs' Char -> Bool
p ShortText
st = B -> Maybe (B, B)
go
  where
    go :: B -> Maybe (B,B)
    go :: B -> Maybe (B, B)
go !B
ofs | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz = Maybe (B, B)
forall a. Maybe a
Nothing
    go !B
ofs | Char -> Bool
p Char
c       = (B, B) -> Maybe (B, B)
forall a. a -> Maybe a
Just (B
ofs,B
ofs')
            | Bool
otherwise = B -> Maybe (B, B)
go B
ofs'
      where
        (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs

    !sz :: B
sz = ShortText -> B
toB ShortText
st


{-# INLINE findOfsRev #-}
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev Char -> Bool
p ShortText
st = B -> Maybe B
go
  where
    go :: B -> Maybe B
go (B Int
0) = Maybe B
forall a. Maybe a
Nothing
    go !B
ofs
      | Char -> Bool
p (CP -> Char
cp2ch CP
cp) = B -> Maybe B
forall a. a -> Maybe a
Just B
ofs
      | Bool
otherwise    = B -> Maybe B
go (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
-CP -> B
cpLen CP
cp)
      where
        !cp :: CP
cp = ShortText -> B -> CP
readCodePointRev ShortText
st B
ofs

-- | \(\mathcal{O}(n)\) Split 'ShortText' into longest prefix satisfying the given predicate and the remaining suffix.
--
-- >>> span (< 'c') "abcdabcd"
-- ("ab","cdabcd")
--
-- prop> fst (span p t) <> snd (span p t) == t
--
-- @since 0.1.2
span :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
span :: (Char -> Bool) -> ShortText -> (ShortText, ShortText)
span Char -> Bool
p ShortText
st
  | Just B
ofs <- (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (Int -> B
B Int
0) = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | Bool
otherwise = (ShortText
st,ShortText
forall a. Monoid a => a
mempty)

-- | \(\mathcal{O}(n)\) Split 'ShortText' into longest suffix satisfying the given predicate and the preceding prefix.
--
-- >>> spanEnd (> 'c') "abcdabcd"
-- ("abcdabc","d")
--
-- prop> fst (spanEnd p t) <> snd (spanEnd p t) == t
--
-- @since 0.1.2
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText, ShortText)
spanEnd Char -> Bool
p ShortText
st
  | Just B
ofs <- (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
st (ShortText -> B
toB ShortText
st) = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | Bool
otherwise = (ShortText
forall a. Monoid a => a
mempty,ShortText
st)

----------------------------------------------------------------------------

toCSize :: ShortText -> CSize
toCSize :: ShortText -> CSize
toCSize = Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> CSize) -> (ShortText -> Int) -> ShortText -> CSize
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Int
BSS.length (ShortByteString -> Int)
-> (ShortText -> ShortByteString) -> ShortText -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

toB :: ShortText -> B
toB :: ShortText -> B
toB = Int -> B
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> B) -> (ShortText -> Int) -> ShortText -> B
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> Int
BSS.length (ShortByteString -> Int)
-> (ShortText -> ShortByteString) -> ShortText -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

toLength :: ShortText -> Int
toLength :: ShortText -> Int
toLength ShortText
st = Int# -> Int
I# (ShortText -> Int#
toLength# ShortText
st)

toLength# :: ShortText -> Int#
toLength# :: ShortText -> Int#
toLength# ShortText
st = ByteArray# -> Int#
GHC.Exts.sizeofByteArray# (ShortText -> ByteArray#
toByteArray# ShortText
st)

toByteArray# :: ShortText -> ByteArray#
toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ByteArray#
ba#)) = ByteArray#
ba#

-- | \(\mathcal{O}(0)\) Converts to UTF-8 encoded 'ShortByteString'
--
-- This operation has effectively no overhead, as it's currently merely a @newtype@-cast.
--
-- @since 0.1
toShortByteString :: ShortText -> ShortByteString
toShortByteString :: ShortText -> ShortByteString
toShortByteString (ShortText ShortByteString
b) = ShortByteString
b

-- | \(\mathcal{O}(n)\) Converts to UTF-8 encoded 'BS.ByteString'
--
-- @since 0.1
toByteString :: ShortText -> BS.ByteString
toByteString :: ShortText -> ByteString
toByteString = ShortByteString -> ByteString
BSS.fromShort (ShortByteString -> ByteString)
-> (ShortText -> ShortByteString) -> ShortText -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | Construct a 'BB.Builder' that encodes 'ShortText' as UTF-8.
--
-- @since 0.1
toBuilder :: ShortText -> BB.Builder
toBuilder :: ShortText -> Builder
toBuilder = ShortByteString -> Builder
BB.shortByteString (ShortByteString -> Builder)
-> (ShortText -> ShortByteString) -> ShortText -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ShortByteString
toShortByteString

-- | \(\mathcal{O}(n)\) Convert to 'String'
--
-- prop> (fromString . toString) t == t
--
-- __Note__: See documentation of 'fromString' for why @('toString' . 'fromString')@ is not an identity function.
--
-- @since 0.1
toString :: ShortText -> String
-- NOTE: impl below beats
--   toString = decodeStringShort' utf8 . toShortByteString
-- except for smallish strings
toString :: ShortText -> String
toString ShortText
st = B -> String
go B
0
  where
    go :: B -> String
go !B
ofs
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = []
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> ShowS
`seq` B
ofs' B -> ShowS
`seq` (Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: B -> String
go B
ofs')

    !sz :: B
sz = ShortText -> B
toB ShortText
st

----------------------------------------------------------------------------
-- Folds

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with
-- the binary operator and an initial in forward direction (i.e. from
-- left to right).
--
-- >>> foldl (\_ _ -> True) False ""
-- False
--
-- >>> foldl (\s c -> c : s) ['.'] "abcd"
-- "dcba."
--
-- @since 0.1.2
foldl :: (a -> Char -> a) -> a -> ShortText -> a
foldl :: (a -> Char -> a) -> a -> ShortText -> a
foldl a -> Char -> a
f a
z ShortText
st = B -> a -> a
go B
0 a
z
  where
    go :: B -> a -> a
go !B
ofs a
acc
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = a
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> a -> a
`seq` B
ofs' B -> a -> a
`seq` B -> a -> a
go B
ofs' (a -> Char -> a
f a
acc Char
c)

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with the binary operator.
--
-- >>> foldl1 max "abcdcba"
-- 'd'
--
-- >>> foldl1 const "abcd"
-- 'a'
--
-- >>> foldl1 (flip const) "abcd"
-- 'd'
--
-- __Note__: Will throw an 'error' exception if index is out of bounds.
--
-- @since 0.1.2
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 Char -> Char -> Char
f ShortText
st
  | B
sz B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0    = String -> Char
forall a. HasCallStack => String -> a
error String
"foldl1: empty ShortText"
  | Bool
otherwise  = B -> Char -> Char
go B
c0sz Char
c0
  where
    go :: B -> Char -> Char
go !B
ofs Char
acc
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = Char
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> Char -> Char
`seq` B
ofs' B -> Char -> Char
`seq` B -> Char -> Char
go B
ofs' (Char -> Char -> Char
f Char
acc Char
c)
    !sz :: B
sz = ShortText -> B
toB ShortText
st
    (Char
c0,B
c0sz) = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st (Int -> B
B Int
0)

-- | \(\mathcal{O}(n)\) Strict version of 'foldl'.
--
-- @since 0.1.2
foldl' :: (a -> Char -> a) -> a -> ShortText -> a
foldl' :: (a -> Char -> a) -> a -> ShortText -> a
foldl' a -> Char -> a
f !a
z ShortText
st = B -> a -> a
go B
0 a
z
  where
    go :: B -> a -> a
go !B
ofs !a
acc
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = a
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> a -> a
`seq` B
ofs' B -> a -> a
`seq` B -> a -> a
go B
ofs' (a -> Char -> a
f a
acc Char
c)

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Strict version of 'foldl1'.
--
-- @since 0.1.2
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' Char -> Char -> Char
f ShortText
st
  | B
sz B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0    = String -> Char
forall a. HasCallStack => String -> a
error String
"foldl1: empty ShortText"
  | Bool
otherwise  = B -> Char -> Char
go B
c0sz Char
c0
  where
    go :: B -> Char -> Char
go !B
ofs !Char
acc
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = Char
acc
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> Char -> Char
`seq` B
ofs' B -> Char -> Char
`seq` B -> Char -> Char
go B
ofs' (Char -> Char -> Char
f Char
acc Char
c)
    !sz :: B
sz = ShortText -> B
toB ShortText
st
    (Char
c0,B
c0sz) = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st (Int -> B
B Int
0)

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with
-- the binary operator and an initial in reverse direction (i.e. from
-- right to left).
--
-- >>> foldr (\_ _ -> True) False ""
-- False
--
-- >>> foldr (:) ['.'] "abcd"
-- "abcd."
--
-- @since 0.1.2
foldr :: (Char -> a -> a) -> a -> ShortText -> a
foldr :: (Char -> a -> a) -> a -> ShortText -> a
foldr Char -> a -> a
f a
z ShortText
st = B -> a
go B
0
  where
    go :: B -> a
go !B
ofs
      | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz  = a
z
      | Bool
otherwise  = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
                     in Char
c Char -> a -> a
`seq` B
ofs' B -> a -> a
`seq` Char -> a -> a
f Char
c (B -> a
go B
ofs')

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Reduces the characters of the 'ShortText' with the binary operator.
--
-- >>> foldr1 max "abcdcba"
-- 'd'
--
-- >>> foldr1 const "abcd"
-- 'a'
--
-- >>> foldr1 (flip const) "abcd"
-- 'd'
--
-- __Note__: Will throw an 'error' exception if index is out of bounds.
--
-- @since 0.1.2
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 Char -> Char -> Char
f ShortText
st
  | B
sz B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0    = String -> Char
forall a. HasCallStack => String -> a
error String
"foldr1: empty ShortText"
  | Bool
otherwise  = B -> Char
go B
0
  where
    go :: B -> Char
go !B
ofs = let (Char
c,B
ofs') = ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs
              in Char
c Char -> Char -> Char
`seq` B
ofs' B -> Char -> Char
`seq`
                 (if B
ofs' B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
sz then Char
c else Char -> Char -> Char
f Char
c (B -> Char
go B
ofs'))

    !sz :: B
sz = ShortText -> B
toB ShortText
st

-- | \(\mathcal{O}(n)\) Convert to 'T.Text'
--
-- prop> (fromText . toText) t == t
--
-- prop> (toText . fromText) t == t
--
-- This is currently not \(\mathcal{O}(1)\) because currently 'T.Text' uses UTF-16 as its internal representation.
-- In the event that 'T.Text' will change its internal representation to UTF-8 this operation will become \(\mathcal{O}(1)\).
--
-- @since 0.1
toText :: ShortText -> T.Text
toText :: ShortText -> Text
toText = ByteString -> Text
T.decodeUtf8 (ByteString -> Text)
-> (ShortText -> ByteString) -> ShortText -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortText -> ByteString
toByteString

----

-- | \(\mathcal{O}(n)\) Construct/pack from 'String'
--
-- >>> fromString []
-- ""
--
-- >>> fromString ['a','b','c']
-- "abc"
--
-- >>> fromString ['\55295','\55296','\57343','\57344'] -- U+D7FF U+D800 U+DFFF U+E000
-- "\55295\65533\65533\57344"
--
-- __Note__: This function is total because it replaces the (invalid) code-points U+D800 through U+DFFF with the replacement character U+FFFD.
--
-- @since 0.1
fromString :: String -> ShortText
fromString :: String -> ShortText
fromString []  = ShortText
forall a. Monoid a => a
mempty
fromString [Char
c] = Char -> ShortText
singleton Char
c
fromString String
s = ShortByteString -> ShortText
ShortText (ShortByteString -> ShortText)
-> (String -> ShortByteString) -> String -> ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
utf8 (String -> ShortByteString) -> ShowS -> String -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
r (String -> ShortText) -> String -> ShortText
forall a b. (a -> b) -> a -> b
$ String
s
  where
    r :: Char -> Char
r Char
c | Int -> Bool
forall i. (Num i, Bits i) => i -> Bool
isSurr (Char -> Int
ord Char
c) = Char
'\xFFFD'
        | Bool
otherwise      = Char
c

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from 'T.Text'
--
-- This is currently not \(\mathcal{O}(1)\) because currently 'T.Text' uses UTF-16 as its internal representation.
-- In the event that 'T.Text' will change its internal representation to UTF-8 this operation will become \(\mathcal{O}(1)\).
--
-- @since 0.1
fromText :: T.Text -> ShortText
fromText :: Text -> ShortText
fromText = ByteString -> ShortText
fromByteStringUnsafe (ByteString -> ShortText)
-> (Text -> ByteString) -> Text -> ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
T.encodeUtf8

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'ShortByteString'
--
-- This operation doesn't copy the input 'ShortByteString' but it
-- cannot be \(\mathcal{O}(1)\) because we need to validate the UTF-8 encoding.
--
-- Returns 'Nothing' in case of invalid UTF-8 encoding.
--
-- >>> fromShortByteString "\x00\x38\xF0\x90\x8C\x9A" -- U+00 U+38 U+1031A
-- Just "\NUL8\66330"
--
-- >>> fromShortByteString "\xC0\x80" -- invalid denormalised U+00
-- Nothing
--
-- >>> fromShortByteString "\xED\xA0\x80" -- U+D800 (non-scalar code-point)
-- Nothing
--
-- >>> fromShortByteString "\xF4\x8f\xbf\xbf" -- U+10FFFF
-- Just "\1114111"
--
-- >>> fromShortByteString "\xF4\x90\x80\x80" -- U+110000 (invalid)
-- Nothing
--
-- prop> fromShortByteString (toShortByteString t) == Just t
--
-- @since 0.1
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString ShortByteString
sbs
  | ShortText -> Bool
isValidUtf8 ShortText
st  = ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just ShortText
st
  | Bool
otherwise       = Maybe ShortText
forall a. Maybe a
Nothing
  where
    st :: ShortText
st = ShortByteString -> ShortText
ShortText ShortByteString
sbs

-- | \(\mathcal{O}(0)\) Construct 'ShortText' from UTF-8 encoded 'ShortByteString'
--
-- This operation has effectively no overhead, as it's currently merely a @newtype@-cast.
--
-- __WARNING__: Unlike the safe 'fromShortByteString' conversion, this
-- conversion is /unsafe/ as it doesn't validate the well-formedness of the
-- UTF-8 encoding.
--
-- @since 0.1.1
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe = ShortByteString -> ShortText
ShortText

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'BS.ByteString'
--
-- 'fromByteString' accepts (or rejects) the same input data as 'fromShortByteString'.
--
-- Returns 'Nothing' in case of invalid UTF-8 encoding.
--
-- @since 0.1
fromByteString :: BS.ByteString -> Maybe ShortText
fromByteString :: ByteString -> Maybe ShortText
fromByteString = ShortByteString -> Maybe ShortText
fromShortByteString (ShortByteString -> Maybe ShortText)
-> (ByteString -> ShortByteString) -> ByteString -> Maybe ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
BSS.toShort

-- | \(\mathcal{O}(n)\) Construct 'ShortText' from UTF-8 encoded 'BS.ByteString'
--
-- This operation is \(\mathcal{O}(n)\) because the 'BS.ByteString' needs to be
-- copied into an unpinned 'ByteArray#'.
--
-- __WARNING__: Unlike the safe 'fromByteString' conversion, this
-- conversion is /unsafe/ as it doesn't validate the well-formedness of the
-- UTF-8 encoding.
--
-- @since 0.1.1
fromByteStringUnsafe :: BS.ByteString -> ShortText
fromByteStringUnsafe :: ByteString -> ShortText
fromByteStringUnsafe = ShortByteString -> ShortText
ShortText (ShortByteString -> ShortText)
-> (ByteString -> ShortByteString) -> ByteString -> ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
BSS.toShort

----------------------------------------------------------------------------

encodeString :: TextEncoding -> String -> BS.ByteString
encodeString :: TextEncoding -> String -> ByteString
encodeString TextEncoding
te String
str = IO ByteString -> ByteString
forall a. IO a -> a
unsafePerformIO (IO ByteString -> ByteString) -> IO ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ TextEncoding
-> String -> (CStringLen -> IO ByteString) -> IO ByteString
forall a. TextEncoding -> String -> (CStringLen -> IO a) -> IO a
GHC.withCStringLen TextEncoding
te String
str CStringLen -> IO ByteString
BS.packCStringLen

-- decodeString :: TextEncoding -> BS.ByteString -> Maybe String
-- decodeString te bs = cvtEx $ unsafePerformIO $ try $ BS.useAsCStringLen bs (GHC.peekCStringLen te)
--   where
--     cvtEx :: Either IOException a -> Maybe a
--     cvtEx = either (const Nothing) Just

decodeString' :: TextEncoding -> BS.ByteString -> String
decodeString' :: TextEncoding -> ByteString -> String
decodeString' TextEncoding
te ByteString
bs = IO String -> String
forall a. IO a -> a
unsafePerformIO (IO String -> String) -> IO String -> String
forall a b. (a -> b) -> a -> b
$ ByteString -> (CStringLen -> IO String) -> IO String
forall a. ByteString -> (CStringLen -> IO a) -> IO a
BS.useAsCStringLen ByteString
bs (TextEncoding -> CStringLen -> IO String
GHC.peekCStringLen TextEncoding
te)

decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' TextEncoding
te = TextEncoding -> ByteString -> String
decodeString' TextEncoding
te (ByteString -> String)
-> (ShortByteString -> ByteString) -> ShortByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> ByteString
BSS.fromShort

encodeStringShort :: TextEncoding -> String -> BSS.ShortByteString
encodeStringShort :: TextEncoding -> String -> ShortByteString
encodeStringShort TextEncoding
te = ByteString -> ShortByteString
BSS.toShort (ByteString -> ShortByteString)
-> (String -> ByteString) -> String -> ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextEncoding -> String -> ByteString
encodeString TextEncoding
te

-- isValidUtf8' :: ShortText -> Int
-- isValidUtf8' st = fromIntegral $ unsafeDupablePerformIO (c_text_short_is_valid_utf8 (toByteArray# st) (toCSize st))

isValidUtf8 :: ShortText -> Bool
isValidUtf8 :: ShortText -> Bool
isValidUtf8 ShortText
st = (CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
==CInt
0) (CInt -> Bool) -> CInt -> Bool
forall a b. (a -> b) -> a -> b
$ IO CInt -> CInt
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO CInt
c_text_short_is_valid_utf8 (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st))

type CCodePoint = Word

foreign import ccall unsafe "hs_text_short_is_valid_utf8" c_text_short_is_valid_utf8 :: ByteArray# -> CSize -> IO CInt

foreign import ccall unsafe "hs_text_short_index_cp" c_text_short_index :: ByteArray# -> CSize -> CSize -> IO CCodePoint

-- | \(\mathcal{O}(n)\) Lookup /i/-th code-point in 'ShortText'.
--
-- Returns 'Nothing' if out of bounds.
--
-- prop> indexMaybe (singleton c) 0 == Just c
--
-- prop> indexMaybe t 0 == fmap fst (uncons t)
--
-- prop> indexMaybe mempty i == Nothing
--
-- @since 0.1.2
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe ShortText
st Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0      = Maybe Char
forall a. Maybe a
Nothing
  | Bool
otherwise  = CP -> Maybe Char
cp2chSafe CP
cp
  where
    cp :: CP
cp = Word -> CP
CP (Word -> CP) -> Word -> CP
forall a b. (a -> b) -> a -> b
$ IO Word -> Word
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO Word
c_text_short_index (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))

-- | \(\mathcal{O}(n)\) Lookup /i/-th code-point from the end of 'ShortText'.
--
-- Returns 'Nothing' if out of bounds.
--
-- prop> indexEndMaybe (singleton c) 0 == Just c
--
-- prop> indexEndMaybe t 0 == fmap snd (unsnoc t)
--
-- prop> indexEndMaybe mempty i == Nothing
--
-- @since 0.1.2
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe ShortText
st Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0      = Maybe Char
forall a. Maybe a
Nothing
  | Bool
otherwise  = CP -> Maybe Char
cp2chSafe CP
cp
  where
    cp :: CP
cp = Word -> CP
CP (Word -> CP) -> Word -> CP
forall a b. (a -> b) -> a -> b
$ IO Word -> Word
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO Word
c_text_short_index_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))

foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev :: ByteArray# -> CSize -> CSize -> IO CCodePoint


-- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves.
--
-- @'splitAt' n t@ returns a pair of 'ShortText' with the following properties:
--
-- prop> length (fst (splitAt n t)) == min (length t) (max 0 n)
--
-- prop> fst (splitAt n t) <> snd (splitAt n t) == t
--
-- >>> splitAt 2 "abcdef"
-- ("ab","cdef")
--
-- >>> splitAt 10 "abcdef"
-- ("abcdef","")
--
-- >>> splitAt (-1) "abcdef"
-- ("","abcdef")
--
-- @since 0.1.2
splitAt :: Int -> ShortText -> (ShortText,ShortText)
splitAt :: Int -> ShortText -> (ShortText, ShortText)
splitAt Int
i ShortText
st
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0    = (ShortText
forall a. Monoid a => a
mempty,ShortText
st)
  | Bool
otherwise = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  where
    ofs :: B
ofs   = CSize -> B
csizeToB (CSize -> B) -> CSize -> B
forall a b. (a -> b) -> a -> b
$
            IO CSize -> CSize
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO CSize
c_text_short_index_ofs (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
stsz (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i))
    stsz :: CSize
stsz  = ShortText -> CSize
toCSize ShortText
st

-- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves.
--
-- @'splitAtEnd' n t@ returns a pair of 'ShortText' with the following properties:
--
-- prop> length (snd (splitAtEnd n t)) == min (length t) (max 0 n)
--
-- prop> fst (splitAtEnd n t) <> snd (splitAtEnd n t) == t
--
-- prop> splitAtEnd n t == splitAt (length t - n) t
--
-- >>> splitAtEnd 2 "abcdef"
-- ("abcd","ef")
--
-- >>> splitAtEnd 10 "abcdef"
-- ("","abcdef")
--
-- >>> splitAtEnd (-1) "abcdef"
-- ("abcdef","")
--
-- @since 0.1.2
splitAtEnd :: Int -> ShortText -> (ShortText,ShortText)
splitAtEnd :: Int -> ShortText -> (ShortText, ShortText)
splitAtEnd Int
i ShortText
st
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0      = (ShortText
st,ShortText
forall a. Monoid a => a
mempty)
  | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
stsz = (ShortText
forall a. Monoid a => a
mempty,ShortText
st)
  | Bool
otherwise   = B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  where
    ofs :: B
ofs   = CSize -> B
csizeToB (CSize -> B) -> CSize -> B
forall a b. (a -> b) -> a -> b
$
            IO CSize -> CSize
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> CSize -> IO CSize
c_text_short_index_ofs_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) (ShortText -> CSize
toCSize ShortText
st) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)))
    stsz :: B
stsz  = ShortText -> B
toB ShortText
st

{-# INLINE splitAtOfs #-}
splitAtOfs :: B -> ShortText -> (ShortText,ShortText)
splitAtOfs :: B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
ofs ShortText
st
  | B
ofs  B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0    = (ShortText
forall a. Monoid a => a
mempty,ShortText
st)
  | B
ofs  B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
stsz = (ShortText
st,ShortText
forall a. Monoid a => a
mempty)
  | Bool
otherwise    = (ShortText -> B -> B -> ShortText
slice ShortText
st B
0 B
ofs, ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs (B
stszB -> B -> B
forall a. Num a => a -> a -> a
-B
ofs))
  where
    !stsz :: B
stsz  = ShortText -> B
toB ShortText
st

foreign import ccall unsafe "hs_text_short_index_ofs" c_text_short_index_ofs :: ByteArray# -> CSize -> CSize -> IO CSize

foreign import ccall unsafe "hs_text_short_index_ofs_rev" c_text_short_index_ofs_rev :: ByteArray# -> CSize -> CSize -> IO CSize


-- | \(\mathcal{O}(n)\) Inverse operation to 'cons'
--
-- Returns 'Nothing' for empty input 'ShortText'.
--
-- prop> uncons (cons c t) == Just (c,t)
--
-- >>> uncons ""
-- Nothing
--
-- >>> uncons "fmap"
-- Just ('f',"map")
--
-- @since 0.1.2
uncons :: ShortText -> Maybe (Char,ShortText)
uncons :: ShortText -> Maybe (Char, ShortText)
uncons ShortText
st
  | ShortText -> Bool
null ShortText
st    = Maybe (Char, ShortText)
forall a. Maybe a
Nothing
  | B
len2 B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0  = (Char, ShortText) -> Maybe (Char, ShortText)
forall a. a -> Maybe a
Just (Char
c0, ShortText
forall a. Monoid a => a
mempty)
  | Bool
otherwise  = (Char, ShortText) -> Maybe (Char, ShortText)
forall a. a -> Maybe a
Just (Char
c0, ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs B
len2)
  where
    c0 :: Char
c0  = CP -> Char
cp2ch CP
cp0
    cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePoint ShortText
st B
0
    ofs :: B
ofs = CP -> B
cpLen CP
cp0
    len2 :: B
len2 = ShortText -> B
toB ShortText
st B -> B -> B
forall a. Num a => a -> a -> a
- B
ofs

-- | \(\mathcal{O}(n)\) Inverse operation to 'snoc'
--
-- Returns 'Nothing' for empty input 'ShortText'.
--
-- prop> unsnoc (snoc t c) == Just (t,c)
--
-- >>> unsnoc ""
-- Nothing
--
-- >>> unsnoc "fmap"
-- Just ("fma",'p')
--
-- @since 0.1.2
unsnoc :: ShortText -> Maybe (ShortText,Char)
unsnoc :: ShortText -> Maybe (ShortText, Char)
unsnoc ShortText
st
  | ShortText -> Bool
null ShortText
st    = Maybe (ShortText, Char)
forall a. Maybe a
Nothing
  | B
len1 B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0  = (ShortText, Char) -> Maybe (ShortText, Char)
forall a. a -> Maybe a
Just (ShortText
forall a. Monoid a => a
mempty, Char
c0)
  | Bool
otherwise  = (ShortText, Char) -> Maybe (ShortText, Char)
forall a. a -> Maybe a
Just (ShortText -> B -> B -> ShortText
slice ShortText
st B
0 B
len1, Char
c0)
  where
    c0 :: Char
c0  = CP -> Char
cp2ch CP
cp0
    cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePointRev ShortText
st B
stsz
    stsz :: B
stsz = ShortText -> B
toB ShortText
st
    len1 :: B
len1 = B
stsz B -> B -> B
forall a. Num a => a -> a -> a
- CP -> B
cpLen CP
cp0

-- | \(\mathcal{O}(n)\) Tests whether the first 'ShortText' is a prefix of the second 'ShortText'
--
-- >>> isPrefixOf "ab" "abcdef"
-- True
--
-- >>> isPrefixOf "ac" "abcdef"
-- False
--
-- prop> isPrefixOf "" t == True
--
-- prop> isPrefixOf t t == True
--
-- @since 0.1.2
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf ShortText
x ShortText
y
  | Int
lx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
ly = Bool
False
  | Int
lx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Bool
True
  | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
0# Int#
n# of
                  Int#
0# -> Bool
True
                  Int#
_  -> Bool
False
  where
    !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
    !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

-- | \(\mathcal{O}(n)\) Strip prefix from second 'ShortText' argument.
--
-- Returns 'Nothing' if first argument is not a prefix of the second argument.
--
-- >>> stripPrefix "text-" "text-short"
-- Just "short"
--
-- >>> stripPrefix "test-" "text-short"
-- Nothing
--
-- @since 0.1.2
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix ShortText
pfx ShortText
t
  | ShortText -> ShortText -> Bool
isPrefixOf ShortText
pfx ShortText
t = ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (ShortText -> Maybe ShortText) -> ShortText -> Maybe ShortText
forall a b. (a -> b) -> a -> b
$! (ShortText, ShortText) -> ShortText
forall a b. (a, b) -> b
snd (B -> ShortText -> (ShortText, ShortText)
splitAtOfs (ShortText -> B
toB ShortText
pfx) ShortText
t)
  | Bool
otherwise        = Maybe ShortText
forall a. Maybe a
Nothing

-- | \(\mathcal{O}(n)\) Tests whether the first 'ShortText' is a suffix of the second 'ShortText'
--
-- >>> isSuffixOf "ef" "abcdef"
-- True
--
-- >>> isPrefixOf "df" "abcdef"
-- False
--
-- prop> isSuffixOf "" t == True
--
-- prop> isSuffixOf t t == True
--
-- @since 0.1.2
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf ShortText
x ShortText
y
  | Int
lx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
ly = Bool
False
  | Int
lx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Bool
True
  | Bool
otherwise = case ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
PrimOps.compareByteArrays# (ShortText -> ByteArray#
toByteArray# ShortText
x) Int#
0# (ShortText -> ByteArray#
toByteArray# ShortText
y) Int#
ofs2# Int#
n# of
                  Int#
0# -> Bool
True
                  Int#
_  -> Bool
False
  where
    !(I# Int#
ofs2#) = Int
ly Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
lx
    !lx :: Int
lx@(I# Int#
n#) = ShortText -> Int
toLength ShortText
x
    !ly :: Int
ly = ShortText -> Int
toLength ShortText
y

-- | \(\mathcal{O}(n)\) Strip suffix from second 'ShortText' argument.
--
-- Returns 'Nothing' if first argument is not a suffix of the second argument.
--
-- >>> stripSuffix "-short" "text-short"
-- Just "text"
--
-- >>> stripSuffix "-utf8" "text-short"
-- Nothing
--
-- @since 0.1.2
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix ShortText
sfx ShortText
t
  | ShortText -> ShortText -> Bool
isSuffixOf ShortText
sfx ShortText
t = ShortText -> Maybe ShortText
forall a. a -> Maybe a
Just (ShortText -> Maybe ShortText) -> ShortText -> Maybe ShortText
forall a b. (a -> b) -> a -> b
$! (ShortText, ShortText) -> ShortText
forall a b. (a, b) -> a
fst (B -> ShortText -> (ShortText, ShortText)
splitAtOfs B
pfxLen ShortText
t)
  | Bool
otherwise        = Maybe ShortText
forall a. Maybe a
Nothing
  where
    pfxLen :: B
pfxLen = ShortText -> B
toB ShortText
t B -> B -> B
forall a. Num a => a -> a -> a
- ShortText -> B
toB ShortText
sfx

----------------------------------------------------------------------------

-- | \(\mathcal{O}(n)\) Insert character between characters of 'ShortText'.
--
-- >>> intersperse '*' "_"
-- "_"
--
-- >>> intersperse '*' "MASH"
-- "M*A*S*H"
--
-- @since 0.1.2
intersperse :: Char -> ShortText -> ShortText
intersperse :: Char -> ShortText -> ShortText
intersperse Char
c ShortText
st
  | ShortText -> Bool
null ShortText
st = ShortText
forall a. Monoid a => a
mempty
  | Int
sn Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = ShortText
st
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
newsz ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
      let !cp0 :: CP
cp0 = ShortText -> B -> CP
readCodePoint ShortText
st B
0
          !cp0sz :: B
cp0sz = CP -> B
cpLen CP
cp0
      B -> MBA s -> B -> CP -> ST s ()
forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cp0sz MBA s
mba B
0 CP
cp0
      MBA s -> Int -> B -> B -> ST s ()
forall s. MBA s -> Int -> B -> B -> ST s ()
go MBA s
mba (Int
sn Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) B
cp0sz B
cp0sz
  where
    newsz :: B
newsz = B
ssz B -> B -> B
forall a. Num a => a -> a -> a
+ ((Int
snInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int -> B -> B
`mulB` B
csz)
    ssz :: B
ssz = ShortText -> B
toB ShortText
st
    sn :: Int
sn  = ShortText -> Int
length ShortText
st
    csz :: B
csz = CP -> B
cpLen CP
cp
    cp :: CP
cp  = Char -> CP
ch2cp Char
c

    go :: MBA s -> Int -> B -> B -> ST s ()
    go :: MBA s -> Int -> B -> B -> ST s ()
go MBA s
_   Int
0 !B
_  !B
_   = () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    go MBA s
mba Int
n B
ofs B
ofs2 = do
      let !cp1 :: CP
cp1 = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs2
          !cp1sz :: B
cp1sz = CP -> B
cpLen CP
cp1
      B -> MBA s -> B -> CP -> ST s ()
forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
csz   MBA s
mba B
ofs CP
cp
      B -> MBA s -> B -> CP -> ST s ()
forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cp1sz MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
csz) CP
cp1
      MBA s -> Int -> B -> B -> ST s ()
forall s. MBA s -> Int -> B -> B -> ST s ()
go MBA s
mba (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
cszB -> B -> B
forall a. Num a => a -> a -> a
+B
cp1sz) (B
ofs2B -> B -> B
forall a. Num a => a -> a -> a
+B
cp1sz)

-- | \(\mathcal{O}(n)\) Insert 'ShortText' inbetween list of 'ShortText's.
--
-- >>> intercalate ", " []
-- ""
--
-- >>> intercalate ", " ["foo"]
-- "foo"
--
-- >>> intercalate ", " ["foo","bar","doo"]
-- "foo, bar, doo"
--
-- prop> intercalate "" ts == concat ts
--
-- @since 0.1.2
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate ShortText
_ []  = ShortText
forall a. Monoid a => a
mempty
intercalate ShortText
_ [ShortText
t] = ShortText
t
intercalate ShortText
sep [ShortText]
ts
  | ShortText -> Bool
null ShortText
sep   = [ShortText] -> ShortText
forall a. Monoid a => [a] -> a
mconcat [ShortText]
ts
  | Bool
otherwise  = [ShortText] -> ShortText
forall a. Monoid a => [a] -> a
mconcat (ShortText -> [ShortText] -> [ShortText]
forall a. a -> [a] -> [a]
List.intersperse ShortText
sep [ShortText]
ts)

-- | \(\mathcal{O}(n*m)\) Replicate a 'ShortText'.
--
-- A repetition count smaller than 1 results in an empty string result.
--
-- >>> replicate 3 "jobs!"
-- "jobs!jobs!jobs!"
--
-- >>> replicate 10000 ""
-- ""
--
-- >>> replicate 0 "nothing"
-- ""
--
-- prop> length (replicate n t) == max 0 n * length t
--
-- @since 0.1.2
replicate :: Int -> ShortText -> ShortText
replicate :: Int -> ShortText -> ShortText
replicate Int
n0 ShortText
t
  | Int
n0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1     = ShortText
forall a. Monoid a => a
mempty
  | ShortText -> Bool
null ShortText
t    = ShortText
forall a. Monoid a => a
mempty
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (Int
n0 Int -> B -> B
`mulB` B
sz) (Int -> MBA s -> ST s ()
forall s. Int -> MBA s -> ST s ()
go Int
0)
  where
    go :: Int -> MBA s -> ST s ()
    go :: Int -> MBA s -> ST s ()
go Int
j MBA s
mba
      | Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n0    = () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
      | Bool
otherwise  = do
          ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t B
0 MBA s
mba (Int
j Int -> B -> B
`mulB` B
sz) B
sz
          Int -> MBA s -> ST s ()
forall s. Int -> MBA s -> ST s ()
go (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) MBA s
mba

    sz :: B
sz = ShortText -> B
toB ShortText
t

-- | \(\mathcal{O}(n)\) Reverse characters in 'ShortText'.
--
-- >>> reverse "star live desserts"
-- "stressed evil rats"
--
-- prop> reverse (singleton c) == singleton c
--
-- prop> reverse (reverse t) == t
--
-- @since 0.1.2
reverse :: ShortText -> ShortText
reverse :: ShortText -> ShortText
reverse ShortText
st
  | ShortText -> Bool
null ShortText
st   = ShortText
forall a. Monoid a => a
mempty
  | Int
sn Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1   = ShortText
st
  | Bool
otherwise = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
sz ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ Int -> B -> MBA s -> ST s ()
forall s. Int -> B -> MBA s -> ST s ()
go Int
sn B
0
  where
    sz :: B
sz = ShortText -> B
toB ShortText
st
    sn :: Int
sn = ShortText -> Int
length ShortText
st

    go :: Int -> B -> MBA s -> ST s ()
    go :: Int -> B -> MBA s -> ST s ()
go Int
0 !B
_  MBA s
_   = () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    go Int
i B
ofs MBA s
mba = do
      let !cp :: CP
cp   = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs
          !cpsz :: B
cpsz = CP -> B
cpLen CP
cp
          !ofs' :: B
ofs' = B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
cpsz
      B -> MBA s -> B -> CP -> ST s ()
forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cpsz MBA s
mba (B
sz B -> B -> B
forall a. Num a => a -> a -> a
- B
ofs') CP
cp
      Int -> B -> MBA s -> ST s ()
forall s. Int -> B -> MBA s -> ST s ()
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) B
ofs' MBA s
mba


-- | \(\mathcal{O}(n)\) Remove characters from 'ShortText' which don't satisfy given predicate.
--
-- >>> filter (`notElem` ['a','e','i','o','u']) "You don't need vowels to convey information!"
-- "Y dn't nd vwls t cnvy nfrmtn!"
--
-- prop> filter (const False) t == ""
--
-- prop> filter (const True) t == t
--
-- prop> length (filter p t) <= length t
--
-- prop> filter p t == pack [ c | c <- unpack t, p c ]
--
-- @since 0.1.2
filter :: (Char -> Bool) -> ShortText -> ShortText
filter :: (Char -> Bool) -> ShortText -> ShortText
filter Char -> Bool
p ShortText
t
  = case (Maybe B
mofs1,Maybe B
mofs2) of
      (Maybe B
Nothing,   Maybe B
_)       -> ShortText
t -- no non-accepted characters found
      (Just B
0,    Maybe B
Nothing) -> ShortText
forall a. Monoid a => a
mempty -- no accepted characters found
      (Just B
ofs1, Maybe B
Nothing) -> ShortText -> B -> B -> ShortText
slice ShortText
t B
0 B
ofs1 -- only prefix accepted
      (Just B
ofs1, Just B
ofs2) -> B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink (B
t0szB -> B -> B
forall a. Num a => a -> a -> a
-(B
ofs2B -> B -> B
forall a. Num a => a -> a -> a
-B
ofs1)) ((forall s. MBA s -> ST s B) -> ShortText)
-> (forall s. MBA s -> ST s B) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
        -- copy accepted prefix
        ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t B
0 MBA s
mba B
0 B
ofs1
        -- [ofs1 .. ofs2) are a non-accepted region
        -- filter rest after ofs2
        B
t1sz <- MBA s -> B -> B -> ST s B
forall s. MBA s -> B -> B -> ST s B
go MBA s
mba B
ofs2 B
ofs1
        B -> ST s B
forall (m :: * -> *) a. Monad m => a -> m a
return B
t1sz
  where
    mofs1 :: Maybe B
mofs1 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t (Int -> B
B Int
0) -- first non-accepted Char
    mofs2 :: Maybe B
mofs2 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs Char -> Bool
p ShortText
t (B -> Maybe B -> B
forall a. a -> Maybe a -> a
fromMaybe (Int -> B
B Int
0) Maybe B
mofs1) -- first accepted Char

    t0sz :: B
t0sz = ShortText -> B
toB ShortText
t

    go :: MBA s -> B -> B -> ST s B
    go :: MBA s -> B -> B -> ST s B
go MBA s
mba !B
t0ofs !B
t1ofs
      | B
t0ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
>= B
t0sz = B -> ST s B
forall (m :: * -> *) a. Monad m => a -> m a
return B
t1ofs
      | Bool
otherwise = let !cp :: CP
cp = ShortText -> B -> CP
readCodePoint ShortText
t B
t0ofs
                        !cpsz :: B
cpsz = CP -> B
cpLen CP
cp
                    in if Char -> Bool
p (CP -> Char
cp2ch CP
cp)
                       then B -> MBA s -> B -> CP -> ST s ()
forall s. B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
cpsz MBA s
mba B
t1ofs CP
cp ST s () -> ST s B -> ST s B
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                            MBA s -> B -> B -> ST s B
forall s. MBA s -> B -> B -> ST s B
go MBA s
mba (B
t0ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
cpsz) (B
t1ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
cpsz)
                       else MBA s -> B -> B -> ST s B
forall s. MBA s -> B -> B -> ST s B
go MBA s
mba (B
t0ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
cpsz) B
t1ofs -- skip code-point

-- | \(\mathcal{O}(n)\) Strip characters from the beginning end and of 'ShortText' which satisfy given predicate.
--
-- >>> dropAround (== ' ') "   white   space   "
-- "white   space"
--
-- >>> dropAround (> 'a') "bcdefghi"
-- ""
--
-- @since 0.1.2
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround Char -> Bool
p ShortText
t0 = case (Maybe B
mofs1,Maybe B
mofs2) of
                    (Maybe B
Nothing,Maybe B
_) -> ShortText
forall a. Monoid a => a
mempty
                    (Just B
ofs1,Just B
ofs2)
                      | B
ofs1 B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0, B
ofs2 B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
t0sz  -> ShortText
t0
                      | B
ofs1 B -> B -> Bool
forall a. Ord a => a -> a -> Bool
< B
ofs2  -> B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
ofs2B -> B -> B
forall a. Num a => a -> a -> a
-B
ofs1) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> do
                          ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
t0 B
ofs1 MBA s
mba (Int -> B
B Int
0) (B
ofs2B -> B -> B
forall a. Num a => a -> a -> a
-B
ofs1)
                    (Maybe B
_,Maybe B
_) -> String -> ShortText
forall a. HasCallStack => String -> a
error String
"dropAround: the impossible happened"
  where
    mofs1 :: Maybe B
mofs1 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs    (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t0 (Int -> B
B Int
0)
    mofs2 :: Maybe B
mofs2 = (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) ShortText
t0 B
t0sz
    t0sz :: B
t0sz = ShortText -> B
toB ShortText
t0

----------------------------------------------------------------------------

-- | Construct a new 'ShortText' from an existing one by slicing
--
-- NB: The 'CSize' arguments refer to byte-offsets
slice :: ShortText -> B -> B -> ShortText
slice :: ShortText -> B -> B -> ShortText
slice ShortText
st B
ofs B
len
  | B
ofs B -> B -> Bool
forall a. Ord a => a -> a -> Bool
< B
0    = String -> ShortText
forall a. HasCallStack => String -> a
error String
"invalid offset"
  | B
len B -> B -> Bool
forall a. Ord a => a -> a -> Bool
< B
0    = String -> ShortText
forall a. HasCallStack => String -> a
error String
"invalid length"
  | B
len' B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0  = ShortText
forall a. Monoid a => a
mempty
  | Bool
otherwise  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
len' ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
st B
ofs' MBA s
mba B
0 B
len'
  where
    len0 :: B
len0 = ShortText -> B
toB ShortText
st
    len' :: B
len' = B -> B -> B
forall a. Ord a => a -> a -> a
max B
0 (B -> B -> B
forall a. Ord a => a -> a -> a
min B
len (B
len0B -> B -> B
forall a. Num a => a -> a -> a
-B
ofs))
    ofs' :: B
ofs' = B -> B -> B
forall a. Ord a => a -> a -> a
max B
0 B
ofs

----------------------------------------------------------------------------
-- low-level MutableByteArray# helpers

-- | Byte offset (or size) in bytes
--
-- This currently wraps an 'Int' because this is what GHC's primops
-- currently use for byte offsets/sizes.
newtype B = B { B -> Int
unB :: Int }
          deriving (Eq B
Eq B
-> (B -> B -> Ordering)
-> (B -> B -> Bool)
-> (B -> B -> Bool)
-> (B -> B -> Bool)
-> (B -> B -> Bool)
-> (B -> B -> B)
-> (B -> B -> B)
-> Ord B
B -> B -> Bool
B -> B -> Ordering
B -> B -> B
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: B -> B -> B
$cmin :: B -> B -> B
max :: B -> B -> B
$cmax :: B -> B -> B
>= :: B -> B -> Bool
$c>= :: B -> B -> Bool
> :: B -> B -> Bool
$c> :: B -> B -> Bool
<= :: B -> B -> Bool
$c<= :: B -> B -> Bool
< :: B -> B -> Bool
$c< :: B -> B -> Bool
compare :: B -> B -> Ordering
$ccompare :: B -> B -> Ordering
$cp1Ord :: Eq B
Ord,B -> B -> Bool
(B -> B -> Bool) -> (B -> B -> Bool) -> Eq B
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: B -> B -> Bool
$c/= :: B -> B -> Bool
== :: B -> B -> Bool
$c== :: B -> B -> Bool
Eq,Integer -> B
B -> B
B -> B -> B
(B -> B -> B)
-> (B -> B -> B)
-> (B -> B -> B)
-> (B -> B)
-> (B -> B)
-> (B -> B)
-> (Integer -> B)
-> Num B
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> B
$cfromInteger :: Integer -> B
signum :: B -> B
$csignum :: B -> B
abs :: B -> B
$cabs :: B -> B
negate :: B -> B
$cnegate :: B -> B
* :: B -> B -> B
$c* :: B -> B -> B
- :: B -> B -> B
$c- :: B -> B -> B
+ :: B -> B -> B
$c+ :: B -> B -> B
Num)

{- TODO: introduce operators for 'B' to avoid 'Num' -}

mulB :: Int -> B -> B
mulB :: Int -> B -> B
mulB Int
n (B Int
b) = Int -> B
B (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
b)

csizeFromB :: B -> CSize
csizeFromB :: B -> CSize
csizeFromB = Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> CSize) -> (B -> Int) -> B -> CSize
forall b c a. (b -> c) -> (a -> b) -> a -> c
. B -> Int
unB

csizeToB :: CSize -> B
csizeToB :: CSize -> B
csizeToB = Int -> B
B (Int -> B) -> (CSize -> Int) -> CSize -> B
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral

data MBA s = MBA# { MBA s -> MutableByteArray# s
unMBA# :: MutableByteArray# s }

{-# INLINE create #-}
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
n forall s. MBA s -> ST s ()
go = (forall s. ST s ShortText) -> ShortText
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ShortText) -> ShortText)
-> (forall s. ST s ShortText) -> ShortText
forall a b. (a -> b) -> a -> b
$ do
  MBA s
mba <- B -> ST s (MBA s)
forall s. B -> ST s (MBA s)
newByteArray B
n
  MBA s -> ST s ()
forall s. MBA s -> ST s ()
go MBA s
mba
  MBA s -> ST s ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE createShrink #-}
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink B
n forall s. MBA s -> ST s B
go = (forall s. ST s ShortText) -> ShortText
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s ShortText) -> ShortText)
-> (forall s. ST s ShortText) -> ShortText
forall a b. (a -> b) -> a -> b
$ do
  MBA s
mba <- B -> ST s (MBA s)
forall s. B -> ST s (MBA s)
newByteArray B
n
  B
n' <- MBA s -> ST s B
forall s. MBA s -> ST s B
go MBA s
mba
  if B
n' B -> B -> Bool
forall a. Ord a => a -> a -> Bool
< B
n
    then MBA s -> B -> ST s ShortText
forall s. MBA s -> B -> ST s ShortText
unsafeFreezeShrink MBA s
mba B
n'
    else MBA s -> ST s ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE unsafeFreeze #-}
unsafeFreeze :: MBA s -> ST s ShortText
unsafeFreeze :: MBA s -> ST s ShortText
unsafeFreeze (MBA# MutableByteArray# s
mba#)
  = STRep s ShortText -> ST s ShortText
forall s a. STRep s a -> ST s a
ST (STRep s ShortText -> ST s ShortText)
-> STRep s ShortText -> ST s ShortText
forall a b. (a -> b) -> a -> b
$ \State# s
s -> case MutableByteArray# s -> State# s -> (# State# s, ByteArray# #)
forall d.
MutableByteArray# d -> State# d -> (# State# d, ByteArray# #)
GHC.Exts.unsafeFreezeByteArray# MutableByteArray# s
mba# State# s
s of
                 (# State# s
s', ByteArray#
ba# #) -> (# State# s
s', ShortByteString -> ShortText
ShortText (ByteArray# -> ShortByteString
BSSI.SBS ByteArray#
ba#) #)

{-# INLINE copyByteArray #-}
copyByteArray :: ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray :: ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray (ShortText (BSSI.SBS ByteArray#
src#)) (B (I# Int#
src_off#)) (MBA# MutableByteArray# s
dst#) (B (I# Int#
dst_off#)) (B (I# Int#
len#))
  = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s -> case ByteArray#
-> Int#
-> MutableByteArray# s
-> Int#
-> Int#
-> State# s
-> State# s
forall d.
ByteArray#
-> Int#
-> MutableByteArray# d
-> Int#
-> Int#
-> State# d
-> State# d
GHC.Exts.copyByteArray# ByteArray#
src# Int#
src_off# MutableByteArray# s
dst# Int#
dst_off# Int#
len# State# s
s of
                 State# s
s' -> (# State# s
s', () #)

{-# INLINE newByteArray #-}
newByteArray :: B -> ST s (MBA s)
newByteArray :: B -> ST s (MBA s)
newByteArray (B (I# Int#
n#))
  = STRep s (MBA s) -> ST s (MBA s)
forall s a. STRep s a -> ST s a
ST (STRep s (MBA s) -> ST s (MBA s))
-> STRep s (MBA s) -> ST s (MBA s)
forall a b. (a -> b) -> a -> b
$ \State# s
s -> case Int# -> State# s -> (# State# s, MutableByteArray# s #)
forall d. Int# -> State# d -> (# State# d, MutableByteArray# d #)
GHC.Exts.newByteArray# Int#
n# State# s
s of
                 (# State# s
s', MutableByteArray# s
mba# #) -> (# State# s
s', MutableByteArray# s -> MBA s
forall s. MutableByteArray# s -> MBA s
MBA# MutableByteArray# s
mba# #)

{-# INLINE writeWord8Array #-}
writeWord8Array :: MBA s -> B -> Word -> ST s ()
writeWord8Array :: MBA s -> B -> Word -> ST s ()
writeWord8Array (MBA# MutableByteArray# s
mba#) (B (I# Int#
i#)) (W# Word#
w#)
  = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s -> case MutableByteArray# s -> Int# -> Word# -> State# s -> State# s
forall d.
MutableByteArray# d -> Int# -> Word# -> State# d -> State# d
GHC.Exts.writeWord8Array# MutableByteArray# s
mba# Int#
i# Word#
w# State# s
s of
                 State# s
s' -> (# State# s
s', () #)
{- not needed yet
{-# INLINE indexWord8Array #-}
indexWord8Array :: ShortText -> B -> Word
indexWord8Array (ShortText (BSSI.SBS ba#)) (B (I# i#)) = W# (GHC.Exts.indexWord8Array# ba# i#)
-}

{-# INLINE copyAddrToByteArray #-}
copyAddrToByteArray :: Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray :: Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray (Ptr Addr#
src#) (MBA# MutableByteArray# RealWorld
dst#) (B (I# Int#
dst_off#)) (B (I# Int#
len#))
  = STRep RealWorld () -> ST RealWorld ()
forall s a. STRep s a -> ST s a
ST (STRep RealWorld () -> ST RealWorld ())
-> STRep RealWorld () -> ST RealWorld ()
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s -> case Addr#
-> MutableByteArray# RealWorld
-> Int#
-> Int#
-> State# RealWorld
-> State# RealWorld
forall d.
Addr#
-> MutableByteArray# d -> Int# -> Int# -> State# d -> State# d
GHC.Exts.copyAddrToByteArray# Addr#
src# MutableByteArray# RealWorld
dst# Int#
dst_off# Int#
len# State# RealWorld
s of
                 State# RealWorld
s' -> (# State# RealWorld
s', () #)

----------------------------------------------------------------------------
-- unsafeFreezeShrink

#if __GLASGOW_HASKELL__ >= 710
-- for GHC versions which have the 'shrinkMutableByteArray#' primop
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink MBA s
mba B
n = do
  MBA s -> B -> ST s ()
forall s. MBA s -> B -> ST s ()
shrink MBA s
mba B
n
  MBA s -> ST s ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA s
mba

{-# INLINE shrink #-}
shrink :: MBA s -> B -> ST s ()
shrink :: MBA s -> B -> ST s ()
shrink (MBA# MutableByteArray# s
mba#) (B (I# Int#
i#))
  = STRep s () -> ST s ()
forall s a. STRep s a -> ST s a
ST (STRep s () -> ST s ()) -> STRep s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ \State# s
s -> case MutableByteArray# s -> Int# -> State# s -> State# s
forall d. MutableByteArray# d -> Int# -> State# d -> State# d
GHC.Exts.shrinkMutableByteArray# MutableByteArray# s
mba# Int#
i# State# s
s of
                 State# s
s' -> (# State# s
s', () #)
#else
-- legacy code for GHC versions which lack `shrinkMutableByteArray#` primop
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba0 n = do
  mba' <- newByteArray n
  copyByteArray2 mba0 0 mba' 0 n
  unsafeFreeze mba'

{-# INLINE copyByteArray2 #-}
copyByteArray2 :: MBA s -> B -> MBA s -> B -> B -> ST s ()
copyByteArray2 (MBA# src#) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B( I# len#))
  = ST $ \s -> case GHC.Exts.copyMutableByteArray# src# src_off# dst# dst_off# len# s of
                 s' -> (# s', () #)
#endif

----------------------------------------------------------------------------
-- Helpers for encoding code points into UTF-8 code units
--
--   7 bits| <    0x80 | 0xxxxxxx
--  11 bits| <   0x800 | 110yyyyx  10xxxxxx
--  16 bits| < 0x10000 | 1110yyyy  10yxxxxx  10xxxxxx
--  21 bits|           | 11110yyy  10yyxxxx  10xxxxxx  10xxxxxx

-- | Unicode Code-point
--
-- Keeping it as a 'Word' is more convenient for bit-ops and FFI
newtype CP = CP Word

{-# INLINE ch2cp #-}
ch2cp :: Char -> CP
ch2cp :: Char -> CP
ch2cp (Char -> Int
ord -> Int
ci)
  | Int -> Bool
forall i. (Num i, Bits i) => i -> Bool
isSurr Int
ci  = Word -> CP
CP Word
0xFFFD
  | Bool
otherwise  = Word -> CP
CP (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ci)

{-# INLINE isSurr #-}
isSurr :: (Num i, Bits i) => i -> Bool
isSurr :: i -> Bool
isSurr i
ci = i
ci i -> i -> i
forall a. Bits a => a -> a -> a
.&. i
0xfff800 i -> i -> Bool
forall a. Eq a => a -> a -> Bool
== i
0xd800

{-# INLINE cp2ch #-}
cp2ch :: CP -> Char
cp2ch :: CP -> Char
cp2ch (CP Word
w) = (Word
w Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x110000) Bool -> Char -> Char
forall a. HasCallStack => Bool -> a -> a
`assert` Int -> Char
unsafeChr (Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
w)

-- used/needed by index-lookup functions to encode out of bounds
cp2chSafe :: CP -> Maybe Char
cp2chSafe :: CP -> Maybe Char
cp2chSafe CP
cp
  | CP -> Bool
cpNull CP
cp = Maybe Char
forall a. Maybe a
Nothing
  | Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (Char -> Maybe Char) -> Char -> Maybe Char
forall a b. (a -> b) -> a -> b
$! CP -> Char
cp2ch CP
cp
  where
    cpNull :: CP -> Bool
    cpNull :: CP -> Bool
cpNull (CP Word
w) = Word
w Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
>= Word
0x110000

{-# INLINE cpLen #-}
cpLen :: CP -> B
cpLen :: CP -> B
cpLen (CP Word
cp)
  | Word
cp Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<    Word
0x80  = Int -> B
B Int
1
  | Word
cp Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<   Word
0x800  = Int -> B
B Int
2
  | Word
cp Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x10000  = Int -> B
B Int
3
  | Bool
otherwise     = Int -> B
B Int
4

-- convenience wrapper; unsafe like readCodePoint
{-# INLINE decodeCharAtOfs #-}
decodeCharAtOfs :: ShortText -> B -> (Char,B)
decodeCharAtOfs :: ShortText -> B -> (Char, B)
decodeCharAtOfs ShortText
st B
ofs = (Char
c,B
ofs')
  where
    c :: Char
c    = CP -> Char
cp2ch CP
cp
    ofs' :: B
ofs' = B
ofs B -> B -> B
forall a. Num a => a -> a -> a
+ CP -> B
cpLen CP
cp
    cp :: CP
cp   = ShortText -> B -> CP
readCodePoint ShortText
st B
ofs
{- pure version of decodeCharAtOfs, but unfortunately significantly slower

decodeCharAtOfs st ofs
  | b0 < 0x80 = (cp2ch $ CP b0,ofs + B 1)
  | otherwise = case b0 `unsafeShiftR` 4 of
                  0xf -> (cp2ch $ CP go4, ofs + B 4)
                  0xe -> (cp2ch $ CP go3, ofs + B 3)
                  _   -> (cp2ch $ CP go2, ofs + B 2)
  where
    b0    = buf 0
    buf j = indexWord8Array st (ofs+j)

    go2 =     ((b0    .&. 0x1f) `unsafeShiftL` 6)
          .|.  (buf 1 .&. 0x3f)

    go3 =     ((b0    .&. 0x0f) `unsafeShiftL` (6+6))
          .|. ((buf 1 .&. 0x3f) `unsafeShiftL` 6)
          .|.  (buf 2 .&. 0x3f)

    go4 =     ((b0    .&. 0x07) `unsafeShiftL` (6+6+6))
          .|. ((buf 1 .&. 0x3f) `unsafeShiftL` (6+6))
          .|. ((buf 2 .&. 0x3f) `unsafeShiftL` 6)
          .|.  (buf 3 .&. 0x3f)
-}


-- | \(\mathcal{O}(1)\) Construct 'ShortText' from single codepoint.
--
-- prop> singleton c == pack [c]
--
-- prop> length (singleton c) == 1
--
-- >>> singleton 'A'
-- "A"
--
-- >>> map singleton ['\55295','\55296','\57343','\57344'] -- U+D7FF U+D800 U+DFFF U+E000
-- ["\55295","\65533","\65533","\57344"]
--
-- __Note__: This function is total because it replaces the (invalid) code-points U+D800 through U+DFFF with the replacement character U+FFFD.
--
-- @since 0.1.2
singleton :: Char -> ShortText
singleton :: Char -> ShortText
singleton = CP -> ShortText
singleton' (CP -> ShortText) -> (Char -> CP) -> Char -> ShortText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> CP
ch2cp

singleton' :: CP -> ShortText
singleton' :: CP -> ShortText
singleton' cp :: CP
cp@(CP Word
cpw)
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
1 ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
0 CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
2 ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
0 CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
3 ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
0 CP
cp
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create B
4 ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
0 CP
cp

-- | \(\mathcal{O}(n)\) Prepend a character to a 'ShortText'.
--
-- prop> cons c t == singleton c <> t
--
-- @since 0.1.2
cons :: Char -> ShortText -> ShortText
cons :: Char -> ShortText -> ShortText
cons (Char -> CP
ch2cp -> cp :: CP
cp@(CP Word
cpw)) ShortText
sfx
  | B
n B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0         = CP -> ShortText
singleton' CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
1) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
0 CP
cp ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> B -> MBA s -> ST s ()
forall s. B -> MBA s -> ST s ()
copySfx B
1 MBA s
mba
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
2) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
0 CP
cp ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> B -> MBA s -> ST s ()
forall s. B -> MBA s -> ST s ()
copySfx B
2 MBA s
mba
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
3) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
0 CP
cp ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> B -> MBA s -> ST s ()
forall s. B -> MBA s -> ST s ()
copySfx B
3 MBA s
mba
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
4) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
0 CP
cp ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> B -> MBA s -> ST s ()
forall s. B -> MBA s -> ST s ()
copySfx B
4 MBA s
mba
  where
    !n :: B
n = ShortText -> B
toB ShortText
sfx

    copySfx :: B -> MBA s -> ST s ()
    copySfx :: B -> MBA s -> ST s ()
copySfx B
ofs MBA s
mba = ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
sfx B
0 MBA s
mba B
ofs B
n

-- | \(\mathcal{O}(n)\) Append a character to the ond of a 'ShortText'.
--
-- prop> snoc t c == t <> singleton c
--
-- @since 0.1.2
snoc :: ShortText -> Char -> ShortText
snoc :: ShortText -> Char -> ShortText
snoc ShortText
pfx (Char -> CP
ch2cp -> cp :: CP
cp@(CP Word
cpw))
  | B
n B -> B -> Bool
forall a. Eq a => a -> a -> Bool
== B
0         = CP -> ShortText
singleton' CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<    Word
0x80  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
1) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> ST s ()
forall s. MBA s -> ST s ()
copyPfx MBA s
mba ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
n CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<   Word
0x800  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
2) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> ST s ()
forall s. MBA s -> ST s ()
copyPfx MBA s
mba ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
n CP
cp
  | Word
cpw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x10000  = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
3) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> ST s ()
forall s. MBA s -> ST s ()
copyPfx MBA s
mba ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
n CP
cp
  | Bool
otherwise      = B -> (forall s. MBA s -> ST s ()) -> ShortText
create (B
nB -> B -> B
forall a. Num a => a -> a -> a
+B
4) ((forall s. MBA s -> ST s ()) -> ShortText)
-> (forall s. MBA s -> ST s ()) -> ShortText
forall a b. (a -> b) -> a -> b
$ \MBA s
mba -> MBA s -> ST s ()
forall s. MBA s -> ST s ()
copyPfx MBA s
mba ST s () -> ST s () -> ST s ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
n CP
cp
  where
    !n :: B
n = ShortText -> B
toB ShortText
pfx

    copyPfx :: MBA s -> ST s ()
    copyPfx :: MBA s -> ST s ()
copyPfx MBA s
mba = ShortText -> B -> MBA s -> B -> B -> ST s ()
forall s. ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray ShortText
pfx B
0 MBA s
mba B
0 B
n

{-
writeCodePoint :: MBA s -> Int -> Word -> ST s ()
writeCodePoint mba ofs cp
  | cp <    0x80  = writeCodePoint1 mba ofs cp
  | cp <   0x800  = writeCodePoint2 mba ofs cp
  | cp < 0x10000  = writeCodePoint3 mba ofs cp
  | otherwise     = writeCodePoint4 mba ofs cp
-}

writeCodePointN :: B -> MBA s -> B -> CP -> ST s ()
writeCodePointN :: B -> MBA s -> B -> CP -> ST s ()
writeCodePointN B
1 = MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint1
writeCodePointN B
2 = MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint2
writeCodePointN B
3 = MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint3
writeCodePointN B
4 = MBA s -> B -> CP -> ST s ()
forall s. MBA s -> B -> CP -> ST s ()
writeCodePoint4
writeCodePointN B
_ = MBA s -> B -> CP -> ST s ()
forall a. HasCallStack => a
undefined

writeCodePoint1 :: MBA s -> B -> CP -> ST s ()
writeCodePoint1 :: MBA s -> B -> CP -> ST s ()
writeCodePoint1 MBA s
mba B
ofs (CP Word
cp) =
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba B
ofs Word
cp

writeCodePoint2 :: MBA s -> B -> CP -> ST s ()
writeCodePoint2 :: MBA s -> B -> CP -> ST s ()
writeCodePoint2 MBA s
mba B
ofs (CP Word
cp) = do
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xc0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. (Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
1) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. (Word
cp                     Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))

writeCodePoint3 :: MBA s -> B -> CP -> ST s ()
writeCodePoint3 :: MBA s -> B -> CP -> ST s ()
writeCodePoint3 MBA s
mba B
ofs (CP Word
cp) = do
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xe0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.  (Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
12))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
1) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. ((Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6)  Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
2) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.  (Word
cp                    Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))

writeCodePoint4 :: MBA s -> B -> CP -> ST s ()
writeCodePoint4 :: MBA s -> B -> CP -> ST s ()
writeCodePoint4 MBA s
mba B
ofs (CP Word
cp) = do
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba  B
ofs    (Word
0xf0 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|.  (Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
18))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
1) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. ((Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
12) Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
2) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. ((Word
cp Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Int
6)  Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))
  MBA s -> B -> Word -> ST s ()
forall s. MBA s -> B -> Word -> ST s ()
writeWord8Array MBA s
mba (B
ofsB -> B -> B
forall a. Num a => a -> a -> a
+B
3) (Word
0x80 Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. (Word
cp                     Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. Word
0x3f))

-- beware: UNSAFE!
readCodePoint :: ShortText -> B -> CP
readCodePoint :: ShortText -> B -> CP
readCodePoint ShortText
st (B -> CSize
csizeFromB -> CSize
ofs)
  = Word -> CP
CP (Word -> CP) -> Word -> CP
forall a b. (a -> b) -> a -> b
$ IO Word -> Word
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO Word
c_text_short_ofs_cp (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
ofs)

foreign import ccall unsafe "hs_text_short_ofs_cp" c_text_short_ofs_cp :: ByteArray# -> CSize -> IO CCodePoint

readCodePointRev :: ShortText -> B -> CP
readCodePointRev :: ShortText -> B -> CP
readCodePointRev ShortText
st (B -> CSize
csizeFromB -> CSize
ofs)
  = Word -> CP
CP (Word -> CP) -> Word -> CP
forall a b. (a -> b) -> a -> b
$ IO Word -> Word
forall a. IO a -> a
unsafeDupablePerformIO (ByteArray# -> CSize -> IO Word
c_text_short_ofs_cp_rev (ShortText -> ByteArray#
toByteArray# ShortText
st) CSize
ofs)

foreign import ccall unsafe "hs_text_short_ofs_cp_rev" c_text_short_ofs_cp_rev :: ByteArray# -> CSize -> IO CCodePoint

----------------------------------------------------------------------------
-- string & list literals

-- | __Note__: Surrogate pairs (@[U+D800 .. U+DFFF]@) character literals are replaced by U+FFFD.
--
-- @since 0.1.2
instance GHC.Exts.IsList ShortText where
    type (Item ShortText) = Char
    fromList :: [Item ShortText] -> ShortText
fromList = String -> ShortText
[Item ShortText] -> ShortText
fromString
    toList :: ShortText -> [Item ShortText]
toList   = ShortText -> String
ShortText -> [Item ShortText]
toString

-- | __Note__: Surrogate pairs (@[U+D800 .. U+DFFF]@) in string literals are replaced by U+FFFD.
--
-- This matches the behaviour of 'S.IsString' instance for 'T.Text'.
instance S.IsString ShortText where
    fromString :: String -> ShortText
fromString = String -> ShortText
fromStringLit

-- i.e., don't inline before Phase 0
{-# INLINE [0] fromStringLit #-}
fromStringLit :: String -> ShortText
fromStringLit :: String -> ShortText
fromStringLit = String -> ShortText
fromString

{-# RULES "ShortText empty literal" fromStringLit "" = mempty #-}

-- TODO: this doesn't seem to fire
{-# RULES "ShortText singleton literal" forall c . fromStringLit [c] = singleton c #-}

{-# RULES "ShortText literal ASCII" forall s . fromStringLit (GHC.unpackCString# s) = fromLitAsciiAddr# s #-}

{-# RULES "ShortText literal UTF-8" forall s . fromStringLit (GHC.unpackCStringUtf8# s) = fromLitMUtf8Addr# s #-}

{-# NOINLINE fromLitAsciiAddr# #-}
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# (Addr# -> Ptr CChar
forall a. Addr# -> Ptr a
Ptr -> Ptr CChar
ptr) = IO ShortText -> ShortText
forall a. IO a -> a
unsafeDupablePerformIO (IO ShortText -> ShortText) -> IO ShortText -> ShortText
forall a b. (a -> b) -> a -> b
$ do
  B
sz <- CSize -> B
csizeToB (CSize -> B) -> IO CSize -> IO B
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Ptr CChar -> IO CSize
c_strlen Ptr CChar
ptr

  case B
sz B -> B -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` B
0 of
    Ordering
EQ -> ShortText -> IO ShortText
forall (m :: * -> *) a. Monad m => a -> m a
return ShortText
forall a. Monoid a => a
mempty -- should not happen if rules fire correctly
    Ordering
GT -> ST RealWorld ShortText -> IO ShortText
forall a. ST RealWorld a -> IO a
stToIO (ST RealWorld ShortText -> IO ShortText)
-> ST RealWorld ShortText -> IO ShortText
forall a b. (a -> b) -> a -> b
$ do
      MBA RealWorld
mba <- B -> ST RealWorld (MBA RealWorld)
forall s. B -> ST s (MBA s)
newByteArray B
sz
      Ptr CChar -> MBA RealWorld -> B -> B -> ST RealWorld ()
forall a. Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray Ptr CChar
ptr MBA RealWorld
mba B
0 B
sz
      MBA RealWorld -> ST RealWorld ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba
    Ordering
LT -> ShortText -> IO ShortText
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> ShortText
forall a. HasCallStack => String -> a
error String
"fromLitAsciiAddr#")
          -- NOTE: should never happen unless strlen(3) overflows (NB: CSize
          -- is unsigned; the overflow would occur when converting to
          -- 'B')

foreign import ccall unsafe "strlen" c_strlen :: CString -> IO CSize

-- GHC uses an encoding resembling Modified UTF-8 for non-ASCII string-literals
{-# NOINLINE fromLitMUtf8Addr# #-}
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# (Addr# -> Ptr CChar
forall a. Addr# -> Ptr a
Ptr -> Ptr CChar
ptr) = IO ShortText -> ShortText
forall a. IO a -> a
unsafeDupablePerformIO (IO ShortText -> ShortText) -> IO ShortText -> ShortText
forall a b. (a -> b) -> a -> b
$ do
  B
sz <- Int -> B
B (Int -> B) -> IO Int -> IO B
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` Ptr CChar -> IO Int
c_text_short_mutf8_strlen Ptr CChar
ptr

  case B
sz B -> B -> Ordering
forall a. Ord a => a -> a -> Ordering
`compare` B
0 of
    Ordering
EQ -> ShortText -> IO ShortText
forall (m :: * -> *) a. Monad m => a -> m a
return ShortText
forall a. Monoid a => a
mempty -- should not happen if rules fire correctly
    Ordering
GT -> ST RealWorld ShortText -> IO ShortText
forall a. ST RealWorld a -> IO a
stToIO (ST RealWorld ShortText -> IO ShortText)
-> ST RealWorld ShortText -> IO ShortText
forall a b. (a -> b) -> a -> b
$ do
      MBA RealWorld
mba <- B -> ST RealWorld (MBA RealWorld)
forall s. B -> ST s (MBA s)
newByteArray B
sz
      Ptr CChar -> MBA RealWorld -> B -> B -> ST RealWorld ()
forall a. Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray Ptr CChar
ptr MBA RealWorld
mba B
0 B
sz
      MBA RealWorld -> ST RealWorld ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba
    Ordering
LT -> do
      MBA RealWorld
mba <- ST RealWorld (MBA RealWorld) -> IO (MBA RealWorld)
forall a. ST RealWorld a -> IO a
stToIO (B -> ST RealWorld (MBA RealWorld)
forall s. B -> ST s (MBA s)
newByteArray (B -> B
forall a. Num a => a -> a
abs B
sz))
      Ptr CChar -> MutableByteArray# RealWorld -> IO ()
c_text_short_mutf8_trans Ptr CChar
ptr (MBA RealWorld -> MutableByteArray# RealWorld
forall s. MBA s -> MutableByteArray# s
unMBA# MBA RealWorld
mba)
      ST RealWorld ShortText -> IO ShortText
forall a. ST RealWorld a -> IO a
stToIO (MBA RealWorld -> ST RealWorld ShortText
forall s. MBA s -> ST s ShortText
unsafeFreeze MBA RealWorld
mba)

foreign import ccall unsafe "hs_text_short_mutf8_strlen" c_text_short_mutf8_strlen :: CString -> IO Int

foreign import ccall unsafe "hs_text_short_mutf8_trans" c_text_short_mutf8_trans :: CString -> MutableByteArray# RealWorld -> IO ()

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Text.Short (pack, unpack, concat)
-- >>> import Text.Show.Functions ()
-- >>> import qualified Test.QuickCheck.Arbitrary as QC
-- >>> import Test.QuickCheck.Instances ()
-- >>> instance QC.Arbitrary ShortText where { arbitrary = fmap fromString QC.arbitrary }