{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
-- |
-- Module      : Data.Text.Foreign
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Support for using 'Text' data with native code via the Haskell
-- foreign function interface.

module Data.Text.Foreign
    (
    -- * Interoperability with native code
    -- $interop
      I16
    -- * Safe conversion functions
    , fromPtr
    , useAsPtr
    , asForeignPtr
    -- ** Encoding as UTF-8
    , peekCStringLen
    , withCStringLen
    -- * Unsafe conversion code
    , lengthWord16
    , unsafeCopyToPtr
    -- * Low-level manipulation
    -- $lowlevel
    , dropWord16
    , takeWord16
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
#if MIN_VERSION_base(4,4,0)
import Control.Monad.ST.Unsafe (unsafeIOToST)
#else
import Control.Monad.ST (unsafeIOToST)
#endif
import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text.Internal (Text(..), empty)
import Data.Text.Unsafe (lengthWord16)
import Data.Word (Word16)
import Foreign.C.String (CStringLen)
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (peek, poke)
import qualified Data.Text.Array as A

-- $interop
--
-- The 'Text' type is implemented using arrays that are not guaranteed
-- to have a fixed address in the Haskell heap. All communication with
-- native code must thus occur by copying data back and forth.
--
-- The 'Text' type's internal representation is UTF-16, using the
-- platform's native endianness.  This makes copied data suitable for
-- use with native libraries that use a similar representation, such
-- as ICU.  To interoperate with native libraries that use different
-- internal representations, such as UTF-8 or UTF-32, consider using
-- the functions in the 'Data.Text.Encoding' module.

-- | A type representing a number of UTF-16 code units.
newtype I16 = I16 Int
    deriving (I16
I16 -> I16 -> Bounded I16
forall a. a -> a -> Bounded a
maxBound :: I16
$cmaxBound :: I16
minBound :: I16
$cminBound :: I16
External instance of the constraint type Bounded Int
Bounded, Int -> I16
I16 -> Int
I16 -> [I16]
I16 -> I16
I16 -> I16 -> [I16]
I16 -> I16 -> I16 -> [I16]
(I16 -> I16)
-> (I16 -> I16)
-> (Int -> I16)
-> (I16 -> Int)
-> (I16 -> [I16])
-> (I16 -> I16 -> [I16])
-> (I16 -> I16 -> [I16])
-> (I16 -> I16 -> I16 -> [I16])
-> Enum I16
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: I16 -> I16 -> I16 -> [I16]
$cenumFromThenTo :: I16 -> I16 -> I16 -> [I16]
enumFromTo :: I16 -> I16 -> [I16]
$cenumFromTo :: I16 -> I16 -> [I16]
enumFromThen :: I16 -> I16 -> [I16]
$cenumFromThen :: I16 -> I16 -> [I16]
enumFrom :: I16 -> [I16]
$cenumFrom :: I16 -> [I16]
fromEnum :: I16 -> Int
$cfromEnum :: I16 -> Int
toEnum :: Int -> I16
$ctoEnum :: Int -> I16
pred :: I16 -> I16
$cpred :: I16 -> I16
succ :: I16 -> I16
$csucc :: I16 -> I16
External instance of the constraint type Enum Int
Enum, I16 -> I16 -> Bool
(I16 -> I16 -> Bool) -> (I16 -> I16 -> Bool) -> Eq I16
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: I16 -> I16 -> Bool
$c/= :: I16 -> I16 -> Bool
== :: I16 -> I16 -> Bool
$c== :: I16 -> I16 -> Bool
External instance of the constraint type Eq Int
Eq, Enum I16
Real I16
Real I16
-> Enum I16
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> (I16, I16))
-> (I16 -> I16 -> (I16, I16))
-> (I16 -> Integer)
-> Integral I16
I16 -> Integer
I16 -> I16 -> (I16, I16)
I16 -> I16 -> I16
forall a.
Real a
-> Enum a
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> (a, a))
-> (a -> a -> (a, a))
-> (a -> Integer)
-> Integral a
toInteger :: I16 -> Integer
$ctoInteger :: I16 -> Integer
divMod :: I16 -> I16 -> (I16, I16)
$cdivMod :: I16 -> I16 -> (I16, I16)
quotRem :: I16 -> I16 -> (I16, I16)
$cquotRem :: I16 -> I16 -> (I16, I16)
mod :: I16 -> I16 -> I16
$cmod :: I16 -> I16 -> I16
div :: I16 -> I16 -> I16
$cdiv :: I16 -> I16 -> I16
rem :: I16 -> I16 -> I16
$crem :: I16 -> I16 -> I16
quot :: I16 -> I16 -> I16
$cquot :: I16 -> I16 -> I16
External instance of the constraint type Integral Int
Instance of class: Enum of the constraint type Enum I16
Instance of class: Real of the constraint type Real I16
Instance of class: Real of the constraint type Real I16
Instance of class: Enum of the constraint type Enum I16
Integral, Integer -> I16
I16 -> I16
I16 -> I16 -> I16
(I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16)
-> (I16 -> I16)
-> (I16 -> I16)
-> (Integer -> I16)
-> Num I16
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> I16
$cfromInteger :: Integer -> I16
signum :: I16 -> I16
$csignum :: I16 -> I16
abs :: I16 -> I16
$cabs :: I16 -> I16
negate :: I16 -> I16
$cnegate :: I16 -> I16
* :: I16 -> I16 -> I16
$c* :: I16 -> I16 -> I16
- :: I16 -> I16 -> I16
$c- :: I16 -> I16 -> I16
+ :: I16 -> I16 -> I16
$c+ :: I16 -> I16 -> I16
External instance of the constraint type Num Int
Num, Eq I16
Eq I16
-> (I16 -> I16 -> Ordering)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> Ord I16
I16 -> I16 -> Bool
I16 -> I16 -> Ordering
I16 -> I16 -> I16
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: I16 -> I16 -> I16
$cmin :: I16 -> I16 -> I16
max :: I16 -> I16 -> I16
$cmax :: I16 -> I16 -> I16
>= :: I16 -> I16 -> Bool
$c>= :: I16 -> I16 -> Bool
> :: I16 -> I16 -> Bool
$c> :: I16 -> I16 -> Bool
<= :: I16 -> I16 -> Bool
$c<= :: I16 -> I16 -> Bool
< :: I16 -> I16 -> Bool
$c< :: I16 -> I16 -> Bool
compare :: I16 -> I16 -> Ordering
$ccompare :: I16 -> I16 -> Ordering
Instance of class: Eq of the constraint type Eq I16
Instance of class: Eq of the constraint type Eq I16
External instance of the constraint type Ord Int
Ord, ReadPrec [I16]
ReadPrec I16
Int -> ReadS I16
ReadS [I16]
(Int -> ReadS I16)
-> ReadS [I16] -> ReadPrec I16 -> ReadPrec [I16] -> Read I16
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [I16]
$creadListPrec :: ReadPrec [I16]
readPrec :: ReadPrec I16
$creadPrec :: ReadPrec I16
readList :: ReadS [I16]
$creadList :: ReadS [I16]
readsPrec :: Int -> ReadS I16
$creadsPrec :: Int -> ReadS I16
External instance of the constraint type Read Int
External instance of the constraint type Monad ReadPrec
External instance of the constraint type Monad ReadPrec
Instance of class: Read of the constraint type Read I16
Read, Num I16
Ord I16
Num I16 -> Ord I16 -> (I16 -> Rational) -> Real I16
I16 -> Rational
forall a. Num a -> Ord a -> (a -> Rational) -> Real a
toRational :: I16 -> Rational
$ctoRational :: I16 -> Rational
External instance of the constraint type Real Int
Instance of class: Ord of the constraint type Ord I16
Instance of class: Num of the constraint type Num I16
Instance of class: Ord of the constraint type Ord I16
Instance of class: Num of the constraint type Num I16
Real, Int -> I16 -> ShowS
[I16] -> ShowS
I16 -> String
(Int -> I16 -> ShowS)
-> (I16 -> String) -> ([I16] -> ShowS) -> Show I16
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [I16] -> ShowS
$cshowList :: [I16] -> ShowS
show :: I16 -> String
$cshow :: I16 -> String
showsPrec :: Int -> I16 -> ShowS
$cshowsPrec :: Int -> I16 -> ShowS
External instance of the constraint type Show Int
External instance of the constraint type Ord Int
Show)

-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word16' by copying the
-- contents of the array.
fromPtr :: Ptr Word16           -- ^ source array
        -> I16                  -- ^ length of source array (in 'Word16' units)
        -> IO Text
fromPtr :: Ptr Word16 -> I16 -> IO Text
fromPtr Ptr Word16
_   (I16 Int
0)   = Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type Monad IO
return Text
empty
fromPtr Ptr Word16
ptr (I16 Int
len) =
#if defined(ASSERTS)
    assert (len > 0) $
#endif
    Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type Monad IO
return (Text -> IO Text) -> Text -> IO Text
forall a b. (a -> b) -> a -> b
$! Array -> Int -> Int -> Text
Text Array
arr Int
0 Int
len
  where
    arr :: Array
arr = (forall s. ST s (MArray s)) -> Array
A.run (Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
len ST s (MArray s) -> (MArray s -> ST s (MArray s)) -> ST s (MArray s)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
External instance of the constraint type forall s. Monad (ST s)
>>= MArray s -> ST s (MArray s)
forall {s}. MArray s -> ST s (MArray s)
copy)
    copy :: MArray s -> ST s (MArray s)
copy MArray s
marr = Ptr Word16 -> Int -> ST s (MArray s)
loop Ptr Word16
ptr Int
0
      where
        loop :: Ptr Word16 -> Int -> ST s (MArray s)
loop !Ptr Word16
p !Int
i | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Int
== Int
len = MArray s -> ST s (MArray s)
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type forall s. Monad (ST s)
return MArray s
marr
                   | Bool
otherwise = do
          MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
marr Int
i (Word16 -> ST s ()) -> ST s Word16 -> ST s ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
External instance of the constraint type forall s. Monad (ST s)
=<< IO Word16 -> ST s Word16
forall a s. IO a -> ST s a
unsafeIOToST (Ptr Word16 -> IO Word16
forall a. Storable a => Ptr a -> IO a
External instance of the constraint type Storable Word16
peek Ptr Word16
p)
          Ptr Word16 -> Int -> ST s (MArray s)
loop (Ptr Word16
p Ptr Word16 -> Int -> Ptr Word16
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+ Int
1)

-- $lowlevel
--
-- Foreign functions that use UTF-16 internally may return indices in
-- units of 'Word16' instead of characters.  These functions may
-- safely be used with such indices, as they will adjust offsets if
-- necessary to preserve the validity of a Unicode string.

-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in
-- length.
--
-- If @n@ would cause the 'Text' to end inside a surrogate pair, the
-- end of the prefix will be advanced by one additional 'Word16' unit
-- to maintain its validity.
takeWord16 :: I16 -> Text -> Text
takeWord16 :: I16 -> Text -> Text
takeWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
<= Int
0               = Text
empty
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
>= Int
len Bool -> Bool -> Bool
|| Int
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
>= Int
len = Text
t
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
m
  where
    m :: Int
m | Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Word16
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Word16
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
-Int
1)

-- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word16' units
-- dropped from its beginning.
--
-- If @n@ would cause the 'Text' to begin inside a surrogate pair, the
-- beginning of the suffix will be advanced by one additional 'Word16'
-- unit to maintain its validity.
dropWord16 :: I16 -> Text -> Text
dropWord16 :: I16 -> Text -> Text
dropWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
<= Int
0               = Text
t
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
>= Int
len Bool -> Bool -> Bool
|| Int
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Int
>= Int
len = Text
empty
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+Int
m) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
-Int
m)
  where
    m :: Int
m | Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Word16
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
External instance of the constraint type Ord Word16
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
-Int
1)

-- | /O(n)/ Copy a 'Text' to an array.  The array is assumed to be big
-- enough to hold the contents of the entire 'Text'.
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr (Text Array
arr Int
off Int
len) Ptr Word16
ptr = Ptr Word16 -> Int -> IO ()
loop Ptr Word16
ptr Int
off
  where
    end :: Int
end = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+ Int
len
    loop :: Ptr Word16 -> Int -> IO ()
loop !Ptr Word16
p !Int
i | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Int
== Int
end  = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type Monad IO
return ()
               | Bool
otherwise = do
      Ptr Word16 -> Word16 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
External instance of the constraint type Storable Word16
poke Ptr Word16
p (Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i)
      Ptr Word16 -> Int -> IO ()
loop (Ptr Word16
p Ptr Word16 -> Int -> Ptr Word16
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
+ Int
1)

-- | /O(n)/ Perform an action on a temporary, mutable copy of a
-- 'Text'.  The copy is freed as soon as the action returns.
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) Ptr Word16 -> I16 -> IO a
action =
    Int -> (Ptr Word16 -> IO a) -> IO a
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
External instance of the constraint type Num Int
* Int
2) ((Ptr Word16 -> IO a) -> IO a) -> (Ptr Word16 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
buf -> do
      Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t Ptr Word16
buf
      Ptr Word16 -> I16 -> IO a
action (Ptr Word16 -> Ptr Word16
forall a b. Ptr a -> Ptr b
castPtr Ptr Word16
buf) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
Instance of class: Num of the constraint type Num I16
External instance of the constraint type Integral Int
fromIntegral Int
len)

-- | /O(n)/ Make a mutable copy of a 'Text'.
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) = do
  ForeignPtr Word16
fp <- Int -> IO (ForeignPtr Word16)
forall a. Storable a => Int -> IO (ForeignPtr a)
External instance of the constraint type Storable Word16
mallocForeignPtrArray Int
len
  ForeignPtr Word16 -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word16
fp ((Ptr Word16 -> IO ()) -> IO ()) -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t
  (ForeignPtr Word16, I16) -> IO (ForeignPtr Word16, I16)
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type Monad IO
return (ForeignPtr Word16
fp, Int -> I16
I16 Int
len)

-- | /O(n)/ Decode a C string with explicit length, which is assumed
-- to have been encoded as UTF-8. If decoding fails, a
-- 'UnicodeException' is thrown.
--
-- @since 1.0.0.0
peekCStringLen :: CStringLen -> IO Text
peekCStringLen :: CStringLen -> IO Text
peekCStringLen CStringLen
cs = do
  ByteString
bs <- CStringLen -> IO ByteString
unsafePackCStringLen CStringLen
cs
  Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
External instance of the constraint type Monad IO
return (Text -> IO Text) -> Text -> IO Text
forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeUtf8 ByteString
bs

-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
-- storage, with explicit length information. The encoded string may
-- contain NUL bytes, and is not followed by a trailing NUL byte.
--
-- The temporary storage is freed when the subcomputation terminates
-- (either normally or via an exception), so the pointer to the
-- temporary storage must /not/ be used after this function returns.
--
-- @since 1.0.0.0
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen Text
t CStringLen -> IO a
act = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (Text -> ByteString
encodeUtf8 Text
t) CStringLen -> IO a
act