{-# LANGUAGE MagicHash, NoImplicitPrelude, BangPatterns, UnliftedFFITypes #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.CString
-- Copyright   :  (c) The University of Glasgow 2011
-- License     :  see libraries/ghc-prim/LICENSE
--
-- Maintainer  :  cvs-ghc@haskell.org
-- Stability   :  internal
-- Portability :  non-portable (GHC Extensions)
--
-- GHC C strings definitions (previously in GHC.Base).
-- Use GHC.Exts from the base package instead of importing this
-- module directly.
--
-----------------------------------------------------------------------------

module GHC.CString (
        unpackCString#, unpackAppendCString#, unpackFoldrCString#,
        unpackCStringUtf8#, unpackNBytes#, cstringLength#
    ) where

import GHC.Types
import GHC.Prim

-----------------------------------------------------------------------------
-- Unpacking C strings
-----------------------------------------------------------------------------

-- This code is needed for virtually all programs, since it's used for
-- unpacking the strings of error messages.

-- Used to be in GHC.Base, but was moved to ghc-prim because the new generics
-- stuff uses Strings in the representation, so to give representations for
-- ghc-prim types we need unpackCString#

{- Note [Inlining unpackCString#]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There's really no point in ever inlining things like unpackCString# as the loop
doesn't specialise in an interesting way and we can't deforest the list
constructors (we'd want to use unpackFoldrCString# for this). Moreover, it's
pretty small, so there's a danger that it'll be inlined at every literal, which
is a waste.

Moreover, inlining early may interfere with a variety of rules that are supposed
to match unpackCString#,

 * BuiltInRules in GHC.Core.Opt.ConstantFold; e.g.
       eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2)
          = s1 == s2

 * unpacking rules; e.g. in GHC.Base,
       unpackCString# a
          = build (unpackFoldrCString# a)

 * stream fusion rules; e.g. in the `text` library,
       unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
          = unpackCString# a

Moreover, we want to make it CONLIKE, so that:

* the rules in GHC.Core.Opt.ConstantFold will fire when the string is let-bound.
  E.g. the eqString rule in GHC.Core.Opt.ConstantFold
   eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2) = s1==s2

* exprIsConApp_maybe will see the string when we have
     let x = unpackCString# "foo"#
     ...(case x of algs)...

All of this goes for unpackCStringUtf8# too.
-}

{- Note [unpackCString# iterating over addr]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

When unpacking unpackCString# and friends repeatedly return a cons cell
containing:
* The current character we just unpacked.
* A thunk to unpack the rest of the string.

In order to minimize the size of the thunk we do not index of
the start of the string, offsetting into it, but instead increment
the addr and always use offset 0#.

This works since these two expressions will read from the same address.
* `indexCharOffAddr# a i`
* `indexCharOffAddr (a `plusAddr#` i) 0#`

This way we avoid the need for the thunks to close over both the start of
the string and the current offset, saving a word for each character unpacked.
-}

unpackCString# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCString# #-}
unpackCString# :: Addr# -> [Char]
unpackCString# Addr#
addr
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`eqChar#` Char#
'\0'#) = []
    | Bool
True                         = Char# -> Char
C# Char#
ch Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char]
unpackCString# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#)
      where
        -- See Note [unpackCString# iterating over addr]
        !ch :: Char#
ch = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
0#


unpackAppendCString# :: Addr# -> [Char] -> [Char]
{-# NOINLINE unpackAppendCString# #-}
     -- See the NOINLINE note on unpackCString#
unpackAppendCString# :: Addr# -> [Char] -> [Char]
unpackAppendCString# Addr#
addr [Char]
rest
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`eqChar#` Char#
'\0'#) = [Char]
rest
    | Bool
True                         = Char# -> Char
C# Char#
ch Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char] -> [Char]
unpackAppendCString# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) [Char]
rest
      where
        -- See Note [unpackCString# iterating over addr]
        !ch :: Char#
ch = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
0#

unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a

-- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#

-- It also has a BuiltInRule in GHC.Core.Opt.ConstantFold:
--      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
--        =  unpackFoldrCString# "foobaz" c n

{-# NOINLINE unpackFoldrCString# #-}
-- At one stage I had NOINLINE [0] on the grounds that, unlike
-- unpackCString#, there *is* some point in inlining
-- unpackFoldrCString#, because we get better code for the
-- higher-order function call.  BUT there may be a lot of
-- literal strings, and making a separate 'unpack' loop for
-- each is highly gratuitous.  See nofib/real/anna/PrettyPrint.

unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
unpackFoldrCString# Addr#
addr Char -> a -> a
f a
z
  | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`eqChar#` Char#
'\0'#) = a
z
  | Bool
True                         = Char# -> Char
C# Char#
ch Char -> a -> a
`f` Addr# -> (Char -> a -> a) -> a -> a
forall a. Addr# -> (Char -> a -> a) -> a -> a
unpackFoldrCString# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Char -> a -> a
f a
z
  where
    -- See Note [unpackCString# iterating over addr]
    !ch :: Char#
ch = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
0#

-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
unpackCStringUtf8# :: Addr# -> [Char]
{-# NOINLINE CONLIKE unpackCStringUtf8# #-}
unpackCStringUtf8# :: Addr# -> [Char]
unpackCStringUtf8# Addr#
addr
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`eqChar#` Char#
'\0'#  ) = []
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`leChar#` Char#
'\x7F'#) = Char# -> Char
C# Char#
ch Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char]
unpackCStringUtf8# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#)
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`leChar#` Char#
'\xDF'#) =
        let !c :: Char
c = Char# -> Char
C# (Int# -> Char#
chr# (((Char# -> Int#
ord# Char#
ch                                  Int# -> Int# -> Int#
-# Int#
0xC0#) Int# -> Int# -> Int#
`uncheckedIShiftL#`  Int#
6#) Int# -> Int# -> Int#
+#
                            (Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#)))
        in Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char]
unpackCStringUtf8# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
2#)
    | Int# -> Bool
isTrue# (Char#
ch Char# -> Char# -> Int#
`leChar#` Char#
'\xEF'#) =
        let !c :: Char
c = Char# -> Char
C# (Int# -> Char#
chr# (((Char# -> Int#
ord# Char#
ch                                             Int# -> Int# -> Int#
-# Int#
0xE0#) Int# -> Int# -> Int#
`uncheckedIShiftL#` Int#
12#) Int# -> Int# -> Int#
+#
                           ((Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#) Int# -> Int# -> Int#
`uncheckedIShiftL#`  Int#
6#) Int# -> Int# -> Int#
+#
                            (Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
2#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#)))
        in Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char]
unpackCStringUtf8# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
3#)
    | Bool
True                           =
        let !c :: Char
c = Char# -> Char
C# (Int# -> Char#
chr# (((Char# -> Int#
ord# Char#
ch                                  Int# -> Int# -> Int#
-# Int#
0xF0#) Int# -> Int# -> Int#
`uncheckedIShiftL#` Int#
18#) Int# -> Int# -> Int#
+#
                           ((Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
1#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#) Int# -> Int# -> Int#
`uncheckedIShiftL#` Int#
12#) Int# -> Int# -> Int#
+#
                           ((Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
2#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#) Int# -> Int# -> Int#
`uncheckedIShiftL#`  Int#
6#) Int# -> Int# -> Int#
+#
                            (Char# -> Int#
ord# (Addr# -> Int# -> Char#
indexCharOffAddr# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
3#) Int#
0#) Int# -> Int# -> Int#
-# Int#
0x80#)))
        in Char
c Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: Addr# -> [Char]
unpackCStringUtf8# (Addr#
addr Addr# -> Int# -> Addr#
`plusAddr#` Int#
4#)
      where
        -- See Note [unpackCString# iterating over addr]
        !ch :: Char#
ch = Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
0#

-- There's really no point in inlining this for the same reasons as
-- unpackCString. See Note [Inlining unpackCString#] above for details.
unpackNBytes# :: Addr# -> Int# -> [Char]
{-# NOINLINE unpackNBytes# #-}
unpackNBytes# :: Addr# -> Int# -> [Char]
unpackNBytes# Addr#
_addr Int#
0#   = []
unpackNBytes#  Addr#
addr Int#
len# = [Char] -> Int# -> [Char]
unpack [] (Int#
len# Int# -> Int# -> Int#
-# Int#
1#)
    where
     unpack :: [Char] -> Int# -> [Char]
     unpack :: [Char] -> Int# -> [Char]
unpack [Char]
acc Int#
i#
      | Int# -> Bool
isTrue# (Int#
i# Int# -> Int# -> Int#
<# Int#
0#)  = [Char]
acc
      | Bool
True                =
         case Addr# -> Int# -> Char#
indexCharOffAddr# Addr#
addr Int#
i# of
            Char#
ch -> [Char] -> Int# -> [Char]
unpack (Char# -> Char
C# Char#
ch Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
: [Char]
acc) (Int#
i# Int# -> Int# -> Int#
-# Int#
1#)

-- The return type is not correct here. We really want CSize,
-- but that type is defined in base. However, CSize should always
-- match the size of a machine word (I hope), so this is probably
-- alright on all platforms that GHC supports.
foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int#

-- | Compute the length of a NUL-terminated string. This address
-- must refer to immutable memory. GHC includes a built-in rule for
-- constant folding when the argument is a statically-known literal.
-- That is, a core-to-core pass reduces the expression
-- @cstringLength# "hello"#@ to the constant @5#@.
cstringLength# :: Addr# -> Int#
{-# INLINE[0] cstringLength# #-}
cstringLength# :: Addr# -> Int#
cstringLength# = Addr# -> Int#
c_strlen