-- (c) The GHC Team
--
-- Functions to evaluate whether or not a string is a valid identifier.
-- There is considerable overlap between the logic here and the logic
-- in GHC.Parser.Lexer, but sadly there seems to be no way to merge them.

module GHC.Utils.Lexeme (
          -- * Lexical characteristics of Haskell names

          -- | Use these functions to figure what kind of name a 'FastString'
          -- represents; these functions do /not/ check that the identifier
          -- is valid.

        isLexCon, isLexVar, isLexId, isLexSym,
        isLexConId, isLexConSym, isLexVarId, isLexVarSym,
        startsVarSym, startsVarId, startsConSym, startsConId,

          -- * Validating identifiers

          -- | These functions (working over plain old 'String's) check
          -- to make sure that the identifier is valid.
        okVarOcc, okConOcc, okTcOcc,
        okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc

        -- Some of the exports above are not used within GHC, but may
        -- be of value to GHC API users.

  ) where

import GHC.Prelude

import GHC.Data.FastString

import Data.Char
import qualified Data.Set as Set

import GHC.Lexeme

{-

************************************************************************
*                                                                      *
    Lexical categories
*                                                                      *
************************************************************************

These functions test strings to see if they fit the lexical categories
defined in the Haskell report.

Note [Classification of generated names]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Some names generated for internal use can show up in debugging output,
e.g.  when using -ddump-simpl. These generated names start with a $
but should still be pretty-printed using prefix notation. We make sure
this is the case in isLexVarSym by only classifying a name as a symbol
if all its characters are symbols, not just its first one.
-}

isLexCon,   isLexVar,    isLexId,    isLexSym    :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool

isLexCon :: FastString -> Bool
isLexCon FastString
cs = FastString -> Bool
isLexConId  FastString
cs Bool -> Bool -> Bool
|| FastString -> Bool
isLexConSym FastString
cs
isLexVar :: FastString -> Bool
isLexVar FastString
cs = FastString -> Bool
isLexVarId  FastString
cs Bool -> Bool -> Bool
|| FastString -> Bool
isLexVarSym FastString
cs

isLexId :: FastString -> Bool
isLexId  FastString
cs = FastString -> Bool
isLexConId  FastString
cs Bool -> Bool -> Bool
|| FastString -> Bool
isLexVarId  FastString
cs
isLexSym :: FastString -> Bool
isLexSym FastString
cs = FastString -> Bool
isLexConSym FastString
cs Bool -> Bool -> Bool
|| FastString -> Bool
isLexVarSym FastString
cs

-------------
isLexConId :: FastString -> Bool
isLexConId FastString
cs                           -- Prefix type or data constructors
  | FastString -> Bool
nullFS FastString
cs          = Bool
False          --      e.g. "Foo", "[]", "(,)"
  | FastString
cs FastString -> FastString -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq FastString
== ([Char] -> FastString
fsLit [Char]
"[]") = Bool
True
  | Bool
otherwise          = Char -> Bool
startsConId (FastString -> Char
headFS FastString
cs)

isLexVarId :: FastString -> Bool
isLexVarId FastString
cs                           -- Ordinary prefix identifiers
  | FastString -> Bool
nullFS FastString
cs         = Bool
False           --      e.g. "x", "_x"
  | Bool
otherwise         = Char -> Bool
startsVarId (FastString -> Char
headFS FastString
cs)

isLexConSym :: FastString -> Bool
isLexConSym FastString
cs                          -- Infix type or data constructors
  | FastString -> Bool
nullFS FastString
cs          = Bool
False          --      e.g. ":-:", ":", "->"
  | FastString
cs FastString -> FastString -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq FastString
== ([Char] -> FastString
fsLit [Char]
"->") = Bool
True
  | Bool
otherwise          = Char -> Bool
startsConSym (FastString -> Char
headFS FastString
cs)

isLexVarSym :: FastString -> Bool
isLexVarSym FastString
fs                          -- Infix identifiers e.g. "+"
  | FastString
fs FastString -> FastString -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq FastString
== ([Char] -> FastString
fsLit [Char]
"~R#") = Bool
True
  | Bool
otherwise
  = case (if FastString -> Bool
nullFS FastString
fs then [] else FastString -> [Char]
unpackFS FastString
fs) of
      [] -> Bool
False
      (Char
c:[Char]
cs) -> Char -> Bool
startsVarSym Char
c Bool -> Bool -> Bool
&& (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
External instance of the constraint type Foldable []
all Char -> Bool
isVarSymChar [Char]
cs
        -- See Note [Classification of generated names]

{-

************************************************************************
*                                                                      *
    Detecting valid names for Template Haskell
*                                                                      *
************************************************************************

-}

----------------------
-- External interface
----------------------

-- | Is this an acceptable variable name?
okVarOcc :: String -> Bool
okVarOcc :: [Char] -> Bool
okVarOcc str :: [Char]
str@(Char
c:[Char]
_)
  | Char -> Bool
startsVarId Char
c
  = [Char] -> Bool
okVarIdOcc [Char]
str
  | Char -> Bool
startsVarSym Char
c
  = [Char] -> Bool
okVarSymOcc [Char]
str
okVarOcc [Char]
_ = Bool
False

-- | Is this an acceptable constructor name?
okConOcc :: String -> Bool
okConOcc :: [Char] -> Bool
okConOcc str :: [Char]
str@(Char
c:[Char]
_)
  | Char -> Bool
startsConId Char
c
  = [Char] -> Bool
okConIdOcc [Char]
str
  | Char -> Bool
startsConSym Char
c
  = [Char] -> Bool
okConSymOcc [Char]
str
  | [Char]
str [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type forall a. Eq a => Eq [a]
External instance of the constraint type Eq Char
== [Char]
"[]"
  = Bool
True
okConOcc [Char]
_ = Bool
False

-- | Is this an acceptable type name?
okTcOcc :: String -> Bool
okTcOcc :: [Char] -> Bool
okTcOcc [Char]
"[]" = Bool
True
okTcOcc [Char]
"->" = Bool
True
okTcOcc [Char]
"~"  = Bool
True
okTcOcc str :: [Char]
str@(Char
c:[Char]
_)
  | Char -> Bool
startsConId Char
c
  = [Char] -> Bool
okConIdOcc [Char]
str
  | Char -> Bool
startsConSym Char
c
  = [Char] -> Bool
okConSymOcc [Char]
str
  | Char -> Bool
startsVarSym Char
c
  = [Char] -> Bool
okVarSymOcc [Char]
str
okTcOcc [Char]
_ = Bool
False

-- | Is this an acceptable alphanumeric variable name, assuming it starts
-- with an acceptable letter?
okVarIdOcc :: String -> Bool
okVarIdOcc :: [Char] -> Bool
okVarIdOcc [Char]
str = [Char] -> Bool
okIdOcc [Char]
str Bool -> Bool -> Bool
&&
                 -- admit "_" as a valid identifier.  Required to support typed
                 -- holes in Template Haskell.  See #10267
                 ([Char]
str [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type forall a. Eq a => Eq [a]
External instance of the constraint type Eq Char
== [Char]
"_" Bool -> Bool -> Bool
|| Bool -> Bool
not ([Char]
str [Char] -> Set [Char] -> Bool
forall a. Ord a => a -> Set a -> Bool
External instance of the constraint type forall a. Ord a => Ord [a]
External instance of the constraint type Ord Char
`Set.member` Set [Char]
reservedIds))

-- | Is this an acceptable symbolic variable name, assuming it starts
-- with an acceptable character?
okVarSymOcc :: String -> Bool
okVarSymOcc :: [Char] -> Bool
okVarSymOcc [Char]
str = (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
External instance of the constraint type Foldable []
all Char -> Bool
okSymChar [Char]
str Bool -> Bool -> Bool
&&
                  Bool -> Bool
not ([Char]
str [Char] -> Set [Char] -> Bool
forall a. Ord a => a -> Set a -> Bool
External instance of the constraint type forall a. Ord a => Ord [a]
External instance of the constraint type Ord Char
`Set.member` Set [Char]
reservedOps) Bool -> Bool -> Bool
&&
                  Bool -> Bool
not ([Char] -> Bool
isDashes [Char]
str)

-- | Is this an acceptable alphanumeric constructor name, assuming it
-- starts with an acceptable letter?
okConIdOcc :: String -> Bool
okConIdOcc :: [Char] -> Bool
okConIdOcc [Char]
str = [Char] -> Bool
okIdOcc [Char]
str Bool -> Bool -> Bool
||
                 Bool -> [Char] -> Bool
is_tuple_name1 Bool
True  [Char]
str Bool -> Bool -> Bool
||
                   -- Is it a boxed tuple...
                 Bool -> [Char] -> Bool
is_tuple_name1 Bool
False [Char]
str Bool -> Bool -> Bool
||
                   -- ...or an unboxed tuple (#12407)...
                 [Char] -> Bool
is_sum_name1 [Char]
str
                   -- ...or an unboxed sum (#12514)?
  where
    -- check for tuple name, starting at the beginning
    is_tuple_name1 :: Bool -> [Char] -> Bool
is_tuple_name1 Bool
True  (Char
'(' : [Char]
rest)       = Bool -> [Char] -> Bool
is_tuple_name2 Bool
True  [Char]
rest
    is_tuple_name1 Bool
False (Char
'(' : Char
'#' : [Char]
rest) = Bool -> [Char] -> Bool
is_tuple_name2 Bool
False [Char]
rest
    is_tuple_name1 Bool
_     [Char]
_                  = Bool
False

    -- check for tuple tail
    is_tuple_name2 :: Bool -> [Char] -> Bool
is_tuple_name2 Bool
True  [Char]
")"          = Bool
True
    is_tuple_name2 Bool
False [Char]
"#)"         = Bool
True
    is_tuple_name2 Bool
boxed (Char
',' : [Char]
rest) = Bool -> [Char] -> Bool
is_tuple_name2 Bool
boxed [Char]
rest
    is_tuple_name2 Bool
boxed (Char
ws  : [Char]
rest)
      | Char -> Bool
isSpace Char
ws                    = Bool -> [Char] -> Bool
is_tuple_name2 Bool
boxed [Char]
rest
    is_tuple_name2 Bool
_     [Char]
_            = Bool
False

    -- check for sum name, starting at the beginning
    is_sum_name1 :: [Char] -> Bool
is_sum_name1 (Char
'(' : Char
'#' : [Char]
rest) = Bool -> [Char] -> Bool
is_sum_name2 Bool
False [Char]
rest
    is_sum_name1 [Char]
_                  = Bool
False

    -- check for sum tail, only allowing at most one underscore
    is_sum_name2 :: Bool -> [Char] -> Bool
is_sum_name2 Bool
_          [Char]
"#)"         = Bool
True
    is_sum_name2 Bool
underscore (Char
'|' : [Char]
rest) = Bool -> [Char] -> Bool
is_sum_name2 Bool
underscore [Char]
rest
    is_sum_name2 Bool
False      (Char
'_' : [Char]
rest) = Bool -> [Char] -> Bool
is_sum_name2 Bool
True [Char]
rest
    is_sum_name2 Bool
underscore (Char
ws  : [Char]
rest)
      | Char -> Bool
isSpace Char
ws                       = Bool -> [Char] -> Bool
is_sum_name2 Bool
underscore [Char]
rest
    is_sum_name2 Bool
_          [Char]
_            = Bool
False

-- | Is this an acceptable symbolic constructor name, assuming it
-- starts with an acceptable character?
okConSymOcc :: String -> Bool
okConSymOcc :: [Char] -> Bool
okConSymOcc [Char]
":" = Bool
True
okConSymOcc [Char]
str = (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
External instance of the constraint type Foldable []
all Char -> Bool
okSymChar [Char]
str Bool -> Bool -> Bool
&&
                  Bool -> Bool
not ([Char]
str [Char] -> Set [Char] -> Bool
forall a. Ord a => a -> Set a -> Bool
External instance of the constraint type forall a. Ord a => Ord [a]
External instance of the constraint type Ord Char
`Set.member` Set [Char]
reservedOps)

----------------------
-- Internal functions
----------------------

-- | Is this string an acceptable id, possibly with a suffix of hashes,
-- but not worrying about case or clashing with reserved words?
okIdOcc :: String -> Bool
okIdOcc :: [Char] -> Bool
okIdOcc [Char]
str
  = let hashes :: [Char]
hashes = (Char -> Bool) -> [Char] -> [Char]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
okIdChar [Char]
str in
    (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
External instance of the constraint type Foldable []
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Char
== Char
'#') [Char]
hashes   -- -XMagicHash allows a suffix of hashes
                          -- of course, `all` says "True" to an empty list

-- | Is this character acceptable in an identifier (after the first letter)?
-- See alexGetByte in GHC.Parser.Lexer
okIdChar :: Char -> Bool
okIdChar :: Char -> Bool
okIdChar Char
c = case Char -> GeneralCategory
generalCategory Char
c of
  GeneralCategory
UppercaseLetter -> Bool
True
  GeneralCategory
LowercaseLetter -> Bool
True
  GeneralCategory
TitlecaseLetter -> Bool
True
  GeneralCategory
ModifierLetter  -> Bool
True -- See #10196
  GeneralCategory
OtherLetter     -> Bool
True -- See #1103
  GeneralCategory
NonSpacingMark  -> Bool
True -- See #7650
  GeneralCategory
DecimalNumber   -> Bool
True
  GeneralCategory
OtherNumber     -> Bool
True -- See #4373
  GeneralCategory
_               -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Char
== Char
'\'' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Char
== Char
'_'

-- | All reserved identifiers. Taken from section 2.4 of the 2010 Report.
reservedIds :: Set.Set String
reservedIds :: Set [Char]
reservedIds = [[Char]] -> Set [Char]
forall a. Ord a => [a] -> Set a
External instance of the constraint type forall a. Ord a => Ord [a]
External instance of the constraint type Ord Char
Set.fromList [ [Char]
"case", [Char]
"class", [Char]
"data", [Char]
"default", [Char]
"deriving"
                           , [Char]
"do", [Char]
"else", [Char]
"foreign", [Char]
"if", [Char]
"import", [Char]
"in"
                           , [Char]
"infix", [Char]
"infixl", [Char]
"infixr", [Char]
"instance", [Char]
"let"
                           , [Char]
"module", [Char]
"newtype", [Char]
"of", [Char]
"then", [Char]
"type", [Char]
"where"
                           , [Char]
"_" ]

-- | All reserved operators. Taken from section 2.4 of the 2010 Report.
reservedOps :: Set.Set String
reservedOps :: Set [Char]
reservedOps = [[Char]] -> Set [Char]
forall a. Ord a => [a] -> Set a
External instance of the constraint type forall a. Ord a => Ord [a]
External instance of the constraint type Ord Char
Set.fromList [ [Char]
"..", [Char]
":", [Char]
"::", [Char]
"=", [Char]
"\\", [Char]
"|", [Char]
"<-", [Char]
"->"
                           , [Char]
"@", [Char]
"~", [Char]
"=>" ]

-- | Does this string contain only dashes and has at least 2 of them?
isDashes :: String -> Bool
isDashes :: [Char] -> Bool
isDashes (Char
'-' : Char
'-' : [Char]
rest) = (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
External instance of the constraint type Foldable []
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
External instance of the constraint type Eq Char
== Char
'-') [Char]
rest
isDashes [Char]
_                  = Bool
False