{-# LANGUAGE BangPatterns #-}
-- | This module is separate from the Lexer.x input to Alex
-- to segregate the automatically generated code from the
-- hand written code. The automatically generated code
-- causes lots of warnings which mask the interesting warnings.
module Config.LexerUtils
  (
  -- * Alex wrapper
    AlexInput
  , alexGetByte

  -- * Lexer modes
  , LexerMode(..)
  , startString
  , nestMode
  , endMode

  -- * Token builders
  , token
  , token_
  , section
  , number

  -- * Final actions
  , untermString
  , eofAction
  , errorAction
  ) where

import           Control.Applicative
import           Data.Char (GeneralCategory(..), generalCategory, digitToInt,
                            isAscii, isSpace, ord, isDigit, isHexDigit)
import           Data.Text (Text)
import           Data.Word (Word8)
import           Numeric   (readInt, readHex)
import qualified Data.Text as Text

import           Config.Tokens
import           Config.Number
import qualified Config.NumberParser

------------------------------------------------------------------------
-- Custom Alex wrapper - these functions are used by generated code
------------------------------------------------------------------------

-- | The generated code expects the lexer input type to be named 'AlexInput'
type AlexInput = Located Text

-- | Get the next characteristic byte from the input source.
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (Located p :: Position
p cs :: Text
cs)
  = do (c :: Char
c,cs' :: Text
cs') <- Text -> Maybe (Char, Text)
Text.uncons Text
cs
       let !b :: Word8
b = Char -> Word8
byteForChar Char
c
           !inp :: AlexInput
inp = Position -> Text -> AlexInput
forall a. Position -> a -> Located a
Located (Position -> Char -> Position
move Position
p Char
c) Text
cs'
       (Word8, AlexInput) -> Maybe (Word8, AlexInput)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8
b, AlexInput
inp)

------------------------------------------------------------------------

-- | Advance the position according to the kind of character lexed.
move :: Position -> Char -> Position
move :: Position -> Char -> Position
move (Position ix :: Int
ix line :: Int
line column :: Int
column) c :: Char
c =
  case Char
c of
    '\t' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Int
line (((Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 8) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
    '\n' -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) (Int
line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) 1
    _    -> Int -> Int -> Int -> Position
Position (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1) Int
line (Int
column Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)

-- | Action to perform upon end of file. Produce errors if EOF was unexpected.
eofAction :: Position -> LexerMode -> [Located Token]
eofAction :: Position -> LexerMode -> [Located Token]
eofAction eofPosn :: Position
eofPosn st :: LexerMode
st =
  case LexerMode
st of
    InComment       posn :: Position
posn _     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
    InCommentString posn :: Position
posn _     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermComment)]
    InString        posn :: Position
posn _     -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)]
    InNormal                   -> [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located (Position -> Position
park Position
eofPosn) Token
EOF]

-- | Terminate the line if needed and move the cursor to column 0 to ensure
-- that it terminates any top-level block.
park :: Position -> Position
park :: Position -> Position
park pos :: Position
pos
  | Position -> Int
posColumn Position
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 1 = Position
pos { posColumn :: Int
posColumn = 0 }
  | Bool
otherwise          = Position
pos { posColumn :: Int
posColumn = 0, posLine :: Int
posLine = Position -> Int
posLine Position
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1 }

-- | Action to perform when lexer gets stuck. Emits an error.
errorAction :: AlexInput -> [Located Token]
errorAction :: AlexInput -> [Located Token]
errorAction inp :: AlexInput
inp = [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Error -> Token
Error (Error -> Token) -> (Text -> Error) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Error
NoMatch (Char -> Error) -> (Text -> Char) -> Text -> Error
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Char
Text.head) AlexInput
inp]

------------------------------------------------------------------------
-- Lexer Modes
------------------------------------------------------------------------

-- | The lexer can be in any of four modes which determine which rules
-- are active.
data LexerMode
  = InNormal
  | InComment       !Position !LexerMode -- ^ Start of comment and return mode
  | InCommentString !Position !LexerMode -- ^ Start of string and return mode
  | InString        !Position !Text      -- ^ Start of string and input text

-- | Type of actions used by lexer upon matching a rule
type Action =
  Int                          {- ^ match length                       -} ->
  Located Text                 {- ^ current input                      -} ->
  LexerMode                    {- ^ lexer mode                         -} ->
  (LexerMode, [Located Token]) {- ^ updated lexer mode, emitted tokens -}

-- | Helper function for building an 'Action' using the lexeme
token :: (Text -> Token) -> Action
token :: (Text -> Token) -> Action
token f :: Text -> Token
f len :: Int
len match :: AlexInput
match st :: LexerMode
st = (LexerMode
st, [(Text -> Token) -> AlexInput -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Token
f (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Text.take Int
len) AlexInput
match])

-- | Helper function for building an 'Action' where the lexeme is unused.
token_ :: Token -> Action
token_ :: Token -> Action
token_ = (Text -> Token) -> Action
token ((Text -> Token) -> Action)
-> (Token -> Text -> Token) -> Token -> Action
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token -> Text -> Token
forall a b. a -> b -> a
const

------------------------------------------------------------------------
-- Alternative modes
------------------------------------------------------------------------

-- | Used to enter one of the nested modes
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode :: (Position -> LexerMode -> LexerMode) -> Action
nestMode f :: Position -> LexerMode -> LexerMode
f _ match :: AlexInput
match st :: LexerMode
st = (Position -> LexerMode -> LexerMode
f (AlexInput -> Position
forall a. Located a -> Position
locPosition AlexInput
match) LexerMode
st, [])

-- | Enter the string literal lexer
startString :: Action
startString :: Action
startString _ (Located posn :: Position
posn text :: Text
text) _ = (Position -> Text -> LexerMode
InString Position
posn Text
text, [])

-- | Successfully terminate the current mode and emit tokens as needed
endMode :: Action
endMode :: Action
endMode len :: Int
len (Located endPosn :: Position
endPosn _) mode :: LexerMode
mode =
  case LexerMode
mode of
    InNormal                 -> (LexerMode
InNormal, [])
    InCommentString _ st :: LexerMode
st     -> (LexerMode
st, [])
    InComment       _ st :: LexerMode
st     -> (LexerMode
st, [])
    InString startPosn :: Position
startPosn input :: Text
input ->
      let n :: Int
n = Position -> Int
posIndex Position
endPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
- Position -> Int
posIndex Position
startPosn Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
          badEscape :: Error
badEscape = Text -> Error
BadEscape (String -> Text
Text.pack "out of range")
      in case ReadS String
forall a. Read a => ReadS a
reads (Text -> String
Text.unpack (Int -> Text -> Text
Text.take Int
n Text
input)) of
           [(s :: String
s,"")] -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Text -> Token
String (String -> Text
Text.pack String
s))])
           _        -> (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
startPosn (Error -> Token
Error Error
badEscape)])

-- | Action for unterminated string constant
untermString :: Action
untermString :: Action
untermString _ _ = \(InString posn :: Position
posn _) ->
  (LexerMode
InNormal, [Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
posn (Error -> Token
Error Error
UntermString)])

------------------------------------------------------------------------
-- Token builders
------------------------------------------------------------------------

-- | Construct a 'Number' token from a token using a
-- given base. This function expect the token to be
-- legal for the given base. This is checked by Alex.
number ::
  Text {- ^ sign-prefix-digits -} ->
  Token
number :: Text -> Token
number = Number -> Token
Number (Number -> Token) -> (Text -> Number) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Number
Config.NumberParser.number (String -> Number) -> (Text -> String) -> Text -> Number
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Text.unpack (Text -> String) -> (Text -> Text) -> Text -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.toUpper

-- | Process a section heading token
section :: Text -> Token
section :: Text -> Token
section = Text -> Token
Section (Text -> Token) -> (Text -> Text) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
Text.dropWhileEnd Char -> Bool
isSpace (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
Text.init

------------------------------------------------------------------------
-- Embed all of unicode, kind of, in a single byte!
------------------------------------------------------------------------

-- | Alex is driven by looking up elements in a 128 element array.
-- This function maps each ASCII character to its ASCII encoding
-- and it maps non-ASCII code-points to a character class (0-6)
byteForChar :: Char -> Word8
byteForChar :: Char -> Word8
byteForChar c :: Char
c
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= '\6' = Word8
non_graphic
  | Char -> Bool
isAscii Char
c = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c)
  | Bool
otherwise = case Char -> GeneralCategory
generalCategory Char
c of
                  LowercaseLetter       -> Word8
lower
                  OtherLetter           -> Word8
lower
                  UppercaseLetter       -> Word8
upper
                  TitlecaseLetter       -> Word8
upper
                  DecimalNumber         -> Word8
digit
                  OtherNumber           -> Word8
digit
                  ConnectorPunctuation  -> Word8
symbol
                  DashPunctuation       -> Word8
symbol
                  OtherPunctuation      -> Word8
symbol
                  MathSymbol            -> Word8
symbol
                  CurrencySymbol        -> Word8
symbol
                  ModifierSymbol        -> Word8
symbol
                  OtherSymbol           -> Word8
symbol
                  Space                 -> Word8
space
                  ModifierLetter        -> Word8
other
                  NonSpacingMark        -> Word8
other
                  SpacingCombiningMark  -> Word8
other
                  EnclosingMark         -> Word8
other
                  LetterNumber          -> Word8
other
                  OpenPunctuation       -> Word8
other
                  ClosePunctuation      -> Word8
other
                  InitialQuote          -> Word8
other
                  FinalQuote            -> Word8
other
                  _                     -> Word8
non_graphic
  where
  non_graphic :: Word8
non_graphic     = 0
  upper :: Word8
upper           = 1
  lower :: Word8
lower           = 2
  digit :: Word8
digit           = 3
  symbol :: Word8
symbol          = 4
  space :: Word8
space           = 5
  other :: Word8
other           = 6