-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Simple lexer based on applicative regular expressions
--   
--   Simple lexer based on applicative regular expressions
@package lexer-applicative
@version 2.1.0.1


-- | Lexer specifications (<a>Lexer</a>, <a>Recognizer</a>) built from
--   applicative regular expressions, and a runner (<a>runLexer</a>) that
--   turns an input string into a <a>TokenStream</a>.
--   
--   For some background, see
--   <a>https://ro-che.info/articles/2015-01-02-lexical-analysis</a>
module Language.Lexer.Applicative

-- | A <a>Lexer</a> specification consists of two recognizers: one for
--   meaningful tokens and one for whitespace and comments.
--   
--   Although you can construct <a>Lexer</a>s directly, it is more
--   convenient to build them with <a>token</a>, <a>whitespace</a>, and the
--   <a>Monoid</a> instance like this:
--   
--   <pre>
--   myLexer :: <a>Lexer</a> MyToken
--   myLexer = <a>mconcat</a>
--     [ <a>token</a>      (<a>longest</a> myToken)
--     , <a>whitespace</a> (<a>longest</a> myWhiteSpace)
--     , <a>whitespace</a> (<a>longestShortest</a> myCommentPrefix myCommentSuffix)
--     ]
--   </pre>
data Lexer tok
-- | Direct constructor: the token recognizer first, then the
--   whitespace/comment recognizer.
Lexer :: Recognizer tok -> Recognizer () -> Lexer tok
-- | The recognizer for meaningful tokens; its results have the token
--   type <tt>tok</tt>.
[lexerTokenRE] :: Lexer tok -> Recognizer tok
-- | The recognizer for whitespace and comments; it carries no result
--   (<tt>()</tt>).
[lexerWhitespaceRE] :: Lexer tok -> Recognizer ()

-- | Build a lexer from the given token recognizer, leaving the whitespace
--   recognizer empty (i.e. <a>mempty</a>).
--   
--   <a>token</a> is a monoid homomorphism, so combining two such lexers
--   is the same as combining their recognizers first:
--   
--   <pre>
--   <a>token</a> a <a>&lt;&gt;</a> <a>token</a> b = <a>token</a> (a <a>&lt;&gt;</a> b)
--   </pre>
token :: Recognizer tok -> Lexer tok

-- | Build a lexer from the given whitespace recognizer, leaving the token
--   recognizer empty (i.e. <a>mempty</a>).
--   
--   The recognizer's result type <tt>a</tt> is arbitrary: it does not
--   appear in the type of the resulting <a>Lexer</a>, so whitespace never
--   contributes a token value.
--   
--   <a>whitespace</a> is a monoid homomorphism, so combining two such
--   lexers is the same as combining their recognizers first:
--   
--   <pre>
--   <a>whitespace</a> a <a>&lt;&gt;</a> <a>whitespace</a> b = <a>whitespace</a> (a <a>&lt;&gt;</a> b)
--   </pre>
whitespace :: Recognizer a -> Lexer tok

-- | A token recognizer
--   
--   <a>Recognizer</a> values are constructed by functions like
--   <a>longest</a> and <a>longestShortest</a>, combined with
--   <a>mappend</a> (or <a>&lt;&gt;</a>), and turned into a <a>Lexer</a>
--   by <a>token</a> and <a>whitespace</a>.
--   
--   When a recognizer returns without consuming any characters, a lexical
--   error is signaled.
data Recognizer tok

-- | Build a <a>Recognizer</a> from a single regular expression.
--   
--   When scanning the next token, the regular expression will compete
--   with the other <a>Recognizer</a>s of its <a>Lexer</a>. If it wins, its
--   result will become the next token.
--   
--   <a>longest</a> has the following properties:
--   
--   <ul>
--   <li><pre><a>longest</a> (r1 <a>&lt;|&gt;</a> r2) = <a>longest</a> r1 <a>&lt;&gt;</a> <a>longest</a> r2</pre></li>
--   <li><pre><a>longest</a> r = <a>longestShortest</a> r <a>pure</a></pre></li>
--   </ul>
longest :: RE Char tok -> Recognizer tok

-- | This is a more sophisticated recognizer than <a>longest</a>.
--   
--   It recognizes a token consisting of a prefix and a suffix, where the
--   prefix match is chosen longest and the suffix match is chosen
--   shortest. The suffix regex is obtained by applying the second
--   argument to the result of the prefix regex.
--   
--   An example would be a C block comment
--   
--   <pre>
--   /* comment text */
--   </pre>
--   
--   The naive
--   
--   <pre>
--   <a>longest</a> (<a>string</a> "/*" <a>*&gt;</a> <a>many</a> <a>anySym</a> <a>*&gt;</a> <a>string</a> "*/")
--   </pre>
--   
--   doesn't work because it consumes too much: in
--   
--   <pre>
--   /* xxx */ yyy /* zzz */
--   </pre>
--   
--   it will treat the whole line as a comment.
--   
--   This is where <a>longestShortest</a> comes in handy:
--   
--   <pre>
--   <a>longestShortest</a>
--      (<a>string</a> "/*")
--      (\_ -&gt; <a>many</a> <a>anySym</a> <a>*&gt;</a> <a>string</a> "*/")
--   </pre>
--   
--   Operationally, the prefix regex first competes with other
--   <a>Recognizer</a>s for the longest match. If it wins, its result is
--   passed to the given function to obtain the suffix regex; the shortest
--   match for that suffix regex is then found, and its result becomes the
--   token.
--   
--   The two regular expressions combined must consume some input, or else
--   <a>LexicalError</a> is thrown. However, either one of them alone may
--   return without consuming input.
--   
--   * * *
--   
--   Once the prefix regex wins, the choice is committed; the suffix regex
--   must match or else a <a>LexicalError</a> is thrown. Therefore,
--   
--   <pre>
--   <a>longestShortest</a> pref suff1
--            <a>&lt;&gt;</a>
--   <a>longestShortest</a> pref suff2
--            =
--   <a>longestShortest</a> pref suff1
--   </pre>
--   
--   which is not the same as
--   
--   <pre>
--   <a>longestShortest</a> pref (suff1 <a>&lt;|&gt;</a> suff2)
--   </pre>
--   
--   The following holds, however:
--   
--   <pre>
--   <a>longestShortest</a> pref1 suff
--            <a>&lt;&gt;</a>
--   <a>longestShortest</a> pref2 suff
--            =
--   <a>longestShortest</a> (pref1 <a>&lt;|&gt;</a> pref2) suff
--   </pre>
longestShortest :: RE Char pref -> (pref -> RE Char tok) -> Recognizer tok

-- | Run a lexer on a string and produce a lazy stream of tokens.
--   
--   Each token in the resulting <a>TokenStream</a> is wrapped in
--   <tt>L</tt>.
--   
--   NOTE(review): the signature takes two <tt>String</tt> arguments but
--   only one input string is described here. Presumably the first is the
--   source (file) name used for token locations and the second is the
--   text to lex; confirm against the implementation.
runLexer :: forall tok. Lexer tok -> String -> String -> TokenStream (L tok)

-- | A stream of tokens
--   
--   The stream is a list-like structure whose tail is either a normal end
--   (<a>TsEof</a>) or a lexical error (<a>TsError</a>).
data TokenStream tok
-- | A token followed by the rest of the stream.
TsToken :: tok -> (TokenStream tok) -> TokenStream tok
-- | The end of the stream; no further tokens follow.
TsEof :: TokenStream tok
-- | The stream ends with the given <a>LexicalError</a>; no further tokens
--   follow.
TsError :: LexicalError -> TokenStream tok

-- | Convert a <a>TokenStream</a> to a list of tokens. A <a>TsError</a> in
--   the stream is turned into a runtime <a>LexicalError</a> exception.
--   
--   See <a>streamToEitherList</a> for a variant that returns the error as
--   a value instead.
streamToList :: TokenStream tok -> [tok]

-- | Convert a <a>TokenStream</a> into either a token list or a
--   <a>LexicalError</a>. This function may occasionally be useful, but in
--   general its use is discouraged because it needs to force the whole
--   stream before returning a result.
streamToEitherList :: TokenStream tok -> Either LexicalError [tok]

-- | The lexical error exception
--   
--   It appears in a <a>TokenStream</a> as <a>TsError</a>.
data LexicalError
-- | NOTE(review): the strict <tt>Pos</tt> field is presumably the input
--   position at which lexing failed; confirm against the implementation.
LexicalError :: !Pos -> LexicalError
instance GHC.Show.Show tok => GHC.Show.Show (Language.Lexer.Applicative.TokenStream tok)
instance GHC.Base.Functor Language.Lexer.Applicative.TokenStream
instance GHC.Classes.Eq tok => GHC.Classes.Eq (Language.Lexer.Applicative.TokenStream tok)
instance GHC.Classes.Eq Language.Lexer.Applicative.LexicalError
instance GHC.Base.Functor Language.Lexer.Applicative.Lexer
instance GHC.Base.Functor Language.Lexer.Applicative.Recognizer
instance GHC.Show.Show Language.Lexer.Applicative.LexicalError
instance GHC.Exception.Exception Language.Lexer.Applicative.LexicalError
instance GHC.Base.Monoid (Language.Lexer.Applicative.Lexer tok)
instance GHC.Base.Monoid (Language.Lexer.Applicative.Recognizer tok)
