-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Simple lexer based on applicative regular expressions
@package lexer-applicative
@version 2.1.0.2


-- | For some background, see
--   <a>https://ro-che.info/articles/2015-01-02-lexical-analysis</a>
module Language.Lexer.Applicative

-- | A <a>Lexer</a> specification consists of two recognizers: one for
--   meaningful tokens and one for whitespace and comments.
--   
--   Although you can construct <a>Lexer</a>s directly, it is more
--   convenient to build them with <a>token</a>, <a>whitespace</a>, and the
--   <a>Monoid</a> instance like this:
--   
--   <pre>
--   myLexer :: <a>Lexer</a> MyToken
--   myLexer = <a>mconcat</a>
--     [ <a>token</a>      (<a>longest</a> myToken)
--     , <a>whitespace</a> (<a>longest</a> myWhiteSpace)
--     , <a>whitespace</a> (<a>longestShortest</a> myCommentPrefix myCommentSuffix)
--     ]
--   </pre>
data Lexer tok
Lexer :: Recognizer tok -> Recognizer () -> Lexer tok
[lexerTokenRE] :: Lexer tok -> Recognizer tok
[lexerWhitespaceRE] :: Lexer tok -> Recognizer ()

-- | Build a lexer with the given token recognizer and no (i.e.
--   <a>mempty</a>) whitespace recognizer.
--   
--   <a>token</a> is a monoid homomorphism:
--   
--   <pre>
--   <a>token</a> a <a>&lt;&gt;</a> <a>token</a> b = <a>token</a> (a <a>&lt;&gt;</a> b)
--   </pre>
token :: Recognizer tok -> Lexer tok

-- | Build a lexer with the given whitespace recognizer and no (i.e.
--   <a>mempty</a>) token recognizer.
--   
--   <a>whitespace</a> is a monoid homomorphism:
--   
--   <pre>
--   <a>whitespace</a> a <a>&lt;&gt;</a> <a>whitespace</a> b = <a>whitespace</a> (a <a>&lt;&gt;</a> b)
--   </pre>
whitespace :: Recognizer a -> Lexer tok

-- | A token recognizer
--   
--   <a>Recognizer</a> values are constructed by functions like
--   <a>longest</a> and <a>longestShortest</a>, combined with
--   <a>mappend</a>, and used by <a>token</a> and <a>whitespace</a>.
--   
--   When a recognizer returns without consuming any characters, a lexical
--   error is signaled.
data Recognizer tok

-- | When scanning the next token, the regular expression will compete with
--   the other <a>Recognizer</a>s of its <a>Lexer</a>. If it wins, its
--   result will become the next token.
--   
--   <a>longest</a> has the following properties:
--   
--   <ul>
--   <li><pre><a>longest</a> (r1 <a>&lt;|&gt;</a> r2) = <a>longest</a> r1
--   <a>&lt;&gt;</a> <a>longest</a> r2</pre></li>
--   <li><pre><a>longest</a> r = <a>longestShortest</a> r
--   <a>pure</a></pre></li>
--   </ul>
longest :: RE Char tok -> Recognizer tok

-- | This is a more sophisticated recognizer than <a>longest</a>.
--   
--   It recognizes a token consisting of a prefix and a suffix, where
--   prefix is chosen longest, and suffix is chosen shortest.
--   
--   An example would be a C block comment
--   
--   <pre>
--   /* comment text */
--   </pre>
--   
--   The naive
--   
--   <pre>
--   <a>longest</a> (<a>string</a> "/*" <a>*&gt;</a> <a>many</a> <a>anySym</a> <a>*&gt;</a> <a>string</a> "*/")
--   </pre>
--   
--   doesn't work because it consumes too much: in
--   
--   <pre>
--   /* xxx */ yyy /* zzz */
--   </pre>
--   
--   it will treat the whole line as a comment.
--   
--   This is where <a>longestShortest</a> comes in handy:
--   
--   <pre>
--   <a>longestShortest</a>
--      (<a>string</a> "/*")
--      (\_ -&gt; <a>many</a> <a>anySym</a> <a>*&gt;</a> <a>string</a> "*/")
--   </pre>
--   
--   Operationally, the prefix regex first competes with other
--   <a>Recognizer</a>s for the longest match. If it wins, then the
--   shortest match for the suffix regex is found, and the two results are
--   combined with the given function to produce a token.
--   
--   The two regular expressions combined must consume some input, or else
--   <a>LexicalError</a> is thrown. However, any one of them may return
--   without consuming input.
--   
--   * * *
--   
--   Once the prefix regex wins, the choice is committed; the suffix regex
--   must match or else a <a>LexicalError</a> is thrown. Therefore,
--   
--   <pre>
--   <a>longestShortest</a> pref suff1
--            <a>&lt;&gt;</a>
--   <a>longestShortest</a> pref suff2
--            =
--   <a>longestShortest</a> pref suff1
--   </pre>
--   
--   and is not the same as
--   
--   <pre>
--   <a>longestShortest</a> pref (suff1 <a>&lt;|&gt;</a> suff2)
--   </pre>
--   
--   The following holds, however:
--   
--   <pre>
--   <a>longestShortest</a> pref1 suff
--            <a>&lt;&gt;</a>
--   <a>longestShortest</a> pref2 suff
--            =
--   <a>longestShortest</a> (pref1 <a>&lt;|&gt;</a> pref2) suff
--   </pre>
longestShortest :: RE Char pref -> (pref -> RE Char tok) -> Recognizer tok

-- | Run a lexer on a string and produce a lazy stream of tokens
runLexer :: forall tok. Lexer tok -> String -> String -> TokenStream (L tok)

-- | A stream of tokens
data TokenStream tok
TsToken :: tok -> (TokenStream tok) -> TokenStream tok
TsEof :: TokenStream tok
TsError :: LexicalError -> TokenStream tok

-- | Convert a <a>TokenStream</a> to a list of tokens. Turn <a>TsError</a>
--   into a runtime <a>LexicalError</a> exception.
streamToList :: TokenStream tok -> [tok]

-- | Convert a <a>TokenStream</a> into either a token list or a
--   <a>LexicalError</a>. This function may be occasionally useful, but in
--   general its use is discouraged because it needs to force the whole
--   stream before returning a result.
streamToEitherList :: TokenStream tok -> Either LexicalError [tok]

-- | The lexical error exception
data LexicalError
LexicalError :: !Pos -> LexicalError
instance GHC.Show.Show tok => GHC.Show.Show (Language.Lexer.Applicative.TokenStream tok)
instance GHC.Base.Functor Language.Lexer.Applicative.TokenStream
instance GHC.Classes.Eq tok => GHC.Classes.Eq (Language.Lexer.Applicative.TokenStream tok)
instance GHC.Classes.Eq Language.Lexer.Applicative.LexicalError
instance GHC.Base.Functor Language.Lexer.Applicative.Lexer
instance GHC.Base.Functor Language.Lexer.Applicative.Recognizer
instance GHC.Show.Show Language.Lexer.Applicative.LexicalError
instance GHC.Exception.Exception Language.Lexer.Applicative.LexicalError
instance Data.Semigroup.Semigroup (Language.Lexer.Applicative.Lexer tok)
instance GHC.Base.Monoid (Language.Lexer.Applicative.Lexer tok)
instance Data.Semigroup.Semigroup (Language.Lexer.Applicative.Recognizer tok)
instance GHC.Base.Monoid (Language.Lexer.Applicative.Recognizer tok)
