Implement scanning using megaparsec
This is for the fourth chapter of Crafting Interpreters. https://craftinginterpreters.com/scanning.html This is only a first pass. Still need to test complex expressions. Basic scanning/parsing works. See test.
This commit is contained in:
parent
f3bd555903
commit
997a0d11f8
7 changed files with 319 additions and 20 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
lox-input.lox
|
12
app/Main.hs
12
app/Main.hs
|
@ -1,6 +1,9 @@
|
|||
module Main where
|
||||
|
||||
import MyLib (handleFileInput)
|
||||
import Data.Text.IO qualified as TextIO
|
||||
import GHC.IO.Encoding (setLocaleEncoding)
|
||||
import GHC.IO.Encoding.UTF8 (utf8)
|
||||
import MyLib (runLox)
|
||||
import Options.Applicative
|
||||
( Alternative ((<|>)),
|
||||
Parser,
|
||||
|
@ -19,6 +22,7 @@ import Options.Applicative
|
|||
strOption,
|
||||
(<**>),
|
||||
)
|
||||
import Options.Applicative.Builder (value)
|
||||
|
||||
data CmdLineInput
|
||||
= FileInput FilePath
|
||||
|
@ -31,6 +35,7 @@ fileInput =
|
|||
( long "file"
|
||||
<> short 'f'
|
||||
<> metavar "FILENAME"
|
||||
<> value "lox-input.lox"
|
||||
<> help "Input file with Lox code"
|
||||
)
|
||||
|
||||
|
@ -47,11 +52,14 @@ cmdLineInput = fileInput <|> stdInput
|
|||
|
||||
handleCmdLineInput :: CmdLineInput -> IO ()
|
||||
handleCmdLineInput = \case
|
||||
FileInput file -> handleFileInput file
|
||||
FileInput file -> do
|
||||
source <- TextIO.readFile file
|
||||
runLox source >>= print
|
||||
StdInput -> print "We do not handle stdin yet"
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
setLocaleEncoding utf8
|
||||
handleCmdLineInput =<< execParser opts
|
||||
where
|
||||
opts :: ParserInfo CmdLineInput
|
||||
|
|
|
@ -56,10 +56,12 @@ common warnings
|
|||
|
||||
library
|
||||
-- Import common warning flags.
|
||||
import: warnings
|
||||
import: warnings
|
||||
|
||||
-- Modules exported by the library.
|
||||
exposed-modules: MyLib
|
||||
exposed-modules:
|
||||
LoxTypes
|
||||
MyLib
|
||||
|
||||
-- Modules included in this library but not exported.
|
||||
-- other-modules:
|
||||
|
@ -67,14 +69,21 @@ library
|
|||
-- LANGUAGE extensions used by modules in this package.
|
||||
-- other-extensions:
|
||||
|
||||
default-extensions: OverloadedStrings
|
||||
|
||||
-- Other library packages from which modules are imported.
|
||||
build-depends: base ^>=4.17.1.0
|
||||
build-depends:
|
||||
, base ^>=4.17.1.0
|
||||
, megaparsec
|
||||
, mtl
|
||||
, parser-combinators
|
||||
, text
|
||||
|
||||
-- Directories containing source files.
|
||||
hs-source-dirs: src
|
||||
hs-source-dirs: src
|
||||
|
||||
-- Base language which the package is written in.
|
||||
default-language: GHC2021
|
||||
default-language: GHC2021
|
||||
|
||||
executable crafting-interpreters
|
||||
-- Import common warning flags.
|
||||
|
@ -96,6 +105,7 @@ executable crafting-interpreters
|
|||
, base ^>=4.17.1.0
|
||||
, crafting-interpreters
|
||||
, optparse-applicative
|
||||
, text
|
||||
|
||||
-- Directories containing source files.
|
||||
hs-source-dirs: app
|
||||
|
@ -105,27 +115,37 @@ executable crafting-interpreters
|
|||
|
||||
test-suite crafting-interpreters-test
|
||||
-- Import common warning flags.
|
||||
import: warnings
|
||||
import: warnings
|
||||
|
||||
-- Base language which the package is written in.
|
||||
default-language: GHC2021
|
||||
default-language: GHC2021
|
||||
|
||||
-- Modules included in this executable, other than Main.
|
||||
-- other-modules:
|
||||
other-modules: ScannerTest
|
||||
default-extensions: OverloadedStrings
|
||||
|
||||
-- LANGUAGE extensions used by modules in this package.
|
||||
-- other-extensions:
|
||||
|
||||
-- The interface type and version of the test suite.
|
||||
type: exitcode-stdio-1.0
|
||||
type: exitcode-stdio-1.0
|
||||
|
||||
-- Directories containing source files.
|
||||
hs-source-dirs: test
|
||||
hs-source-dirs: test
|
||||
|
||||
-- The entrypoint to the test suite.
|
||||
main-is: Main.hs
|
||||
main-is: Main.hs
|
||||
|
||||
-- Test dependencies.
|
||||
-- Ensure that tasty-discover is available even without installation
|
||||
build-tool-depends: tasty-discover:tasty-discover
|
||||
build-depends:
|
||||
, base ^>=4.17.1.0
|
||||
, crafting-interpreters
|
||||
, hspec
|
||||
, QuickCheck
|
||||
, tasty
|
||||
, tasty-discover
|
||||
, tasty-hspec
|
||||
, tasty-quickcheck
|
||||
, text
|
||||
|
|
114
src/LoxTypes.hs
Normal file
114
src/LoxTypes.hs
Normal file
|
@ -0,0 +1,114 @@
|
|||
module LoxTypes where
|
||||
|
||||
import Prelude hiding (False, True)
|
||||
|
||||
data CharToken
|
||||
= LeftParen
|
||||
| RightParen
|
||||
| LeftBrace
|
||||
| RightBrace
|
||||
| Comma
|
||||
| Dot
|
||||
| Minus
|
||||
| Plus
|
||||
| Semicolon
|
||||
| Slash
|
||||
| Star
|
||||
deriving (Eq)
|
||||
|
||||
instance Show CharToken where
|
||||
show LeftParen = "("
|
||||
show RightParen = ")"
|
||||
show LeftBrace = "{"
|
||||
show RightBrace = "}"
|
||||
show Comma = ","
|
||||
show Dot = "."
|
||||
show Minus = "-"
|
||||
show Plus = "+"
|
||||
show Semicolon = ";"
|
||||
show Slash = "/"
|
||||
show Star = "*"
|
||||
|
||||
data MultiCharToken
|
||||
= Bang
|
||||
| BangEqual
|
||||
| Equal
|
||||
| EqualEqual
|
||||
| Greater
|
||||
| GreaterEqual
|
||||
| Less
|
||||
| LessEqual
|
||||
deriving (Eq)
|
||||
|
||||
instance Show MultiCharToken where
|
||||
show Bang = "!"
|
||||
show BangEqual = "!="
|
||||
show Equal = "="
|
||||
show EqualEqual = "=="
|
||||
show Greater = ">"
|
||||
show GreaterEqual = ">="
|
||||
show Less = "<"
|
||||
show LessEqual = "<="
|
||||
|
||||
data Keywords
|
||||
= And
|
||||
| Class
|
||||
| Else
|
||||
| False
|
||||
| Func
|
||||
| For
|
||||
| If
|
||||
| Nil
|
||||
| Or
|
||||
| Print
|
||||
| Return
|
||||
| Super
|
||||
| This
|
||||
| True
|
||||
| Var
|
||||
| While
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Keywords where
|
||||
show And = "and"
|
||||
show Class = "class"
|
||||
show Else = "else"
|
||||
show False = "false"
|
||||
show Func = "fun"
|
||||
show For = "for"
|
||||
show If = "if"
|
||||
show Nil = "nil"
|
||||
show Or = "or"
|
||||
show Print = "print"
|
||||
show Return = "return"
|
||||
show Super = "super"
|
||||
show This = "this"
|
||||
show True = "true"
|
||||
show Var = "var"
|
||||
show While = "while"
|
||||
|
||||
data Literal
|
||||
= Identifier Keywords
|
||||
| Str String
|
||||
| Number Double
|
||||
deriving (Eq, Show)
|
||||
|
||||
data LoxToken
|
||||
= Char CharToken
|
||||
| Op MultiCharToken
|
||||
| Lit Literal
|
||||
| Eof
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- This is how Class Token is defined in Crafting Interpreters but we will
|
||||
-- make use of the State in Megaparsec to report errors.
|
||||
-- data LoxToken = Token
|
||||
-- { tokenType :: TokenType,
|
||||
-- lexeme :: Text,
|
||||
-- literal :: Literal,
|
||||
-- line :: Int
|
||||
-- }
|
||||
-- deriving (Eq)
|
||||
--
|
||||
-- instance Show LoxToken where
|
||||
-- show (Token tokenType lexeme literal _) = show tokenType ++ " " ++ show lexeme ++ " " ++ show literal
|
134
src/MyLib.hs
134
src/MyLib.hs
|
@ -1,4 +1,132 @@
|
|||
module MyLib (handleFileInput) where
|
||||
module MyLib (runLox) where
|
||||
|
||||
handleFileInput :: FilePath -> IO ()
|
||||
handleFileInput _ = putStrLn "handleFileInput"
|
||||
import Data.Text (Text, pack, unpack)
|
||||
import Data.Void (Void)
|
||||
import GHC.Float (int2Double)
|
||||
import LoxTypes
|
||||
import Text.Megaparsec hiding (State)
|
||||
import Text.Megaparsec.Char
|
||||
import Text.Megaparsec.Char.Lexer qualified as L
|
||||
|
||||
type LoxParser = Parsec Void Text
|
||||
|
||||
runLox :: (Monad m) => Text -> m [Either (ParseErrorBundle Text Void) [LoxToken]]
|
||||
runLox loxSource = return $ runLoxParser <$> (pack <$> lines (unpack loxSource))
|
||||
|
||||
skipSpaceOrComment :: LoxParser ()
|
||||
skipSpaceOrComment =
|
||||
L.space
|
||||
space1
|
||||
(L.skipLineComment "//")
|
||||
(L.skipBlockComment "/*" "*/")
|
||||
|
||||
scanCharToken :: LoxParser LoxToken
|
||||
scanCharToken = do
|
||||
try leftParen
|
||||
<|> try rightParen
|
||||
<|> try leftBrace
|
||||
<|> try rightBrace
|
||||
<|> try rightBrace
|
||||
<|> try comma
|
||||
<|> try dot
|
||||
<|> try minus
|
||||
<|> try plus
|
||||
<|> try semiColon
|
||||
<|> try slash
|
||||
<|> try star
|
||||
where
|
||||
leftParen = char '(' >> return (Char LeftParen)
|
||||
rightParen = char ')' >> return (Char RightParen)
|
||||
leftBrace = char '{' >> return (Char LeftBrace)
|
||||
rightBrace = char '}' >> return (Char RightBrace)
|
||||
comma = char ',' >> return (Char Comma)
|
||||
dot = char '.' >> return (Char Dot)
|
||||
minus = char '-' >> return (Char Minus)
|
||||
plus = char '+' >> return (Char Plus)
|
||||
semiColon = char ';' >> return (Char Semicolon)
|
||||
star = char '*' >> return (Char Star)
|
||||
slash = char '/' >> return (Char Slash)
|
||||
|
||||
scanOperators :: LoxParser LoxToken
|
||||
scanOperators = do
|
||||
try bangEqual
|
||||
<|> try bang
|
||||
<|> try equalEqual
|
||||
<|> try equal
|
||||
<|> try greaterEqual
|
||||
<|> try greater
|
||||
<|> try lessEqual
|
||||
<|> try less
|
||||
where
|
||||
bangEqual = char '!' >> char '=' >> return (Op BangEqual)
|
||||
bang = char '!' >> return (Op Bang)
|
||||
equalEqual = char '=' >> char '=' >> return (Op EqualEqual)
|
||||
equal = char '=' >> return (Op Equal)
|
||||
greaterEqual = char '>' >> char '=' >> return (Op GreaterEqual)
|
||||
greater = char '>' >> return (Op Greater)
|
||||
lessEqual = char '<' >> char '=' >> return (Op LessEqual)
|
||||
less = char '<' >> return (Op Less)
|
||||
|
||||
scanStringLiteral :: LoxParser LoxToken
|
||||
scanStringLiteral = do
|
||||
s <- char '"' >> manyTill L.charLiteral (char '"')
|
||||
return (Lit $ Str s)
|
||||
|
||||
scanNumberLiteral :: LoxParser LoxToken
|
||||
scanNumberLiteral = do
|
||||
try tryFloat <|> try tryDecimal
|
||||
where
|
||||
tryFloat = Lit . Number <$> L.float
|
||||
tryDecimal = Lit . Number . int2Double <$> L.decimal
|
||||
|
||||
scanIdentifiers :: LoxParser LoxToken
|
||||
scanIdentifiers =
|
||||
iand
|
||||
<|> iclass
|
||||
<|> ielse
|
||||
<|> ifalse
|
||||
<|> ifunc
|
||||
<|> ifor
|
||||
<|> iif
|
||||
<|> inil
|
||||
<|> ior
|
||||
<|> iprint
|
||||
<|> ireturn
|
||||
<|> isuper
|
||||
<|> ithis
|
||||
<|> itrue
|
||||
<|> ivar
|
||||
<|> iwhile
|
||||
where
|
||||
iand = try (string "and") >> return (Lit $ Identifier And)
|
||||
iclass = try (string "class") >> return (Lit $ Identifier Class)
|
||||
ielse = try (string "else") >> return (Lit $ Identifier Else)
|
||||
ifalse = try (string "false") >> return (Lit $ Identifier LoxTypes.False)
|
||||
ifunc = try (string "fun") >> return (Lit $ Identifier Func)
|
||||
ifor = try (string "for") >> return (Lit $ Identifier For)
|
||||
iif = try (string "if") >> return (Lit $ Identifier If)
|
||||
inil = try (string "nil") >> return (Lit $ Identifier Nil)
|
||||
ior = try (string "or") >> return (Lit $ Identifier Or)
|
||||
iprint = try (string "print") >> return (Lit $ Identifier Print)
|
||||
ireturn = try (string "return") >> return (Lit $ Identifier Return)
|
||||
isuper = try (string "super") >> return (Lit $ Identifier Super)
|
||||
ithis = try (string "this") >> return (Lit $ Identifier This)
|
||||
itrue = try (string "true") >> return (Lit $ Identifier LoxTypes.True)
|
||||
ivar = try (string "var") >> return (Lit $ Identifier Var)
|
||||
iwhile = try (string "while") >> return (Lit $ Identifier While)
|
||||
|
||||
scanLiteral :: LoxParser LoxToken
|
||||
scanLiteral = try scanStringLiteral <|> try scanNumberLiteral <|> try scanIdentifiers
|
||||
|
||||
scanTokens :: LoxParser [LoxToken]
|
||||
scanTokens = manyTill scanLine eof
|
||||
where
|
||||
scanLine :: LoxParser LoxToken
|
||||
scanLine = do
|
||||
try skipSpaceOrComment
|
||||
loxTokens <- try scanLiteral <|> try scanOperators <|> try scanCharToken
|
||||
try skipSpaceOrComment
|
||||
return loxTokens
|
||||
|
||||
runLoxParser :: Text -> Either (ParseErrorBundle Text Void) [LoxToken]
|
||||
runLoxParser = runParser scanTokens ""
|
||||
|
|
|
@ -1,4 +1 @@
|
|||
module Main (main) where
|
||||
|
||||
main :: IO ()
|
||||
main = putStrLn "Test suite not yet implemented."
|
||||
{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --modules=*Test.hs #-}
|
||||
|
|
31
test/ScannerTest.hs
Normal file
31
test/ScannerTest.hs
Normal file
|
@ -0,0 +1,31 @@
|
|||
module ScannerTest where
|
||||
|
||||
import Data.Either (rights)
|
||||
import Data.Text (pack)
|
||||
import LoxTypes
|
||||
import MyLib
|
||||
import Test.Hspec
|
||||
|
||||
-- Test are taken from https://github.com/munificent/craftinginterpreters/tree/master/test/scanning
|
||||
|
||||
spec_scanner :: Spec
|
||||
spec_scanner = do
|
||||
describe "Correctly scans and parses keywords" $ do
|
||||
it "all are Literal Identifier Keywords" $ do
|
||||
res <- runLox $ pack "\"and\" \"class\" \"else\" \"false\" \"for\" \"fun\" \"if\" \"nil\" \"or\" \"return\" \"super\" \"this\" \"true\" \"var\" \"while\""
|
||||
rights res `shouldBe` [[Lit (Str "and"),Lit (Str "class"),Lit (Str "else"),Lit (Str "false"),Lit (Str "for"),Lit (Str "fun"),Lit (Str "if"),Lit (Str "nil"),Lit (Str "or"),Lit (Str "return"),Lit (Str "super"),Lit (Str "this"),Lit (Str "true"),Lit (Str "var"),Lit (Str "while")]]
|
||||
|
||||
describe "Correctly scans and parses numbers" $ do
|
||||
it "all are Literal Number's" $ do
|
||||
res <- runLox $ pack "123.45 456.0 445.112345 123456"
|
||||
rights res `shouldBe` [[Lit (Number 123.45),Lit (Number 456.0),Lit (Number 445.112345),Lit (Number 123456.0)]]
|
||||
|
||||
describe "Correctly scans and parses punctuators" $ do
|
||||
it "all are char tokens and operators" $ do
|
||||
res <- runLox $ pack "(){};,+-*!===<=>=!=<>/."
|
||||
rights res `shouldBe` [[Char LeftParen,Char RightParen,Char LeftBrace,Char RightBrace,Char Semicolon,Char Comma,Char Plus,Char Minus,Char Star,Op BangEqual,Op EqualEqual,Op LessEqual,Op GreaterEqual,Op BangEqual,Op Less,Op Greater,Char Slash,Char Dot]]
|
||||
|
||||
describe "Correctly scans and parses strings" $ do
|
||||
it "all are strings" $ do
|
||||
res <- runLox $ pack "\"\" \"string\""
|
||||
rights res `shouldBe` [[Lit (Str ""),Lit (Str "string")]]
|
Loading…
Reference in a new issue