From 997a0d11f8b597fa96497105af2f73847108b34b Mon Sep 17 00:00:00 2001 From: Sanchayan Maity Date: Mon, 19 Jun 2023 20:40:32 +0530 Subject: [PATCH] Implement scanning using megaparsec This is for the fourth chapter of Crafting Interpreters. https://craftinginterpreters.com/scanning.html This is only a first pass. Still need to test complex expressions. Basic scanning/parsing works. See test. --- .gitignore | 1 + app/Main.hs | 12 +++- crafting-interpreters.cabal | 42 ++++++++--- src/LoxTypes.hs | 114 ++++++++++++++++++++++++++++++ src/MyLib.hs | 134 +++++++++++++++++++++++++++++++++++- test/Main.hs | 5 +- test/ScannerTest.hs | 31 +++++++++ 7 files changed, 319 insertions(+), 20 deletions(-) create mode 100644 .gitignore create mode 100644 src/LoxTypes.hs create mode 100644 test/ScannerTest.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1ad70e6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +lox-input.lox diff --git a/app/Main.hs b/app/Main.hs index 14ad46a..6944ad4 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -1,6 +1,9 @@ module Main where -import MyLib (handleFileInput) +import Data.Text.IO qualified as TextIO +import GHC.IO.Encoding (setLocaleEncoding) +import GHC.IO.Encoding.UTF8 (utf8) +import MyLib (runLox) import Options.Applicative ( Alternative ((<|>)), Parser, @@ -19,6 +22,7 @@ import Options.Applicative strOption, (<**>), ) +import Options.Applicative.Builder (value) data CmdLineInput = FileInput FilePath @@ -31,6 +35,7 @@ fileInput = ( long "file" <> short 'f' <> metavar "FILENAME" + <> value "lox-input.lox" <> help "Input file with Lox code" ) @@ -47,11 +52,14 @@ cmdLineInput = fileInput <|> stdInput handleCmdLineInput :: CmdLineInput -> IO () handleCmdLineInput = \case - FileInput file -> handleFileInput file + FileInput file -> do + source <- TextIO.readFile file + runLox source >>= print StdInput -> print "We do not handle stdin yet" main :: IO () main = do + setLocaleEncoding utf8 handleCmdLineInput =<< execParser opts where opts :: ParserInfo CmdLineInput diff --git a/crafting-interpreters.cabal b/crafting-interpreters.cabal index 0ff1aae..e606fdb 100644 --- a/crafting-interpreters.cabal +++ b/crafting-interpreters.cabal @@ -56,10 +56,12 @@ common warnings library -- Import common warning flags. - import: warnings + import: warnings -- Modules exported by the library. - exposed-modules: MyLib + exposed-modules: + LoxTypes + MyLib -- Modules included in this library but not exported. -- other-modules: @@ -67,14 +69,21 @@ library -- LANGUAGE extensions used by modules in this package. -- other-extensions: + default-extensions: OverloadedStrings + -- Other library packages from which modules are imported. - build-depends: base ^>=4.17.1.0 + build-depends: + , base ^>=4.17.1.0 + , megaparsec + , mtl + , parser-combinators + , text -- Directories containing source files. - hs-source-dirs: src + hs-source-dirs: src -- Base language which the package is written in. - default-language: GHC2021 + default-language: GHC2021 executable crafting-interpreters -- Import common warning flags. @@ -96,6 +105,7 @@ executable crafting-interpreters , base ^>=4.17.1.0 , crafting-interpreters , optparse-applicative + , text -- Directories containing source files. hs-source-dirs: app @@ -105,27 +115,37 @@ executable crafting-interpreters test-suite crafting-interpreters-test -- Import common warning flags. - import: warnings + import: warnings -- Base language which the package is written in. - default-language: GHC2021 + default-language: GHC2021 -- Modules included in this executable, other than Main. - -- other-modules: + other-modules: ScannerTest + default-extensions: OverloadedStrings -- LANGUAGE extensions used by modules in this package. -- other-extensions: -- The interface type and version of the test suite. - type: exitcode-stdio-1.0 + type: exitcode-stdio-1.0 -- Directories containing source files. - hs-source-dirs: test + hs-source-dirs: test -- The entrypoint to the test suite. - main-is: Main.hs + main-is: Main.hs -- Test dependencies. + -- Ensure that tasty-discover is available even without installation + build-tool-depends: tasty-discover:tasty-discover build-depends: , base ^>=4.17.1.0 , crafting-interpreters + , hspec + , QuickCheck + , tasty + , tasty-discover + , tasty-hspec + , tasty-quickcheck + , text diff --git a/src/LoxTypes.hs b/src/LoxTypes.hs new file mode 100644 index 0000000..526bc1f --- /dev/null +++ b/src/LoxTypes.hs @@ -0,0 +1,114 @@ +module LoxTypes where + +import Prelude hiding (False, True) + +data CharToken + = LeftParen + | RightParen + | LeftBrace + | RightBrace + | Comma + | Dot + | Minus + | Plus + | Semicolon + | Slash + | Star + deriving (Eq) + +instance Show CharToken where + show LeftParen = "(" + show RightParen = ")" + show LeftBrace = "{" + show RightBrace = "}" + show Comma = "," + show Dot = "." + show Minus = "-" + show Plus = "+" + show Semicolon = ";" + show Slash = "/" + show Star = "*" + +data MultiCharToken + = Bang + | BangEqual + | Equal + | EqualEqual + | Greater + | GreaterEqual + | Less + | LessEqual + deriving (Eq) + +instance Show MultiCharToken where + show Bang = "!" + show BangEqual = "!=" + show Equal = "=" + show EqualEqual = "==" + show Greater = ">" + show GreaterEqual = ">=" + show Less = "<" + show LessEqual = "<=" + +data Keywords + = And + | Class + | Else + | False + | Func + | For + | If + | Nil + | Or + | Print + | Return + | Super + | This + | True + | Var + | While + deriving (Eq) + +instance Show Keywords where + show And = "and" + show Class = "class" + show Else = "else" + show False = "false" + show Func = "fun" + show For = "for" + show If = "if" + show Nil = "nil" + show Or = "or" + show Print = "print" + show Return = "return" + show Super = "super" + show This = "this" + show True = "true" + show Var = "var" + show While = "while" + +data Literal + = Identifier Keywords + | Str String + | Number Double + deriving (Eq, Show) + +data LoxToken + = Char CharToken + | Op MultiCharToken + | Lit Literal + | Eof + deriving (Eq, Show) + +-- This is how Class Token is defined in Crafting Interpreters but we will +-- make use of the State in Megaparsec to report errors. +-- data LoxToken = Token +-- { tokenType :: TokenType, +-- lexeme :: Text, +-- literal :: Literal, +-- line :: Int +-- } +-- deriving (Eq) +-- +-- instance Show LoxToken where +-- show (Token tokenType lexeme literal _) = show tokenType ++ " " ++ show lexeme ++ " " ++ show literal diff --git a/src/MyLib.hs b/src/MyLib.hs index 202c1f3..dbb85da 100644 --- a/src/MyLib.hs +++ b/src/MyLib.hs @@ -1,4 +1,132 @@ -module MyLib (handleFileInput) where +module MyLib (runLox) where -handleFileInput :: FilePath -> IO () -handleFileInput _ = putStrLn "handleFileInput" +import Data.Text (Text, pack, unpack) +import Data.Void (Void) +import GHC.Float (int2Double) +import LoxTypes +import Text.Megaparsec hiding (State) +import Text.Megaparsec.Char +import Text.Megaparsec.Char.Lexer qualified as L + +type LoxParser = Parsec Void Text + +runLox :: (Monad m) => Text -> m [Either (ParseErrorBundle Text Void) [LoxToken]] +runLox loxSource = return $ runLoxParser <$> (pack <$> lines (unpack loxSource)) + +skipSpaceOrComment :: LoxParser () +skipSpaceOrComment = + L.space + space1 + (L.skipLineComment "//") + (L.skipBlockComment "/*" "*/") + +scanCharToken :: LoxParser LoxToken +scanCharToken = do + try leftParen + <|> try rightParen + <|> try leftBrace + <|> try rightBrace + <|> try rightBrace + <|> try comma + <|> try dot + <|> try minus + <|> try plus + <|> try semiColon + <|> try slash + <|> try star + where + leftParen = char '(' >> return (Char LeftParen) + rightParen = char ')' >> return (Char RightParen) + leftBrace = char '{' >> return (Char LeftBrace) + rightBrace = char '}' >> return (Char RightBrace) + comma = char ',' >> return (Char Comma) + dot = char '.' >> return (Char Dot) + minus = char '-' >> return (Char Minus) + plus = char '+' >> return (Char Plus) + semiColon = char ';' >> return (Char Semicolon) + star = char '*' >> return (Char Star) + slash = char '/' >> return (Char Slash) + +scanOperators :: LoxParser LoxToken +scanOperators = do + try bangEqual + <|> try bang + <|> try equalEqual + <|> try equal + <|> try greaterEqual + <|> try greater + <|> try lessEqual + <|> try less + where + bangEqual = char '!' >> char '=' >> return (Op BangEqual) + bang = char '!' >> return (Op Bang) + equalEqual = char '=' >> char '=' >> return (Op EqualEqual) + equal = char '=' >> return (Op Equal) + greaterEqual = char '>' >> char '=' >> return (Op GreaterEqual) + greater = char '>' >> return (Op Greater) + lessEqual = char '<' >> char '=' >> return (Op LessEqual) + less = char '<' >> return (Op Less) + +scanStringLiteral :: LoxParser LoxToken +scanStringLiteral = do + s <- char '"' >> manyTill L.charLiteral (char '"') + return (Lit $ Str s) + +scanNumberLiteral :: LoxParser LoxToken +scanNumberLiteral = do + try tryFloat <|> try tryDecimal + where + tryFloat = Lit . Number <$> L.float + tryDecimal = Lit . Number . int2Double <$> L.decimal + +scanIdentifiers :: LoxParser LoxToken +scanIdentifiers = + iand + <|> iclass + <|> ielse + <|> ifalse + <|> ifunc + <|> ifor + <|> iif + <|> inil + <|> ior + <|> iprint + <|> ireturn + <|> isuper + <|> ithis + <|> itrue + <|> ivar + <|> iwhile + where + iand = try (string "and") >> return (Lit $ Identifier And) + iclass = try (string "class") >> return (Lit $ Identifier Class) + ielse = try (string "else") >> return (Lit $ Identifier Else) + ifalse = try (string "false") >> return (Lit $ Identifier LoxTypes.False) + ifunc = try (string "fun") >> return (Lit $ Identifier Func) + ifor = try (string "for") >> return (Lit $ Identifier For) + iif = try (string "if") >> return (Lit $ Identifier If) + inil = try (string "nil") >> return (Lit $ Identifier Nil) + ior = try (string "or") >> return (Lit $ Identifier Or) + iprint = try (string "print") >> return (Lit $ Identifier Print) + ireturn = try (string "return") >> return (Lit $ Identifier Return) + isuper = try (string "super") >> return (Lit $ Identifier Super) + ithis = try (string "this") >> return (Lit $ Identifier This) + itrue = try (string "true") >> return (Lit $ Identifier LoxTypes.True) + ivar = try (string "var") >> return (Lit $ Identifier Var) + iwhile = try (string "while") >> return (Lit $ Identifier While) + +scanLiteral :: LoxParser LoxToken +scanLiteral = try scanStringLiteral <|> try scanNumberLiteral <|> try scanIdentifiers + +scanTokens :: LoxParser [LoxToken] +scanTokens = manyTill scanLine eof + where + scanLine :: LoxParser LoxToken + scanLine = do + try skipSpaceOrComment + loxTokens <- try scanLiteral <|> try scanOperators <|> try scanCharToken + try skipSpaceOrComment + return loxTokens + +runLoxParser :: Text -> Either (ParseErrorBundle Text Void) [LoxToken] +runLoxParser = runParser scanTokens "" diff --git a/test/Main.hs b/test/Main.hs index 3e2059e..86678d6 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,4 +1 @@ -module Main (main) where - -main :: IO () -main = putStrLn "Test suite not yet implemented." +{-# OPTIONS_GHC -F -pgmF tasty-discover -optF --modules=*Test.hs #-} diff --git a/test/ScannerTest.hs b/test/ScannerTest.hs new file mode 100644 index 0000000..52711d6 --- /dev/null +++ b/test/ScannerTest.hs @@ -0,0 +1,31 @@ +module ScannerTest where + +import Data.Either (rights) +import Data.Text (pack) +import LoxTypes +import MyLib +import Test.Hspec + +-- Test are taken from https://github.com/munificent/craftinginterpreters/tree/master/test/scanning + +spec_scanner :: Spec +spec_scanner = do + describe "Correctly scans and parses keywords" $ do + it "all are Literal Identifier Keywords" $ do + res <- runLox $ pack "\"and\" \"class\" \"else\" \"false\" \"for\" \"fun\" \"if\" \"nil\" \"or\" \"return\" \"super\" \"this\" \"true\" \"var\" \"while\"" + rights res `shouldBe` [[Lit (Str "and"),Lit (Str "class"),Lit (Str "else"),Lit (Str "false"),Lit (Str "for"),Lit (Str "fun"),Lit (Str "if"),Lit (Str "nil"),Lit (Str "or"),Lit (Str "return"),Lit (Str "super"),Lit (Str "this"),Lit (Str "true"),Lit (Str "var"),Lit (Str "while")]] + + describe "Correctly scans and parses numbers" $ do + it "all are Literal Number's" $ do + res <- runLox $ pack "123.45 456.0 445.112345 123456" + rights res `shouldBe` [[Lit (Number 123.45),Lit (Number 456.0),Lit (Number 445.112345),Lit (Number 123456.0)]] + + describe "Correctly scans and parses punctuators" $ do + it "all are char tokens and operators" $ do + res <- runLox $ pack "(){};,+-*!===<=>=!=<>/." + rights res `shouldBe` [[Char LeftParen,Char RightParen,Char LeftBrace,Char RightBrace,Char Semicolon,Char Comma,Char Plus,Char Minus,Char Star,Op BangEqual,Op EqualEqual,Op LessEqual,Op GreaterEqual,Op BangEqual,Op Less,Op Greater,Char Slash,Char Dot]] + + describe "Correctly scans and parses strings" $ do + it "all are strings" $ do + res <- runLox $ pack "\"\" \"string\"" + rights res `shouldBe` [[Lit (Str ""),Lit (Str "string")]]