Commit b2acf6af authored by matthew-eads's avatar matthew-eads
Browse files

parse ast into simpler structure

parent 92319477
.stack-work/install/x86_64-linux/lts-5.4/7.10.3/bin/gg-mk2
\ No newline at end of file
......@@ -5,14 +5,15 @@ description: Please see README.md
homepage: https://github.cs.tufts.edu/siriusly/gg-proto
author: Matthew Eads
build-type: Simple
-- extra-source-files:
extra-source-files: AST.hs
cabal-version: >=1.10
executable gg-mk2
hs-source-dirs: src
main-is: AST.hs
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: base, array
build-depends: base, array, haskell-src-meta, haskell-src-exts,
haskell-src, template-haskell
default-language: Haskell2010
source-repository head
......
This diff is collapsed.
{
module Main where
import System.Environment
import Data.Char
import Data.Maybe
import System.IO
}
-- Happy grammar for the .ast directive language: 'grammar' consumes the
-- token stream built by 'lexer' and yields a Prog ([Grammar]).
%name grammar
%tokentype { Token }
%token
int { TokenInt }
id { TokenID $$ }
binop { TokenBinop }
set { TokenSet }
if { TokenIf}
while { TokenWhile }
begin { TokenBegin }
var { TokenVar }
userdef { TokenUserDef }
builtin { TokenBuiltin }
custom { TokenCustom }
string { TokenString $$ }
eof { TokenEOF }
nl { TokenNL }
line { TokenLine $$ }
%error {parseError}
%%
-- A program is a sequence of directives separated by newlines, ended by eof.
Prog: Grammar Prog { $1:$2 }
| nl Prog { $2 }
| eof {[]}
-- Each directive keyword (plus optional arguments) maps onto one
-- 'Grammar' constructor.
Grammar: set nl { SET }
| if nl { IF }
| while nl { WHILE }
| begin nl { BEGIN }
| var nl { VAR }
| userdef { USERDEF }
| int { INT }
| binop string nl { BINOP $2 }
| builtin id string nl { BUILTIN $2 $3 }
| custom id line { CUSTOM $2 $3 }
-- NOTE(review): the ID and String nonterminals below are not referenced
-- by Prog/Grammar above; they appear to be unused.
ID : id { $1 }
String : string { $1 }
{
-- | Called by Happy when parsing fails; aborts with the offending tokens.
parseError :: [Token] -> a
parseError ts = error $ "Parse Error on token(s) " ++ show ts
-- | One directive parsed from the user's grammar file.  The String
-- payloads come straight from the lexer: BINOP carries the operator
-- text, BUILTIN an identifier plus a string, CUSTOM an identifier plus
-- the raw rest-of-line text.
data Grammar = SET
| IF
| WHILE
| BEGIN
| VAR
| USERDEF
| INT
| BINOP String
| BUILTIN String String
| CUSTOM String String
deriving Show
-- | A parsed grammar file: the list of its directives.
type Prog = [Grammar]
type ID = String
--type _String = String
-- | Tokens produced by 'lexer' and consumed by the 'grammar' parser.
data Token = TokenBinop
| TokenID String
| TokenSet
| TokenIf
| TokenWhile
| TokenBegin
| TokenInt
| TokenVar
| TokenUserDef
| TokenBuiltin
| TokenCustom
| TokenString String
| TokenEOF
| TokenNL
| TokenLine String
deriving (Eq, Show)
-- | Lexer mode: Ignore skips text until a "-->" marker, Gdef lexes the
-- grammar keywords on the current line, Line captures one identifier
-- plus the rest of the line verbatim (for the "custom" directive).
data State = Ignore | Gdef | Line
-- | Tokenise a grammar-definition file.  A small state machine:
--   * Ignore — skip characters until a "-->" marker is seen;
--   * Gdef   — lex the grammar keywords/arguments on that line;
--   * Line   — grab one identifier plus the rest of the line verbatim.
-- Tokens are accumulated in reverse and flipped at end of input.
-- NOTE(review): keyword clauses match by prefix, so an identifier
-- beginning with e.g. "set" would be split — presumably inputs avoid
-- this; confirm against real .ast files.
lexer :: String -> [Token] -> State -> [Token]
lexer ('-':'-':'>':rest) acc Ignore = lexer rest acc Gdef
lexer ('b':'i':'n':'o':'p':rest) acc Gdef = lexer rest (TokenBinop:acc) Gdef
lexer ('s':'e':'t':rest) acc Gdef = lexer rest (TokenSet:acc) Gdef
lexer ('i':'f':rest) acc Gdef = lexer rest (TokenIf:acc) Gdef
lexer ('w':'h':'i':'l':'e':rest) acc Gdef = lexer rest (TokenWhile:acc) Gdef
lexer ('b':'e':'g':'i':'n':rest) acc Gdef = lexer rest (TokenBegin:acc) Gdef
lexer ('i':'n':'t':rest) acc Gdef = lexer rest (TokenInt:acc) Gdef
lexer ('v':'a':'r':rest) acc Gdef = lexer rest (TokenVar:acc) Gdef
lexer ('u':'s':'e':'r':'d':'e':'f':rest) acc Gdef = lexer rest (TokenUserDef:acc) Gdef
lexer ('b':'u':'i':'l':'t':'i':'n':rest) acc Gdef = lexer rest (TokenBuiltin:acc) Gdef
lexer ('c':'u':'s':'t':'o':'m':rest) acc Gdef = lexer rest (TokenCustom:acc) Line
lexer ('"':rest) acc Gdef =
    -- String literal: text up to the closing quote.  'drop 1' (rather
    -- than the partial 'tail') tolerates an unterminated string at end
    -- of input instead of crashing.
    let str = takeWhile (\c -> c /= '"') rest
        rst = drop 1 (dropWhile (\c -> c /= '"') rest)
    in lexer rst ((TokenString str):acc) Gdef
lexer (' ':rest) acc Gdef = lexer rest acc Gdef
lexer ('\t':rest) acc Gdef = lexer rest acc Gdef
lexer ('\n':rest) acc Gdef = lexer rest (TokenNL:acc) Ignore
lexer ('\r':rest) acc Gdef = lexer rest (TokenNL:acc) Ignore
-- End of input in ANY state ends the token stream.  (Previously only
-- the Ignore state handled [], and the fall-through clauses below
-- crashed on 'tail []' when the input ended mid-token.)
lexer [] acc _ = reverse (TokenEOF:acc)
lexer s acc Gdef =
    -- Anything else is an identifier running to the next space/newline.
    let str = takeWhile (\c -> (c /= ' ') && (c /= '\n')) s
        rest = drop 1 (dropWhile (\c -> (c /= ' ') && (c /= '\n')) s)
    in lexer rest ((TokenID str):acc) Gdef
lexer s acc Line =
    -- One identifier, then the remainder of the line as raw text.
    -- ('name' instead of 'id' avoids shadowing Prelude.id.)
    let s' = dropWhile (\c -> c == ' ') s
        (name, s'') = ((takeWhile (\c -> (c /= ' ') && (c /= '\n')) s'),
                       (dropWhile (\c -> (c /= ' ') && (c /= '\n')) s'))
        str = takeWhile (\c -> (c /= '\n') && (c /= '\r')) s''
        rest = drop 1 (dropWhile (\c -> (c /= '\n') && (c /= '\r')) s'')
    in lexer rest ((TokenLine str):(TokenID name):acc) Ignore
lexer (_:s) acc Ignore = lexer s acc Ignore
-- | Write the fixed preamble of the generated Happy parser to handle h:
-- module header, %name/%tokentype directives, the built-in token
-- declarations, the caller-supplied extra token declarations s, the
-- Prog production, and the opening "AST: " of the main production
-- (the first alternative is appended by the caller).
writeHeader :: Handle -> String -> IO ()
writeHeader h s = do {
hPutStr h ("{\nmodule Main where\n\nimport Lexer\n}\n" ++
"%name imp\n%tokentype { Token }\n%token \n" ++ s ++
"if { TokenIf }\nwhile { TokenWhile }\n" ++
"begin { TokenBegin }\nset { TokenSet }\n" ++
"'(' { TokenLParen }\n')' { TokenRParen }\n" ++
"int { TokenInt $$ }\nvar { TokenVar $$ }\n" ++
"'@' { TokenOp $$ }\n'!' { TokenOp $$ }\n'$' { TokenOp $$ }\n'%' { TokenOp $$ }\n'^' { TokenOp $$ }\n'&' { TokenOp $$ }\n'_' { TokenOp $$ }\n'`' { TokenOp $$ }\n'~' { TokenOp $$ }\n'=' { TokenOp $$ }\n'[' { TokenOp $$ }\n']' { TokenOp $$ }\n'{' { TokenOp $$ }\n'}' { TokenOp $$ }\n':' { TokenOp $$ }\n';' { TokenOp $$ }\n'<' { TokenOp $$ }\n'>' { TokenOp $$ }\n" ++
"%error {parseError}\n%%\n\n" ++
"Prog: AST Prog { $1:$2 }\n\t| AST { [$1] }\n\n" ++
"AST: ");
}
-- | Write the fixed tail of the generated parser: the MultiExpr
-- production plus the trailing Haskell block (parseError, the AST
-- datatype, and a main that parses stdin via the generated Alex lexer).
writeFooter :: Handle -> IO ()
writeFooter h = do {
hPutStr h ("\nMultiExpr : AST MultiExpr {$1:$2}\n | AST {[$1]}\n\n{\n\nparseError :: [Token] -> a\nparseError t = error (\"Parse Error on token \" ++ (show t))\nhappyError = parseError\n\ndata AST = Lit Int\n | Id String\n | Builtin String [AST]\n | UserDef String [AST]\n | IfThenElse AST AST AST\n | While AST AST\n | Do [AST]\n deriving Show\n\ntype MultiExpr = [AST]\n\ntype Prog = [AST]\n\n\nmain = do {\n s <- getContents;\n putStrLn (show (imp (alexScanTokens s)));\n}\n}");}
-- | Translate one grammar directive into the corresponding Happy
-- production alternative, as literal parser-source text (each result
-- starts with " | " and ends with a newline).
ruleToString :: Grammar -> String
ruleToString g = case g of
  SET          -> " | '(' set var AST ')' {Builtin \"set\" [(Id $3), $4]}\n"
  IF           -> " | '(' if AST AST AST ')' {IfThenElse $3 $4 $5}\n"
  WHILE        -> " | '(' while AST AST ')' {While $3 $4}\n"
  BEGIN        -> " | '(' begin MultiExpr ')' {Builtin \"begin\" $3}\n"
  VAR          -> " | var { Id $1 }\n"
  INT          -> " | int { Lit $1 }\n"
  USERDEF      -> " | '(' var MultiExpr ')' {UserDef $2 $3}\n"
  BINOP op     -> " | '(' '" ++ op ++ "' AST AST ')' {Builtin $2 [$3, $4]}\n"
  BUILTIN _ nm -> " | '(' " ++ nm ++ " MultiExpr ')' { Builtin \"" ++ nm ++ "\" $3}\n"
  CUSTOM _ _   -> undefined -- custom rules are not implemented yet
-- | Extra %token declaration a directive requires in the generated
-- parser, if any (only BINOP and BUILTIN contribute one).
newTokens :: Grammar -> Maybe String
newTokens g = case g of
  BINOP op     -> Just ("'" ++ op ++ "'" ++ "\t{ TokenOp $$ }\n")
  BUILTIN _ nm -> Just ("'" ++ nm ++ "'" ++ "\t{ TokenBuiltin $$ }\n")
  _            -> Nothing
-- | Extra rule a directive requires in the generated Alex lexer, if
-- any (mirrors 'newTokens': only BINOP and BUILTIN contribute one).
lexerTokens :: Grammar -> Maybe String
lexerTokens g = case g of
  BINOP op     -> Just ("\"" ++ op ++ "\"\t{ \\s -> TokenOp s }\n")
  BUILTIN _ nm -> Just ("\"" ++ nm ++ "\"\t{ \\s -> TokenBuiltin s }\n")
  _            -> Nothing
-- | Render the first production alternative: like 'ruleToString' but
-- with the leading spaces and the '|' separator stripped, since Happy's
-- first alternative follows "AST: " directly.  Uses 'drop 1' instead of
-- the partial 'tail' so an empty/all-space rule string cannot crash.
printFirstRule :: Grammar -> String
printFirstRule g = let str = ruleToString g
                   in drop 1 (dropWhile (== ' ') str)
-- | Fold helper: prepend an optional string onto the accumulated text.
-- 'Just s' contributes s in front of the accumulator; 'Nothing'
-- contributes nothing.
maybeAppend :: String -> Maybe String -> String
maybeAppend acc = maybe acc (++ acc)
-- | Emit the generated Happy parser onto the parser handle: the header
-- (with any extra token declarations), the first production alternative,
-- the remaining alternatives, then the fixed footer.  The lexer handle
-- is accepted for symmetry with the call site but is not written to.
makeParser :: [Grammar] -> Handle -> Handle -> IO ()
makeParser g _lexerF parserF = do
  writeHeader parserF (foldl maybeAppend [] (map newTokens g))
  hPutStrLn parserF (printFirstRule (head g))
  hPutStr parserF (foldl (++) [] (map ruleToString (tail g)))
  writeFooter parserF
-- | Write the generated Alex specification to handle h: the fixed
-- keyword rules, then the caller-supplied extra rules s, then the
-- operator/paren/number/variable rules and the Token datatype used by
-- the generated lexer.
makeLexer :: Handle -> String -> IO ()
makeLexer h s =
do {
hPutStr h ("{\nmodule Lexer where\n}\n\n%wrapper \"basic\"\n\n$digit = 0-9\n$alpha = [a-z]\n$Alpha = [A-Z]\n\ntokens :-\n\n\t$white+\t;\n\tset\t{ \\s -> TokenSet }\n\tif\t{ \\s -> TokenIf }\n\twhile \t{ \\s -> TokenWhile }\n\tbegin \t{ \\s -> TokenBegin }\n");
hPutStr h s;
hPutStr h " [\\+\\-\\/\\*] { \\s -> TokenOp s}\n [\\(] { \\s -> TokenLParen }\n [\\)] { \\s -> TokenRParen }\n\t$digit+\t{ \\s -> TokenInt (read s) }\n\t$alpha [$alpha $Alpha \\_]* { \\s -> TokenVar s}\n . { \\s -> TokenSym (head s)}\n{\n\ndata Token = TokenSet\n\t | TokenIf\n\t | TokenWhile\n\t | TokenBegin\n | TokenSym Char\n | TokenOp String\n | TokenBuiltin String\n | TokenPlus\n | TokenMult\n | TokenDiv\n | TokenMinus\n | TokenLParen\n | TokenRParen\n\t | TokenInt Int\n\t | TokenVar String\n\t deriving (Eq, Show)\n\n}\n";
}
-- | Entry point: read the grammar file named as the single command-line
-- argument, lex and parse it, then generate "Lexer.x" and a Happy
-- parser file (the input name with its extension stripped).
main :: IO ()
main = do {
args <- getArgs;
s <- if length args == 1 then readFile (head args)
     else (error "No file given");
lexerF <- openFile "Lexer.x" WriteMode;
parserF <- openFile (takeWhile ((/=) '.') (head args)) WriteMode;
let
 tokes = lexer s [] Ignore
 gram = grammar tokes in
 do {
 makeLexer lexerF (foldl maybeAppend [] (map lexerTokens gram));
 makeParser gram lexerF parserF;
 -- Close (and thereby flush) both output handles; the original never
 -- closed them, so buffered output could be lost on exit.
 hClose lexerF;
 hClose parserF;
 }
}
}
......@@ -4,29 +4,25 @@ type ID = String
data AST = Expression Exp AST
| TopLevlDec Dec AST
data Exp = SET ID Exp --> set
| IF SimpleExp Exp Exp --> if
| WHILE SimpleExp Exp --> while
| BEGIN [Exp] --> begin
data Exp = SET ID Exp
| IF SimpleExp Exp Exp
| WHILE SimpleExp Exp
| BEGIN [Exp]
| SimpleExp SimpleExp
data BinOp = Plus SimpleExp SimpleExp --> binop "+"
| Mult SimpleExp SimpleExp --> binop "*"
| Div SimpleExp SimpleExp --> binop "/"
| Minus SimpleExp SimpleExp --> binop "-"
| CheckExpect SimpleExp SimpleExp --> binop "check-expect"
data BinOp = Plus SimpleExp SimpleExp
| Mult SimpleExp SimpleExp
| Div SimpleExp SimpleExp
| Minus SimpleExp SimpleExp
| CheckExpect SimpleExp SimpleExp
data SimpleExp =
Lit Int --> int
| Var ID --> var
| Apply ID [SimpleExp] --> userdef
Lit Int
| Var ID
| Apply ID [SimpleExp]
| BinOp BinOp
| AGet SimpleExp SimpleExp -- > custom Aget AST "[" AST "]"
--(arr[i])
| ASet SimpleExp SimpleExp SimpleExp -- > custom Aset "set" AST "[" AST "]" AST
--(set arr[i] j)
| AMake ID SimpleExp --> builtin Amake "new"
--(new arr size)
| AGet SimpleExp SimpleExp --(arr[i])
| ASet SimpleExp SimpleExp SimpleExp --(set arr[i] j)
| AMake ID SimpleExp --(new arr size)
data Dec = Val ID Exp
This diff is collapsed.
{
module Main where
import Lexer
}
-- Generated Happy parser specification for the Imp language — this text
-- matches the output of writeHeader/ruleToString/writeFooter in AST.y.
%name imp
%tokentype { Token }
%token
new { TokenBuiltin $$}
"check-expect" { TokenOp $$ }
"-" { TokenOp $$ }
"/" { TokenOp $$ }
"*" { TokenOp $$ }
"+" { TokenOp $$ }
if { TokenIf }
while { TokenWhile }
begin { TokenBegin }
set { TokenSet }
"(" { TokenLParen }
")" { TokenRParen }
int { TokenInt $$ }
var { TokenVar $$ }
"@" { TokenOp $$ }
"!" { TokenOp $$ }
"$" { TokenOp $$ }
"%" { TokenOp $$ }
"^" { TokenOp $$ }
"&" { TokenOp $$ }
"_" { TokenOp $$ }
"`" { TokenOp $$ }
"~" { TokenOp $$ }
"=" { TokenOp $$ }
"[" { TokenOp $$ }
"]" { TokenOp $$ }
"{" { TokenOp $$ }
"}" { TokenOp $$ }
":" { TokenOp $$ }
";" { TokenOp $$ }
"<" { TokenOp $$ }
">" { TokenOp $$ }
%error {parseError}
%%
-- A program is one or more expressions.
Prog: AST Prog { $1:$2 }
| AST { [$1] }
-- Core Imp forms plus the alternatives generated from Imp.ast
-- (binops, builtins, user-defined application).
AST: "(" set var AST ")" {Builtin "set" [(Id $3), $4]}
| "(" if AST AST AST ")" {IfThenElse $3 $4 $5}
| "(" while AST AST ")" {While $3 $4}
| "(" begin MultiExpr ")" {Builtin "begin" $3}
| "(" "+" AST AST ")" {Builtin $2 [$3, $4]}
| "(" "*" AST AST ")" {Builtin $2 [$3, $4]}
| "(" "/" AST AST ")" {Builtin $2 [$3, $4]}
| "(" "-" AST AST ")" {Builtin $2 [$3, $4]}
| "(" "check-expect" AST AST ")" {Builtin $2 [$3, $4]}
| int { Lit $1 }
| var { Id $1 }
| "(" var MultiExpr ")" {UserDef $2 $3}
| "(" new MultiExpr ")" { Builtin "new" $3}
-- One or more expressions (argument lists, begin bodies).
MultiExpr : AST MultiExpr {$1:$2}
| AST {[$1]}
{
-- Called by Happy on a parse failure; aborts with the offending tokens.
parseError :: [Token] -> a
parseError t = error ("Parse Error on token " ++ (show t))
happyError = parseError
-- Abstract syntax produced by the generated parser.
data AST = Lit Int
| Id String
| Builtin String [AST]
| UserDef String [AST]
| IfThenElse AST AST AST
| While AST AST
| Do [AST]
deriving Show
type MultiExpr = [AST]
type Prog = [AST]
-- Lex stdin with the generated Alex lexer, parse it, print the result.
main = do {
s <- getContents;
putStrLn (show (imp (alexScanTokens s)));
}
}
This diff is collapsed.
{
module Lexer where
}
%wrapper "basic"
$digit = 0-9
$alpha = [a-z]
$Alpha = [A-Z]
tokens :-
$white+ ;
set { \s -> TokenSet }
if { \s -> TokenIf }
while { \s -> TokenWhile }
begin { \s -> TokenBegin }
"new" { \s -> TokenBuiltin s }
"check-expect" { \s -> TokenOp s }
"-" { \s -> TokenOp s }
"/" { \s -> TokenOp s }
"*" { \s -> TokenOp s }
"+" { \s -> TokenOp s }
[\+\-\/\*] { \s -> TokenOp s}
[\(] { \s -> TokenLParen }
[\)] { \s -> TokenRParen }
$digit+ { \s -> TokenInt (read s) }
$alpha [$alpha $Alpha \_]* { \s -> TokenVar s}
. { \s -> TokenSym (head s)}
{
-- | Tokens emitted by the generated Imp lexer.
-- NOTE(review): TokenPlus/TokenMult/TokenDiv/TokenMinus are declared
-- but no rule above produces them — arithmetic operators come through
-- as TokenOp instead.
data Token = TokenSet
| TokenIf
| TokenWhile
| TokenBegin
| TokenSym Char
| TokenOp String
| TokenBuiltin String
| TokenPlus
| TokenMult
| TokenDiv
| TokenMinus
| TokenLParen
| TokenRParen
| TokenInt Int
| TokenVar String
deriving (Eq, Show)
}
# Build pipeline: generate and compile the AST generator from AST.y,
# run it on Imp.ast to produce Imp.y and Lexer.x, then build the
# generated Imp lexer and parser.
all: AST.y
happy AST.y && ghc AST.hs && ./AST Imp.ast && alex Lexer.x && happy Imp.y && ghc Imp.hs
# Remove generated sources and stack build artifacts.
clean:
rm -f Lexer.hs && rm -f AST.hs && stack clean
# One-time toolchain setup.
setup:
stack setup
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment