package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"strconv"
	"unicode/utf8"
)

// glox: a lexer (scanner) plus a tiny command-line driver that prints the
// tokens it finds, either for a file or interactively, line by line.
//
// Provenance: this source was recovered from a whitespace-collapsed
// `git format-patch` mail:
//
//	commit  fc5f2f117260e8ea7fbb8ed9416d3a09d49120fb
//	author  Greg
//	date    Sun, 29 Sep 2024 18:45:11 +0300
//	subject [PATCH] lexer
//
// The patch created glox.go, tokentype_string.go (stringer output, merged at
// the bottom of this file), go.mod and go.sum. The latter two read:
//
//	go.mod:
//	    module fotonmoton/glox
//
//	    go 1.23.1
//
//	    require (
//	        golang.org/x/mod v0.21.0 // indirect
//	        golang.org/x/sync v0.8.0 // indirect
//	        golang.org/x/tools v0.25.0 // indirect
//	    )
//
//	go.sum:
//	    golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
//	    golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
//	    golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
//	    golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
//	    golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE=
//	    golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg=

// main dispatches on the argument count: no arguments starts the interactive
// prompt, one argument scans that file, anything else prints usage and exits
// with status 1.
func main() {
	switch len(os.Args) {
	case 1:
		runPrompt()
	case 2:
		runFile(os.Args[1])
	default:
		fmt.Fprintln(os.Stderr, "Usage: glox [file]")
		os.Exit(1)
	}
}

// hadError records whether any scanner reported an error. runFile turns it
// into a non-zero exit status; runPrompt clears it after every line.
var hadError = false

// TokenType enumerates the kinds of token the scanner can produce.
//
// The constant names are kept in ALL_CAPS because the stringer tables at the
// bottom of this file are derived from them.
//
// NOTE: the stringer output is currently kept in this same file. Re-running
// the directive below writes tokentype_string.go; delete the "stringer
// output" section at the bottom of this file when doing so, otherwise String
// is declared twice.
//
//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
type TokenType int

const (
	// one char
	LEFT_PAREN TokenType = iota
	RIGHT_PAREN
	LEFT_BRACE
	RIGHT_BRACE
	COMMA
	DOT
	MINUS
	PLUS
	SEMICOLON
	SLASH
	STAR

	// one or two chars
	BANG
	BANG_EQUAL
	EQUAL
	EQUAL_EQUAL
	GREATER
	GREATER_EQUAL
	LESS
	LESS_EQUAL

	// Literals
	IDENTIFIER
	STRING
	NUMBER

	// keywords
	AND
	CLASS
	ELSE
	FALSE
	FUN
	FOR
	IF
	NIL
	OR
	PRINT
	RETURN
	SUPER
	THIS
	TRUE
	VAR
	WHILE

	EOF
)

// keywords maps reserved words to their token types; any other identifier is
// emitted as IDENTIFIER.
var keywords = map[string]TokenType{
	"and":    AND,
	"class":  CLASS,
	"else":   ELSE,
	"false":  FALSE,
	"for":    FOR,
	"fun":    FUN,
	"if":     IF,
	"nil":    NIL,
	"or":     OR,
	"print":  PRINT,
	"return": RETURN,
	"super":  SUPER,
	"this":   THIS,
	"true":   TRUE,
	"var":    VAR,
	"while":  WHILE,
}

// noLiteral is the literal stored on tokens that carry no value. It is an
// empty struct (printed as "{}") rather than nil so the token dump keeps its
// established format.
var noLiteral any = struct{}{}

// Token is one lexical unit: its type, the exact source text it was read
// from, its literal value (string for STRING, float64 for NUMBER, noLiteral
// otherwise) and the line the scanner was on when the token was emitted.
type Token struct {
	typ     TokenType
	lexeme  string
	literal any
	line    int
}

// string renders the token as "TYPE - lexeme - literal".
func (t *Token) string() string {
	return fmt.Sprintf("%s - %s - %v", t.typ, t.lexeme, t.literal)
}

// Scanner turns UTF-8 source bytes into tokens. start marks the first byte of
// the token being scanned, current the next unread byte, and line the current
// 1-based line number.
type Scanner struct {
	source  []byte
	tokens  []Token
	start   int
	current int
	line    int
}

// newScanner returns a Scanner positioned at the start of source, on line 1.
func newScanner(source []byte) *Scanner {
	return &Scanner{source: source, start: 0, current: 0, line: 1}
}

// scan consumes the whole source and returns the accumulated tokens, always
// terminated by an EOF token whose lexeme is the text "EOF".
func (s *Scanner) scan() []Token {
	for !s.isAtEnd() {
		s.start = s.current
		s.scanToken()
	}

	s.tokens = append(s.tokens, Token{typ: EOF, lexeme: "EOF", literal: noLiteral, line: s.line})

	return s.tokens
}

// printError reports a scanning error for the current line on stderr and sets
// hadError.
func (s *Scanner) printError(message string) {
	fmt.Fprintf(os.Stderr, "[line %d] Error: %s\n", s.line, message)
	hadError = true
}

// scanToken reads one token starting at s.start. Whitespace and "//" line
// comments are skipped without emitting anything; characters that start no
// token are reported through printError.
func (s *Scanner) scanToken() {
	c := s.advance()
	switch c {
	case '(':
		s.addToken(LEFT_PAREN, noLiteral)
	case ')':
		s.addToken(RIGHT_PAREN, noLiteral)
	case '{':
		s.addToken(LEFT_BRACE, noLiteral)
	case '}':
		s.addToken(RIGHT_BRACE, noLiteral)
	case ',':
		s.addToken(COMMA, noLiteral)
	case '.':
		s.addToken(DOT, noLiteral)
	case '-':
		s.addToken(MINUS, noLiteral)
	case '+':
		s.addToken(PLUS, noLiteral)
	case ';':
		s.addToken(SEMICOLON, noLiteral)
	case '*':
		s.addToken(STAR, noLiteral)

	case '!':
		if s.match('=') {
			s.addToken(BANG_EQUAL, noLiteral)
		} else {
			s.addToken(BANG, noLiteral)
		}
	case '=':
		if s.match('=') {
			s.addToken(EQUAL_EQUAL, noLiteral)
		} else {
			s.addToken(EQUAL, noLiteral)
		}
	case '<':
		if s.match('=') {
			s.addToken(LESS_EQUAL, noLiteral)
		} else {
			s.addToken(LESS, noLiteral)
		}
	case '>':
		if s.match('=') {
			s.addToken(GREATER_EQUAL, noLiteral)
		} else {
			s.addToken(GREATER, noLiteral)
		}

	case '/':
		if s.match('/') {
			// Line comment: skip to (but not past) the newline so the
			// '\n' case below still bumps the line counter.
			for !s.isAtEnd() && s.peek() != '\n' {
				s.advance()
			}
		} else {
			s.addToken(SLASH, noLiteral)
		}
	case '"':
		s.string()
	case ' ', '\t', '\r':
		// Insignificant whitespace.
	case '\n':
		s.line++
	default:
		switch {
		case isDigit(c):
			s.number()
		case s.isAlpha(c):
			s.identifier()
		default:
			s.printError(fmt.Sprintf("Unexpected character %s", string(c)))
		}
	}
}

// identifier scans the rest of an identifier (ASCII letters, digits and '_')
// and emits either the matching keyword token or IDENTIFIER.
func (s *Scanner) identifier() {
	for isDigit(s.peek()) || s.isAlpha(s.peek()) {
		s.advance()
	}

	// string(bytes) used directly as a map key does not allocate.
	if typ, found := keywords[string(s.source[s.start:s.current])]; found {
		s.addToken(typ, noLiteral)
		return
	}
	s.addToken(IDENTIFIER, noLiteral)
}

// string scans a double-quoted string literal whose opening quote has already
// been consumed. Newlines inside the literal are allowed and counted. The
// token's literal is the text between the quotes; an unterminated string is
// reported and produces no token.
func (s *Scanner) string() {
	for !s.isAtEnd() && s.peek() != '"' {
		if s.peek() == '\n' {
			s.line++
		}
		s.advance()
	}

	if s.isAtEnd() {
		s.printError("Unterminated string")
		return
	}

	s.advance() // closing quote
	str := string(s.source[s.start+1 : s.current-1])
	s.addToken(STRING, str)
}

// number scans an integer or decimal literal (digits, optionally followed by
// '.' and at least one digit) and emits a NUMBER token carrying its float64
// value. A trailing '.' without digits is left for the next token. If the
// text cannot be converted the error is reported and no token is emitted.
func (s *Scanner) number() {
	for isDigit(s.peek()) {
		s.advance()
	}

	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance() // the '.'
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	num, err := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
	if err != nil {
		s.printError(err.Error())
		return
	}

	s.addToken(NUMBER, num)
}

// isDigit reports whether ch is an ASCII decimal digit. Only ASCII is
// accepted so that everything number() collects is parseable by
// strconv.ParseFloat.
func isDigit(ch rune) bool {
	return ch >= '0' && ch <= '9'
}

// isAlpha reports whether ch is an ASCII letter or underscore.
func (s *Scanner) isAlpha(ch rune) bool {
	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'
}

// addToken appends a token of the given type whose lexeme is the source text
// between s.start and s.current.
func (s *Scanner) addToken(typ TokenType, literal any) {
	text := string(s.source[s.start:s.current])
	s.tokens = append(s.tokens, Token{typ: typ, lexeme: text, literal: literal, line: s.line})
}

// advance consumes and returns the next rune. Callers must ensure the scanner
// is not at the end of the source.
func (s *Scanner) advance() rune {
	char, size := utf8.DecodeRune(s.source[s.current:])
	s.current += size
	return char
}

// peek returns the next rune without consuming it, or 0 at the end of the
// source.
func (s *Scanner) peek() rune {
	if s.isAtEnd() {
		return 0
	}
	char, _ := utf8.DecodeRune(s.source[s.current:])
	return char
}

// peekNext returns the rune after the one peek would return, without
// consuming anything, or 0 if there is no such rune.
func (s *Scanner) peekNext() rune {
	if s.isAtEnd() {
		return 0
	}

	// Step over the current rune by its own encoded width.
	_, size := utf8.DecodeRune(s.source[s.current:])
	if s.current+size >= len(s.source) {
		return 0
	}

	next, _ := utf8.DecodeRune(s.source[s.current+size:])
	return next
}

// match consumes the next rune and returns true only if it equals ch;
// otherwise the scanner position is left untouched.
func (s *Scanner) match(ch rune) bool {
	if s.isAtEnd() {
		return false
	}

	decoded, size := utf8.DecodeRune(s.source[s.current:])
	if decoded != ch {
		return false
	}

	s.current += size
	return true
}

// isAtEnd reports whether every byte of the source has been consumed.
func (s *Scanner) isAtEnd() bool {
	return s.current >= len(s.source)
}

// panic terminates the program via log.Fatal when err is non-nil and is a
// no-op otherwise.
//
// NOTE(review): this shadows the builtin panic for the whole package; the
// name is kept for compatibility with existing callers.
func panic(err error) {
	if err != nil {
		log.Fatal(err)
	}
}

// runPrompt reads lines from stdin and scans each one, resetting hadError
// after every line so one bad line does not poison the session. The loop
// ends on end of input, on a read error (which is fatal), or on an empty
// line.
func runPrompt() {
	scanner := bufio.NewScanner(os.Stdin)
	scanner.Split(bufio.ScanLines)

	for {
		fmt.Print("> ")
		if !scanner.Scan() {
			break
		}

		line := scanner.Text()
		if len(line) == 0 {
			break
		}

		run([]byte(line))
		hadError = false
	}

	panic(scanner.Err())
}

// runFile scans the file at path. A read failure is fatal; a scanning error
// makes the process exit with status 1.
func runFile(path string) {
	file, err := os.ReadFile(path)

	panic(err)

	run(file)

	if hadError {
		os.Exit(1)
	}
}

// run scans source and prints one line per token to stdout.
func run(source []byte) {
	tokens := newScanner(source).scan()

	for i := range tokens {
		fmt.Println(tokens[i].string())
	}
}

// ---------------------------------------------------------------------------
// stringer output (originally tokentype_string.go, produced by
// "stringer -type=TokenType"). Do not edit by hand; regenerate instead.
// ---------------------------------------------------------------------------

func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[LEFT_PAREN-0]
	_ = x[RIGHT_PAREN-1]
	_ = x[LEFT_BRACE-2]
	_ = x[RIGHT_BRACE-3]
	_ = x[COMMA-4]
	_ = x[DOT-5]
	_ = x[MINUS-6]
	_ = x[PLUS-7]
	_ = x[SEMICOLON-8]
	_ = x[SLASH-9]
	_ = x[STAR-10]
	_ = x[BANG-11]
	_ = x[BANG_EQUAL-12]
	_ = x[EQUAL-13]
	_ = x[EQUAL_EQUAL-14]
	_ = x[GREATER-15]
	_ = x[GREATER_EQUAL-16]
	_ = x[LESS-17]
	_ = x[LESS_EQUAL-18]
	_ = x[IDENTIFIER-19]
	_ = x[STRING-20]
	_ = x[NUMBER-21]
	_ = x[AND-22]
	_ = x[CLASS-23]
	_ = x[ELSE-24]
	_ = x[FALSE-25]
	_ = x[FUN-26]
	_ = x[FOR-27]
	_ = x[IF-28]
	_ = x[NIL-29]
	_ = x[OR-30]
	_ = x[PRINT-31]
	_ = x[RETURN-32]
	_ = x[SUPER-33]
	_ = x[THIS-34]
	_ = x[TRUE-35]
	_ = x[VAR-36]
	_ = x[WHILE-37]
	_ = x[EOF-38]
}

const _TokenType_name = "LEFT_PARENRIGHT_PARENLEFT_BRACERIGHT_BRACECOMMADOTMINUSPLUSSEMICOLONSLASHSTARBANGBANG_EQUALEQUALEQUAL_EQUALGREATERGREATER_EQUALLESSLESS_EQUALIDENTIFIERSTRINGNUMBERANDCLASSELSEFALSEFUNFORIFNILORPRINTRETURNSUPERTHISTRUEVARWHILEEOF"

var _TokenType_index = [...]uint8{0, 10, 21, 31, 42, 47, 50, 55, 59, 68, 73, 77, 81, 91, 96, 107, 114, 127, 131, 141, 151, 157, 163, 166, 171, 175, 180, 183, 186, 188, 191, 193, 198, 204, 209, 213, 217, 220, 225, 228}

// String returns the constant's name, or "TokenType(n)" for values outside
// the declared range.
func (i TokenType) String() string {
	if i < 0 || i >= TokenType(len(_TokenType_index)-1) {
		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
}