diff --git a/gql/lexer/lexer.go b/gql/lexer/lexer.go
new file mode 100644
index 0000000000000000000000000000000000000000..b5bcd39954b4a4a0839db0276ffb5cc97a83fc68
--- /dev/null
+++ b/gql/lexer/lexer.go
@@ -0,0 +1,147 @@
+// Package gqlex implements a state-function lexer for a GraphQL-like
+// query syntax.
+package gqlex
+
+import (
+	"fmt"
+	"strings"
+	"unicode/utf8"
+)
+
+// itemType identifies the kind of a lexed item.
+type itemType int
+
+const (
+	itemError      itemType = iota // error occurred; val holds the message
+	itemEOF                        // end of input
+	itemLeftCurl                   // left curly bracket
+	itemRightCurl                  // right curly bracket
+	itemString                     // quoted string
+	itemText                       // plain text
+	itemIdentifier                 // variables
+)
+
+// EOF is the sentinel returned by next when the input is exhausted.
+const EOF = -1
+
+// item is a single token: its type and the input text it covers.
+type item struct {
+	typ itemType
+	val string
+}
+
+// String renders the item for debugging output.
+func (i item) String() string {
+	switch i.typ {
+	case itemEOF:
+		return "EOF"
+	case itemError:
+		return i.val
+	case itemIdentifier:
+		return fmt.Sprintf("var: [%v]", i.val)
+	}
+	return fmt.Sprintf("%q", i.val)
+}
+
+// lexer holds the scanning state and the channel items are delivered on.
+type lexer struct {
+	// NOTE: Using a text scanner wouldn't work because it's designed for parsing
+	// Golang. It won't keep track of start position, or allow us to retrieve
+	// slice from [start:pos]. Better to just use normal string.
+	input string    // string being scanned.
+	start int       // start position of this item.
+	pos   int       // current position in the input.
+	width int       // width of last rune read from input.
+	items chan item // channel of scanned items.
+	depth int       // nesting of {}
+}
+
+// newLexer starts lexing input in a new goroutine and returns the lexer.
+// Items arrive on the unbuffered items channel, which is closed when lexing
+// stops; the caller has to keep receiving until then, otherwise the
+// goroutine stays blocked on a send.
+func newLexer(input string) *lexer {
+	l := &lexer{
+		input: input,
+		items: make(chan item),
+	}
+	go l.run()
+	return l
+}
+
+// errorf sends an itemError carrying the formatted message and returns nil,
+// which makes the run loop stop.
+func (l *lexer) errorf(format string, args ...interface{}) stateFn {
+	l.items <- item{
+		typ: itemError,
+		val: fmt.Sprintf(format, args...),
+	}
+	return nil
+}
+
+// emit sends input[start:pos] as an item of type t and starts the next item
+// at pos.
+func (l *lexer) emit(t itemType) {
+	l.items <- item{
+		typ: t,
+		val: l.input[l.start:l.pos],
+	}
+	l.start = l.pos
+}
+
+// run drives the state machine from lexText until a state returns nil, then
+// closes the items channel.
+func (l *lexer) run() {
+	for state := lexText; state != nil; {
+		state = state(l)
+	}
+	close(l.items) // No more tokens.
+}
+
+// next consumes and returns the next rune of the input, or EOF when the
+// input is exhausted.
+func (l *lexer) next() rune {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return EOF
+	}
+	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
+	l.width = w
+	l.pos += l.width
+	return r
+}
+
+// backup steps back over the rune returned by the most recent call to next.
+// Only the last width is remembered, so use it at most once per next.
+func (l *lexer) backup() {
+	l.pos -= l.width
+}
+
+// peek returns the next rune without consuming it.
+func (l *lexer) peek() rune {
+	r := l.next()
+	l.backup()
+	return r
+}
+
+// ignore drops the input consumed so far for the current item.
+func (l *lexer) ignore() {
+	l.start = l.pos
+}
+
+// accept consumes the next rune if it is one of the runes in valid and
+// reports whether it did.
+func (l *lexer) accept(valid string) bool {
+	if strings.IndexRune(valid, l.next()) >= 0 {
+		return true
+	}
+	l.backup()
+	return false
+}
+
+// acceptRun consumes runes for as long as they are in valid.
+func (l *lexer) acceptRun(valid string) {
+	for strings.IndexRune(valid, l.next()) >= 0 {
+	}
+	l.backup()
+}
diff --git a/gql/lexer/lexer_test.go b/gql/lexer/lexer_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..c82d493400856aa01d60c6ab6a5f06da8cc6b42f
--- /dev/null
+++ b/gql/lexer/lexer_test.go
@@ -0,0 +1,63 @@
+package gqlex
+
+import "testing"
+
+// collect drains the lexer for input and returns every item it produced.
+func collect(input string) []item {
+	var got []item
+	for it := range newLexer(input).items {
+		got = append(got, it)
+	}
+	return got
+}
+
+func TestNewLexer(t *testing.T) {
+	input := `
+	{
+		me {
+			name
+			city
+			friends {
+				name
+			}
+		}
+	}`
+	want := []item{
+		{itemLeftCurl, "{"},
+		{itemIdentifier, "me"},
+		{itemLeftCurl, "{"},
+		{itemIdentifier, "name"},
+		{itemIdentifier, "city"},
+		{itemIdentifier, "friends"},
+		{itemLeftCurl, "{"},
+		{itemIdentifier, "name"},
+		{itemRightCurl, "}"},
+		{itemRightCurl, "}"},
+		{itemRightCurl, "}"},
+		{itemEOF, ""},
+	}
+
+	var got []item
+	for _, it := range collect(input) {
+		t.Log(it.String())
+		// The whitespace before the first brace comes out as itemText.
+		if it.typ != itemText {
+			got = append(got, it)
+		}
+	}
+	if len(got) != len(want) {
+		t.Fatalf("got %d items %v, want %d", len(got), got, len(want))
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Errorf("item %d: got %v, want %v", i, got[i], want[i])
+		}
+	}
+}
+
+func TestUnexpectedCharacter(t *testing.T) {
+	got := collect("{(name}")
+	if len(got) != 2 || got[0].typ != itemLeftCurl || got[1].typ != itemError {
+		t.Fatalf("got %v, want a left curl followed by an error", got)
+	}
+}
diff --git a/gql/lexer/state.go b/gql/lexer/state.go
new file mode 100644
index 0000000000000000000000000000000000000000..a4ac842060fbdfbef7b766e1295df98f4668b9da
--- /dev/null
+++ b/gql/lexer/state.go
@@ -0,0 +1,112 @@
+package gqlex
+
+import (
+	"strings"
+	"unicode"
+)
+
+const (
+	leftCurl  = "{"
+	rightCurl = "}"
+)
+
+// stateFn represents the state of the scanner as a function that
+// returns the next state.
+type stateFn func(*lexer) stateFn
+
+// lexText scans text outside of any braces until the next left curly
+// bracket or the end of the input.
+func lexText(l *lexer) stateFn {
+	for {
+		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
+			if l.pos > l.start {
+				l.emit(itemText)
+			}
+			return lexLeftCurl
+		}
+		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
+			return l.errorf("Too many right brackets")
+		}
+		if l.next() == EOF {
+			break
+		}
+	}
+	// Correctly reached EOF.
+	if l.pos > l.start {
+		l.emit(itemText)
+	}
+	l.emit(itemEOF)
+	return nil // Stop the run loop.
+}
+
+// lexLeftCurl emits the left curly bracket at pos and goes one level deeper.
+func lexLeftCurl(l *lexer) stateFn {
+	l.pos += len(leftCurl)
+	l.depth++
+	l.emit(itemLeftCurl)
+	return lexInside
+}
+
+// lexRightCurl emits the right curly bracket at pos and goes one level up,
+// returning to lexText once the outermost bracket is closed.
+func lexRightCurl(l *lexer) stateFn {
+	l.pos += len(rightCurl)
+	l.depth--
+	l.emit(itemRightCurl)
+
+	if l.depth == 0 {
+		return lexText
+	}
+	return lexInside
+}
+
+// lexInside scans the contents of a pair of curly brackets.
+func lexInside(l *lexer) stateFn {
+	for {
+		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
+			return lexRightCurl
+		}
+		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
+			return lexLeftCurl
+		}
+
+		switch r := l.next(); {
+		case r == EOF:
+			return l.errorf("unclosed action")
+		case isSpace(r) || isEndOfLine(r):
+			l.ignore()
+		case isAlphaNumeric(r):
+			l.backup()
+			return lexIdentifier
+		default:
+			// Without this the rune would stay in [start:pos] and end up
+			// glued to the front of whatever item is emitted next.
+			return l.errorf("unrecognized character inside braces: %#U", r)
+		}
+	}
+}
+
+// lexIdentifier scans a run of alphanumeric runes and emits it.
+func lexIdentifier(l *lexer) stateFn {
+	for isAlphaNumeric(l.next()) {
+		// absorb.
+	}
+	l.backup()
+	l.emit(itemIdentifier)
+	return lexInside
+}
+
+// isSpace reports whether r is a space or a tab.
+func isSpace(r rune) bool {
+	return r == ' ' || r == '\t'
+}
+
+// isEndOfLine reports whether r is a carriage return or a newline.
+func isEndOfLine(r rune) bool {
+	return r == '\r' || r == '\n'
+}
+
+// isAlphaNumeric reports whether r is an underscore, a letter or a digit.
+func isAlphaNumeric(r rune) bool {
+	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}