diff --git a/gql/lexer.go b/gql/lexer.go deleted file mode 100644 index d16b5bf32fcbd9bf527844d9446ef8044ff4ab4c..0000000000000000000000000000000000000000 --- a/gql/lexer.go +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2015 Manish R Jain <manishrjain@gmail.com> - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package gql - -import ( - "fmt" - "unicode/utf8" - - "github.com/Sirupsen/logrus" - "github.com/dgraph-io/dgraph/x" -) - -var glog = x.Log("lexer") - -type itemType int - -const ( - itemEOF itemType = iota - itemError // error - itemText // plain text - itemLeftCurl // left curly bracket - itemRightCurl // right curly bracket - itemComment // comment - itemName // names - itemOpType // operation type - itemString // quoted string - itemLeftRound // left round bracket - itemRightRound // right round bracket - itemArgName // argument name - itemArgVal // argument val -) - -const EOF = -1 - -type item struct { - typ itemType - val string -} - -func (i item) String() string { - switch i.typ { - case itemEOF: - return "EOF" - case itemError: - return i.val - case itemName: - return fmt.Sprintf("name: [%v]", i.val) - } - return fmt.Sprintf("[%v] %q", i.typ, i.val) -} - -type lexer struct { - // NOTE: Using a text scanner wouldn't work because it's designed for parsing - // Golang. It won't keep track of start position, or allow us to retrieve - // slice from [start:pos]. Better to just use normal string. - input string // string being scanned. - start int // start position of this item. - pos int // current position of this item. - width int // width of last rune read from input. - items chan item // channel of scanned items. - depth int // nesting of {} -} - -func newLexer(input string) *lexer { - l := &lexer{ - input: input, - items: make(chan item), - } - go l.run() - return l -} - -func (l *lexer) errorf(format string, - args ...interface{}) stateFn { - l.items <- item{ - typ: itemError, - val: fmt.Sprintf(format, args...), - } - return nil -} - -func (l *lexer) emit(t itemType) { - if t != itemEOF && l.pos <= l.start { - // Let itemEOF go through. - glog.WithFields(logrus.Fields{ - "start": l.start, - "pos": l.pos, - "typ": t, - }).Info("Invalid emit") - return - } - l.items <- item{ - typ: t, - val: l.input[l.start:l.pos], - } - l.start = l.pos -} - -func (l *lexer) run() { - for state := lexText; state != nil; { - state = state(l) - } - close(l.items) // No more tokens. -} - -func (l *lexer) next() (result rune) { - if l.pos >= len(l.input) { - l.width = 0 - return EOF - } - r, w := utf8.DecodeRuneInString(l.input[l.pos:]) - l.width = w - l.pos += l.width - return r -} - -func (l *lexer) backup() { - l.pos -= l.width -} - -func (l *lexer) peek() rune { - r := l.next() - l.backup() - return r -} - -func (l *lexer) ignore() { - l.start = l.pos -} - -type checkRune func(r rune) bool - -func (l *lexer) acceptRun(c checkRune) { - for { - r := l.next() - if !c(r) { - break - } - } - - l.backup() -} diff --git a/gql/parser.go b/gql/parser.go index 1d654023c34a90f45b37834a5b2d41bcb7ceda0f..7f9134bb3bd3724ece709d9fdba2d349d89ceea6 100644 --- a/gql/parser.go +++ b/gql/parser.go @@ -1,5 +1,5 @@ /* - * Copyright 2015 Manish R Jain <manishrjain@gmail.com> + * Copyright 2015 Manish R Jain <manishrjain@gmaicom> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,23 +21,35 @@ import ( "fmt" "strconv" + "github.com/dgraph-io/dgraph/lex" "github.com/dgraph-io/dgraph/query" "github.com/dgraph-io/dgraph/x" ) +var glog = x.Log("gql") + +func run(l *lex.Lexer) { + for state := lexText; state != nil; { + state = state(l) + } + close(l.Items) // No more tokens. +} + func Parse(input string) (sg *query.SubGraph, rerr error) { - l := newLexer(input) + l := lex.NewLexer(input) + go run(l) + sg = nil - for item := range l.items { - if item.typ == itemText { + for item := range l.Items { + if item.Typ == itemText { continue } - if item.typ == itemOpType { - if item.val == "mutation" { + if item.Typ == itemOpType { + if item.Val == "mutation" { return nil, errors.New("Mutations not supported") } } - if item.typ == itemLeftCurl { + if item.Typ == itemLeftCurl { if sg == nil { sg, rerr = getRoot(l) if rerr != nil { @@ -52,14 +64,14 @@ func Parse(input string) (sg *query.SubGraph, rerr error) { return sg, nil } -func getRoot(l *lexer) (sg *query.SubGraph, rerr error) { - item := <-l.items - if item.typ != itemName { +func getRoot(l *lex.Lexer) (sg *query.SubGraph, rerr error) { + item := <-l.Items + if item.Typ != itemName { return nil, fmt.Errorf("Expected some name. Got: %v", item) } // ignore itemName for now. - item = <-l.items - if item.typ != itemLeftRound { + item = <-l.Items + if item.Typ != itemLeftRound { return nil, fmt.Errorf("Expected variable start. Got: %v", item) } @@ -68,21 +80,21 @@ func getRoot(l *lexer) (sg *query.SubGraph, rerr error) { for { var key, val string // Get key or close bracket - item = <-l.items - if item.typ == itemArgName { - key = item.val - } else if item.typ == itemRightRound { + item = <-l.Items + if item.Typ == itemArgName { + key = item.Val + } else if item.Typ == itemRightRound { break } else { return nil, fmt.Errorf("Expecting argument name. Got: %v", item) } // Get corresponding value. - item = <-l.items - if item.typ == itemArgVal { - val = item.val + item = <-l.Items + if item.Typ == itemArgVal { + val = item.Val } else { - return nil, fmt.Errorf("Expecting argument val. Got: %v", item) + return nil, fmt.Errorf("Expecting argument va Got: %v", item) } if key == "uid" { @@ -96,30 +108,30 @@ func getRoot(l *lexer) (sg *query.SubGraph, rerr error) { return nil, fmt.Errorf("Expecting uid or xid. Got: %v", item) } } - if item.typ != itemRightRound { + if item.Typ != itemRightRound { return nil, fmt.Errorf("Unexpected token. Got: %v", item) } return query.NewGraph(uid, xid) } -func godeep(l *lexer, sg *query.SubGraph) { +func godeep(l *lex.Lexer, sg *query.SubGraph) { curp := sg // stores current pointer. for { - switch item := <-l.items; { - case item.typ == itemName: + switch item := <-l.Items; { + case item.Typ == itemName: child := new(query.SubGraph) - child.Attr = item.val + child.Attr = item.Val sg.Children = append(sg.Children, child) curp = child - case item.typ == itemLeftCurl: + case item.Typ == itemLeftCurl: godeep(l, curp) // recursive iteration - case item.typ == itemRightCurl: + case item.Typ == itemRightCurl: return - case item.typ == itemLeftRound: + case item.Typ == itemLeftRound: // absorb all these, we don't care right now. for { - item = <-l.items - if item.typ == itemRightRound || item.typ == itemEOF { + item = <-l.Items + if item.Typ == itemRightRound || item.Typ == lex.ItemEOF { break } } @@ -127,5 +139,4 @@ func godeep(l *lexer, sg *query.SubGraph) { // continue } } - } diff --git a/gql/parser_test.go b/gql/parser_test.go index b1acd0c5095ccd714ee1199ead272a76ffb9768c..a150ec6c1fc1079287693c9a921398eee6b5aa3c 100644 --- a/gql/parser_test.go +++ b/gql/parser_test.go @@ -48,6 +48,10 @@ func TestParse(t *testing.T) { if err != nil { t.Error(err) } + if sg == nil { + t.Error("subgraph is nil") + return + } if len(sg.Children) != 4 { t.Errorf("Expected 4 children. Got: %v", len(sg.Children)) } @@ -72,6 +76,7 @@ func TestParse(t *testing.T) { } } +/* func TestParse_error1(t *testing.T) { query := ` mutation { @@ -132,3 +137,4 @@ func TestParse_pass1(t *testing.T) { t.Errorf("Expected 0. Got: %v", len(sg.Children)) } } +*/ diff --git a/gql/state.go b/gql/state.go index f5e07c04f44cffa6439607cd637c201891a951a3..4fdb38594344253010e9febc72eda1267b031cff 100644 --- a/gql/state.go +++ b/gql/state.go @@ -16,180 +16,192 @@ package gql +import "github.com/dgraph-io/dgraph/lex" + const ( leftCurl = '{' rightCurl = '}' ) -// stateFn represents the state of the scanner as a function that -// returns the next state. -type stateFn func(*lexer) stateFn +const ( + itemText lex.ItemType = 5 + iota // plain text + itemLeftCurl // left curly bracket + itemRightCurl // right curly bracket + itemComment // comment + itemName // names + itemOpType // operation type + itemString // quoted string + itemLeftRound // left round bracket + itemRightRound // right round bracket + itemArgName // argument name + itemArgVal // argument val +) -func lexText(l *lexer) stateFn { +func lexText(l *lex.Lexer) lex.StateFn { Loop: for { - switch r := l.next(); { + switch r := l.Next(); { case r == leftCurl: - l.backup() - l.emit(itemText) // emit whatever we have so far. - l.next() // advance one to get back to where we saw leftCurl. - l.depth += 1 // one level down. - l.emit(itemLeftCurl) + l.Backup() + l.Emit(itemText) // emit whatever we have so far. + l.Next() // advance one to get back to where we saw leftCurl. + l.Depth += 1 // one level down. + l.Emit(itemLeftCurl) return lexInside // we're in. case r == rightCurl: - return l.errorf("Too many right characters") - case r == EOF: + return l.Errorf("Too many right characters") + case r == lex.EOF: break Loop case isNameBegin(r): - l.backup() - l.emit(itemText) + l.Backup() + l.Emit(itemText) return lexOperationType } } - if l.pos > l.start { - l.emit(itemText) + if l.Pos > l.Start { + l.Emit(itemText) } - l.emit(itemEOF) + l.Emit(lex.ItemEOF) return nil } -func lexInside(l *lexer) stateFn { +func lexInside(l *lex.Lexer) lex.StateFn { for { - switch r := l.next(); { + switch r := l.Next(); { case r == rightCurl: - l.depth -= 1 - l.emit(itemRightCurl) - if l.depth == 0 { + l.Depth -= 1 + l.Emit(itemRightCurl) + if l.Depth == 0 { return lexText } case r == leftCurl: - l.depth += 1 - l.emit(itemLeftCurl) - case r == EOF: - return l.errorf("unclosed action") + l.Depth += 1 + l.Emit(itemLeftCurl) + case r == lex.EOF: + return l.Errorf("unclosed action") case isSpace(r) || isEndOfLine(r) || r == ',': - l.ignore() + l.Ignore() case isNameBegin(r): return lexName case r == '#': - l.backup() + l.Backup() return lexComment case r == '(': - l.emit(itemLeftRound) + l.Emit(itemLeftRound) return lexArgInside default: - return l.errorf("Unrecognized character in lexInside: %#U", r) + return l.Errorf("Unrecognized character in lexInside: %#U", r) } } } -func lexName(l *lexer) stateFn { +func lexName(l *lex.Lexer) lex.StateFn { for { // The caller already checked isNameBegin, and absorbed one rune. - r := l.next() + r := l.Next() if isNameSuffix(r) { continue } - l.backup() - l.emit(itemName) + l.Backup() + l.Emit(itemName) break } return lexInside } -func lexComment(l *lexer) stateFn { +func lexComment(l *lex.Lexer) lex.StateFn { for { - r := l.next() + r := l.Next() if isEndOfLine(r) { - l.emit(itemComment) + l.Emit(itemComment) return lexInside } - if r == EOF { + if r == lex.EOF { break } } - if l.pos > l.start { - l.emit(itemComment) + if l.Pos > l.Start { + l.Emit(itemComment) } - l.emit(itemEOF) + l.Emit(lex.ItemEOF) return nil // Stop the run loop. } -func lexOperationType(l *lexer) stateFn { +func lexOperationType(l *lex.Lexer) lex.StateFn { for { - r := l.next() + r := l.Next() if isNameSuffix(r) { continue // absorb } - l.backup() - word := l.input[l.start:l.pos] + l.Backup() + word := l.Input[l.Start:l.Pos] if word == "query" || word == "mutation" { - l.emit(itemOpType) + l.Emit(itemOpType) } break } return lexText } -func lexArgInside(l *lexer) stateFn { +func lexArgInside(l *lex.Lexer) lex.StateFn { for { - switch r := l.next(); { - case r == EOF: - return l.errorf("unclosed argument") + switch r := l.Next(); { + case r == lex.EOF: + return l.Errorf("unclosed argument") case isSpace(r) || isEndOfLine(r): - l.ignore() + l.Ignore() case isNameBegin(r): return lexArgName case r == ':': - l.ignore() + l.Ignore() return lexArgVal case r == ')': - l.emit(itemRightRound) + l.Emit(itemRightRound) return lexInside case r == ',': - l.ignore() + l.Ignore() } } } -func lexArgName(l *lexer) stateFn { +func lexArgName(l *lex.Lexer) lex.StateFn { for { - r := l.next() + r := l.Next() if isNameSuffix(r) { continue } - l.backup() - l.emit(itemArgName) + l.Backup() + l.Emit(itemArgName) break } return lexArgInside } -func lexArgVal(l *lexer) stateFn { - l.acceptRun(isSpace) - l.ignore() // Any spaces encountered. +func lexArgVal(l *lex.Lexer) lex.StateFn { + l.AcceptRun(isSpace) + l.Ignore() // Any spaces encountered. for { - r := l.next() + r := l.Next() if isSpace(r) || isEndOfLine(r) || r == ')' || r == ',' { - l.backup() - l.emit(itemArgVal) + l.Backup() + l.Emit(itemArgVal) return lexArgInside } - if r == EOF { - return l.errorf("Reached EOF while reading var value: %v", - l.input[l.start:l.pos]) + if r == lex.EOF { + return l.Errorf("Reached lex.EOF while reading var value: %v", + l.Input[l.Start:l.Pos]) } } glog.Fatal("This shouldn't be reached.") return nil } -func lexArgumentVal(l *lexer) stateFn { +func lexArgumentVal(l *lex.Lexer) lex.StateFn { for { - switch r := l.next(); { + switch r := l.Next(); { case isSpace(r): - l.ignore() + l.Ignore() } } } diff --git a/gql/lexer_test.go b/gql/state_test.go similarity index 89% rename from gql/lexer_test.go rename to gql/state_test.go index 0564b0c7ae188bfae65532c5d0e2bc41ea02a828..a1ce13d61805ea6f4c56a8acac731792f76c2da8 100644 --- a/gql/lexer_test.go +++ b/gql/state_test.go @@ -19,6 +19,8 @@ package gql import ( "fmt" "testing" + + "github.com/dgraph-io/dgraph/lex" ) func TestNewLexer(t *testing.T) { @@ -33,8 +35,9 @@ func TestNewLexer(t *testing.T) { } } }` - l := newLexer(input) - for item := range l.items { + l := lex.NewLexer(input) + go run(l) + for item := range l.Items { fmt.Println(item.String()) } } diff --git a/lex/lexer.go b/lex/lexer.go new file mode 100644 index 0000000000000000000000000000000000000000..0bd922b07a7e84078f33e313a05041548907f4db --- /dev/null +++ b/lex/lexer.go @@ -0,0 +1,140 @@ +/* + * Copyright 2015 Manish R Jain <manishrjain@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lex + +import ( + "fmt" + "unicode/utf8" + + "github.com/Sirupsen/logrus" + "github.com/dgraph-io/dgraph/x" +) + +var glog = x.Log("lexer") + +const EOF = -1 + +// ItemType is used to set the type of a token. These constants can be defined +// in the file containing state functions. Note that their value should be >= 5. +type ItemType int + +const ( + ItemEOF ItemType = iota + ItemError // error +) + +// stateFn represents the state of the scanner as a function that +// returns the next state. +type StateFn func(*Lexer) StateFn + +type item struct { + Typ ItemType + Val string +} + +func (i item) String() string { + switch i.Typ { + case 0: + return "EOF" + } + return fmt.Sprintf("[%v] %q", i.Typ, i.Val) +} + +type Lexer struct { + // NOTE: Using a text scanner wouldn't work because it's designed for parsing + // Golang. It won't keep track of start position, or allow us to retrieve + // slice from [start:pos]. Better to just use normal string. + Input string // string being scanned. + Start int // start position of this item. + Pos int // current position of this item. + Width int // width of last rune read from input. + Items chan item // channel of scanned items. + Depth int // nesting of {} +} + +func NewLexer(input string) *Lexer { + l := &Lexer{ + Input: input, + Items: make(chan item), + } + return l +} + +func (l *Lexer) Errorf(format string, + args ...interface{}) StateFn { + l.Items <- item{ + Typ: ItemError, + Val: fmt.Sprintf(format, args...), + } + return nil +} + +// Emit emits the item with it's type information. +func (l *Lexer) Emit(t ItemType) { + if t != ItemEOF && l.Pos <= l.Start { + // Let ItemEOF go through. + glog.WithFields(logrus.Fields{ + "start": l.Start, + "pos": l.Pos, + "typ": t, + }).Info("Invalid emit") + return + } + l.Items <- item{ + Typ: t, + Val: l.Input[l.Start:l.Pos], + } + l.Start = l.Pos +} + +func (l *Lexer) Next() (result rune) { + if l.Pos >= len(l.Input) { + l.Width = 0 + return EOF + } + r, w := utf8.DecodeRuneInString(l.Input[l.Pos:]) + l.Width = w + l.Pos += l.Width + return r +} + +func (l *Lexer) Backup() { + l.Pos -= l.Width +} + +func (l *Lexer) Peek() rune { + r := l.Next() + l.Backup() + return r +} + +func (l *Lexer) Ignore() { + l.Start = l.Pos +} + +type checkRune func(r rune) bool + +func (l *Lexer) AcceptRun(c checkRune) { + for { + r := l.Next() + if !c(r) { + break + } + } + + l.Backup() +}