Skip to content
Snippets Groups Projects
Commit 9b1a2f23 authored by Manish R Jain's avatar Manish R Jain
Browse files

Move Lexer class out to its own package so GQL and RDF can share it.

parent 20f43611
No related branches found
No related tags found
No related merge requests found
/*
* Copyright 2015 Manish R Jain <manishrjain@gmail.com>
* Copyright 2015 Manish R Jain <manishrjain@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
......@@ -21,23 +21,35 @@ import (
"fmt"
"strconv"
"github.com/dgraph-io/dgraph/lex"
"github.com/dgraph-io/dgraph/query"
"github.com/dgraph-io/dgraph/x"
)
// glog is this package's logging handle, obtained from x.Log with the
// tag "gql".
var glog = x.Log("gql")
// run drives the lexer's state machine. It begins in lexText and keeps
// calling whichever state function the previous one handed back, stopping
// as soon as a state returns nil. Afterwards it closes l.Items so that a
// receiver ranging over the channel terminates.
func run(l *lex.Lexer) {
	next := lexText
	for next != nil {
		next = next(l)
	}
	close(l.Items) // Nothing more will be sent.
}
func Parse(input string) (sg *query.SubGraph, rerr error) {
l := newLexer(input)
l := lex.NewLexer(input)
go run(l)
sg = nil
for item := range l.items {
if item.typ == itemText {
for item := range l.Items {
if item.Typ == itemText {
continue
}
if item.typ == itemOpType {
if item.val == "mutation" {
if item.Typ == itemOpType {
if item.Val == "mutation" {
return nil, errors.New("Mutations not supported")
}
}
if item.typ == itemLeftCurl {
if item.Typ == itemLeftCurl {
if sg == nil {
sg, rerr = getRoot(l)
if rerr != nil {
......@@ -52,14 +64,14 @@ func Parse(input string) (sg *query.SubGraph, rerr error) {
return sg, nil
}
func getRoot(l *lexer) (sg *query.SubGraph, rerr error) {
item := <-l.items
if item.typ != itemName {
func getRoot(l *lex.Lexer) (sg *query.SubGraph, rerr error) {
item := <-l.Items
if item.Typ != itemName {
return nil, fmt.Errorf("Expected some name. Got: %v", item)
}
// ignore itemName for now.
item = <-l.items
if item.typ != itemLeftRound {
item = <-l.Items
if item.Typ != itemLeftRound {
return nil, fmt.Errorf("Expected variable start. Got: %v", item)
}
......@@ -68,21 +80,21 @@ func getRoot(l *lexer) (sg *query.SubGraph, rerr error) {
for {
var key, val string
// Get key or close bracket
item = <-l.items
if item.typ == itemArgName {
key = item.val
} else if item.typ == itemRightRound {
item = <-l.Items
if item.Typ == itemArgName {
key = item.Val
} else if item.Typ == itemRightRound {
break
} else {
return nil, fmt.Errorf("Expecting argument name. Got: %v", item)
}
// Get corresponding value.
item = <-l.items
if item.typ == itemArgVal {
val = item.val
item = <-l.Items
if item.Typ == itemArgVal {
val = item.Val
} else {
return nil, fmt.Errorf("Expecting argument val. Got: %v", item)
return nil, fmt.Errorf("Expecting argument va Got: %v", item)
}
if key == "uid" {
......@@ -96,30 +108,30 @@ func getRoot(l *lexer) (sg *query.SubGraph, rerr error) {
return nil, fmt.Errorf("Expecting uid or xid. Got: %v", item)
}
}
if item.typ != itemRightRound {
if item.Typ != itemRightRound {
return nil, fmt.Errorf("Unexpected token. Got: %v", item)
}
return query.NewGraph(uid, xid)
}
func godeep(l *lexer, sg *query.SubGraph) {
func godeep(l *lex.Lexer, sg *query.SubGraph) {
curp := sg // stores current pointer.
for {
switch item := <-l.items; {
case item.typ == itemName:
switch item := <-l.Items; {
case item.Typ == itemName:
child := new(query.SubGraph)
child.Attr = item.val
child.Attr = item.Val
sg.Children = append(sg.Children, child)
curp = child
case item.typ == itemLeftCurl:
case item.Typ == itemLeftCurl:
godeep(l, curp) // recursive iteration
case item.typ == itemRightCurl:
case item.Typ == itemRightCurl:
return
case item.typ == itemLeftRound:
case item.Typ == itemLeftRound:
// absorb all these, we don't care right now.
for {
item = <-l.items
if item.typ == itemRightRound || item.typ == itemEOF {
item = <-l.Items
if item.Typ == itemRightRound || item.Typ == lex.ItemEOF {
break
}
}
......@@ -127,5 +139,4 @@ func godeep(l *lexer, sg *query.SubGraph) {
// continue
}
}
}
......@@ -48,6 +48,10 @@ func TestParse(t *testing.T) {
if err != nil {
t.Error(err)
}
if sg == nil {
t.Error("subgraph is nil")
return
}
if len(sg.Children) != 4 {
t.Errorf("Expected 4 children. Got: %v", len(sg.Children))
}
......@@ -72,6 +76,7 @@ func TestParse(t *testing.T) {
}
}
/*
func TestParse_error1(t *testing.T) {
query := `
mutation {
......@@ -132,3 +137,4 @@ func TestParse_pass1(t *testing.T) {
t.Errorf("Expected 0. Got: %v", len(sg.Children))
}
}
*/
......@@ -16,180 +16,192 @@
package gql
import "github.com/dgraph-io/dgraph/lex"
// Brace runes recognised by the state functions: lexText and lexInside
// compare incoming runes against these to track nesting depth.
const (
leftCurl = '{'
rightCurl = '}'
)
// stateFn represents the state of the scanner as a function that
// returns the next state. A nil result ends the loop that drives the
// state machine.
type stateFn func(*lexer) stateFn
// Token types produced by the GQL state functions. Numbering starts at
// 5 (5 + iota); the values below that are not used here, and the lex
// package declares its own ItemEOF and ItemError.
const (
itemText lex.ItemType = 5 + iota // plain text
itemLeftCurl // left curly bracket
itemRightCurl // right curly bracket
itemComment // comment
itemName // names
itemOpType // operation type
itemString // quoted string
itemLeftRound // left round bracket
itemRightRound // right round bracket
itemArgName // argument name
itemArgVal // argument val
)
// lexText is the initial state: it scans input that is outside any braces.
//
//   - On '{' it emits the text gathered so far as itemText, increases the
//     depth, emits itemLeftCurl and hands over to lexInside.
//   - On '}' it reports an error (there is no open brace to close here).
//   - On a rune accepted by isNameBegin it emits the pending text and hands
//     over to lexOperationType.
//   - On EOF it emits any remaining text, then an EOF item, and returns nil
//     so the state loop stops.
func lexText(l *lexer) stateFn {
func lexText(l *lex.Lexer) lex.StateFn {
Loop:
for {
switch r := l.next(); {
switch r := l.Next(); {
case r == leftCurl:
l.backup()
l.emit(itemText) // emit whatever we have so far.
l.next() // advance one to get back to where we saw leftCurl.
l.depth += 1 // one level down.
l.emit(itemLeftCurl)
l.Backup()
l.Emit(itemText) // emit whatever we have so far.
l.Next() // advance one to get back to where we saw leftCurl.
l.Depth += 1 // one level down.
l.Emit(itemLeftCurl)
return lexInside // we're in.
case r == rightCurl:
return l.errorf("Too many right characters")
case r == EOF:
return l.Errorf("Too many right characters")
case r == lex.EOF:
break Loop
case isNameBegin(r):
l.backup()
l.emit(itemText)
l.Backup()
l.Emit(itemText)
return lexOperationType
}
}
// Flush whatever text is still pending before signalling end of input.
if l.pos > l.start {
l.emit(itemText)
if l.Pos > l.Start {
l.Emit(itemText)
}
l.emit(itemEOF)
l.Emit(lex.ItemEOF)
return nil
}
// lexInside scans input that is inside at least one pair of braces.
//
// It adjusts the depth on '{' and '}' (returning to lexText once the depth
// drops back to 0), discards spaces, line ends and commas, and dispatches to
// lexName, lexComment or lexArgInside when it sees the start of a name, a
// '#', or a '(' respectively. EOF and any other rune are reported as errors.
func lexInside(l *lexer) stateFn {
func lexInside(l *lex.Lexer) lex.StateFn {
for {
switch r := l.next(); {
switch r := l.Next(); {
case r == rightCurl:
l.depth -= 1
l.emit(itemRightCurl)
if l.depth == 0 {
l.Depth -= 1
l.Emit(itemRightCurl)
if l.Depth == 0 {
return lexText
}
case r == leftCurl:
l.depth += 1
l.emit(itemLeftCurl)
case r == EOF:
return l.errorf("unclosed action")
l.Depth += 1
l.Emit(itemLeftCurl)
case r == lex.EOF:
return l.Errorf("unclosed action")
case isSpace(r) || isEndOfLine(r) || r == ',':
l.ignore()
l.Ignore()
case isNameBegin(r):
return lexName
case r == '#':
l.backup()
l.Backup()
return lexComment
case r == '(':
l.emit(itemLeftRound)
l.Emit(itemLeftRound)
return lexArgInside
default:
return l.errorf("Unrecognized character in lexInside: %#U", r)
return l.Errorf("Unrecognized character in lexInside: %#U", r)
}
}
}
// lexName finishes scanning a name whose first rune was already consumed by
// the caller. It absorbs runes for as long as isNameSuffix accepts them,
// steps back over the first rune that is rejected, emits the span as
// itemName and returns to lexInside.
func lexName(l *lexer) stateFn {
func lexName(l *lex.Lexer) lex.StateFn {
for {
// The caller already checked isNameBegin, and absorbed one rune.
r := l.next()
r := l.Next()
if isNameSuffix(r) {
continue
}
l.backup()
l.emit(itemName)
l.Backup()
l.Emit(itemName)
break
}
return lexInside
}
// lexComment scans a comment up to the end of the line.
//
// When an end-of-line rune is read, the span (which includes that rune,
// since there is no backup) is emitted as itemComment and scanning resumes
// in lexInside. If EOF arrives first, any pending comment text is emitted,
// followed by an EOF item, and nil is returned to stop the state loop.
func lexComment(l *lexer) stateFn {
func lexComment(l *lex.Lexer) lex.StateFn {
for {
r := l.next()
r := l.Next()
if isEndOfLine(r) {
l.emit(itemComment)
l.Emit(itemComment)
return lexInside
}
if r == EOF {
if r == lex.EOF {
break
}
}
if l.pos > l.start {
l.emit(itemComment)
if l.Pos > l.Start {
l.Emit(itemComment)
}
l.emit(itemEOF)
l.Emit(lex.ItemEOF)
return nil // Stop the run loop.
}
// lexOperationType scans a word that appears outside braces and emits it as
// itemOpType only when it is exactly "query" or "mutation". It always
// returns to lexText afterwards.
//
// NOTE(review): for any other word nothing is emitted and the start
// position is not moved, so the word appears to remain part of the pending
// text that lexText emits later — confirm this is intended.
func lexOperationType(l *lexer) stateFn {
func lexOperationType(l *lex.Lexer) lex.StateFn {
for {
r := l.next()
r := l.Next()
if isNameSuffix(r) {
continue // absorb
}
l.backup()
word := l.input[l.start:l.pos]
l.Backup()
word := l.Input[l.Start:l.Pos]
if word == "query" || word == "mutation" {
l.emit(itemOpType)
l.Emit(itemOpType)
}
break
}
return lexText
}
// lexArgInside scans the contents of a parenthesised argument list.
//
// Spaces, line ends and commas are discarded. The start of a name hands over
// to lexArgName, ':' is discarded and hands over to lexArgVal, and ')' emits
// itemRightRound and returns to lexInside. EOF is reported as an error.
//
// NOTE(review): there is no default case, so a rune matching none of the
// cases is neither emitted nor ignored; it just stays in the pending span.
func lexArgInside(l *lexer) stateFn {
func lexArgInside(l *lex.Lexer) lex.StateFn {
for {
switch r := l.next(); {
case r == EOF:
return l.errorf("unclosed argument")
switch r := l.Next(); {
case r == lex.EOF:
return l.Errorf("unclosed argument")
case isSpace(r) || isEndOfLine(r):
l.ignore()
l.Ignore()
case isNameBegin(r):
return lexArgName
case r == ':':
l.ignore()
l.Ignore()
return lexArgVal
case r == ')':
l.emit(itemRightRound)
l.Emit(itemRightRound)
return lexInside
case r == ',':
l.ignore()
l.Ignore()
}
}
}
// lexArgName scans the remainder of an argument name. It absorbs runes
// accepted by isNameSuffix, steps back over the first rejected rune, emits
// the span as itemArgName and returns to lexArgInside.
func lexArgName(l *lexer) stateFn {
func lexArgName(l *lex.Lexer) lex.StateFn {
for {
r := l.next()
r := l.Next()
if isNameSuffix(r) {
continue
}
l.backup()
l.emit(itemArgName)
l.Backup()
l.Emit(itemArgName)
break
}
return lexArgInside
}
// lexArgVal scans an argument value. Leading spaces are skipped, then runes
// are absorbed until a space, line end, ')' or ',' is read; that terminator
// is stepped back over, the span is emitted as itemArgVal and scanning
// returns to lexArgInside. Hitting EOF first is reported as an error that
// includes the partial value read so far.
//
// NOTE(review): the loop has no break, so the glog.Fatal call and the final
// return after it look unreachable.
// NOTE(review): the later Errorf message reads "Reached lex.EOF" where the
// earlier one read "Reached EOF"; this looks like a side effect of renaming
// the identifier — confirm which wording is wanted.
func lexArgVal(l *lexer) stateFn {
l.acceptRun(isSpace)
l.ignore() // Any spaces encountered.
func lexArgVal(l *lex.Lexer) lex.StateFn {
l.AcceptRun(isSpace)
l.Ignore() // Any spaces encountered.
for {
r := l.next()
r := l.Next()
if isSpace(r) || isEndOfLine(r) || r == ')' || r == ',' {
l.backup()
l.emit(itemArgVal)
l.Backup()
l.Emit(itemArgVal)
return lexArgInside
}
if r == EOF {
return l.errorf("Reached EOF while reading var value: %v",
l.input[l.start:l.pos])
if r == lex.EOF {
return l.Errorf("Reached lex.EOF while reading var value: %v",
l.Input[l.Start:l.Pos])
}
}
glog.Fatal("This shouldn't be reached.")
return nil
}
// lexArgumentVal reads runes in a loop and discards spaces.
//
// NOTE(review): the loop has no return or break, and EOF is not handled, so
// as written this function never returns a next state. No caller is visible
// in this part of the file — confirm whether it is still needed.
func lexArgumentVal(l *lexer) stateFn {
func lexArgumentVal(l *lex.Lexer) lex.StateFn {
for {
switch r := l.next(); {
switch r := l.Next(); {
case isSpace(r):
l.ignore()
l.Ignore()
}
}
}
......
......@@ -19,6 +19,8 @@ package gql
import (
"fmt"
"testing"
"github.com/dgraph-io/dgraph/lex"
)
func TestNewLexer(t *testing.T) {
......@@ -33,8 +35,9 @@ func TestNewLexer(t *testing.T) {
}
}
}`
l := newLexer(input)
for item := range l.items {
l := lex.NewLexer(input)
go run(l)
for item := range l.Items {
fmt.Println(item.String())
}
}
......@@ -14,7 +14,7 @@
* limitations under the License.
*/
package gql
package lex
import (
"fmt"
......@@ -26,131 +26,115 @@ import (
// glog is this package's logging handle, obtained from x.Log with the
// tag "lexer".
var glog = x.Log("lexer")
type itemType int
const EOF = -1
// ItemType is used to set the type of a token. Packages that supply the
// state functions declare their own ItemType constants next to those
// functions; such values should be >= 5 (presumably so they stay clear of
// the item types declared in this package — confirm).
type ItemType int
// Item types declared by the lexer itself. The exported list keeps only
// ItemEOF (value 0) and ItemError; the remaining names in the unexported
// list are the token types the gql package declares for itself.
const (
itemEOF itemType = iota
itemError // error
itemText // plain text
itemLeftCurl // left curly bracket
itemRightCurl // right curly bracket
itemComment // comment
itemName // names
itemOpType // operation type
itemString // quoted string
itemLeftRound // left round bracket
itemRightRound // right round bracket
itemArgName // argument name
itemArgVal // argument val
ItemEOF ItemType = iota
ItemError // error
)
// EOF is the value Next returns once the whole input has been consumed.
const EOF = -1
// StateFn represents the state of the scanner as a function that
// returns the next state. Returning nil tells the driving loop to stop.
type StateFn func(*Lexer) StateFn
// item is one token sent on the lexer's channel: a type tag plus the slice
// of input text it covers (or, for error items, the formatted message).
type item struct {
typ itemType
val string
Typ ItemType
Val string
}
// String renders an item for debugging output: "EOF" for the end-of-input
// type, otherwise the numeric type in brackets followed by the quoted value.
//
// NOTE(review): the version using the exported fields matches on the literal
// 0, which is the value of ItemEOF; using the named constant would make the
// intent explicit. The error and name special cases use the unexported
// identifiers only.
func (i item) String() string {
switch i.typ {
case itemEOF:
switch i.Typ {
case 0:
return "EOF"
case itemError:
return i.val
case itemName:
return fmt.Sprintf("name: [%v]", i.val)
}
return fmt.Sprintf("[%v] %q", i.typ, i.val)
return fmt.Sprintf("[%v] %q", i.Typ, i.Val)
}
// Lexer holds the scanning state shared by the state functions: the input
// string, the bounds of the token currently being built, the channel tokens
// are delivered on, and the current brace nesting depth.
type lexer struct {
type Lexer struct {
// NOTE: Using a text scanner wouldn't work because it's designed for parsing
// Golang. It won't keep track of start position, or allow us to retrieve
// slice from [start:pos]. Better to just use normal string.
input string // string being scanned.
start int // start position of this item.
pos int // current position of this item.
width int // width of last rune read from input.
items chan item // channel of scanned items.
depth int // nesting of {}
Input string // string being scanned.
Start int // start position of this item.
Pos int // current position of this item.
Width int // width of last rune read from input.
Items chan item // channel of scanned items.
Depth int // nesting of {}
}
// NewLexer returns a Lexer positioned at the start of input, with an
// unbuffered Items channel.
//
// NOTE(review): the callers visible alongside this code (gql.Parse and
// TestNewLexer) start `go run(l)` themselves right after calling NewLexer,
// so the `go l.run()` line presumably belongs only to the earlier newLexer —
// confirm that the exported constructor does not start a goroutine.
func newLexer(input string) *lexer {
l := &lexer{
input: input,
items: make(chan item),
func NewLexer(input string) *Lexer {
l := &Lexer{
Input: input,
Items: make(chan item),
}
go l.run()
return l
}
// Errorf sends an error item whose value is the formatted message on the
// items channel and returns nil, so a state function can `return
// l.Errorf(...)` to report the problem and end the state loop in one step.
func (l *lexer) errorf(format string,
args ...interface{}) stateFn {
l.items <- item{
typ: itemError,
val: fmt.Sprintf(format, args...),
func (l *Lexer) Errorf(format string,
args ...interface{}) StateFn {
l.Items <- item{
Typ: ItemError,
Val: fmt.Sprintf(format, args...),
}
return nil
}
func (l *lexer) emit(t itemType) {
if t != itemEOF && l.pos <= l.start {
// Let itemEOF go through.
// Emit sends the input between Start and Pos on the items channel as an item
// of type t, then moves Start up to Pos so the next token begins there.
// An empty span is logged as "Invalid emit" and dropped, except for the EOF
// item type, which is always sent.
func (l *Lexer) Emit(t ItemType) {
if t != ItemEOF && l.Pos <= l.Start {
// Let ItemEOF go through.
glog.WithFields(logrus.Fields{
"start": l.start,
"pos": l.pos,
"start": l.Start,
"pos": l.Pos,
"typ": t,
}).Info("Invalid emit")
return
}
l.items <- item{
typ: t,
val: l.input[l.start:l.pos],
}
l.start = l.pos
}
// run (method form): calls state functions starting from lexText until one
// returns nil, then closes the items channel.
func (l *lexer) run() {
for state := lexText; state != nil; {
state = state(l)
l.Items <- item{
Typ: t,
Val: l.Input[l.Start:l.Pos],
}
close(l.items) // No more tokens.
l.Start = l.Pos
}
// Next decodes the UTF-8 rune at the current position, advances the position
// by that rune's byte width (remembered in the width field so it can be
// undone) and returns the rune. When the position is already at or past the
// end of the input it sets the width to 0 and returns EOF.
func (l *lexer) next() (result rune) {
if l.pos >= len(l.input) {
l.width = 0
func (l *Lexer) Next() (result rune) {
if l.Pos >= len(l.Input) {
l.Width = 0
return EOF
}
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
l.width = w
l.pos += l.width
r, w := utf8.DecodeRuneInString(l.Input[l.Pos:])
l.Width = w
l.Pos += l.Width
return r
}
// Backup moves the position back by the width of the rune most recently
// read. Only that single width is stored, so it undoes just the latest read;
// after a read that hit EOF the width is 0 and Backup changes nothing.
func (l *lexer) backup() {
l.pos -= l.width
func (l *Lexer) Backup() {
l.Pos -= l.Width
}
// Peek returns the next rune without consuming it: it reads one rune and
// immediately steps back over it.
func (l *lexer) peek() rune {
r := l.next()
l.backup()
func (l *Lexer) Peek() rune {
r := l.Next()
l.Backup()
return r
}
// Ignore discards the input consumed so far for the current token by moving
// the start position up to the current position; nothing is emitted.
func (l *lexer) ignore() {
l.start = l.pos
func (l *Lexer) Ignore() {
l.Start = l.Pos
}
// checkRune is a predicate over a single rune; AcceptRun uses it to decide
// whether to keep consuming input.
type checkRune func(r rune) bool
// AcceptRun consumes consecutive runes for as long as c returns true, then
// steps back over the first rune that c rejects so that rune is left for the
// next read. The start position is not touched, so the consumed run remains
// part of the pending token until the caller emits or ignores it.
func (l *lexer) acceptRun(c checkRune) {
func (l *Lexer) AcceptRun(c checkRune) {
for {
r := l.next()
r := l.Next()
if !c(r) {
break
}
}
l.backup()
l.Backup()
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment