From 1fd88720911c1eb26db036bfad5370066736af2c Mon Sep 17 00:00:00 2001
From: Manish R Jain <manishrjain@gmail.com>
Date: Wed, 28 Oct 2015 12:26:17 +1100
Subject: [PATCH] Fix left and right curly brace lexing. Support name, comment
 and operation type tokens.

---
 gql/lexer/lexer.go      |  41 ++++++----
 gql/lexer/lexer_test.go |   6 +-
 gql/lexer/state.go      | 165 +++++++++++++++++++++++++---------------
 3 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/gql/lexer/lexer.go b/gql/lexer/lexer.go
index b5bcd399..4edaa5e2 100644
--- a/gql/lexer/lexer.go
+++ b/gql/lexer/lexer.go
@@ -4,18 +4,25 @@ import (
 	"fmt"
 	"strings"
 	"unicode/utf8"
+
+	"github.com/Sirupsen/logrus"
+	"github.com/manishrjain/dgraph/x"
 )
 
+var glog = x.Log("lexer")
+
 type itemType int
 
 const (
-	itemError itemType = iota
-	itemEOF
-	itemLeftCurl   // left curly bracket
-	itemRightCurl  // right curly bracket
-	itemString     // quoted string
-	itemText       // plain text
-	itemIdentifier // variables
+	itemEOF       itemType = iota
+	itemError              // error
+	itemText               // plain text
+	itemLeftCurl           // left curly bracket
+	itemRightCurl          // right curly bracket
+	itemComment            // comment
+	itemName               // names
+	itemOpType             // operation type
+	itemString             // quoted string
 )
 
 const EOF = -1
@@ -31,15 +38,10 @@ func (i item) String() string {
 		return "EOF"
 	case itemError:
 		return i.val
-	case itemIdentifier:
-		return fmt.Sprintf("var: [%v]", i.val)
+	case itemName:
+		return fmt.Sprintf("name: [%v]", i.val)
 	}
-	/*
-		if len(i.val) > 10 {
-			return fmt.Sprintf("%.10q...", i.val)
-		}
-	*/
-	return fmt.Sprintf("%q", i.val)
+	return fmt.Sprintf("[%v] %q", i.typ, i.val)
 }
 
 type lexer struct {
@@ -73,6 +75,15 @@ func (l *lexer) errorf(format string,
 }
 
 func (l *lexer) emit(t itemType) {
+	if t != itemEOF && l.pos <= l.start {
+		// Drop tokens with no text (pos <= start); only itemEOF is exempt.
+		glog.WithFields(logrus.Fields{
+			"start": l.start,
+			"pos":   l.pos,
+			"typ":   t,
+		}).Info("Invalid emit")
+		return
+	}
 	l.items <- item{
 		typ: t,
 		val: l.input[l.start:l.pos],
diff --git a/gql/lexer/lexer_test.go b/gql/lexer/lexer_test.go
index c82d4934..ca843d5f 100644
--- a/gql/lexer/lexer_test.go
+++ b/gql/lexer/lexer_test.go
@@ -7,10 +7,10 @@ import (
 
 func TestNewLexer(t *testing.T) {
 	input := `
-	{
+	mutation {
 		me {
-			name
-			city
+			name0 # my name
+			_city, # 0what would fail lex.
 			friends {
 				name
 			}
diff --git a/gql/lexer/state.go b/gql/lexer/state.go
index a4ac8420..c02c0ffd 100644
--- a/gql/lexer/state.go
+++ b/gql/lexer/state.go
@@ -1,13 +1,8 @@
 package gqlex
 
-import (
-	"strings"
-	"unicode"
-)
-
 const (
-	leftCurl  = "{"
-	rightCurl = "}"
+	leftCurl  = '{'
+	rightCurl = '}'
 )
 
 // stateFn represents the state of the scanner as a function that
@@ -15,91 +10,137 @@ const (
 type stateFn func(*lexer) stateFn
 
 func lexText(l *lexer) stateFn {
+Loop:
 	for {
-		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
-			if l.pos > l.start {
-				l.emit(itemText)
-			}
-			return lexLeftCurl
-		}
-		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
-			return l.errorf("Too many right brackets")
-		}
-		if l.next() == EOF {
-			break
+		switch r := l.next(); {
+		case r == leftCurl:
+			l.backup()
+			l.emit(itemText) // emit whatever we have so far.
+			l.next()         // advance one to get back to where we saw leftCurl.
+			l.depth += 1     // one level down.
+			l.emit(itemLeftCurl)
+			return lexInside // we're in.
+
+		case r == rightCurl:
+			return l.errorf("Too many right characters")
+		case r == EOF:
+			break Loop
+		case isNameBegin(r):
+			l.backup()
+			l.emit(itemText)
+			return lexOperationType
 		}
 	}
-	// Correctly reached EOF.
 	if l.pos > l.start {
 		l.emit(itemText)
 	}
 	l.emit(itemEOF)
-	return nil // Stop the run loop.
-}
-
-func lexLeftCurl(l *lexer) stateFn {
-	l.pos += len(leftCurl)
-	l.depth += 1
-	l.emit(itemLeftCurl)
-	return lexInside(l)
-}
-
-func lexRightCurl(l *lexer) stateFn {
-	l.pos += len(rightCurl)
-	l.depth -= 1
-	l.emit(itemRightCurl)
-
-	if l.depth == 0 {
-		return lexText
-	} else {
-		return lexInside
-	}
+	return nil
 }
 
 func lexInside(l *lexer) stateFn {
 	for {
-		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
-			return lexRightCurl
-		}
-		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
-			return lexLeftCurl
-		}
-
 		switch r := l.next(); {
+		case r == rightCurl:
+			l.depth -= 1
+			l.emit(itemRightCurl)
+			if l.depth == 0 {
+				return lexText
+			}
+		case r == leftCurl:
+			l.depth += 1
+			l.emit(itemLeftCurl)
 		case r == EOF:
 			return l.errorf("unclosed action")
-		case isSpace(r) || isEndOfLine(r):
+		case isSpace(r) || isEndOfLine(r) || r == ',':
 			l.ignore()
-		case isAlphaNumeric(r):
+		case isNameBegin(r):
 			l.backup()
-			return lexIdentifier
+			return lexName
+		case r == '#':
+			l.backup()
+			return lexComment
+		default:
+			return l.errorf("Unrecognized character in lexInside: %#U", r)
 		}
 	}
 }
 
-func lexIdentifier(l *lexer) stateFn {
-Loop:
+func lexName(l *lexer) stateFn {
 	for {
-		switch r := l.next(); {
-		case isAlphaNumeric(r):
-			// absorb.
-		default:
-			l.backup()
-			l.emit(itemIdentifier)
-			break Loop
+		// The caller must have already checked isNameBegin.
+		r := l.next()
+		if isNameSuffix(r) {
+			continue
 		}
+		l.backup()
+		l.emit(itemName)
+		break
 	}
 	return lexInside
 }
 
+func lexComment(l *lexer) stateFn {
+	for {
+		r := l.next()
+		if isEndOfLine(r) {
+			l.emit(itemComment)
+			return lexInside
+		}
+		if r == EOF {
+			break
+		}
+	}
+	if l.pos > l.start {
+		l.emit(itemComment)
+	}
+	l.emit(itemEOF)
+	return nil // Stop the run loop.
+}
+
+func lexOperationType(l *lexer) stateFn {
+	for {
+		r := l.next()
+		if isNameSuffix(r) {
+			continue // absorb
+		}
+		l.backup()
+		word := l.input[l.start:l.pos]
+		if word == "query" || word == "mutation" {
+			l.emit(itemOpType)
+		}
+		break
+	}
+	return lexText
+}
+
 func isSpace(r rune) bool {
-	return r == ' ' || r == '\t'
+	return r == '\u0009' || r == '\u0020'
 }
 
 func isEndOfLine(r rune) bool {
-	return r == '\r' || r == '\n'
+	return r == '\u000A' || r == '\u000D'
+}
+
+func isNameBegin(r rune) bool {
+	switch {
+	case r >= 'a' && r <= 'z':
+		return true
+	case r >= 'A' && r <= 'Z':
+		return true
+	case r == '_':
+		return true
+	default:
+		return false
+	}
 }
 
-func isAlphaNumeric(r rune) bool {
-	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+func isNameSuffix(r rune) bool {
+	if isNameBegin(r) {
+		return true
+	}
+	if r >= '0' && r <= '9' {
+		return true
+	}
+	return false
 }
-- 
GitLab