Skip to content
Snippets Groups Projects
Commit c1032e50 authored by Manish R Jain's avatar Manish R Jain
Browse files

Working implementation of nquad lexing.

parent 1dbfa870
No related branches found
No related tags found
No related merge requests found
......@@ -21,11 +21,14 @@ package rdf
import "github.com/dgraph-io/dgraph/lex"
const (
itemText lex.ItemType = 5 + iota // plain text
itemSubject // subject
itemPredicate // predicate
itemObject // object
itemLabel // label
itemText lex.ItemType = 5 + iota // plain text
itemSubject // subject, 6
itemPredicate // predicate, 7
itemObject // object, 8
itemLabel // label, 9
itemLiteral // literal, 10
itemLanguage // language, 11
itemObjectType // object type, 12
)
const (
......@@ -65,6 +68,15 @@ Loop:
} else {
return l.Errorf("Invalid input: %v at lexText", r)
}
case r == '"':
if l.Depth != AT_OBJECT {
return l.Errorf("Invalid quote for non-object.")
}
l.Backup()
l.Emit(itemText)
return lexObject
case r == '.' || r == lex.EOF:
break Loop
}
......@@ -85,6 +97,29 @@ func lexUntilClosing(l *lex.Lexer, styp lex.ItemType,
return l.Errorf("Unexpected end of subject")
}
if r == '>' {
l.Emit(styp)
return sfn
}
return l.Errorf("Invalid character %v found for itemType: %v", r, styp)
}
// Assumes that the current rune is '_'.
func lexBlankNode(l *lex.Lexer, styp lex.ItemType,
sfn lex.StateFn) lex.StateFn {
r := l.Next()
if r != ':' {
return l.Errorf("Invalid input RDF Blank Node found at pos: %v", r)
}
// RDF Blank Node.
// TODO: At some point do checkings based on the guidelines. For now,
// just accept everything until space.
l.AcceptUntil(isSpace)
r = l.Peek()
if r == lex.EOF {
return l.Errorf("Unexpected end of subject")
}
if isSpace(r) {
l.Emit(styp)
l.Depth += 1
return sfn
......@@ -95,27 +130,12 @@ func lexUntilClosing(l *lex.Lexer, styp lex.ItemType,
func lexSubject(l *lex.Lexer) lex.StateFn {
r := l.Next()
if r == '<' {
l.Depth += 1
return lexUntilClosing(l, itemSubject, lexText)
}
if r == '_' {
r = l.Next()
if r != ':' {
return l.Errorf("Invalid input RDF Blank Node found at pos: %v", r)
}
// RDF Blank Node.
// TODO: At some point do checkings based on the guidelines. For now,
// just accept everything until space.
l.AcceptUntil(isSpace)
r = l.Peek()
if r == lex.EOF {
return l.Errorf("Unexpected end of subject")
}
if isSpace(r) {
l.Emit(itemSubject)
l.Depth += 1
return lexText
}
return lexBlankNode(l, itemSubject, lexText)
}
return l.Errorf("Invalid character during lexSubject: %v", r)
......@@ -126,14 +146,76 @@ func lexPredicate(l *lex.Lexer) lex.StateFn {
if r != '<' {
return l.Errorf("Invalid character in lexPredicate: %v", r)
}
l.Depth += 1
return lexUntilClosing(l, itemPredicate, lexText)
}
// lexLanguage lexes a language tag of the form "@tag" that follows a literal.
// It expects the next rune to be '@', requires the first tag rune to satisfy
// isLangTagPrefix, accepts the remaining run of isLangTag runes, emits the
// result as itemLanguage and hands control back to lexText.
// Returns an error state if the '@' or a valid first tag rune is missing.
func lexLanguage(l *lex.Lexer) lex.StateFn {
	if at := l.Next(); at != '@' {
		return l.Errorf("Expected @ prefix for lexLanguage")
	}
	// Discard the '@' (presumably so it is not part of the emitted tag).
	l.Ignore()

	first := l.Next()
	if !isLangTagPrefix(first) {
		return l.Errorf("Invalid language tag prefix: %v", first)
	}

	l.AcceptRun(isLangTag)
	l.Emit(itemLanguage)
	return lexText
}
// lexLiteral lexes the body of a quoted literal and emits it as itemLiteral.
// Assumes the opening '"' has already been encountered (and ignored) by the
// caller.
//
// After the closing quote is consumed, l.Depth is incremented and the next
// rune decides the following state: '@' starts a language tag (lexLanguage),
// '^' starts an object type (lexObjectType), anything else goes back to
// lexText.
//
// Returns an error state when the literal is not closed by '"'.
func lexLiteral(l *lex.Lexer) lex.StateFn {
	l.AcceptUntil(isEndLiteral)

	// isEndLiteral also stops the scan on CR/LF, and the input may simply run
	// out. Only a closing quote is a valid terminator; anything else means the
	// literal was never closed, so report it instead of emitting a truncated
	// literal and swallowing the offending rune.
	if r := l.Peek(); r != '"' {
		return l.Errorf("Unexpected end of literal")
	}

	l.Emit(itemLiteral)
	l.Next()   // Move to end literal.
	l.Ignore() // Ignore end literal.
	l.Depth++

	switch l.Peek() {
	case '@':
		return lexLanguage(l)
	case '^':
		return lexObjectType(l)
	}
	return lexText
}
// lexObjectType lexes the "^^<type>" suffix that follows a literal.
// It consumes the two '^' runes and ignores them, requires a '<' next, and
// then delegates to lexUntilClosing to emit the bracketed type as
// itemObjectType, continuing with lexText afterwards.
// Returns an error state if either '^' or the opening '<' is missing.
func lexObjectType(l *lex.Lexer) lex.StateFn {
	if first := l.Next(); first != '^' {
		return l.Errorf("Expected ^ for lexObjectType")
	}
	if second := l.Next(); second != '^' {
		return l.Errorf("Expected ^^ for lexObjectType")
	}
	// Discard the "^^" marker; the '<' read next is kept as pending input.
	l.Ignore()

	if open := l.Next(); open != '<' {
		return l.Errorf("Expected < for lexObjectType")
	}
	return lexUntilClosing(l, itemObjectType, lexText)
}
// lexObject lexes the object position of an N-Quad and dispatches on its
// first rune:
//   - '<' : bumps l.Depth and lexes a bracketed object via lexUntilClosing.
//   - '_' : lexes a blank node via lexBlankNode.
//   - '"' : ignores the opening quote and lexes a literal via lexLiteral.
//
// Any other rune yields an error state.
func lexObject(l *lex.Lexer) lex.StateFn {
	switch r := l.Next(); r {
	case '<':
		l.Depth++
		return lexUntilClosing(l, itemObject, lexText)
	case '_':
		return lexBlankNode(l, itemObject, lexText)
	case '"':
		l.Ignore()
		return lexLiteral(l)
	default:
		return l.Errorf("Invalid char: %v at lexObject", r)
	}
}
......@@ -144,3 +226,33 @@ func isClosingBracket(r rune) bool {
// isSpace reports whether r is a horizontal tab (U+0009) or a space (U+0020).
func isSpace(r rune) bool {
	switch r {
	case '\u0009', '\u0020':
		return true
	}
	return false
}
// isEndLiteral reports whether r stops a literal scan: a double quote,
// a carriage return (U+000D) or a line feed (U+000A).
func isEndLiteral(r rune) bool {
	switch r {
	case '"', '\u000d', '\u000a':
		return true
	}
	return false
}
// isLangTagPrefix reports whether r is an ASCII letter (a-z or A-Z).
func isLangTagPrefix(r rune) bool {
	if 'a' <= r && r <= 'z' {
		return true
	}
	return 'A' <= r && r <= 'Z'
}
// isLangTag reports whether r may appear inside a language tag: a hyphen,
// an ASCII digit, or any rune accepted by isLangTagPrefix.
func isLangTag(r rune) bool {
	if r == '-' || ('0' <= r && r <= '9') {
		return true
	}
	return isLangTagPrefix(r)
}
......@@ -41,6 +41,36 @@ var testNQuads = []struct {
attr: "<predicate>",
valueid: "<Object_id>",
},
{
input: `_:alice <follows> _:bob0 .`,
entity: "_:alice",
attr: "<follows>",
valueid: "_:bob0",
},
{
input: `_:alice <name> "Alice In Wonderland" .`,
entity: "_:alice",
attr: "<name>",
valueid: "Alice In Wonderland",
},
{
input: `_:alice <name> "Alice In Wonderland"@en-0 .`,
entity: "_:alice",
attr: "<name>",
valueid: "Alice In Wonderland",
},
{
input: `_:alice <age> "013"^^<integer> .`,
entity: "_:alice",
attr: "<age>",
valueid: "Alice In Wonderland",
},
{
input: `<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/terms/title> "N-Triples"@en-US .`,
entity: "<http://www.w3.org/2001/sw/RDFCore/ntriples/>",
attr: "<http://purl.org/dc/terms/title>",
valueid: "Alice In Wonderland",
},
}
func TestLex(t *testing.T) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment