From 1d92022e4bfc984ebecc91cc5f60d509e67296fa Mon Sep 17 00:00:00 2001
From: Manish R Jain <manishrjain@gmail.com>
Date: Mon, 2 Nov 2015 17:42:49 +1100
Subject: [PATCH] working parser

---
 lex/lexer.go      |   2 +-
 rdf/parse.go      |  82 ++++++++++++++++++++++++++++++++
 rdf/parse_test.go | 117 ++++++++++++++++++++++++++++++++++++++++++++++
 rdf/state.go      |   7 ++-
 rdf/state_test.go | 105 -----------------------------------------
 5 files changed, 206 insertions(+), 107 deletions(-)
 create mode 100644 rdf/parse.go
 create mode 100644 rdf/parse_test.go
 delete mode 100644 rdf/state_test.go

diff --git a/lex/lexer.go b/lex/lexer.go
index ec88d570..3fcf191e 100644
--- a/lex/lexer.go
+++ b/lex/lexer.go
@@ -91,7 +91,7 @@ func (l *Lexer) Emit(t ItemType) {
 			"start": l.Start,
 			"pos":   l.Pos,
 			"typ":   t,
-		}).Info("Invalid emit")
+		}).Debug("Invalid emit")
 		return
 	}
 	l.Items <- item{
diff --git a/rdf/parse.go b/rdf/parse.go
new file mode 100644
index 00000000..de9661e7
--- /dev/null
+++ b/rdf/parse.go
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2015 Manish R Jain <manishrjain@gmail.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 		http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rdf
+
+import (
+	"fmt"
+
+	"github.com/dgraph-io/dgraph/lex"
+)
+
+type NQuad struct {
+	Subject     string      // Parse: itemSubject value, enclosing <> removed when present
+	Predicate   string      // Parse: itemPredicate value, enclosing <> removed when present
+	ObjectId    string      // Parse: itemObject value, enclosing <> removed when present
+	ObjectValue interface{} // Parse: literal text, plus "@@"+type when a datatype follows; nil without a literal
+	Label       string      // not assigned by Parse
+	Language    string      // Parse: itemLanguage value, stored as emitted
+}
+
+// stripBracketsIfPresent returns val without its enclosing '<' and '>' when it
+// has both. Any other value is returned unchanged, including the empty string
+// (the length guard avoids indexing val[0] on "", which would panic).
+func stripBracketsIfPresent(val string) string {
+	if len(val) < 2 || val[0] != '<' || val[len(val)-1] != '>' {
+		return val
+	}
+	return val[1 : len(val)-1]
+}
+
+// Parse lexes line as a single N-Quad and assembles the emitted items into an
+// NQuad; a datatype is appended to the literal value as "@@<type>". Malformed
+// input is reported through rerr rather than by terminating the process.
+func Parse(line string) (rnq NQuad, rerr error) {
+	l := lex.NewLexer(line)
+	go run(l)
+	var oval string
+	// Drain l.Items even after an error so the lexer is not left blocked on a send.
+	for item := range l.Items {
+		switch item.Typ {
+		case itemSubject:
+			rnq.Subject = stripBracketsIfPresent(item.Val)
+		case itemPredicate:
+			rnq.Predicate = stripBracketsIfPresent(item.Val)
+		case itemObject:
+			rnq.ObjectId = stripBracketsIfPresent(item.Val)
+		case itemLiteral:
+			oval = item.Val
+		case itemLanguage:
+			rnq.Language = item.Val
+		case itemObjectType:
+			if len(oval) == 0 && rerr == nil {
+				rerr = fmt.Errorf("itemLiteral should be emitted before itemObjectType. Input: %q", line)
+			}
+			oval += "@@" + stripBracketsIfPresent(item.Val)
+		}
+	}
+	if len(oval) > 0 {
+		rnq.ObjectValue = oval
+	}
+	switch {
+	case rerr != nil: // keep the first error recorded while consuming items
+	case len(rnq.Subject) == 0 || len(rnq.Predicate) == 0:
+		rerr = fmt.Errorf("Empty required fields in NQuad")
+	case len(rnq.ObjectId) == 0 && rnq.ObjectValue == nil:
+		rerr = fmt.Errorf("No Object in NQuad")
+	}
+	return rnq, rerr
+}
diff --git a/rdf/parse_test.go b/rdf/parse_test.go
new file mode 100644
index 00000000..2d26986c
--- /dev/null
+++ b/rdf/parse_test.go
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2015 Manish R Jain <manishrjain@gmail.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 		http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package rdf
+
+import (
+	"reflect"
+	"testing"
+)
+
+var testNQuads = []struct {
+	input  string // line handed to Parse
+	nq     NQuad  // value TestLex compares against Parse's result
+	hasErr bool   // when true, TestLex requires Parse to return an error
+}{
+	{
+		input: `<some_subject_id> <predicate> <object_id> .`,
+		nq: NQuad{
+			Subject:     "some_subject_id",
+			Predicate:   "predicate",
+			ObjectId:    "object_id",
+			ObjectValue: nil,
+		},
+	},
+	{
+		input: `_:alice <predicate> <object_id> .`,
+		nq: NQuad{
+			Subject:     "_:alice",
+			Predicate:   "predicate",
+			ObjectId:    "object_id",
+			ObjectValue: nil,
+		},
+	},
+	{
+		input: `_:alice <follows> _:bob0 .`,
+		nq: NQuad{
+			Subject:     "_:alice",
+			Predicate:   "follows",
+			ObjectId:    "_:bob0",
+			ObjectValue: nil,
+		},
+	},
+	{
+		input: `_:alice <name> "Alice In Wonderland" .`,
+		nq: NQuad{
+			Subject:     "_:alice",
+			Predicate:   "name",
+			ObjectId:    "",
+			ObjectValue: "Alice In Wonderland",
+		},
+	},
+	{
+		input: `_:alice <name> "Alice In Wonderland"@en-0 .`,
+		nq: NQuad{
+			Subject:     "_:alice",
+			Predicate:   "name",
+			ObjectId:    "",
+			ObjectValue: "Alice In Wonderland",
+			Language:    "en-0",
+		},
+	},
+	{
+		input: `_:alice <age> "013"^^<integer> .`,
+		nq: NQuad{
+			Subject:     "_:alice",
+			Predicate:   "age",
+			ObjectId:    "",
+			ObjectValue: "013@@integer",
+		},
+	},
+	{
+		input: `<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/terms/title> "N-Triples"@en-US .`,
+		nq: NQuad{
+			Subject:     "http://www.w3.org/2001/sw/RDFCore/ntriples/",
+			Predicate:   "http://purl.org/dc/terms/title",
+			ObjectId:    "",
+			ObjectValue: "N-Triples",
+			Language:    "en-US",
+		},
+	},
+	{
+		input: `_:art <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .`,
+		nq: NQuad{
+			Subject:     "_:art",
+			Predicate:   "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
+			ObjectId:    "http://xmlns.com/foaf/0.1/Person",
+			ObjectValue: nil,
+		},
+	},
+}
+
+// TestLex runs Parse over every testNQuads entry, checking that an error is
+// returned exactly when hasErr is set and that the NQuad matches the expectation.
+func TestLex(t *testing.T) {
+	for _, test := range testNQuads {
+		rnq, err := Parse(test.input)
+		if (err != nil) != test.hasErr {
+			t.Errorf("Input: %q. Expected error: %v. Got: %v", test.input, test.hasErr, err)
+		}
+		if !reflect.DeepEqual(rnq, test.nq) {
+			t.Errorf("Expected %v. Got: %v", test.nq, rnq)
+		}
+	}
+}
diff --git a/rdf/state.go b/rdf/state.go
index 3ed0cdbe..ca8af393 100644
--- a/rdf/state.go
+++ b/rdf/state.go
@@ -18,7 +18,10 @@
 // http://www.w3.org/TR/n-quads/
 package rdf
 
-import "github.com/dgraph-io/dgraph/lex"
+import (
+	"github.com/dgraph-io/dgraph/lex"
+	"github.com/dgraph-io/dgraph/x"
+)
 
 const (
 	itemText       lex.ItemType = 5 + iota // plain text
@@ -38,6 +41,8 @@ const (
 	AT_LABEL
 )
 
+var glog = x.Log("rdf")
+
 func run(l *lex.Lexer) {
 	for state := lexText; state != nil; {
 		state = state(l)
diff --git a/rdf/state_test.go b/rdf/state_test.go
deleted file mode 100644
index 97c33782..00000000
--- a/rdf/state_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright 2015 Manish R Jain <manishrjain@gmail.com>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * 		http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package rdf
-
-import (
-	"testing"
-
-	"github.com/dgraph-io/dgraph/lex"
-)
-
-var testNQuads = []struct {
-	input   string
-	entity  string
-	attr    string
-	valueid string
-	value   interface{}
-}{
-	{
-		input:   `<some_subject_id> <predicate> <Object_id> .`,
-		entity:  "<some_subject_id>",
-		attr:    "<predicate>",
-		valueid: "<Object_id>",
-	},
-	{
-		input:   `_:alice <predicate> <Object_id> .`,
-		entity:  "_:alice",
-		attr:    "<predicate>",
-		valueid: "<Object_id>",
-	},
-	{
-		input:   `_:alice <follows> _:bob0 .`,
-		entity:  "_:alice",
-		attr:    "<follows>",
-		valueid: "_:bob0",
-	},
-	{
-		input:   `_:alice <name> "Alice In Wonderland" .`,
-		entity:  "_:alice",
-		attr:    "<name>",
-		valueid: "Alice In Wonderland",
-	},
-	{
-		input:   `_:alice <name> "Alice In Wonderland"@en-0 .`,
-		entity:  "_:alice",
-		attr:    "<name>",
-		valueid: "Alice In Wonderland",
-	},
-	{
-		input:   `_:alice <age> "013"^^<integer> .`,
-		entity:  "_:alice",
-		attr:    "<age>",
-		valueid: "Alice In Wonderland",
-	},
-	{
-		input:   `<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/terms/title> "N-Triples"@en-US .`,
-		entity:  "<http://www.w3.org/2001/sw/RDFCore/ntriples/>",
-		attr:    "<http://purl.org/dc/terms/title>",
-		valueid: "Alice In Wonderland",
-	},
-}
-
-func TestLex(t *testing.T) {
-	for _, test := range testNQuads {
-		l := lex.NewLexer(test.input)
-		go run(l)
-		for item := range l.Items {
-			t.Logf("Item: %v", item)
-			if item.Typ == itemSubject {
-				if item.Val != test.entity {
-					t.Errorf("Expected: %v. Got: %v", test.entity, item.Val)
-				} else {
-					t.Logf("Subject matches")
-				}
-			}
-			if item.Typ == itemPredicate {
-				if item.Val != test.attr {
-					t.Errorf("Expected: %v. Got: %v", test.attr, item.Val)
-				} else {
-					t.Logf("Predicate matches")
-				}
-			}
-			if item.Typ == itemObject {
-				if item.Val != test.valueid {
-					t.Errorf("Expected: %v. Got: %v", test.valueid, item.Val)
-				} else {
-					t.Logf("Object matches")
-				}
-			}
-		}
-	}
-}
-- 
GitLab