From 1d92022e4bfc984ebecc91cc5f60d509e67296fa Mon Sep 17 00:00:00 2001 From: Manish R Jain <manishrjain@gmail.com> Date: Mon, 2 Nov 2015 17:42:49 +1100 Subject: [PATCH] working parser --- lex/lexer.go | 2 +- rdf/parse.go | 82 ++++++++++++++++++++++++++++++++ rdf/parse_test.go | 117 ++++++++++++++++++++++++++++++++++++++++++++++ rdf/state.go | 7 ++- rdf/state_test.go | 105 ----------------------------------------- 5 files changed, 206 insertions(+), 107 deletions(-) create mode 100644 rdf/parse.go create mode 100644 rdf/parse_test.go delete mode 100644 rdf/state_test.go diff --git a/lex/lexer.go b/lex/lexer.go index ec88d570..3fcf191e 100644 --- a/lex/lexer.go +++ b/lex/lexer.go @@ -91,7 +91,7 @@ func (l *Lexer) Emit(t ItemType) { "start": l.Start, "pos": l.Pos, "typ": t, - }).Info("Invalid emit") + }).Debug("Invalid emit") return } l.Items <- item{ diff --git a/rdf/parse.go b/rdf/parse.go new file mode 100644 index 00000000..de9661e7 --- /dev/null +++ b/rdf/parse.go @@ -0,0 +1,82 @@ +/* + * Copyright 2015 Manish R Jain <manishrjain@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package rdf + +import ( + "fmt" + + "github.com/dgraph-io/dgraph/lex" +) + +type NQuad struct { + Subject string + Predicate string + ObjectId string + ObjectValue interface{} + Label string + Language string +} + +func stripBracketsIfPresent(val string) string { + if val[0] != '<' { + return val + } + if val[len(val)-1] != '>' { + return val + } + return val[1 : len(val)-1] +} + +func Parse(line string) (rnq NQuad, rerr error) { + l := lex.NewLexer(line) + go run(l) + var oval string + for item := range l.Items { + if item.Typ == itemSubject { + rnq.Subject = stripBracketsIfPresent(item.Val) + } + if item.Typ == itemPredicate { + rnq.Predicate = stripBracketsIfPresent(item.Val) + } + if item.Typ == itemObject { + rnq.ObjectId = stripBracketsIfPresent(item.Val) + } + if item.Typ == itemLiteral { + oval = item.Val + } + if item.Typ == itemLanguage { + rnq.Language = item.Val + } + if item.Typ == itemObjectType { + if len(oval) == 0 { + glog.Fatalf( + "itemObject should be emitted before itemObjectType. Input: %q", line) + } + oval += "@@" + stripBracketsIfPresent(item.Val) + } + } + if len(oval) > 0 { + rnq.ObjectValue = oval + } + if len(rnq.Subject) == 0 || len(rnq.Predicate) == 0 { + return rnq, fmt.Errorf("Empty required fields in NQuad") + } + if len(rnq.ObjectId) == 0 && rnq.ObjectValue == nil { + return rnq, fmt.Errorf("No Object in NQuad") + } + return rnq, nil +} diff --git a/rdf/parse_test.go b/rdf/parse_test.go new file mode 100644 index 00000000..2d26986c --- /dev/null +++ b/rdf/parse_test.go @@ -0,0 +1,117 @@ +/* + * Copyright 2015 Manish R Jain <manishrjain@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package rdf + +import ( + "reflect" + "testing" +) + +var testNQuads = []struct { + input string + nq NQuad + hasErr bool +}{ + { + input: `<some_subject_id> <predicate> <object_id> .`, + nq: NQuad{ + Subject: "some_subject_id", + Predicate: "predicate", + ObjectId: "object_id", + ObjectValue: nil, + }, + }, + { + input: `_:alice <predicate> <object_id> .`, + nq: NQuad{ + Subject: "_:alice", + Predicate: "predicate", + ObjectId: "object_id", + ObjectValue: nil, + }, + }, + { + input: `_:alice <follows> _:bob0 .`, + nq: NQuad{ + Subject: "_:alice", + Predicate: "follows", + ObjectId: "_:bob0", + ObjectValue: nil, + }, + }, + { + input: `_:alice <name> "Alice In Wonderland" .`, + nq: NQuad{ + Subject: "_:alice", + Predicate: "name", + ObjectId: "", + ObjectValue: "Alice In Wonderland", + }, + }, + { + input: `_:alice <name> "Alice In Wonderland"@en-0 .`, + nq: NQuad{ + Subject: "_:alice", + Predicate: "name", + ObjectId: "", + ObjectValue: "Alice In Wonderland", + Language: "en-0", + }, + }, + { + input: `_:alice <age> "013"^^<integer> .`, + nq: NQuad{ + Subject: "_:alice", + Predicate: "age", + ObjectId: "", + ObjectValue: "013@@integer", + }, + }, + { + input: `<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/terms/title> "N-Triples"@en-US .`, + nq: NQuad{ + Subject: "http://www.w3.org/2001/sw/RDFCore/ntriples/", + Predicate: "http://purl.org/dc/terms/title", + ObjectId: "", + ObjectValue: "N-Triples", + Language: "en-US", + }, + }, + { + input: `_:art <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person> .`, + nq: NQuad{ + Subject: "_:art", + Predicate: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + ObjectId: "http://xmlns.com/foaf/0.1/Person", + ObjectValue: nil, + }, + }, +} + +func TestLex(t *testing.T) { + for _, test := range testNQuads { + rnq, err := Parse(test.input) + if test.hasErr { + if err == nil { + t.Errorf("Expected error for input: %q", test.input) + } + } + if !reflect.DeepEqual(rnq, test.nq) { + t.Errorf("Expected %v. Got: %v", test.nq, rnq) + } + } +} diff --git a/rdf/state.go b/rdf/state.go index 3ed0cdbe..ca8af393 100644 --- a/rdf/state.go +++ b/rdf/state.go @@ -18,7 +18,10 @@ // http://www.w3.org/TR/n-quads/ package rdf -import "github.com/dgraph-io/dgraph/lex" +import ( + "github.com/dgraph-io/dgraph/lex" + "github.com/dgraph-io/dgraph/x" +) const ( itemText lex.ItemType = 5 + iota // plain text @@ -38,6 +41,8 @@ const ( AT_LABEL ) +var glog = x.Log("rdf") + func run(l *lex.Lexer) { for state := lexText; state != nil; { state = state(l) diff --git a/rdf/state_test.go b/rdf/state_test.go deleted file mode 100644 index 97c33782..00000000 --- a/rdf/state_test.go +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2015 Manish R Jain <manishrjain@gmail.com> - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package rdf - -import ( - "testing" - - "github.com/dgraph-io/dgraph/lex" -) - -var testNQuads = []struct { - input string - entity string - attr string - valueid string - value interface{} -}{ - { - input: `<some_subject_id> <predicate> <Object_id> .`, - entity: "<some_subject_id>", - attr: "<predicate>", - valueid: "<Object_id>", - }, - { - input: `_:alice <predicate> <Object_id> .`, - entity: "_:alice", - attr: "<predicate>", - valueid: "<Object_id>", - }, - { - input: `_:alice <follows> _:bob0 .`, - entity: "_:alice", - attr: "<follows>", - valueid: "_:bob0", - }, - { - input: `_:alice <name> "Alice In Wonderland" .`, - entity: "_:alice", - attr: "<name>", - valueid: "Alice In Wonderland", - }, - { - input: `_:alice <name> "Alice In Wonderland"@en-0 .`, - entity: "_:alice", - attr: "<name>", - valueid: "Alice In Wonderland", - }, - { - input: `_:alice <age> "013"^^<integer> .`, - entity: "_:alice", - attr: "<age>", - valueid: "Alice In Wonderland", - }, - { - input: `<http://www.w3.org/2001/sw/RDFCore/ntriples/> <http://purl.org/dc/terms/title> "N-Triples"@en-US .`, - entity: "<http://www.w3.org/2001/sw/RDFCore/ntriples/>", - attr: "<http://purl.org/dc/terms/title>", - valueid: "Alice In Wonderland", - }, -} - -func TestLex(t *testing.T) { - for _, test := range testNQuads { - l := lex.NewLexer(test.input) - go run(l) - for item := range l.Items { - t.Logf("Item: %v", item) - if item.Typ == itemSubject { - if item.Val != test.entity { - t.Errorf("Expected: %v. Got: %v", test.entity, item.Val) - } else { - t.Logf("Subject matches") - } - } - if item.Typ == itemPredicate { - if item.Val != test.attr { - t.Errorf("Expected: %v. Got: %v", test.attr, item.Val) - } else { - t.Logf("Predicate matches") - } - } - if item.Typ == itemObject { - if item.Val != test.valueid { - t.Errorf("Expected: %v. Got: %v", test.valueid, item.Val) - } else { - t.Logf("Object matches") - } - } - } - } -} -- GitLab