From 0f905d99813476d29d44d65c59acd1e63d82a51e Mon Sep 17 00:00:00 2001 From: Manish R Jain <manishrjain@gmail.com> Date: Mon, 30 Nov 2015 11:58:31 +1100 Subject: [PATCH] Update README to include direct installation instructions. Also fix a bug wrt flatbuffers usage --- README.md | 59 ++++++++++++++++++++++++++++++++++++++++++-------- query/query.go | 2 +- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0e96975e..5cddf18a 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,40 @@ Distributed Graph Serving System # Installation -DGraph depends on [RocksDB](https://github.com/facebook/rocksdb). -So, install that first. + +## Via Docker +There's a docker image that you can readily use. +``` +$ docker pull dgraph/dgraph:latest +$ docker run -t -i -v /somedir:/dgraph -v $HOME/go/src/github.com/dgraph-io/benchmarks/data:/data -p 8080:8080 dgraph/dgraph:latest +``` + +Once into the dgraph container, you can now load your data. Also see [Data Loading](#data-loading) below. +``` +$ loader --postings /dgraph/p --rdfgzips /data/rdf-data.gzip --max_ram_mb 3000 +``` +Once done, you can start the server +``` +$ mkdir /dgraph/m # Create the mutations directory first. +$ server --postings /dgraph/p --mutations /dgraph/m --max_ram_mb 3000 +``` + +Now you can query the server, like so: +``` +$ curl localhost:8080/query -XPOST -d '{root(_xid_: g.11b7nwjrxk) {type.object.name.en}}' +``` + +## Directly on host machine +Best way to do this is to refer to [Dockerfile](Dockerfile), which has the most complete +instructions on getting the right setup. +All the instructions below are based on a Debian/Ubuntu system. + +### Install Go 1.4 +Go 1.5 has a regression bug in `cgo`, due to which DGraph is dependent on Go1.4. +So [download and install Go 1.4.3](https://golang.org/dl/). + +### Install RocksDB +DGraph depends on [RocksDB](https://github.com/facebook/rocksdb) for storing posting lists. ``` # First install dependencies. 
@@ -16,16 +48,22 @@ $ make shared_lib $ sudo make install ``` -This would install RocksDB library in /usr/local/lib. Make sure that your `LD_LIBRARY_PATH` is correctly pointing to it. +This would install RocksDB library in `/usr/local/lib`. Make sure that your `LD_LIBRARY_PATH` is correctly pointing to it. ``` # In ~/.bashrc export LD_LIBRARY_PATH="/usr/local/lib" ``` -Now get [dgraph](https://github.com/dgraph-io/dgraph) code: +### Install DGraph +Now get [DGraph](https://github.com/dgraph-io/dgraph) code. DGraph uses `glock` to fix dependency versions. ``` +go get -v github.com/robfig/glock go get -v github.com/dgraph-io/dgraph/... +glock sync github.com/dgraph-io/dgraph + +# Optional +go test github.com/dgraph-io/dgraph/... ``` # Usage @@ -54,11 +92,12 @@ are applied in layers above posting lists. While loader doesn't write to disk every time a mutation happens, it does periodically merge all the mutations to posting lists, and writes them to rocksdb which persists them. How often this merging happens can be fine tuned by specifying `max_ram_mb`. -Every time loader determines it exceeds this threshold, it would *stop the world*, and start the merge process. -The more memory is available for loader to work with, the less merging needs to be done, the faster the loading. +Periodically loader checks its memory usage and if it determines it exceeds this threshold, +it would *stop the world*, and start the merge process. +The more memory is available for loader to work with, the less frequently merging needs to be done, the faster the loading. -Thus, loader performance is highly dependent on merging performance, which depends on how fast the underlying persistent storage is. -So, *RAMFS/TMPFS > SSD > Hard disk*, when it comes to loading performance. +In other words, loader performance is highly dependent on merging performance, which depends on how fast the underlying persistent storage is. +So, *Ramfs/Tmpfs > SSD > Hard disk*, when it comes to loading performance. 
As a reference point, it takes 220 seconds to load 4.1M RDFs from `names.gz`(from benchmarks repository) on my 6-core Intel Xeon Dell Precision T3500, using 1G TMPFS for postings directory, and with `max_ram_mb=3000` flag set. @@ -98,4 +137,6 @@ me(_xid_: m.06pj8) { ``` This query would find all movies directed by Steven Spielberg, their names, initial release dates, countries, genres, and the cast of these movies, i.e. characteres and actors playing those characters; and all the movies directed by these actors, if any. -The support for GraphQL is very limited right now. In particular, mutations, fragments etc. via GraphQL aren't supported. You can conveniently browse [Freebase film schema here](http://www.freebase.com/film/film?schema=&lang=en). There're also some pointers in dgraph-io/benchmarks/data/README.md. +The support for GraphQL is very limited right now. In particular, mutations, fragments etc. via GraphQL aren't supported. +You can conveniently browse [Freebase film schema here](http://www.freebase.com/film/film?schema=&lang=en). +There're also some schema pointers in [README](https://github.com/dgraph-io/benchmarks/blob/master/data/README.md). diff --git a/query/query.go b/query/query.go index d28a0feb..2d1ccb20 100644 --- a/query/query.go +++ b/query/query.go @@ -278,8 +278,8 @@ func NewGraph(euid uint64, exid string) (*SubGraph, error) { // Also need to add nil value to keep this consistent. var voffset flatbuffers.UOffsetT { - task.ValueStart(b) bvo := b.CreateByteVector(x.Nilbyte) + task.ValueStart(b) task.ValueAddVal(b, bvo) voffset = task.ValueEnd(b) } -- GitLab