Newer
Older
Manish R Jain
committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/*
* Copyright 2015 Manish R Jain <manishrjain@gmail.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"compress/gzip"
"flag"
"os"
"runtime"
"runtime/pprof"
"strings"
"github.com/Sirupsen/logrus"
"github.com/dgraph-io/dgraph/commit"
"github.com/dgraph-io/dgraph/loader"
"github.com/dgraph-io/dgraph/posting"
"github.com/dgraph-io/dgraph/store"
"github.com/dgraph-io/dgraph/x"
)
var glog = x.Log("loader_main")
var rdfGzips = flag.String("rdfgzips", "",
"Comma separated gzip files containing RDF data")
var mod = flag.Uint64("mod", 1, "Only pick entities, where uid % mod == 0.")
var numgo = flag.Int("numgo", 4,
"Number of goroutines to use for reading file.")
var postingDir = flag.String("postings", "", "Directory to store posting lists")
var mutationDir = flag.String("mutations", "", "Directory to store mutations")
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
func main() {
flag.Parse()
if !flag.Parsed() {
glog.Fatal("Unable to parse flags")
}
if len(*cpuprofile) > 0 {
f, err := os.Create(*cpuprofile)
if err != nil {
glog.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
logrus.SetLevel(logrus.InfoLevel)
Manish R Jain
committed
numCpus := runtime.GOMAXPROCS(-1)
glog.WithField("gomaxprocs", numCpus).Info("Number of CPUs")
Manish R Jain
committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
if len(*rdfGzips) == 0 {
glog.Fatal("No RDF GZIP files specified")
}
ps := new(store.Store)
ps.Init(*postingDir)
defer ps.Close()
clog := commit.NewLogger(*mutationDir, "dgraph", 50<<20)
clog.SkipWrite = true // Don't write to commit logs.
clog.Init()
defer clog.Close()
posting.Init(ps, clog)
files := strings.Split(*rdfGzips, ",")
for _, path := range files {
if len(path) == 0 {
continue
}
glog.WithField("path", path).Info("Handling...")
f, err := os.Open(path)
if err != nil {
glog.WithError(err).Fatal("Unable to open rdf file.")
}
r, err := gzip.NewReader(f)
if err != nil {
glog.WithError(err).Fatal("Unable to create gzip reader.")
}
count, err := loader.HandleRdfReader(r, *mod)
if err != nil {
glog.Fatal(err)
}
glog.WithField("count", count).Info("RDFs parsed")
r.Close()
f.Close()
}
glog.Info("Calling merge lists")
Manish R Jain
committed
posting.MergeLists(100 * numCpus) // 100 per core.