Skip to content
Snippets Groups Projects
main.go 3.25 KiB
Newer Older
  • Learn to ignore specific revisions
    /*
     * Copyright 2015 Manish R Jain <manishrjain@gmail.com>
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     * 		http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    package main
    
    import (
    	"compress/gzip"
    	"flag"
    	"os"
    	"runtime"
    	"runtime/pprof"
    	"strings"
    
    	"github.com/Sirupsen/logrus"
    	"github.com/dgraph-io/dgraph/loader"
    	"github.com/dgraph-io/dgraph/posting"
    	"github.com/dgraph-io/dgraph/store"
    
    	"github.com/dgraph-io/dgraph/uid"
    
    	"github.com/dgraph-io/dgraph/x"
    )
    
    // glog is the package-level logger used throughout this file, obtained
    // from x.Log under the name "loader_main".
    var glog = x.Log("loader_main")
    
    // Command-line flags accepted by the loader binary. Each variable is a
    // pointer that holds the flag's default until flag.Parse runs in main.
    var (
    	// rdfGzips is a comma separated list of gzipped RDF input files;
    	// main splits it on "," and skips empty entries.
    	rdfGzips = flag.String(
    		"rdfgzips",
    		"",
    		"Comma separated gzip files containing RDF data",
    	)

    	// instanceIdx and numInstances are forwarded together to
    	// loader.HandleRdfReader for every input file.
    	instanceIdx = flag.Uint64(
    		"instanceIdx",
    		0,
    		"Only pick entities, where Fingerprint % numInstance == instanceIdx.",
    	)
    	numInstances = flag.Uint64(
    		"numInstances",
    		1,
    		"Total number of instances among which uid assigning is shared",
    	)

    	// postingDir is the directory passed to the data store's Init.
    	postingDir = flag.String(
    		"postings",
    		"",
    		"Directory to store posting lists",
    	)

    	// uidDir is the directory passed to the UID store's Init.
    	uidDir = flag.String(
    		"uids",
    		"",
    		"Directory to read UID posting lists",
    	)

    	// cpuprofile, when non-empty, names the file that receives the CPU
    	// profile started at the beginning of main.
    	cpuprofile = flag.String(
    		"cpuprofile",
    		"",
    		"write cpu profile to file",
    	)

    	// memprofile, when non-empty, names the file that receives the heap
    	// profile written after the posting lists have been merged.
    	memprofile = flag.String(
    		"memprofile",
    		"",
    		"write memory profile to file",
    	)

    	// numcpu is the value given to runtime.GOMAXPROCS; it defaults to the
    	// number of CPUs reported by the runtime.
    	numcpu = flag.Int(
    		"numCpu",
    		runtime.NumCPU(),
    		"Number of cores to be used by the process",
    	)
    )
    
    
    func main() {
    	flag.Parse()
    	if !flag.Parsed() {
    		glog.Fatal("Unable to parse flags")
    	}
    	if len(*cpuprofile) > 0 {
    		f, err := os.Create(*cpuprofile)
    
    			glog.Fatal(err)
    		}
    		pprof.StartCPUProfile(f)
    		defer pprof.StopCPUProfile()
    	}
    
    	logrus.SetLevel(logrus.InfoLevel)
    
    Ashwin's avatar
    Ashwin committed
    	numCpus := *numcpu
    
    	prevProcs := runtime.GOMAXPROCS(numCpus)
    	glog.WithField("num_cpu", numCpus).
    		WithField("prev_maxprocs", prevProcs).
    		Info("Set max procs to num cpus")
    
    
    	if len(*rdfGzips) == 0 {
    		glog.Fatal("No RDF GZIP files specified")
    	}
    
    	dataStore := new(store.Store)
    	dataStore.Init(*postingDir)
    	defer dataStore.Close()
    
    	uidStore := new(store.Store)
    	uidStore.Init(*uidDir)
    	defer uidStore.Close()
    
    Ashwin's avatar
    Ashwin committed
    
    	posting.Init(nil)
    
    	uid.Init(uidStore)
    	loader.Init(uidStore, dataStore)
    
    
    	files := strings.Split(*rdfGzips, ",")
    	for _, path := range files {
    		if len(path) == 0 {
    			continue
    		}
    		glog.WithField("path", path).Info("Handling...")
    		f, err := os.Open(path)
    		if err != nil {
    			glog.WithError(err).Fatal("Unable to open rdf file.")
    		}
    
    		r, err := gzip.NewReader(f)
    		if err != nil {
    			glog.WithError(err).Fatal("Unable to create gzip reader.")
    		}
    
    
    		count, err := loader.HandleRdfReader(r, *instanceIdx, *numInstances)
    
    			glog.WithError(err).Fatal("While handling rdf reader.")
    
    		}
    		glog.WithField("count", count).Info("RDFs parsed")
    		r.Close()
    		f.Close()
    	}
    	glog.Info("Calling merge lists")
    
    	posting.MergeLists(100 * numCpus) // 100 per core.
    
    
    	if len(*memprofile) > 0 {
    		f, err := os.Create(*memprofile)
    		if err != nil {
    			glog.Fatal(err)
    		}
    		pprof.WriteHeapProfile(f)
    		f.Close()
    	}