From 9a3472b750bfc03b906ebe19506c56cbdb1eb22d Mon Sep 17 00:00:00 2001
From: Manish R Jain <manishrjain@gmail.com>
Date: Thu, 22 Oct 2015 15:03:49 +1100
Subject: [PATCH] More thought spent on how query execution would work.
 Changing things.

---
 query/query.go    | 30 +++++++++++++++++++++++++-----
 query/result.fbs  | 14 ++++++++++++++
 query/thoughts.md | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 5 deletions(-)
 create mode 100644 query/thoughts.md

diff --git a/query/query.go b/query/query.go
index fceda987..9426db65 100644
--- a/query/query.go
+++ b/query/query.go
@@ -46,12 +46,30 @@ import (
 
 var log = x.Log("query")
 
-type Mattr struct {
-	Attr string
-	Msg  *Message
+type SubGraph struct {
+	Attr     string      // NOTE(review): presumably the attribute this node queries — confirm.
+	Children []*SubGraph // Nested SubGraphs; per the NewGraph comment, populated for attributes.
+
+	Query  []byte // NOTE(review): presumably flatbuffer-encoded, like Mattr.Query — confirm.
+	Result []byte // NOTE(review): presumably flatbuffer-encoded, like Mattr.Result — confirm.
+}
+
+func NewGraph(id uint64, xid string) *SubGraph {
+	// TODO: not implemented yet; always returns nil. Intended to set the Result
+	// field in SubGraph and populate the children for attributes.
+	return nil
+}
 
-	ResultUids  []byte // Flatbuffer result.Uids
-	ResultValue []byte // gob.Encode
+type Mattr struct {
+	Attr   string
+	Msg    *Message // Nested message; must be *Message because Run(mattr.Msg) takes a *Message.
+	Query  []byte   // flatbuffer
+	Result []byte   // flatbuffer
+
+	/*
+		ResultUids  []byte // Flatbuffer result.Uids
+		ResultValue []byte // gob.Encode
+	*/
 }
 
 type Message struct {
@@ -130,7 +148,9 @@ func Run(m *Message) error {
 			x.Err(log, err).WithField("uid", m.Id).WithField("attr", mattr.Attr).
 				Error("While extracting data from posting list")
 		}
+
 		if mattr.Msg != nil {
+			// Now this would most likely be sent over wire to other servers.
 			if err := Run(mattr.Msg); err != nil {
 				return err
 			}
diff --git a/query/result.fbs b/query/result.fbs
index 8517d3fb..e75f4fc3 100644
--- a/query/result.fbs
+++ b/query/result.fbs
@@ -4,4 +4,18 @@ table Uids {
 	uid:[ulong];
 }
 
+table TaskQuery { // NOTE(review): presumably a request to look up attr for the given uids — confirm.
+	attr:string;
+	uids:[ulong];
+}
+
+table Value { // A table, not a struct: FlatBuffers structs cannot contain vectors.
+	val:[ubyte];
+}
+
+table TaskResult { // NOTE(review): presumably the reply to a TaskQuery — confirm.
+	uids:[ulong];
+	values:[Value];
+}
+
 root_type Uids;
diff --git a/query/thoughts.md b/query/thoughts.md
new file mode 100644
index 00000000..4e9ee160
--- /dev/null
+++ b/query/thoughts.md
@@ -0,0 +1,32 @@
+How to generate a unique list of uids by querying list of posting lists?
+
+Sol 1:
+- Say there are k posting lists involved.
+- One way to do so is to have a heap of k elements.
+- At each iteration, we pop() an element from the heap (log k)
+- Advance the pointer of that posting list, and retrieve another element (involves mutex read lock)
+- Push() that element into the heap (log k)
+- This would give us O(N * log k) for N total uids, with the mutex lock acquired N times.
+- With N=1000 and k=5, this gives us 1000 * ln(5) ~ 1600
+
+Performance Improvements (memory tradeoff) [Sol1a]:
+- We can alleviate the need for mutex locks by copying all the posting list uids into separate vectors.
+- This would avoid N lock acquisitions, requiring only k (one per posting list), which is the best case.
+- But this also means all the posting list uids would be stored in memory.
+
+Performance with Memory [Sol1b]:
+- Use k channels, with each channel only maintaining a buffer of say 1000 uids.
+- In fact, keep the read lock acquired during this process, to prevent the posting list from changing during a query.
+- So, basically have a way for a posting list to stream uids to a blocking channel, after having acquired a read lock.
+- Overall, this process of merging uids shouldn't take that long anyway, so this won't starve writes, only delay them.
+
+Another way [Sol2]:
+- Pick a posting list, copy all its uids in one go (one mutex lock).
+- Use a binary tree to store uids. Eliminate duplicates.
+- Iterate over each element in the uids vector, and insert into binary tree. [O(log N) max per insert]
+- Repeat with other posting lists.
+- This would give us O(N log N) complexity, with the mutex lock acquired k times.
+- With N=1000 and k=5, this gives us 1000 * ln(1000) ~ 7000
+- Not choosing this path.
+
+Solution: Sol1b
-- 
GitLab