Newer
Older
* This file is available under the Apache License, Version 2.0,
* with the Commons Clause restriction.
"fmt"
"math/rand"
"sort"
"github.com/dgraph-io/dgraph/protos/intern"
"github.com/stretchr/testify/require"
func newList(data []uint64) *intern.List {
return &intern.List{data}
require.Equal(t, MergeSorted(input).Uids, []uint64{55})
newList([]uint64{1, 3, 6, 8, 10}),
newList([]uint64{2, 4, 5, 7, 15}),
require.Equal(t, MergeSorted(input).Uids,
newList([]uint64{1, 3, 6, 8, 10}),
newList([]uint64{}),
require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
newList([]uint64{}),
newList([]uint64{1, 3, 6, 8, 10}),
require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
newList([]uint64{}),
newList([]uint64{}),
require.Empty(t, MergeSorted(input).Uids)
newList([]uint64{11, 13, 16, 18, 20}),
newList([]uint64{12, 14, 15, 15, 16, 16, 17, 25}),
newList([]uint64{1, 2}),
require.Equal(t, MergeSorted(input).Uids,
newList([]uint64{5, 6, 7}),
newList([]uint64{3, 4}),
newList([]uint64{1, 2}),
newList([]uint64{}),
require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 5, 6, 7})
require.Empty(t, MergeSorted(input).Uids)
require.Equal(t, MergeSorted(input).Uids, []uint64{1})
newList([]uint64{1, 2, 3, 3, 6}),
newList([]uint64{4, 8, 9}),
require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 6, 8, 9})
newList([]uint64{1, 2, 3}),
newList([]uint64{2, 3, 4, 5}),
require.Equal(t, []uint64{2, 3}, IntersectSorted(input).Uids)
require.Equal(t, IntersectSorted(input).Uids, []uint64{1, 2, 3})
require.Empty(t, IntersectSorted(input).Uids)
require.Equal(t, IntersectSorted(input).Uids, []uint64{100, 101})
newList([]uint64{1, 2, 3}),
newList([]uint64{2, 3, 4, 5}),
newList([]uint64{4, 5, 6}),
require.Empty(t, IntersectSorted(input).Uids)
func TestIntersectSorted6(t *testing.T) {
newList([]uint64{10, 12, 13}),
newList([]uint64{2, 3, 4, 13}),
newList([]uint64{4, 5, 6}),
}
require.Empty(t, IntersectSorted(input).Uids)
newList([]uint64{1, 2, 3}),
newList([]uint64{1}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{2, 3}, output.Uids)
}
func TestDiffSorted2(t *testing.T) {
newList([]uint64{1, 2, 3}),
newList([]uint64{2}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{1, 3}, output.Uids)
}
func TestDiffSorted3(t *testing.T) {
newList([]uint64{1, 2, 3}),
newList([]uint64{3}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{1, 2}, output.Uids)
}
func TestDiffSorted4(t *testing.T) {
newList([]uint64{1, 2, 3}),
newList([]uint64{}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{1, 2, 3}, output.Uids)
}
func TestDiffSorted5(t *testing.T) {
newList([]uint64{}),
newList([]uint64{1, 2}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{}, output.Uids)
func TestSubSorted1(t *testing.T) {
newList([]uint64{1, 2, 3}),
newList([]uint64{2, 3, 4, 5}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{1}, output.Uids)
}
func TestSubSorted6(t *testing.T) {
newList([]uint64{10, 12, 13}),
newList([]uint64{2, 3, 4, 13}),
}
output := Difference(input[0], input[1])
require.Equal(t, []uint64{10, 12}, output.Uids)
}
u := newList([]uint64{1, 2, 3})
v := newList([]uint64{})
require.Empty(t, u.Uids)
u := newList([]uint64{1, 2, 3})
v := newList([]uint64{1, 2, 3, 4, 5})
require.Equal(t, []uint64{1, 2, 3}, u.Uids)
require.Equal(t, []uint64{1, 2, 3, 4, 5}, v.Uids)
u := newList([]uint64{1, 2, 3})
v := newList([]uint64{2})
require.Equal(t, []uint64{2}, u.Uids)
require.Equal(t, []uint64{2}, v.Uids)
u := newList([]uint64{1, 2, 3})
v := newList([]uint64{0, 5})
require.Empty(t, u.Uids)
require.Equal(t, []uint64{0, 5}, v.Uids)
u := newList([]uint64{1, 2, 3})
v := newList([]uint64{3, 5})
require.Equal(t, []uint64{3}, u.Uids)
func TestUIDListIntersectDupFirst(t *testing.T) {
u := newList([]uint64{1, 1, 2, 3})
v := newList([]uint64{1, 2})
require.Equal(t, []uint64{1, 2}, u.Uids)
}
func TestUIDListIntersectDupBoth(t *testing.T) {
u := newList([]uint64{1, 1, 2, 3, 5})
v := newList([]uint64{1, 1, 2, 4})
require.Equal(t, []uint64{1, 1, 2}, u.Uids)
}
func TestUIDListIntersectDupSecond(t *testing.T) {
u := newList([]uint64{1, 2, 3, 5})
v := newList([]uint64{1, 1, 2, 4})
require.Equal(t, []uint64{1, 2}, u.Uids)
l := []uint64{1, 2, 3, 4, 5}
u := newList(l)
ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 })
require.Equal(t, []uint64{1, 3, 5}, u.Uids)
// Benchmarks for IntersectWith
func BenchmarkListIntersectRandom(b *testing.B) {
randomTests := func(arrSz int, overlap float64) {
limit := int64(float64(arrSz) / overlap)
u1, v1 := make([]uint64, arrSz, arrSz), make([]uint64, arrSz, arrSz)
for i := 0; i < arrSz; i++ {
u1[i] = uint64(rand.Int63n(limit))
v1[i] = uint64(rand.Int63n(limit))
}
sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
dst1 := &intern.List{}
dst2 := &intern.List{}
b.Run(fmt.Sprintf(":size=%d:overlap=%.2f:", arrSz, overlap),
func(b *testing.B) {
for k := 0; k < b.N; k++ {
IntersectWith(u, v, dst1)
}
})
b.Run(fmt.Sprintf(":compressed:size=%d:overlap=%.2f:", arrSz, overlap),
func(b *testing.B) {
for k := 0; k < b.N; k++ {
IntersectCompressedWith(compressedUids, 0, v, dst2)
}
i := 0
j := 0
for i < len(dst1.Uids) {
if dst1.Uids[i] != dst2.Uids[j] {
b.Errorf("Unexpected error in intersection")
}
// Behaviour of bin intersect is not defined when duplicates are present
i = skipDuplicate(dst1.Uids, i)
j = skipDuplicate(dst2.Uids, j)
}
if j < len(dst2.Uids) {
b.Errorf("Unexpected error in intersection")
}
randomTests(10240, 0.3)
randomTests(1024000, 0.3)
randomTests(10240, 0.1)
randomTests(1024000, 0.1)
randomTests(10240, 0.01)
randomTests(1024000, 0.01)
func BenchmarkListIntersectRatio(b *testing.B) {
randomTests := func(sz int, overlap float64) {
sz1 := sz
sz2 := sz
rs := []int{1, 10, 50, 100, 500, 1000, 10000, 100000, 1000000}
for _, r := range rs {
sz1 = sz
sz2 = sz * r
if sz2 > 1000000 {
break
}
u1, v1 := make([]uint64, sz1, sz1), make([]uint64, sz2, sz2)
limit := int64(float64(sz) / overlap)
for i := 0; i < sz1; i++ {
u1[i] = uint64(rand.Int63n(limit))
}
for i := 0; i < sz2; i++ {
v1[i] = uint64(rand.Int63n(limit))
}
sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
u := &intern.List{u1}
v := &intern.List{v1}
dst1 := &intern.List{}
dst2 := &intern.List{}
fmt.Printf("len: %d, compressed: %d, bytes/int: %f\n",
len(v1), len(compressedUids), float64(len(compressedUids))/float64(len(v1)))
b.Run(fmt.Sprintf(":IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
func(b *testing.B) {
for k := 0; k < b.N; k++ {
}
})
b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
func(b *testing.B) {
for k := 0; k < b.N; k++ {
IntersectCompressedWith(compressedUids, 0, u, dst2)
}
})
fmt.Println()
i := 0
j := 0
for i < len(dst1.Uids) {
if dst1.Uids[i] != dst2.Uids[j] {
b.Errorf("Unexpected error in intersection")
}
// Behaviour of bin intersect is not defined when duplicates are present
i = skipDuplicate(dst1.Uids, i)
j = skipDuplicate(dst2.Uids, j)
}
if j < len(dst2.Uids) {
b.Errorf("Unexpected error in intersection")
}
}
}
randomTests(10, 0.01)
randomTests(100, 0.01)
randomTests(1000, 0.01)
randomTests(10000, 0.01)
randomTests(100000, 0.01)
randomTests(1000000, 0.01)
func skipDuplicate(in []uint64, idx int) int {
i := idx + 1
for i < len(in) && in[i] == in[idx] {
i += 1
}
return i