Skip to content
Snippets Groups Projects
uidlist_test.go 10 KiB
Newer Older
  • Learn to ignore specific revisions
  • Jay Chiu's avatar
    Jay Chiu committed
    /*
    
     * Copyright 2016-2018 Dgraph Labs, Inc.
    
    Jay Chiu's avatar
    Jay Chiu committed
     *
    
    Manish R Jain's avatar
    Manish R Jain committed
     * This file is available under the Apache License, Version 2.0,
     * with the Commons Clause restriction.
    
    Jay Chiu's avatar
    Jay Chiu committed
     */
    
    package algo
    
    import (
    
    	"github.com/dgraph-io/dgraph/bp128"
    
    	"github.com/dgraph-io/dgraph/protos/intern"
    
    	"github.com/stretchr/testify/require"
    
    Jay Chiu's avatar
    Jay Chiu committed
    )
    
    
    func newList(data []uint64) *intern.List {
    	return &intern.List{data}
    
    Jay Chiu's avatar
    Jay Chiu committed
    func TestMergeSorted1(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{55}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{55})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted2(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 3, 6, 8, 10}),
    		newList([]uint64{2, 4, 5, 7, 15}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids,
    
    Jay Chiu's avatar
    Jay Chiu committed
    		[]uint64{1, 2, 3, 4, 5, 6, 7, 8, 10, 15})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted3(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 3, 6, 8, 10}),
    		newList([]uint64{}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted4(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{}),
    		newList([]uint64{1, 3, 6, 8, 10}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted5(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{}),
    		newList([]uint64{}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Empty(t, MergeSorted(input).Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted6(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{11, 13, 16, 18, 20}),
    		newList([]uint64{12, 14, 15, 15, 16, 16, 17, 25}),
    		newList([]uint64{1, 2}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids,
    
    Jay Chiu's avatar
    Jay Chiu committed
    		[]uint64{1, 2, 11, 12, 13, 14, 15, 16, 17, 18, 20, 25})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted7(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{5, 6, 7}),
    		newList([]uint64{3, 4}),
    		newList([]uint64{1, 2}),
    		newList([]uint64{}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 5, 6, 7})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted8(t *testing.T) {
    
    	input := []*intern.List{}
    
    	require.Empty(t, MergeSorted(input).Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted9(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 1, 1}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{1})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestMergeSorted10(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3, 3, 6}),
    		newList([]uint64{4, 8, 9}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 6, 8, 9})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestIntersectSorted1(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{2, 3, 4, 5}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, []uint64{2, 3}, IntersectSorted(input).Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestIntersectSorted2(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, IntersectSorted(input).Uids, []uint64{1, 2, 3})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestIntersectSorted3(t *testing.T) {
    
    	input := []*intern.List{}
    
    	require.Empty(t, IntersectSorted(input).Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestIntersectSorted4(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{100, 101}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Equal(t, IntersectSorted(input).Uids, []uint64{100, 101})
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestIntersectSorted5(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{2, 3, 4, 5}),
    		newList([]uint64{4, 5, 6}),
    
    Jay Chiu's avatar
    Jay Chiu committed
    	}
    
    	require.Empty(t, IntersectSorted(input).Uids)
    
    func TestIntersectSorted6(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{10, 12, 13}),
    		newList([]uint64{2, 3, 4, 13}),
    		newList([]uint64{4, 5, 6}),
    	}
    
    	require.Empty(t, IntersectSorted(input).Uids)
    
    func TestDiffSorted1(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{1}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{2, 3}, output.Uids)
    
    }
    
    func TestDiffSorted2(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{2}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{1, 3}, output.Uids)
    
    }
    
    func TestDiffSorted3(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{3}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{1, 2}, output.Uids)
    
    }
    
    func TestDiffSorted4(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{1, 2, 3}, output.Uids)
    
    }
    
    func TestDiffSorted5(t *testing.T) {
    
    	input := []*intern.List{
    
    		newList([]uint64{}),
    		newList([]uint64{1, 2}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{}, output.Uids)
    
    	input := []*intern.List{
    
    		newList([]uint64{1, 2, 3}),
    		newList([]uint64{2, 3, 4, 5}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{1}, output.Uids)
    
    	input := []*intern.List{
    
    		newList([]uint64{10, 12, 13}),
    		newList([]uint64{2, 3, 4, 13}),
    	}
    
    	output := Difference(input[0], input[1])
    	require.Equal(t, []uint64{10, 12}, output.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    func TestUIDListIntersect1(t *testing.T) {
    
    	u := newList([]uint64{1, 2, 3})
    	v := newList([]uint64{})
    
    	IntersectWith(u, v, u)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestUIDListIntersect2(t *testing.T) {
    
    	u := newList([]uint64{1, 2, 3})
    	v := newList([]uint64{1, 2, 3, 4, 5})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{1, 2, 3}, u.Uids)
    
    	require.Equal(t, []uint64{1, 2, 3, 4, 5}, v.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestUIDListIntersect3(t *testing.T) {
    
    	u := newList([]uint64{1, 2, 3})
    	v := newList([]uint64{2})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{2}, u.Uids)
    	require.Equal(t, []uint64{2}, v.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestUIDListIntersect4(t *testing.T) {
    
    	u := newList([]uint64{1, 2, 3})
    	v := newList([]uint64{0, 5})
    
    	IntersectWith(u, v, u)
    
    	require.Empty(t, u.Uids)
    	require.Equal(t, []uint64{0, 5}, v.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    func TestUIDListIntersect5(t *testing.T) {
    
    	u := newList([]uint64{1, 2, 3})
    	v := newList([]uint64{3, 5})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{3}, u.Uids)
    
    func TestUIDListIntersectDupFirst(t *testing.T) {
    	u := newList([]uint64{1, 1, 2, 3})
    	v := newList([]uint64{1, 2})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{1, 2}, u.Uids)
    
    }
    
    func TestUIDListIntersectDupBoth(t *testing.T) {
    	u := newList([]uint64{1, 1, 2, 3, 5})
    	v := newList([]uint64{1, 1, 2, 4})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{1, 1, 2}, u.Uids)
    
    }
    
    func TestUIDListIntersectDupSecond(t *testing.T) {
    	u := newList([]uint64{1, 2, 3, 5})
    	v := newList([]uint64{1, 1, 2, 4})
    
    	IntersectWith(u, v, u)
    
    	require.Equal(t, []uint64{1, 2}, u.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    func TestApplyFilterUint(t *testing.T) {
    
    	l := []uint64{1, 2, 3, 4, 5}
    	u := newList(l)
    	ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 })
    
    	require.Equal(t, []uint64{1, 3, 5}, u.Uids)
    
    Jay Chiu's avatar
    Jay Chiu committed
    }
    
    
    // Benchmarks for IntersectWith
    
    func BenchmarkListIntersectRandom(b *testing.B) {
    	randomTests := func(arrSz int, overlap float64) {
    		limit := int64(float64(arrSz) / overlap)
    		u1, v1 := make([]uint64, arrSz, arrSz), make([]uint64, arrSz, arrSz)
    		for i := 0; i < arrSz; i++ {
    			u1[i] = uint64(rand.Int63n(limit))
    			v1[i] = uint64(rand.Int63n(limit))
    		}
    		sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
    		sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
    
    		u := newList(u1)
    		v := newList(v1)
    
    		dst1 := &intern.List{}
    		dst2 := &intern.List{}
    
    		compressedUids := bp128.DeltaPack(u1)
    
    		b.Run(fmt.Sprintf(":size=%d:overlap=%.2f:", arrSz, overlap),
    			func(b *testing.B) {
    				for k := 0; k < b.N; k++ {
    					IntersectWith(u, v, dst1)
    				}
    			})
    
    		b.Run(fmt.Sprintf(":compressed:size=%d:overlap=%.2f:", arrSz, overlap),
    
    				for k := 0; k < b.N; k++ {
    					IntersectCompressedWith(compressedUids, 0, v, dst2)
    				}
    
    		i := 0
    		j := 0
    		for i < len(dst1.Uids) {
    			if dst1.Uids[i] != dst2.Uids[j] {
    				b.Errorf("Unexpected error in intersection")
    			}
    			// Behaviour of bin intersect is not defined when duplicates are present
    			i = skipDuplicate(dst1.Uids, i)
    			j = skipDuplicate(dst2.Uids, j)
    		}
    		if j < len(dst2.Uids) {
    			b.Errorf("Unexpected error in intersection")
    		}
    
    	randomTests(10240, 0.3)
    	randomTests(1024000, 0.3)
    	randomTests(10240, 0.1)
    	randomTests(1024000, 0.1)
    	randomTests(10240, 0.01)
    	randomTests(1024000, 0.01)
    
    
    func BenchmarkListIntersectRatio(b *testing.B) {
    	randomTests := func(sz int, overlap float64) {
    		sz1 := sz
    		sz2 := sz
    		rs := []int{1, 10, 50, 100, 500, 1000, 10000, 100000, 1000000}
    		for _, r := range rs {
    			sz1 = sz
    			sz2 = sz * r
    			if sz2 > 1000000 {
    				break
    			}
    
    			u1, v1 := make([]uint64, sz1, sz1), make([]uint64, sz2, sz2)
    			limit := int64(float64(sz) / overlap)
    			for i := 0; i < sz1; i++ {
    				u1[i] = uint64(rand.Int63n(limit))
    			}
    			for i := 0; i < sz2; i++ {
    				v1[i] = uint64(rand.Int63n(limit))
    			}
    
    Ashish Negi's avatar
    Ashish Negi committed
    			sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
    			sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
    
    			u := &intern.List{u1}
    			v := &intern.List{v1}
    			dst1 := &intern.List{}
    			dst2 := &intern.List{}
    
    			compressedUids := bp128.DeltaPack(v1)
    
    			fmt.Printf("len: %d, compressed: %d, bytes/int: %f\n",
    				len(v1), len(compressedUids), float64(len(compressedUids))/float64(len(v1)))
    
    			b.Run(fmt.Sprintf(":IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
    				func(b *testing.B) {
    					for k := 0; k < b.N; k++ {
    
    						IntersectWith(u, v, dst1)
    
    			b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
    				func(b *testing.B) {
    					for k := 0; k < b.N; k++ {
    						IntersectCompressedWith(compressedUids, 0, u, dst2)
    					}
    				})
    			fmt.Println()
    			i := 0
    			j := 0
    			for i < len(dst1.Uids) {
    				if dst1.Uids[i] != dst2.Uids[j] {
    					b.Errorf("Unexpected error in intersection")
    				}
    				// Behaviour of bin intersect is not defined when duplicates are present
    				i = skipDuplicate(dst1.Uids, i)
    				j = skipDuplicate(dst2.Uids, j)
    			}
    			if j < len(dst2.Uids) {
    				b.Errorf("Unexpected error in intersection")
    			}
    
    		}
    	}
    
    	randomTests(10, 0.01)
    	randomTests(100, 0.01)
    	randomTests(1000, 0.01)
    	randomTests(10000, 0.01)
    	randomTests(100000, 0.01)
    	randomTests(1000000, 0.01)
    
    func skipDuplicate(in []uint64, idx int) int {
    	i := idx + 1
    	for i < len(in) && in[i] == in[idx] {
    		i += 1
    	}
    	return i