init
This commit is contained in:
24
vendor/github.com/willf/bloom/LICENSE
generated
vendored
Normal file
24
vendor/github.com/willf/bloom/LICENSE
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
197
vendor/github.com/willf/bloom/Makefile
generated
vendored
Normal file
197
vendor/github.com/willf/bloom/Makefile
generated
vendored
Normal file
@ -0,0 +1,197 @@
|
||||
# MAKEFILE
|
||||
#
|
||||
# @author Nicola Asuni <info@tecnick.com>
|
||||
# @link https://github.com/willf/bloom
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# List special make targets that are not associated with files.
# Every command target defined in this Makefile is listed, including docs.
.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan docs qa deps clean nuke
|
||||
|
||||
# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS).
|
||||
SHELL=/bin/bash
|
||||
|
||||
# CVS path (path to the parent dir containing the project)
|
||||
CVSPATH=github.com/willf
|
||||
|
||||
# Project owner
|
||||
OWNER=willf
|
||||
|
||||
# Project vendor
|
||||
VENDOR=willf
|
||||
|
||||
# Project name
|
||||
PROJECT=bloom
|
||||
|
||||
# Project version
|
||||
VERSION=$(shell cat VERSION)
|
||||
|
||||
# Name of RPM or DEB package
|
||||
PKGNAME=${VENDOR}-${PROJECT}
|
||||
|
||||
# Current directory
|
||||
CURRENTDIR=$(shell pwd)
|
||||
|
||||
# GO lang path
|
||||
ifneq ($(GOPATH),)
|
||||
ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),)
|
||||
# the defined GOPATH is not valid
|
||||
GOPATH=
|
||||
endif
|
||||
endif
|
||||
ifeq ($(GOPATH),)
|
||||
# extract the GOPATH
|
||||
GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR)))
|
||||
endif
|
||||
|
||||
# --- MAKE TARGETS ---
|
||||
|
||||
# Display general help about this command
|
||||
help:
|
||||
@echo ""
|
||||
@echo "$(PROJECT) Makefile."
|
||||
@echo "GOPATH=$(GOPATH)"
|
||||
@echo "The following commands are available:"
|
||||
@echo ""
|
||||
@echo " make qa : Run all the tests"
|
||||
@echo " make test : Run the unit tests"
|
||||
@echo ""
|
||||
@echo " make format : Format the source code"
|
||||
@echo " make fmtcheck : Check if the source code has been formatted"
|
||||
@echo " make vet : Check for suspicious constructs"
|
||||
@echo " make lint : Check for style errors"
|
||||
@echo " make coverage : Generate the coverage report"
|
||||
@echo " make cyclo : Generate the cyclomatic complexity report"
|
||||
@echo " make ineffassign : Detect ineffectual assignments"
|
||||
@echo " make misspell : Detect commonly misspelled words in source files"
|
||||
@echo " make structcheck : Find unused struct fields"
|
||||
@echo " make varcheck : Find unused global variables and constants"
|
||||
@echo " make errcheck : Check that error return values are used"
|
||||
@echo " make gosimple : Suggest code simplifications"
|
||||
@echo " make astscan : GO AST scanner"
|
||||
@echo ""
|
||||
@echo " make docs : Generate source code documentation"
|
||||
@echo ""
|
||||
@echo " make deps : Get the dependencies"
|
||||
@echo " make clean : Remove any build artifact"
|
||||
@echo " make nuke : Deletes any intermediate file"
|
||||
@echo ""
|
||||
|
||||
# Alias for help target
|
||||
all: help
|
||||
|
||||
# Run the unit tests
|
||||
test:
|
||||
@mkdir -p target/test
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) \
|
||||
go test \
|
||||
-covermode=atomic \
|
||||
-bench=. \
|
||||
-race \
|
||||
-cpuprofile=target/report/cpu.out \
|
||||
-memprofile=target/report/mem.out \
|
||||
-mutexprofile=target/report/mutex.out \
|
||||
-coverprofile=target/report/coverage.out \
|
||||
-v ./... | \
|
||||
tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \
|
||||
test $${PIPESTATUS[0]} -eq 0
|
||||
|
||||
# Format the source code
|
||||
format:
|
||||
@find . -type f -name "*.go" -exec gofmt -s -w {} \;
|
||||
|
||||
# Check if the source code has been formatted
|
||||
fmtcheck:
|
||||
@mkdir -p target
|
||||
@find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff
|
||||
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
|
||||
|
||||
# Check for suspicious constructs
|
||||
vet:
|
||||
GOPATH=$(GOPATH) go vet .
|
||||
|
||||
# Check for style errors
|
||||
lint:
|
||||
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint .
|
||||
|
||||
# Generate the coverage report
|
||||
coverage:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) \
|
||||
go tool cover -html=target/report/coverage.out -o target/report/coverage.html
|
||||
|
||||
# Report cyclomatic complexity
|
||||
cyclo:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0
|
||||
|
||||
# Detect ineffectual assignments
|
||||
ineffassign:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0
|
||||
|
||||
# Detect commonly misspelled words in source files
|
||||
misspell:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0
|
||||
|
||||
# Find unused struct fields
|
||||
structcheck:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt
|
||||
|
||||
# Find unused global variables and constants
|
||||
varcheck:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt
|
||||
|
||||
# Check that error return values are used
|
||||
errcheck:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt
|
||||
|
||||
# Suggest code simplifications
|
||||
gosimple:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt
|
||||
|
||||
# AST scanner
|
||||
astscan:
|
||||
@mkdir -p target/report
|
||||
GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0
|
||||
|
||||
# Generate source docs
|
||||
docs:
|
||||
@mkdir -p target/docs
|
||||
nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 &
|
||||
wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060`
|
||||
@echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html
|
||||
|
||||
# Alias to run all quality-assurance checks
|
||||
qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan
|
||||
|
||||
# --- INSTALL ---
|
||||
|
||||
# Get the dependencies
|
||||
deps:
|
||||
GOPATH=$(GOPATH) go get ./...
|
||||
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
|
||||
GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report
|
||||
GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov
|
||||
GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo
|
||||
GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign
|
||||
GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell
|
||||
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck
|
||||
GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck
|
||||
GOPATH=$(GOPATH) go get github.com/kisielk/errcheck
|
||||
GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple
|
||||
GOPATH=$(GOPATH) go get github.com/GoASTScanner/gas
|
||||
|
||||
# Remove any build artifact
|
||||
clean:
|
||||
GOPATH=$(GOPATH) go clean ./...
|
||||
|
||||
# Deletes any intermediate file
|
||||
nuke:
|
||||
rm -rf ./target
|
||||
GOPATH=$(GOPATH) go clean -i ./...
|
69
vendor/github.com/willf/bloom/README.md
generated
vendored
Normal file
69
vendor/github.com/willf/bloom/README.md
generated
vendored
Normal file
@ -0,0 +1,69 @@
|
||||
Bloom filters
|
||||
-------------
|
||||
|
||||
[](https://travis-ci.org/willf/bloom?branch=master)
|
||||
[](https://coveralls.io/github/willf/bloom?branch=master)
|
||||
[](https://goreportcard.com/report/github.com/willf/bloom)
|
||||
[](http://godoc.org/github.com/willf/bloom)
|
||||
|
||||
A Bloom filter is a representation of a set of _n_ items, where the main
|
||||
requirement is to make membership queries; _i.e._, whether an item is a
|
||||
member of a set.
|
||||
|
||||
A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large multiple of the cardinality of the set to represent) and _k_, the number of hashing functions on elements of the set. (The actual hashing functions are important, too, but this is not a parameter for this implementation). A Bloom filter is backed by a [BitSet](https://github.com/willf/bitset); a key is represented in the filter by setting the bits at each value of the hashing functions (modulo _m_). Set membership is done by _testing_ whether the bits at each value of the hashing functions (again, modulo _m_) are set. If so, the item is in the set. If the item is actually in the set, a Bloom filter will never fail (the true positive rate is 1.0); but it is susceptible to false positives. The art is to choose _k_ and _m_ correctly.
|
||||
|
||||
In this implementation, the hashing function used is [murmurhash](https://github.com/spaolacci/murmur3), a non-cryptographic hashing function.
|
||||
|
||||
This implementation accepts keys for setting and testing as `[]byte`. Thus, to
|
||||
add a string item, `"Love"`:
|
||||
|
||||
n := uint(1000)
|
||||
filter := bloom.New(20*n, 5) // load of 20, 5 keys
|
||||
filter.Add([]byte("Love"))
|
||||
|
||||
Similarly, to test if `"Love"` is in bloom:
|
||||
|
||||
if filter.Test([]byte("Love"))
|
||||
|
||||
For numeric data, I recommend that you look into the encoding/binary library. But, for example, to add a `uint32` to the filter:
|
||||
|
||||
i := uint32(100)
|
||||
n1 := make([]byte, 4)
|
||||
binary.BigEndian.PutUint32(n1, i)
|
||||
filter.Add(n1)
|
||||
|
||||
Finally, there is a method to estimate the false positive rate of a particular
|
||||
bloom filter for a set of size _n_:
|
||||
|
||||
if filter.EstimateFalsePositiveRate(1000) > 0.001
|
||||
|
||||
Given the particular hashing scheme, it's best to be empirical about this. Note
|
||||
that estimating the FP rate will clear the Bloom filter.
|
||||
|
||||
Discussion here: [Bloom filter](https://groups.google.com/d/topic/golang-nuts/6MktecKi1bE/discussion)
|
||||
|
||||
Godoc documentation: https://godoc.org/github.com/willf/bloom
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
go get -u github.com/willf/bloom
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
|
||||
|
||||
This project includes a Makefile that allows you to test and build the project with simple commands.
|
||||
To see all available options:
|
||||
```bash
|
||||
make help
|
||||
```
|
||||
|
||||
## Running all tests
|
||||
|
||||
Before committing the code, please check if it passes all tests using (note: this will install some dependencies):
|
||||
```bash
|
||||
make deps
|
||||
make qa
|
||||
```
|
1
vendor/github.com/willf/bloom/VERSION
generated
vendored
Normal file
1
vendor/github.com/willf/bloom/VERSION
generated
vendored
Normal file
@ -0,0 +1 @@
|
||||
2.0.3
|
362
vendor/github.com/willf/bloom/bloom.go
generated
vendored
Normal file
362
vendor/github.com/willf/bloom/bloom.go
generated
vendored
Normal file
@ -0,0 +1,362 @@
|
||||
/*
|
||||
Package bloom provides data structures and methods for creating Bloom filters.
|
||||
|
||||
A Bloom filter is a representation of a set of _n_ items, where the main
|
||||
requirement is to make membership queries; _i.e._, whether an item is a
|
||||
member of a set.
|
||||
|
||||
A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large
|
||||
multiple of the cardinality of the set to represent) and _k_, the number of hashing
|
||||
functions on elements of the set. (The actual hashing functions are important, too,
|
||||
but this is not a parameter for this implementation). A Bloom filter is backed by
|
||||
a BitSet; a key is represented in the filter by setting the bits at each value of the
|
||||
hashing functions (modulo _m_). Set membership is done by _testing_ whether the
|
||||
bits at each value of the hashing functions (again, modulo _m_) are set. If so,
|
||||
the item is in the set. If the item is actually in the set, a Bloom filter will
|
||||
never fail (the true positive rate is 1.0); but it is susceptible to false
|
||||
positives. The art is to choose _k_ and _m_ correctly.
|
||||
|
||||
In this implementation, the hashing function used is murmurhash,
|
||||
a non-cryptographic hashing function.
|
||||
|
||||
This implementation accepts keys for setting and testing as []byte. Thus, to
|
||||
add a string item, "Love":
|
||||
|
||||
n := uint(1000)
|
||||
filter := bloom.New(20*n, 5) // load of 20, 5 keys
|
||||
filter.Add([]byte("Love"))
|
||||
|
||||
Similarly, to test if "Love" is in bloom:
|
||||
|
||||
if filter.Test([]byte("Love"))
|
||||
|
||||
For numeric data, I recommend that you look into the encoding/binary package. But,
|
||||
for example, to add a uint32 to the filter:
|
||||
|
||||
i := uint32(100)
|
||||
n1 := make([]byte,4)
|
||||
binary.BigEndian.PutUint32(n1,i)
|
||||
filter.Add(n1)
|
||||
|
||||
Finally, there is a method to estimate the false positive rate of a particular
|
||||
Bloom filter for a set of size _n_:
|
||||
|
||||
if filter.EstimateFalsePositiveRate(1000) > 0.001
|
||||
|
||||
Given the particular hashing scheme, it's best to be empirical about this. Note
|
||||
that estimating the FP rate will clear the Bloom filter.
|
||||
*/
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"github.com/spaolacci/murmur3"
|
||||
"github.com/willf/bitset"
|
||||
)
|
||||
|
||||
// A BloomFilter is a representation of a set of _n_ items, where the main
|
||||
// requirement is to make membership queries; _i.e._, whether an item is a
|
||||
// member of a set.
|
||||
type BloomFilter struct {
|
||||
m uint
|
||||
k uint
|
||||
b *bitset.BitSet
|
||||
}
|
||||
|
||||
// max returns the larger of the two unsigned values x and y.
func max(x, y uint) uint {
	if y >= x {
		return y
	}
	return x
}
|
||||
|
||||
// New creates a new Bloom filter with _m_ bits and _k_ hashing functions
|
||||
// We force _m_ and _k_ to be at least one to avoid panics.
|
||||
func New(m uint, k uint) *BloomFilter {
|
||||
return &BloomFilter{max(1, m), max(1, k), bitset.New(m)}
|
||||
}
|
||||
|
||||
// From creates a new Bloom filter with len(_data_) * 64 bits and _k_ hashing
|
||||
// functions. The data slice is not going to be reset.
|
||||
func From(data []uint64, k uint) *BloomFilter {
|
||||
m := uint(len(data) * 64)
|
||||
return &BloomFilter{m, k, bitset.From(data)}
|
||||
}
|
||||
|
||||
// baseHashes returns the four hash values of data that are used to create k
|
||||
// hashes
|
||||
func baseHashes(data []byte) [4]uint64 {
|
||||
a1 := []byte{1} // to grab another bit of data
|
||||
hasher := murmur3.New128()
|
||||
hasher.Write(data) // #nosec
|
||||
v1, v2 := hasher.Sum128()
|
||||
hasher.Write(a1) // #nosec
|
||||
v3, v4 := hasher.Sum128()
|
||||
return [4]uint64{
|
||||
v1, v2, v3, v4,
|
||||
}
|
||||
}
|
||||
|
||||
// location returns the ith hashed location derived from the four base hash
// values. The result is the raw 64-bit value; it is not reduced to any
// filter size here.
func location(h [4]uint64, i uint) uint64 {
	n := uint64(i)
	parity := n % 2

	// The base term alternates between h[0] and h[1]; the multiplied term is
	// either h[2] or h[3], selected from i as well.
	base := h[parity]
	step := h[2+((n+parity)%4)/2]
	return base + n*step
}
|
||||
|
||||
// location returns the ith hashed location using the four base hash values
|
||||
func (f *BloomFilter) location(h [4]uint64, i uint) uint {
|
||||
return uint(location(h, i) % uint64(f.m))
|
||||
}
|
||||
|
||||
// EstimateParameters estimates requirements for m and k, given the number of
// items n and the desired false positive rate p.
// Based on https://bitbucket.org/ww/bloom/src/829aa19d01d9/bloom.go
// used with permission.
func EstimateParameters(n uint, p float64) (m uint, k uint) {
	items := float64(n)

	// m = ceil(-n * ln(p) / ln(2)^2)
	bits := math.Ceil(-1 * items * math.Log(p) / math.Pow(math.Log(2), 2))
	m = uint(bits)

	// k = ceil(ln(2) * m / n)
	hashes := math.Ceil(math.Log(2) * float64(m) / items)
	k = uint(hashes)

	return m, k
}
|
||||
|
||||
// NewWithEstimates creates a new Bloom filter for about n items with fp
|
||||
// false positive rate
|
||||
func NewWithEstimates(n uint, fp float64) *BloomFilter {
|
||||
m, k := EstimateParameters(n, fp)
|
||||
return New(m, k)
|
||||
}
|
||||
|
||||
// Cap returns the capacity, _m_, of a Bloom filter
|
||||
func (f *BloomFilter) Cap() uint {
|
||||
return f.m
|
||||
}
|
||||
|
||||
// K returns the number of hash functions used in the BloomFilter
|
||||
func (f *BloomFilter) K() uint {
|
||||
return f.k
|
||||
}
|
||||
|
||||
// Add data to the Bloom Filter. Returns the filter (allows chaining)
|
||||
func (f *BloomFilter) Add(data []byte) *BloomFilter {
|
||||
h := baseHashes(data)
|
||||
for i := uint(0); i < f.k; i++ {
|
||||
f.b.Set(f.location(h, i))
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// Merge the data from two Bloom Filters.
|
||||
func (f *BloomFilter) Merge(g *BloomFilter) error {
|
||||
// Make sure the m's and k's are the same, otherwise merging has no real use.
|
||||
if f.m != g.m {
|
||||
return fmt.Errorf("m's don't match: %d != %d", f.m, g.m)
|
||||
}
|
||||
|
||||
if f.k != g.k {
|
||||
return fmt.Errorf("k's don't match: %d != %d", f.m, g.m)
|
||||
}
|
||||
|
||||
f.b.InPlaceUnion(g.b)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Copy creates a copy of a Bloom filter.
|
||||
func (f *BloomFilter) Copy() *BloomFilter {
|
||||
fc := New(f.m, f.k)
|
||||
fc.Merge(f) // #nosec
|
||||
return fc
|
||||
}
|
||||
|
||||
// AddString to the Bloom Filter. Returns the filter (allows chaining)
|
||||
func (f *BloomFilter) AddString(data string) *BloomFilter {
|
||||
return f.Add([]byte(data))
|
||||
}
|
||||
|
||||
// Test returns true if the data is in the BloomFilter, false otherwise.
|
||||
// If true, the result might be a false positive. If false, the data
|
||||
// is definitely not in the set.
|
||||
func (f *BloomFilter) Test(data []byte) bool {
|
||||
h := baseHashes(data)
|
||||
for i := uint(0); i < f.k; i++ {
|
||||
if !f.b.Test(f.location(h, i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TestString returns true if the string is in the BloomFilter, false otherwise.
|
||||
// If true, the result might be a false positive. If false, the data
|
||||
// is definitely not in the set.
|
||||
func (f *BloomFilter) TestString(data string) bool {
|
||||
return f.Test([]byte(data))
|
||||
}
|
||||
|
||||
// TestLocations returns true if all locations are set in the BloomFilter, false
|
||||
// otherwise.
|
||||
func (f *BloomFilter) TestLocations(locs []uint64) bool {
|
||||
for i := 0; i < len(locs); i++ {
|
||||
if !f.b.Test(uint(locs[i] % uint64(f.m))) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// TestAndAdd is the equivalent to calling Test(data) then Add(data).
|
||||
// Returns the result of Test.
|
||||
func (f *BloomFilter) TestAndAdd(data []byte) bool {
|
||||
present := true
|
||||
h := baseHashes(data)
|
||||
for i := uint(0); i < f.k; i++ {
|
||||
l := f.location(h, i)
|
||||
if !f.b.Test(l) {
|
||||
present = false
|
||||
}
|
||||
f.b.Set(l)
|
||||
}
|
||||
return present
|
||||
}
|
||||
|
||||
// TestAndAddString is the equivalent to calling Test(string) then Add(string).
|
||||
// Returns the result of Test.
|
||||
func (f *BloomFilter) TestAndAddString(data string) bool {
|
||||
return f.TestAndAdd([]byte(data))
|
||||
}
|
||||
|
||||
// ClearAll clears all the data in a Bloom filter, removing all keys
|
||||
func (f *BloomFilter) ClearAll() *BloomFilter {
|
||||
f.b.ClearAll()
|
||||
return f
|
||||
}
|
||||
|
||||
// EstimateFalsePositiveRate returns, for a BloomFilter with a estimate of m bits
|
||||
// and k hash functions, what the false positive rate will be
|
||||
// while storing n entries; runs 100,000 tests. This is an empirical
|
||||
// test using integers as keys. As a side-effect, it clears the BloomFilter.
|
||||
func (f *BloomFilter) EstimateFalsePositiveRate(n uint) (fpRate float64) {
|
||||
rounds := uint32(100000)
|
||||
f.ClearAll()
|
||||
n1 := make([]byte, 4)
|
||||
for i := uint32(0); i < uint32(n); i++ {
|
||||
binary.BigEndian.PutUint32(n1, i)
|
||||
f.Add(n1)
|
||||
}
|
||||
fp := 0
|
||||
// test for number of rounds
|
||||
for i := uint32(0); i < rounds; i++ {
|
||||
binary.BigEndian.PutUint32(n1, i+uint32(n)+1)
|
||||
if f.Test(n1) {
|
||||
//fmt.Printf("%v failed.\n", i+uint32(n)+1)
|
||||
fp++
|
||||
}
|
||||
}
|
||||
fpRate = float64(fp) / (float64(rounds))
|
||||
f.ClearAll()
|
||||
return
|
||||
}
|
||||
|
||||
// bloomFilterJSON is an unexported type for marshaling/unmarshaling BloomFilter struct.
|
||||
type bloomFilterJSON struct {
|
||||
M uint `json:"m"`
|
||||
K uint `json:"k"`
|
||||
B *bitset.BitSet `json:"b"`
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler interface.
|
||||
func (f *BloomFilter) MarshalJSON() ([]byte, error) {
|
||||
return json.Marshal(bloomFilterJSON{f.m, f.k, f.b})
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements json.Unmarshaler interface.
|
||||
func (f *BloomFilter) UnmarshalJSON(data []byte) error {
|
||||
var j bloomFilterJSON
|
||||
err := json.Unmarshal(data, &j)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.m = j.M
|
||||
f.k = j.K
|
||||
f.b = j.B
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteTo writes a binary representation of the BloomFilter to an i/o stream.
|
||||
// It returns the number of bytes written.
|
||||
func (f *BloomFilter) WriteTo(stream io.Writer) (int64, error) {
|
||||
err := binary.Write(stream, binary.BigEndian, uint64(f.m))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = binary.Write(stream, binary.BigEndian, uint64(f.k))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
numBytes, err := f.b.WriteTo(stream)
|
||||
return numBytes + int64(2*binary.Size(uint64(0))), err
|
||||
}
|
||||
|
||||
// ReadFrom reads a binary representation of the BloomFilter (such as might
|
||||
// have been written by WriteTo()) from an i/o stream. It returns the number
|
||||
// of bytes read.
|
||||
func (f *BloomFilter) ReadFrom(stream io.Reader) (int64, error) {
|
||||
var m, k uint64
|
||||
err := binary.Read(stream, binary.BigEndian, &m)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
err = binary.Read(stream, binary.BigEndian, &k)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b := &bitset.BitSet{}
|
||||
numBytes, err := b.ReadFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
f.m = uint(m)
|
||||
f.k = uint(k)
|
||||
f.b = b
|
||||
return numBytes + int64(2*binary.Size(uint64(0))), nil
|
||||
}
|
||||
|
||||
// GobEncode implements gob.GobEncoder interface.
|
||||
func (f *BloomFilter) GobEncode() ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
_, err := f.WriteTo(&buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// GobDecode implements gob.GobDecoder interface.
|
||||
func (f *BloomFilter) GobDecode(data []byte) error {
|
||||
buf := bytes.NewBuffer(data)
|
||||
_, err := f.ReadFrom(buf)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Equal tests for the equality of two Bloom filters
|
||||
func (f *BloomFilter) Equal(g *BloomFilter) bool {
|
||||
return f.m == g.m && f.k == g.k && f.b.Equal(g.b)
|
||||
}
|
||||
|
||||
// Locations returns a list of hash locations representing a data item.
|
||||
func Locations(data []byte, k uint) []uint64 {
|
||||
locs := make([]uint64, k)
|
||||
|
||||
// calculate locations
|
||||
h := baseHashes(data)
|
||||
for i := uint(0); i < k; i++ {
|
||||
locs[i] = location(h, i)
|
||||
}
|
||||
|
||||
return locs
|
||||
}
|
Reference in New Issue
Block a user