From 62f5bc66abb9294996f078df2be97f39723aefff Mon Sep 17 00:00:00 2001 From: Brian Dupras Date: Fri, 22 Jan 2016 16:44:23 -0700 Subject: [PATCH] expectedFpp() --- TODO.md | 21 +++++++++++ .../guava/probably/BloomFilter.java | 10 +++++ .../guava/probably/CuckooFilter.java | 37 +++++++++++++++++-- .../guava/probably/ProbabilisticFilter.java | 32 ++++++++++++++++ .../cuckoo/AbstractCuckooStrategy.java | 24 ++++++++++++ .../guava/probably/cuckoo/CuckooTable.java | 13 ++++++- .../guava/probably/cuckoo/Strategy.java | 1 + .../guava/probably/CuckooFilterTest.java | 2 +- 8 files changed, 135 insertions(+), 5 deletions(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..ddb9e14 --- /dev/null +++ b/TODO.md @@ -0,0 +1,21 @@ +Guava-Probably: TODO List +======================================================= + +=1.0 +* removeAll(Collection) +* removeAll(Filter) +* @throws UnsupportedOperationException - true up to java.util.Set +* Full interface tests on CuckooFilter and BloomFilter +* ?? check out MultiSet interface for semantics + + +=Beyond 1.0 + +==CI +* commit/push to release SNAPSHOT, major, minor, patch :: maven central && javadocs + +== Features +* CuckooFilter impl increase max capacity (separate even/odd tables?) 
+* Primitive interface API (to avoid object alloc) +* Direct hash fn invocation (to avoid object alloc) +* extract filter dimensions calculation diff --git a/src/main/java/com/duprasville/guava/probably/BloomFilter.java b/src/main/java/com/duprasville/guava/probably/BloomFilter.java index 42a7584..518cfca 100644 --- a/src/main/java/com/duprasville/guava/probably/BloomFilter.java +++ b/src/main/java/com/duprasville/guava/probably/BloomFilter.java @@ -228,4 +228,14 @@ public boolean remove(T t) { throw new UnsupportedOperationException(); } + public boolean removeAll(Collection c) { + checkNotNull(c); + throw new UnsupportedOperationException(); + } + + public boolean removeAll(ProbabilisticFilter f) { + checkNotNull(f); + throw new UnsupportedOperationException(); + } + } \ No newline at end of file diff --git a/src/main/java/com/duprasville/guava/probably/CuckooFilter.java b/src/main/java/com/duprasville/guava/probably/CuckooFilter.java index e1ab3b4..dabc21d 100644 --- a/src/main/java/com/duprasville/guava/probably/CuckooFilter.java +++ b/src/main/java/com/duprasville/guava/probably/CuckooFilter.java @@ -195,8 +195,8 @@ public boolean add(T e) { } /** - * Removes {@code e} from this {@link CuckooFilter}. {@code e} must been previously added to the - * filter. Removing an {@code e} that hasn't been added to the filter may put the filter in an + * Removes {@code e} from this {@link CuckooFilter}. {@code e} must have been previously added to + * the filter. Removing an {@code e} that hasn't been added to the filter may put the filter in an * inconsistent state causing it to return false negative responses from {@link * #contains(Object)}.

If {@code false} is returned, this is definitely an indication * that either this invocation or a previous invocation has been made without a matching @@ -211,6 +211,37 @@ public boolean remove(T e) { return strategy.remove(e, funnel, table); } + /** + * Removes all elements of {@code c} from this {@link CuckooFilter}. Each element of {@code c} + * must be represented in the filter before invocation. Removing an element that hasn't been added to + * the filter may put the filter in an inconsistent state causing it to return false negative + * responses from {@link #contains(Object)}.

If {@code false} is returned, this is + * definitely an indication that {@code c} contained at least one element that was not + * represented in the filter. This condition is always an error and this {@link CuckooFilter} can + * no longer be relied upon to return correct {@code false} responses from {@link + * #contains(Object)}. + * + * @return {@code true} if every element of {@code c} was successfully removed from the filter. + */ + @CheckReturnValue + public boolean removeAll(Collection c) { + for (T e : c) { + if (!remove(e)) { + return false; + } + } + return true; + } + + public boolean removeAll(ProbabilisticFilter f) { + checkNotNull(f); + if (this == f) { + clear(); + return true; + } + checkCompatibility(f, "remove"); + return this.strategy.removeAll(this.table, ((CuckooFilter) f).table); + } /** * Returns the number of inserted items currently represented in the filter. @@ -248,7 +279,7 @@ long bitSize() { * degrading its {@code FPP}. */ public double currentFpp() { - return table.expectedFpp(); + return table.currentFpp(); } /** diff --git a/src/main/java/com/duprasville/guava/probably/ProbabilisticFilter.java b/src/main/java/com/duprasville/guava/probably/ProbabilisticFilter.java index eb883e1..205a350 100644 --- a/src/main/java/com/duprasville/guava/probably/ProbabilisticFilter.java +++ b/src/main/java/com/duprasville/guava/probably/ProbabilisticFilter.java @@ -101,6 +101,38 @@ public interface ProbabilisticFilter { */ boolean remove(E e); + /** + * Returns {@code true} if {@link #remove(Object)}{@code == true} for all of the elements of the + * specified collection. + * + * @param c collection of elements to be removed from the filter + * @return {@code true} if all of the elements of the specified collection were successfully + * removed from the filter, {@code false} if any of the elements was not successfully removed. 
+ * @throws ClassCastException if the types of one or more elements in the specified + * collection are incompatible with this filter (optional) + * @throws NullPointerException if the specified collection contains one or more null + * elements and this filter does not permit null elements + * (optional), or if the specified collection is null + * @throws UnsupportedOperationException if the {@link #removeAll(Collection)} operation is not + * supported by this filter + */ + boolean removeAll(Collection c); + + /** + * Subtracts the specified filter from {@code this} filter. The mutations happen to {@code this} + * instance. Callers must ensure that the specified filter represents entries that have been + * previously added to {@code this} filter. + * + * @param f The filter to subtract from {@code this} filter. {@code f} is not mutated. + * @return {@code true} if the operation was successful, {@code false} otherwise. + * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)}{@code == + * false} + * @throws NullPointerException if the specified filter is null + * @throws UnsupportedOperationException if the {@link #removeAll(ProbabilisticFilter)} operation + * is not supported by this filter + */ + boolean removeAll(ProbabilisticFilter f); + /** * Returns {@code true} if this filter might contain the specified element. 
* diff --git a/src/main/java/com/duprasville/guava/probably/cuckoo/AbstractCuckooStrategy.java b/src/main/java/com/duprasville/guava/probably/cuckoo/AbstractCuckooStrategy.java index 71b6f60..ccb7cf2 100644 --- a/src/main/java/com/duprasville/guava/probably/cuckoo/AbstractCuckooStrategy.java +++ b/src/main/java/com/duprasville/guava/probably/cuckoo/AbstractCuckooStrategy.java @@ -116,6 +116,30 @@ public boolean containsAll(CuckooTable thiz, CuckooTable that) { return true; } + public boolean removeAll(CuckooTable thiz, CuckooTable that) { + if (!thiz.isCompatible(that)) { + return false; + } + + for (long index = 0; index < that.numBuckets; index++) { + for (int entry = 0; entry < that.numEntriesPerBucket; entry++) { + int fingerprint = that.readEntry(index, entry); + if (CuckooTable.EMPTY_ENTRY == fingerprint) { + continue; + } + + int thizCount = thiz.countEntry(fingerprint, index) + + thiz.countEntry(fingerprint, altIndex(index, fingerprint, thiz.numBuckets)); + int thatCount = that.countEntry(fingerprint, index) + + that.countEntry(fingerprint, altIndex(index, fingerprint, that.numBuckets)); + if (thizCount >= thatCount) { + return false; + } + } + } + return true; + } + @Override public boolean equals(Object obj) { if (obj instanceof Strategy) { diff --git a/src/main/java/com/duprasville/guava/probably/cuckoo/CuckooTable.java b/src/main/java/com/duprasville/guava/probably/cuckoo/CuckooTable.java index a1e9d85..a5bc735 100644 --- a/src/main/java/com/duprasville/guava/probably/cuckoo/CuckooTable.java +++ b/src/main/java/com/duprasville/guava/probably/cuckoo/CuckooTable.java @@ -23,6 +23,7 @@ import java.util.Arrays; import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.Math.pow; public class CuckooTable { static final int EMPTY_ENTRY = 0x00; @@ -249,7 +250,17 @@ public double load() { } public double expectedFpp() { - return (2.0D * size / numBuckets) / Math.pow(2, numBitsPerEntry); + return (2.0D * size / numBuckets) / 
pow(2, numBitsPerEntry); + } + + public double currentFpp() { + return 1.0D - pow( + ( pow(2, numBitsPerEntry) - 2 ) + / + ( pow(2, numBitsPerEntry) - 1 ) + , + 2 * numEntriesPerBucket * load() + ); } public double averageBitsPerEntry() { diff --git a/src/main/java/com/duprasville/guava/probably/cuckoo/Strategy.java b/src/main/java/com/duprasville/guava/probably/cuckoo/Strategy.java index 49a0ca7..2bdb067 100644 --- a/src/main/java/com/duprasville/guava/probably/cuckoo/Strategy.java +++ b/src/main/java/com/duprasville/guava/probably/cuckoo/Strategy.java @@ -26,4 +26,5 @@ public interface Strategy extends Serializable { boolean addAll(CuckooTable thiz, CuckooTable that); boolean equivalent(CuckooTable thiz, CuckooTable that); boolean containsAll(CuckooTable thiz, CuckooTable that); + boolean removeAll(CuckooTable thiz, CuckooTable that); } diff --git a/src/test/java/com/duprasville/guava/probably/CuckooFilterTest.java b/src/test/java/com/duprasville/guava/probably/CuckooFilterTest.java index c894ac4..c31435c 100644 --- a/src/test/java/com/duprasville/guava/probably/CuckooFilterTest.java +++ b/src/test/java/com/duprasville/guava/probably/CuckooFilterTest.java @@ -120,7 +120,7 @@ public void testCreateAndCheckBealDupras32CuckooFilterWithKnownUtf8FalsePositive } assertEquals(expectedNumFpp, actualNumFpp); // The normal order of (expected, actual) is reversed here on purpose. - assertEquals((double) expectedNumFpp / numInsertions, cf.currentFpp(), 0.00037); + assertEquals((double) expectedNumFpp / numInsertions, cf.currentFpp(), 0.0004); } /**