pastebin

Paste #65873: SimpleBloomFilter.java (v2)

package com.uprizer.sensearray.util;

import java.io.Serializable;
import java.util.BitSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;

/**
 * A simple Bloom Filter (see http://en.wikipedia.org/wiki/Bloom_filter) that
 * uses java.util.Random as a primitive hash function, and which implements
 * Java's Set interface for convenience.
 * 
 * Only the add(), addAll(), contains(), and containsAll() methods are
 * implemented. Calling any other method will yield an 
 * UnsupportedOperationException.
 * 
 * This code may be used, modified, and redistributed provided that the 
 * author tag below remains intact.
 * 
 * @author Ian Clarke <ian@uprizer.com>
 * 
 * @param <E>
 *            The type of object the BloomFilter should contain
 */
public class SimpleBloomFilter<E> implements Set<E>, Serializable {
	private static final long serialVersionUID = 3527833617516722215L;
	protected int k;
	BitSet bitSet;
	int bitArraySize, expectedElements;

	/**
	 * Construct a SimpleBloomFilter. You must specify the number of bits in the
	 * Bloom Filter, and also you should specify the number of items you
	 * expect to add. The latter is used to choose some optimal internal values to
	 * minimize the false-positive rate (which can be estimated with
	 * expectedFalsePositiveRate()).
	 * 
	 * @param bitArraySize
	 *            The number of bits in the bit array (often called 'm' in the
	 *            context of bloom filters).
	 * @param expectedElements
	 *            The typical number of items you expect to be added to the
	 *            SimpleBloomFilter (often called 'n').
	 */
	public SimpleBloomFilter(int bitArraySize, int expectedElements) {
		this.bitArraySize = bitArraySize;
		this.expectedElements = expectedElements;
		this.k = (int) Math.ceil((bitArraySize / expectedElements)
				* Math.log(2.0));
		bitSet = new BitSet(bitArraySize);
	}

	/**
	 * Calculates the approximate probability of the contains() method returning
	 * true for an object that had not previously been inserted into the bloom
	 * filter. This is known as the "false positive probability".
	 * 
	 * @return The estimated false positive rate
	 */
	public double expectedFalsePositiveProbability() {
		return Math.pow((1 - Math.exp(-k * (double) expectedElements
				/ (double) bitArraySize)), k);
	}

	/*
	 * @return This method will always return false
	 * 
	 * @see java.util.Set#add(java.lang.Object)
	 */
	public boolean add(E o) {
		Random r = new Random(o.hashCode());
		for (int x = 0; x < k; x++) {
			bitSet.set(r.nextInt(bitArraySize), true);
		}
		return false;
	}

	/**
	 * @return This method will always return false
	 */
	public boolean addAll(Collection<? extends E> c) {
		for (E o : c) {
			add(o);
		}
		return false;
	}

	/**
	 * Clear the Bloom Filter
	 */
	public void clear() {
		for (int x = 0; x < bitSet.length(); x++) {
			bitSet.set(x, false);
		}
	}

	/**
	 * @return False indicates that o was definitely not added to this Bloom Filter, 
	 *         true indicates that it probably was.  The probability can be estimated
	 *         using the expectedFalsePositiveProbability() method.
	 */
	public boolean contains(Object o) {
		Random r = new Random(o.hashCode());
		for (int x = 0; x < k; x++) {
			if (!bitSet.get(r.nextInt(bitArraySize)))
				return false;
		}
		return true;
	}

	public boolean containsAll(Collection<?> c) {
		for (Object o : c) {
			if (!contains(o))
				return false;
		}
		return true;
	}

	/**
	 * Not implemented
	 */
	public boolean isEmpty() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public Iterator<E> iterator() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public boolean remove(Object o) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public boolean removeAll(Collection<?> c) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public boolean retainAll(Collection<?> c) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public int size() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public Object[] toArray() {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not implemented
	 */
	public <T> T[] toArray(T[] a) {
		throw new UnsupportedOperationException();
	}
}

New paste

  Language:
Private   — Wrap long lines  —  1 + 2 =   —  

About this pastebin

Welcome to the bulix.org / pastebin. Please don't use this pastebin for illegal purposes, defamation or kitten-squashing.

This pastebin is written using PHP and MySQL and relies on Alex Gorbatchev's syntax hhighlighter (JavaScript based). To avoid spam, you will be required to complete a small mathematical challenge when adding a new paste.

New! Try the pastebin command-line tool: paste.py (requires Python and python-beautifulsoup).

30 more recent public pastes


# Title Type Date
1 Untitled C paste by 77.104.192.100 C 02 Sep 2010, 16:52
2 Untitled Visual Basic paste by 187.7.138.176 Visual Basic 02 Sep 2010, 16:36
3 Untitled Ruby paste by 217.9.0.179 Ruby 02 Sep 2010, 15:42
4 whywhywhy C 02 Sep 2010, 15:15
5 Untitled Javascript paste by 134.184.43.109 Javascript 02 Sep 2010, 14:41
6 Untitled Bash paste by 158.42.126.237 Bash 02 Sep 2010, 14:32
7 Untitled C paste by 184.75.34.234 C 02 Sep 2010, 14:24
8 Untitled Perl paste by 155.253.6.151 Perl 02 Sep 2010, 14:22
9 Untitled Ruby paste by 58.215.81.151 Ruby 02 Sep 2010, 14:14
10 Untitled SQL paste by 98.190.244.179 SQL 02 Sep 2010, 13:55
11 Untitled C paste by 85.18.126.106 C 02 Sep 2010, 13:40
12 Untitled C++ paste by 187.111.15.230 C++ 02 Sep 2010, 11:00
13 Untitled SQL paste by 85.254.213.163 SQL 02 Sep 2010, 10:58
14 Untitled Ruby paste by 89.232.63.173 Ruby 02 Sep 2010, 10:47
15 Untitled Visual Basic paste by 74.50.59.175 Visual Basic 02 Sep 2010, 09:40
16 Untitled Perl paste by 202.14.181.11 Perl 02 Sep 2010, 09:04
17 Untitled PHP paste by 61.181.246.205 PHP 02 Sep 2010, 08:52
18 Untitled Bash paste by 174.138.160.29 Bash 02 Sep 2010, 08:43
19 AdultFriendFinder Report ASCII 02 Sep 2010, 06:56
20 Untitled PHP paste by 183.91.87.16 PHP 02 Sep 2010, 03:56
21 Untitled Javascript paste by 81.227.80.35 Javascript 02 Sep 2010, 03:21
22 Untitled Lua paste by 218.201.21.182 Lua 02 Sep 2010, 01:58
23 Untitled C# paste by 79.98.27.109 C# 02 Sep 2010, 01:41
24 Untitled SQL paste by 200.184.84.133 SQL 02 Sep 2010, 00:41
25 Untitled C# paste by 173.61.246.36 C# 01 Sep 2010, 23:24
26 Untitled Visual Basic paste by 121.135.192.120 Visual Basic 01 Sep 2010, 23:07
27 Untitled C# paste by 190.30.88.228 C# 01 Sep 2010, 21:05
28 Untitled Css paste by 85.9.50.182 Css 01 Sep 2010, 19:53
29 Untitled C paste by 65.46.53.202 C 01 Sep 2010, 19:41
30 Untitled Ruby paste by 201.192.75.42 Ruby 01 Sep 2010, 18:44

Powered by the Bulix.org Code Pastebin, by Maxime Petazzoni. View pastebin statistics.