2011-06-29 12:06:33 -04:00
|
|
|
/*
|
|
|
|
* bitdht/bdbloom.cc
|
|
|
|
*
|
|
|
|
* BitDHT: A Flexible DHT library.
|
|
|
|
*
|
|
|
|
* Copyright 2011 by Robert Fernie
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Library General Public
|
|
|
|
* License Version 3 as published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Library General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Library General Public
|
|
|
|
* License along with this library; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
|
|
|
* USA.
|
|
|
|
*
|
|
|
|
* Please report all bugs and problems to "bitdht@lunamutt.com".
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "util/bdbloom.h"
|
|
|
|
|
|
|
|
#include <sstream>
|
|
|
|
#include <iomanip>
|
|
|
|
|
2011-07-05 19:53:40 -04:00
|
|
|
#if defined(_WIN32) || defined(__MINGW32__)
|
|
|
|
#include <malloc.h>
|
|
|
|
#endif
|
2011-06-29 12:06:33 -04:00
|
|
|
/* Bloom Filter implementation */
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Construct a filter with m bit slots and k hash-function slots.
 *
 * Every bit slot starts cleared and every hash-function slot starts
 * as NULL; the hash functions are installed later through
 * setHashFunction().
 */
bloomFilter::bloomFilter(int m, int k)
{
	/* record the dimensions and start with no elements */
	mFilterBits = m;
	mNoHashs = k;
	mNoElements = 0;

	/* size the storage */
	mBits.resize(m);
	mHashFns.resize(k);

	/* explicitly clear every bit slot */
	for(int bitIdx = 0; bitIdx < m; ++bitIdx)
	{
		mBits[bitIdx] = 0;
	}

	/* no hash functions installed yet */
	for(int fnIdx = 0; fnIdx < k; ++fnIdx)
	{
		mHashFns[fnIdx] = NULL;
	}
}
|
|
|
|
|
|
|
|
/*
 * Value of a single hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f').
 * Any other character maps to 0.
 */
static uint8_t bdHexDigitValue(char ch)
{
	if (ch >= '0' && ch <= '9')
	{
		return static_cast<uint8_t>(ch - '0');
	}
	if (ch >= 'A' && ch <= 'F')
	{
		return static_cast<uint8_t>(ch - 'A' + 10);
	}
	if (ch >= 'a' && ch <= 'f')
	{
		return static_cast<uint8_t>(ch - 'a' + 10);
	}
	return 0;
}

/*
 * Combine two hex characters into one byte.
 *
 * ch1 supplies the high nibble and ch2 the low nibble. A character
 * that is not a hex digit contributes 0 for its nibble.
 */
uint8_t convertCharToUint8(char ch1, char ch2)
{
	const uint8_t highNibble = bdHexDigitValue(ch1);
	const uint8_t lowNibble = bdHexDigitValue(ch2);

	return static_cast<uint8_t>((highNibble << 4) + lowNibble);
}
|
|
|
|
|
|
|
|
#define BITS_PER_BYTE (8)
|
|
|
|
|
|
|
|
int bloomFilter::setFilterBits(const std::string &hex)
|
|
|
|
{
|
|
|
|
int bytes = (mFilterBits / BITS_PER_BYTE);
|
|
|
|
if (mFilterBits % BITS_PER_BYTE)
|
|
|
|
{
|
|
|
|
bytes++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hex.size() < bytes * 2)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert to binary array.
|
|
|
|
uint8_t *tmparray = (uint8_t *) malloc(bytes);
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
for(i = 0; i < bytes; i++)
|
|
|
|
{
|
|
|
|
tmparray[i] = convertCharToUint8(hex[2 * i], hex[2 * i + 1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < mFilterBits; i++)
|
|
|
|
{
|
|
|
|
int byte = i / BITS_PER_BYTE;
|
|
|
|
int bit = i % BITS_PER_BYTE;
|
|
|
|
uint8_t value = (tmparray[byte] & (1 << bit));
|
|
|
|
|
|
|
|
if (value)
|
|
|
|
{
|
|
|
|
mBits[i] = 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
mBits[i] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(tmparray);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string bloomFilter::getFilter()
|
|
|
|
{
|
|
|
|
/* extract filter as a hex string */
|
|
|
|
std::string output;
|
|
|
|
int bytes = (mFilterBits / BITS_PER_BYTE);
|
|
|
|
if (mFilterBits % BITS_PER_BYTE)
|
|
|
|
{
|
|
|
|
bytes++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert to binary array.
|
|
|
|
uint8_t *tmparray = (uint8_t *) malloc(bytes);
|
|
|
|
int i,j;
|
|
|
|
|
|
|
|
for(i = 0; i < bytes; i++)
|
|
|
|
{
|
|
|
|
tmparray[i] = 0;
|
|
|
|
for(j = 0; j < BITS_PER_BYTE; j++)
|
|
|
|
{
|
|
|
|
int bit = i * BITS_PER_BYTE + j;
|
|
|
|
if (mBits[bit])
|
|
|
|
{
|
|
|
|
tmparray[i] |= (1 << j);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::ostringstream out;
|
|
|
|
for(int i = 0; i < bytes; i++)
|
|
|
|
{
|
|
|
|
out << std::setw(2) << std::setfill('0') << std::hex << (uint32_t) (tmparray)[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
free(tmparray);
|
|
|
|
|
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
void bloomFilter::setBit(int bit)
|
|
|
|
{
|
|
|
|
mBits[bit] = 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool bloomFilter::isBitSet(int bit)
|
|
|
|
{
|
|
|
|
return (mBits[bit] == 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t bloomFilter::filterBits()
|
|
|
|
{
|
|
|
|
return mFilterBits;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t bloomFilter::countBits()
|
|
|
|
{
|
|
|
|
int count = 0;
|
|
|
|
int i;
|
|
|
|
for(i = 0; i < mFilterBits; i++)
|
|
|
|
{
|
|
|
|
if (mBits[i])
|
|
|
|
{
|
|
|
|
count++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void bloomFilter::printFilter(std::ostream &out)
|
|
|
|
{
|
|
|
|
out << "bloomFilter: m = " << mFilterBits;
|
|
|
|
out << " k = " << mNoHashs;
|
|
|
|
out << " n = " << mNoElements;
|
|
|
|
out << std::endl;
|
|
|
|
|
|
|
|
out << "BITS: ";
|
|
|
|
int i;
|
|
|
|
for(i = 0; i < mFilterBits; i++)
|
|
|
|
{
|
|
|
|
if ((i > 0) && (i % 32 == 0))
|
|
|
|
{
|
|
|
|
out << std::endl;
|
|
|
|
out << "BITS: ";
|
|
|
|
}
|
|
|
|
if (mBits[i])
|
|
|
|
{
|
|
|
|
out << "1";
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
out << "0";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out << std::endl;
|
|
|
|
out << "STR: " << getFilter();
|
|
|
|
out << std::endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bloomFilter::setHashFunction(int idx, uint32_t (*hashfn)(const std::string &))
|
|
|
|
{
|
|
|
|
mHashFns[idx] = hashfn;
|
|
|
|
}
|
|
|
|
|
|
|
|
void bloomFilter::add(const std::string &hex)
|
|
|
|
{
|
|
|
|
uint32_t (*hashfn)(const std::string &);
|
|
|
|
int i;
|
|
|
|
for(i = 0; i < mNoHashs; i++)
|
|
|
|
{
|
|
|
|
hashfn = mHashFns[i];
|
|
|
|
|
|
|
|
int bit = hashfn(hex);
|
|
|
|
|
|
|
|
setBit(bit);
|
|
|
|
}
|
|
|
|
|
|
|
|
mNoElements++;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
bool bloomFilter::test(const std::string &hex)
|
|
|
|
{
|
|
|
|
uint32_t (*hashfn)(const std::string &);
|
|
|
|
int i;
|
|
|
|
for(i = 0; i < mNoHashs; i++)
|
|
|
|
{
|
|
|
|
hashfn = mHashFns[i];
|
|
|
|
|
|
|
|
int bit = hashfn(hex);
|
|
|
|
|
|
|
|
if (!isBitSet(bit))
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t getFirst10BitsAsNumber(const std::string &input)
|
|
|
|
{
|
|
|
|
if (input.size() < 8)
|
|
|
|
{
|
|
|
|
std::cerr << "getFirst10BitsAsNumber() ERROR Size too small!";
|
|
|
|
std::cerr << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t data[4];
|
|
|
|
|
|
|
|
data[0] = convertCharToUint8(input[0], input[1]);
|
|
|
|
data[1] = convertCharToUint8(input[2], input[3]);
|
|
|
|
data[2] = convertCharToUint8(input[4], input[5]);
|
|
|
|
data[3] = convertCharToUint8(input[6], input[7]);
|
|
|
|
|
|
|
|
uint32_t val = ((data[0] & 0xff) << 2) + ((data[1] & 0xc0) >> 6);
|
|
|
|
|
|
|
|
#ifdef DEBUG_BLOOM
|
|
|
|
std::cerr << "getFirst10BitsAsNumber() input: " << input;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
std::cerr << "getFirst10BitsAsNumber() ";
|
|
|
|
std::cerr << " data[0]: " << std::hex << (uint32_t) data[0];
|
|
|
|
std::cerr << " data[1]: " << (uint32_t) data[1];
|
|
|
|
std::cerr << " data[2]: " << (uint32_t) data[2];
|
|
|
|
std::cerr << " data[3]: " << (uint32_t) data[3];
|
|
|
|
std::cerr << " val: " << std::dec << (uint32_t) val;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t getSecond10BitsAsNumber(const std::string &input)
|
|
|
|
{
|
|
|
|
if (input.size() < 8)
|
|
|
|
{
|
|
|
|
std::cerr << "getSecond10BitsAsNumber() ERROR Size too small!";
|
|
|
|
std::cerr << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t data[4];
|
|
|
|
|
|
|
|
data[0] = convertCharToUint8(input[0], input[1]);
|
|
|
|
data[1] = convertCharToUint8(input[2], input[3]);
|
|
|
|
data[2] = convertCharToUint8(input[4], input[5]);
|
|
|
|
data[3] = convertCharToUint8(input[6], input[7]);
|
|
|
|
|
|
|
|
uint32_t val = ((data[1] & 0x3f) << 4) + ((data[2] & 0xf0) >> 4);
|
|
|
|
|
|
|
|
#ifdef DEBUG_BLOOM
|
|
|
|
std::cerr << "getSecond10BitsAsNumber() input: " << input;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
std::cerr << "getSecond10BitsAsNumber() ";
|
|
|
|
std::cerr << " data[0]: " << std::hex << (uint32_t) data[0];
|
|
|
|
std::cerr << " data[1]: " << (uint32_t) data[1];
|
|
|
|
std::cerr << " data[2]: " << (uint32_t) data[2];
|
|
|
|
std::cerr << " data[3]: " << (uint32_t) data[3];
|
|
|
|
std::cerr << " val: " << std::dec << (uint32_t) val;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t getMid10BitsAsNumber(const std::string &input)
|
|
|
|
{
|
|
|
|
if (input.size() < 8)
|
|
|
|
{
|
|
|
|
std::cerr << "getMid10BitsAsNumber() ERROR Size too small!";
|
|
|
|
std::cerr << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t data[4];
|
|
|
|
|
|
|
|
data[0] = convertCharToUint8(input[0], input[1]);
|
|
|
|
data[1] = convertCharToUint8(input[2], input[3]);
|
|
|
|
data[2] = convertCharToUint8(input[4], input[5]);
|
|
|
|
data[3] = convertCharToUint8(input[6], input[7]);
|
|
|
|
|
|
|
|
uint32_t val = ((data[0] & 0x07) << 7) + ((data[1] & 0x7f) >> 1);
|
|
|
|
|
|
|
|
#ifdef DEBUG_BLOOM
|
|
|
|
std::cerr << "getMid10BitsAsNumber() input: " << input;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
std::cerr << "getMid10BitsAsNumber() ";
|
|
|
|
std::cerr << " data[0]: " << std::hex << (uint32_t) data[0];
|
|
|
|
std::cerr << " data[1]: " << (uint32_t) data[1];
|
|
|
|
std::cerr << " data[2]: " << (uint32_t) data[2];
|
|
|
|
std::cerr << " data[3]: " << (uint32_t) data[3];
|
|
|
|
std::cerr << " val: " << std::dec << (uint32_t) val;
|
|
|
|
std::cerr << std::endl;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#define BDFILTER_M 1024
|
|
|
|
#define BDFILTER_K 3
|
|
|
|
|
|
|
|
bdBloom::bdBloom()
|
|
|
|
:bloomFilter(BDFILTER_M, BDFILTER_K)
|
|
|
|
{
|
|
|
|
/* set the fns. */
|
|
|
|
setHashFunction(0, getFirst10BitsAsNumber);
|
|
|
|
setHashFunction(1, getSecond10BitsAsNumber);
|
|
|
|
setHashFunction(2, getMid10BitsAsNumber);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|