mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-15 11:20:03 +00:00
211 lines
3.6 KiB
C
211 lines
3.6 KiB
C
|
/*
|
||
|
* Bloom filter tracking which scores are present in our arenas
|
||
|
* and (more importantly) which are not.
|
||
|
*/
|
||
|
|
||
|
#include "stdinc.h"
|
||
|
#include "dat.h"
|
||
|
#include "fns.h"
|
||
|
|
||
|
int
|
||
|
bloominit(Bloom *b, vlong vsize, u8int *data)
|
||
|
{
|
||
|
ulong size;
|
||
|
|
||
|
size = vsize;
|
||
|
if(size != vsize){ /* truncation */
|
||
|
werrstr("bloom data too big");
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
b->size = size;
|
||
|
b->nhash = 32; /* will be fixed by caller on initialization */
|
||
|
if(data != nil)
|
||
|
if(unpackbloomhead(b, data) < 0)
|
||
|
return -1;
|
||
|
|
||
|
fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash);
|
||
|
b->mask = b->size-1;
|
||
|
b->data = data;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
wbbloomhead(Bloom *b)
|
||
|
{
|
||
|
packbloomhead(b, b->data);
|
||
|
}
|
||
|
|
||
|
Bloom*
|
||
|
readbloom(Part *p)
|
||
|
{
|
||
|
int i, n;
|
||
|
uint ones;
|
||
|
uchar buf[512];
|
||
|
uchar *data;
|
||
|
u32int *a;
|
||
|
Bloom *b;
|
||
|
|
||
|
b = vtmallocz(sizeof *b);
|
||
|
if(readpart(p, 0, buf, sizeof buf) < 0)
|
||
|
return nil;
|
||
|
fprint(2, "header %.16H\n", buf);
|
||
|
if(bloominit(b, 0, buf) < 0){
|
||
|
vtfree(b);
|
||
|
return nil;
|
||
|
}
|
||
|
data = vtmallocz(b->size);
|
||
|
if(readpart(p, 0, data, b->size) < 0){
|
||
|
vtfree(b);
|
||
|
vtfree(data);
|
||
|
return nil;
|
||
|
}
|
||
|
b->data = data;
|
||
|
b->part = p;
|
||
|
|
||
|
a = (u32int*)b->data;
|
||
|
n = b->size/4;
|
||
|
ones = 0;
|
||
|
for(i=0; i<n; i++)
|
||
|
ones += countbits(a[i]);
|
||
|
addstat(StatBloomOnes, ones);
|
||
|
|
||
|
if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
|
||
|
addstat(StatBloomBits, b->size*8-1);
|
||
|
else
|
||
|
addstat(StatBloomBits, b->size*8);
|
||
|
|
||
|
return b;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
writebloom(Bloom *b)
|
||
|
{
|
||
|
wbbloomhead(b);
|
||
|
return writepart(b->part, 0, b->data, b->size);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Derive two random 32-bit quantities a, b from the score
|
||
|
* and then use a+b*i as a sequence of bloom filter indices.
|
||
|
* Michael Mitzenmacher has a recent (2005) paper saying this is okay.
|
||
|
* We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
|
||
|
*/
|
||
|
static void
|
||
|
gethashes(u8int *score, ulong *h)
|
||
|
{
|
||
|
int i;
|
||
|
u32int a, b;
|
||
|
|
||
|
a = 0;
|
||
|
b = 0;
|
||
|
for(i=4; i+8<=VtScoreSize; i+=8){
|
||
|
a ^= *(u32int*)(score+i);
|
||
|
b ^= *(u32int*)(score+i+4);
|
||
|
}
|
||
|
if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
|
||
|
a ^= *(u32int*)(score+i);
|
||
|
for(i=0; i<BloomMaxHash; i++, a+=b)
|
||
|
h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
_markbloomfilter(Bloom *b, u8int *score)
|
||
|
{
|
||
|
int i, nnew;
|
||
|
ulong h[BloomMaxHash];
|
||
|
u32int x, *y, z, *tab;
|
||
|
|
||
|
trace("markbloomfilter", "markbloomfilter %V", score);
|
||
|
gethashes(score, h);
|
||
|
nnew = 0;
|
||
|
tab = (u32int*)b->data;
|
||
|
for(i=0; i<b->nhash; i++){
|
||
|
x = h[i];
|
||
|
y = &tab[(x&b->mask)>>5];
|
||
|
z = 1<<(x&31);
|
||
|
if(!(*y&z)){
|
||
|
nnew++;
|
||
|
*y |= z;
|
||
|
}
|
||
|
}
|
||
|
if(nnew)
|
||
|
addstat(StatBloomOnes, nnew);
|
||
|
|
||
|
trace("markbloomfilter", "markbloomfilter exit");
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
_inbloomfilter(Bloom *b, u8int *score)
|
||
|
{
|
||
|
int i;
|
||
|
ulong h[BloomMaxHash], x;
|
||
|
u32int *tab;
|
||
|
|
||
|
gethashes(score, h);
|
||
|
tab = (u32int*)b->data;
|
||
|
for(i=0; i<b->nhash; i++){
|
||
|
x = h[i];
|
||
|
if(!(tab[(x&b->mask)>>5] & (1<<(x&31))))
|
||
|
return 0;
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
inbloomfilter(Bloom *b, u8int *score)
|
||
|
{
|
||
|
int r;
|
||
|
uint ms;
|
||
|
|
||
|
if(b == nil)
|
||
|
return 1;
|
||
|
|
||
|
ms = msec();
|
||
|
rlock(&b->lk);
|
||
|
r = _inbloomfilter(b, score);
|
||
|
runlock(&b->lk);
|
||
|
ms = ms - msec();
|
||
|
addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
|
||
|
if(r)
|
||
|
addstat(StatBloomMiss, 1);
|
||
|
else
|
||
|
addstat(StatBloomHit, 1);
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
markbloomfilter(Bloom *b, u8int *score)
|
||
|
{
|
||
|
if(b == nil)
|
||
|
return;
|
||
|
|
||
|
rlock(&b->lk);
|
||
|
qlock(&b->mod);
|
||
|
_markbloomfilter(b, score);
|
||
|
qunlock(&b->mod);
|
||
|
runlock(&b->lk);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
bloomwriteproc(void *v)
|
||
|
{
|
||
|
Bloom *b;
|
||
|
|
||
|
b = v;
|
||
|
for(;;){
|
||
|
recv(b->writechan, 0);
|
||
|
if(writebloom(b) < 0)
|
||
|
fprint(2, "oops! writing bloom: %r\n");
|
||
|
send(b->writedonechan, 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void
|
||
|
startbloomproc(Bloom *b)
|
||
|
{
|
||
|
b->writechan = chancreate(sizeof(void*), 0);
|
||
|
b->writedonechan = chancreate(sizeof(void*), 0);
|
||
|
vtproc(bloomwriteproc, b);
|
||
|
}
|