/*
 * Rebuild the Venti index from scratch.
 */

#include "stdinc.h"
#include "dat.h"
#include "fns.h"

/*
 * Write a single bucket.  Could profit from a big buffer here
 * so that we can absorb sporadic runs of blocks into one write,
 * avoiding disk seeks.
 *
 * buck is an index-wide bucket number; it is mapped to its index
 * section with indexsect0 and rebased to that section's start.
 * ib is packed into b->data, which is then written as one
 * is->blocksize block at the bucket's offset in the section.
 *
 * Returns -1 (with the error string set) if buck is outside the
 * section chosen for it; otherwise returns whatever writepart returns.
 */
static int
writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
{
	ISect *is;
	u64int addr;

	is = ix->sects[indexsect0(ix, buck)];
	if(buck < is->start || buck >= is->stop){
		seterr(EAdmin, "cannot find index section for bucket %lud\n", (ulong)buck);
		return -1;
	}
	buck -= is->start;

/*
	qlock(&stats.lock);
	stats.indexwrites++;
	qunlock(&stats.lock);
*/
	packibucket(ib, b->data, is->bucketmagic);

	/* widen before shifting so the byte offset is computed in 64 bits */
	addr = is->blockbase + ((u64int)buck << is->blocklog);
	return writepart(is->part, addr, b->data, is->blocksize);
}

/*
 * Build the index ix from the entry stream stored on part,
 * starting at offset off and holding clumps entries.
 *
 * Each call to buildbucket fills ib and yields the bucket number
 * it belongs to; that bucket is then written with writebucket.
 * When zero is set, every bucket number skipped between two
 * consecutive results (and after the last one) is written out
 * as an empty bucket.
 *
 * A failed bucket write is reported and remembered but does not
 * stop the rebuild.  Returns 0 on success, -1 if anything failed.
 */
static int
buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
{
	IEStream *ies;
	IBucket ib, zib;
	ZBlock *z, *b;
	u32int next, buck;
	int ok;
	uint nbuck;
	u64int found = 0;

	/* ZZZ make buffer size configurable */
	b = alloczblock(ix->blocksize, 0, ix->blocksize);
	z = alloczblock(ix->blocksize, 1, ix->blocksize);
	ies = initiestream(part, off, clumps, 64*1024);
	if(b == nil || z == nil || ies == nil){
		/*
		 * Report the failure to the caller.  The cleanup at
		 * breakout is handed whichever of these are nil, as it
		 * always has been.
		 */
		ok = -1;
		goto breakout;
	}
	ok = 0;
	next = 0;

	/* clear both buckets completely so no stray field gets packed */
	memset(&ib, 0, sizeof ib);
	ib.data = b->data + IBucketSize;
	memset(&zib, 0, sizeof zib);
	zib.data = z->data + IBucketSize;
	zib.n = 0;

	nbuck = 0;
	for(;;){
		buck = buildbucket(ix, ies, &ib, ix->blocksize-IBucketSize);
		found += ib.n;

		if(zero){
			/* fill the gap [next, buck) with empty buckets */
			for(; next != buck; next++){
				if(next == ix->buckets){
					/*
					 * Ran off the end of the index.  That is
					 * expected only when buck is TWID32, the
					 * value that also ends the main loop.
					 */
					if(buck != TWID32){
						fprint(2, "bucket out of range\n");
						ok = -1;
					}
					goto breakout;
				}
				if(writebucket(ix, next, &zib, z) < 0){
					fprint(2, "can't write zero bucket to buck=%ud: %r\n", next);
					ok = -1;
				}
			}
		}

		if(buck >= ix->buckets){
			if(buck == TWID32)
				break;
			fprint(2, "bucket out of range\n");
			ok = -1;
			goto breakout;
		}

		if(writebucket(ix, buck, &ib, b) < 0){
			fprint(2, "bad bucket found=%lld: %r\n", found);
			ok = -1;
		}
		next = buck + 1;

		if(++nbuck%10000 == 0)
			fprint(2, "\t%,d buckets written...\n", nbuck);
	}

breakout:;
	fprint(2, "wrote index with %lld entries\n", found);
	freeiestream(ies);
	freezblock(z);
	freezblock(b);
	return ok;
}

/*
 * Print the command synopsis and exit with a non-empty exit string.
 */
void
usage(void)
{
	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
	threadexitsall("usage");
}

Config conf;

/*
 * Entry point: buildindex [-Z] [-B blockcachesize] config tmppart
 *
 *	-B size	requested disk block cache size; raised to a minimum
 *		derived from the index's arena and section counts
 *	-Z	do not write empty buckets for unused bucket numbers
 *
 * Sorts the raw index entries onto tmppart, rebuilds the main
 * index from them, and writes the bloom filter if the index has one.
 */
void
threadmain(int argc, char *argv[])
{
	Part *part;
	u64int clumps, base;
	u32int bcmem, minbcmem;
	int zero;
	char *arg;

	zero = 1;
	bcmem = 0;
	ARGBEGIN{
	case 'B':
		/* -B needs a value; don't hand a nil string to unittoull */
		arg = ARGF();
		if(arg == nil)
			usage();
		bcmem = unittoull(arg);
		break;
	case 'Z':
		zero = 0;
		break;
	default:
		usage();
		break;
	}ARGEND

	if(argc != 2)
		usage();

	if(initventi(argv[0], &conf) < 0)
		sysfatal("can't init venti: %r");

	/* never run with less cache than this floor */
	minbcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
	if(bcmem < minbcmem)
		bcmem = minbcmem;
	if(0) fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
	initdcache(bcmem);

	fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);

	part = initpart(argv[1], ORDWR|ODIRECT);
	if(part == nil)
		sysfatal("can't initialize temporary partition: %r");

	clumps = sortrawientries(mainindex, part, &base, mainindex->bloom);
	if(clumps == TWID64)
		sysfatal("can't build sorted index: %r");
	fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);

	if(buildindex(mainindex, part, base, clumps, zero) < 0)
		sysfatal("can't build new index: %r");

	if(mainindex->bloom)
		writebloom(mainindex->bloom);

	threadexitsall(0);
}