cdesktopenv/cde/lib/DtMmdb/diskhash/disk_hash.C

482 lines
11 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: disk_hash.cc /main/5 1996/07/18 14:28:19 drk $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "diskhash/disk_hash.h"
#include "api/transaction.h"
extern transaction* g_transac;
int dsteps[] = { 2, 3, 5, 7, 11, 13, 17, 21, 23, 29, 31 };
int no_dsteps;
#define KPB 20 // keys per bucket
#define REHASHS 5 // number of rehashs in a row with
// fixed bucket size
#define TOP_LEVEL_PARAMS (sizeof(k) + sizeof(p) + sizeof(M) + \
sizeof(n) + sizeof(v))
//static buffer buf(LBUFSIZ);
disk_hash::disk_hash(page_storage* store, int prime, int expected_n) :
index_agent(), key_store(store), buf(store -> aux_buf())
{
if ( g_transac ) {
g_transac -> book(store);
}
buf.set_swap_order(store -> swap_order());
rand_generator.seed();
no_dsteps = sizeof(dsteps) / sizeof(int);
init_params(prime, expected_n);
bucket_vector = new bucket_array(M+v, store);
hash_vector = new void_ptr_array(2*MAX(expected_n, (int) n));
k_vector = new void_ptr_array(M+v);
r_vector = new void_ptr_array(M+v);
k_vector -> reset_vptr(voidPtr(1));
}
disk_hash::~disk_hash()
{
delete bucket_vector;
delete hash_vector;
delete k_vector;
delete r_vector;
}
void disk_hash::init_params(int prime, int expected_n)
{
int pgs = key_store -> pages();
if ( pgs > 0 ) {
if ( (*key_store)(1, page_storage::READ) -> count() != 2 ) {
throw(stringException("corruptted primary bucket"));
}
/////////////////////////////////
// read in params from the store
/////////////////////////////////
buf.reset();
(*key_store)(1, page_storage::READ) -> get(1, buf);
buf.get(k).get(p).get(M).get(n).get(v);
} else {
///////////////////////////////////////
// init the store. bucket 1 is reserved
// for top level function params
///////////////////////////////////////
set_p(prime);
set_k(rand_generator.rand());
set_M(MAX(1, expected_n/KPB));
set_v(MAX(1, M/4));
set_n(0);
key_store -> add_page_frames(1);
int slot_num; char* z;
(*key_store)(1, page_storage::WRITE) ->
alloc_slot(slot_num, TOP_LEVEL_PARAMS, z);
if ( slot_num != 1 ) {
throw(stringException("corruptted primary bucket"));
}
sync_params();
}
}
void disk_hash::sync_params()
{
buf.reset();
buf.put(k).put(p).put(M).put(n).put(v);
(*key_store)(1, page_storage::WRITE) -> update_slot(1, buf);
}
void disk_hash::clean()
{
throw(stringException("void disk_hash::clean(): not implemented yet"));
}
///////////////////////////////////////////////////////////////
// rehash all keys
///////////////////////////////////////////////////////////////
Boolean disk_hash::rehash(data_t& w)
{
//MESSAGE(cerr, "REHASH:");
char tmp_name[PATHSIZ];
snprintf(tmp_name, sizeof(tmp_name), "%s.tmp", key_store -> my_name());
fstream pool(form("%s/%s", key_store -> my_path(), tmp_name),
ios::in | ios::out
);
for ( int i=0; i<bucket_vector -> count(); i++ ) {
pool << bucket_vector -> get_bucket(i);
}
pool << w;
Boolean ok = false;
for ( int rehashs=0; rehashs<REHASHS; rehashs++ ) {
/////////////////////
// adjust params
/////////////////////
int old_M = M;
int old_v = v;
set_M(MAX(n/KPB, 2*M));
set_v(M/4);
int delta = M + v - old_M - old_v;
k_vector -> expandWith(delta);
r_vector -> expandWith(delta);
//////////////////////////////////////////
// expand the buckets and the hash table
//////////////////////////////////////////
bucket_vector -> expandWith(MAX(0, M+v - bucket_vector -> count()));
hash_vector -> expandWith(MAX(0, 2*n - hash_vector -> count()));
/////////////////////
// clean buckets
/////////////////////
for ( int i=0; i<bucket_vector -> count(); i++ ) {
bucket_vector -> get_bucket(i).remove_all();
}
if ( _rehash(pool) == true ) {
ok = true;
break;
}
}
if ( ok == false )
throw(stringException("rehash() failed"));
del_file(tmp_name, key_store -> my_path());
return true;
}
Boolean disk_hash::_rehash(fstream& pool)
{
pool.clear(); pool.seekg(0, ios::beg);
hash_vector -> reset_vptr(voidPtr(0));
k_vector -> reset_vptr(voidPtr(1));
r_vector -> reset_vptr(voidPtr(0));
set_k(rand_generator.rand());
data_t x((char*)0, 0, 0);
while ( pool >> x ) {
if ( _insert(x, false) == false )
return false;
}
return true;
}
/************************************/
// insert
/************************************/
Boolean disk_hash::insert(data_t& w)
{
if ( _insert(w, true) == false )
throw(stringException("disk_hash::insert() failed"));
n++;
sync_params();
return true;
}
Boolean disk_hash::_insert(data_t& w, Boolean rehash_if_fail)
{
int hash = w.bucket_num(k, p, M);
//int hash = w.key.int_key;
//debug(cerr, hash);
disk_bucket& b = bucket_vector -> get_bucket(hash);
int slot_num = b.insert(&w);
if ( slot_num != 0 ) {
caching(b, w, slot_num);
} else {
///////////////////////////////////
// insert into the overflow bucket
///////////////////////////////////
//MESSAGE(cerr, "INSERT to overflow buckets");
//debug(cerr, hash);
for ( hash %= v; hash < (int) v; hash++ ) {
disk_bucket& overflowb = bucket_vector -> get_bucket(hash+M);
slot_num = overflowb.insert(&w);
if ( slot_num != 0 ) {
caching(overflowb, w, slot_num);
break;
}
}
if ( slot_num == 0 && rehash_if_fail == true )
return rehash(w);
}
if ( slot_num != 0 )
return true;
else
return false;
}
void disk_hash::caching(disk_bucket& b, data_t& w, int slot_num)
{
//debug(cerr, b.bnum());
//debug(cerr, k_vector -> count());
int kv = int((long)(*k_vector)[b.bnum()]);
int rv = int((long)(*r_vector)[b.bnum()]);
///////////////////////////////////////////
// cache all keys in the bycket except w.
// In fact, only need to cache keys whose
// hash vector slots have been updated.
// It is to be enhanced.
///////////////////////////////////////////
int ind = b.first();
while ( ind != 0 && ind != slot_num ) {
data_t* x = b(ind);
if ( x ) {
hash_vector -> insert(
(voidPtr)(size_t)ind,
x -> slot_num(kv, rv, p, hash_vector -> count())
);
}
delete x;
b.next(ind);
}
////////////////////////////////////////
// cache w. it is always in the cache.
// others may be overwritten.
////////////////////////////////////////
hash_vector -> insert(
(voidPtr)(size_t)slot_num,
w.slot_num(kv, rv, p, hash_vector -> count())
);
}
/******************************************/
// remove operation
/******************************************/
Boolean disk_hash::remove(data_t& w)
{
int slot_num;
disk_bucket* b = 0;
if ( member(w, b, slot_num) == true ) {
b -> remove(slot_num);
n--;
sync_params();
return true;
} else
return false;
}
Boolean disk_hash::member(data_t& w)
{
//MESSAGE(cerr, "disk_hash::member():");
//asciiOut(cerr);
//MESSAGE(cerr, "+++++++++++++");
disk_bucket* b = 0;
int slot_num;
return member(w, b, slot_num);
}
Boolean disk_hash::member(data_t& w, disk_bucket*& b, int& slot_num) const
{
int hash = w.bucket_num(k, p, M);
//int hash = w.key.int_key;
//debug(cerr, hash);
b = &bucket_vector -> get_bucket(hash);
int kv = int((long)(*k_vector)[b -> bnum()]);
int rv = int((long)(*r_vector)[b -> bnum()]);
slot_num =
int((long)(*hash_vector)[w.slot_num(kv, rv, p, hash_vector -> count())]);
//debug(cerr, slot_num);
if ( b -> member(w, slot_num) == true )
return true;
if ( b -> overflow() == true ) {
for ( hash %= v; hash < (int) v; hash++ ) {
b = &bucket_vector -> get_bucket(hash+M);
if ( b -> member(w, slot_num) == true ) {
return true;
}
}
}
return false;
}
int disk_hash::first_bucket()
{
return ( M > 0 ) ? 0 : -1;
}
disk_bucket* disk_hash::get_bucket(int& ind)
{
return &bucket_vector -> get_bucket(ind);
}
void disk_hash::next_bucket(int& ind)
{
ind = ( ind >= (int)(M+v-1) ) ? -1 : (ind+1);
}
/*******************************************************/
// print operation
/*******************************************************/
ostream& disk_hash::asciiOut(ostream& out)
{
int ind = first_bucket();
while ( ind != -1 ) {
//MESSAGE(cerr, "New Bucket:");
//debug(cerr, ind);
disk_bucket* bucket = get_bucket(ind);
if ( bucket ) {
out << *bucket;
}
next_bucket(ind);
}
return out;
}
istream& disk_hash::asciiIn(istream& in)
{
data_t actor;
while ( in >> actor ) {
_insert(actor, true);
n++;
}
sync_params();
return in;
}
ostream& disk_hash::asciiOut(ostream& out, print_func_ptr_t print_f)
{
int ind = first_bucket();
while ( ind != -1 ) {
disk_bucket* bucket = get_bucket(ind);
if ( bucket ) {
bucket -> asciiOut(out, print_f);
}
next_bucket(ind);
}
return out;
}
void disk_hash::out_params()
{
debug(cerr, k);
debug(cerr, p);
debug(cerr, M);
debug(cerr, v);
debug(cerr, n);
}