cdesktopenv/cde/programs/dtinfo/DtMmdb/compression/trie.h

161 lines
4.1 KiB
C++

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: trie.h /main/3 1996/06/11 17:15:31 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#ifndef _trie_h
#define _trie_h 1
#include "compression/code.h"
#include "dstr/heap.h"
#define MAX_LEVELS 50
#define LANG_ALPHABET_SZ 256
class trie_node;
struct info_t {
unsigned freq: 23;
unsigned letter: 8;
unsigned mark: 1;
};
class trie_node_info
{
trie_node* child;
union {
struct info_t info_view;
int int_view;
} info;
union {
encoding_unit* eu;
int heap;
int pos;
} image;
public:
//trie_node_info (char letter = 0, int freq = 0, trie_node* child = 0);
trie_node_info ();
~trie_node_info ();
friend class trie_node;
friend class trie;
friend Boolean trie_node_ls(voidPtr n1, voidPtr n2);
friend Boolean trie_node_eq(voidPtr n1, voidPtr n2);
friend void update_index(int ind, void* x);
};
typedef trie_node_info* trie_node_info_ptr_t;
class trie_node
{
protected:
#ifdef C_API
trie_node_info_ptr_t* children;
#else
trie_node_info children[LANG_ALPHABET_SZ+1];
// children[LANG_ALPHABET_SZ+1].child encodes the parent
#endif
public:
trie_node(trie_node_info* parent);
~trie_node();
void _print(ostream& out, char* prefix, int prefix_sz);
friend class trie;
};
class trie
{
protected:
trie_node* root;
int max_trie_level;
int total_nodes;
int level_sz[MAX_LEVELS];
heap* sorted_freqs;
encoding_unit** alphabet;
unsigned int alphabet_sz;
int estimated_sz;
protected:
void collect_freqs(trie_node* rt, int level);
ostring* get_word(trie_node_info* x);
void _find_leaf(trie_node* z, int& j);
void extend_alphabet();
public:
trie(int estimated_alphabet_sz = 400) ;
virtual ~trie() ;
virtual void add(unsigned char* word, int len, int freq = 1) ;
virtual void add_letters(unsigned char* letters, int len) ;
virtual encoding_unit* add_to_alphabet(unsigned char* word, int sz, int fq);
virtual encoding_unit** get_alphabet(unsigned int& alphabet_sz);
virtual Boolean travers_to(char* str, int sz,
trie_node*& node, trie_node_info*& node_info
);
// take the longest substring from str and returns its
// encoding_unit.
virtual encoding_unit* parse(unsigned char* str, int len);
friend ostream& operator <<(ostream& out, trie& tr);
};
#endif