275 lines
7.9 KiB
C
275 lines
7.9 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/*
|
|
* COMPONENT_NAME: austext
|
|
*
|
|
* FUNCTIONS: dump_hashtab
|
|
* is_duprec
|
|
* main
|
|
*
|
|
* ORIGINS: 27
|
|
*
|
|
*
|
|
* (C) COPYRIGHT International Business Machines Corp. 1993,1995
|
|
* All Rights Reserved
|
|
* Licensed Materials - Property of IBM
|
|
* US Government Users Restricted Rights - Use, duplication or
|
|
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
|
|
*/
|
|
/******************* ISDUPREC.C *******************
|
|
* $XConsortium: isduprec.c /main/5 1996/05/07 13:37:35 drk $
|
|
* June 1993.
|
|
* Is_duprec() returns 0 (FALSE) for every record id it is passed
|
|
* unless one is passed that duplicates a previous one,
|
|
* in which case it returns 1 (TRUE).
|
|
* It ensures that duplicate record ids in an .fzk file
|
|
* are not processed by either ravel or borodin.
|
|
* It does it by storing each recid into a hash table and
|
|
* searching the table before storing a new recid.
|
|
* Returns 2 on errors (malloc out of space, etc);
|
|
*
|
|
* Global 'duprec_hashsize' can be changed to any reasonable value
* for a hash table size (say 1000 to 30,000) prior to the first call
* of is_duprec().  It should be roughly greater than or equal to the
* total number of different record ids expected to be passed to
* is_duprec().
* If it is set to 0 before the first call, duplicate checking is
* disabled, i.e. is_duprec() will allocate no memory
* and always return 0.
|
|
*
|
|
* $Log$
|
|
* Revision 2.2 1995/10/25 17:22:48 miker
|
|
* Added prolog.
|
|
*
|
|
* Revision 2.1 1995/09/22 20:56:44 miker
|
|
* Freeze DtSearch 0.1, AusText 2.1.8
|
|
*
|
|
* Revision 1.3 1995/09/05 18:11:45 miker
|
|
* Minor changes so ansi c compilers won't whine.
|
|
*/
|
|
#include <stdlib.h>
#include <string.h>

#ifdef TEST
#include <stdio.h>
#include <errno.h>
#endif

#ifdef MAIN
#include <stdio.h>
#include <errno.h>
#endif
|
|
|
|
/* Prefix used on diagnostic messages printed by this module. */
#define PROGNAME	"ISDUPREC"

/* Default requested hash table size (number of buckets). */
#define HASHSIZE	3000L

/* Return codes of is_duprec(). */
enum {
    NOT_A_DUP = 0,	/* record id has not been seen before */
    IS_A_DUP = 1,	/* record id duplicates an earlier one */
    OUT_OF_MEM = 2	/* a malloc() failed */
};

/* Requested table size.  Callers may change it before the first
 * call of is_duprec(); a value of 0 disables duplicate checking.
 */
unsigned long duprec_hashsize = HASHSIZE;
|
|
|
|
/************************************************/
/*                                              */
/*                   HASHNODE                   */
/*                                              */
/************************************************/
/* The hash table is an array of 'duprec_hashsize' pointers to
 * these structures, each pointer starting out as NULL.
 * A record id is added by filling in a HASHNODE and hanging it
 * off the table pointer for its bucket; ids that collide in the
 * same bucket are chained together through 'link'.
 *
 * 'recid' is declared with a nominal length only.  Each node is
 * malloced large enough to hold the whole record id string passed
 * in, so 'recid' must remain the LAST member of the structure.
 */
struct hash_tag {
    struct hash_tag *link;	/* next node in this bucket, or NULL */
    char recid[2];		/* actual array size varies */
};

typedef struct hash_tag HASHNODE;
|
|
|
|
|
|
#ifdef TEST
|
|
/************************************************/
|
|
/* */
|
|
/* dump_hashtab() */
|
|
/* */
|
|
/************************************************/
|
|
/* For debugging, prints out all recids in hashtab, skipping empty bkts */
|
|
static void dump_hashtab (HASHNODE ** hashtab)
|
|
{
|
|
HASHNODE *hp, **hpp;
|
|
int i;
|
|
printf (PROGNAME "67 dump_hashtab(%p):\n", hashtab);
|
|
for (i = 0, hpp = hashtab; i < duprec_hashsize; i++, hpp++) {
|
|
if (*hpp) {
|
|
printf (" %4d:", i);
|
|
fflush (stdout);
|
|
for (hp = *hpp; hp != NULL; hp = hp->link)
|
|
printf (" '%s'", hp->recid);
|
|
putchar ('\n');
|
|
fflush (stdout);
|
|
}
|
|
}
|
|
return;
|
|
} /* dump_hashtab() */
|
|
|
|
#endif /* TEST */
|
|
|
|
|
|
/************************************************/
|
|
/* */
|
|
/* is_duprec() */
|
|
/* */
|
|
/************************************************/
|
|
/* Normal return is 0 indicating that passed record id is unique.
|
|
* Also immediately returns 0 if duplicate checking has been
|
|
* turned off by setting global 'duprec_hashsize' to zero.
|
|
* Returns 1 if record id is a duplicate.
|
|
* Returns 2 if out of memory.
|
|
* First call uses 'duprec_hashsize' to create hash table.
|
|
*/
|
|
int is_duprec (char *recid)
|
|
{
|
|
static HASHNODE **hashtab = NULL;
|
|
static unsigned long primes[10] =
|
|
{1013, 1511, 2203, 3511, 5003, 10007, 15013, 20011, 25013, 30001};
|
|
|
|
unsigned long i;
|
|
char *cp;
|
|
unsigned long sum;
|
|
HASHNODE *hp, **hpp;
|
|
|
|
if (duprec_hashsize == 0UL)
|
|
return NOT_A_DUP;
|
|
|
|
/* Generate hash table at first call only */
|
|
if (hashtab == NULL) {
|
|
/*
|
|
* adjust table size upward to nearest preordained prime
|
|
* number
|
|
*/
|
|
for (i = 0; i < 9 && primes[i] < duprec_hashsize; i++);
|
|
duprec_hashsize = primes[i];
|
|
#ifdef TEST
|
|
printf (PROGNAME "117 Create hash table, duprec_hashsize set = %ld.\n",
|
|
duprec_hashsize);
|
|
#endif
|
|
|
|
hashtab = malloc ((duprec_hashsize + 2L) * sizeof (HASHNODE *));
|
|
if (hashtab == NULL)
|
|
return OUT_OF_MEM;
|
|
|
|
/* init table to all NULL pointers. */
|
|
hpp = hashtab;
|
|
for (i = duprec_hashsize + 2L; i > 0L; i--)
|
|
*hpp++ = NULL;
|
|
}
|
|
|
|
/*****dump_hashtab(hashtab);******/
|
|
|
|
/* HASH FUNCTION: H(recid) = (SUM(i*recid[i])) mod M,
|
|
* where M is table size (prime), and SUM is calculated
|
|
* for i=1 to end of recid. Multiplying the position by the character
|
|
* value at that position minimizes the influence of identical
|
|
* characters at the beginnings and ends of recids,
|
|
* and also usually yields a number larger than M.
|
|
* Not skipping over the first position (the keytype char) helps
|
|
* efficiently catch recids that are blank after the keytype.
|
|
*/
|
|
sum = 0UL;
|
|
i = 1;
|
|
cp = recid;
|
|
while (*cp != 0)
|
|
sum += i++ * (*cp++);
|
|
hpp = &(hashtab[sum % duprec_hashsize]); /* hpp = head of linked
|
|
* list */
|
|
|
|
#ifdef TEST
|
|
printf (PROGNAME "150 is_duprec('%s')=hashtab[%lu]=%p: ",
|
|
recid, sum % duprec_hashsize, *hpp);
|
|
fflush (stdout);
|
|
i = 0;
|
|
#endif
|
|
|
|
/* Search linked list (if any) for hashnode containing recid */
|
|
for (hp = *hpp; hp != NULL; hp = hp->link) {
|
|
#ifdef TEST
|
|
i++;
|
|
#endif
|
|
|
|
if (strcmp (hp->recid, recid) == 0) {
|
|
#ifdef TEST
|
|
printf ("DUP!@listpos=%d\n", i);
|
|
#endif
|
|
return IS_A_DUP;
|
|
}
|
|
hpp = &hp->link; /* now hpp = tail of linked list */
|
|
}
|
|
#ifdef TEST
|
|
printf ("miss@listlen=%d\n", i);
|
|
#endif
|
|
|
|
/* Not a duplicate. Add current recid to hash table. */
|
|
if ((hp = malloc (sizeof (HASHNODE) + strlen (recid) + 2)) == NULL)
|
|
return OUT_OF_MEM;
|
|
strcpy (hp->recid, recid);
|
|
hp->link = NULL;
|
|
/*****hp->link = *hpp;******/
|
|
*hpp = hp;
|
|
return NOT_A_DUP;
|
|
} /* is_duprec() */
|
|
|
|
|
|
#ifdef MAIN
|
|
/************************************************/
|
|
/* */
|
|
/* main() */
|
|
/* */
|
|
/************************************************/
|
|
main (int argc, char *argv[])
|
|
{
|
|
int i;
|
|
FILE *f;
|
|
char buf[2048];
|
|
|
|
if (argc < 2) {
|
|
printf ("USAGE: %s <file> [n]\n"
|
|
"where file contains list of char strings\n"
|
|
"and optional n changes hash table size.\n",
|
|
argv[0]);
|
|
return;
|
|
}
|
|
if ((f = fopen (argv[1], "r")) == NULL) {
|
|
printf ("Can't open %s: %s\n", argv[1], strerror (errno));
|
|
return;
|
|
}
|
|
if (argc >= 3)
|
|
duprec_hashsize = atol (argv[2]);
|
|
|
|
while (fgets (buf, sizeof (buf), f) != NULL) {
|
|
buf[sizeof (buf) - 1] = 0;
|
|
i = is_duprec (buf);
|
|
printf ("%s", buf); /* each buf should end in \n */
|
|
if (i > 1)
|
|
break;
|
|
}
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
/******************* ISDUPREC.C *******************/
|