252 lines
7.1 KiB
C
252 lines
7.1 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/*
|
|
* COMPONENT_NAME: austext
|
|
*
|
|
* FUNCTIONS: bmhcore
|
|
* bmhtable_build
|
|
* bmstrstr
|
|
* main
|
|
*
|
|
* ORIGINS: 27
|
|
*
|
|
*
|
|
* (C) COPYRIGHT International Business Machines Corp. 1992,1995
|
|
* All Rights Reserved
|
|
* Licensed Materials - Property of IBM
|
|
* US Government Users Restricted Rights - Use, duplication or
|
|
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
|
|
*/
|
|
/************************* BMSTRSTR.C ****************************
|
|
* $TOG: bmstrstr.c /main/6 1998/04/17 11:25:23 mgreess $
|
|
* Original module named fastsearch.c
|
|
* and included colocation string search functions.
|
|
* Modification of Boyer-Moore-Horspool algorithm,
|
|
* Sec 10.5.2 of Information Retrieval, Frakes and Baeza-Yates, editors.
|
|
* Provides a generalized boyer-moore
|
|
* strstr() function. The table used in the BMH algorithm is
|
|
* generated in a separate function to improve efficiency when
|
|
* looking for the same substring pattern in multiple text strings.
|
|
* The 'length' arguments can be passed if known, or passed as
|
|
* strlen(xxx) if not known. HOWEVER the string arrays MUST be at
|
|
* least 1 char larger then strlen() says ('cause we insert a \0).
|
|
* This whole thing has been coded for SPEED!
|
|
*
|
|
* $Log$
|
|
* Revision 2.2 1995/10/26 15:37:42 miker
|
|
* Added prolog.
|
|
*
|
|
* Revision 2.1 1995/09/22 19:10:39 miker
|
|
* Freeze DtSearch 0.1, AusText 2.1.8
|
|
*
|
|
* Revision 1.2 1995/08/31 22:12:05 miker
|
|
* Minor changes for DtSearch.
|
|
*/
|
|
#include "SearchP.h"
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/param.h>
|
|
|
|
#ifndef _AIX
|
|
#define __strcmp strcmp
|
|
#endif
|
|
|
|
/*********#define TEST_BMSTRSTR**************/
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* bmhtable_build */
|
|
/* */
|
|
/****************************************/
|
|
/* Builds the table for search substring 'pattern'
|
|
* used by the BMH core search algorithm.
|
|
* 'patlen' is the string length of 'pattern'.
|
|
* The caller defines and passes 'bmhtable',
|
|
* an array of long integers (size_t) of size MAX_BMHTAB.
|
|
* This function initializes bmhtable for later search call.
|
|
*/
|
|
void bmhtable_build (
|
|
unsigned char *pattern,
|
|
size_t patlen,
|
|
size_t * bmhtable)
|
|
{
|
|
int k;
|
|
|
|
for (k = 0; k < MAX_BMHTAB; k++)
|
|
bmhtable[k] = patlen;
|
|
patlen--;
|
|
for (k = 0; k < patlen; k++)
|
|
bmhtable[pattern[k]] = (patlen - k);
|
|
return;
|
|
} /* bmhtable_build() */
|
|
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* bmhcore */
|
|
/* */
|
|
/****************************************/
|
|
/* Performs 'core' BMH search after bmhtable is built.
|
|
* Returns ptr to first occurrence of pattern in text or NULL.
|
|
* WARNING! IF EITHER txtlen OR patlen <= 0, THIS FUNCTION WILL CRASH!!!
|
|
* Pattern and patlen MUST BE identical with those used to build
|
|
* bmhtable.
|
|
*/
|
|
|
|
char *bmhcore (
|
|
unsigned char *text,
|
|
size_t txtlen,
|
|
unsigned char *pattern,
|
|
size_t patlen,
|
|
size_t *bmhtable)
|
|
{
|
|
unsigned char
|
|
lastchar = pattern[patlen - 1];
|
|
unsigned char
|
|
textchar;
|
|
unsigned char
|
|
*cp;
|
|
unsigned char
|
|
*last;
|
|
int savechar;
|
|
int savechar2;
|
|
unsigned char
|
|
*result = NULL;
|
|
|
|
/* Terminate pattern with a char we KNOW is not in text.
|
|
* Note that this requires string to have room for \0 at end.
|
|
*/
|
|
savechar = pattern[patlen];
|
|
pattern[patlen] = '\0';
|
|
|
|
last = text + txtlen;
|
|
for (cp = text + patlen - 1; cp < last; cp += bmhtable[textchar]) {
|
|
/*
|
|
* Check if last character matches. If it doesn't, no need
|
|
* to check any further.
|
|
*/
|
|
if ((textchar = *cp) != lastchar)
|
|
continue;
|
|
savechar2 = cp[1];
|
|
cp[1] = '\0';
|
|
if (!__strcmp ((char *) (cp + 1 - patlen), (char *) pattern))
|
|
result = cp + 1 - patlen;
|
|
cp[1] = savechar2;
|
|
if (result)
|
|
break;
|
|
}
|
|
pattern[patlen] = savechar; /* restore last char */
|
|
return (char *) result;
|
|
} /* bmhcore() */
|
|
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* bmstrstr */
|
|
/* */
|
|
/****************************************/
|
|
/* Search in text [1..txtlen] for pattern [1..patlen].
|
|
* Returns ptr to first occurrence of pattern, or NULL.
|
|
*/
|
|
char *bmstrstr (
|
|
unsigned char *text,
|
|
size_t txtlen,
|
|
unsigned char *pattern,
|
|
size_t patlen)
|
|
{
|
|
size_t bmhtable[MAX_BMHTAB];
|
|
|
|
bmhtable_build (pattern, patlen, bmhtable);
|
|
return bmhcore (text, txtlen, pattern, patlen, bmhtable);
|
|
} /* bmstrstr() */
|
|
|
|
|
|
#ifdef TEST_BMSTRSTR /* for test only */
|
|
#include <sys/stat.h>
|
|
/****************************************/
|
|
/* */
|
|
/* main */
|
|
/* */
|
|
/****************************************/
|
|
/* tests bmstrstr() against standard strstr() on a specified file */
|
|
main ()
|
|
{
|
|
FILE *f;
|
|
struct stat statbuf;
|
|
size_t fsize = 0L;
|
|
char fname[BUFSIZ+1];
|
|
char pattern[MAXPATHLEN+1];
|
|
char *readbuf = NULL;
|
|
char *ptr;
|
|
|
|
MAIN_LOOP:
|
|
printf ("\nEnter a filename (Ctrl-C quits) > ");
|
|
|
|
*fname = '\0';
|
|
fgets (fname, sizeof(fname), stdin);
|
|
if (strlen(fname) && fname[strlen(fname)-1] == '\n')
|
|
fname[strlen(fname)-1] = '\0';
|
|
|
|
if ((f = fopen (fname, "r")) == NULL) {
|
|
printf ("Can't open '%s': %s\n", fname, strerror (errno));
|
|
goto MAIN_LOOP;
|
|
}
|
|
|
|
fstat (fileno (f), &statbuf);
|
|
if (fsize > statbuf.st_size) {
|
|
free (readbuf);
|
|
readbuf = NULL;
|
|
}
|
|
fsize = statbuf.st_size;
|
|
if (readbuf == NULL)
|
|
readbuf = malloc (fsize + 64L);
|
|
|
|
fread (readbuf, fsize, 1L, f);
|
|
fclose (f);
|
|
|
|
printf ("Enter a search pattern > ");
|
|
|
|
*pattern = '\0';
|
|
fgets (pattern, sizeof(pattern), stdin);
|
|
if (strlen(pattern) && pattern[strlen(pattern)-1] == '\n')
|
|
pattern[strlen(pattern)-1] = '\0';
|
|
|
|
ptr = bmstrstr (readbuf, fsize, pattern, strlen (pattern));
|
|
if (ptr == NULL)
|
|
puts ("bmstrstr: Pattern not found.");
|
|
else
|
|
printf ("bmstrstr: Pattern found at offset %ld.\n", ptr - readbuf);
|
|
|
|
ptr = strstr (readbuf, pattern);
|
|
if (ptr == NULL)
|
|
puts ("strstr: Pattern not found.");
|
|
else
|
|
printf ("strstr: Pattern found at offset %ld.\n", ptr - readbuf);
|
|
|
|
goto MAIN_LOOP;
|
|
} /* main() test program */
|
|
|
|
#endif
|
|
|
|
/************************* BMSTRSTR.C ****************************/
|