cdesktopenv/cde/lib/DtSearch/boolpars.c

/*
 * CDE - Common Desktop Environment
 *
 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
 *
 * These libraries and programs are free software; you can
 * redistribute them and/or modify them under the terms of the GNU
 * Lesser General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * These libraries and programs are distributed in the hope that
 * they will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with these libraries and programs; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */
/* $XConsortium: boolpars.c /main/5 1996/11/25 18:49:27 drk $
 *
 * (c) Copyright 1996 Digital Equipment Corporation.
 * (c) Copyright 1996 Hewlett-Packard Company.
 * (c) Copyright 1996 International Business Machines Corp.
 * (c) Copyright 1996 Sun Microsystems, Inc.
 * (c) Copyright 1996 Novell, Inc.
 * (c) Copyright 1996 FUJITSU LIMITED.
 * (c) Copyright 1996 Hitachi.
 */
/*
 *   COMPONENT_NAME: austext
 *
 *   FUNCTIONS: add_syntax_errmsg
 *		boolean_parse
 *		boolyac_AND
 *		boolyac_COLLOC
 *		boolyac_NOT
 *		boolyac_OR
 *		copy_final_truthtab
 *		copy_token
 *		creatett
 *		freett
 *		get_stem_truthtab
 *		main
 *		process_user_args
 *		yyerror
 *		yylex
 *
 *   ORIGINS: 27
 *
 *
 *   (C) COPYRIGHT International Business Machines Corp. 1996
 *   All Rights Reserved
 *   Licensed Materials - Property of IBM
 *   US Government Users Restricted Rights - Use, duplication or
 *   disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 */
/********************* BOOLPARS.C ********************
 * $Id: boolpars.c /main/5 1996/11/25 18:49:27 drk $
 * February 1996.
 * AusText/DtSearch yacc-based boolean query parser.
 * Converts boolean query into stems array and truth table
 * for subsequent search.  Boolyac.y is the yacc source.
 * After processing by yacc, it becomes boolyac.c and boolyac.h.
 * This module contains all the related C source code: yylex,
 * yacc action functions, and the main AusText driver function, boolean_parse.
 * Additional information (format of TRUTHTAB) in header file boolpars.h.
 *
 * $Log$
 * Revision 1.4  1996/03/22  23:12:50  miker
 * Added string.h header and correctly cast strcspn() calls.
 *
 * Revision 1.3  1996/03/20  19:14:30  miker
 * Enable collocation expressions in stem (type 'S') searches.
 *
 * Revision 1.2  1996/03/13  22:35:59  miker
 * Changed char to UCHAR several places; similar typecasts.
 *
 * Revision 1.1  1996/03/05  15:52:06  miker
 * Initial revision
 */
#include "SearchE.h"
#include <stdlib.h>
#include <string.h>
#include "boolpars.h"
#include "boolyac.h"

#if (DtSrMAX_STEMCOUNT != 8)
#error DtSrMAX_STEMCOUNT is not defined to be 8.
#endif

#define PROGNAME	"BOOLPARS"
#define WORD_ENDERS	" \t\n\f()|@~&"
#define MAX_YYERRORS	4
#define MS_boolpars	28


/****************************************/
/*					*/
/*		 GLOBALS		*/
/*					*/
/****************************************/
int		qry_has_no_NOTs =	FALSE;
int		qry_is_all_ANDs =	FALSE;
TRUTHTAB	final_truthtab =	{ 0 };
int		parser_invalid_wordcount = 0;

static int	debugging_boolpars =	FALSE;
static unsigned char
		*final_permutes =	NULL;
static int	last_token_was_boolop =	TRUE;
static char	*msgbuf =		NULL;
static UCHAR	*next_lex_char =	NULL;
static int	paren_count =		0;
static TRUTHTAB	*ttlist =		NULL;
static int	yyerror_count =		0;
static size_t	yyleng;		/* same as in lex API */
static char	*yytext;	/* same as in lex API */


/****************************************/
/*					*/
/*	     add_syntax_errmsg		*/
/*					*/
/****************************************/
/* Action function called for yacc rules used to trap syntax errors.
 * Adds error message identified by msgno to user's msglist.
 */
void	add_syntax_errmsg (int msgno)
{
    switch (msgno) {
	case 1:
	    /* Message #2 is called in two places */
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
		"%s Query field is empty."),
		PROGNAME"086");
	    DtSearchAddMessage (msgbuf);
	    break;

	case 2:
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 5,
		"%s Boolean operators must be positioned\n"
		"between words or expressions.  Two sequential words\n"
		"without an operator are interpreted as being separated\n"
		"by the AND operator (&)."),
		PROGNAME"091");
	    DtSearchAddMessage (msgbuf);
	    break;

	case 3:
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 6,
		"%s Expression in parentheses is missing."),
		PROGNAME"093");
	    DtSearchAddMessage (msgbuf);
	    break;

	case 4:
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 7,
		"%s NOT operator (~) must be positioned to\n"
		"the left of the word or expression it qualifies."),
		PROGNAME"098");
	    DtSearchAddMessage (msgbuf);
	    break;

	case 5:
	    /* Message #3 is called in two places */
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
		"%s COLLOCATION operator (@) may\n"
		"only be positioned between two words."),
		PROGNAME"111");
	    DtSearchAddMessage (msgbuf);
	    break;

	case 6:
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 4,
		"%s One or more words in your\n"
		"query are not stored in database '%s'.") ,
		PROGNAME"089", usrblk.dblk->label);
	    DtSearchAddMessage (msgbuf);
	    break;

	default:
	    sprintf (msgbuf,  catgets(dtsearch_catd, MS_boolpars, 8,
		"%s Invalid boolean query.  Syntax Error #%d.") ,
		PROGNAME"100", msgno);
	    DtSearchAddMessage (msgbuf);
	    break;
    }
    return;
} /* add_syntax_errmsg() */


/****************************************/
/*					*/
/*	        creatett		*/
/*					*/
/****************************************/
/* Constructor for new truth table.
 * Allocates it, inits it, and links it into ttlist.
 */
static TRUTHTAB	*creatett (int stemno, int pmsz, unsigned char *permutes)
{
    TRUTHTAB *newtt = austext_malloc (sizeof(TRUTHTAB) + pmsz + 4,
	PROGNAME"140", NULL);
    memset (newtt, 0, sizeof(TRUTHTAB));
    newtt->stemno = stemno;
    newtt->pmsz = pmsz;
    newtt->permutes = (unsigned char *) (newtt + 1);
    memcpy (newtt->permutes, permutes, pmsz);
    newtt->next = ttlist;
    ttlist = newtt;
    return newtt;
} /* creatett() */


/****************************************/
/*					*/
/*		 freett			*/
/*					*/
/****************************************/
/* Destructor of passed truth table.
 * Unlinks it from ttlist and frees it.
 */
static void	freett (TRUTHTAB *argtt)
{
    TRUTHTAB	*tt;
    TRUTHTAB	**lastlink = &ttlist;
    for (tt = ttlist;  tt;  tt = tt->next) {
	if (tt == argtt) {
	    *lastlink = tt->next;
	    free (tt);
	    break;
	}
	lastlink = &tt->next;
    }
    return;
} /* freett() */


/****************************************/
/*					*/
/*	   copy_final_truthtab		*/
/*					*/
/****************************************/
/* Copys passed truth table into global final_truthtab.
 * Returns final_truthtab.
 */
TRUTHTAB	*copy_final_truthtab (TRUTHTAB *tt)
{
    memset (&final_truthtab, 0, sizeof(TRUTHTAB));
    if (!final_permutes)
	final_permutes = austext_malloc (300, PROGNAME"788", NULL);
    final_truthtab.pmsz =	tt->pmsz;
    final_truthtab.permutes =	final_permutes;
    memcpy (final_permutes, tt->permutes, final_truthtab.pmsz);
    return &final_truthtab;
} /* copy_final_truthtab() */


/****************************************/
/*					*/
/*	    get_stem_truthtab		*/
/*					*/
/****************************************/
/* Subroutine of yylex.  Also used in yacc action functions.
 * Creates and returns truth table for passed stem.
 * If stem is new, adds it to saveusr.stems array, and adds
 * the original query word string to usrblk.stems for msgs.
 * Returns NULL and posts err msg if array is full
 * or has other error.
 */
static TRUTHTAB	*get_stem_truthtab (char *newstem, char *origword)
{
    int			i, stemno;
    unsigned char	bitmask;
    unsigned char	*pmp;
    unsigned char	new_permutes [128];
    TRUTHTAB		*newtt;

    /* Check if stem is already in array */
    for (stemno = 0;  stemno < saveusr.stemcount;  stemno++)
	if (strcmp (newstem, saveusr.stems[stemno]) == 0)
	    break;

    /* Add new stem to array */
    if (stemno == saveusr.stemcount) {
	if (++saveusr.stemcount > DtSrMAX_STEMCOUNT) {
	    sprintf (msgbuf, catgets (dtsearch_catd, MS_boolpars, 9,
		"%s Too many terms in boolean query."),
		PROGNAME"1513");
	    DtSearchAddMessage (msgbuf);
	    saveusr.stemcount--;
	    return NULL;
	}
	strncpy (saveusr.stems[stemno], newstem, DtSrMAXWIDTH_HWORD);
	saveusr.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
	if (origword) {
	    strncpy (usrblk.stems[stemno], origword, DtSrMAXWIDTH_HWORD);
	    usrblk.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
	}
    }

    /* Stemno now indicates correct term in saveusr.stems.
     * Truth table for a single term has 128 8-bit permutes,
     * the 1/2 of all 256 possible permutations that have
     * that term's bit switched on.
     */
    bitmask = 1 << stemno;	/* mask with only newstem's bit on */
    pmp = new_permutes;
    for (i=0; i<256; i++)
	if ((i & bitmask) != 0) {
	    *pmp = i;
	    pmp++;
	}
    newtt = creatett (stemno, 128, new_permutes);
    if (debugging_boolpars) {
	fprintf (aa_stderr, "   WORD: stem[%d]='%c%s' expr=%p pmsz=%d\n",
	    stemno,
	    (saveusr.stems[stemno][0] == STEM_CH) ?
		'~'  :  saveusr.stems[stemno][0],
	    &saveusr.stems[stemno][1],
	    (void *) newtt, newtt->pmsz);
	fflush (aa_stderr);
    }
    return newtt;
} /* get_stem_truthtab() */


/****************************************/
/*					*/
/*	        boolyac_AND		*/
/*					*/
/****************************************/
/* Action function for AND expression rule.
 * Returns set INTERSECTION of passed truth tables,
 * ie only the permutes they have in common.
 * Any truth table, input or output, can be the empty or
 * the universal set.  For example: "(A & B) & ~A" is empty.
 */
TRUTHTAB	*boolyac_AND (TRUTHTAB *tt1, TRUTHTAB *tt2) {
    TRUTHTAB		*newtt;
    unsigned char	new_permutes [256];
    int			pm1, pm2, newpm;

    pm1 = pm2 = newpm = 0;
    while (pm1 < tt1->pmsz  &&  pm2 < tt2->pmsz) {
	if (tt1->permutes[pm1] < tt2->permutes[pm2])
	    pm1++;
	else if (tt1->permutes[pm1] > tt2->permutes[pm2])
	    pm2++;
	else {
	    new_permutes [newpm++] = tt1->permutes [pm1];
	    pm1++;
	    pm2++;
	}
    }

    /* Free old truthtabs, create new one. */
    freett (tt1);
    freett (tt2);
    newtt = creatett (-1, newpm, new_permutes);
    if (debugging_boolpars) {
	fprintf (aa_stderr, "    AND: exprs=%p,%p-->expr=%p pmsz=%d\n",
	    (void *) tt1, (void *) tt2, (void *) newtt, newtt->pmsz);
	fflush (aa_stderr);
    }
    return newtt;
} /* boolyac_AND() */


/****************************************/
/*					*/
/*	        boolyac_OR		*/
/*					*/
/****************************************/
/* Action function for OR expression rule.
 * Returns set UNION of passed truth tables.
 * Any truth table, input or output, can be the empty or
 * the universal set.  For example: "A | ~A" is universal.
 */
TRUTHTAB	*boolyac_OR (TRUTHTAB *tt1, TRUTHTAB *tt2) {
    TRUTHTAB		*newtt;
    unsigned char	new_permutes [256];
    unsigned char	*permutes1 = tt1->permutes;
    unsigned char	*permutes2 = tt2->permutes;
    int			pm1, pm2, newpm;

    pm1 = pm2 = newpm = 0;

    /* While neither permutes array is exhausted... */
    while (pm1 < tt1->pmsz  &&  pm2 < tt2->pmsz) {
	if (permutes1[pm1] < permutes2[pm2])
	    new_permutes [newpm++] = permutes1[pm1++];
	else if (permutes2[pm2] < permutes1[pm1])
	    new_permutes [newpm++] = permutes2[pm2++];
	else {
	    new_permutes [newpm++] = permutes1[pm1++];
	    pm2++;
	}
    }
    /* After one or both permutes arrays are exhausted... */
    while (pm1 < tt1->pmsz)
	new_permutes [newpm++] = permutes1[pm1++];
    while (pm2 < tt2->pmsz)
	new_permutes [newpm++] = permutes2[pm2++];

    /* Free old truthtabs, create new one. */
    freett (tt1);
    freett (tt2);
    newtt = creatett (-1, newpm, new_permutes);
    if (debugging_boolpars) {
	fprintf (aa_stderr, "     OR: exprs=%p,%p-->expr=%p pmsz=%d\n",
	    (void *) tt1, (void *) tt2, (void *) newtt, newtt->pmsz);
	fflush (aa_stderr);
    }
    return newtt;
} /* boolyac_OR() */


/****************************************/
/*					*/
/*	        boolyac_NOT		*/
/*					*/
/****************************************/
/* Action function for NOT expression rule.
 * Returns set COMPLEMENT of passed truth table,
 * ie the universal set minus the passed set,
 * ie all possible permutes except those passed.
 * Either the old or the new truth table can be
 * the empty or the universal set.
 */
TRUTHTAB	*boolyac_NOT (TRUTHTAB *oldtt) {
    TRUTHTAB		*newtt;
    unsigned char	new_permutes [256];
    int			oldpm, newpm;
    int			candidate;

    oldpm = newpm = 0;
    for (candidate = 0;  candidate < 256;  candidate++) {
	if (oldpm >= oldtt->pmsz  ||  candidate < oldtt->permutes [oldpm]) {
	    new_permutes [newpm++] = candidate;
	}
	/*
	 * oldtt not done  &&  candidate == oldtt.
	 * (candidate > oldtt not possible).
	 */
	else {
	    oldpm++;
	}
    }
    freett (oldtt);
    newtt = creatett (-1, newpm, new_permutes);
    if (debugging_boolpars) {
	fprintf (aa_stderr, "    NOT: expr=%p-->expr=%p pmsz=%d\n",
	    (void *) oldtt, (void *) newtt, newtt->pmsz);
	fflush (aa_stderr);
    }
    return newtt;
} /* boolyac_NOT() */


/****************************************/
/*					*/
/*	      boolyac_COLLOC		*/
/*					*/
/****************************************/
/* Action function for COLLOCATION expression rule.
 * The record set satisfying a collocation expression is
 * generated dynamically.  At the parse level it is equivalent
 * to a separate 'word' with its own (undetermined) record set.
 * So it's given its own slot in saveusr.stems.  The word
 * in saveusr.stems is formatted "@ssttv[v...]" where ss and tt are
 * ascii numbers that index the original collocated words
 * in saveusr.stems, and v... is the collocation value integer.
 * For example, "@03005" represents the collocation of stem
 * number 3 and stem number 0, with collocation value 5.
 *
 * Returns NULL and errmsg on msglist if any problems.
 */
TRUTHTAB	*boolyac_COLLOC (
		    TRUTHTAB	*word1tt,
		    int		colloc_val,
		    TRUTHTAB	*word2tt)
{
    TRUTHTAB	*newtt;
    char	wordbuf [DtSrMAXWIDTH_HWORD];

    if (word1tt->stemno < 0  ||  word2tt->stemno < 0) {
	/* Message #3 is called in two places */
	sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
	    "%s COLLOCATION operator (@) may\n"
	    "only be positioned between two words."),
	    PROGNAME"371");
	DtSearchAddMessage (msgbuf);
	return NULL;
    }
    if (word1tt->stemno == word2tt->stemno) {
	sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 12,
	    "%s Collocation operator is not\n"
	    "permitted between identical words."),
	    PROGNAME"377");
	DtSearchAddMessage (msgbuf);
	return NULL;
    }
    sprintf (wordbuf, COLLOC_STEM_FORMAT,
	word1tt->stemno, word2tt->stemno, colloc_val);
    if ((newtt = get_stem_truthtab (wordbuf, wordbuf)) == NULL)
	return NULL;
    freett (word1tt);
    freett (word2tt);
    if (debugging_boolpars) {
	fprintf (aa_stderr, " COLLOC: exprs=%p,%p-->expr=%p pmsz=%d\n",
	    (void *) word1tt, (void *) word2tt, (void *) newtt, newtt->pmsz);
	fflush (aa_stderr);
    }
    return newtt;
} /* boolyac_COLLOC() */


/****************************************/
/*					*/
/*		 yyerror		*/
/*					*/
/****************************************/
/* Replaces standard yacc error routine. */
void	yyerror (char *msg) {
    if (strcmp (msg, "syntax error") == 0) {
	if (DtSearchHasMessages())
	    return;
	else if (parser_invalid_wordcount > 0)
	    add_syntax_errmsg(6);
	else {
	    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 1,
		"%s Your search string is an invalid\n"
		"boolean query.  Please reformulate and try again."),
		PROGNAME"001");
	    DtSearchAddMessage (msgbuf);
	}
    }
    else
	DtSearchAddMessage (msg);
    return;
} /* yyerror() */


/****************************************/
/*					*/
/*		copy_token		*/
/*					*/
/****************************************/
/* Subroutine of yylex().  Copies passed substring
 * Into a zero-terminated buffer of its own.
 * Static buffer good until next call.
 */
static char	*copy_token (UCHAR *tokenp, size_t toklen)
{
    static char		*buf =	NULL;
    static size_t	bufsz =	0;
    if (toklen > bufsz) {
	if (buf)
	    free (buf);
	bufsz = toklen + (toklen >> 1);	/* 1.5 times size needed */
	buf = austext_malloc (bufsz + 4, PROGNAME"182", NULL);
    }
    strncpy (buf, (char *) tokenp, toklen);
    buf [toklen] = 0;
    return buf;
} /* copy_token() */


/****************************************/
/*					*/
/*		   yylex		*/
/*					*/
/****************************************/
/* Delivers tokens to yyparse() from usrblk.query */
int	yylex (void)
{
    int		retn_token;
    PARG	parg;
    char	*stembufp;
    char	mystembuf [DtSrMAXWIDTH_HWORD + 4];

GET_ANOTHER_TOKEN:

    /* Skip white space */
    while (ascii_charmap[*next_lex_char] & WHITESPACE)
	next_lex_char++;

    /* Terminating zero indicates end of query and end of parse.
     * Automatically close unbalanced parentheses.
     */
    if (*next_lex_char == 0) {
	if (paren_count > 0) {
	    paren_count--;
	    retn_token =	')';
	    yytext =		")";
	    yyleng =		1;
	    goto DELIVER_TOKEN;
	}
	retn_token =		0;
	yytext =		"";
	yyleng =		0;
	goto DELIVER_TOKEN;
    }

    switch (*next_lex_char) {
	case '|':	/* OR operator */
	    last_token_was_boolop =	TRUE;
	    retn_token =		'|';
	    yytext =			"|";
	    yyleng =			1;
	    next_lex_char++;
	    break;

	case '~':	/* NOT operator */
	    if (!last_token_was_boolop) {
		/* Generate implied AND between words
		 * and parenthesized expressions.
		 * A NOT is not itself boolean; it must
		 * precede the next word or expression.
		 */
		last_token_was_boolop =	TRUE;
		retn_token =		'&';
		yytext =		"&";
		yyleng =		1;
		break;
	    }
	    last_token_was_boolop =	TRUE;
	    retn_token =		'~';
	    yytext =			"~";
	    yyleng =			1;
	    next_lex_char++;
	    break;

	case '&':	/* AND operator */
	    if (last_token_was_boolop && qry_is_all_ANDs) {
		/* Ignore multiple AND operators.
		 * These might occur if we silently
		 * discarded some invalid words.
		 */
		next_lex_char++;
		goto GET_ANOTHER_TOKEN;
	    }
	    last_token_was_boolop =	TRUE;
	    retn_token =		'&';
	    yytext =			"&";
	    yyleng =			1;
	    next_lex_char++;
	    break;

	case '(':	/* OPEN parentheses */
	    if (!last_token_was_boolop) {
		/* Generate implied AND between words
		 * and parenthesized expressions.
		 */
		last_token_was_boolop =	TRUE;
		retn_token =		'&';
		yytext =		"&";
		yyleng =		1;
		break;
	    }
	    paren_count++;
	    retn_token =		'(';
	    yytext =			"(";
	    yyleng =			1;
	    next_lex_char++;
	    break;

	case ')':	/* CLOSE parentheses */
	    /* Just discard excessive right parentheses */
	    if (--paren_count < 0) {
		paren_count = 0;
		next_lex_char++;
		goto GET_ANOTHER_TOKEN;
	    }
	    last_token_was_boolop =	FALSE;
	    retn_token =		')';
	    yytext =			")";
	    yyleng =			1;
	    next_lex_char++;
	    break;

	case '@':	/* COLLOCATION operator */
	    /* Collocation token:
	     * Token is defined as the collocation char followed
	     * by one or more numeric digits: "@#[#...]".
	     * Syntactically it's a kind of an AND operator.
	     * Semantically it's a pseudo word token
	     * (it will occupy a slot in the stems array).
	     * The yylval is the integer value following
	     * the collocation character.
	     */
	    yyleng = strcspn ((char *) next_lex_char + 1, WORD_ENDERS) + 1;
	    yytext = copy_token (next_lex_char, yyleng);
	    next_lex_char += yyleng;

	    if ((usrblk.dblk->dbrec.or_dbaccess & ORA_BLOB) == 0) {
		retn_token = ERROR_TOKEN;
		sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 10,
		    "%s Collocation searches not available for database '%s'."),
		    PROGNAME"2567", usrblk.dblk->label);
		DtSearchAddMessage (msgbuf);
		break;
	    }
	    yylval.int_val = atoi (yytext + 1);
	    if (yylval.int_val <= 0) {
		retn_token = ERROR_TOKEN;
		sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 11,
		    "%s Collocation operator '%.*s' is invalid.\n"
		    "Correct format is '@n' where n is greater than zero.") ,
		    PROGNAME"294", DtSrMAXWIDTH_HWORD, yytext);
		DtSearchAddMessage (msgbuf);
		break;
	    }
	    last_token_was_boolop =	TRUE;
	    retn_token =		COLLOC_TOKEN;
	    break;


	default:
	    /* Presumed word token:
	     * Token is all text chars until next whitespace,
	     * next lex token, or end of string.
	     * Linguistically parse it and optionally stem it.
	     * The token value is the truth table for one
	     * word: all permutes with only that word's
	     * bits turned on.  If the word is already
	     * in the stems array, then the permutes
	     * position is the word's index in the array.
	     * If the word is not in the array, it's added.
	     * If the array is full, then an error is reported.
	     */
	    if (!last_token_was_boolop) {
		/* Generate implied AND between words
		 * and parenthesized expressions.
		 */
		last_token_was_boolop =	TRUE;
		retn_token =		'&';
		yytext =		"&";
		yyleng =		1;
		break;
	    }
	    yyleng = strcspn ((char *) next_lex_char, WORD_ENDERS);
	    yytext = copy_token (next_lex_char, yyleng);
	    next_lex_char += yyleng;
	    /*
	     * Linguistically parse the token.
	     * Failure can occur because word is too short
	     * or too long, it's on the stoplist, etc.
	     * Setting PA_MSGS causes parser to explain
	     * invalid words with a msg.
	     */
	    memset (&parg, 0, sizeof(PARG));
	    parg.dblk =		usrblk.dblk;
	    parg.string =	yytext;
	    /*****if (!qry_is_all_ANDs)********/
		parg.flags =	PA_MSGS;
	    stembufp = usrblk.dblk->parser (&parg);
	    if (debugging_boolpars) {
		fprintf (aa_stderr, "   lang: '%s' -> '%s'\n",
		    yytext, (stembufp)? stembufp : "<null>");
		fflush (aa_stderr);
	    }
	    /*
	     * If token is not a linguistically valid word,
	     * one of two things can happen.  If the query
	     * is all_ANDs (most common type) we silently
	     * ignore the token.
	     * Otherwise report error and quit now.
	     */
	    if (stembufp == NULL) {
		parser_invalid_wordcount++;
		if (qry_is_all_ANDs)
		    goto GET_ANOTHER_TOKEN;
		retn_token = ERROR_TOKEN;
		if (!DtSearchHasMessages()) {
		    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 13,
			"%s Word '%.*s' is invalid.") ,
			PROGNAME"315", DtSrMAXWIDTH_HWORD, yytext);
		    DtSearchAddMessage (msgbuf);
		}
		break;
	    }
	    if (strlen(stembufp) != strlen(yytext)) {
		retn_token =		ERROR_TOKEN;
		sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 14,
		    "%s String '%.*s' is not a single word.") ,
		    PROGNAME"634", DtSrMAXWIDTH_HWORD, yytext);
		DtSearchAddMessage (msgbuf);
		break;
	    }
	    /*
	     * If stemming, we must prefix term with
	     * special stem char in the stems array.
	     */
	    if (usrblk.request == OE_SRCH_STEMS) {
		stembufp = usrblk.dblk->stemmer (stembufp, usrblk.dblk);
		if (debugging_boolpars) {
		    fprintf (aa_stderr, " stemer: -> '%s'\n", stembufp);
		    fflush (aa_stderr);
		}
		mystembuf[0] = STEM_CH;
		strncpy (mystembuf + 1, stembufp, DtSrMAXWIDTH_HWORD);
		mystembuf [DtSrMAXWIDTH_HWORD - 1] = 0;
		stembufp = mystembuf;
	    }

	    /* Load stem into stems arrays and return it's truth table. */
	    if ((yylval.truthtab = get_stem_truthtab (stembufp, yytext))) {
		retn_token =		WORD_TOKEN;
		last_token_was_boolop =	FALSE;
	    }
	    else
		retn_token =		ERROR_TOKEN;
	    break;

    } /* switch on *next_lex_char */

DELIVER_TOKEN:
    if (debugging_boolpars) {
	fprintf (aa_stderr,
	    "  yylex: op?=%d parct=%d tok#=%d lval=%p%sYYTEXT='%s'\n",
	    last_token_was_boolop, paren_count,
	    retn_token, (void *) yylval.truthtab,
	    (retn_token == COLLOC_TOKEN)? "\t\t" : "\t",
	    yytext);
	fflush (aa_stderr);
    }
    return retn_token;

} /* yylex() */


/****************************************/
/*					*/
/*	       boolean_parse		*/
/*					*/
/****************************************/
/* Called from Opera_Engine for boolean searches.
 * Driver for yyparse().
 * Expects usrblk.request == OE_SRCH_STEMS or OE_SRCH_WORDS.
 * If parse is completely successful (query is valid), outputs
 *   saveusr.stemcount,
 *   saveusr.stems (stemmed if necessary with STEM_CH as first char,
 *	and phony colloc words with '@' as first char),
 *   usrblk.stems (original unstemmed query terms for err msgs),
 *   final_truthtab,
 *   qry_has_no_NOTs,
 *   qry_is_all_ANDs,
 * and returns TRUE.  Truthtab allocation good until next call.
 * If parse fails, returns FALSE and err msg(s) on msglist.
 */
int	boolean_parse (void)
{
    int		i;
    char	*cptr;
    TRUTHTAB	*tt, *ttnext;

    debugging_boolpars = (usrblk.debug & USRDBG_BOOL);
    if (!msgbuf)
	msgbuf = austext_malloc (300 + DtSrMAXWIDTH_HWORD,
		PROGNAME"255", NULL);

    /* Test for empty query */
    if (usrblk.query == NULL) {
EMPTY_QUERY:
	/* Message #2 is called in two places */
	sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
	    "%s Query is empty."), PROGNAME"289");
	DtSearchAddMessage (msgbuf);
	return FALSE;
    }
    for  (cptr = usrblk.query;  *cptr;  cptr++) {
	if ((ascii_charmap[*cptr] & WHITESPACE) == 0)
	    break;
    }
    if (*cptr == 0)
	goto EMPTY_QUERY;

    /* Init globals for yylex and yyparse */
    next_lex_char =		(UCHAR *) usrblk.query;
    paren_count =		0;
    yyerror_count =		0;
    last_token_was_boolop =	TRUE;
    saveusr.stemcount =		0;
    parser_invalid_wordcount =	0;

    /* Query "is all ANDS" if it has no ORs, NOTs, or COLLOCs.
     * Missing or linguistically invalid words will be silently
     * discarded for all_ANDs queries.
     * Query "has no NOTs" if it has no NOTs.
     * Results from queries without NOTs can be statistically sorted.
     */
    qry_has_no_NOTs = !strchr (usrblk.query, '~');
    qry_is_all_ANDs = !strpbrk (usrblk.query, "|~@");

    if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
	fprintf (aa_stderr,
	    "start boolean_parse: stem?=%d allANDs?=%d noNOTs?=%d\n"
	    "  query: '%s'\n",
	    (usrblk.request == OE_SRCH_STEMS),
	    qry_is_all_ANDs, qry_has_no_NOTs, usrblk.query);
	fflush (aa_stderr);
    }

    if (yyparse() != 0)
	return FALSE;

    /* Free entire remaining ttlist.  Only you
     * can prevent forest fires and memory leaks.
     */
    tt = ttlist;
    while (tt) {
	ttnext = tt->next;
	free (tt);
	tt = ttnext;
    }
    ttlist = NULL;

    if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
	print_stems (saveusr.stemcount, saveusr.stems,
	    PROGNAME"815 end boolean_parse, syntax ok,");
	fprintf (aa_stderr, "  permutes=%d:", final_truthtab.pmsz);
	for (i=0;  i<16;  i++) {
	    if (i >=  final_truthtab.pmsz)
		break;
	    fprintf (aa_stderr, " %02x", final_truthtab.permutes [i]);
	}
	fputc ('\n', aa_stderr);
	fflush (aa_stderr);
    }

    if (final_truthtab.pmsz <= 0) {
	sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 15,
	    "%s Your query cannot logically return\n"
	    "any records.  Please reformulate and try again."),
	    PROGNAME"334");
	DtSearchAddMessage (msgbuf);
	return FALSE;
    }
    if (final_truthtab.pmsz >= 256) {
	sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 16,
	    "%s Your query will return entire database\n"
	    "'%s'.  Please reformulate and try again.") ,
	    PROGNAME"341", usrblk.dblk->label);
	DtSearchAddMessage (msgbuf);
	return FALSE;
    }
    return TRUE;
} /* boolean_parse() */


#ifdef TESTBOOL	/*-----------------------------------------------*/

USRBLK		usrblk = { 0 };
DBLK		dblk;
SAVEUSR		saveusr = { 0 };
extern int	debugging_teskey;
extern int	debugging_paice;
extern int	debugging_jpn;

/****************************************/
/*					*/
/*	    process_user_args		*/
/*					*/
/****************************************/
/* Subroutine of main().  Validates and loads global
 * variables with values from command line arguments.
 */
static void	process_user_args (int argc, char *argv[])
{
    int		i;
    char	*argptr;
    char	*cptr;
    char	*src, *targ;
    int		oops = FALSE;

    /* Each pass grabs new parm of "-xxx" format */
    argc--, argv++;
    while (argc > 0) {
	argptr = argv[0];
	if (*argptr != '-')
	    break;
	switch (argptr[1]) {
	    case 'm':
		if (argptr[2] == 'x')
		    dblk.dbrec.or_maxwordsz = atoi (argptr + 3);
		else if (argptr[2] == 'n')
		    dblk.dbrec.or_minwordsz = atoi (argptr + 3);
		else
		    goto BAD_ARG;
		break;

	    case 'l':
		dblk.dbrec.or_language = atoi (argptr + 2);
		break;

	    case 'd':
		for (cptr = argptr+2;  *cptr != 0;  cptr++) {
		    switch (*cptr) {
			case 't': debugging_teskey = TRUE; break;
			case 'p': debugging_paice = TRUE; break;
			case 'j': debugging_jpn = TRUE; break;
			default:
			    oops = TRUE;
			    fprintf (aa_stderr,
				"%s Invalid debug option %c.\a\n",
				PROGNAME"049", *cptr);
			    break;
		    }
		}
		break;

BAD_ARG:
	    default:
		oops = TRUE;
		fprintf (aa_stderr,
		    "%s Invalid command line argument '%s'.\a\n",
		    PROGNAME"059", argptr);
		break;
	} /* end switch */

	argc--, argv++;
    } /* main loop on each arg */


    if (oops) {
	fprintf (aa_stderr,
	    "\nUSAGE: %s [options]\n"
	    "  -mx#   maximum word size.\n"
	    "  -mn#   minimum word size.\n"
	    "  -dtpj  Debug: Teskey, Paice, Japanese.\n"
	    "  -l#    language number.  Default 0.\n",
	    aa_argv0);
	exit(2);
    }
    return;
}  /* process_user_args() */


/****************************************/
/*					*/
/*		   main			*/
/*					*/
/****************************************/
int	main	(int argc, char *argv[])
{
    int		i;
    int		valid_boolpars;
    char	*cptr;
    char	linebuf [1024];

    /* Init global variables */
    aa_argv0 = argv[0];

    memset (&usrblk, 0, sizeof(USRBLK));
    usrblk.dblk = &dblk;
    usrblk.debug |= USRDBG_BOOL;	/* set debugging_boolpars */

    memset (&dblk, 0, sizeof(DBLK));
    strcpy (dblk.name, "testbool");
    dblk.label = dblk.name;
    dblk.dbrec.or_dbaccess |= ORA_BLOB;	/* enable collocations */

    /* Read command line args */
    process_user_args (argc, argv);

    if (!load_language (&dblk, NULL)) {
	fprintf (aa_stderr,
	    PROGNAME"140 load_language() failed.  Msgs:\n%s\n",
	    DtSearchGetMessages());
	return 2;
    }
    fprintf (aa_stderr, "  lang=%d minwdsz=%d maxwdsz=%d.\n",
	dblk.dbrec.or_language,
	dblk.dbrec.or_minwordsz,
	dblk.dbrec.or_maxwordsz);

    /* Main loop.  Each line is a boolean query. */
    printf ("Enter an AusText boolean query.  'q' or '.' to quit.\n"
	"If first char is '$', words will be stemmed:\n> ");
    fflush (stdout);
    while (fgets (linebuf, sizeof(linebuf), stdin) != NULL) {

	linebuf [sizeof(linebuf) - 1] = 0;
	if (strcmp (linebuf, ".\n") == 0)
	    break;
	if (strcmp (linebuf, "q\n") == 0)
	    break;
	if (linebuf[0] == '\n')
	    break;
	linebuf [strlen(linebuf) - 1] = 0;	/* overlay \n */

	if (linebuf[0] == '$') {
	    usrblk.query = linebuf + 1;
	    usrblk.request = OE_SRCH_STEMS;
	}
	else {
	    usrblk.query = linebuf;
	    usrblk.request = OE_SRCH_WORDS;
	}

	if (!boolean_parse())
	    puts (PROGNAME"707 boolean_parse() returned FALSE (OE_BAD_QUERY).");
	if (DtSearchHasMessages()) {
	    printf ("mmmmm Messages returned to user mmmmmmmmmmmmmmmmmm\n"
		"%s\nmmmmm End of messages to user mmmmmmmmmmmmmmmmmmmm\n",
		DtSearchGetMessages());
	    DtSearchFreeMessages();
	}

    printf ("--------------------------------\n> ");
    fflush (stdout);
    } /* main read loop for each query line */
    return 0;
} /* main() */

#endif /* TESTBOOL */

/********************* BOOLPARS.C ********************/