cdesktopenv/cde/programs/dthelp/parser/pass2/util/scanutil.c

250 lines
5.6 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: scanutil.c /main/3 1995/11/08 11:07:21 rswiston $ */
/* Copyright (c) 1986, 1987, 1988, 1989 Hewlett-Packard Co. */
/* Scanutil.c contains procedures used in the scanners of BUILD and ELTDEF */
/* Input stream from which getachar() reads its bytes (via getc) */
extern FILE *ifile;
/* Getachar returns the next character from the input stream */
int getachar(void)
{
int c;
M_WCHAR wc,wnl;
char mbyte[32]; /* bigger than any possible multibyte char */
int length, ret;
if (toundo) wc = (M_WCHAR) savechar[--toundo];
else
{
length = 0;
if ((c = getc(ifile)) == EOF) return(EOF);
while (1)
{
mbyte[length++] = c;
mbyte[length] = 0;
if (mblen(mbyte,length) != -1) break; /* hurray! */
if (length == MB_CUR_MAX)
{ /* reached max without a hit */
m_error("An invalid multi-byte character was found in the input");
c = ' ';
length = 1;
break;
}
if ((c = getc(ifile)) == EOF)
{ /* huh? */
m_error("End-of-file found in within a multi-byte character");
return(EOF);
}
}
ret = mbtowc(&wc,mbyte,length);
}
ret = mbtowc(&wnl, "\n", 1);
(void) ret;
if (wc == wnl) m_line++;
if (wc == 65535)
return -1;
return((int) wc);
}
/* Reads the next token stored in a packed trie (as defined by context.dat).

   c       - receives a character: the first character read when no
             delimiter is recognized, or the look-ahead character when a
             delimiter ending in a name-start character had to be checked.
   context - 1-based index into m_contree selecting the trie to search.

   Returns the code of the longest delimiter recognized, or M_NULLVAL if
   the input does not begin with a delimiter.  Characters read beyond the
   returned token are handed back with ungetachar(). */
int gettoken(int *c, int context)
{
    int pending[MAXD + 1];  /* characters read so far, in input order */
    int token[MAXD + 1];    /* delimiter completed at each position, else FALSE */
    int depth = 0;
    int node, slot, folded;

    node = m_contree[context - 1];
    if (node == 0)
    {
        /* m_contree has a zero entry for this context: just read a character */
        *c = getachar();
        return(M_NULLVAL);
    }
    node--;

    /* Walk the trie for as long as the input keeps matching */
    for (;;)
    {
        pending[depth] = getachar();
        folded = m_ctupper(pending[depth]);
        token[depth] = FALSE;

        /* Scan the sibling entries of this trie level */
        slot = node;
        while ((int) m_delimtrie[slot].symbol < folded && m_delimtrie[slot].more)
            slot++;

        if ((int) m_delimtrie[slot].symbol != folded) break;

        node = m_delimtrie[slot].index;
        /* A zero symbol marks a complete delimiter; its index is the code */
        if (! m_delimtrie[node].symbol)
            token[depth] = m_delimtrie[node].index;
        depth++;
    }

    /* Back up from the last character read to the longest usable delimiter */
    for ( ; depth >= 0 ; depth--)
    {
        if (token[depth])
        {
            /* Found a delimiter.  If it ends with a letter, verify
               that the following character is not a letter, in order
               to issue error messages in cases such as <!ENTITYrunon ... */
            if (m_cttype(pending[depth]) != M_NMSTART) return(token[depth]);
            *c = getachar();
            ungetachar(*c);
            if (*c == EOF || m_cttype(*c) != M_NMSTART) return(token[depth]);
        }
        /* The first character is not pushed back; it is returned through c */
        if (depth != 0) ungetachar(pending[depth]);
    }

    *c = pending[0];
    return(M_NULLVAL);
}
/* Reads a literal (called after the opening quotation mark is read).

   delim - token code (LIT or LITA) of the quotation mark that opened the
           literal and therefore closes it.

   Tokens are read in the LITCON context and the resulting characters are
   stored in the global array literal, terminated with M_EOS:
     - the other quotation mark is copied in as text (from lit or lita);
     - CRO starts a decimal character reference, read in the CHARENT
       context, whose value is stored as a single character;
     - record-start tokens are dropped (or stored as RS when BUILDEXTERN
       is defined and curcon is SREF or S2REF);
     - record-end, tab and space tokens are stored as M_RE, M_TAB, M_SPACE;
     - anything else is stored as the character returned by gettoken().

   Returns TRUE when the closing quotation mark is found.  Returns FALSE,
   after reporting an error with m_error(), if the literal would exceed
   M_LITLEN characters, a character reference is out of range, or the
   input ends inside the literal. */
LOGICAL litproc(int delim)
{
    int n, i, number;
    M_WCHAR *p, *pStart; /* cursor into, and start of, the wide quote text */
    int c;

    for (i = 0 ; i < M_LITLEN ; i++)
    {
        n = gettoken(&c, LITCON);
        switch (n)
        {
        case LIT:
        case LITA:
            if (n == delim)
            {
                literal[i] = M_EOS;
                return(TRUE);
            }
            pStart = p = MakeWideCharString((n == LIT) ? lit : lita);
            if (w_strlen(p) + i > M_LITLEN)
            {
                m_error("Literal too long");
                /* release the converted string on the error path too */
                m_free(pStart, "wide character string");
                return(FALSE);
            }
            while (*p) literal[i++] = *p++;
            m_free(pStart, "wide character string");
            i--; /* the for loop advances past the last character copied */
            break;
        case CRO:
            number = 0;
            /* Accumulate decimal digits until a delimiter of the CHARENT
               context (which is consumed) or a non-digit (pushed back) */
            while (! (n = gettoken(&c, CHARENT)))
            {
                if (c < '0' || c > '9')
                {
                    ungetachar(c);
                    break;
                }
                number = 10 * number + c - '0';
                if (number >= M_CHARSETLEN)
                {
                    m_error("Invalid character code");
                    return(FALSE);
                }
            }
            literal[i] = (M_WCHAR) number;
            break;
        case LITRS:
        case LITRSC:
#if defined(BUILDEXTERN)
            if (curcon == SREF || curcon == S2REF)
                literal[i] = RS;
            else
#endif
                i--; /* nothing stored: reuse this position */
            break;
        case LITRE:
        case LITREC:
            literal[i] = M_RE;
            break;
        case LITTAB:
        case LITTABC:
            literal[i] = M_TAB;
            break;
        case LITSPACE:
        case LITCSPACE:
            literal[i] = M_SPACE;
            break;
        default:
            if (c == EOF)
            {
                m_error("End of File occurred within literal");
                return(FALSE);
            }
            literal[i] = (M_WCHAR) c;
            break;
        } /* End switch */
    } /* End for i */

    /* literal is full: the only acceptable next token is the closing quote */
    n = gettoken(&c, LITCON);
    if (n == delim)
    {
        literal[M_LITLEN] = M_EOS;
        return(TRUE);
    }
    /* Hand back whatever was read so that scanning can continue after it */
    if (n) undodelim(m_dlmptr[n - 1]);
    else ungetachar(c);
    m_error("Literal too long");
    return(FALSE);
}
/* Returns a context-dependent delimiter string to input stream so
   characters can be reread one at a time in another context.

   delim - string of M_WCHAR terminated by a zero element.  Its characters
           are passed to ungetachar() from last to first.  A null pointer
           or an empty string pushes nothing back. */
void undodelim(M_WCHAR *delim)
{
    M_WCHAR *p;

    if (! delim) return;

    /* Find the terminator, then walk back to the start.  Decrementing
       before each use means p never moves in front of delim, so an empty
       string is handled without reading outside the array. */
    for (p = delim ; *p ; p++);
    while (p != delim)
    {
        p--;
        ungetachar(*p);
    }
}
/* Returns a character to the input stream to read again later.

   The character is stored in the savechar buffer, from which getachar()
   takes it back.  A private buffer is used because unexplained problems
   occurred with the standard ungetc procedure.

   c - character to push back.

   If the buffer already holds SAVECHAR characters an error is reported
   and the program exits.  maxundo records the largest number of
   characters ever held, and m_line is decremented when a newline is
   pushed back. */
void ungetachar(int c)
{
    M_WCHAR newline;

    if (toundo >= SAVECHAR)
    {
        m_error("Exceeded read-ahead buffer");
        exit(TRUE);
    }

    savechar[toundo] = c;
    toundo++;
    if (maxundo < toundo) maxundo = toundo;

    (void) mbtowc(&newline, "\n", 1);
    if (c == newline) m_line--;
}