242 lines
9.3 KiB
C
242 lines
9.3 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/* $XConsortium: context.c /main/3 1995/11/08 09:48:13 rswiston $ */
|
|
/*
|
|
Copyright 1986 Tandem Computers Incorporated.
|
|
This product and information is proprietary of Tandem Computers Incorporated.
|
|
Copyright (c) 1986, 1987, 1988, 1989 Hewlett-Packard Co.
|
|
*/
|
|
|
|
/*
|
|
Context.c contains the main procedure for program CONTEXT.
|
|
|
|
CONTEXT builds scanning and parsing tables for a simple
|
|
state-transition recognizer. Input consists of two files, delimiter
|
|
declarations in file "delim.dat" and state tables with corresponding
|
|
actions in file "context.dat". If a parameter is present on the
|
|
command line, a sparse matrix technique is used to encode some of the
|
|
output arrays.
|
|
|
|
Data lines in "delim.dat" contain the name of the delimiter followed
|
|
by the text of the delimiter (one or more non-white space
|
|
characters). The delimiter name must begin in the first column.
|
|
Lines with white-space in the first column are considered comments.
|
|
|
|
Data in "context.dat" consists of free-form multi-line declarations.
|
|
Each declaration is terminated by a semi-colon. It begins with the
|
|
name of a token followed by any number of pairs of contexts (parser
|
|
states). The context pairs are separated by a colon and may
|
|
optionally be followed by some C code enclosed in braces. In each
|
|
pair, the first context is one in which the delimiter is recognized,
|
|
the second is the state that will result from an occurrence of the
|
|
delimiter in the first context. If code is specified, it is to be
|
|
executed by the parser when the delimiter occurs in the first context
|
|
of the pair. If successive pairs of contexts are separated by a
|
|
comma, code which follows is common to all the pairs.
|
|
|
|
The tokens specified in "context.dat" can either be delimiters defined in
|
|
"delim.dat" or other tokens (e.g., number, name, end_of_file) that will
|
|
be recognized by the user-defined scanner.
|
|
|
|
For example, suppose "delim.dat" contains the line
|
|
|
|
grpo (
|
|
|
|
and "context.dat" includes
|
|
|
|
grpo element:namegroup
|
|
content:model {openmod() ;}
|
|
excon:openex
|
|
pcon:type ;
|
|
|
|
name namegroup:namesep {eltname() ;}
|
|
model:connector,
|
|
model2:connector
|
|
{storemod(nstring) ;}
|
|
|
|
These declarations specify that the "grpo" delimiter is "(" and is
|
|
recognized in contexts "element", "content", "excon", and "pcon".
|
|
Procedure openmod() is called when "grpo" occurs in "content" and,
|
|
beyond the state change, no code is executed in other contexts when
|
|
"grpo" occurs. The "name" token is valid in contexts "namegroup",
|
|
"model", and "model2"; in the first, the context is set to "namesep"
|
|
and procedure eltname() is called, in the last two, the context is
|
|
set to "connector" and storemod(nstring) is called.
|
|
|
|
The program has several output files: case.c, delim.h, context.h, and
|
|
error. Case.c contains nested switch statements to execute the code
|
|
specified with the context transitions. Delim.h contains C
|
|
declarations related to delimiters; context.h has declarations
|
|
related to contexts. Programs that include delim.h should define
|
|
M_DELIMDEF for definition form of the declarations and not define it
|
|
for the declaration form. Programs that include context.h should
|
|
define M_CONDEF analogously.
|
|
|
|
The program numbers the delimiter names and the contexts as
|
|
encountered and outputs corresponding #define instructions with all
|
|
letters in the names forced to uppercase to delim.h and context.h.
|
|
In addition, an array named for each delimiter (with all letters
|
|
forced to lowercase) is initialized to the delimiter text. The first
|
|
declaration in the above example, for instance, would result in the
|
|
following lines written to delim.h:
|
|
|
|
#define GRPO 1
|
|
M_DELIMEXTERN char grpo[] M_DELIMINIT("(") ;
|
|
|
|
These lines would be written to context.h:
|
|
|
|
#define ELEMENT 1
|
|
#define NAMEGROUP 2
|
|
#define CONTENT 3
|
|
#define MODEL 4
|
|
#define EXCON 5
|
|
#define OPENEX 6
|
|
#define PCON 7
|
|
#define TYPE 8
|
|
#define NAMESEP 9
|
|
#define CONNECTOR 10
|
|
#define MODEL2 11
|
|
|
|
Delim.h also contains an array, m_dlmptr, which contains pointers to the
|
|
names of all the defined delimiters. In addition, delim.h contains the
|
|
declaration of a tree representation of the delimiters allowed in each
|
|
context. Internally, the program represents the delimiter strings with a
|
|
tree. The children of the root of this tree represent the set of characters
|
|
with which delimiters can start. Children of other nodes represent
|
|
characters that can follow the character of their parent. Leaf nodes
|
|
are associated with the null character, indicating the end of a
|
|
delimiter. A path from the root to a leaf thus represents a valid
|
|
delimiter.
|
|
|
|
The form in which the tree is represented within this program is not
|
|
quite the same as the output data structure. In the latter, each
|
|
element consists of a flag called 'more' indicating whether it is the last
|
|
child of its parent; a character, called 'symbol'; and an integer,
|
|
called 'index'. All sons of a node are stored in adjacent elements
|
|
ordered in increaing value of their 'symbol' fields. 'Symbol'
|
|
contains the character associated with the node (0 if the node is a
|
|
leaf node), with the sign bit turned on if the node is the last child
|
|
of its parent. 'Index' contains the array index of the first son of
|
|
the node, if the node is not a leaf. If the node is a leaf, 'index'
|
|
is the number of the represented delimiter (i.e., 1 for "grpo" in the
|
|
above example).
|
|
|
|
As the tree is constructed, it is not feasible to store brothers in adjacent
|
|
elements, so the internal data structure is different. It contains a 'next'
|
|
field which points to the first brother of a given node. Because the 'next'
|
|
field can be used to detect the last child of a parent, it is not necessary
|
|
to use the sign bit of 'symbol' to indicate this special case.
|
|
|
|
Two other arrays are written to context.h. Contree[i] points to the root
|
|
of the delimiter tree for the ith context within the array tree whose
|
|
declaration is in delim.h; nextcon[i][j] indicates the state
|
|
resulting from an occurrence of delimiter j+1 in state i+1. Finally,
|
|
a #define of MAXD to the length of the longest delimiter is written to
|
|
delim.h.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include "basic.h"
|
|
#include "trie.h"
|
|
|
|
#include "common.h"
|
|
#include "chartype.h"
|
|
#define CONTDEF 1
|
|
#include "cont.h"
|
|
|
|
/* Main procedure */
|
|
int main(int argc, char **argv)
|
|
{
|
|
int n ;
|
|
|
|
m_openchk(&delim, "delim.h", "w") ;
|
|
m_openchk(&context, "context.h", "w") ;
|
|
m_openchk(&fcase, "case.c", "w") ;
|
|
m_openchk(&cdat, "context.dat", "r") ;
|
|
m_openchk(&ddat, "delim.dat", "r") ;
|
|
m_openchk(&m_errfile, "error", "w") ;
|
|
fputs(" switch(m_token) {\n", fcase) ; /* keep the "}" balanced */
|
|
|
|
fputs("#if defined(M_DELIMDEF)\n", delim) ;
|
|
fputs("#define M_DELIMEXTERN\n", delim) ;
|
|
fputs("#define M_DELIMINIT(a) = a\n", delim) ;
|
|
fputs("#else\n", delim) ;
|
|
fputs("#define M_DELIMEXTERN extern\n", delim) ;
|
|
fputs("#define M_DELIMINIT(a)\n", delim) ;
|
|
fputs("#endif\n\n", delim) ;
|
|
|
|
fputs("#if defined(M_CONDEF)\n", context) ;
|
|
fputs("#define M_CONEXTERN\n", context) ;
|
|
fputs("#else\n", context) ;
|
|
fputs("#define M_CONEXTERN extern\n", context) ;
|
|
fputs("#endif\n\n", context) ;
|
|
|
|
contree = (M_TRIE **) calloc(NUMCON, sizeof(M_TRIE *)) ;
|
|
xtransit = (int *) calloc(NUMCON * NUMDELIM, sizeof(int)) ;
|
|
contexts = (M_WCHAR **) calloc(NUMCON, sizeof(char *)) ;
|
|
dlmptr = (char**) calloc(NUMDELIM, sizeof(char*)) ;
|
|
|
|
loaddelim() ;
|
|
while (getdname())
|
|
{
|
|
casestarted = FALSE ;
|
|
while ((n = getContext()) >= 0)
|
|
{
|
|
if (withdelim) enterdelim(n) ;
|
|
getcolon() ;
|
|
if (transit(n, curdelim))
|
|
{
|
|
if (! withdelim)
|
|
{
|
|
char *mb_dname, *mb_contexts;
|
|
|
|
mb_dname = MakeMByteString(dname);
|
|
mb_contexts = MakeMByteString(contexts[n]);
|
|
warning2(
|
|
"Duplicate assignment to token \"%s\" in context \"%s\"",
|
|
mb_dname,
|
|
mb_contexts) ;
|
|
m_free(mb_dname,"multi-byte string");
|
|
m_free(mb_contexts,"multi-byte string");
|
|
}
|
|
}
|
|
transit(n, curdelim) = getContext() + 1 ;
|
|
getcode(n) ;
|
|
}
|
|
if (casestarted)
|
|
{
|
|
fprintf(fcase, " default:\n break ;\n") ;
|
|
/* keep the "{" balanced */
|
|
fprintf(fcase, " }\n break ;\n") ;
|
|
}
|
|
}
|
|
/* keep the "{" balanced */
|
|
fprintf(fcase, " default:\n break ;\n }\n") ;
|
|
fprintf(context, "#define MAXD %d\n", maxd) ;
|
|
dumptree((LOGICAL) (argc > 1)) ;
|
|
fprintf(stderr, "NUMCON set to %d\n", NUMCON) ;
|
|
fprintf(stderr, "NUMDELIM set to %d\n", NUMDELIM) ;
|
|
exit(errexit) ;
|
|
}
|