668 lines
14 KiB
C
668 lines
14 KiB
C
/*
 * CDE - Common Desktop Environment
 *
 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
 *
 * These libraries and programs are free software; you can
 * redistribute them and/or modify them under the terms of the GNU
 * Lesser General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * These libraries and programs are distributed in the hope that
 * they will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with these libraries and programs; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */
|
|
/* Copyright (c) 1995 FUJITSU LIMITED */
|
|
/* All Rights Reserved */
|
|
|
|
/* $TOG: DocParser.C /main/16 1998/04/17 11:48:07 mgreess $ */
|
|
#ifdef DEBUG
|
|
#include "assert.h"
|
|
#endif
|
|
#include "Debug.h"
|
|
#include "StyleSheetExceptions.h"
|
|
#include "DocParser.h"
|
|
#include "Resolver.h"
|
|
#include "Element.h"
|
|
#include "AttributeList.h"
|
|
|
|
#define DATA_BUF_SIZ 4096
|
|
|
|
#if defined(SC3)
|
|
// NUL-terminate the characters currently held by a strstream-based
// output buffer and hand the same stream back so the call can be chained
// (e.g. terminate(s).str()).
//
// The terminator is written one position past the last character that
// was inserted (index pcount()); pcount() itself is not changed.
// Note that ostrstream::str() freezes the buffer; the callers in this
// file undo that with rdbuf()->freeze(0) once they are done with the text.
static ostrstream& terminate(ostrstream& ost)
{
  char* const buffer = ost.str();

  buffer[ost.pcount()] = '\0';

  return ost;
}
|
|
#endif
|
|
|
|
// Build a parser that reports everything it reads to the resolver `r`.
//
// The parser starts with element suppression switched off
// (f_ignoring_element == 0).  The scratch stream f_output, into which
// read_tag()/read_data() place tag names and character data, is set up
// in one of two ways:
//   - SC3 builds:   a strstream writing into a heap buffer of
//                   DATA_BUF_SIZ bytes owned by this object (f_buffer);
//   - other builds: a default-constructed string stream; a separate
//                   stringbuf (f_streambuf) is also allocated and is
//                   released by the destructor.
DocParser::DocParser(Resolver &r)
  : f_ignoring_element(0),
    f_resolver(r),
#if defined(SC3)
    f_buffer(new char[DATA_BUF_SIZ]),
    f_output(f_buffer, DATA_BUF_SIZ)
#else
    f_streambuf(new stringbuf()),
    f_output()
#endif
{
  // nothing else to do: all state is set up by the initializer list
}
|
|
|
|
// Release the scratch storage allocated by the constructor.
DocParser::~DocParser()
{
#if defined(SC3)
  // deleting a null pointer is a no-op, so no guard is required
  delete[] f_buffer;
#else
  // Original author's warning: "this causes a free memory read when
  // f_output is deleted as part of this object...nothing we can do
  // about it".
  // NOTE(review): the constructor in this file default-constructs
  // f_output and never hands it f_streambuf, so the warning may be
  // stale -- confirm against the class declaration.
  delete f_streambuf ;
#endif
}
|
|
|
|
unsigned int
|
|
DocParser::parse(istream &input)
|
|
{
|
|
f_resolver.Begin();
|
|
unsigned int ok = rawParse(input);
|
|
f_resolver.End();
|
|
return ok;
|
|
}
|
|
|
|
unsigned int
|
|
DocParser::rawParse(istream &input)
|
|
{
|
|
string data;
|
|
|
|
input.unsetf(ios::skipws);
|
|
|
|
f_ignoring_element = 0 ;
|
|
|
|
switch(read_tag(input, f_output))
|
|
{
|
|
case StartTag:
|
|
{
|
|
#if defined(SC3)
|
|
Symbol name(gElemSymTab->intern(terminate(f_output).str()));
|
|
f_output.rdbuf()->freeze(0);
|
|
#else
|
|
data = f_output.str().c_str();
|
|
|
|
/*
|
|
MESSAGE(cerr, "StartTag case:");
|
|
debug(cerr, f_output.str().size());
|
|
debug(cerr, data.c_str());
|
|
*/
|
|
|
|
Symbol name(gElemSymTab->intern(data.c_str()));
|
|
#endif
|
|
process(input, f_output, name, 1, 1);
|
|
}
|
|
break;
|
|
case EndTag:
|
|
case AttributeSection:
|
|
case OliasAttribute:
|
|
throw(CASTDPUTEXCEPT docParserUnexpectedTag());
|
|
break;
|
|
case NoTag:
|
|
throw(CASTDPUDEXCEPT docParserUnexpectedData());
|
|
break;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
void
|
|
update_last_seen_child_name(Symbol*& last_seen_child_name, unsigned int& child_relative_sibling_number, const Symbol& new_child_name)
|
|
{
|
|
if ( last_seen_child_name == 0 ||
|
|
!(*last_seen_child_name == Symbol(new_child_name))
|
|
)
|
|
{
|
|
delete last_seen_child_name ;
|
|
last_seen_child_name = new Symbol(new_child_name);
|
|
child_relative_sibling_number= 1;
|
|
} else
|
|
child_relative_sibling_number++;
|
|
|
|
return;
|
|
}
|
|
|
|
void
|
|
DocParser::process(istream &input, ostringstream &output,
|
|
const Symbol &name,
|
|
unsigned int sibling_number, unsigned int this_sibling_number)
|
|
{
|
|
ON_DEBUG(cerr << "process(" << name << ") -> " << sibling_number << endl);
|
|
|
|
Symbol* last_seen_child_name = 0;
|
|
|
|
unsigned int child_relative_sibling_number = 0;
|
|
|
|
unsigned int child = 1 ; // sibling numbers for child elements
|
|
|
|
#if !defined(SC3)
|
|
string pstring;
|
|
#endif
|
|
string data;
|
|
|
|
char c ;
|
|
while ((input >> c) && (c == '\n'));
|
|
input.putback(c);
|
|
|
|
if (input.eof())
|
|
throw(CASTDPUEEXCEPT docParserUnexpectedEof());
|
|
|
|
int ignore = 0 ;
|
|
|
|
mtry
|
|
{
|
|
// process whatever comes right after start tag
|
|
TagType tt = read_tag(input, output);
|
|
switch (tt)
|
|
{
|
|
case StartTag:
|
|
{
|
|
ON_DEBUG(cerr << "beginElement" << endl);
|
|
// have to begin this element before processing child elements
|
|
if (!f_ignoring_element)
|
|
{
|
|
ignore = f_resolver.beginElement(new Element(name,
|
|
sibling_number, 0,
|
|
0,
|
|
this_sibling_number));
|
|
f_ignoring_element = ignore ;
|
|
}
|
|
|
|
|
|
/////////////////////////////
|
|
// first child of this node
|
|
/////////////////////////////
|
|
#if defined(SC3)
|
|
Symbol name(gElemSymTab->intern(terminate(f_output).str()));
|
|
|
|
update_last_seen_child_name(last_seen_child_name,
|
|
child_relative_sibling_number, name);
|
|
|
|
f_output.rdbuf()->freeze(0);
|
|
|
|
process(input, output, name, child++, child_relative_sibling_number);
|
|
#else
|
|
data = f_output.str().c_str();
|
|
//#if !defined(SC3)
|
|
// data[f_output.str().size()] = '\0';
|
|
//#endif
|
|
Symbol name(gElemSymTab->intern(data.c_str()));
|
|
update_last_seen_child_name(last_seen_child_name,
|
|
child_relative_sibling_number, name);
|
|
|
|
process(input, output, name,
|
|
child++, child_relative_sibling_number);
|
|
#endif
|
|
}
|
|
break;
|
|
case EndTag:
|
|
// hit an end tag right after start tag
|
|
#ifdef DEBUG
|
|
{
|
|
#if defined(SC3)
|
|
data = terminate(f_output).str();
|
|
f_output.rdbuf()->freeze(0);
|
|
#else
|
|
data = f_output.str().c_str();
|
|
//#ifdef _IBMR2
|
|
//#if !defined(SC3)
|
|
// data[f_output.str().size()] = '\0';
|
|
//#endif
|
|
#endif
|
|
cerr << "EndTag: " << data.c_str() << endl;
|
|
assert(gElemSymTab->intern(data.c_str()) == name);
|
|
}
|
|
#endif
|
|
|
|
// this node
|
|
if (!f_ignoring_element)
|
|
{
|
|
int ignore = f_resolver.beginElement(new Element(name,
|
|
sibling_number,
|
|
0, 0, this_sibling_number));
|
|
if (!ignore)
|
|
f_resolver.endElement(name);
|
|
}
|
|
return ; // EXIT FUNCTION
|
|
break;
|
|
case AttributeSection:
|
|
{
|
|
AttributeList *attrs = 0;
|
|
AttributeList *olias_attrs = 0;
|
|
|
|
mtry
|
|
{
|
|
process_attributes(input, output, attrs, olias_attrs);
|
|
|
|
if (!f_ignoring_element)
|
|
{
|
|
//////////////////////////////
|
|
// this node with attributes
|
|
//////////////////////////////
|
|
ignore = f_resolver.beginElement(new Element(name,
|
|
sibling_number,
|
|
attrs,
|
|
olias_attrs,
|
|
this_sibling_number
|
|
));
|
|
f_ignoring_element = ignore ;
|
|
}
|
|
}
|
|
mcatch_any()
|
|
{
|
|
/*
|
|
delete attrs ;
|
|
delete olias_attrs ;
|
|
*/
|
|
attrs = 0 ;
|
|
olias_attrs = 0 ;
|
|
}
|
|
end_try;
|
|
}
|
|
break;
|
|
case OliasAttribute:
|
|
throw(CASTDPUTEXCEPT docParserUnexpectedTag());
|
|
break;
|
|
|
|
case NoTag:
|
|
{
|
|
if (!f_ignoring_element)
|
|
{
|
|
// this node
|
|
ignore = f_resolver.beginElement(new Element(name,
|
|
sibling_number,
|
|
0, 0, this_sibling_number));
|
|
f_ignoring_element = ignore ;
|
|
}
|
|
// process data
|
|
read_data(input, output);
|
|
|
|
if (!f_ignoring_element)
|
|
{
|
|
// the str() call seems to add the null byte to the stream
|
|
// and increment the pcount, so we must make sure it gets
|
|
// called first
|
|
#if defined(SC3)
|
|
char *pstring = terminate(f_output).str();
|
|
int size = f_output.pcount();
|
|
f_resolver.data(pstring, size);
|
|
f_output.rdbuf()->freeze(0);
|
|
#else
|
|
pstring = f_output.str().c_str();
|
|
int size = pstring.size() + 1;
|
|
f_resolver.data(pstring.c_str(), size);
|
|
#endif
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
while ((tt = read_tag(input, output)) != EndTag)
|
|
switch (tt)
|
|
{
|
|
case StartTag:
|
|
{
|
|
/////////////////////////////
|
|
// second child and beyond.
|
|
/////////////////////////////
|
|
data = f_output.str().c_str();
|
|
#if defined(SC3)
|
|
f_output.rdbuf()->freeze(0);
|
|
#endif
|
|
|
|
/*
|
|
MESSAGE(cerr, "StartTag case2");
|
|
debug(cerr, data);
|
|
debug(cerr, f_output.str().size());
|
|
*/
|
|
|
|
Symbol name(gElemSymTab->intern(data.c_str()));
|
|
update_last_seen_child_name(last_seen_child_name,
|
|
child_relative_sibling_number, name);
|
|
|
|
process(input, output, name, child++, child_relative_sibling_number);
|
|
}
|
|
break;
|
|
case EndTag: // should never get this
|
|
break;
|
|
// we have already processed these for this tag
|
|
case AttributeSection:
|
|
case OliasAttribute:
|
|
throw(CASTDPUTEXCEPT docParserUnexpectedTag());
|
|
break;
|
|
case NoTag:
|
|
{
|
|
read_data(input, output);
|
|
|
|
if (!f_ignoring_element)
|
|
{
|
|
// the str() call seems to add the null byte to the stream
|
|
// and increment the pcount, so we must make sure it gets
|
|
// called first
|
|
#if defined(SC3)
|
|
char *pstring = f_output.str();
|
|
int size = f_output.pcount();
|
|
*(pstring + size) = 0;
|
|
f_resolver.data(pstring, size);
|
|
f_output.rdbuf()->freeze(0);
|
|
#else
|
|
pstring = f_output.str().c_str();
|
|
int size = pstring.size() + 1;
|
|
f_resolver.data(pstring.c_str(), size);
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
#ifdef DEBUG
|
|
{
|
|
#if defined(SC3)
|
|
data = terminate(f_output).str();
|
|
f_output.rdbuf()->freeze(0);
|
|
#else
|
|
data = f_output.str().c_str();
|
|
#endif
|
|
cerr << "EndTag: " << data.c_str() << endl;
|
|
assert(gElemSymTab->intern(data.c_str()) == name);
|
|
}
|
|
#endif
|
|
// hit end tag, end processing
|
|
if (!f_ignoring_element)
|
|
f_resolver.endElement(name);
|
|
|
|
// if we set ignore flag, unset it
|
|
if (ignore)
|
|
f_ignoring_element = 0;
|
|
}
|
|
mcatch_any()
|
|
{
|
|
rethrow;
|
|
}
|
|
end_try;
|
|
ON_DEBUG(cerr << "exit process: " << name << endl);
|
|
delete last_seen_child_name;
|
|
}
|
|
|
|
|
|
void
|
|
DocParser::process_attributes(istream &input, ostringstream &output,
|
|
AttributeList *&attrs,
|
|
AttributeList *&olias_attrs)
|
|
{
|
|
#if !defined(SC3)
|
|
string theData;
|
|
#endif
|
|
TagType tt ;
|
|
|
|
Attribute* newAttribute = 0;
|
|
|
|
AttributeList* orig_attrs = attrs;
|
|
AttributeList* orig_olias_attrs = olias_attrs;
|
|
|
|
mtry {
|
|
while ((tt = read_tag(input,output)) != NoTag)
|
|
{
|
|
switch (tt)
|
|
{
|
|
case StartTag:
|
|
{
|
|
#if !defined(SC3)
|
|
theData = f_output.str().c_str();
|
|
#endif
|
|
if (!attrs)
|
|
attrs = new AttributeList ;
|
|
|
|
newAttribute =
|
|
process_attribute(input, output,
|
|
#if defined(SC3)
|
|
gSymTab->intern(terminate(f_output).str()),
|
|
gSymTab->intern(f_streambuf->str()),
|
|
#else
|
|
gSymTab->intern(theData.c_str()),
|
|
#endif
|
|
StartTag
|
|
);
|
|
attrs->add(newAttribute);
|
|
break;
|
|
}
|
|
case EndTag:
|
|
return ; // EXIT FUNCTION
|
|
|
|
case AttributeSection:
|
|
throw(CASTDPUTEXCEPT docParserUnexpectedTag());
|
|
break;
|
|
case OliasAttribute:
|
|
#if !defined(SC3)
|
|
theData = f_output.str().c_str();
|
|
#endif
|
|
// mirrors attribute
|
|
if (!olias_attrs)
|
|
olias_attrs = new AttributeList ;
|
|
|
|
newAttribute =
|
|
process_attribute(input, output,
|
|
#if defined(SC3)
|
|
gSymTab->intern(terminate(f_output).str()),
|
|
gSymTab->intern(f_streambuf->str()),
|
|
#else
|
|
gSymTab->intern(theData.c_str()),
|
|
#endif
|
|
OliasAttribute
|
|
);
|
|
|
|
olias_attrs->add(newAttribute);
|
|
break;
|
|
case NoTag:
|
|
throw(CASTDPUDEXCEPT docParserUnexpectedData());
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
mcatch_any()
|
|
{
|
|
delete newAttribute;
|
|
|
|
if ( orig_attrs == 0 ) {
|
|
delete attrs;
|
|
attrs = 0;
|
|
}
|
|
|
|
if ( orig_olias_attrs == 0 ) {
|
|
delete olias_attrs;
|
|
olias_attrs = 0;
|
|
}
|
|
|
|
rethrow;
|
|
}
|
|
end_try;
|
|
}
|
|
|
|
// Read the value of a single attribute and the end tag that closes it.
//
//   input   character stream positioned just after the attribute's
//           opening tag
//   output  scratch stream filled by read_data()/read_tag()
//   name    attribute name
//   tt      kind of tag that opened the attribute (StartTag for ordinary
//           attributes, OliasAttribute for OLIAS-internal ones)
//
// Returns a newly allocated Attribute built from `name` and a strdup'd
// copy of the value; the caller takes over the returned object.
// (Presumably the Attribute assumes ownership of the copied value --
// confirm against the Attribute class.)
//
// Throws docParserUnexpectedTag / docParserUnexpectedData if anything
// other than an end tag follows the value; exceptions from read_tag()
// propagate.  In every such case the Attribute is deleted first.
Attribute *
DocParser::process_attribute(istream &input, ostringstream &output,
                             const Symbol &name, TagType tt)
{
  //ON_DEBUG(cerr << "process_attribute: " << name << endl);

  // If the attribute is OLIAS internal, we use DocParser's own
  // read_data().  This prevents the attribute value from being changed
  // by a renderer-specific override of read_data().
  //
  // Example: LoutDocparser::read_data() quotes any '.' char, which
  // would alter the graphic locator value of the OLIAS internal
  // attribute #GRAPHIC.
  if ( tt == OliasAttribute ) {
    DocParser::read_data(input, output);
  } else
    (void)read_data(input, output);

#if defined(SC3)
  char *data = f_output.str();
  *(data + f_output.pcount()) = 0;
  f_output.rdbuf()->freeze(0);
  Attribute *attr = new Attribute(name, strdup(data));
#else
  // c_str() cuts the text at the terminator written by read_data()
  const string data(f_output.str().c_str());
  Attribute *attr = new Attribute(name, strdup(data.c_str()));
#endif

  // Everything from here until the end tag is confirmed can throw --
  // including read_tag() itself (unexpected EOF, unknown tag) -- so the
  // cleanup lives in one handler rather than in the individual cases.
  mtry
    {
      switch (read_tag(input, output))
        {
        case StartTag:
        case AttributeSection:
        case OliasAttribute:
          throw(CASTDPUTEXCEPT docParserUnexpectedTag());
          break;
        case NoTag:
          throw(CASTDPUDEXCEPT docParserUnexpectedData());
          break;
        case EndTag:
          break;
        }
    }
  mcatch_any()
    {
      delete attr ;
      rethrow;
    }
  end_try;

  return attr ;
}
|
|
|
|
|
|
// Read the next tag from `input` and classify it.
//
// `output` is rewound to its start and, for every result except NoTag
// and AttributeSection, receives the tag name followed by a terminating
// NUL (ends).  Characters left over from earlier, longer writes may
// remain behind the terminator, which is why callers cut the text with
// c_str().
//
// Leading newlines are skipped.  Results:
//   NoTag             next character is not '<'; it is pushed back
//   EndTag            "</name>"
//   AttributeSection  "<#>"
//   OliasAttribute    "<#name>"
//   StartTag          "<name>"
//
// Throws docParserUnexpectedEof if the input is exhausted before a tag
// starts or directly after "<" / "<#", and unknownTagException for "<>".
// A tag name that is cut short by the end of input is still returned;
// the end of input is then reported by the next read.
DocParser::TagType
DocParser::read_tag(istream &input, ostringstream &output)
{
  output.seekp(streampos(0));

  TagType tt = StartTag;

  // initialised so that a failed read never leaves an indeterminate value
  char c = '\0';

  // strip newlines before/after tags
  while ((input >> c) && (c == '\n'));
  if (input.eof())
    throw(CASTDPUEEXCEPT docParserUnexpectedEof());

  if (c != '<')
    {
      input.putback(c);
      return NoTag;
    }

  // A failed read would leave the previous character in c and have it
  // mistaken for tag content, so end of input is reported right here.
  if (!(input >> c))
    throw(CASTDPUEEXCEPT docParserUnexpectedEof());

  switch (c)
    {
    case '/':
      tt = EndTag ;
      break;
    case '#':
      if (!(input >> c))
        throw(CASTDPUEEXCEPT docParserUnexpectedEof());
      if (c == '>')
        return AttributeSection ; // EXIT

      tt = OliasAttribute ;
      output << c;		// keep char we just read
      break;
    case '>':
      throw(CASTUTEXCEPT unknownTagException());
      // NOT REACHED
    default:
      output << c ;		// keep char we just read
      break;
    }

  // get (remainder of) tag name
  while ((input >> c) && (c != '>'))
    output << c ;
  output << ends;

  return tt ;
}
|
|
|
|
|
|
void
|
|
DocParser::read_data(istream &input, ostringstream &output)
|
|
{
|
|
char c ;
|
|
|
|
output.seekp(streampos(0));
|
|
|
|
while ((input >> c) && (c != '<'))
|
|
{
|
|
// handle entities
|
|
if (c == '&')
|
|
{
|
|
char tmpbuf[64];
|
|
unsigned int tmplen = 0;
|
|
while ((input >> c ) && (c != ';'))
|
|
{
|
|
tmpbuf[tmplen++] = c ;
|
|
if (tmplen > 63)
|
|
{
|
|
cerr << "Temp Buf overflow (ampersand problem)" << endl;
|
|
throw(CASTEXCEPT Exception());
|
|
}
|
|
}
|
|
if (input.eof())
|
|
throw(CASTDPUEEXCEPT docParserUnexpectedEof());
|
|
|
|
tmpbuf[tmplen] = 0 ;
|
|
|
|
#ifdef ENTITY_DEBUG
|
|
cerr << "Entity: " << tmpbuf << endl;
|
|
#endif
|
|
|
|
if ((!strcmp(tmpbuf, "hardreturn")) ||
|
|
(!strcmp(tmpbuf, "lnfeed")))
|
|
c = '\n';
|
|
else
|
|
if ((!strcmp(tmpbuf, "lang")) ||
|
|
(!strcmp(tmpbuf, "lt")))
|
|
c = '<' ;
|
|
else
|
|
if (!strcmp(tmpbuf, "amp"))
|
|
c = '&' ;
|
|
else
|
|
if (!strcmp(tmpbuf, "nbsp")) // non-break space
|
|
{
|
|
if (MB_CUR_MAX > 1) output << 0xC2;
|
|
c = (char)0xA0;
|
|
}
|
|
else
|
|
c = ' ';
|
|
|
|
}
|
|
|
|
output << c;
|
|
}
|
|
|
|
output << ends;
|
|
|
|
// can never run out of input while reading data, tags must be balanced
|
|
if (input.eof())
|
|
throw(CASTDPUEEXCEPT docParserUnexpectedEof());
|
|
|
|
input.putback(c);
|
|
|
|
}
|