2841 lines
78 KiB
C
2841 lines
78 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these libraries and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
|
|
// Copyright (c) 1994, 1995 James Clark
|
|
// See the file COPYING for copying permission.
|
|
|
|
#include "splib.h"
|
|
#include "Parser.h"
|
|
#include "macros.h"
|
|
#include "SdFormalError.h"
|
|
#include "MessageBuilder.h"
|
|
#include "ParserMessages.h"
|
|
#include "MessageArg.h"
|
|
#include "CharsetRegistry.h"
|
|
#include "ISetIter.h"
|
|
#include "token.h"
|
|
#include "TokenMessageArg.h"
|
|
#include "constant.h"
|
|
#include "SdText.h"
|
|
#include "NumericCharRefOrigin.h"
|
|
|
|
#ifdef SP_NAMESPACE
|
|
namespace SP_NAMESPACE {
|
|
#endif
|
|
|
|
class CharSwitcher {
|
|
public:
|
|
CharSwitcher();
|
|
void addSwitch(WideChar from, WideChar to);
|
|
SyntaxChar subst(WideChar c);
|
|
size_t nSwitches() const;
|
|
Boolean switchUsed(size_t i) const;
|
|
WideChar switchFrom(size_t i) const;
|
|
WideChar switchTo(size_t i) const;
|
|
private:
|
|
Vector<PackedBoolean> switchUsed_;
|
|
Vector<WideChar> switches_;
|
|
};
|
|
|
|
// Information about the SGML declaration being built.
|
|
|
|
struct SdBuilder {
|
|
SdBuilder();
|
|
void addFormalError(const Location &, const MessageType1 &, const StringC &);
|
|
Ptr<Sd> sd;
|
|
Ptr<Syntax> syntax;
|
|
CharsetDecl syntaxCharsetDecl;
|
|
CharsetInfo syntaxCharset;
|
|
CharSwitcher switcher;
|
|
Boolean externalSyntax;
|
|
Boolean valid;
|
|
IList<SdFormalError> formalErrorList;
|
|
};
|
|
|
|
class CharsetMessageArg : public MessageArg {
|
|
public:
|
|
CharsetMessageArg(const ISet<WideChar> &set);
|
|
MessageArg *copy() const;
|
|
void append(MessageBuilder &) const;
|
|
private:
|
|
ISet<WideChar> set_;
|
|
};
|
|
|
|
struct SdParam {
|
|
typedef unsigned char Type;
|
|
enum {
|
|
invalid,
|
|
eE,
|
|
minimumLiteral,
|
|
mdc,
|
|
ellipsis,
|
|
number,
|
|
capacityName,
|
|
name,
|
|
paramLiteral,
|
|
generalDelimiterName,
|
|
referenceReservedName,
|
|
quantityName,
|
|
reservedName // Sd::ReservedName is added to this
|
|
};
|
|
Type type;
|
|
StringC token;
|
|
Text literalText;
|
|
String<SyntaxChar> paramLiteralText;
|
|
union {
|
|
Number n;
|
|
Sd::Capacity capacityIndex;
|
|
Syntax::Quantity quantityIndex;
|
|
Syntax::ReservedName reservedNameIndex;
|
|
Syntax::DelimGeneral delimGeneralIndex;
|
|
};
|
|
};
|
|
|
|
class AllowedSdParams {
|
|
public:
|
|
AllowedSdParams(SdParam::Type,
|
|
SdParam::Type = SdParam::invalid,
|
|
SdParam::Type = SdParam::invalid,
|
|
SdParam::Type = SdParam::invalid,
|
|
SdParam::Type = SdParam::invalid,
|
|
SdParam::Type = SdParam::invalid);
|
|
Boolean param(SdParam::Type) const;
|
|
SdParam::Type get(int i) const;
|
|
private:
|
|
enum { maxAllow = 6 };
|
|
SdParam::Type allow_[maxAllow];
|
|
};
|
|
|
|
class AllowedSdParamsMessageArg : public MessageArg {
|
|
public:
|
|
AllowedSdParamsMessageArg(const AllowedSdParams &allow,
|
|
const ConstPtr<Sd> &sd);
|
|
MessageArg *copy() const;
|
|
void append(MessageBuilder &) const;
|
|
private:
|
|
AllowedSdParams allow_;
|
|
ConstPtr<Sd> sd_;
|
|
};
|
|
|
|
struct StandardSyntaxSpec {
|
|
struct AddedFunction {
|
|
const char *name;
|
|
Syntax::FunctionClass functionClass;
|
|
SyntaxChar syntaxChar;
|
|
};
|
|
const AddedFunction *addedFunction;
|
|
size_t nAddedFunction;
|
|
Boolean shortref;
|
|
};
|
|
|
|
// Added function characters shared by both built-in syntaxes: just TAB
// (code 9) as a SEPCHAR.
static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
  { "TAB", Syntax::cSEPCHAR, 9 },
};

// Built-in syntax with the shortref flag set.
static StandardSyntaxSpec refSyntax = {
  coreFunctions,
  SIZEOF(coreFunctions),
  1
};

// Built-in syntax with the shortref flag clear.
static StandardSyntaxSpec coreSyntax = {
  coreFunctions,
  SIZEOF(coreFunctions),
  0
};
|
|
|
|
void Parser::doInit()
|
|
{
|
|
if (cancelled()) {
|
|
allDone();
|
|
return;
|
|
}
|
|
// When document entity doesn't exist, don't give any errors
|
|
// other than the cannot open error.
|
|
if (currentInput()->get(messenger()) == InputSource::eE) {
|
|
if (currentInput()->accessError()) {
|
|
allDone();
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
currentInput()->ungetToken();
|
|
const CharsetInfo &initCharset = sd().docCharset();
|
|
ISet<WideChar> missing;
|
|
findMissingMinimum(initCharset, missing);
|
|
if (!missing.isEmpty()) {
|
|
message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
|
|
giveUp();
|
|
return;
|
|
}
|
|
Boolean found = 0;
|
|
StringC systemId;
|
|
if (scanForSgmlDecl(initCharset))
|
|
found = 1;
|
|
else {
|
|
currentInput()->ungetToken();
|
|
if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
|
|
InputSource *in = entityManager().open(systemId,
|
|
initCharset,
|
|
new InputSourceOrigin,
|
|
0,
|
|
messenger());
|
|
if (in) {
|
|
pushInput(in);
|
|
if (scanForSgmlDecl(initCharset))
|
|
found = 1;
|
|
else {
|
|
message(ParserMessages::badDefaultSgmlDecl);
|
|
popInputStack();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (found) {
|
|
if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
|
|
size_t nS = currentInput()->currentTokenLength() - 6;
|
|
for (size_t i = 0; i < nS; i++)
|
|
currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
|
|
currentMarkup()->addDelim(Syntax::dMDO);
|
|
currentMarkup()->addSdReservedName(Sd::rSGML,
|
|
currentInput()->currentTokenStart()
|
|
+ (currentInput()->currentTokenLength() - 4),
|
|
4);
|
|
}
|
|
Syntax *syntaxp = new Syntax(sd());
|
|
CharSwitcher switcher;
|
|
if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
|
|
switcher)) {
|
|
giveUp();
|
|
return;
|
|
}
|
|
syntaxp->implySgmlChar(sd().docCharset());
|
|
setSyntax(syntaxp);
|
|
compileSdModes();
|
|
ConstPtr<Sd> refSd(sdPointer());
|
|
ConstPtr<Syntax> refSyntax(syntaxPointer());
|
|
if (!parseSgmlDecl()) {
|
|
giveUp();
|
|
return;
|
|
}
|
|
// queue an SGML declaration event
|
|
eventHandler().sgmlDecl(new (eventAllocator())
|
|
SgmlDeclEvent(sdPointer(),
|
|
syntaxPointer(),
|
|
instanceSyntaxPointer(),
|
|
refSd,
|
|
refSyntax,
|
|
currentInput()->nextIndex(),
|
|
systemId,
|
|
markupLocation(),
|
|
currentMarkup()));
|
|
if (inputLevel() == 2) {
|
|
// FIXME perhaps check for junk after SGML declaration
|
|
popInputStack();
|
|
}
|
|
}
|
|
else {
|
|
if (!implySgmlDecl()) {
|
|
giveUp();
|
|
return;
|
|
}
|
|
// queue an SGML declaration event
|
|
eventHandler().sgmlDecl(new (eventAllocator())
|
|
SgmlDeclEvent(sdPointer(),
|
|
syntaxPointer()));
|
|
}
|
|
|
|
// Now we have sd and syntax set up, prepare to parse the prolog.
|
|
compilePrologModes();
|
|
setPhase(prologPhase);
|
|
}
|
|
|
|
// Install a concrete syntax when the document has no SGML declaration.
// The built-in reference syntax is used when options().shortref is set,
// otherwise the core syntax; quantities are taken from options().
// Returns 0 if the standard syntax could not be set up, 1 otherwise.
Boolean Parser::implySgmlDecl()
{
  // Counted pointer: the Syntax is released on the failure path instead
  // of being leaked.
  Ptr<Syntax> syntaxp(new Syntax(sd()));
  const StandardSyntaxSpec *spec = options().shortref ? &refSyntax
                                                      : &coreSyntax;
  CharSwitcher switcher;
  if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher))
    return 0;
  syntaxp->implySgmlChar(sd().docCharset());
  for (int i = 0; i < Syntax::nQuantity; i++)
    syntaxp->setQuantity(i, options().quantity[i]);
  setSyntax(syntaxp.pointer());
  return 1;
}
|
|
|
|
// Populate `syn` with one of the built-in concrete syntaxes described by
// `spec`, translating every syntax-reference character through `switcher`
// into `docCharset`.  Returns 1 only if every step succeeded; later steps
// are still attempted after an earlier one fails.
Boolean Parser::setStandardSyntax(Syntax &syn,
                                  const StandardSyntaxSpec &spec,
                                  const CharsetInfo &docCharset,
                                  CharSwitcher &switcher)
{
  // Syntax reference character set of the built-in syntaxes: the first
  // 128 codes map to themselves.
  static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
    { 0, 128, 0 },
  };
  static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
                                           SIZEOF(syntaxCharsetRanges));
  static CharsetInfo syntaxCharset(syntaxCharsetDesc);

  Boolean ok = checkSwitches(switcher, syntaxCharset) ? 1 : 0;

  // Report switch targets outside the 128-character reference set.
  for (size_t s = 0; s < switcher.nSwitches(); s++) {
    if (switcher.switchTo(s) >= 128)
      message(ParserMessages::switchNotInCharset,
              NumberMessageArg(switcher.switchTo(s)));
  }

  static const Char shunchar[] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    127, 255
  };
  for (size_t s = 0; s < SIZEOF(shunchar); s++)
    syn.addShunchar(shunchar[s]);
  syn.setShuncharControls();

  // RE, RS and SPACE, with their codes in the reference character set.
  static Syntax::StandardFunction standardFunctions[3] = {
    Syntax::fRE, Syntax::fRS, Syntax::fSPACE
  };
  static SyntaxChar functionChars[3] = { 13, 10, 32 };
  for (size_t f = 0; f < 3; f++) {
    Char docChar;
    if (!translateSyntax(switcher, syntaxCharset, docCharset,
                         functionChars[f], docChar)
        || !checkNotFunction(syn, docChar)) {
      ok = 0;
      continue;
    }
    syn.setStandardFunction(standardFunctions[f], docChar);
  }

  // Added functions listed by the spec.
  for (size_t f = 0; f < spec.nAddedFunction; f++) {
    const StandardSyntaxSpec::AddedFunction &added = spec.addedFunction[f];
    Char docChar;
    if (!translateSyntax(switcher, syntaxCharset, docCharset,
                         added.syntaxChar, docChar)
        || !checkNotFunction(syn, docChar)) {
      ok = 0;
      continue;
    }
    syn.addFunctionChar(docCharset.execToDesc(added.name),
                        added.functionClass,
                        docChar);
  }

  static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
  ISet<Char> nameCharSet;
  for (size_t k = 0; k < 2; k++) {
    Char docChar;
    if (translateSyntax(switcher, syntaxCharset, docCharset,
                        nameChars[k], docChar))
      nameCharSet.add(docChar);
    else
      ok = 0;
  }
  if (checkNmchars(nameCharSet, syn))
    syn.addNameCharacters(nameCharSet);
  else
    ok = 0;

  syn.setNamecaseGeneral(1);
  syn.setNamecaseEntity(0);

  if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
    ok = 0;
  setRefNames(syn, docCharset);
  syn.enterStandardFunctionNames();

  if (spec.shortref) {
    if (!addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
      ok = 0;
  }
  return ok;
}
|
|
|
|
// For every general delimiter role that `syntax` does not yet define,
// install the delimiter string from the table below, translated through
// `switcher` and `syntaxCharset` into `docCharset`.  Characters that
// cannot be represented are collected and reported in one message.
// Returns 0 if any delimiter could not be translated or was rejected by
// checkGeneralDelim, 1 otherwise.
Boolean Parser::setRefDelimGeneral(Syntax &syntax,
                                   const CharsetInfo &syntaxCharset,
                                   const CharsetInfo &docCharset,
                                   CharSwitcher &switcher)
{
  // Column 3 from Figure 3
  // One row per Syntax::DelimGeneral value; rows hold one or two
  // character codes, zero-padded.
  static const char delims[][2] = {
    { 38 },
    { 45, 45 },
    { 38, 35 },
    { 93 },
    { 91 },
    { 93 },
    { 91 },
    { 38 },
    { 60, 47 },
    { 41 },
    { 40 },
    { 34 },
    { 39 },
    { 62 },
    { 60, 33 },
    { 45 },
    { 93, 93 },
    { 47 },
    { 63 },
    { 124 },
    { 37 },
    { 62 },
    { 60, 63 },
    { 43 },
    { 59 },
    { 42 },
    { 35 },
    { 44 },
    { 60 },
    { 62 },
    { 61 },
  };

  Boolean ok = 1;
  ISet<WideChar> unrepresentable;

  for (int role = 0; role < Syntax::nDelimGeneral; role++) {
    // Leave delimiters that are already defined alone.
    if (syntax.delimGeneral(role).size() != 0)
      continue;

    StringC translated;
    size_t len;
    for (len = 0; len < 2 && delims[role][len] != '\0'; len++) {
      UnivChar univChar = translateUniv(delims[role][len], switcher,
                                        syntaxCharset);
      Char docChar;
      if (univToDescCheck(docCharset, univChar, docChar))
        translated += docChar;
      else {
        unrepresentable += univChar;
        ok = 0;
      }
    }

    // Only install the delimiter if every character was translated.
    if (translated.size() != len)
      continue;
    if (checkGeneralDelim(syntax, translated))
      syntax.setDelimGeneral(role, translated);
    else
      ok = 0;
  }

  if (!unrepresentable.isEmpty())
    message(ParserMessages::missingSignificant646,
            CharsetMessageArg(unrepresentable));
  return ok;
}
|
|
|
|
// Give every reserved name that `syntax` has not yet defined its default
// spelling from the table below, converted to `docCharset`.  A message is
// issued whenever a default spelling is already in use as a reserved name
// in `syntax`.
void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
{
  // Indexed by Syntax::ReservedName.
  static const char *const referenceNames[] = {
    "ANY",
    "ATTLIST",
    "CDATA",
    "CONREF",
    "CURRENT",
    "DEFAULT",
    "DOCTYPE",
    "ELEMENT",
    "EMPTY",
    "ENDTAG",
    "ENTITIES",
    "ENTITY",
    "FIXED",
    "ID",
    "IDLINK",
    "IDREF",
    "IDREFS",
    "IGNORE",
    "IMPLIED",
    "INCLUDE",
    "INITIAL",
    "LINK",
    "LINKTYPE",
    "MD",
    "MS",
    "NAME",
    "NAMES",
    "NDATA",
    "NMTOKEN",
    "NMTOKENS",
    "NOTATION",
    "NUMBER",
    "NUMBERS",
    "NUTOKEN",
    "NUTOKENS",
    "O",
    "PCDATA",
    "PI",
    "POSTLINK",
    "PUBLIC",
    "RCDATA",
    "RE",
    "REQUIRED",
    "RESTORE",
    "RS",
    "SDATA",
    "SHORTREF",
    "SIMPLE",
    "SPACE",
    "STARTTAG",
    "SUBDOC",
    "SYSTEM",
    "TEMP",
    "USELINK",
    "USEMAP"
  };

  for (int idx = 0; idx < Syntax::nNames; idx++) {
    StringC docName(docCharset.execToDesc(referenceNames[idx]));
    Syntax::ReservedName clash;
    if (syntax.lookupReservedName(docName, &clash))
      message(ParserMessages::nameReferenceReservedName,
              StringMessageArg(docName));
    const Boolean undefined
      = (syntax.reservedName(Syntax::ReservedName(idx)).size() == 0);
    if (undefined)
      syntax.setName(idx, docName);
  }
}
|
|
|
|
// Add the short reference delimiters from the table below to `syntax`,
// translated through `switcher` and `syntaxCharset` into `docCharset`.
// Delimiters containing a character that cannot be represented are
// skipped and the characters are reported in one message.  Always
// returns 1.
Boolean Parser::addRefDelimShortref(Syntax &syntax,
                                    const CharsetInfo &syntaxCharset,
                                    const CharsetInfo &docCharset,
                                    CharSwitcher &switcher)
{
  // Column 2 from Figure 4
  // Up to three character codes per row, zero-padded.
  static const char delimShortref[][3] = {
    { 9 },
    { 13 },
    { 10 },
    { 10, 66 },
    { 10, 13 },
    { 10, 66, 13 },
    { 66, 13 },
    { 32 },
    { 66, 66 },
    { 34 },
    { 35 },
    { 37 },
    { 39 },
    { 40 },
    { 41 },
    { 42 },
    { 43 },
    { 44 },
    { 45 },
    { 45, 45 },
    { 58 },
    { 59 },
    { 61 },
    { 64 },
    { 91 },
    { 93 },
    { 94 },
    { 95 },
    { 123 },
    { 124 },
    { 125 },
    { 126 },
  };

  ISet<WideChar> unrepresentable;

  for (size_t row = 0; row < SIZEOF(delimShortref); row++) {
    StringC translated;
    size_t len;
    for (len = 0; len < 3 && delimShortref[row][len] != '\0'; len++) {
      Char docChar;
      UnivChar univChar = translateUniv(delimShortref[row][len], switcher,
                                        syntaxCharset);
      if (univToDescCheck(docCharset, univChar, docChar))
        translated += docChar;
      else
        unrepresentable += univChar;
    }

    // Skip delimiters that lost a character in translation.
    if (translated.size() != len)
      continue;

    // The duplicate check is only made when switches are present.
    if (switcher.nSwitches() > 0 && syntax.isValidShortref(translated))
      message(ParserMessages::duplicateDelimShortref,
              StringMessageArg(translated));
    else
      syntax.addDelimShortref(translated, docCharset);
  }

  if (!unrepresentable.isEmpty())
    message(ParserMessages::missingSignificant646,
            CharsetMessageArg(unrepresentable));
  return 1;
}
|
|
|
|
// Determine whether the document starts with an SGML declaration.
|
|
// There is no current syntax at this point.
|
|
|
|
// Scan the current input for optional white space (RS, RE, SPACE, TAB)
// followed by "<!SGML" in either case, not followed by a name character.
// Returns 1 when that prefix is present, 0 otherwise.  The characters
// read are left in the current token.
Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
{
  Char rs;
  Char re;
  Char space;
  Char tab;
  if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs)
      || !univToDescCheck(initCharset, UnivCharsetDesc::re, re)
      || !univToDescCheck(initCharset, UnivCharsetDesc::space, space)
      || !univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
    return 0;

  InputSource *in = currentInput();
  Xchar c;
  // Skip leading white space.
  for (c = in->get(messenger());
       c == rs || c == space || c == re || c == tab;
       c = in->tokenChar(messenger()))
    ;

  if (c != initCharset.execToDesc('<'))
    return 0;
  if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
    return 0;

  // Match the name SGML, accepting either case for each letter.
  static const char upperName[] = { 'S', 'G', 'M', 'L' };
  static const char lowerName[] = { 's', 'g', 'm', 'l' };
  for (size_t k = 0; k < SIZEOF(upperName); k++) {
    c = in->tokenChar(messenger());
    if (c != initCharset.execToDesc(upperName[k])
        && c != initCharset.execToDesc(lowerName[k]))
      return 0;
  }

  c = in->tokenChar(messenger());
  // Don't recognize this if SGML is followed by a name character.
  if (c == InputSource::eE)
    return 1;
  // Drop the lookahead character from the token.
  in->endToken(in->currentTokenLength() - 1);
  if (c == initCharset.execToDesc('-') || c == initCharset.execToDesc('.'))
    return 0;

  UnivChar univ;
  if (!initCharset.descToUniv(c, univ))
    return 1;
  const Boolean isLower
    = (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26);
  const Boolean isUpper
    = (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26);
  const Boolean isDigit
    = (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10);
  if (isLower || isUpper || isDigit)
    return 0;
  return 1;
}
|
|
|
|
void Parser::findMissingMinimum(const CharsetInfo &charset,
|
|
ISet<WideChar> &missing)
|
|
{
|
|
Char to;
|
|
size_t i;
|
|
for (i = 0; i < 26; i++) {
|
|
if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
|
|
missing += UnivCharsetDesc::A + i;
|
|
if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
|
|
missing += UnivCharsetDesc::a + i;
|
|
}
|
|
for (i = 0; i < 10; i++) {
|
|
Char to;
|
|
if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
|
|
missing += UnivCharsetDesc::zero + i;
|
|
}
|
|
static const UnivChar special[] = {
|
|
39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
|
|
};
|
|
|
|
for (i = 0; i < SIZEOF(special); i++)
|
|
if (!univToDescCheck(charset, special[i], to))
|
|
missing += special[i];
|
|
}
|
|
|
|
|
|
// Parse the body of an SGML declaration: the version literal, then each
// section in order, then the closing MDC.  On success the new Sd and
// syntax(es) are installed in the parser and 1 is returned; 0 is returned
// as soon as a section fails or marks the builder invalid.
Boolean Parser::parseSgmlDecl()
{
  SdParam parm;
  SdBuilder builder;

  if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
    return 0;
  StringC expectedVersion(sd().execToDoc("ISO 8879:1986"));
  if (parm.literalText.string() != expectedVersion)
    message(ParserMessages::standardVersion,
            StringMessageArg(parm.literalText.string()));

  builder.sd = new Sd;

  // Section parsers, in the order the sections must appear.
  typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
  static SdParser parsers[] = {
    &Parser::sdParseDocumentCharset,
    &Parser::sdParseCapacity,
    &Parser::sdParseScope,
    &Parser::sdParseSyntax,
    &Parser::sdParseFeatures,
    &Parser::sdParseAppinfo,
  };
  for (size_t step = 0; step < SIZEOF(parsers); step++) {
    if (!(this->*(parsers[step]))(builder, parm) || !builder.valid)
      return 0;
  }

  if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
    return 0;

  // Queued formal errors are only delivered for a formal declaration.
  if (builder.sd->formal()) {
    while (!builder.formalErrorList.empty()) {
      SdFormalError *pending = builder.formalErrorList.get();
      ParserState *state = this; // work around lcc 3.0 bug
      pending->send(*state);
      delete pending;
    }
  }

  setSd(builder.sd.pointer());

  if (!builder.sd->scopeInstance())
    setSyntax(builder.syntax.pointer());
  else {
    // With an instance-scoped syntax the prolog gets a separate syntax
    // built from the reference syntax; its SGML characters are copied
    // from the declared syntax and both are cross-checked.
    Syntax *proSyntax = new Syntax(sd());
    CharSwitcher switcher;
    setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
    proSyntax->setSgmlChar(*builder.syntax->charSet(Syntax::sgmlChar));
    ISet<WideChar> invalidSgmlChar;
    proSyntax->checkSgmlChar(builder.sd->docCharset(),
                             builder.syntax.pointer(),
                             invalidSgmlChar);
    builder.syntax->checkSgmlChar(builder.sd->docCharset(),
                                  proSyntax,
                                  invalidSgmlChar);
    if (!invalidSgmlChar.isEmpty())
      message(ParserMessages::invalidSgmlChar,
              CharsetMessageArg(invalidSgmlChar));
    setSyntaxes(proSyntax, builder.syntax.pointer());
  }

  if (syntax().multicode())
    currentInput()->setMarkupScanTable(syntax().markupScanTable());
  return 1;
}
|
|
|
|
// Parse the CHARSET section (document character set).  On success the
// character set description and declaration are stored in sdBuilder.sd
// and a fresh Syntax, seeded with the set of used characters, is stored
// in sdBuilder.syntax.  Returns 0 on a parse error or when minimum
// characters are missing from the declared set.
Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
{
  // CHARSET BASESET ...
  const Boolean sawHeader
    = parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
                   parm)
      && parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
                      parm);
  if (!sawHeader)
    return 0;

  CharsetDecl decl;
  UnivCharsetDesc desc;
  if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
    return 0;

  ISet<WideChar> missing;
  findMissingMinimum(desc, missing);
  if (!missing.isEmpty()) {
    message(ParserMessages::missingMinimumChars,
            CharsetMessageArg(missing));
    return 0;
  }

  ISet<Char> usedChars;
  decl.usedSet(usedChars);
  sdBuilder.sd->setDocCharsetDesc(desc);
  sdBuilder.sd->setDocCharsetDecl(decl);
  sdBuilder.syntax = new Syntax(*sdBuilder.sd);
  sdBuilder.syntax->setSgmlChar(usedChars);
  return 1;
}
|
|
|
|
// Parse a character set description: one or more BASESET/DESCSET
// sections, each a list of (start, count, mapping) triples.  `decl`
// receives the declaration as written and `desc` the resulting mapping.
// `isDocument` selects between the document character set (ranges are
// limited to charMax and the list ends at CAPACITY) and the syntax
// reference character set (the list ends at FUNCTION).  Returns 0 on a
// parse error; on return `parm` holds the keyword that ended the list.
Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
                               SdParam &parm,
                               Boolean isDocument,
                               CharsetDecl &decl,
                               UnivCharsetDesc &desc)
{
  decl.clear();
  ISet<WideChar> multiplyDeclared;
  // This is for checking whether the syntax reference character set
  // is ISO 646 when SCOPE is INSTANCE.
  Boolean maybeISO646 = 1;
  // Keyword that may follow the last triple.
  const SdParam::Type follow = (isDocument
                                ? SdParam::reservedName + Sd::rCAPACITY
                                : SdParam::reservedName + Sd::rFUNCTION);

  do {
    // ---- BASESET: public identifier of the base character set ----
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    UnivCharsetDesc baseDesc;
    PublicId id;
    Boolean found;
    PublicId::TextClass textClass;
    const MessageType1 *err;
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(), *err, id.string());
    else if (id.getTextClass(textClass)
             && textClass != PublicId::CHARSET)
      sdBuilder.addFormalError(currentLocation(),
                               ParserMessages::basesetTextClass,
                               id.string());

    // Resolve the base set: first as an external entity, then through
    // the built-in registry.
    Boolean givenError;
    if (referencePublic(id, PublicId::CHARSET, givenError))
      found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
    else if (givenError)
      found = 0;
    else {
      found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
      if (!found && options().warnSgmlDecl)
        message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
    }
    if (!found)
      maybeISO646 = 0;
    decl.addSection(id);

    // ---- DESCSET: list of triples ----
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
                      parm))
      return 0;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;

    do {
      // parm.n currently holds the first number of the triple.
      WideChar rangeStart = parm.n;
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
        return 0;
      Number count = parm.n;
      Number usableCount;
      if (options().warnSgmlDecl && count == 0)
        message(ParserMessages::zeroNumberOfCharacters);
      decl.rangeDeclared(rangeStart, count, multiplyDeclared);

      // For the document character set, clip the range at charMax.
      if (isDocument
          && count > 0
          && (rangeStart > charMax || count - 1 > charMax - rangeStart)) {
        message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
        usableCount = rangeStart > charMax ? 0 : 1 + (charMax - rangeStart);
        sdBuilder.valid = 0;
        maybeISO646 = 0;
      }
      else
        usableCount = count;

      // Third element: base number, minimum literal, or UNUSED.
      if (!parseSdParam(AllowedSdParams(SdParam::number,
                                        SdParam::minimumLiteral,
                                        SdParam::reservedName + Sd::rUNUSED),
                        parm))
        return 0;

      if (parm.type == SdParam::number) {
        decl.addRange(rangeStart, count, parm.n);
        if (found && usableCount > 0) {
          ISet<WideChar> baseMissing;
          desc.addBaseRange(baseDesc,
                            rangeStart,
                            rangeStart + (usableCount - 1),
                            parm.n,
                            baseMissing);
          if (!baseMissing.isEmpty() && options().warnSgmlDecl)
            message(ParserMessages::basesetCharsMissing,
                    CharsetMessageArg(baseMissing));
        }
      }
      else if (parm.type == SdParam::reservedName + Sd::rUNUSED) {
        decl.addRange(rangeStart, count);
      }
      else if (parm.type == SdParam::minimumLiteral) {
        {
          UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
          // At most 256 characters are mapped for one literal.
          if (usableCount > 256) {
            message(ParserMessages::tooManyCharsMinimumLiteral);
            usableCount = 256;
          }
          for (Number k = 0; k < usableCount; k++)
            desc.addRange(rangeStart + k, rangeStart + k, c);
        }
        maybeISO646 = 0;
        decl.addRange(rangeStart, count, parm.literalText.string());
      }
      else {
        CANNOT_HAPPEN();
      }

      // Next: another triple, another BASESET, or the closing keyword.
      if (!parseSdParam(AllowedSdParams(SdParam::number,
                                        SdParam::reservedName + Sd::rBASESET,
                                        follow),
                        parm))
        return 0;
    } while (parm.type == SdParam::number);
  } while (parm.type == SdParam::reservedName + Sd::rBASESET);

  if (!multiplyDeclared.isEmpty())
    message(ParserMessages::duplicateCharNumbers,
            CharsetMessageArg(multiplyDeclared));

  // Report gaps between consecutive declared ranges.
  ISet<WideChar> declaredSet;
  decl.declaredSet(declaredSet);
  ISetIter<WideChar> declaredIter(declaredSet);
  WideChar lo, hi, prevHi;
  if (declaredIter.next(lo, hi)) {
    ISet<WideChar> holes;
    prevHi = hi;
    while (declaredIter.next(lo, hi)) {
      if (lo - prevHi > 1)
        holes.addRange(prevHi + 1, lo - 1);
      prevHi = hi;
    }
    if (!holes.isEmpty())
      message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
  }

  if (!isDocument && sdBuilder.sd->scopeInstance()) {
    // If scope is INSTANCE, syntax reference character set
    // must be same as reference.
    UnivCharsetDescIter descIter(desc);
    WideChar descMin, descMax;
    UnivChar univMin;
    if (!descIter.next(descMin, descMax, univMin)
        || descMin != 0
        || descMax != 127
        || univMin != 0
        || !maybeISO646)
      message(ParserMessages::scopeInstanceSyntaxCharset);
  }
  return 1;
}
|
|
|
|
// Read (start, count, mapping) triples from the current input until its
// end, adding the ranges to `desc`.  Returns 1 when the entity end is
// reached cleanly; on a parse error the input is popped and 0 returned.
Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
{
  SdParam parm;
  while (parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE), parm)) {
    if (parm.type == SdParam::eE)
      return 1;

    WideChar rangeStart = parm.n;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      break;
    Number count = parm.n;
    if (!parseSdParam(AllowedSdParams(SdParam::number,
                                      SdParam::minimumLiteral,
                                      SdParam::reservedName + Sd::rUNUSED),
                      parm))
      break;

    switch (parm.type) {
    case SdParam::number:
      if (count > 0)
        desc.addRange(rangeStart, rangeStart + (count - 1), parm.n);
      break;
    case SdParam::minimumLiteral:
      {
        UnivChar c = sd.nameToUniv(parm.literalText.string());
        // At most 256 characters are mapped for one literal.
        if (count > 256) {
          message(ParserMessages::tooManyCharsMinimumLiteral);
          count = 256;
        }
        for (Number k = 0; k < count; k++)
          desc.addRange(rangeStart + k, rangeStart + k, c);
      }
      break;
    default:
      // UNUSED: nothing to add.
      break;
    }
  }
  // Reached only after a parse error.
  popInputStack();
  return 0;
}
|
|
|
|
// Parse the CAPACITY section.  Accepts SGMLREF or PUBLIC followed by a
// public identifier; an explicit list of capacity name/number pairs is
// read either from the declaration itself or from the referenced public
// entity.  Values are stored in sdBuilder.sd.  On success `parm` holds
// the SCOPE keyword that follows the section.  Returns 0 on parse error.
Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
                                    SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;

  // Set when a public capacity set entity has been pushed as input.
  Boolean pushed = 0;
  if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    PublicId id;
    PublicId::TextClass textClass;
    const MessageType1 *err;
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(),
                               *err,
                               id.string());
    else if (id.getTextClass(textClass)
             && textClass != PublicId::CAPACITY)
      sdBuilder.addFormalError(currentLocation(),
                               ParserMessages::capacityTextClass,
                               id.string());
    const StringC &str = id.string();
    // The reference capacity set needs no lookup.
    if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
        && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
      Boolean givenError;
      if (referencePublic(id, PublicId::CAPACITY, givenError))
        pushed = 1;
      else if (!givenError)
        message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
    }
    if (!pushed)
      return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
                          parm);
  }

  PackedBoolean capacitySpecified[Sd::nCapacity];
  int i;
  for (i = 0; i < Sd::nCapacity; i++)
    capacitySpecified[i] = 0;

  if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
    return 0;

  // The list ends at end of entity when reading a public capacity set,
  // otherwise at the SCOPE keyword.
  const int listEnd = pushed ? int(SdParam::eE)
                             : SdParam::reservedName + Sd::rSCOPE;
  do {
    Sd::Capacity capacityIndex = parm.capacityIndex;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;

    if (!capacitySpecified[capacityIndex]) {
      sdBuilder.sd->setCapacity(capacityIndex, parm.n);
      capacitySpecified[capacityIndex] = 1;
    }
    else if (options().warnSgmlDecl)
      // Name the capacity that was repeated.  (Using the loop counter
      // `i` here would index one past the end of the capacity table.)
      message(ParserMessages::duplicateCapacity,
              StringMessageArg(sd().capacityName(capacityIndex)));

    if (!parseSdParam(AllowedSdParams(SdParam::capacityName, listEnd),
                      parm))
      return 0;
  } while (parm.type == SdParam::capacityName);

  // Capacity 0 is compared against every other capacity.
  Number totalcap = sdBuilder.sd->capacity(0);
  for (i = 1; i < Sd::nCapacity; i++)
    if (sdBuilder.sd->capacity(i) > totalcap)
      message(ParserMessages::capacityExceedsTotalcap,
              StringMessageArg(sd().capacityName(i)));

  if (pushed)
    return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
                        parm);
  return 1;
}
|
|
|
|
// Try to resolve public identifier `id` through the entity catalog and
// open it as a new input.  Returns 1 with the input pushed on success.
// Returns 0 otherwise; `givenError` is set to 1 only when the catalog
// lookup succeeded but the entity could not be opened.
Boolean Parser::referencePublic(const PublicId &id,
                                PublicId::TextClass entityType,
                                Boolean &givenError)
{
  givenError = 0;

  StringC sysid;
  if (!entityCatalog().lookupPublic(id.string(),
                                    sd().docCharset(),
                                    messenger(),
                                    sysid))
    return 0;

  Location loc = currentLocation();
  eventHandler().sgmlDeclEntity(new (eventAllocator())
                                SgmlDeclEntityEvent(id,
                                                    entityType,
                                                    sysid,
                                                    loc));
  Ptr<EntityOrigin> origin(new EntityOrigin(loc));
  if (currentMarkup())
    currentMarkup()->addEntityStart(origin);

  InputSource *in = entityManager().open(sysid,
                                         sd().docCharset(),
                                         origin.pointer(),
                                         0,
                                         messenger());
  if (in) {
    pushInput(in);
    return 1;
  }
  givenError = 1;
  return 0;
}
|
|
|
|
// Parse the value of the SCOPE section: INSTANCE or DOCUMENT.  INSTANCE
// is recorded on sdBuilder.sd.  Returns 0 on parse error.
Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
{
  const AllowedSdParams scopeKeywords(SdParam::reservedName + Sd::rINSTANCE,
                                      SdParam::reservedName + Sd::rDOCUMENT);
  if (!parseSdParam(scopeKeywords, parm))
    return 0;

  const Boolean isInstance
    = (parm.type == SdParam::reservedName + Sd::rINSTANCE);
  if (isInstance)
    sdBuilder.sd->setScopeInstance();
  return 1;
}
|
|
|
|
// Parse the SYNTAX section.  Either a public concrete syntax (optionally
// with SWITCHES) or an explicit syntax beginning with SHUNCHAR is
// accepted.  A public identifier is first matched against the built-in
// syntaxes via lookupSyntax and otherwise opened as an external entity
// that is parsed as an explicit syntax.  Returns 0 on parse error;
// problems that make the declaration unusable clear sdBuilder.valid.
Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
                    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
                                    SdParam::reservedName + Sd::rPUBLIC),
                    parm))
    return 0;

  if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
    if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
      return 0;
    PublicId id;
    const MessageType1 *err;
    PublicId::TextClass textClass;
    if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
      sdBuilder.addFormalError(currentLocation(),
                               *err,
                               id.string());
    else if (id.getTextClass(textClass)
             && textClass != PublicId::SYNTAX)
      sdBuilder.addFormalError(currentLocation(),
                               ParserMessages::syntaxTextClass,
                               id.string());

    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
                                      SdParam::reservedName + Sd::rSWITCHES),
                      parm))
      return 0;

    if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
      // SWITCHES is a list of number pairs ending at FEATURES.
      if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
        return 0;
      for (;;) {
        SyntaxChar c = parm.n;
        if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
          return 0;
        sdBuilder.switcher.addSwitch(c, parm.n);
        if (!parseSdParam(AllowedSdParams(SdParam::number,
                                          SdParam::reservedName
                                          + Sd::rFEATURES),
                          parm))
          return 0;
        if (parm.type != SdParam::number)
          break;
      }
    }

    const StandardSyntaxSpec *spec = lookupSyntax(id);
    if (spec) {
      if (!setStandardSyntax(*sdBuilder.syntax,
                             *spec,
                             sdBuilder.sd->docCharset(),
                             sdBuilder.switcher))
        sdBuilder.valid = 0;
    }
    else {
      Boolean givenError;
      if (referencePublic(id, PublicId::SYNTAX, givenError)) {
        sdBuilder.externalSyntax = 1;
        // A separate SdParam is used for the external entity so that
        // `parm` keeps the parameter already read from the declaration.
        SdParam parm2;
        if (!parseSdParam(AllowedSdParams(SdParam::reservedName
                                          + Sd::rSHUNCHAR),
                          parm2))
          return 0;
        if (!sdParseExplicitSyntax(sdBuilder, parm2))
          return 0;
      }
      else {
        if (!givenError)
          message(ParserMessages::unknownPublicSyntax,
                  StringMessageArg(id.string()));
        sdBuilder.valid = 0;
      }
    }
  }
  else {
    if (!sdParseExplicitSyntax(sdBuilder, parm))
      return 0;
  }

  if (!sdBuilder.sd->scopeInstance()) {
    // we know the significant chars now
    ISet<WideChar> invalidSgmlChar;
    sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
                                    0,
                                    invalidSgmlChar);
    if (!invalidSgmlChar.isEmpty())
      message(ParserMessages::invalidSgmlChar,
              CharsetMessageArg(invalidSgmlChar));
  }
  checkSyntaxNamelen(*sdBuilder.syntax);
  checkSwitchesMarkup(sdBuilder.switcher);
  return 1;
}
|
|
|
|
// Parse an explicitly specified concrete syntax by handing the parameter
// stream to each section parser in a fixed order: SHUNCHAR, syntax
// character set, FUNCTION, NAMING, DELIM, NAMES, QUANTITY.
// Returns 0 as soon as one section parser fails, 1 if all succeed.
Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
                                      SdParam &parm)
{
  typedef Boolean (Parser::*SectionParser)(SdBuilder &, SdParam &);
  static SectionParser sectionParsers[] = {
    &Parser::sdParseShunchar,
    &Parser::sdParseSyntaxCharset,
    &Parser::sdParseFunction,
    &Parser::sdParseNaming,
    &Parser::sdParseDelim,
    &Parser::sdParseNames,
    &Parser::sdParseQuantity
  };
  const size_t nSections = SIZEOF(sectionParsers);
  size_t section = 0;
  while (section < nSections) {
    const SectionParser parseSection = sectionParsers[section];
    if (!(this->*parseSection)(sdBuilder, parm))
      return 0;
    section++;
  }
  return 1;
}
|
|
|
|
// Map a public identifier onto one of the built-in syntax specifications.
// The identifier must be ISO-owned with owner "ISO 8879:1986" or
// "ISO 8879-1986", have text class SYNTAX, and have the description
// "Reference" (-> refSyntax) or "Core" (-> coreSyntax).
// Returns 0 when the identifier does not name a built-in syntax.
const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
{
  PublicId::OwnerType ownerType;
  const Boolean isoOwned = (id.getOwnerType(ownerType)
                            && ownerType == PublicId::ISO);
  if (!isoOwned)
    return 0;

  // The same string is reused for the owner and then the description.
  StringC field;
  if (!id.getOwner(field))
    return 0;
  const Boolean ownerIsSgmlStandard
    = (field == sd().execToDoc("ISO 8879:1986")
       || field == sd().execToDoc("ISO 8879-1986"));
  if (!ownerIsSgmlStandard)
    return 0;

  PublicId::TextClass textClass;
  const Boolean isSyntaxClass = (id.getTextClass(textClass)
                                 && textClass == PublicId::SYNTAX);
  if (!isSyntaxClass)
    return 0;

  if (!id.getDescription(field))
    return 0;
  if (field == sd().execToDoc("Reference"))
    return &refSyntax;
  if (field == sd().execToDoc("Core"))
    return &coreSyntax;
  return 0;
}
|
|
|
|
// Parse the syntax-reference character set and store it in the builder,
// then validate the SWITCHES against it and report any minimum characters
// it lacks. Returns 0 only if the character set itself fails to parse;
// the later checks report messages but do not change the return value.
Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
{
  UnivCharsetDesc syntaxDesc;
  const Boolean parsed = sdParseCharset(sdBuilder, parm, 0,
                                        sdBuilder.syntaxCharsetDecl,
                                        syntaxDesc);
  if (!parsed)
    return 0;
  sdBuilder.syntaxCharset.set(syntaxDesc);

  CharSwitcher &switcher = sdBuilder.switcher;
  checkSwitches(switcher, sdBuilder.syntaxCharset);
  // Every switch target must be declared in the syntax character set.
  for (size_t k = 0; k < switcher.nSwitches(); k++) {
    if (sdBuilder.syntaxCharsetDecl.charDeclared(switcher.switchTo(k)))
      continue;
    message(ParserMessages::switchNotInCharset,
            NumberMessageArg(switcher.switchTo(k)));
  }

  ISet<WideChar> missingMinimum;
  findMissingMinimum(sdBuilder.syntaxCharset, missingMinimum);
  if (!missingMinimum.isEmpty())
    message(ParserMessages::missingMinimumChars,
            CharsetMessageArg(missingMinimum));
  return 1;
}
|
|
|
|
// Parse the SHUNCHAR parameters: either NONE, or CONTROLS and/or a list
// of character numbers, in each case terminated by the BASESET keyword.
// Numbers greater than charMax are silently ignored.
// Returns 0 on a parameter parse failure, 1 otherwise.
Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
                                    SdParam::reservedName + Sd::rCONTROLS,
                                    SdParam::number), parm))
    return 0;

  const Boolean isNone = (parm.type == SdParam::reservedName + Sd::rNONE);
  if (isNone) {
    // NONE must be followed directly by BASESET.
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
                      parm))
      return 0;
    return 1;
  }

  // First item is either CONTROLS or a character number.
  if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
    sdBuilder.syntax->setShuncharControls();
  else if (parm.n <= charMax)
    sdBuilder.syntax->addShunchar(Char(parm.n));

  // Remaining items are character numbers, up to the BASESET keyword.
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
                                      SdParam::number), parm))
      return 0;
    if (parm.type != SdParam::number)
      return 1;
    if (parm.n <= charMax)
      sdBuilder.syntax->addShunchar(Char(parm.n));
  }
}
|
|
|
|
// Parse the FUNCTION section of the concrete syntax.
// First the three standard functions RE, RS and SPACE, each followed by a
// character number; then any number of added functions of the form
// "name class number" (class is FUNCHAR, MSICHAR, MSOCHAR, MSSCHAR or
// SEPCHAR). The list ends when a name is followed by LCNMSTRT, in which
// case that name is expected to be the NAMING keyword.
// Returns 0 on a parameter parse failure, 1 otherwise; semantic problems
// are reported via message() and/or by clearing sdBuilder.valid.
Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
{
  static Sd::ReservedName standardNames[3] = {
    Sd::rRE, Sd::rRS, Sd::rSPACE
  };
  for (int fn = 0; fn < 3; fn++) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName
                                      + standardNames[fn]),
                      parm))
      return 0;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    Char docChar;
    if (!translateSyntax(sdBuilder, parm.n, docChar))
      continue;
    if (checkNotFunction(*sdBuilder.syntax, docChar))
      sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(fn),
                                            docChar);
    else
      sdBuilder.valid = 0;
  }

  Boolean sawMsichar = 0;
  Boolean sawMsochar = 0;
  for (;;) {
    // Function name; a parameter literal is only allowed in an
    // external syntax.
    if (!parseSdParam(sdBuilder.externalSyntax
                      ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
                      : AllowedSdParams(SdParam::name),
                      parm))
      return 0;
    const Boolean nameWasLiteral = (parm.type == SdParam::paramLiteral);
    Boolean invalidName = 0;
    StringC functionName;
    if (nameWasLiteral) {
      if (!translateSyntax(sdBuilder, parm.paramLiteralText, functionName))
        invalidName = 1;
    }
    else
      parm.token.swap(functionName);

    // Function class; after a plain name, LCNMSTRT is also accepted and
    // ends the section.
    if (!parseSdParam(nameWasLiteral
                      ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
                                        SdParam::reservedName + Sd::rMSICHAR,
                                        SdParam::reservedName + Sd::rMSOCHAR,
                                        SdParam::reservedName + Sd::rMSSCHAR,
                                        SdParam::reservedName + Sd::rSEPCHAR)
                      : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
                                        SdParam::reservedName + Sd::rMSICHAR,
                                        SdParam::reservedName + Sd::rMSOCHAR,
                                        SdParam::reservedName + Sd::rMSSCHAR,
                                        SdParam::reservedName + Sd::rSEPCHAR,
                                        SdParam::reservedName + Sd::rLCNMSTRT),
                      parm))
      return 0;
    if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
      if (functionName != sd().reservedName(Sd::rNAMING))
        message(ParserMessages::namingBeforeLcnmstrt,
                StringMessageArg(functionName));
      break;
    }

    // A plain name token still has to be translated.
    if (!nameWasLiteral) {
      StringC rawName;
      functionName.swap(rawName);
      if (!translateName(sdBuilder, rawName, functionName))
        invalidName = 1;
    }

    Syntax::FunctionClass functionClass;
    switch (parm.type) {
    case SdParam::reservedName + Sd::rFUNCHAR:
      functionClass = Syntax::cFUNCHAR;
      break;
    case SdParam::reservedName + Sd::rMSICHAR:
      sawMsichar = 1;
      functionClass = Syntax::cMSICHAR;
      break;
    case SdParam::reservedName + Sd::rMSOCHAR:
      sawMsochar = 1;
      functionClass = Syntax::cMSOCHAR;
      break;
    case SdParam::reservedName + Sd::rMSSCHAR:
      functionClass = Syntax::cMSSCHAR;
      break;
    case SdParam::reservedName + Sd::rSEPCHAR:
      functionClass = Syntax::cSEPCHAR;
      break;
    default:
      CANNOT_HAPPEN();
    }

    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    Char docChar;
    // The checks run in this order so that their diagnostics are issued
    // even when the name itself was already found to be invalid.
    if (translateSyntax(sdBuilder, parm.n, docChar)) {
      if (checkNotFunction(*sdBuilder.syntax, docChar)) {
        if (!invalidName) {
          Char existing;
          if (sdBuilder.syntax->lookupFunctionChar(functionName, &existing))
            message(ParserMessages::duplicateFunctionName,
                    StringMessageArg(functionName));
          else
            sdBuilder.syntax->addFunctionChar(functionName, functionClass,
                                              docChar);
        }
      }
    }
  }
  if (sawMsochar && !sawMsichar)
    message(ParserMessages::msocharRequiresMsichar);
  return 1;
}
|
|
|
|
// Parse the NAMING section of the concrete syntax: the LCNMSTRT/UCNMSTRT
// and LCNMCHAR/UCNMCHAR character lists followed by
// "NAMECASE GENERAL (NO|YES) ENTITY (NO|YES)".
//
// The outer loop runs twice: isNamechar == 0 handles the name start pair,
// isNamechar == 1 the name character pair. Each pass first collects the
// lower-case list (up to the keyword that introduces the upper-case list),
// then reads the upper-case list and pairs the two position by position,
// adding both characters to the relevant set and registering a
// substitution when they differ. In an external syntax the lists may also
// contain character numbers and "from - to" ranges.
//
// Returns 0 on a parameter parse failure, 1 otherwise; semantic problems
// are reported via message() and/or by clearing sdBuilder.valid.
Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
{
  // keys[2 * isNamechar] ends the lower-case list of a pass,
  // keys[2 * isNamechar + 1] ends its upper-case list.
  static Sd::ReservedName keys[4] = {
    Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
  };
  int isNamechar = 0;
  ISet<Char> nameStartChar;
  ISet<Char> nameChar;
  do {
    // ---- Lower-case list ------------------------------------------------
    // lc holds the characters in order; a range "a - z" is stored as the
    // two end points, with the index of the first recorded in rangeIndex.
    String<SyntaxChar> lc;
    Vector<size_t> rangeIndex;
    Boolean first = 1;
    // True when the previous literal left a character that can start a range.
    Boolean allowThrough = 0;
    for (;;) {
      if (!parseSdParam(sdBuilder.externalSyntax
                        ? AllowedSdParams(SdParam::reservedName
                                          + keys[isNamechar * 2],
                                          SdParam::paramLiteral,
                                          SdParam::number,
                                          SdParam::ellipsis)
                        : (first
                           ? AllowedSdParams(SdParam::paramLiteral)
                           : AllowedSdParams(SdParam::reservedName
                                             + keys[isNamechar * 2])),
                        parm))
        return 0;
      first = 0;
      Boolean wasRange = 0;
      sdParamConvertToLiteral(parm);
      if (parm.type == SdParam::ellipsis) {
        if (!allowThrough)
          message(ParserMessages::sdInvalidEllipsis);
        if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
                                          SdParam::number),
                          parm))
          return 0;
        sdParamConvertToLiteral(parm);
        if (parm.paramLiteralText.size() == 0)
          message(ParserMessages::sdInvalidEllipsis);
        else if (allowThrough) {
          SyntaxChar n = parm.paramLiteralText[0];
          if (n < lc[lc.size() - 1])
            message(ParserMessages::sdInvalidRange);
          else if (n > lc[lc.size() - 1] + 1)
            rangeIndex.push_back(lc.size() - 1);
        }
        wasRange = 1;
      }
      if (parm.type != SdParam::paramLiteral)
        break;
      lc += parm.paramLiteralText;
      // The first character of a literal that closes a range is the range
      // end and cannot start another one. Compare instead of subtracting:
      // with an empty literal after an ellipsis the unsigned subtraction
      // wrapped around, set allowThrough, and let a following ellipsis
      // index lc[lc.size() - 1] on a possibly empty lc.
      allowThrough = parm.paramLiteralText.size() > size_t(wasRange);
    }

    // ---- Upper-case list, paired with the lower-case list ----------------
    size_t lcPos = 0;               // next unread position in lc
    size_t rangeIndexPos = 0;       // next unread entry of rangeIndex
    unsigned long rangeLeft = 0;    // characters left in the open lc range
    SyntaxChar nextRangeChar = 0;   // next character of the open lc range
    ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
    // Characters read from the previous parameter but not yet paired.
    String<SyntaxChar> chars;
    // Set once the upper-case list has more characters than the lower-case
    // list; the surplus characters are then paired with themselves.
    Boolean runOut = 0;
    first = 1;
    for (;;) {
      if (!parseSdParam(sdBuilder.externalSyntax
                        ? AllowedSdParams(SdParam::reservedName
                                          + keys[isNamechar * 2 + 1],
                                          SdParam::paramLiteral,
                                          SdParam::number,
                                          SdParam::ellipsis)
                        : (first
                           ? AllowedSdParams(SdParam::paramLiteral)
                           : AllowedSdParams(SdParam::reservedName
                                             + keys[isNamechar * 2 + 1])),
                        parm))
        return 0;
      sdParamConvertToLiteral(parm);
      first = 0;
      Boolean isRange = parm.type == SdParam::ellipsis;
      // Pair the pending characters; when a range follows, the last pending
      // character is the range start and is handled with the range below.
      size_t nChars = chars.size();
      if (nChars)
        nChars -= isRange;
      for (size_t i = 0; i < nChars; i++) {
        if (rangeLeft == 0
            && rangeIndexPos < rangeIndex.size()
            && rangeIndex[rangeIndexPos] == lcPos) {
          rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
          nextRangeChar = lc[lcPos];
          lcPos += 2;
          rangeIndexPos += 1;
        }
        Char c;
        if (rangeLeft > 0) {
          rangeLeft--;
          c = nextRangeChar++;
        }
        else if (lcPos < lc.size())
          c = lc[lcPos++];
        else {
          runOut = 1;
          c = chars[i];
        }
        // map from c to chars[i]
        Char transLc, transUc;
        if (translateSyntax(sdBuilder, c, transLc)
            && translateSyntax(sdBuilder, chars[i], transUc)) {
          set.add(transLc);
          if (transLc != transUc) {
            set.add(transUc);
            sdBuilder.syntax->addSubst(transLc, transUc);
          }
        }
      }
      if (isRange) {
        if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
                                          SdParam::number),
                          parm))
          return 0;
        sdParamConvertToLiteral(parm);
        if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
          message(ParserMessages::sdInvalidEllipsis);
        else {
          SyntaxChar start = chars[chars.size() - 1];
          SyntaxChar end = parm.paramLiteralText[0];
          if (start > end)
            message(ParserMessages::sdInvalidRange);
          else {
            size_t count = end + 1 - start;
            while (count > 0) {
              if (rangeLeft == 0
                  && rangeIndexPos < rangeIndex.size()
                  && rangeIndex[rangeIndexPos] == lcPos) {
                rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
                nextRangeChar = lc[lcPos];
                lcPos += 2;
                rangeIndexPos += 1;
              }
              Char c;
              if (rangeLeft > 0) {
                rangeLeft--;
                c = nextRangeChar++;
              }
              else if (lcPos < lc.size())
                c = lc[lcPos++];
              else {
                c = start;
                runOut = 1;
              }
              if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
                // Both lists continue with the same run of characters, so
                // the run maps onto itself and can be added in one call.
                // n is the number of identity pairs covered, including the
                // pair (c, start) already fetched above.
                size_t n = count;
                if (!runOut) {
                  if (rangeLeft < count)
                    n = rangeLeft + 1;
                  // Consume the n - 1 further lower-case range characters
                  // covered here. Leaving rangeLeft untouched made a valid
                  // identical range (e.g. 128-255 against 128-255) end with
                  // rangeLeft > 0 and a spurious length error, or mis-pair
                  // the characters that follow.
                  rangeLeft -= n - 1;
                  nextRangeChar = SyntaxChar(nextRangeChar + (n - 1));
                }
                translateRange(sdBuilder, start, start + (n - 1), set);
                count -= n;
                start += n;
              }
              else {
                Char transLc, transUc;
                if (translateSyntax(sdBuilder, c, transLc)
                    && translateSyntax(sdBuilder, start, transUc)) {
                  set.add(transLc);
                  if (transLc != transUc) {
                    set.add(transUc);
                    sdBuilder.syntax->addSubst(transLc, transUc);
                  }
                }
                count--;
                start++;
              }
            }
          }
        }
        chars.resize(0);
        if (parm.paramLiteralText.size() == 0 && parm.type == SdParam::paramLiteral) {
          // Empty range-end literal: already reported above, and there is
          // nothing after the range end to carry over. Skipping the append
          // avoids size() - 1 wrapping around to a huge length.
          continue;
        }
        if (parm.type != SdParam::paramLiteral)
          break;
        // The first character was the range end; the rest is still pending.
        chars.append(parm.paramLiteralText.data() + 1,
                     parm.paramLiteralText.size() - 1);
      }
      else if (parm.type == SdParam::paramLiteral)
        parm.paramLiteralText.swap(chars);
      else
        break;
    }
    // The two lists must pair up exactly; a longer upper-case list is
    // tolerated only in an external syntax.
    if ((runOut && !sdBuilder.externalSyntax)
        || rangeLeft > 0 || lcPos < lc.size())
      message(isNamechar
              ? ParserMessages::nmcharLength
              : ParserMessages::nmstrtLength);
    if (!checkNmchars(set, *sdBuilder.syntax))
      sdBuilder.valid = 0;
  } while (!isNamechar++);

  // A character may not be both a name start and a name character.
  ISet<WideChar> bad;
  intersectCharSets(nameStartChar, nameChar, bad);
  if (!bad.isEmpty()) {
    sdBuilder.valid = 0;
    message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
  }
  sdBuilder.syntax->addNameStartCharacters(nameStartChar);
  sdBuilder.syntax->addNameCharacters(nameChar);

  // NAMECASE (already consumed as the list terminator) GENERAL (NO|YES)
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
                    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                    SdParam::reservedName + Sd::rYES),
                    parm))
    return 0;
  sdBuilder.syntax->setNamecaseGeneral(parm.type
                                       == SdParam::reservedName + Sd::rYES);

  // ENTITY (NO|YES)
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
                    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                    SdParam::reservedName + Sd::rYES),
                    parm))
    return 0;
  sdBuilder.syntax->setNamecaseEntity(parm.type
                                      == SdParam::reservedName + Sd::rYES);
  return 1;
}
|
|
|
|
// Check that none of the characters in `set` already has a role that
// excludes it from being an added name character: name start character,
// digit, the RE / RS / SPACE function characters, or a separator.
// Each violation is reported with its own message.
// Returns 1 if no conflict was found, 0 otherwise.
Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
{
  Boolean ok = 1;

  {
    ISet<WideChar> letters;
    intersectCharSets(set, *syntax.charSet(Syntax::nameStart), letters);
    if (!letters.isEmpty()) {
      message(ParserMessages::nmcharLetter, CharsetMessageArg(letters));
      ok = 0;
    }
  }
  {
    ISet<WideChar> digits;
    intersectCharSets(set, *syntax.charSet(Syntax::digit), digits);
    if (!digits.isEmpty()) {
      message(ParserMessages::nmcharDigit, CharsetMessageArg(digits));
      ok = 0;
    }
  }

  Char functionChar;
  if (syntax.getStandardFunction(Syntax::fRE, functionChar)) {
    if (set.contains(functionChar)) {
      message(ParserMessages::nmcharRe, NumberMessageArg(functionChar));
      ok = 0;
    }
  }
  if (syntax.getStandardFunction(Syntax::fRS, functionChar)) {
    if (set.contains(functionChar)) {
      message(ParserMessages::nmcharRs, NumberMessageArg(functionChar));
      ok = 0;
    }
  }
  if (syntax.getStandardFunction(Syntax::fSPACE, functionChar)) {
    if (set.contains(functionChar)) {
      message(ParserMessages::nmcharSpace, NumberMessageArg(functionChar));
      ok = 0;
    }
  }

  ISet<WideChar> separators;
  intersectCharSets(set, *syntax.charSet(Syntax::sepchar), separators);
  if (!separators.isEmpty()) {
    message(ParserMessages::nmcharSepchar, CharsetMessageArg(separators));
    ok = 0;
  }
  return ok;
}
|
|
|
|
// Result is a ISet<WideChar>, so it can be used with CharsetMessageArg.
|
|
|
|
void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
|
|
ISet<WideChar> &inter)
|
|
{
|
|
ISetIter<Char> i1(s1);
|
|
ISetIter<Char> i2(s2);
|
|
Char min1, max1, min2, max2;
|
|
if (!i1.next(min1, max1))
|
|
return;
|
|
if (!i2.next(min2, max2))
|
|
return;
|
|
for (;;) {
|
|
if (max1 < min2) {
|
|
if (!i1.next(min1, max1))
|
|
break;
|
|
}
|
|
else if (max2 < min1) {
|
|
if (!i2.next(min2, max2))
|
|
break;
|
|
}
|
|
else {
|
|
// min2 <= max1
|
|
// min1 <= max2
|
|
Char min = min1 > min2 ? min1 : min2;
|
|
Char max = max1 < max2 ? max1 : max2;
|
|
inter.addRange(min, max);
|
|
if (!i1.next(min1, max1))
|
|
break;
|
|
if (!i2.next(min2, max2))
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Parse the DELIM section of the concrete syntax:
//   DELIM GENERAL SGMLREF {general-delimiter-name literal}
//   SHORTREF (SGMLREF|NONE) {literal}  ... terminated by NAMES.
// In an external syntax, delimiters may also be given as character numbers
// and short references as "from - to" ranges of single characters.
// Returns 0 on a parameter parse failure, 1 otherwise; semantic problems
// are reported via message() and/or by clearing sdBuilder.valid.
Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
{
  // Fixed keyword sequence that opens the section.
  static Sd::ReservedName leadIn[3] = {
    Sd::rDELIM, Sd::rGENERAL, Sd::rSGMLREF
  };
  for (int k = 0; k < 3; k++)
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + leadIn[k]),
                      parm))
      return 0;

  // ---- General delimiters -----------------------------------------------
  PackedBoolean seenGeneral[Syntax::nDelimGeneral];
  for (int k = 0; k < Syntax::nDelimGeneral; k++)
    seenGeneral[k] = 0;
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
                                      SdParam::reservedName + Sd::rSHORTREF),
                      parm))
      return 0;
    if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
      break;
    Syntax::DelimGeneral which = parm.delimGeneralIndex;
    if (seenGeneral[which])
      message(ParserMessages::duplicateDelimGeneral,
              StringMessageArg(sd().generalDelimiterName(which)));
    if (!parseSdParam(sdBuilder.externalSyntax
                      ? AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::number)
                      : AllowedSdParams(SdParam::paramLiteral),
                      parm))
      return 0;
    sdParamConvertToLiteral(parm);
    StringC docDelim;
    if (parm.paramLiteralText.size() == 0)
      message(ParserMessages::sdEmptyDelimiter);
    else if (translateSyntax(sdBuilder, parm.paramLiteralText, docDelim)) {
      const SubstTable<Char> *substTable
        = sdBuilder.syntax->generalSubstTable();
      for (size_t pos = 0; pos < docDelim.size(); pos++)
        substTable->subst(docDelim[pos]);
      // checkGeneralDelim runs first so that its diagnostic is issued
      // even for a duplicate specification.
      if (checkGeneralDelim(*sdBuilder.syntax, docDelim)
          && !seenGeneral[which])
        sdBuilder.syntax->setDelimGeneral(which, docDelim);
      else
        sdBuilder.valid = 0;
    }
    seenGeneral[which] = 1;
  }
  if (!setRefDelimGeneral(*sdBuilder.syntax,
                          sdBuilder.syntaxCharset,
                          sdBuilder.sd->docCharset(),
                          sdBuilder.switcher))
    sdBuilder.valid = 0;

  // ---- Short reference delimiters ---------------------------------------
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
                                    SdParam::reservedName + Sd::rNONE),
                    parm))
    return 0;
  if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
    if (!addRefDelimShortref(*sdBuilder.syntax,
                             sdBuilder.syntaxCharset,
                             sdBuilder.sd->docCharset(),
                             sdBuilder.switcher))
      sdBuilder.valid = 0;
  }
  // The most recent literal, kept so that a following ellipsis can use it
  // as the start of a range; emptied after each range.
  String<SyntaxChar> previousLiteral;
  for (;;) {
    if (!parseSdParam(sdBuilder.externalSyntax
                      ? AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::number,
                                        SdParam::ellipsis,
                                        SdParam::reservedName + Sd::rNAMES)
                      : AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::reservedName + Sd::rNAMES),
                      parm))
      return 0;
    sdParamConvertToLiteral(parm);
    if (parm.type == SdParam::ellipsis) {
      if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::number),
                        parm))
        return 0;
      sdParamConvertToLiteral(parm);
      if (parm.paramLiteralText.size() == 0)
        message(ParserMessages::sdEmptyDelimiter);
      else if (previousLiteral.size() != 1
               || parm.paramLiteralText.size() != 1)
        message(ParserMessages::sdInvalidEllipsis);
      else if (parm.paramLiteralText[0] < previousLiteral[0])
        message(ParserMessages::sdInvalidRange);
      else if (parm.paramLiteralText[0] != previousLiteral[0]) {
        // The range start was already added when its literal was read,
        // so only the characters after it are added here.
        ISet<Char> rangeChars;
        translateRange(sdBuilder,
                       previousLiteral[0] + 1,
                       parm.paramLiteralText[0],
                       rangeChars);
        ISet<WideChar> alreadyDefined;
        intersectCharSets(rangeChars,
                          sdBuilder.syntax->delimShortrefSimple(),
                          alreadyDefined);
        int nComplex = sdBuilder.syntax->nDelimShortrefComplex();
        for (int k = 0; k < nComplex; k++) {
          const StringC &complexDelim
            = sdBuilder.syntax->delimShortrefComplex(k);
          if (complexDelim.size() == 1
              && rangeChars.contains(complexDelim[0]))
            alreadyDefined.add(complexDelim[0]);
        }
        if (!alreadyDefined.isEmpty())
          message(ParserMessages::duplicateDelimShortrefSet,
                  CharsetMessageArg(alreadyDefined));
        sdBuilder.syntax->addDelimShortrefs(rangeChars,
                                            sdBuilder.sd->docCharset());
      }
      previousLiteral.resize(0);
    }
    else if (parm.type == SdParam::paramLiteral) {
      parm.paramLiteralText.swap(previousLiteral);
      StringC docDelim;
      if (previousLiteral.size() == 0)
        message(ParserMessages::sdEmptyDelimiter);
      else if (translateSyntax(sdBuilder, previousLiteral, docDelim)) {
        const SubstTable<Char> *substTable
          = sdBuilder.syntax->generalSubstTable();
        for (size_t pos = 0; pos < docDelim.size(); pos++)
          substTable->subst(docDelim[pos]);
        // Single-character delimiters skip the B-sequence check.
        if (docDelim.size() == 1
            || checkShortrefDelim(*sdBuilder.syntax,
                                  sdBuilder.sd->docCharset(),
                                  docDelim)) {
          if (sdBuilder.syntax->isValidShortref(docDelim))
            message(ParserMessages::duplicateDelimShortref,
                    StringMessageArg(docDelim));
          else
            sdBuilder.syntax->addDelimShortref(docDelim,
                                               sdBuilder.sd->docCharset());
        }
      }
    }
    else
      break;
  }
  return 1;
}
|
|
|
|
// Parse the NAMES section of the concrete syntax:
//   SGMLREF {reference-reserved-name replacement-name}  ... up to QUANTITY.
// Each replacement is translated, checked to be a valid name in the
// declared syntax, case-substituted, and installed unless it is ambiguous
// or the reserved name was already given a replacement. Afterwards the
// remaining names get their reference values and the standard function
// names (RE, RS, SPACE) are checked against the added function names.
// Returns 0 on a parameter parse failure, 1 otherwise.
Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;
  for (;;) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
                                      SdParam::referenceReservedName),
                      parm))
      return 0;
    if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
      break;
    Syntax::ReservedName whichName = parm.reservedNameIndex;
    if (!parseSdParam(sdBuilder.externalSyntax
                      ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
                      : AllowedSdParams(SdParam::name),
                      parm))
      return 0;

    StringC replacement;
    Boolean translated;
    if (parm.type == SdParam::name)
      translated = translateName(sdBuilder, parm.token, replacement);
    else
      translated = translateSyntax(sdBuilder, parm.paramLiteralText,
                                   replacement);
    if (!translated)
      continue;

    Syntax::ReservedName clash;
    if (sdBuilder.syntax->lookupReservedName(replacement, &clash)) {
      message(ParserMessages::ambiguousReservedName,
              StringMessageArg(replacement));
      continue;
    }

    // An invalid replacement is reported and then emptied, which makes
    // the later steps treat it as "no usable name".
    if (replacement.size() == 0
        || !sdBuilder.syntax->isNameStartCharacter(replacement[0])) {
      message(ParserMessages::reservedNameSyntax,
              StringMessageArg(replacement));
      replacement.resize(0);
    }
    size_t pos;
    // Check that its a valid name in the declared syntax
    // (- and . might not be name characters).
    for (pos = 1; pos < replacement.size(); pos++) {
      if (sdBuilder.syntax->isNameCharacter(replacement[pos]))
        continue;
      message(ParserMessages::reservedNameSyntax,
              StringMessageArg(replacement));
      replacement.resize(0);
      break;
    }
    for (pos = 0; pos < replacement.size(); pos++)
      sdBuilder.syntax->generalSubstTable()->subst(replacement[pos]);

    if (sdBuilder.syntax->reservedName(whichName).size() > 0)
      message(ParserMessages::duplicateReservedName,
              StringMessageArg(syntax().reservedName(whichName)));
    else if (replacement.size() > 0)
      sdBuilder.syntax->setName(whichName, replacement);
    else
      sdBuilder.valid = 0;
  }
  setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());

  // The names of the standard functions must not collide with the names
  // of added functions.
  static Syntax::ReservedName functionNameIndex[3] = {
    Syntax::rRE, Syntax::rRS, Syntax::rSPACE
  };
  for (int fn = 0; fn < 3; fn++) {
    const StringC &standardName
      = sdBuilder.syntax->reservedName(functionNameIndex[fn]);
    Char existing;
    if (sdBuilder.syntax->lookupFunctionChar(standardName, &existing))
      message(ParserMessages::duplicateFunctionName,
              StringMessageArg(standardName));
  }
  sdBuilder.syntax->enterStandardFunctionNames();
  return 1;
}
|
|
|
|
// Parse the QUANTITY section of the concrete syntax:
//   SGMLREF {quantity-name number}
// The list ends at FEATURES, or at the end of the entity when the syntax
// is being read from an external entity.
// When the declaration has SCOPE INSTANCE, a message is issued for every
// quantity that is smaller in the new syntax than in the current one.
// Returns 0 on a parameter parse failure, 1 otherwise.
Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;
  for (;;) {
    int terminator;
    if (sdBuilder.externalSyntax)
      terminator = int(SdParam::eE);
    else
      terminator = SdParam::reservedName + Sd::rFEATURES;
    if (!parseSdParam(AllowedSdParams(SdParam::quantityName, terminator),
                      parm))
      return 0;
    if (parm.type != SdParam::quantityName)
      break;
    Syntax::Quantity which = parm.quantityIndex;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    sdBuilder.syntax->setQuantity(which, parm.n);
  }
  if (!sdBuilder.sd->scopeInstance())
    return 1;
  for (int q = 0; q < Syntax::nQuantity; q++) {
    const Syntax::Quantity which = Syntax::Quantity(q);
    if (sdBuilder.syntax->quantity(which) < syntax().quantity(which))
      message(ParserMessages::scopeInstanceQuantity,
              StringMessageArg(sd().quantityName(which)));
  }
  return 1;
}
|
|
|
|
// Parse the FEATURES section of the SGML declaration.
// The keywords must appear in the fixed order given by the table below.
// Section headings (MINIMIZE, LINK, OTHER) take no argument; boolean
// features take NO or YES; numeric features take NO, or YES followed by a
// number (NO is stored as 0).
// Values are stored by position: the n-th boolean feature in the table is
// Sd::BooleanFeature(n) and the n-th numeric one is Sd::NumberFeature(n).
// Returns 0 on a parameter parse failure, 1 otherwise.
Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
{
  struct FeatureInfo {
    Sd::ReservedName name;
    // Kind of argument that follows the keyword. The enumerators carry an
    // "arg" prefix rather than leading double underscores, because
    // identifiers containing "__" are reserved for the implementation.
    enum {
      argNone,
      argBoolean,
      argNumber
    } arg;
  };
  static FeatureInfo features[] = {
    { Sd::rMINIMIZE, FeatureInfo::argNone },
    { Sd::rDATATAG, FeatureInfo::argBoolean },
    { Sd::rOMITTAG, FeatureInfo::argBoolean },
    { Sd::rRANK, FeatureInfo::argBoolean },
    { Sd::rSHORTTAG, FeatureInfo::argBoolean },
    { Sd::rLINK, FeatureInfo::argNone },
    { Sd::rSIMPLE, FeatureInfo::argNumber },
    { Sd::rIMPLICIT, FeatureInfo::argBoolean },
    { Sd::rEXPLICIT, FeatureInfo::argNumber },
    { Sd::rOTHER, FeatureInfo::argNone },
    { Sd::rCONCUR, FeatureInfo::argNumber },
    { Sd::rSUBDOC, FeatureInfo::argNumber },
    { Sd::rFORMAL, FeatureInfo::argBoolean }
  };
  int booleanFeature = 0;   // index of the next boolean feature to store
  int numberFeature = 0;    // index of the next numeric feature to store
  for (size_t i = 0; i < SIZEOF(features); i++) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName
                                      + features[i].name), parm))
      return 0;
    if (features[i].arg != FeatureInfo::argNone) {
      // Both boolean and numeric features start with NO or YES.
      if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                        SdParam::reservedName + Sd::rYES),
                        parm))
        return 0;
#if 0
      if (features[i].name == Sd::rDATATAG
          && parm.type == (SdParam::reservedName + Sd::rYES))
        message(ParserMessages::datatagNotImplemented);
#endif
      if (features[i].arg == FeatureInfo::argNumber) {
        if (parm.type == SdParam::reservedName + Sd::rYES) {
          if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
            return 0;
          sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
                                         parm.n);
        }
        else
          sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
                                         0);
      }
      else
        sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
                                        parm.type == (SdParam::reservedName
                                                      + Sd::rYES));
    }
  }
  return 1;
}
|
|
|
|
// Parse "APPINFO (NONE | minimum-literal)" and pass an AppinfoEvent to
// the event handler. The event carries the literal text when one was
// given and, in both cases, the location recorded just after the APPINFO
// keyword. Returns 0 on a parameter parse failure, 1 otherwise.
Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
                    parm))
    return 0;
  Location appinfoLocation(currentLocation());
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
                                    SdParam::minimumLiteral),
                    parm))
    return 0;
  const Boolean hasLiteral = (parm.type == SdParam::minimumLiteral);
  AppinfoEvent *appinfoEvent
    = hasLiteral
      ? new (eventAllocator()) AppinfoEvent(parm.literalText, appinfoLocation)
      : new (eventAllocator()) AppinfoEvent(appinfoLocation);
  eventHandler().appinfo(appinfoEvent);
  return 1;
}
|
|
|
|
// Translate one character of the syntax character set into the document
// character set: apply the switches, map the result to its universal
// character number, then map that number into the document character set.
// On success stores the result in docChar and returns 1; otherwise
// reports translateSyntaxChar (with the switched character number) and
// returns 0.
Boolean Parser::translateSyntax(CharSwitcher &switcher,
                                const CharsetInfo &syntaxCharset,
                                const CharsetInfo &docCharset,
                                WideChar syntaxChar,
                                Char &docChar)
{
  syntaxChar = switcher.subst(syntaxChar);
  UnivChar universal;
  if (syntaxCharset.descToUniv(syntaxChar, universal)) {
    if (univToDescCheck(docCharset, universal, docChar))
      return 1;
  }
  message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
  return 0;
}
|
|
|
|
// Translate the syntax characters start..end (inclusive) into the
// document character set and add the results to `chars`.
// The range is processed in chunks. A character that is the source of a
// switch is translated on its own through translateSyntax(). Any other
// stretch, ending just before the next switched character, is translated
// with translateSyntaxNoSwitch(), which reports how many consecutive
// characters the mapping covers so that they can be added as one range.
// If a chunk cannot be translated it is skipped as a whole.
void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
                            SyntaxChar end, ISet<Char> &chars)
{
  for (;;) {
    SyntaxChar chunkEnd = end;

    // Find the lowest switch source inside [start, end], if any.
    Boolean haveSwitch = 0;
    WideChar lowestSwitch = 0;
    const size_t nSwitches = sdBuilder.switcher.nSwitches();
    for (size_t k = 0; k < nSwitches; k++) {
      WideChar from = sdBuilder.switcher.switchFrom(k);
      if (from < start || end < from)
        continue;
      if (!haveSwitch || from < lowestSwitch)
        lowestSwitch = from;
      haveSwitch = 1;
    }

    if (haveSwitch && lowestSwitch == start) {
      // The chunk is the single switched character at `start`.
      chunkEnd = start;
      Char docChar;
      if (translateSyntax(sdBuilder, start, docChar))
        chars.add(docChar);
    }
    else {
      // Stop just before the next switched character.
      if (haveSwitch)
        chunkEnd = lowestSwitch - 1;
      Char docChar;
      Number count;
      if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
        // Shorten the chunk to what the mapping actually covers.
        if (count - 1 < chunkEnd - start)
          chunkEnd = start + (count - 1);
        chars.addRange(docChar, docChar + (chunkEnd - start));
      }
    }

    if (chunkEnd == end)
      return;
    start = chunkEnd + 1;
  }
}
|
|
|
|
// Translate a single syntax character into the document character set,
// applying the declared switches first. The range length that
// translateSyntaxNoSwitch() reports is not needed here and is discarded.
// Returns the result of translateSyntaxNoSwitch().
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                WideChar syntaxChar, Char &docChar)
{
  const WideChar switched = sdBuilder.switcher.subst(syntaxChar);
  Number ignoredCount;
  return translateSyntaxNoSwitch(sdBuilder, switched, docChar, ignoredCount);
}
|
|
|
|
// Translate one syntax character into the document character set without
// applying switches.
// Two strategies are tried in order:
//  1. Look the character up in the syntax character set declaration and
//     find the document character(s) declared with the same description
//     (same string, or same number in the same base set). The lowest such
//     document character is used if it does not exceed charMax.
//  2. Map through universal character numbers.
// On success docChar receives the result, `count` is set by whichever
// strategy succeeded to the length of the run it reports as covered, and
// 1 is returned. On failure sdBuilder.valid is cleared,
// translateSyntaxChar is reported and 0 is returned.
Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
                                        WideChar syntaxChar, Char &docChar,
                                        Number &count)
{
  Number baseNumber;
  StringC description;
  CharsetDeclRange::Type declType;
  const PublicId *baseId;
  if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
                                              baseId,
                                              declType,
                                              baseNumber,
                                              description,
                                              count)) {
    ISet<WideChar> candidates;
    if (declType == CharsetDeclRange::unused) {
      // Nothing to look up.
    }
    else if (declType == CharsetDeclRange::string)
      sdBuilder.sd->docCharsetDecl().stringToChar(description, candidates);
    else if (declType == CharsetDeclRange::number) {
      Number docCount;
      sdBuilder.sd->docCharsetDecl().numberToChar(baseId, baseNumber,
                                                  candidates, docCount);
      if (!candidates.isEmpty() && docCount < count)
        count = docCount;
    }
    else
      CANNOT_HAPPEN();

    if (!candidates.isEmpty()) {
      if (!candidates.isSingleton() && options().warnSgmlDecl)
        message(ParserMessages::ambiguousDocCharacter,
                CharsetMessageArg(candidates));
      ISetIter<WideChar> candidateIter(candidates);
      WideChar lowest, rangeMax;
      if (candidateIter.next(lowest, rangeMax) && lowest <= charMax) {
        docChar = Char(lowest);
        return 1;
      }
    }
  }

  // Fall back to translating via the universal character number.
  UnivChar universal;
  WideChar syntaxRunMax, docCount;
  if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, universal, syntaxRunMax)
      && univToDescCheck(sdBuilder.sd->docCharset(), universal, docChar,
                         docCount)) {
    count = (syntaxRunMax - syntaxChar) + 1;
    if (docCount < count)
      count = docCount;
    return 1;
  }

  sdBuilder.valid = 0;
  message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
  return 0;
}
|
|
|
|
|
|
// Translate a string of syntax characters into the document character
// set. docString is emptied first and receives every character that could
// be translated; untranslatable characters are left out.
// Returns 1 only if every character was translated. Translation goes on
// after a failure, so each bad character gets its own diagnostic.
Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                const String<SyntaxChar> &syntaxString,
                                StringC &docString)
{
  docString.resize(0);
  Boolean allTranslated = 1;
  const size_t length = syntaxString.size();
  for (size_t pos = 0; pos < length; pos++) {
    Char docChar;
    if (!translateSyntax(sdBuilder, syntaxString[pos], docChar)) {
      allTranslated = 0;
      continue;
    }
    docString += docChar;
  }
  return allTranslated;
}
|
|
|
|
// Translate a name that was read as a token in the current document
// character set into the document character set being declared.
// Each character is mapped to its universal number, passed through the
// declared switches (see translateUniv), and mapped into
// sdBuilder.sd->docCharset().
// `str` is resized to the length of `name` and filled position by
// position. On the first character that cannot be mapped,
// translateDocChar is reported, sdBuilder.valid is cleared and 0 is
// returned, leaving `str` partly filled. Returns 1 on success.
Boolean Parser::translateName(SdBuilder &sdBuilder,
                              const StringC &name,
                              StringC &str)
{
  str.resize(name.size());
  for (size_t i = 0; i < name.size(); i++) {
    // Initialized so that no indeterminate value is passed on if the
    // lookup fails in a build where ASSERT is compiled out.
    UnivChar univChar = 0;
    Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
    // Verify the lookup before its result is used, not afterwards.
    ASSERT(ret != 0);
    // Might switch hyphen or period.
    univChar = translateUniv(univChar, sdBuilder.switcher,
                             sdBuilder.syntaxCharset);
    if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
      message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
      sdBuilder.valid = 0;
      return 0;
    }
  }
  return 1;
}
|
|
|
|
// Apply the declared switches to a universal character number.
// The number is mapped to a syntax character, which must be a unique
// mapping; otherwise missingSyntaxChar is reported and the input is
// returned unchanged. The syntax character is then passed through the
// switcher. If the switcher changed it, the replacement is mapped back to
// a universal number; if that fails, translateSyntaxChar is reported.
// Returns the resulting universal character number.
UnivChar Parser::translateUniv(UnivChar univChar,
                               CharSwitcher &switcher,
                               const CharsetInfo &syntaxCharset)
{
  WideChar syntaxChar;
  ISet<WideChar> allSyntaxChars;
  const Boolean uniquelyMapped
    = (syntaxCharset.univToDesc(univChar, syntaxChar, allSyntaxChars) == 1);
  if (!uniquelyMapped) {
    message(ParserMessages::missingSyntaxChar,
            NumberMessageArg(univChar));
    return univChar;
  }
  SyntaxChar switched = switcher.subst(syntaxChar);
  if (switched != syntaxChar) {
    if (!syntaxCharset.descToUniv(switched, univChar))
      message(ParserMessages::translateSyntaxChar,
              NumberMessageArg(switched));
  }
  return univChar;
}
|
|
|
|
// Check that c has not already been assigned as a function character in
// syn. Returns 1 if c is free; otherwise reports oneFunction and
// returns 0.
Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
{
  const Boolean alreadyFunction
    = syn.charSet(Syntax::functionChar)->contains(c);
  if (!alreadyFunction)
    return 1;
  message(ParserMessages::oneFunction, NumberMessageArg(c));
  return 0;
}
|
|
|
|
|
|
// Check that it has at most one B sequence and that it
|
|
// is not adjacent to a blank sequence.
|
|
|
|
Boolean Parser::checkShortrefDelim(const Syntax &syn,
|
|
const CharsetInfo &charset,
|
|
const StringC &delim)
|
|
{
|
|
Boolean hadB = 0;
|
|
Char letterB = charset.execToDesc('B');
|
|
const ISet<Char> *bSet = syn.charSet(Syntax::blank);
|
|
for (size_t i = 0; i < delim.size(); i++)
|
|
if (delim[i] == letterB) {
|
|
if (hadB) {
|
|
message(ParserMessages::multipleBSequence, StringMessageArg(delim));
|
|
return 0;
|
|
}
|
|
hadB = 1;
|
|
if (i > 0 && bSet->contains(delim[i - 1])) {
|
|
message(ParserMessages::blankAdjacentBSequence,
|
|
StringMessageArg(delim));
|
|
return 0;
|
|
}
|
|
while (i + 1 < delim.size() && delim[i + 1] == letterB)
|
|
i++;
|
|
if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
|
|
message(ParserMessages::blankAdjacentBSequence,
|
|
StringMessageArg(delim));
|
|
return 0;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
// Check that a general delimiter does not consist solely of function
// characters. An empty delimiter passes. Returns 1 if the delimiter
// passes; otherwise reports generalDelimAllFunction and returns 0.
Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
{
  const size_t length = delim.size();
  if (length == 0)
    return 1;
  const ISet<Char> *functionChars = syn.charSet(Syntax::functionChar);
  // One character that is not a function character is enough to pass.
  for (size_t pos = 0; pos < length; pos++)
    if (!functionChars->contains(delim[pos]))
      return 1;
  message(ParserMessages::generalDelimAllFunction,
          StringMessageArg(delim));
  return 0;
}
|
|
|
|
// Check both ends of every character switch.  Any end that
// syntaxCharset can map to a universal character must not be a
// lower-case letter, an upper-case letter or a digit; each offender
// is reported with switchLetterDigit.  Returns 1 only if no offender
// was found.
Boolean Parser::checkSwitches(CharSwitcher &switcher,
                              const CharsetInfo &syntaxCharset)
{
  Boolean allOk = 1;
  for (size_t sw = 0; sw < switcher.nSwitches(); sw++) {
    WideChar ends[2];
    ends[0] = switcher.switchFrom(sw);
    ends[1] = switcher.switchTo(sw);
    for (int side = 0; side < 2; side++) {
      UnivChar u;
      if (!syntaxCharset.descToUniv(ends[side], u))
        continue;               // descToUniv failed: nothing to check
      const Boolean isLcLetter
        = (UnivCharsetDesc::a <= u && u < UnivCharsetDesc::a + 26);
      const Boolean isUcLetter
        = (UnivCharsetDesc::A <= u && u < UnivCharsetDesc::A + 26);
      const Boolean isDigit
        = (UnivCharsetDesc::zero <= u && u < UnivCharsetDesc::zero + 10);
      if (isLcLetter || isUcLetter || isDigit) {
        message(ParserMessages::switchLetterDigit,
                NumberMessageArg(u));
        allOk = 0;
      }
    }
  }
  return allOk;
}
|
|
|
|
// Report every switch that was never applied.  A switch that was not
// used means its source character was not a markup character, which
// is diagnosed with switchNotMarkup.  Returns 1 if all switches were
// used, 0 otherwise.
Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
{
  Boolean allUsed = 1;
  const size_t total = switcher.nSwitches();
  for (size_t sw = 0; sw < total; sw++) {
    if (switcher.switchUsed(sw))
      continue;
    message(ParserMessages::switchNotMarkup,
            NumberMessageArg(switcher.switchFrom(sw)));
    allUsed = 0;
  }
  return allUsed;
}
|
|
|
|
void Parser::checkSyntaxNamelen(const Syntax &syn)
|
|
{
|
|
size_t namelen = syn.namelen();
|
|
int i;
|
|
for (i = 0; i < Syntax::nDelimGeneral; i++)
|
|
if (syn.delimGeneral(i).size() > namelen)
|
|
message(ParserMessages::delimiterLength,
|
|
StringMessageArg(syn.delimGeneral(i)),
|
|
NumberMessageArg(namelen));
|
|
for (i = 0; i < syn.nDelimShortrefComplex(); i++)
|
|
if (syn.delimShortrefComplex(i).size() > namelen)
|
|
message(ParserMessages::delimiterLength,
|
|
StringMessageArg(syn.delimShortrefComplex(i)),
|
|
NumberMessageArg(namelen));
|
|
for (i = 0; i < Syntax::nNames; i++)
|
|
if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
|
|
&& options().warnSgmlDecl)
|
|
message(ParserMessages::reservedNameLength,
|
|
StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
|
|
NumberMessageArg(namelen));
|
|
}
|
|
|
|
// Convenience overload for callers that do not need the count
// produced by the four-argument form; the count is discarded.
Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
                                Char &to)
{
  WideChar unusedCount;
  return univToDescCheck(charset, from, to, unusedCount);
}
|
|
|
|
// Map the universal character `from` to a character of `charset`.
//
// count is passed through to charset.univToDesc(), which fills it in.
// If more than one description matches, ambiguousDocCharacter is
// reported (only when warnSgmlDecl is set) and the character returned
// by univToDesc is used anyway.  Returns 1 and stores the result in
// `to` when a match exists and does not exceed charMax; returns 0
// otherwise, leaving `to` untouched.
Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
                                Char &to, WideChar &count)
{
  WideChar described;
  ISet<WideChar> matches;
  const unsigned nMatches = charset.univToDesc(from, described, matches, count);
  if (nMatches == 0)
    return 0;
  if (nMatches > 1 && options().warnSgmlDecl)
    message(ParserMessages::ambiguousDocCharacter,
            CharsetMessageArg(matches));
  if (described > charMax)
    return 0;
  to = Char(described);
  return 1;
}
|
|
|
|
// Read the next parameter of the SGML declaration.
//
// Tokens are fetched in mdMode until one of them produces a parameter
// or an error.  Separators (tokenS) and comments are consumed and
// scanning continues; a non-SGML character that has been reported by
// reportNonSgmlCharacter() is likewise skipped.
//
//   allow  the parameter kinds acceptable at this point; also used to
//          build the "expected ..." part of diagnostics.
//   parm   receives the parameter: parm.type identifies its kind and
//          the matching member (token, literalText, paramLiteralText,
//          n, or one of the *Index members) carries its value.
//
// Returns 1 when an allowed parameter was read and 0 after reporting
// an error.  One exception: a number that is too large is reported
// with numberTooBig but still returned as a number, with parm.n set
// to Number(-1).
Boolean Parser::parseSdParam(const AllowedSdParams &allow,
                             SdParam &parm)
{
  for (;;) {
    Token token = getToken(mdMode);
    switch (token) {
    case tokenUnrecognized:
      if (reportNonSgmlCharacter())
        break;
      {
        message(ParserMessages::markupDeclarationCharacter,
                StringMessageArg(currentToken()),
                AllowedSdParamsMessageArg(allow, sdPointer()));
      }
      return 0;
    case tokenEe:
      if (allow.param(SdParam::eE)) {
        parm.type = SdParam::eE;
        if (currentMarkup())
          currentMarkup()->addEntityEnd();
        popInputStack();
        return 1;
      }
      message(ParserMessages::sdEntityEnd,
              AllowedSdParamsMessageArg(allow, sdPointer()));
      return 0;
    case tokenS:
      if (currentMarkup())
        currentMarkup()->addS(currentChar());
      break;
    case tokenCom:
      if (!parseComment(sdcomMode))
        return 0;
      break;
    case tokenDso:
    case tokenGrpo:
    case tokenMinusGrpo:
    case tokenPlusGrpo:
    case tokenRni:
    case tokenPeroNameStart:
    case tokenPeroGrpo:
      // None of these delimiters can start an SGML declaration parameter.
      sdParamInvalidToken(token, allow);
      return 0;
    case tokenLcUcNmchar:
      // The only name-character-initial token accepted here is "...".
      if (!allow.param(SdParam::ellipsis)) {
        sdParamInvalidToken(token, allow);
        return 0;
      }
      extendNameToken(syntax().namelen(), ParserMessages::nameLength);
      getCurrentToken(syntax().generalSubstTable(), parm.token);
      if (parm.token == sd().execToDoc("...")) {
        parm.type = SdParam::ellipsis;
        return 1;
      }
      message(ParserMessages::sdInvalidNameToken,
              StringMessageArg(parm.token),
              AllowedSdParamsMessageArg(allow, sdPointer()));
      // Stop here.  Previously control fell through into the literal
      // case below, which either reported a second error for the same
      // token or tried to parse the following input as a literal.
      return 0;
    case tokenLita:
    case tokenLit:
      {
        Boolean lita = (token == tokenLita);
        // A minimum literal takes precedence over a parameter literal
        // when both are allowed.
        if (allow.param(SdParam::minimumLiteral)) {
          if (!parseMinimumLiteral(lita, parm.literalText))
            return 0;
          parm.type = SdParam::minimumLiteral;
          if (currentMarkup())
            currentMarkup()->addLiteral(parm.literalText);
        }
        else if (allow.param(SdParam::paramLiteral)) {
          if (!parseSdParamLiteral(lita, parm.paramLiteralText))
            return 0;
          parm.type = SdParam::paramLiteral;
        }
        else {
          sdParamInvalidToken(token, allow);
          return 0;
        }
        return 1;
      }
    case tokenMdc:
      if (allow.param(SdParam::mdc)) {
        parm.type = SdParam::mdc;
        if (currentMarkup())
          currentMarkup()->addDelim(Syntax::dMDC);
        return 1;
      }
      sdParamInvalidToken(tokenMdc, allow);
      return 0;
    case tokenNameStart:
      {
        extendNameToken(syntax().namelen(), ParserMessages::nameLength);
        getCurrentToken(syntax().generalSubstTable(), parm.token);
        // Try each allowed category of name in turn; the first
        // category that recognizes the token wins.
        if (allow.param(SdParam::capacityName)) {
          if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
            parm.type = SdParam::capacityName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        if (allow.param(SdParam::referenceReservedName)) {
          if (syntax().lookupReservedName(parm.token,
                                          &parm.reservedNameIndex)) {
            parm.type = SdParam::referenceReservedName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        if (allow.param(SdParam::generalDelimiterName)) {
          if (sd().lookupGeneralDelimiterName(parm.token,
                                              parm.delimGeneralIndex)) {
            parm.type = SdParam::generalDelimiterName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        if (allow.param(SdParam::quantityName)) {
          if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
            parm.type = SdParam::quantityName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        // Allowed types at or above SdParam::reservedName denote
        // specific SGML declaration reserved names.
        for (int i = 0;; i++) {
          SdParam::Type t = allow.get(i);
          if (t == SdParam::invalid)
            break;
          if (t >= SdParam::reservedName) {
            Sd::ReservedName sdReservedName
              = Sd::ReservedName(t - SdParam::reservedName);
            if (parm.token == sd().reservedName(sdReservedName)) {
              parm.type = t;
              if (currentMarkup())
                currentMarkup()->addSdReservedName(sdReservedName,
                                                   currentInput());
              return 1;
            }
          }
        }
        // Finally, accept the token as an arbitrary name if allowed.
        if (allow.param(SdParam::name)) {
          parm.type = SdParam::name;
          if (currentMarkup())
            currentMarkup()->addName(currentInput());
          return 1;
        }
        {
          message(ParserMessages::sdInvalidNameToken,
                  StringMessageArg(parm.token),
                  AllowedSdParamsMessageArg(allow, sdPointer()));
        }
        return 0;
      }
    case tokenDigit:
      if (allow.param(SdParam::number)) {
        extendNumber(syntax().namelen(), ParserMessages::numberLength);
        parm.type = SdParam::number;
        unsigned long n;
        if (!stringToNumber(currentInput()->currentTokenStart(),
                            currentInput()->currentTokenLength(),
                            n)
            || n > Number(-1)) {
          message(ParserMessages::numberTooBig,
                  StringMessageArg(currentToken()));
          parm.n = Number(-1);
        }
        else {
          if (currentMarkup())
            currentMarkup()->addNumber(currentInput());
          parm.n = Number(n);
        }
        // Peek at the next token: a name start directly after the
        // number means a separator is missing.  The token is pushed
        // back so the next call sees it again.
        Token nextToken = getToken(mdMode);
        if (nextToken == tokenNameStart)
          message(ParserMessages::psRequired);
        currentInput()->ungetToken();
        return 1;
      }
      sdParamInvalidToken(tokenDigit, allow);
      return 0;
    default:
      CANNOT_HAPPEN();
    }
  }
}
|
|
|
|
// This is a separate function, because we might want SyntaxChar
|
|
// to be bigger than Char.
|
|
|
|
Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
|
|
{
|
|
Location loc(currentLocation());
|
|
loc += 1;
|
|
SdText text(loc, lita); // first character of content
|
|
str.resize(0);
|
|
const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
|
|
|
|
Mode mode = lita ? sdplitaMode : sdplitMode;
|
|
int done = 0;
|
|
for (;;) {
|
|
Token token = getToken(mode);
|
|
switch (token) {
|
|
case tokenEe:
|
|
message(ParserMessages::literalLevel);
|
|
return 0;
|
|
case tokenUnrecognized:
|
|
if (reportNonSgmlCharacter())
|
|
break;
|
|
if (options().errorSignificant)
|
|
message(ParserMessages::sdLiteralSignificant,
|
|
StringMessageArg(currentToken()));
|
|
text.addChar(currentChar(), currentLocation());
|
|
break;
|
|
case tokenCroDigit:
|
|
{
|
|
InputSource *in = currentInput();
|
|
Location startLocation = currentLocation();
|
|
in->discardInitial();
|
|
extendNumber(syntax().namelen(), ParserMessages::numberLength);
|
|
unsigned long n;
|
|
Boolean valid;
|
|
if (!stringToNumber(in->currentTokenStart(),
|
|
in->currentTokenLength(),
|
|
n)
|
|
|| n > syntaxCharMax) {
|
|
message(ParserMessages::syntaxCharacterNumber,
|
|
StringMessageArg(currentToken()));
|
|
valid = 0;
|
|
}
|
|
else
|
|
valid = 1;
|
|
Owner<Markup> markupPtr;
|
|
if (eventsWanted().wantPrologMarkup()) {
|
|
markupPtr = new Markup;
|
|
markupPtr->addDelim(Syntax::dCRO);
|
|
markupPtr->addNumber(in);
|
|
switch (getToken(refMode)) {
|
|
case tokenRefc:
|
|
markupPtr->addDelim(Syntax::dREFC);
|
|
break;
|
|
case tokenRe:
|
|
markupPtr->addRefEndRe();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
(void)getToken(refMode);
|
|
if (valid)
|
|
text.addChar(SyntaxChar(n),
|
|
Location(new NumericCharRefOrigin(startLocation,
|
|
currentLocation().index()
|
|
+ currentInput()->currentTokenLength()
|
|
- startLocation.index(),
|
|
markupPtr),
|
|
0));
|
|
}
|
|
break;
|
|
case tokenCroNameStart:
|
|
if (!parseNamedCharRef())
|
|
return 0;
|
|
break;
|
|
case tokenLit:
|
|
case tokenLita:
|
|
done = 1;
|
|
break;
|
|
case tokenPeroNameStart:
|
|
case tokenPeroGrpo:
|
|
message(ParserMessages::sdParameterEntity);
|
|
{
|
|
Location loc(currentLocation());
|
|
const Char *p = currentInput()->currentTokenStart();
|
|
for (size_t count = currentInput()->currentTokenLength();
|
|
count > 0;
|
|
count--) {
|
|
text.addChar(*p++, loc);
|
|
loc += 1;
|
|
}
|
|
}
|
|
break;
|
|
case tokenChar:
|
|
if (text.string().size() > refLitlen
|
|
&& currentChar() == syntax().standardFunction(Syntax::fRE)) {
|
|
message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
|
|
// guess that the closing delimiter has been omitted
|
|
message(ParserMessages::literalClosingDelimiter);
|
|
return 0;
|
|
}
|
|
text.addChar(currentChar(), currentLocation());
|
|
break;
|
|
}
|
|
if (done) break;
|
|
}
|
|
if (text.string().size() > refLitlen)
|
|
message(ParserMessages::parameterLiteralLength,
|
|
NumberMessageArg(refLitlen));
|
|
|
|
str = text.string();
|
|
if (currentMarkup())
|
|
currentMarkup()->addSdLiteral(text);
|
|
return 1;
|
|
}
|
|
|
|
// Convert `length` characters starting at `s` to an unsigned long,
// treating them as decimal digits whose values come from
// sd().digitWeight().  Returns 1 and stores the value in `result` on
// success; returns 0, leaving `result` untouched, if the value would
// not fit in an unsigned long.
Boolean Parser::stringToNumber(const Char *s, size_t length,
                               unsigned long &result)
{
  unsigned long acc = 0;
  const Char *const end = s + length;
  for (const Char *p = s; p != end; ++p) {
    const int weight = sd().digitWeight(*p);
    if (acc > ULONG_MAX/10)
      return 0;                 // multiplying by 10 would overflow
    acc *= 10;
    if (acc > ULONG_MAX - weight)
      return 0;                 // adding the digit would overflow
    acc += weight;
  }
  result = acc;
  return 1;
}
|
|
|
|
// Report that `token` cannot start an SGML declaration parameter
// here; the message also describes the parameters in `allow`.
void Parser::sdParamInvalidToken(Token token,
                                 const AllowedSdParams &allow)
{
  const TokenMessageArg found(token, mdMode, syntaxPointer(), sdPointer());
  const AllowedSdParamsMessageArg expected(allow, sdPointer());
  message(ParserMessages::sdParamInvalidToken, found, expected);
}
|
|
|
|
// Turn a number parameter into a one-character parameter literal
// whose single character is the number.  Parameters of any other
// type are left unchanged.
void Parser::sdParamConvertToLiteral(SdParam &parm)
{
  if (parm.type != SdParam::number)
    return;
  parm.type = SdParam::paramLiteral;
  parm.paramLiteralText.resize(1);
  parm.paramLiteralText[0] = parm.n;
}
|
|
|
|
// Record up to six allowed parameter types, in the order given, in
// allow_[0] through allow_[5].
AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
                                 SdParam::Type arg3, SdParam::Type arg4,
                                 SdParam::Type arg5, SdParam::Type arg6)
{
  const SdParam::Type args[] = { arg1, arg2, arg3, arg4, arg5, arg6 };
  for (size_t k = 0; k < sizeof(args)/sizeof(args[0]); k++)
    allow_[k] = args[k];
}
|
|
|
|
// Return 1 if t is one of the allowed types.  The search stops at the
// first SdParam::invalid entry, which marks the end of the list.
Boolean AllowedSdParams::param(SdParam::Type t) const
{
  for (int k = 0; k < maxAllow; k++) {
    if (allow_[k] == SdParam::invalid)
      break;
    if (allow_[k] == t)
      return 1;
  }
  return 0;
}
|
|
|
|
// Return the i-th allowed type, or SdParam::invalid when i lies
// outside the range [0, maxAllow).
SdParam::Type AllowedSdParams::get(int i) const
{
  if (i >= 0 && i < maxAllow)
    return allow_[i];
  return SdParam::Type(SdParam::invalid);
}
|
|
|
|
// Message argument that renders a list of allowed SGML declaration
// parameters; sd is kept for the lookups done when rendering.
AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
  const AllowedSdParams &allow,
  const ConstPtr<Sd> &sd)
  : allow_(allow),
    sd_(sd)
{
}
|
|
|
|
// Return a newly allocated copy of this argument.
MessageArg *AllowedSdParamsMessageArg::copy() const
{
  MessageArg *duplicate = new AllowedSdParamsMessageArg(*this);
  return duplicate;
}
|
|
|
|
// Append a description of each allowed parameter type to builder,
// with listSep between consecutive entries.  Generic kinds are
// rendered with a message fragment; mdc and ellipsis are rendered as
// their characters; any other type is taken to be a specific reserved
// name and rendered with the text obtained from sd_.
void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
{
  int index = 0;
  SdParam::Type type;
  while ((type = allow_.get(index)) != SdParam::invalid) {
    if (index > 0)
      builder.appendFragment(ParserMessages::listSep);
    switch (type) {
    case SdParam::eE:
      builder.appendFragment(ParserMessages::entityEnd);
      break;
    case SdParam::minimumLiteral:
      builder.appendFragment(ParserMessages::minimumLiteral);
      break;
    case SdParam::mdc:
      {
        builder.appendFragment(ParserMessages::delimStart);
        Char mdcChar = sd_->execToDoc('>');
        builder.appendChars(&mdcChar, 1);
        builder.appendFragment(ParserMessages::delimEnd);
      }
      break;
    case SdParam::number:
      builder.appendFragment(ParserMessages::number);
      break;
    case SdParam::name:
      builder.appendFragment(ParserMessages::name);
      break;
    case SdParam::paramLiteral:
      builder.appendFragment(ParserMessages::parameterLiteral);
      break;
    case SdParam::capacityName:
      builder.appendFragment(ParserMessages::capacityName);
      break;
    case SdParam::generalDelimiterName:
      builder.appendFragment(ParserMessages::generalDelimiteRoleName);
      break;
    case SdParam::referenceReservedName:
      builder.appendFragment(ParserMessages::referenceReservedName);
      break;
    case SdParam::quantityName:
      builder.appendFragment(ParserMessages::quantityName);
      break;
    case SdParam::ellipsis:
      {
        StringC dots(sd_->execToDoc("..."));
        builder.appendChars(dots.data(), dots.size());
      }
      break;
    default:
      {
        // Offset from SdParam::reservedName selects the reserved name.
        StringC reserved(sd_->reservedName(type - SdParam::reservedName));
        builder.appendChars(reserved.data(), reserved.size());
      }
      break;
    }
    index++;
  }
}
|
|
|
|
// A new builder starts out valid and with externalSyntax cleared.
SdBuilder::SdBuilder()
  : valid(1),
    externalSyntax(0)
{
}
|
|
|
|
void SdBuilder::addFormalError(const Location &location,
|
|
const MessageType1 &message,
|
|
const StringC &id)
|
|
{
|
|
formalErrorList.insert(new SdFormalError(location, message, id));
|
|
}
|
|
|
|
// Capture everything needed to issue the message later.  Only the
// address of `message` is kept, so the referenced message object must
// still exist when send() is called.
SdFormalError::SdFormalError(const Location &location,
                             const MessageType1 &message,
                             const StringC &id)
  : location_(location), message_(&message), id_(id)
{
}
|
|
|
|
// Issue the stored message through parser, at the stored location and
// with the stored identifier as its argument.
void SdFormalError::send(ParserState &parser)
{
  parser.Messenger::setNextLocation(location_);
  const StringMessageArg idArg(id_);
  parser.message(*message_, idArg);
}
|
|
|
|
// Nothing to initialize explicitly; the members are default-constructed.
CharSwitcher::CharSwitcher()
{
}
|
|
|
|
// Register a switch from `from` to `to`.  switches_ holds the pair as
// two consecutive entries; switchUsed_ gets one flag per switch,
// initially clear.
void CharSwitcher::addSwitch(WideChar from, WideChar to)
{
  // source character, then its replacement
  switches_.push_back(from);
  switches_.push_back(to);
  // not used yet
  switchUsed_.push_back(0);
}
|
|
|
|
// Apply the first switch whose source character is c: mark it used
// and return its replacement.  If no switch matches, c is returned
// unchanged.
SyntaxChar CharSwitcher::subst(WideChar c)
{
  for (size_t pair = 0; pair * 2 < switches_.size(); pair++) {
    if (switches_[pair * 2] != c)
      continue;
    switchUsed_[pair] = 1;
    return switches_[pair * 2 + 1];
  }
  return c;
}
|
|
|
|
size_t CharSwitcher::nSwitches() const
|
|
{
|
|
return switchUsed_.size();
|
|
}
|
|
|
|
// Whether the i-th switch has been applied by subst().
// No bounds check is made on i.
Boolean CharSwitcher::switchUsed(size_t i) const
{
  const Boolean used = switchUsed_[i];
  return used;
}
|
|
|
|
// Source character of the i-th switch (even slot of the pair).
// No bounds check is made on i.
WideChar CharSwitcher::switchFrom(size_t i) const
{
  const size_t slot = i*2;
  return switches_[slot];
}
|
|
|
|
// Replacement character of the i-th switch (odd slot of the pair).
// No bounds check is made on i.
WideChar CharSwitcher::switchTo(size_t i) const
{
  const size_t slot = i*2 + 1;
  return switches_[slot];
}
|
|
|
|
// Message argument that renders a set of character numbers.
CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
  : set_(set)
{
}
|
|
|
|
// Return a newly allocated copy of this argument.
MessageArg *CharsetMessageArg::copy() const
{
  MessageArg *duplicate = new CharsetMessageArg(*this);
  return duplicate;
}
|
|
|
|
// Append the contents of the set to builder, one range at a time,
// with listSep between ranges.  A single-element range is written as
// one number; a range of two adjacent numbers as the two numbers
// joined by listSep; any longer range as its bounds joined by
// rangeSep.
void CharsetMessageArg::append(MessageBuilder &builder) const
{
  ISetIter<WideChar> ranges(set_);
  WideChar lo, hi;
  for (size_t nRanges = 0; ranges.next(lo, hi); nRanges++) {
    if (nRanges > 0)
      builder.appendFragment(ParserMessages::listSep);
    builder.appendNumber(lo);
    if (hi == lo)
      continue;
    if (hi == lo + 1)
      builder.appendFragment(ParserMessages::listSep);
    else
      builder.appendFragment(ParserMessages::rangeSep);
    builder.appendNumber(hi);
  }
}
|
|
|
|
#ifdef SP_NAMESPACE
|
|
}
|
|
#endif
|