cdesktopenv/cde/programs/dtdocbook/infolib/AusTextStorage.C

369 lines
9.7 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: AusTextStorage.cc /main/5 1996/07/23 18:08:29 cde-hal $
*
* (c) Copyright 1996 Digital Equipment Corporation.
* (c) Copyright 1996 Hewlett-Packard Company.
* (c) Copyright 1996 International Business Machines Corp.
* (c) Copyright 1996 Sun Microsystems, Inc.
* (c) Copyright 1996 Novell, Inc.
* (c) Copyright 1996 FUJITSU LIMITED.
* (c) Copyright 1996 Hitachi.
*/
#include <stdio.h>
#include <stddef.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <sstream>
using namespace std;
/* imported interfaces */
#include <misc/unique_id.h>
#include "FlexBuffer.h"
#include "Task.h"
#include "DataBase.h"
#include "DataRepository.h"
#include "api/utility.h"
/* exported interfaces */
#include "AusTextStorage.h"
#ifdef DTSR_USE_CNTR_L
const char CNTR_L = '\014'; /* This is for ascii system only */
#endif
// If NodeParser is ever set up to run on all bookcases at one time, we
// will need a reset() function for this member.
unsigned long AusTextStore::f_recordcount = 0;
#ifdef DTSR_LIKES_FGETS
const int LINE_SIZE = 80; /* this is the line size allowed for data in
* the *.fzk file
*/
/* Most of the non-alphanumeric characters in the ASCII code set */
const char *DELIMITER_SET = "\t\n !@#$%^&*()_-=+\\|~[]{};:,.<>/?";
/*
* Code-set classification of a character in Japanese EUC text, as
* returned by JpEucCodeSet() from the character's first byte.
* JpGetLine() uses the classification to decide how many bytes the
* character occupies.
*/
enum EucCodeSet {
CodeSetInv = -1, /* nothing to classify: the text pointer was NULL */
CodeSet0 = 0, /* first byte below 0x80; treated as 1 byte long */
CodeSet1 = 1, /* any other first byte (0xA1..0xFE expected); 2 bytes long */
CodeSet2 = 2, /* first byte is 0x8E; treated as 2 bytes long */
CodeSet3 = 3 /* first byte is 0x8F; treated as 3 bytes long */
};
/*
* charset determines whether ch is found in the set;
* it returns 1 if so, 0 otherwise
*/
/*
* @@ charset is expensive, alternative approach is to use a
* static array
* static char char_tab[] = { 0, 0, 0, 1,...... }
* where 1 indicates the character is in the delimiter character set
* however, this might not be portable for character set other than
* ascii , so this has to be done carefully
* If the format of the fzk is changed, all this will no longer be
* required. So, I am not going to do anything at this point
*/
//-----------------------------------------------------------------
static int charset ( const char ch, const char *set)
{
  /*
   * Linear scan of the NUL-terminated string `set`.  The terminating NUL
   * is never compared against, so ch == '\0' is always reported as
   * "not in the set".
   */
  const char *cursor = set;
  while ( *cursor != '\0' ) {
    if ( *cursor == ch ) {
      return 1;
    }
    ++cursor;
  }
  return 0;
}
/*
* DefaultGetLine returns the number of bytes that should be read as a line.
* Normally that is line_size, but if a token would span 2 lines,
* DefaultGetLine shortens the line so that, whenever the token fits on a
* line of its own, no token at the end of the line continues onto the next
* line.
*/
/*
* start_ptr is the start of the buffer and end_ptr is the end of the buffer
* it is similar to fread except that end_ptr is supplied as the bounding
* condition as opposed to the EOF in fread. Besides, no actual character
* is read , only the number of characters that should be read as a line.
*/
//--------------------------------------------------------------------------
/*
 * Compute how many bytes of [start_ptr, end_ptr] (both inclusive) should be
 * emitted as the next line.
 *
 *   start_ptr  first byte not yet emitted
 *   end_ptr    last valid byte of the buffer
 *   line_size  maximum number of bytes allowed on a line
 *
 * Returns 0 when nothing is left (start_ptr > end_ptr) or line_size is not
 * positive; otherwise a value in 1..line_size.  The line is shortened so
 * that it ends on a DELIMITER_SET character when the byte at the line
 * boundary and the byte after it both belong to a token.  No bytes are
 * read from a file here; only the length is computed.
 */
static unsigned int DefaultGetLine ( const char *start_ptr,
                                     const char *end_ptr,
                                     int line_size )
{
  if ( start_ptr > end_ptr || line_size <= 0 ) { return 0; }

  /*
   * Compare lengths rather than pointers: start_ptr + line_size - 1 may lie
   * far beyond the end of the buffer, and forming such a pointer is not
   * valid even if it is never dereferenced.
   */
  const ptrdiff_t remaining = end_ptr - start_ptr + 1;
  if ( remaining <= line_size ) {
    // last chunk of the buffer (possibly exactly one full line)
    return (unsigned int) remaining;
  }

  /* More than one line is left, so boundary and boundary + 1 are both valid. */
  const char *boundary = start_ptr + line_size - 1;
  if ( charset( *(boundary + 1), DELIMITER_SET ) ||
       charset( *boundary, DELIMITER_SET ) ) {
    /* No token straddles the line boundary. */
    return (unsigned int) line_size;
  }

  /*
   * A token spans the boundary: walk back to the closest delimiter and end
   * the line there, so the token starts the next line.
   */
  const char *split = boundary;
  while ( split > start_ptr && !charset( *split, DELIMITER_SET ) ) {
    --split;
  }

  if ( split == start_ptr && !charset( *split, DELIMITER_SET ) ) {
    /*
     * The whole window is a single token longer than line_size.  It cannot
     * be kept in one piece, so cut it at the line boundary instead of
     * returning 1, which would emit the token one byte per line.
     */
    return (unsigned int) line_size;
  }

  return (unsigned int) ( split - start_ptr + 1 );
}
/*
 * Classify the character that starts at `text` by looking at its first
 * byte only.
 *
 *   NULL pointer        -> CodeSetInv
 *   first byte < 0x80   -> CodeSet0
 *   first byte == 0x8E  -> CodeSet2
 *   first byte == 0x8F  -> CodeSet3
 *   anything else       -> CodeSet1 (asserted to be in 0xA1..0xFE)
 */
inline EucCodeSet JpEucCodeSet(const unsigned char* text)
{
  if (text == NULL) {
    return CodeSetInv;
  }

  if (*text < 0x80) {
    return CodeSet0;
  }

  switch (*text) {
    case 0x8E:
      return CodeSet2;
    case 0x8F:
      return CodeSet3;
    default:
      break;
  }

  /* Remaining lead bytes are only checked in builds with assertions on. */
  assert( *text > 0xA0 && *text < 0xFF);
  return CodeSet1;
}
static unsigned int JpGetLine ( const char *start_ptr,
const char *end_ptr,
int line_size )
{
if (start_ptr > end_ptr)
return 0;
if (end_ptr - start_ptr + 1 <= line_size)
return (end_ptr - start_ptr + 1);
// reference limit
const char* limit = start_ptr + line_size;
EucCodeSet codeset = JpEucCodeSet((const unsigned char*)start_ptr);
int len;
const char* p;
for (p = start_ptr; p < limit; p += len) {
if (JpEucCodeSet((const unsigned char*)p) != codeset)
break;
if (codeset == CodeSet0)
len = 1;
else if ((codeset == CodeSet1) || (codeset == CodeSet2))
len = 2;
else if (codeset == CodeSet3)
len = 3;
else
len = 0;
if ((len == 0) || (p + len - 1 > end_ptr))
break;
}
return (p - start_ptr);
}
#endif // DTSR_LIKES_FGETS
//-----------------------------------------------------------------------
/*
 * Return 1 when `filename` can be stat()ed and is a directory, 0 in every
 * other case (including when stat() fails).
 */
static int isdir(const char* filename)
{
  struct stat info;

  if (stat(filename, &info) != 0) {
    return 0;
  }

  return S_ISDIR(info.st_mode) ? 1 : 0;
}
//-----------------------------------------------------------------------
/*
 * Create the directory `path` with mode 0775.
 * Throws PosixError carrying errno and the path when mkdir() fails.
 */
static void makedir(const char *path) /* throw(PosixError) */
{
  const int status = mkdir((char*)path, 0775);
  if (status == 0) {
    return;
  }
  throw(PosixError(errno, path));
}
//-----------------------------------------------------------------------
/*
 * Prepare the "<path>/dtsearch" directory and open "<name>.fzk" inside it
 * for appending.
 *
 *   path  parent directory; created if it is not already a directory
 *   name  base name of the fzk file
 *
 * On success austext_path holds "<path>/dtsearch" and afp is the open
 * stream.  Throws PosixError when a directory cannot be created or the
 * fzk file cannot be opened.  The destructor does not run when a
 * constructor throws, so austext_path is released here before throwing.
 */
AusTextStore::AusTextStore( const char *path, const char *name )
{
  if ( !isdir(path) ) {
    makedir(path);
  }

  /* "<path>" + "/" + "dtsearch" + terminating NUL */
  const size_t textlen = strlen(path) + 1 + strlen("dtsearch") + 1;
  austext_path = new char [ textlen ];

  /*
   * throw(ResourceExhausted)
   *
   */
  assert ( austext_path != NULL );
  snprintf( austext_path, textlen, "%s/dtsearch", path );

  if ( !isdir(austext_path) ) {
    /*
     * Same operation and error as makedir(), done inline so that
     * austext_path can be freed before the exception leaves.
     */
    if ( mkdir(austext_path, 0775) != 0 ) {
      const int mkdir_errno = errno;
      /* Copy the path for the error before its storage is released. */
      char *failed_dir = form("%s", austext_path);
      delete [] austext_path;
      austext_path = NULL;
      throw(PosixError(mkdir_errno, failed_dir));
    }
  }

  char *fzk = form("%s/%s.fzk", austext_path, name );

  /* Use append instead because this fzk file is going to be appended
   * all the time
   */
  afp = fopen ( fzk, "a" );
  if ( !afp ) {
    /* Capture errno before any other library call can overwrite it. */
    const int open_errno = errno;

    /*
     * Rebuild the file name from its parts instead of passing `fzk` back
     * into form(): fzk is itself a form() result and presumably lives in
     * storage that form() reuses (TODO confirm), so it must not be an
     * input to the next form() call.  The resulting text is the same.
     */
    char *message = form("unable to open fzk file %s/%s.fzk\n",
                         austext_path, name);

    delete [] austext_path;
    austext_path = NULL;
    throw(PosixError(open_errno, message));
  }
}
//-----------------------------------------------------------------------
void
AusTextStore::insert( const char *BookShortTitle,
const char *BookID,
const char *SectionID,
const char *SectionTitle,
DataRepository *store
)
{
/* write the abstract and record stuff in the fzk file */
if ( afp ) {
f_recordcount++;
/* Record type ie for all the zone content */
FlexBuffer **table = store->tabbuf();
for ( int pos=store->Default;
pos < store->Total;
pos++ ) {
if ( table[pos] ) {
if ( table[pos]->GetSize() > 0 ) {
fprintf(afp, " 0,2\n");
/* abstract includes SectionID\tBookShortTitle\tSectionTitle */
fprintf(afp, "ABSTRACT: %s\t%s\t%s\n", SectionID,
BookShortTitle,
SectionTitle );
// first the record type
// The following was unique, but there is a limit to the size of
// the key, so let's just use a simple counter.
// fprintf(afp, "%s%s%s\n", store->get_zone_name(pos), BookID, SectionID);
fprintf(afp, "%s%d\n", store->get_zone_name(pos), (int)f_recordcount);
fprintf(afp, "0/0/0~0:0\n"); // null date
// Now the actual buffer
const char *start_ptr = table[pos]->GetBuffer();
const char *end_ptr = start_ptr + table[pos]->GetSize() - 1;
#ifdef DTSR_LIKES_FGETS
unsigned int (*getline)(const char *, const char *, int);
const char* lang = getenv("LANG");
if (lang && !strncmp(lang, "ja", strlen("ja")))
getline = JpGetLine;
else
getline = DefaultGetLine;
int num_byte;
while ( num_byte = getline(start_ptr, end_ptr, LINE_SIZE) ) {
if ( !fwrite(start_ptr, num_byte, 1, afp ) )
{
throw(PosixError(errno, "unable to write to fzk file\n" ) );
}
fputc('\n', afp );
start_ptr += num_byte;
}
// for current section and book level scopes, place the book and
// section ids into the indexed data.
fprintf(afp, "\n%s\n%s\n", BookID, SectionID);
#else
char *ptr = (char*)start_ptr;
for (; ptr <= end_ptr; ptr++) {
if (*ptr == '\n')
*ptr = ' ';
}
if (fwrite(start_ptr, table[pos]->GetSize(), 1, afp) == 0)
throw(PosixError(errno, "unable to write to fzk file\n"));
// for current section and book level scopes, place the book and
// section ids into the indexed data.
fprintf(afp, "\t%s\t%s", BookID, SectionID);
#endif
#ifdef DTSR_USE_CNTR_L
// Then the ^L character at the end
fprintf(afp, "\n%c\n", CNTR_L );
#else
fprintf(afp, "\n");
#endif
}
}
}
}
}
//-----------------------------------------------------------------------
/*
 * Close the fzk stream if it is open and release the path string.
 */
AusTextStore::~AusTextStore()
{
  if ( afp != NULL ) {
    fclose(afp);
  }

  /* delete [] on a null pointer does nothing, so no guard is needed. */
  delete [] austext_path;
}