369 lines
9.7 KiB
C
369 lines
9.7 KiB
C
/*
 * CDE - Common Desktop Environment
 *
 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
 *
 * These libraries and programs are free software; you can
 * redistribute them and/or modify them under the terms of the GNU
 * Lesser General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * These libraries and programs are distributed in the hope that
 * they will be useful, but WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with these libraries and programs; if not, write
 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */
|
|
/* $XConsortium: AusTextStorage.cc /main/5 1996/07/23 18:08:29 cde-hal $
 *
 * (c) Copyright 1996 Digital Equipment Corporation.
 * (c) Copyright 1996 Hewlett-Packard Company.
 * (c) Copyright 1996 International Business Machines Corp.
 * (c) Copyright 1996 Sun Microsystems, Inc.
 * (c) Copyright 1996 Novell, Inc.
 * (c) Copyright 1996 FUJITSU LIMITED.
 * (c) Copyright 1996 Hitachi.
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stddef.h>
|
|
#include <errno.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <assert.h>
|
|
#include <sstream>
|
|
using namespace std;
|
|
|
|
/* imported interfaces */
|
|
#include <misc/unique_id.h>
|
|
#include "FlexBuffer.h"
|
|
#include "Task.h"
|
|
#include "DataBase.h"
|
|
#include "DataRepository.h"
|
|
#include "api/utility.h"
|
|
|
|
/* exported interfaces */
|
|
#include "AusTextStorage.h"
|
|
|
|
#ifdef DTSR_USE_CNTR_L
/*
 * Form-feed (Control-L) character written after each record when
 * DTSR_USE_CNTR_L is defined.  The octal value is the ASCII code, so
 * this is for ASCII systems only.
 */
const char CNTR_L = '\014';
#endif
|
|
|
|
// If NodeParser ever gets setup to run on all bookcases at one time, we
|
|
// will need a reset() function for this member.
|
|
unsigned long AusTextStore::f_recordcount = 0;
|
|
|
|
#ifdef DTSR_LIKES_FGETS
|
|
/* Line size allowed for data in the *.fzk file. */
const int LINE_SIZE = 80;

/*
 * Characters treated as token delimiters when a buffer is split into
 * lines: most of the non-alphanumeric characters in the ASCII code set.
 */
const char *DELIMITER_SET = "\t\n !@#$%^&*()_-=+\\|~[]{};:,.<>/?";
|
|
|
|
/*
 * Code set of an EUC-encoded character, as classified by JpEucCodeSet()
 * from the character's first byte.  CodeSetInv means "no character"
 * (a NULL pointer was supplied).
 */
enum EucCodeSet {
  CodeSetInv = -1,
  CodeSet0   = 0,
  CodeSet1   = 1,
  CodeSet2   = 2,
  CodeSet3   = 3
};
|
|
|
|
/*
 * charset determines whether the character ch is found in the
 * NUL-terminated string set.
 * Returns 1 if so, 0 otherwise.  The terminating '\0' of set is never
 * treated as a member, so charset('\0', set) is always 0.
 */
/*
 * @@ charset is expensive (a linear scan per character).  An alternative
 * approach is to use a static array
 *     static char char_tab[] = { 0, 0, 0, 1, ... }
 * where 1 indicates the character is in the delimiter character set.
 * However, this might not be portable to character sets other than
 * ASCII, so it has to be done carefully.
 * If the format of the fzk file is changed, all this will no longer be
 * required, so nothing is being done about it at this point.
 */

//-----------------------------------------------------------------
static int charset ( const char ch, const char *set)
{
  const char *ptr = set;

  while ( *ptr != '\0' ) {
    if ( *ptr == ch ) { return 1; }
    ptr++;
  }

  return 0;
}
|
|
|
|
/*
|
|
* getline returns the no. of bytes that should be read as a line.
|
|
* Normally it should read line_size, but if there is a token that
|
|
* spans 2 lines, getline need to determine the line size such that
|
|
* at the end of the line, no token should be spanning the next line.
|
|
*/
|
|
/*
|
|
* start_ptr is the start of the buffer and end_ptr is the end of the buffer
|
|
* it is similar to fread except that end_ptr is supplied as the bounding
|
|
* condition as opposed to the EOF in fread. Besides, no actual character
|
|
* is read , only the number of characters that should be read as a line.
|
|
*/
|
|
//--------------------------------------------------------------------------
|
|
|
|
static unsigned int DefaultGetLine ( const char *start_ptr,
|
|
const char *end_ptr,
|
|
int line_size )
|
|
{
|
|
if ( start_ptr > end_ptr ) { return 0; }
|
|
|
|
if ( start_ptr + line_size - 1 <= end_ptr ) { // not @ the end yet
|
|
/*
|
|
* FIrst see if there is a token that spans multiple lines
|
|
*/
|
|
const char *ptr = start_ptr + line_size - 1;
|
|
if ( ptr == end_ptr ) { return line_size; }
|
|
|
|
if ( charset( *(ptr+1), DELIMITER_SET ) || charset ( *ptr, DELIMITER_SET ) ) {
|
|
return ( line_size );
|
|
}
|
|
|
|
/* That means found a token that spans 2 lines */
|
|
/* So now loop back until *ptr is not in DELIMITER_SET */
|
|
const char *new_end_ptr;
|
|
for ( new_end_ptr = ptr;
|
|
new_end_ptr > start_ptr && !charset( *new_end_ptr , DELIMITER_SET );
|
|
new_end_ptr-- );
|
|
|
|
return( new_end_ptr - start_ptr + 1 );
|
|
|
|
}
|
|
else {
|
|
// last chunk of line
|
|
return ( end_ptr - start_ptr + 1 );
|
|
}
|
|
}
|
|
|
|
/*
 * JpEucCodeSet classifies the character starting at text by its first
 * byte:
 *   NULL pointer  -> CodeSetInv
 *   byte < 0x80   -> CodeSet0
 *   byte == 0x8E  -> CodeSet2
 *   byte == 0x8F  -> CodeSet3
 *   anything else -> CodeSet1
 * For CodeSet1 the byte is asserted to lie in 0xA1..0xFE.  With asserts
 * compiled out, any other remaining byte value is still reported as
 * CodeSet1.
 */
inline EucCodeSet JpEucCodeSet(const unsigned char* text)
{
  if (text == NULL)
    return CodeSetInv;

  const unsigned char lead = *text;

  if (lead < 0x80)
    return CodeSet0;
  if (lead == 0x8E)
    return CodeSet2;
  if (lead == 0x8F)
    return CodeSet3;

  assert( lead > 0xA0 && lead < 0xFF);
  return CodeSet1;
}
|
|
|
|
static unsigned int JpGetLine ( const char *start_ptr,
|
|
const char *end_ptr,
|
|
int line_size )
|
|
{
|
|
if (start_ptr > end_ptr)
|
|
return 0;
|
|
|
|
if (end_ptr - start_ptr + 1 <= line_size)
|
|
return (end_ptr - start_ptr + 1);
|
|
|
|
// reference limit
|
|
const char* limit = start_ptr + line_size;
|
|
|
|
EucCodeSet codeset = JpEucCodeSet((const unsigned char*)start_ptr);
|
|
|
|
int len;
|
|
const char* p;
|
|
|
|
for (p = start_ptr; p < limit; p += len) {
|
|
|
|
if (JpEucCodeSet((const unsigned char*)p) != codeset)
|
|
break;
|
|
|
|
if (codeset == CodeSet0)
|
|
len = 1;
|
|
else if ((codeset == CodeSet1) || (codeset == CodeSet2))
|
|
len = 2;
|
|
else if (codeset == CodeSet3)
|
|
len = 3;
|
|
else
|
|
len = 0;
|
|
|
|
if ((len == 0) || (p + len - 1 > end_ptr))
|
|
break;
|
|
}
|
|
|
|
return (p - start_ptr);
|
|
}
|
|
#endif // DTSR_LIKES_FGETS
|
|
|
|
//-----------------------------------------------------------------------
|
|
/*
 * isdir returns 1 when stat() succeeds on filename and reports a
 * directory, and 0 in every other case (including any stat() failure).
 */
static int isdir(const char* filename)
{
  struct stat sb;

  if (stat(filename, &sb) != 0) {
    return 0;
  }

  return S_ISDIR(sb.st_mode) ? 1 : 0;
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
static void makedir(const char *path) /* throw(PosixError) */
|
|
{
|
|
if(mkdir((char*)path, 0775) != 0){
|
|
throw(PosixError(errno, path));
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
/*
 * Builds the store rooted at path:
 *   - creates path if it is not already a directory,
 *   - creates path/dtsearch likewise and remembers it in austext_path,
 *   - opens path/dtsearch/<name>.fzk for appending as afp.
 *
 * Throws PosixError when a directory cannot be created or the fzk file
 * cannot be opened.
 *
 * NOTE(review): if creating path/dtsearch fails, austext_path is not
 * released here because the thrown PosixError was handed that very
 * pointer and it is not visible from this file whether it copies it.
 */
AusTextStore::AusTextStore( const char *path, const char *name )
{
  if ( !isdir(path) ) {
    makedir(path);
  }

  /* "<path>" + "/" + "dtsearch" + terminating NUL */
  const size_t textlen = strlen(path) + 1 + strlen("dtsearch") + 1;
  austext_path = new char [ textlen ];
  /*
   * throw(ResourceExhausted)
   *
   */
  assert ( austext_path != NULL );

  snprintf( austext_path, textlen, "%s/dtsearch", path );

  if ( !isdir(austext_path) ) {
    makedir(austext_path);
  }

  char *fzk = form("%s/%s.fzk", austext_path, name );

  /* Use append instead because this fzk file is going to be appended
   * all the time
   */
  afp = fopen ( fzk, "a" );
  if ( !afp ) {
    /* Capture errno before any further library call can overwrite it. */
    const int open_errno = errno;

    /*
     * Rebuild the file name from its parts rather than passing the
     * result of form() back into form(); the resulting text is the same.
     */
    const char *msg = form("unable to open fzk file %s/%s.fzk\n",
                           austext_path, name);

    /* The destructor does not run for a failed constructor. */
    delete [] austext_path;
    austext_path = NULL;

    throw(PosixError(open_errno, msg));
  }
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
void
|
|
AusTextStore::insert( const char *BookShortTitle,
|
|
const char *BookID,
|
|
const char *SectionID,
|
|
const char *SectionTitle,
|
|
DataRepository *store
|
|
)
|
|
{
|
|
|
|
/* write the abstract and record stuff in the fzk file */
|
|
if ( afp ) {
|
|
|
|
f_recordcount++;
|
|
/* Record type ie for all the zone content */
|
|
FlexBuffer **table = store->tabbuf();
|
|
for ( int pos=store->Default;
|
|
pos < store->Total;
|
|
pos++ ) {
|
|
|
|
if ( table[pos] ) {
|
|
|
|
if ( table[pos]->GetSize() > 0 ) {
|
|
|
|
fprintf(afp, " 0,2\n");
|
|
|
|
/* abstract includes SectionID\tBookShortTitle\tSectionTitle */
|
|
fprintf(afp, "ABSTRACT: %s\t%s\t%s\n", SectionID,
|
|
BookShortTitle,
|
|
SectionTitle );
|
|
|
|
// first the record type
|
|
// The following was unique, but there is a limit to the size of
|
|
// the key, so let's just use a simple counter.
|
|
// fprintf(afp, "%s%s%s\n", store->get_zone_name(pos), BookID, SectionID);
|
|
fprintf(afp, "%s%d\n", store->get_zone_name(pos), (int)f_recordcount);
|
|
|
|
fprintf(afp, "0/0/0~0:0\n"); // null date
|
|
|
|
// Now the actual buffer
|
|
const char *start_ptr = table[pos]->GetBuffer();
|
|
const char *end_ptr = start_ptr + table[pos]->GetSize() - 1;
|
|
|
|
#ifdef DTSR_LIKES_FGETS
|
|
unsigned int (*getline)(const char *, const char *, int);
|
|
|
|
const char* lang = getenv("LANG");
|
|
if (lang && !strncmp(lang, "ja", strlen("ja")))
|
|
getline = JpGetLine;
|
|
else
|
|
getline = DefaultGetLine;
|
|
|
|
int num_byte;
|
|
while ( num_byte = getline(start_ptr, end_ptr, LINE_SIZE) ) {
|
|
if ( !fwrite(start_ptr, num_byte, 1, afp ) )
|
|
{
|
|
throw(PosixError(errno, "unable to write to fzk file\n" ) );
|
|
}
|
|
fputc('\n', afp );
|
|
|
|
start_ptr += num_byte;
|
|
}
|
|
|
|
// for current section and book level scopes, place the book and
|
|
// section ids into the indexed data.
|
|
fprintf(afp, "\n%s\n%s\n", BookID, SectionID);
|
|
#else
|
|
char *ptr = (char*)start_ptr;
|
|
for (; ptr <= end_ptr; ptr++) {
|
|
if (*ptr == '\n')
|
|
*ptr = ' ';
|
|
}
|
|
|
|
if (fwrite(start_ptr, table[pos]->GetSize(), 1, afp) == 0)
|
|
throw(PosixError(errno, "unable to write to fzk file\n"));
|
|
|
|
// for current section and book level scopes, place the book and
|
|
// section ids into the indexed data.
|
|
fprintf(afp, "\t%s\t%s", BookID, SectionID);
|
|
#endif
|
|
|
|
#ifdef DTSR_USE_CNTR_L
|
|
// Then the ^L character at the end
|
|
fprintf(afp, "\n%c\n", CNTR_L );
|
|
#else
|
|
fprintf(afp, "\n");
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//-----------------------------------------------------------------------
|
|
/*
 * Closes the fzk file if it is open and releases the path string
 * allocated by the constructor.
 */
AusTextStore::~AusTextStore()
{
  if ( afp ) {
    fclose(afp);
    afp = NULL;
  }

  /* delete [] on a null pointer is a no-op, so no guard is needed. */
  delete [] austext_path;
  austext_path = NULL;
}
|
|
|
|
|