From 6f952545fbcc6d294b15a0d3def02959ef3ddefb Mon Sep 17 00:00:00 2001 From: Liang Chang Date: Fri, 21 May 2021 05:06:38 +0800 Subject: [PATCH] dtdocbook: Migrate doc2sdl to UTF-8. --- cde/programs/dtdocbook/doc2sdl/docbook.sgml | 31 ++++++------- cde/programs/dtdocbook/doc2sdl/docbook.tcl | 48 ++++++++++++++++----- cde/programs/dtdocbook/doc2sdl/dtdocbook | 22 +++++++++- 3 files changed, 72 insertions(+), 29 deletions(-) diff --git a/cde/programs/dtdocbook/doc2sdl/docbook.sgml b/cde/programs/dtdocbook/doc2sdl/docbook.sgml index b8fec5583..32340715c 100644 --- a/cde/programs/dtdocbook/doc2sdl/docbook.sgml +++ b/cde/programs/dtdocbook/doc2sdl/docbook.sgml @@ -2,23 +2,20 @@ CHARSET - BASESET "ISO 646:1983//CHARSET - International Reference Version (IRV)//ESC 2/5 4/0" - DESCSET - 0 9 UNUSED - 9 2 9 - 11 2 UNUSED - 13 1 13 - 14 18 UNUSED - 32 95 32 - 127 1 UNUSED - - BASESET "ISO Registration Number 100//CHARSET - ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1" - DESCSET - 128 32 UNUSED - 160 96 32 - + BASESET "ISO Registration Number 177//CHARSET + ISO/IEC 10646-1:1993 UCS-4 with + implementation level 3//ESC 2/5 2/15 4/6" + DESCSET 0 9 UNUSED + 9 2 9 + 11 2 UNUSED + 13 1 13 + 14 18 UNUSED + 32 95 32 + 127 1 UNUSED + 128 32 UNUSED + 160 55136 160 + 55296 2048 UNUSED -- SURROGATES -- + 57344 1056768 57344 CAPACITY SGMLREF diff --git a/cde/programs/dtdocbook/doc2sdl/docbook.tcl b/cde/programs/dtdocbook/doc2sdl/docbook.tcl index 17a0d08b8..7357d088e 100755 --- a/cde/programs/dtdocbook/doc2sdl/docbook.tcl +++ b/cde/programs/dtdocbook/doc2sdl/docbook.tcl @@ -77,6 +77,32 @@ set snbLocation 0 # EXAMPLE, for instance, we need to default to TYPE="LITERAL" set defaultParaType "" +proc TrimWhiteSpaces {str} { ;# return str with leading and trailing ASCII whitespace removed; non-ASCII characters are never trimmed + set characters [split $str ""] + + for {set i 0} {$i < 2} {incr i} { ;# pass 1 trims the front; pass 2 runs on the reversed list and so trims the back + set tmp "" + set trimmed 0 + + foreach c $characters { + if {$trimmed == 0} { + if {[string is ascii -strict $c]} { + if {[string is space -strict $c]} { + continue ;# drop a character that is both ASCII and whitespace while still at the leading edge + } + } + } + + set 
trimmed 1 ;# first kept character seen: every later character is kept as is + + lappend tmp $c + } + + set characters [lreverse $tmp] ;# flip the list; the flip after the second pass restores the original order + } + + return [join $characters ""] +} # print internal error message and exit proc InternalError {what} { @@ -255,7 +281,7 @@ proc AddToMarkArray {mark} { global validMarkArray set m [string range $mark 1 6] - set m [string trim $m] + set m [TrimWhiteSpaces $m] set validMarkArray($m) $mark @@ -1987,13 +2013,13 @@ proc EndAGlossedTerm {id role} { # trim whitespace from the front and back of the string to be # glossed, also turn line feeds into spaces and compress out # duplicate whitespace - set glossString [string trim $glossString] + set glossString [TrimWhiteSpaces $glossString] set glossString [split $glossString '\n'] set tmpGlossString $glossString set glossString [lindex $tmpGlossString 0] foreach str [lrange $tmpGlossString 1 end] { if {$str != ""} { - append glossString " " [string trim $str] + append glossString " " [TrimWhiteSpaces $str] } } @@ -2045,7 +2071,7 @@ proc EndATermInAGlossary {id} { set glossString [lindex $tmpGlossString 0] foreach str [lrange $tmpGlossString 1 end] { if {$str != ""} { - append glossString " " [string trim $str] + append glossString " " [TrimWhiteSpaces $str] } } @@ -2347,7 +2373,7 @@ proc AddIndexEntry {loc} { # trim superfluous whitespace at the beginning and end of the # indexed term - set indexBuffer [string trim $indexBuffer] + set indexBuffer [TrimWhiteSpaces $indexBuffer] # get an array index and determine whether 1st, 2nd or 3rd level set index [join $indexVals ", "] @@ -2411,7 +2437,7 @@ proc EndIndexTerm {} { proc StartPrimaryIndexEntry {id cdata} { global indexVals - set indexVals [list [string trim $cdata]] + set indexVals [list [TrimWhiteSpaces $cdata]] } @@ -2425,7 +2451,7 @@ proc StartSecondaryIndexEntry {id cdata} { global indexVals AddIndexEntry "" ;# make sure our primary is defined - lappend indexVals [string trim $cdata] + lappend indexVals [TrimWhiteSpaces $cdata] } @@ -2439,7 +2465,7 @@ proc StartTertiaryIndexEntry {id cdata} { global 
indexVals AddIndexEntry "" ;# make sure our secondary is defined - lappend indexVals [string trim $cdata] + lappend indexVals [TrimWhiteSpaces $cdata] } @@ -4692,7 +4718,9 @@ proc IncludeTOSS {} { proc GetLocalizedAutoGeneratedStringArray {filename} { global localizedAutoGeneratedStringArray - set buffer [ReadLocaleStrings $filename] + set fp [open $filename] ;# read the file directly instead of calling ReadLocaleStrings; NOTE(review): no encoding is configured on the channel, so Tcl's default applies: confirm it matches the file's encoding + set buffer [read $fp] + close $fp set regExp {^(".*")[ ]*(".*")$} ;# look for 2 quoted strings @@ -4701,7 +4729,7 @@ proc GetLocalizedAutoGeneratedStringArray {filename} { set index 0 while {$listLength > 0} { set line [lindex $stringList $index] - set line [string trim $line] + set line [TrimWhiteSpaces $line] if {([string length $line] > 0) && ([string index $line 0] != "#")} { if {[regexp $regExp $line match match1 match2]} { set match1 [string trim $match1 \"] diff --git a/cde/programs/dtdocbook/doc2sdl/dtdocbook b/cde/programs/dtdocbook/doc2sdl/dtdocbook index 22c78b1b3..66de2ebba 100755 --- a/cde/programs/dtdocbook/doc2sdl/dtdocbook +++ b/cde/programs/dtdocbook/doc2sdl/dtdocbook @@ -1,5 +1,7 @@ #!/bin/ksh +export LC_CTYPE="${LANG}" # take the character-type locale from LANG + # get the name of this command for errors, warnings and messages command_name=`basename $0` @@ -86,6 +88,7 @@ do esac done +default_charset='UTF-8' dbk_lib="${dbk_lib:-/usr/dt/dthelp/dtdocbook}" # if no -t, use installed dir sgml_dir="${sgml_dir:-${dbk_lib}}" # if no -s, use -t info_dir="${info_dir:-/usr/dt/infolib}" # if no -i, use installed dir @@ -99,12 +102,24 @@ if [[ ${#sgmls} -eq 0 ]] then # if no -S, use installed one fi sgmls="${sgmls:-${info_dir}/etc/sgmls}" # if no -S, use installed one instant="${instant:-${dbk_lib}/instant}" # if no -I, use installed one -x_locale="${x_locale:-${dbk_lib}/xlate_locale}" # if no -L, use installed one +x_locale="${x_locale:-${LANG}}" # if no -L, use $LANG helptag2="${helptag2:-dthelp_htag2}" # if no -H, use one in PATH +if [[ "$x_locale" == *.* ]] then # the locale name contains a dot, so it already carries a charset part + x_lang="${x_locale%%.*}" # text before the first dot + x_charset="${x_locale##*.}" # text after the last dot + + if [[ 
"$x_charset" != "$default_charset" ]] then # exact, case-sensitive comparison with the default charset + x_locale="${x_lang}.$default_charset" # keep the language part, replace the charset part + echo "Warning: charset is changed to ${default_charset}." # NOTE(review): printed on stdout; confirm that is where this script's warnings belong + fi +else + x_locale="${x_locale}.$default_charset" # no charset in the name: append the default; NOTE(review): if -L was not given and LANG is unset or empty this yields ".UTF-8": confirm LANG is always set +fi + # Set the environment variables for instant(1) to find its files export TPT_LIB="${dbk_lib}" -export LOCALE_DIR="${dbk_lib}/$($x_locale)" +export LOCALE_DIR="${dbk_lib}/${x_locale}" # Determine whether we are using sgmls or nsgmls parser=`basename $sgmls` @@ -123,6 +138,9 @@ elif ([[ "$SGML_CATALOG_FILES" = "" ]]) then export SGML_CATALOG_FILES="${SGML_CATALOG_FILES}:${sgml_cat}/catalog" fi +export SP_CHARSET_FIXED=1 # NOTE(review): SP_CHARSET_FIXED and SP_ENCODING are presumably read by the SP-based parser (nsgmls); verify against its documentation +export SP_ENCODING="$default_charset" + # Set the environment variable to be picked up inside instant(1) when it # goes to call Tcl. export DBKTCL_DIR="${dbk_lib}/"