diff -ru abi-0.7.11-orig~/abi-0.7.11/CREDITS.TXT abi-0.7.11-orig-orig/abi-0.7.11/CREDITS.TXT
--- abi-0.7.11-orig~/abi-0.7.11/CREDITS.TXT Thu Nov 9 19:38:46 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/CREDITS.TXT Fri Nov 10 15:05:41 2000
@@ -44,7 +44,7 @@
Vlad Harchev <hvv@hippo.ru> Support for non latin-1 Languages
Stephen Hack <shack@uiuc.edu> options dialog
Martin Willemoes Hansen <mwh@stampede.org>
-hj <huangj@citiz.net> XIM, focus
+hj <huangj@citiz.net> XIM, focus, principal author of CJK support patch
Roman Hodek <Roman.Hodek@informatik.uni-erlangen.de> m68k Endian patch
Ming-I Hsieh <mihs@wm28.csie.ncu.edu.tw> FreeBSD
Perry Ismangil <perry@trabas.com> AbiHello
@@ -106,6 +106,7 @@
Robert G. Werner <rwerner@lx1.microbsys.com> VI keybindings
John Wood <jyonw@asu.edu> NetBSD
Alan Young <ayoung@teleport.com> Alpha/NT
+Belcon Zhao <rainfall@yeah.net> Testing/fixing CJK support
translators
-----------
Only in abi-0.7.11-orig-orig/abi-0.7.11: l1.latex
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9 19:38:46 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9 22:09:50 2000
@@ -240,7 +240,9 @@
UT_Mbtowc::~UT_Mbtowc()
{
- iconv_close(cd);
+ /* libiconv's iconv_close() segfaults on an invalid descriptor, so check first - VH */
+ if (cd!=(iconv_t)-1)
+ iconv_close(cd);
};
int UT_Mbtowc::mbtowc(wchar_t &wc,char mb)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9 19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9 22:09:30 2000
@@ -185,7 +185,9 @@
UT_Wctomb::~UT_Wctomb()
{
- iconv_close(cd);
+ /* libiconv's iconv_close() segfaults on an invalid descriptor, so check first - VH */
+ if (cd!=(iconv_t)-1)
+ iconv_close(cd);
};
int UT_Wctomb::wctomb(char * pC,int &length,wchar_t wc)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Thu Nov 9 19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Fri Nov 10 15:08:48 2000
@@ -395,6 +395,15 @@
static const char* wincharsetcode_th[]= /* thai charset*/
{ "th", NULL };
+/*I'm not sure that charset code is the same for Big5 and GB2312.
+ Tested with GB2312 only.
+*/
+static const char* wincharsetcode_zh_GB2312[]= /* chinese*/
+{ "zh_CN.GB2312", "zh_TW.GB2312", NULL };
+
+static const char* wincharsetcode_zh_BIG5[]= /* chinese*/
+{ "zh_CN.BIG5", "zh_TW.BIG5", NULL };
+
static const _rmap langcode_to_wincharsetcode[]=
{
{"0"}, /* default value - ansi charset*/
@@ -403,6 +412,8 @@
{"162",wincharsetcode_tr},
{"163",wincharsetcode_vi},
{"222",wincharsetcode_th},
+ {"134",wincharsetcode_zh_GB2312},
+ {"136",wincharsetcode_zh_BIG5},
{NULL}
};
@@ -449,12 +460,16 @@
{NULL}
};
+/*
+ This table is useful because iconv implementations may not recognize some
+ cpNNNN charset names and know those charsets only under a different name.
+*/
static const _map MSCodepagename_to_charset_name_map[]=
{
/*key, value*/
{NULL,NULL},
- {"CP936","BIG5"}, /* most probably it's correct - VH*/
- {"CP950","GB2312"}, /* 100% correct */
+ {"CP936","GB2312"},
+ {"CP950","BIG5"},
{NULL,NULL}
};
@@ -463,7 +478,10 @@
{
/*key, value*/
{NULL},
-/* {"0x404","zh_CN"},*/ /*I guess - VH*/
+ {"zh_CN.BIG5", "0x404"},
+ {"zh_CN.GB2312", "0x804"},
+ {"zh_TW.BIG5", "0x404"},
+ {"zh_TW.GB2312", "0x804"},
{NULL}
};
@@ -728,7 +746,7 @@
len += sprintf(buf+len,"\\usepackage[%s]{inputenc}\n",NativeTexEncodingName);
if (NativeBabelArgument)
len += sprintf(buf+len,"\\usepackage[%s]{babel}\n",NativeBabelArgument);
- TexPrologue = len ? UT_strdup(buf) : "";
+ TexPrologue = len ? UT_strdup(buf) : " ";
};
}
if (cjk_locale()) {
@@ -815,7 +833,9 @@
const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
{
- char* cpname = wvLIDToCodePageConverter(lid);
+ static char buf[100];
+ sprintf(buf,"CP%d",lid);
+ char* cpname = buf;
UT_Bool is_default;
const char* ret = search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
return is_default ? cpname : ret;
@@ -823,7 +843,10 @@
const char* XAP_EncodingManager::WindowsCharsetName() const
{
- return charsetFromCodepage( getWinLanguageCode() );
+ char* cpname = wvLIDToCodePageConverter(getWinLanguageCode());
+ UT_Bool is_default;
+ const char* ret = search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
+ return is_default ? cpname : ret;
};
UT_uint32 XAP_EncodingManager::getWinLanguageCode() const
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp Thu Nov 9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp Sat Nov 11 21:54:27 2000
@@ -196,14 +196,20 @@
{
/*FIXME: can it happen that wctomb will fail under CJK locales? */
m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++);
- for(int i=0;i<mblen;++i) {
- unsigned char c = mbbuf[i];
- if ( c > 0x007f)
+ if (mbbuf[0] & 0x80)
+ {
+ FlushBuffer();
+ for(int i=0;i<mblen;++i) {
+ unsigned char c = mbbuf[i];
m_pie->_rtf_nonascii_hex2(c);
- else
- *pBuf++ = c;
-
- };
+ }
+ }
+ else
+ {
+ for(int i=0;i<mblen;++i) {
+ *pBuf++ = mbbuf[i];
+ }
+ }
} else if (!m_pie->m_atticFormat)
{
if (*pData > 0x00ff) // emit unicode character
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_MsWord_97.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_MsWord_97.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_MsWord_97.cpp Thu Nov 9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_MsWord_97.cpp Sat Nov 11 12:48:13 2000
@@ -629,7 +629,7 @@
else
{
FREEP(fname);
- fname=UT_strdup(f);
+ fname=UT_strdup(f ? f : "helvetic");
}
}
}
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp Thu Nov 9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_imp_RTF.cpp Fri Nov 10 15:11:25 2000
@@ -1447,6 +1447,10 @@
//is seen
// Now comes the font name, terminated by either a close brace or a slash or a semi-colon
int count = 0;
+ /*
+ FIXME: CJK font names come in form \'aa\'cd\'ef - so we have to
+ parse \'HH correctly (currently we ignore them!) - VH
+ */
while ( ch != '}' && ch != '\\' && ch != ';' && ch!= '{')
{
keyword[count++] = ch;
@@ -1472,6 +1476,8 @@
{
if (!ReadCharFromFile(&ch))
return UT_FALSE;
+ if (ch=='{')
+ ++nesting;
}
if (nesting>0 && i!=nesting) //we need to skip '}' we've just seen.
if (!ReadCharFromFile(&ch))