Re: Request for review: tsearch2 patch

Поиск
Список
Период
Сортировка
От Tatsuo Ishii
Тема Re: Request for review: tsearch2 patch
Дата
Msg-id 20070104.212538.68066907.t-ishii@sraoss.co.jp
обсуждение исходный текст
Ответ на Request for review: tsearch2 patch  (Tatsuo Ishii <ishii@postgresql.org>)
Ответы Re: Request for review: tsearch2 patch  (Teodor Sigaev <teodor@sigaev.ru>)
Re: Request for review: tsearch2 patch  (Teodor Sigaev <teodor@sigaev.ru>)
Список pgsql-hackers
I have tested in a locale-enabled environment and found a bug. Included
is a new version of the patches.

Teodor, Oleg, what do you think about these patches?
If ok, shall I commit to CVS head?
--
Tatsuo Ishii
SRA OSS, Inc. Japan

> Hi,
> 
> Here are patches against tsearch2 with CVS head.  Currently tsearch2
> does not work with multibyte encoding which uses C locale. These
> patches are intended to solve the problem by using PostgreSQL in-house
> multibyte function instead of mbstowcs which does not work with C
> locale. Also iswalpha etc. will not be called in case of C locale
> since they are not working with it. Tested with the EUC_JP encoding
> (should be working with any multibyte encodings). Existing single byte
> encodings should not be broken by the patches, I did not test though.
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
Index: ts_locale.c
===================================================================
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.c
*** ts_locale.c    20 Nov 2006 14:03:30 -0000    1.7
--- ts_locale.c    4 Jan 2007 12:16:00 -0000
***************
*** 63,68 ****
--- 63,101 ----      return mbstowcs(to, from, len); }
+ 
+ #else    /* WIN32 */
+ 
+ size_t
+ char2wchar(wchar_t *to, const char *from, size_t len)
+ {
+     wchar_t *result;
+     size_t n;
+ 
+     if (to == NULL)
+         return 0;
+ 
+     if (lc_ctype_is_c())
+     {
+         /* allocate necessary memory for "to", including the NUL terminator */
+         result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
+ 
+         /* do the conversion */
+         n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
+         if (n > 0)
+         {
+             /* store the result */
+             if (n > len)
+                 n = len;
+             memcpy(to, result, n*sizeof(wchar_t));
+             pfree(result);
+             *(to + n) = '\0';
+         }
+         return n;
+     }
+     return mbstowcs(to, from, len);
+ }
+  #endif   /* WIN32 */  int
***************
*** 70,75 ****
--- 103,113 ---- {     wchar_t        character; 
+     if (lc_ctype_is_c())
+     {
+         return isalpha(TOUCHAR(ptr));
+     }
+      char2wchar(&character, ptr, 1);      return iswalpha((wint_t) character);
***************
*** 80,85 ****
--- 118,128 ---- {     wchar_t        character; 
+     if (lc_ctype_is_c())
+     {
+         return isprint(TOUCHAR(ptr));
+     }
+      char2wchar(&character, ptr, 1);      return iswprint((wint_t) character);
***************
*** 126,132 ****         if ( wlen < 0 )             ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!                      errmsg("transalation failed from server encoding to wchar_t")));          Assert(wlen<=len);
   wstr[wlen] = 0;
 
--- 169,175 ----         if ( wlen < 0 )             ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!                      errmsg("translation failed from server encoding to wchar_t")));          Assert(wlen<=len);
  wstr[wlen] = 0;
 
***************
*** 152,158 ****         if ( wlen < 0 )             ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!                      errmsg("transalation failed from wchar_t to server encoding %d", errno)));
Assert(wlen<=len);        out[wlen]='\0';     }
 
--- 195,201 ----         if ( wlen < 0 )             ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
!                      errmsg("translation failed from wchar_t to server encoding %d", errno)));
Assert(wlen<=len);        out[wlen]='\0';     }
 
Index: ts_locale.h
===================================================================
RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
retrieving revision 1.7
diff -c -r1.7 ts_locale.h
*** ts_locale.h    4 Oct 2006 00:29:47 -0000    1.7
--- ts_locale.h    4 Jan 2007 12:16:00 -0000
***************
*** 38,45 **** #else                            /* WIN32 */  /* correct mbstowcs */
- #define char2wchar mbstowcs #define wchar2char wcstombs #endif   /* WIN32 */  #define t_isdigit(x)    (
pg_mblen(x)==1&& isdigit( TOUCHAR(x) ) )
 
--- 38,46 ---- #else                            /* WIN32 */  /* correct mbstowcs */ #define wchar2char wcstombs
+ size_t        char2wchar(wchar_t *to, const char *from, size_t len);
+  #endif   /* WIN32 */  #define t_isdigit(x)    ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
***************
*** 54,59 ****
--- 55,61 ----  * t_iseq() should be called only for ASCII symbols  */ #define t_iseq(x,c) ( (pg_mblen(x)==1) ? (
TOUCHAR(x)== ((unsigned char)(c)) ) : false )
 
+ /*#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)))*/  #define COPYCHAR(d,s)    do {                \     int
lll= pg_mblen( s );            \
 
Index: wordparser/parser.c
===================================================================
RCS file: /cvsroot/pgsql/contrib/tsearch2/wordparser/parser.c,v
retrieving revision 1.11
diff -c -r1.11 parser.c
*** wordparser/parser.c    4 Oct 2006 00:29:47 -0000    1.11
--- wordparser/parser.c    4 Jan 2007 12:16:01 -0000
***************
*** 44,52 ****      * Some operating systems fail with multi-byte encodings and a C locale.      * Also, for a C locale
there is no need to process as multibyte. From      * backend/utils/adt/oracle_compat.c Teodor      */ 
 
!     if (prs->charmaxlen > 1 && !lc_ctype_is_c())     {         prs->usewide = true;         prs->wstr = (wchar_t *)
palloc(sizeof(wchar_t)* prs->lenstr);
 
--- 44,54 ----      * Some operating systems fail with multi-byte encodings and a C locale.      * Also, for a C locale
there is no need to process as multibyte. From      * backend/utils/adt/oracle_compat.c Teodor
 
+      *
+      * This is a wrong assumption: even if the locale is C, multibyte processing is necessary.      */ 
!     if (prs->charmaxlen > 1)     {         prs->usewide = true;         prs->wstr = (wchar_t *)
palloc(sizeof(wchar_t)* prs->lenstr);
 
***************
*** 92,98 **** static int                                            \ p_is##type(TParser *prs) {
            \     Assert( prs->state );                                    \
 
!     return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \         is##type(
(unsignedchar)*( prs->str + prs->state->posbyte ) ) );        \ }    \
\
--- 94,102 ---- static int                                            \ p_is##type(TParser *prs) {
             \     Assert( prs->state );                                    \
 
!     return ( ( prs->usewide ) ? \
!              (lc_ctype_is_c()? is##type( 0xff & *( prs->wstr + prs->state->poschar)): \
!               isw##type( (wint_t)*( prs->wstr + prs->state->poschar))): \         is##type( (unsigned char)*(
prs->str+ prs->state->posbyte ) ) );        \ }    \                                                 \
 
***************
*** 134,141 **** } #endif   /* TS_USE_WIDE */ 
! p_iswhat(alnum)
! p_iswhat(alpha) p_iswhat(digit) p_iswhat(lower) p_iswhat(print)
--- 138,197 ---- } #endif   /* TS_USE_WIDE */ 
! static int p_isalnum(TParser *prs) {
!     Assert( prs->state );
! 
!     if (prs->usewide)
!     {
!         unsigned int c;
! 
!         c = *(prs->wstr + prs->state->poschar);
! 
!         if (lc_ctype_is_c())
!         {
!             if (c > 0x7f)
!                 return 1;
!             return isalnum(0xff & c);
!         }
!         else
!             return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar));
!     }
!     else
!         return isalnum( (unsigned char)*( prs->str + prs->state->posbyte ));
! }
! 
! static int    p_isnotalnum(TParser *prs)
! {
!     return !p_isalnum(prs);
! }
! 
! static int p_isalpha(TParser *prs) {
!     Assert( prs->state );
! 
!     if (prs->usewide)
!     {
!         unsigned int c;
! 
!         c = *(prs->wstr + prs->state->poschar);
! 
!         if (lc_ctype_is_c())
!         {
!             if (c > 0x7f)
!                 return 1;
!             return isalpha(0xff & c);
!         }
!         else
!             return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar));
!     }
!     else
!         return isalpha( (unsigned char)*( prs->str + prs->state->posbyte ));
! }
! 
! static int    p_isnotalpha(TParser *prs)
! {
!     return !p_isalpha(prs);
! }
!  p_iswhat(digit) p_iswhat(lower) p_iswhat(print)

В списке pgsql-hackers по дате отправления:

Предыдущее
От: "Simon Riggs"
Дата:
Сообщение: Re: 8.3 pending patch queue
Следующее
От: "Simon Riggs"
Дата:
Сообщение: Tabs or Spaces