Re: Request for review: tsearch2 patch

Поиск
Список
Период
Сортировка
От Teodor Sigaev
Тема Re: Request for review: tsearch2 patch
Дата
Msg-id 45A4BF77.6020604@sigaev.ru
обсуждение исходный текст
Ответ на Re: Request for review: tsearch2 patch  (Tatsuo Ishii <ishii@postgresql.org>)
Ответы Re: Request for review: tsearch2 patch  (Tatsuo Ishii <ishii@sraoss.co.jp>)
Список pgsql-hackers
Sorry for the delay, I was on holiday :)

Did you test the patch on the Windows platform?

Tatsuo Ishii wrote:
> I have tested with a locale-enabled environment and found a bug. Included
> is the new version of the patches.
> 
> Teodor, Oleg, what do you think about these patches?
> If ok, shall I commit to CVS head?
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
> 
>> Hi,
>>
>> Here are patches for tsearch2 against CVS head.  Currently tsearch2
>> does not work with a multibyte encoding when the C locale is used. These
>> patches are intended to solve the problem by using PostgreSQL's in-house
>> multibyte functions instead of mbstowcs, which does not work with the C
>> locale. Also, iswalpha etc. will not be called in the case of the C locale,
>> since they do not work with it. Tested with the EUC_JP encoding
>> (should work with any multibyte encoding). Existing single-byte
>> encodings should not be broken by the patches, though I did not test that.
>> --
>> Tatsuo Ishii
>> SRA OSS, Inc. Japan
>>
>> ------------------------------------------------------------------------
>>
>> Index: ts_locale.c
>> ===================================================================
>> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
>> retrieving revision 1.7
>> diff -c -r1.7 ts_locale.c
>> *** ts_locale.c    20 Nov 2006 14:03:30 -0000    1.7
>> --- ts_locale.c    4 Jan 2007 12:16:00 -0000
>> ***************
>> *** 63,68 ****
>> --- 63,101 ----
>>   
>>       return mbstowcs(to, from, len);
>>   }
>> + 
>> + #else    /* WIN32 */
>> + 
>> + size_t
>> + char2wchar(wchar_t *to, const char *from, size_t len)
>> + {
>> +     wchar_t *result;
>> +     size_t n;
>> + 
>> +     if (to == NULL)
>> +         return 0;
>> + 
>> +     if (lc_ctype_is_c())
>> +     {
>> +         /* allocate neccesary memory for "to" including NULL terminate */
>> +         result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
>> + 
>> +         /* do the conversion */
>> +         n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
>> +         if (n > 0)
>> +         {
>> +             /* store the result */
>> +             if (n > len)
>> +                 n = len;
>> +             memcpy(to, result, n*sizeof(wchar_t));
>> +             pfree(result);
>> +             *(to + n) = '\0';
>> +         }
>> +         return n;
>> +     }
>> +     return mbstowcs(to, from, len);
>> + }
>> + 
>>   #endif   /* WIN32 */
>>   
>>   int
>> ***************
>> *** 70,75 ****
>> --- 103,113 ----
>>   {
>>       wchar_t        character;
>>   
>> +     if (lc_ctype_is_c())
>> +     {
>> +         return isalpha(TOUCHAR(ptr));
>> +     }
>> + 
>>       char2wchar(&character, ptr, 1);
>>   
>>       return iswalpha((wint_t) character);
>> ***************
>> *** 80,85 ****
>> --- 118,128 ----
>>   {
>>       wchar_t        character;
>>   
>> +     if (lc_ctype_is_c())
>> +     {
>> +         return isprint(TOUCHAR(ptr));
>> +     }
>> + 
>>       char2wchar(&character, ptr, 1);
>>   
>>       return iswprint((wint_t) character);
>> ***************
>> *** 126,132 ****
>>           if ( wlen < 0 )
>>               ereport(ERROR,
>>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
>> !                      errmsg("transalation failed from server encoding to wchar_t")));
>>   
>>           Assert(wlen<=len);
>>           wstr[wlen] = 0;
>> --- 169,175 ----
>>           if ( wlen < 0 )
>>               ereport(ERROR,
>>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
>> !                      errmsg("translation failed from server encoding to wchar_t")));
>>   
>>           Assert(wlen<=len);
>>           wstr[wlen] = 0;
>> ***************
>> *** 152,158 ****
>>           if ( wlen < 0 )
>>               ereport(ERROR,
>>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
>> !                      errmsg("transalation failed from wchar_t to server encoding %d", errno)));
>>           Assert(wlen<=len);
>>           out[wlen]='\0';
>>       }
>> --- 195,201 ----
>>           if ( wlen < 0 )
>>               ereport(ERROR,
>>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
>> !                      errmsg("translation failed from wchar_t to server encoding %d", errno)));
>>           Assert(wlen<=len);
>>           out[wlen]='\0';
>>       }
>> Index: ts_locale.h
>> ===================================================================
>> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
>> retrieving revision 1.7
>> diff -c -r1.7 ts_locale.h
>> *** ts_locale.h    4 Oct 2006 00:29:47 -0000    1.7
>> --- ts_locale.h    4 Jan 2007 12:16:00 -0000
>> ***************
>> *** 38,45 ****
>>   #else                            /* WIN32 */
>>   
>>   /* correct mbstowcs */
>> - #define char2wchar mbstowcs
>>   #define wchar2char wcstombs
>>   #endif   /* WIN32 */
>>   
>>   #define t_isdigit(x)    ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
>> --- 38,46 ----
>>   #else                            /* WIN32 */
>>   
>>   /* correct mbstowcs */
>>   #define wchar2char wcstombs
>> + size_t        char2wchar(wchar_t *to, const char *from, size_t len);
>> + 
>>   #endif   /* WIN32 */
>>   
>>   #define t_isdigit(x)    ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
>> ***************
>> *** 54,59 ****
>> --- 55,61 ----
>>    * t_iseq() should be called only for ASCII symbols
>>    */
>>   #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
>> + /*#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)))*/
>>   
>>   #define COPYCHAR(d,s)    do {                \
>>       int lll = pg_mblen( s );            \
>> Index: wordparser/parser.c
>> ===================================================================
>> RCS file: /cvsroot/pgsql/contrib/tsearch2/wordparser/parser.c,v
>> retrieving revision 1.11
>> diff -c -r1.11 parser.c
>> *** wordparser/parser.c    4 Oct 2006 00:29:47 -0000    1.11
>> --- wordparser/parser.c    4 Jan 2007 12:16:01 -0000
>> ***************
>> *** 44,52 ****
>>        * Some operating systems fail with multi-byte encodings and a C locale.
>>        * Also, for a C locale there is no need to process as multibyte. From
>>        * backend/utils/adt/oracle_compat.c Teodor
>>        */
>>   
>> !     if (prs->charmaxlen > 1 && !lc_ctype_is_c())
>>       {
>>           prs->usewide = true;
>>           prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
>> --- 44,54 ----
>>        * Some operating systems fail with multi-byte encodings and a C locale.
>>        * Also, for a C locale there is no need to process as multibyte. From
>>        * backend/utils/adt/oracle_compat.c Teodor
>> +      *
>> +      * This is wrong assumption. even if locale is C, multibyte is necceary.
>>        */
>>   
>> !     if (prs->charmaxlen > 1)
>>       {
>>           prs->usewide = true;
>>           prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
>> ***************
>> *** 92,98 ****
>>   static int                                            \
>>   p_is##type(TParser *prs) {                                    \
>>       Assert( prs->state );                                    \
>> !     return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
>>           is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) );        \
>>   }    \
>>                                                   \
>> --- 94,102 ----
>>   static int                                            \
>>   p_is##type(TParser *prs) {                                    \
>>       Assert( prs->state );                                    \
>> !     return ( ( prs->usewide ) ? \
>> !              (lc_ctype_is_c()? is##type( 0xff & *( prs->wstr + prs->state->poschar)): \
>> !               isw##type( (wint_t)*( prs->wstr + prs->state->poschar))): \
>>           is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) );        \
>>   }    \
>>                                                   \
>> ***************
>> *** 134,141 ****
>>   }
>>   #endif   /* TS_USE_WIDE */
>>   
>> ! p_iswhat(alnum)
>> ! p_iswhat(alpha)
>>   p_iswhat(digit)
>>   p_iswhat(lower)
>>   p_iswhat(print)
>> --- 138,197 ----
>>   }
>>   #endif   /* TS_USE_WIDE */
>>   
>> ! static int p_isalnum(TParser *prs) {
>> !     Assert( prs->state );
>> ! 
>> !     if (prs->usewide)
>> !     {
>> !         unsigned int c;
>> ! 
>> !         c = *(prs->wstr + prs->state->poschar);
>> ! 
>> !         if (lc_ctype_is_c())
>> !         {
>> !             if (c > 0x7f)
>> !                 return 1;
>> !             return isalnum(0xff & c);
>> !         }
>> !         else
>> !             return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar));
>> !     }
>> !     else
>> !         return isalnum( (unsigned char)*( prs->str + prs->state->posbyte ));
>> ! }
>> ! 
>> ! static int    p_isnotalnum(TParser *prs)
>> ! {
>> !     return !p_isalnum(prs);
>> ! }
>> !
>> ! static int p_isalpha(TParser *prs) {
>> !     Assert( prs->state );
>> ! 
>> !     if (prs->usewide)
>> !     {
>> !         unsigned int c;
>> ! 
>> !         c = *(prs->wstr + prs->state->poschar);
>> ! 
>> !         if (lc_ctype_is_c())
>> !         {
>> !             if (c > 0x7f)
>> !                 return 1;
>> !             return isalpha(0xff & c);
>> !         }
>> !         else
>> !             return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar));
>> !     }
>> !     else
>> !         return isalpha( (unsigned char)*( prs->str + prs->state->posbyte ));
>> ! }
>> ! 
>> ! static int    p_isnotalpha(TParser *prs)
>> ! {
>> !     return !p_isalpha(prs);
>> ! }
>> ! 
>>   p_iswhat(digit)
>>   p_iswhat(lower)
>>   p_iswhat(print)
>>
>> ------------------------------------------------------------------------
>>
>>
>> ---------------------------(end of broadcast)---------------------------
>> TIP 9: In versions below 8.0, the planner will ignore your desire to
>>        choose an index scan if your joining column's datatypes do not
>>        match

-- 
Teodor Sigaev                                   E-mail: teodor@sigaev.ru
  WWW: http://www.sigaev.ru/
 


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Benny Amorsen
Дата:
Сообщение: Re: Patch to log usage of temporary files
Следующее
От: Teodor Sigaev
Дата:
Сообщение: Re: [PATCHES] Bundle of patches