Re: Request for review: tsearch2 patch

Поиск
Список
Период
Сортировка
От Tatsuo Ishii
Тема Re: Request for review: tsearch2 patch
Дата
Msg-id 20070110.195518.85401170.t-ishii@sraoss.co.jp
обсуждение исходный текст
Ответ на Re: Request for review: tsearch2 patch  (Teodor Sigaev <teodor@sigaev.ru>)
Список pgsql-hackers
> Sorry for delay, I was on holidays :)
> 
> Did you test the patch on the Windows platform?

No. I myself do not use the Windows platform.

Do you have any concerns about Windows regarding my patches?
--
Tatsuo Ishii
SRA OSS, Inc. Japan

> Tatsuo Ishii wrote:
> > I have tested with a locale-enabled environment and found a bug. Included
> > is the new version of the patches.
> > 
> > Teodor, Oleg, what do you think about these patches?
> > If ok, shall I commit to CVS head?
> > --
> > Tatsuo Ishii
> > SRA OSS, Inc. Japan
> > 
> >> Hi,
> >>
> >> Here are patches against tsearch2 with CVS head.  Currently tsearch2
> >> does not work with a multibyte encoding when the C locale is used. These
> >> patches are intended to solve the problem by using PostgreSQL's in-house
> >> multibyte functions instead of mbstowcs, which does not work with the C
> >> locale. Also, iswalpha etc. will not be called in the case of the C locale,
> >> since they do not work with it. Tested with the EUC_JP encoding
> >> (should work with any multibyte encoding). Existing single-byte
> >> encodings should not be broken by the patches, though I did not test them.
> >> --
> >> Tatsuo Ishii
> >> SRA OSS, Inc. Japan
> >>
> >> ------------------------------------------------------------------------
> >>
> >> Index: ts_locale.c
> >> ===================================================================
> >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v
> >> retrieving revision 1.7
> >> diff -c -r1.7 ts_locale.c
> >> *** ts_locale.c    20 Nov 2006 14:03:30 -0000    1.7
> >> --- ts_locale.c    4 Jan 2007 12:16:00 -0000
> >> ***************
> >> *** 63,68 ****
> >> --- 63,101 ----
> >>   
> >>       return mbstowcs(to, from, len);
> >>   }
> >> + 
> >> + #else    /* WIN32 */
> >> + 
> >> + size_t
> >> + char2wchar(wchar_t *to, const char *from, size_t len)
> >> + {
> >> +     wchar_t *result;
> >> +     size_t n;
> >> + 
> >> +     if (to == NULL)
> >> +         return 0;
> >> + 
> >> +     if (lc_ctype_is_c())
> >> +     {
> >> +         /* allocate neccesary memory for "to" including NULL terminate */
> >> +         result = (wchar_t *)palloc((len+1)*sizeof(wchar_t));
> >> + 
> >> +         /* do the conversion */
> >> +         n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len);
> >> +         if (n > 0)
> >> +         {
> >> +             /* store the result */
> >> +             if (n > len)
> >> +                 n = len;
> >> +             memcpy(to, result, n*sizeof(wchar_t));
> >> +             pfree(result);
> >> +             *(to + n) = '\0';
> >> +         }
> >> +         return n;
> >> +     }
> >> +     return mbstowcs(to, from, len);
> >> + }
> >> + 
> >>   #endif   /* WIN32 */
> >>   
> >>   int
> >> ***************
> >> *** 70,75 ****
> >> --- 103,113 ----
> >>   {
> >>       wchar_t        character;
> >>   
> >> +     if (lc_ctype_is_c())
> >> +     {
> >> +         return isalpha(TOUCHAR(ptr));
> >> +     }
> >> + 
> >>       char2wchar(&character, ptr, 1);
> >>   
> >>       return iswalpha((wint_t) character);
> >> ***************
> >> *** 80,85 ****
> >> --- 118,128 ----
> >>   {
> >>       wchar_t        character;
> >>   
> >> +     if (lc_ctype_is_c())
> >> +     {
> >> +         return isprint(TOUCHAR(ptr));
> >> +     }
> >> + 
> >>       char2wchar(&character, ptr, 1);
> >>   
> >>       return iswprint((wint_t) character);
> >> ***************
> >> *** 126,132 ****
> >>           if ( wlen < 0 )
> >>               ereport(ERROR,
> >>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
> >> !                      errmsg("transalation failed from server encoding to wchar_t")));
> >>   
> >>           Assert(wlen<=len);
> >>           wstr[wlen] = 0;
> >> --- 169,175 ----
> >>           if ( wlen < 0 )
> >>               ereport(ERROR,
> >>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
> >> !                      errmsg("translation failed from server encoding to wchar_t")));
> >>   
> >>           Assert(wlen<=len);
> >>           wstr[wlen] = 0;
> >> ***************
> >> *** 152,158 ****
> >>           if ( wlen < 0 )
> >>               ereport(ERROR,
> >>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
> >> !                      errmsg("transalation failed from wchar_t to server encoding %d", errno)));
> >>           Assert(wlen<=len);
> >>           out[wlen]='\0';
> >>       }
> >> --- 195,201 ----
> >>           if ( wlen < 0 )
> >>               ereport(ERROR,
> >>                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
> >> !                      errmsg("translation failed from wchar_t to server encoding %d", errno)));
> >>           Assert(wlen<=len);
> >>           out[wlen]='\0';
> >>       }
> >> Index: ts_locale.h
> >> ===================================================================
> >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v
> >> retrieving revision 1.7
> >> diff -c -r1.7 ts_locale.h
> >> *** ts_locale.h    4 Oct 2006 00:29:47 -0000    1.7
> >> --- ts_locale.h    4 Jan 2007 12:16:00 -0000
> >> ***************
> >> *** 38,45 ****
> >>   #else                            /* WIN32 */
> >>   
> >>   /* correct mbstowcs */
> >> - #define char2wchar mbstowcs
> >>   #define wchar2char wcstombs
> >>   #endif   /* WIN32 */
> >>   
> >>   #define t_isdigit(x)    ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
> >> --- 38,46 ----
> >>   #else                            /* WIN32 */
> >>   
> >>   /* correct mbstowcs */
> >>   #define wchar2char wcstombs
> >> + size_t        char2wchar(wchar_t *to, const char *from, size_t len);
> >> + 
> >>   #endif   /* WIN32 */
> >>   
> >>   #define t_isdigit(x)    ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) )
> >> ***************
> >> *** 54,59 ****
> >> --- 55,61 ----
> >>    * t_iseq() should be called only for ASCII symbols
> >>    */
> >>   #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false )
> >> + /*#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)))*/
> >>   
> >>   #define COPYCHAR(d,s)    do {                \
> >>       int lll = pg_mblen( s );            \
> >> Index: wordparser/parser.c
> >> ===================================================================
> >> RCS file: /cvsroot/pgsql/contrib/tsearch2/wordparser/parser.c,v
> >> retrieving revision 1.11
> >> diff -c -r1.11 parser.c
> >> *** wordparser/parser.c    4 Oct 2006 00:29:47 -0000    1.11
> >> --- wordparser/parser.c    4 Jan 2007 12:16:01 -0000
> >> ***************
> >> *** 44,52 ****
> >>        * Some operating systems fail with multi-byte encodings and a C locale.
> >>        * Also, for a C locale there is no need to process as multibyte. From
> >>        * backend/utils/adt/oracle_compat.c Teodor
> >>        */
> >>   
> >> !     if (prs->charmaxlen > 1 && !lc_ctype_is_c())
> >>       {
> >>           prs->usewide = true;
> >>           prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
> >> --- 44,54 ----
> >>        * Some operating systems fail with multi-byte encodings and a C locale.
> >>        * Also, for a C locale there is no need to process as multibyte. From
> >>        * backend/utils/adt/oracle_compat.c Teodor
> >> +      *
> >> +      * This is wrong assumption. even if locale is C, multibyte is necceary.
> >>        */
> >>   
> >> !     if (prs->charmaxlen > 1)
> >>       {
> >>           prs->usewide = true;
> >>           prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr);
> >> ***************
> >> *** 92,98 ****
> >>   static int                                            \
> >>   p_is##type(TParser *prs) {                                    \
> >>       Assert( prs->state );                                    \
> >> !     return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \
> >>           is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) );        \
> >>   }    \
> >>                                                   \
> >> --- 94,102 ----
> >>   static int                                            \
> >>   p_is##type(TParser *prs) {                                    \
> >>       Assert( prs->state );                                    \
> >> !     return ( ( prs->usewide ) ? \
> >> !              (lc_ctype_is_c()? is##type( 0xff & *( prs->wstr + prs->state->poschar)): \
> >> !               isw##type( (wint_t)*( prs->wstr + prs->state->poschar))): \
> >>           is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) );        \
> >>   }    \
> >>                                                   \
> >> ***************
> >> *** 134,141 ****
> >>   }
> >>   #endif   /* TS_USE_WIDE */
> >>   
> >> ! p_iswhat(alnum)
> >> ! p_iswhat(alpha)
> >>   p_iswhat(digit)
> >>   p_iswhat(lower)
> >>   p_iswhat(print)
> >> --- 138,197 ----
> >>   }
> >>   #endif   /* TS_USE_WIDE */
> >>
> >> ! static int p_isalnum(TParser *prs) {
> >> !     Assert( prs->state );
> >> ! 
> >> !     if (prs->usewide)
> >> !     {
> >> !         unsigned int c;
> >> ! 
> >> !         c = *(prs->wstr + prs->state->poschar);
> >> ! 
> >> !         if (lc_ctype_is_c())
> >> !         {
> >> !             if (c > 0x7f)
> >> !                 return 1;
> >> !             return isalnum(0xff & c);
> >> !         }
> >> !         else
> >> !             return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar));
> >> !     }
> >> !     else
> >> !         return isalnum( (unsigned char)*( prs->str + prs->state->posbyte ));
> >> ! }
> >> ! 
> >> ! static int    p_isnotalnum(TParser *prs)
> >> ! {
> >> !     return !p_isalnum(prs);
> >> ! }
> >> ! 
> >> ! static int p_isalpha(TParser *prs) {
> >> !     Assert( prs->state );
> >> ! 
> >> !     if (prs->usewide)
> >> !     {
> >> !         unsigned int c;
> >> ! 
> >> !         c = *(prs->wstr + prs->state->poschar);
> >> ! 
> >> !         if (lc_ctype_is_c())
> >> !         {
> >> !             if (c > 0x7f)
> >> !                 return 1;
> >> !             return isalpha(0xff & c);
> >> !         }
> >> !         else
> >> !             return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar));
> >> !     }
> >> !     else
> >> !         return isalpha( (unsigned char)*( prs->str + prs->state->posbyte ));
> >> ! }
> >> ! 
> >> ! static int    p_isnotalpha(TParser *prs)
> >> ! {
> >> !     return !p_isalpha(prs);
> >> ! }
> >> ! 
> >>   p_iswhat(digit)
> >>   p_iswhat(lower)
> >>   p_iswhat(print)
> >>
> >> ------------------------------------------------------------------------
> >>
> >>
> >> ---------------------------(end of broadcast)---------------------------
> >> TIP 9: In versions below 8.0, the planner will ignore your desire to
> >>        choose an index scan if your joining column's datatypes do not
> >>        match
> 
> -- 
> Teodor Sigaev                                   E-mail: teodor@sigaev.ru
>                                                     WWW: http://www.sigaev.ru/
> 


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Teodor Sigaev
Дата:
Сообщение: Re: [PATCHES] Bundle of patches
Следующее
От: Tom Lane
Дата:
Сообщение: Re: [COMMITTERS] pgsql: Change Windows rename and unlink substitutes so that they time