Re: BUG #8970: ts_parse incorrectly split numbers in digit token

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: BUG #8970: ts_parse incorrectly split numbers in digit token
Дата
Msg-id 15800.1391293631@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: BUG #8970: ts_parse incorrectly split numbers in digit token  (Marco Atzeri <marco.atzeri@gmail.com>)
Ответы Re: BUG #8970: ts_parse incorrectly split numbers in digit token  (Marco Atzeri <marco.atzeri@gmail.com>)
Список pgsql-bugs
Marco Atzeri <marco.atzeri@gmail.com> writes:
> Perhaps it was assumed that the size of a wide char is always 32 bits?

Hm.  It looks like there's an entirely unnecessary assumption that wchar_t
and wint_t are the same width.  Does the attached patch make things better
for you?

            regards, tom lane

diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index fa73dff..6728212 100644
*** a/src/backend/tsearch/wparser_def.c
--- b/src/backend/tsearch/wparser_def.c
*************** TParserCopyClose(TParser *prs)
*** 432,438 ****
   *      or give wrong result.
   *    - multibyte encoding and C-locale often are used for
   *      Asian languages.
!  *    - if locale is C the we use pgwstr instead of wstr
   */

  #ifdef USE_WIDE_UPPER_LOWER
--- 432,438 ----
   *      or give wrong result.
   *    - multibyte encoding and C-locale often are used for
   *      Asian languages.
!  *    - if locale is C then we use pgwstr instead of wstr.
   */

  #ifdef USE_WIDE_UPPER_LOWER
*************** p_is##type(TParser *prs) {                                                    \
*** 444,452 ****
      if ( prs->usewide )                                                        \
      {                                                                        \
          if ( prs->pgwstr )                                                    \
!             return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
!                                                                             \
!         return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) );    \
      }                                                                        \
                                                                              \
      return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
--- 444,456 ----
      if ( prs->usewide )                                                        \
      {                                                                        \
          if ( prs->pgwstr )                                                    \
!         {                                                                    \
!             unsigned int c = *(prs->pgwstr + prs->state->poschar);            \
!             if ( c > 0x7f )                                                    \
!                 return 0;                                                    \
!             return is##type( c );                                            \
!         }                                                                    \
!         return isw##type( *( prs->wstr + prs->state->poschar ) );            \
      }                                                                        \
                                                                              \
      return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
*************** p_isalnum(TParser *prs)
*** 475,484 ****
              if (c > 0x7f)
                  return 1;

!             return isalnum(0xff & c);
          }

!         return iswalnum((wint_t) *(prs->wstr + prs->state->poschar));
      }

      return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
--- 479,488 ----
              if (c > 0x7f)
                  return 1;

!             return isalnum(c);
          }

!         return iswalnum(*(prs->wstr + prs->state->poschar));
      }

      return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
*************** p_isalpha(TParser *prs)
*** 507,516 ****
              if (c > 0x7f)
                  return 1;

!             return isalpha(0xff & c);
          }

!         return iswalpha((wint_t) *(prs->wstr + prs->state->poschar));
      }

      return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));
--- 511,520 ----
              if (c > 0x7f)
                  return 1;

!             return isalpha(c);
          }

!         return iswalpha(*(prs->wstr + prs->state->poschar));
      }

      return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Marco Atzeri
Дата:
Сообщение: Re: BUG #8970: ts_parse incorrectly split numbers in digit token
Следующее
От: Marco Atzeri
Дата:
Сообщение: Re: BUG #8970: ts_parse incorrectly split numbers in digit token