Pavel Stehule wrote:
> I used dictionaries from fedora core packages
>
> hunspell-cs-20060303-5.fc7.i386.rpm
>
> then I converted it to utf8 with iconv
Ok, thanks.
Apparently it's a bug I introduced when I refactored spell.c to use the
t_readline function for reading and recoding the input file. I didn't
notice that some calls to STRNCMP need the non-lowercased version of the
input line, so they must compare against "recoded", which in turn must not
be freed until the end of the loop iteration. Patch attached.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
Index: src/backend/tsearch/spell.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/tsearch/spell.c,v
retrieving revision 1.2
diff -c -r1.2 spell.c
*** src/backend/tsearch/spell.c 25 Aug 2007 00:03:59 -0000 1.2
--- src/backend/tsearch/spell.c 4 Sep 2007 12:31:55 -0000
***************
*** 733,739 ****
while ((recoded = t_readline(affix)) != NULL)
{
pstr = lowerstr(recoded);
- pfree(recoded);
lineno++;
--- 733,738 ----
***************
*** 813,820 ****
flag = (unsigned char) *s;
goto nextline;
}
! if (STRNCMP(str, "COMPOUNDFLAG") == 0 || STRNCMP(str, "COMPOUNDMIN") == 0 ||
! STRNCMP(str, "PFX") == 0 || STRNCMP(str, "SFX") == 0)
{
if (oldformat)
ereport(ERROR,
--- 812,819 ----
flag = (unsigned char) *s;
goto nextline;
}
! if (STRNCMP(recoded, "COMPOUNDFLAG") == 0 || STRNCMP(recoded, "COMPOUNDMIN") == 0 ||
! STRNCMP(recoded, "PFX") == 0 || STRNCMP(recoded, "SFX") == 0)
{
if (oldformat)
ereport(ERROR,
***************
*** 834,839 ****
--- 833,839 ----
NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
nextline:
+ pfree(recoded);
pfree(pstr);
}
FreeFile(affix);