I have rewritten the code like this:
-- collect the input words that are already present in WORD_COUNTS
existing_words_array := ARRAY(
    select word
    from WORD_COUNTS
    where word = ANY ( array_of_words )
);
-- deduplicate array_of_words and keep only the words NOT found above;
-- the derived table expands the array element-by-element via generate_series
-- (NOT (x = ANY (arr)) is logically identical to x <> ALL (arr))
not_existing_words_array := ARRAY(
    select distinct_word
    from (
        select distinct (array_of_words)[s.index] as distinct_word
        from generate_series( 1, array_upper( array_of_words, 1 ) ) as s(index)
    ) as distinct_words
    where not ( distinct_word = ANY ( existing_words_array ) )
);
-- insert the missing words with an initial count of 1
-- FIX: the original guard "if not_existing_words_array is not null" never
-- fires — ARRAY(subquery) yields an empty array, never NULL, when no rows
-- match — so the insert statement always executed (harmlessly inserting
-- zero rows). Testing array_upper(..., 1) IS NOT NULL actually detects the
-- empty-array case and skips the statement entirely.
if array_upper( not_existing_words_array, 1 ) is not null then
    insert into WORD_COUNTS
        ( word, count )
    select not_existing_words_array[s.index] as word,
           1 as count
    from generate_series( 1,
        array_upper( not_existing_words_array, 1 ) ) as s(index);
end if;
-- NOTE(review): this SELECT-then-INSERT pattern is race-prone under
-- concurrent writers (two sessions can both decide a word is missing);
-- consider a single INSERT ... ON CONFLICT upsert if the server supports it.
-- increment the count of every word that already existed
-- FIX 1: the original filtered on "sw_word", but every other reference to
-- this table uses "word" (the SELECT that builds existing_words_array and
-- the INSERT column list above) — assuming "sw_word" is a typo; confirm
-- against the actual WORD_COUNTS table definition.
-- FIX 2: as with the insert, ARRAY(subquery) is never NULL, so the old
-- "is not null" guard never fired; test for a non-empty array instead.
if array_upper( existing_words_array, 1 ) is not null then
    update WORD_COUNTS
    set count = COALESCE( count, 0 ) + 1   -- count may be NULL; treat as 0
    where word = ANY ( existing_words_array );
end if;
Now it processes a million records in 14 seconds... so the problem was probably
that the `NOT IN (SELECT ... FROM WORD_COUNTS)` lookup was far too expensive.