Обсуждение: Slow update/insert process

Поиск
Список
Период
Сортировка

Slow update/insert process

От
Patrick Hatcher
Дата:

Pg: 7.4.5
RH 7.3
8g Ram
200 g drive space
RAID0+1
Tables vacuum on a nightly basis

The following process below takes 8 hours to run on 90k records and I'm not sure where to being to look for the bottleneck.  This isn't the only updating on this database that seems to take a long time to complete.  Is there something I should be looking for in my conf settings?  

TIA
Patrick


SQL:
---Bring back only selected records to run through the update process.
--Without the function the SQL takes < 10secs to return 90,000 records
SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))
FROM mdc_upc upc
JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
JOIN public.mdc_price_post_inc price ON prod.keyp_products = price.keyf_product
JOIN public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products and keyf_attribute=22
WHERE
upper(trim(ap.attributevalue)) NOT IN ('ESTEE LAUDER', 'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG HEUER')
AND keyf_producttype<>222
AND prod.action_publish = 1;


Function:

CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar")
 RETURNS bool AS
'DECLARE
   varkeyf_upc                ALIAS FOR $1;
   varPassword                ALIAS FOR $2;
   varRealMD5                varchar;
   varDeltaMD5                varchar;
   varLastTouchDate        date;
   varQuery                 text;
   varQuery1                 text;
   varQueryMD5                text;
   varQueryRecord        record;
   varFuncStatus        boolean := false;
   
BEGIN

-- Check the password
 IF varPassword <> \'amazon\' THEN
   Return false;
 END IF;


--  Get the md5 hash for this product
 SELECT into varQueryRecord md5(upc.keyp_upc || prod.description || pm.pm_price_post_inc(prod.keyp_products)) AS md5
   FROM public.mdc_upc upc
   JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
   JOIN public.mdc_price_post_inc price ON price.keyf_product = prod.keyp_products
   WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;
 

 IF NOT FOUND THEN
   RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
 ELSE
   varRealMD5:=varQueryRecord.md5;
 END IF;

--  Check that the product is in the delta table and return its hash for comparison
 SELECT into varQueryRecord md5_hash,last_touch_date
   FROM pm.pm_delta_master_amazon
   WHERE keyf_upc = varkeyf_upc LIMIT 1;

 IF NOT FOUND THEN
   -- ADD and exit
   INSERT INTO pm.pm_delta_master_amazon (keyf_upc,status,md5_hash,last_touch_date)
   values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
   varFuncStatus:=true;
   RETURN varFuncStatus;
 ELSE
   --Update the record
     --- If the hash matches then set the record to HOLD
   IF varRealMD5 = varQueryRecord.md5_hash THEN
       UPDATE pm.pm_delta_master_amazon
         SET status= \'hold\',
         last_touch_date =  CURRENT_DATE
       WHERE keyf_upc =  varkeyf_upc AND last_touch_date <> CURRENT_DATE;
       varFuncStatus:=true;
   ELSE
       --  ELSE mark the item as ADD
       UPDATE pm.pm_delta_master_amazon
         SET status= \'add\',
         last_touch_date =  CURRENT_DATE
       WHERE keyf_upc =  varkeyf_upc;
       varFuncStatus:=true;
   END IF;
  END IF;

 RETURN varFuncStatus;
END;'
 LANGUAGE 'plpgsql' IMMUTABLE;



TableDef
CREATE TABLE pm.pm_delta_master_amazon (
    keyf_upc               int4 ,
    status                 varchar(6) ,
    md5_hash               varchar(40) ,
    last_touch_date        date
    )
GO

CREATE INDEX status_idx
    ON pm.pm_delta_master_amazon(status)
GO




CONF
--------
# WRITE AHEAD LOG
#---------------------------------------------------------------------------

# - Settings -

#fsync = true                   # turns forced synchronization on or off
#wal_sync_method = fsync        # the default varies across platforms:
                                # fsync, fdatasync, open_sync, or open_datasync
wal_buffers = 32                # min 4, 8KB each

# - Checkpoints -

checkpoint_segments = 50        # in logfile segments, min 1, 16MB each
checkpoint_timeout = 600        # range 30-3600, in seconds
#checkpoint_warning = 30        # 0 is off, in seconds
#commit_delay = 0               # range 0-100000, in microseconds
#commit_siblings = 5            # range 1-1000


Patrick Hatcher
Macys.Com

Re: Slow update/insert process

От
"Aaron Werman"
Дата:
Some quick notes:
 
- Using a side effect of a function to update the database feels bad to me
- how long does the  SELECT into varQueryRecord md5(upc.keyp....
  function take / what does it's explain look like?
- There are a lot of non-indexed columns on that delta master table, such as keyf_upc.
   I'm guessing you're doing 90,000 x {a lot of slow scans}
- My temptation would be to rewrite the processing to do a pass of updates, a pass of inserts,
  and then the SELECT
----- Original Message -----
Sent: Friday, October 01, 2004 2:14 PM
Subject: [PERFORM] Slow update/insert process


Pg: 7.4.5
RH 7.3
8g Ram
200 g drive space
RAID0+1
Tables vacuum on a nightly basis

The following process below takes 8 hours to run on 90k records and I'm not sure where to being to look for the bottleneck.  This isn't the only updating on this database that seems to take a long time to complete.  Is there something I should be looking for in my conf settings?  

TIA
Patrick


SQL:
---Bring back only selected records to run through the update process.
--Without the function the SQL takes < 10secs to return 90,000 records
SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))
FROM mdc_upc upc
JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
JOIN public.mdc_price_post_inc price ON prod.keyp_products = price.keyf_product
JOIN public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products and keyf_attribute=22
WHERE
upper(trim(ap.attributevalue)) NOT IN ('ESTEE LAUDER', 'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG HEUER')
AND keyf_producttype<>222
AND prod.action_publish = 1;


Function:

CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar")
 RETURNS bool AS
'DECLARE
   varkeyf_upc                ALIAS FOR $1;
   varPassword                ALIAS FOR $2;
   varRealMD5                varchar;
   varDeltaMD5                varchar;
   varLastTouchDate        date;
   varQuery                 text;
   varQuery1                 text;
   varQueryMD5                text;
   varQueryRecord        record;
   varFuncStatus        boolean := false;
   
BEGIN

-- Check the password
 IF varPassword <> \'amazon\' THEN
   Return false;
 END IF;


--  Get the md5 hash for this product
 SELECT into varQueryRecord md5(upc.keyp_upc || prod.description || pm.pm_price_post_inc(prod.keyp_products)) AS md5
   FROM public.mdc_upc upc
   JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
   JOIN public.mdc_price_post_inc price ON price.keyf_product = prod.keyp_products
   WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;
 

 IF NOT FOUND THEN
   RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
 ELSE
   varRealMD5:=varQueryRecord.md5;
 END IF;

--  Check that the product is in the delta table and return its hash for comparison
 SELECT into varQueryRecord md5_hash,last_touch_date
   FROM pm.pm_delta_master_amazon
   WHERE keyf_upc = varkeyf_upc LIMIT 1;

 IF NOT FOUND THEN
   -- ADD and exit
   INSERT INTO pm.pm_delta_master_amazon (keyf_upc,status,md5_hash,last_touch_date)
   values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
   varFuncStatus:=true;
   RETURN varFuncStatus;
 ELSE
   --Update the record
     --- If the hash matches then set the record to HOLD
   IF varRealMD5 = varQueryRecord.md5_hash THEN
       UPDATE pm.pm_delta_master_amazon
         SET status= \'hold\',
         last_touch_date =  CURRENT_DATE
       WHERE keyf_upc =  varkeyf_upc AND last_touch_date <> CURRENT_DATE;
       varFuncStatus:=true;
   ELSE
       --  ELSE mark the item as ADD
       UPDATE pm.pm_delta_master_amazon
         SET status= \'add\',
         last_touch_date =  CURRENT_DATE
       WHERE keyf_upc =  varkeyf_upc;
       varFuncStatus:=true;
   END IF;
  END IF;

 RETURN varFuncStatus;
END;'
 LANGUAGE 'plpgsql' IMMUTABLE;



TableDef
CREATE TABLE pm.pm_delta_master_amazon (
    keyf_upc               int4 ,
    status                 varchar(6) ,
    md5_hash               varchar(40) ,
    last_touch_date        date
    )
GO

CREATE INDEX status_idx
    ON pm.pm_delta_master_amazon(status)
GO




CONF
--------
# WRITE AHEAD LOG
#---------------------------------------------------------------------------

# - Settings -

#fsync = true                   # turns forced synchronization on or off
#wal_sync_method = fsync        # the default varies across platforms:
                                # fsync, fdatasync, open_sync, or open_datasync
wal_buffers = 32                # min 4, 8KB each

# - Checkpoints -

checkpoint_segments = 50        # in logfile segments, min 1, 16MB each
checkpoint_timeout = 600        # range 30-3600, in seconds
#checkpoint_warning = 30        # 0 is off, in seconds
#commit_delay = 0               # range 0-100000, in microseconds
#commit_siblings = 5            # range 1-1000


Patrick Hatcher
Macys.Com

Re: Slow update/insert process

От
Patrick Hatcher
Дата:

Thanks for the help.
I found the culprit.  The user had created a function within the function ( pm.pm_price_post_inc(prod.keyp_products)). Once this was fixed the time dropped dramatically.

Patrick Hatcher
Macys.Com
Legacy Integration Developer
415-422-1610 office
HatcherPT - AIM



Patrick Hatcher <PHatcher@macys.com>
Sent by: pgsql-performance-owner@postgresql.org

10/01/04 11:14 AM

To
<pgsql-performance@postgresql.org>
cc
Subject
[PERFORM] Slow update/insert process






Pg: 7.4.5

RH 7.3

8g Ram

200 g drive space

RAID0+1

Tables vacuum on a nightly basis


The following process below takes 8 hours to run on 90k records and I'm not sure where to being to look for the bottleneck.  This isn't the only updating on this database that seems to take a long time to complete.  Is there something I should be looking for in my conf settings?  


TIA

Patrick



SQL:

---Bring back only selected records to run through the update process.

--Without the function the SQL takes < 10secs to return 90,000 records

SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))

FROM mdc_upc upc

JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products

JOIN public.mdc_price_post_inc price ON prod.keyp_products = price.keyf_product

JOIN public.mdc_attribute_product ap on ap.keyf_products = prod.keyp_products and keyf_attribute=22

WHERE
upper(trim(ap.attributevalue)) NOT IN ('ESTEE LAUDER', 'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG HEUER')

AND keyf_producttype<>222

AND prod.action_publish = 1;



Function:


CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar")
RETURNS bool AS
'DECLARE
  varkeyf_upc                ALIAS FOR $1;
  varPassword                ALIAS FOR $2;
  varRealMD5                varchar;
  varDeltaMD5                varchar;
  varLastTouchDate        date;
  varQuery                 text;
  varQuery1                 text;
  varQueryMD5                text;
  varQueryRecord        record;
  varFuncStatus        boolean := false;
 
BEGIN

-- Check the password
IF varPassword <> \'amazon\' THEN
  Return false;
END IF;


--  Get the md5 hash for this product
SELECT into varQueryRecord md5(upc.keyp_upc || prod.description || pm.pm_price_post_inc(prod.keyp_products)) AS md5
  FROM public.mdc_upc upc
  JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
  JOIN public.mdc_price_post_inc price ON price.keyf_product = prod.keyp_products
  WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;


IF NOT FOUND THEN
  RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
ELSE
  varRealMD5:=varQueryRecord.md5;
END IF;

--  Check that the product is in the delta table and return its hash for comparison
SELECT into varQueryRecord md5_hash,last_touch_date
  FROM pm.pm_delta_master_amazon
  WHERE keyf_upc = varkeyf_upc LIMIT 1;

IF NOT FOUND THEN
  -- ADD and exit
  INSERT INTO pm.pm_delta_master_amazon (keyf_upc,status,md5_hash,last_touch_date)
  values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
  varFuncStatus:=true;
  RETURN varFuncStatus;
ELSE
  --Update the record
    --- If the hash matches then set the record to HOLD
  IF varRealMD5 = varQueryRecord.md5_hash THEN
      UPDATE pm.pm_delta_master_amazon
        SET status= \'hold\',
        last_touch_date =  CURRENT_DATE
      WHERE keyf_upc =  varkeyf_upc AND last_touch_date <> CURRENT_DATE;
      varFuncStatus:=true;
  ELSE
      --  ELSE mark the item as ADD
      UPDATE pm.pm_delta_master_amazon
        SET status= \'add\',
        last_touch_date =  CURRENT_DATE
      WHERE keyf_upc =  varkeyf_upc;
      varFuncStatus:=true;
  END IF;
 END IF;

RETURN varFuncStatus;
END;'
LANGUAGE 'plpgsql' IMMUTABLE;




TableDef

CREATE TABLE pm.pm_delta_master_amazon (
   keyf_upc               int4 ,

   status                 varchar(6) ,

   md5_hash               varchar(40) ,

   last_touch_date        date
   )

GO


CREATE INDEX status_idx

   ON pm.pm_delta_master_amazon(status)

GO





CONF

--------

# WRITE AHEAD LOG

#---------------------------------------------------------------------------


# - Settings -


#fsync = true                   # turns forced synchronization on or off

#wal_sync_method = fsync        # the default varies across platforms:

                               # fsync, fdatasync, open_sync, or open_datasync

wal_buffers = 32                # min 4, 8KB each


# - Checkpoints -


checkpoint_segments = 50        # in logfile segments, min 1, 16MB each

checkpoint_timeout = 600        # range 30-3600, in seconds

#checkpoint_warning = 30        # 0 is off, in seconds

#commit_delay = 0               # range 0-100000, in microseconds

#commit_siblings = 5            # range 1-1000



Patrick Hatcher
Macys.Com