Обсуждение: Slow query. Any way to speed up?

Поиск
Список
Период
Сортировка

Slow query. Any way to speed up?

От
Patrick Hatcher
Дата:
Pg: 7.4.9
RH: ES v3
Quad-Xeon
16G ram

The following SQL takes 4+ mins to run.  I have indexes on all join fields
and I've tried rearranging the table orders but haven't had any luck.  I
have done the usual vacuums analyze and even vacuum FULL just to make sure
but still the same results. The ending resultset is around 169K rows which,
if I'm reading the analyze output, is more than double.   Any suggestions?

TIA
-patrick

Select gmmid, gmmname, divid, divname, feddept, fedvend,itemnumber as
mstyle,amc_week_id,
sum(tran_itm_total) as net_dollars

FROM
public.tbldetaillevel_report a2 join  cdm.cdm_ddw_tran_item a1  on
a1.item_upc = a2.upc
join public.date_dim a3 on a3.date_dim_id = a1.cal_date
where
a3.date_dim_id between '2005-10-30' and '2005-12-31'
and
a1.appl_id  in  ('MCOM','NET')
and
 a1.tran_typ_id in ('S','R')
group by 1,2,3,4,5,6,7,8
order by 1,2,3,4,5,6,7,8


GroupAggregate  (cost=1646283.56..1648297.72 rows=73242 width=65)
  ->  Sort  (cost=1646283.56..1646466.67 rows=73242 width=65)
        Sort Key: a2.gmmid, a2.gmmname, a2.divid, a2.divname, a2.feddept,
a2.fedvend, a2.itemnumber, a3.amc_week_id
        ->  Merge Join  (cost=1595839.67..1640365.47 rows=73242 width=65)
              Merge Cond: ("outer".upc = "inner".item_upc)
              ->  Index Scan using report_upc_idx on tbldetaillevel_report
a2  (cost=0.00..47236.85 rows=366234 width=58)
              ->  Sort  (cost=1595839.67..1596022.77 rows=73242 width=23)
                    Sort Key: a1.item_upc
                    ->  Hash Join  (cost=94.25..1589921.57 rows=73242
width=23)
                          Hash Cond: ("outer".cal_date =
"inner".date_dim_id)
                          ->  Seq Scan on cdm_ddw_tran_item a1
(cost=0.00..1545236.00 rows=8771781 width=23)
                                Filter: ((((appl_id)::text = 'MCOM'::text)
OR ((appl_id)::text = 'NET'::text)) AND ((tran_typ_id = 'S'::bpchar) OR
(tran_typ_id = 'R'::bpchar)))
                          ->  Hash  (cost=94.09..94.09 rows=64 width=8)
                                ->  Index Scan using date_date_idx on
date_dim a3  (cost=0.00..94.09 rows=64 width=8)
                                      Index Cond: ((date_dim_id >=
'2005-10-30'::date) AND (date_dim_id <= '2005-12-31'::date))



-- Table: tbldetaillevel_report

-- DROP TABLE tbldetaillevel_report;

CREATE TABLE tbldetaillevel_report
(
  pageid int4,
  feddept int4,
  fedvend int4,
  oz_description varchar(254),
  price_owned_retail float8,
  oz_color varchar(50),
  oz_size varchar(50),
  total_oh int4 DEFAULT 0,
  total_oo int4 DEFAULT 0,
  vendorname varchar(40),
  dunsnumber varchar(9),
  current_week int4,
  current_period int4,
  week_end date,
  varweek int4,
  varperiod int4,
  upc int8,
  itemnumber varchar(15),
  mkd_status int2,
 inforem_flag int2
)
WITH OIDS;

-- DROP INDEX report_dept_vend_idx;

CREATE INDEX report_dept_vend_idx
  ON tbldetaillevel_report
  USING btree
  (feddept, fedvend);

-- Index: report_upc_idx

-- DROP INDEX report_upc_idx;

CREATE INDEX report_upc_idx
  ON tbldetaillevel_report
  USING btree
  (upc);



-- Table: cdm.cdm_ddw_tran_item

-- DROP TABLE cdm.cdm_ddw_tran_item;

CREATE TABLE cdm.cdm_ddw_tran_item
(
  appl_xref varchar(22),
  intr_xref varchar(13),
  tran_typ_id char(1),
  cal_date date,
  cal_time time,
  tran_itm_total numeric(15,2),
  itm_qty int4,
  itm_price numeric(8,2),
  item_id int8,
  item_upc int8,
  item_pid varchar(20),
  item_desc varchar(30),
  nrf_color_name varchar(10),
  nrf_size_name varchar(10),
  dept_id int4,
  vend_id int4,
  mkstyl int4,
  item_group varchar(20),
  appl_id varchar(20),
  cost float8 DEFAULT 0,
  onhand int4 DEFAULT 0,
  onorder int4 DEFAULT 0,
  avail int4 DEFAULT 0,
  owned float8 DEFAULT 0,
  fill_store_loc int4,
  ddw_tran_key bigserial NOT NULL,
  price_type_id int2 DEFAULT 999,
  last_update date DEFAULT ('now'::text)::date,
  tran_id int8,
  tran_seq_nbr int4,
  CONSTRAINT ddw_tritm_pk PRIMARY KEY (ddw_tran_key)
)
WITHOUT OIDS;


-- Index: cdm.cdm_ddw_tran_item_applid_idx

-- DROP INDEX cdm.cdm_ddw_tran_item_applid_idx;

CREATE INDEX cdm_ddw_tran_item_applid_idx
  ON cdm.cdm_ddw_tran_item
  USING btree
  (appl_id);

-- Index: cdm.cdm_ddw_tran_item_cal_date

-- DROP INDEX cdm.cdm_ddw_tran_item_cal_date;

CREATE INDEX cdm_ddw_tran_item_cal_date
  ON cdm.cdm_ddw_tran_item
  USING btree
  (cal_date);

-- Index: cdm.cdm_ddw_tran_item_trn_type

-- DROP INDEX cdm.cdm_ddw_tran_item_trn_type;

CREATE INDEX cdm_ddw_tran_item_trn_type
  ON cdm.cdm_ddw_tran_item
  USING btree
  (tran_typ_id);

-- Index: cdm.ddw_ti_upc_idx

-- DROP INDEX cdm.ddw_ti_upc_idx;

CREATE INDEX ddw_ti_upc_idx
  ON cdm.cdm_ddw_tran_item
  USING btree
  (item_upc);

-- Index: cdm.ddw_tran_item_dept_idx

-- DROP INDEX cdm.ddw_tran_item_dept_idx;

CREATE INDEX ddw_tran_item_dept_idx
  ON cdm.cdm_ddw_tran_item
  USING btree
  (dept_id);

-- Index: cdm.ddw_trn_ittotal_idx

-- DROP INDEX cdm.ddw_trn_ittotal_idx;

CREATE INDEX ddw_trn_ittotal_idx
  ON cdm.cdm_ddw_tran_item
  USING btree
  (tran_itm_total);

-- Table: date_dim

-- DROP TABLE date_dim;

CREATE TABLE date_dim
(
  date_dim_id date NOT NULL,
  amc_date char(8),
  amc_day_nbr int2 NOT NULL,
  amc_week int2 NOT NULL,
  amc_period int2 NOT NULL,
  amc_quarter int2 NOT NULL,
  amc_season int2 NOT NULL,
  amc_year int4 NOT NULL,
  amc_period_id int4 NOT NULL,
  amc_week_id int4 NOT NULL,
  nbr_weeks_per_peri int2 NOT NULL,
  nbr_weeks_per_year int2 NOT NULL,
  calendar_day int2 NOT NULL,
  calendar_month int2 NOT NULL,
  julian_day int2 NOT NULL,
  CONSTRAINT date_dimph PRIMARY KEY (date_dim_id)
)
WITH OIDS;


-- Index: amc_weekid_idx

-- DROP INDEX amc_weekid_idx;

CREATE INDEX amc_weekid_idx
  ON date_dim
  USING btree
  (amc_week_id);

-- Index: date_date_idx

-- DROP INDEX date_date_idx;

CREATE INDEX date_date_idx
  ON date_dim
  USING btree
  (date_dim_id);


Patrick Hatcher
Development Manager  Analytics/MIO
Macys.com


Re: Slow query. Any way to speed up?

От
Tom Lane
Дата:
Patrick Hatcher <PHatcher@macys.com> writes:
> The following SQL takes 4+ mins to run.  I have indexes on all join fields
> and I've tried rearranging the table orders but haven't had any luck.

Please show EXPLAIN ANALYZE output, not just EXPLAIN.  It's impossible
to tell whether the planner is making any wrong guesses when you can't
see the actual times/rowcounts ...

(BTW, 7.4 is looking pretty long in the tooth.)

            regards, tom lane

Re: Slow query. Any way to speed up?

От
Patrick Hatcher
Дата:
Duh sorry.  We will eventually move to 8.x, it's just a matter of finding
the time:

Explain analyze
Select gmmid, gmmname, divid, divname, feddept, fedvend,itemnumber as
mstyle,amc_week_id,
sum(tran_itm_total) as net_dollars

FROM
public.tbldetaillevel_report a2 join  cdm.cdm_ddw_tran_item a1  on
a1.item_upc = a2.upc
join public.date_dim a3 on a3.date_dim_id = a1.cal_date
where
a3.date_dim_id between '2005-10-30' and '2005-12-31'
and
a1.appl_id  in  ('MCOM','NET')
and
 a1.tran_typ_id in ('S','R')
group by 1,2,3,4,5,6,7,8
order by 1,2,3,4,5,6,7,8


GroupAggregate  (cost=1648783.47..1650793.74 rows=73101 width=65) (actual
time=744556.289..753136.278 rows=168343 loops=1)
  ->  Sort  (cost=1648783.47..1648966.22 rows=73101 width=65) (actual
time=744556.236..746634.566 rows=1185096 loops=1)
        Sort Key: a2.gmmid, a2.gmmname, a2.divid, a2.divname, a2.feddept,
a2.fedvend, a2.itemnumber, a3.amc_week_id
        ->  Merge Join  (cost=1598067.59..1642877.78 rows=73101 width=65)
(actual time=564862.772..636550.484 rows=1185096 loops=1)
              Merge Cond: ("outer".upc = "inner".item_upc)
              ->  Index Scan using report_upc_idx on tbldetaillevel_report
a2  (cost=0.00..47642.36 rows=367309 width=58) (actual
time=82.512..65458.137 rows=365989 loops=1)
              ->  Sort  (cost=1598067.59..1598250.34 rows=73100 width=23)
(actual time=564764.506..566529.796 rows=1248862 loops=1)
                    Sort Key: a1.item_upc
                    ->  Hash Join  (cost=94.25..1592161.99 rows=73100
width=23) (actual time=493500.913..548924.039 rows=1248851 loops=1)
                          Hash Cond: ("outer".cal_date =
"inner".date_dim_id)
                          ->  Seq Scan on cdm_ddw_tran_item a1
(cost=0.00..1547562.88 rows=8754773 width=23) (actual
time=14.219..535704.691 rows=10838135 loops=1)
                                Filter: ((((appl_id)::text = 'MCOM'::text)
OR ((appl_id)::text = 'NET'::text)) AND ((tran_typ_id = 'S'::bpchar) OR
(tran_typ_id = 'R'::bpchar)))
                          ->  Hash  (cost=94.09..94.09 rows=64 width=8)
(actual time=362.953..362.953 rows=0 loops=1)
                                ->  Index Scan using date_date_idx on
date_dim a3  (cost=0.00..94.09 rows=64 width=8) (actual
time=93.710..362.802 rows=63 loops=1)
                                      Index Cond: ((date_dim_id >=
'2005-10-30'::date) AND (date_dim_id <= '2005-12-31'::date))
Total runtime: 753467.847 ms


Patrick Hatcher
Development Manager  Analytics/MIO
Macys.com
415-422-1610




             Tom Lane
             <tgl@sss.pgh.pa.u
             s>                                                         To
                                       Patrick Hatcher
             01/05/06 09:07 PM         <PHatcher@macys.com>
                                                                        cc
                                       pgsql-performance@postgresql.org
                                                                   Subject
                                       Re: [PERFORM] Slow query. Any way
                                       to speed up?










Patrick Hatcher <PHatcher@macys.com> writes:
> The following SQL takes 4+ mins to run.  I have indexes on all join
fields
> and I've tried rearranging the table orders but haven't had any luck.

Please show EXPLAIN ANALYZE output, not just EXPLAIN.  It's impossible
to tell whether the planner is making any wrong guesses when you can't
see the actual times/rowcounts ...

(BTW, 7.4 is looking pretty long in the tooth.)

                                     regards, tom lane



Re: Slow query. Any way to speed up?

От
Tom Lane
Дата:
Patrick Hatcher <PHatcher@macys.com> writes:
>                           ->  Seq Scan on cdm_ddw_tran_item a1
> (cost=0.00..1547562.88 rows=8754773 width=23) (actual
> time=14.219..535704.691 rows=10838135 loops=1)
>                                 Filter: ((((appl_id)::text = 'MCOM'::text)
> OR ((appl_id)::text = 'NET'::text)) AND ((tran_typ_id = 'S'::bpchar) OR
> (tran_typ_id = 'R'::bpchar)))

The bulk of the time is evidently going into this step.  You didn't say
how big cdm_ddw_tran_item is, but unless it's in the billion-row range,
an indexscan isn't going to help for pulling out 10 million rows.
This may be about the best you can do :-(

If it *is* in the billion-row range, PG 8.1's bitmap indexscan facility
would probably help.

            regards, tom lane