64-bit hashjoins
| От | Erik Riedel |
|---|---|
| Тема | 64-bit hashjoins |
| Дата | |
| Msg-id | Uqy6V2G00gNtImTMwA@andrew.cmu.edu обсуждение исходный текст |
| Ответы |
Re: [HACKERS] 64-bit hashjoins
Re: [HACKERS] 64-bit hashjoins |
| Список | pgsql-hackers |
Platform: Alpha, Digital UNIX 4.0D
Software: PostgreSQL 6.5 snaphot (11 March 1999)
I have two tables as follows:
Table = orderfoo
+-------------------------+----------------------------+-------+
| Field | Type | Length|
+-------------------------+----------------------------+-------+
| o_orderkey | int4 not null | 4 |
| o_custkey | int4 not null | 4 |
| o_orderstatus | char() not null | 1 |
| o_totalprice | float8 not null | 8 |
| o_orderdate | date | 4 |
| o_orderpriority | char() not null | 15 |
| o_clerk | char() not null | 15 |
| o_shippriority | int4 not null | 4 |
| o_comment | char() not null | 79 |
+-------------------------+----------------------------+-------+
Index: orderfoo_index_
Table = customer
+-------------------------+----------------------------+-------+
| Field | Type | Length|
+-------------------------+----------------------------+-------+
| c_custkey | int4 not null | 4 |
| c_name | char() not null | 25 |
| c_address | char() not null | 40 |
| c_nationkey | int4 not null | 4 |
| c_phone | char() not null | 15 |
| c_acctbal | float8 not null | 8 |
| c_mktsegment | char() not null | 10 |
| c_comment | char() not null | 117 |
+-------------------------+----------------------------+-------+
Index: customer_index_
and a query:
--
-- Query 3
--
select l_orderkey, sum(l_extendedprice*(1-l_discount)) as revenue,
o_orderdate, o_shippriority
from customer, orderfoo, lineitem
where c_mktsegment = 'BUILDING'
and c_custkey = o_custkey
and l_orderkey = o_orderkey
and o_orderdate < '1995-03-15'
and l_shipdate > '1995-03-15'
group by l_orderkey, o_orderdate, o_shippriority
order by revenue desc, o_orderdate;
whose plan includes the segment that hash joins the two tables listed
above:
-> Hash Join (cost=12268.29 size=5284 width=20) -> Seq Scan on orderfoo (cost=8609.00 size=72911 width=16)
-> Hash (cost=0.00 size=0 width=0) -> Seq Scan on customer (cost=1031.00 size=1087 width=4)
and which crashes the backend during the Hash Join with:
Unaligned access pid=28933 <postgres> va=0x14027e934 pc=0x120099430 ra=0x14027e93c inst=0xb74e0010
followed swiftly by:
Bus error (core dumped)
(note that the unaligned access and the bus error may be unrelated
events, but I suspect not - more below)
we then have a dbx backtrace that shows:
0 ExecScanHashBucket(hjstate = 0x140238478, bucket = 0x14027e8d8,
curtuple = 0x11fffd000, hjclauses = 0x140234118, econtext = 0x140238588)
["nodeHash.c":706, 0x120099434] 1 ExecHashJoin(node = 0x140234258) ["nodeHashjoin.c":288, 0x120099f00] 2
ExecProcNode(node= 0x140234258, parent = 0x1402347d8)
["execProcnode.c":315, 0x120090ffc] 3 ExecNestLoop(node = 0x1402347d8, parent = 0x140236108)
["nodeNestloop.c":160, 0x12009d104] 4 ExecProcNode(node = 0x1402347d8, parent = 0x140236108)
["execProcnode.c":279, 0x120090e7c] 5 createfirstrun(node = 0x140236108) ["psort.c":409, 0x1201762fc] 6
initialrun(node= 0x140236108) ["psort.c":291, 0x120176024] 7 psort_begin(node = 0x140236108, nkeys = 5159, key =
0x14023c560)
["psort.c":150, 0x120175e64] 8 ExecSort(node = 0x140236108) ["nodeSort.c":156, 0x12009e1c4] 9 ExecProcNode(node =
0x140236108,parent = 0x1402366d8)
["execProcnode.c":295, 0x120090f0c] 10 ExecGroupEveryTuple(node = 0x1402366d8) ["nodeGroup.c":104, 0x12009fab4] 11
ExecGroup(node= (nil)) ["nodeGroup.c":56, 0x12009fa0c] 12 ExecProcNode(node = 0x1402366d8, parent = 0x140237020)
["execProcnode.c":303, 0x120090f6c] 13 ExecAgg(node = 0x140237020) ["nodeAgg.c":243, 0x120097064] 14 ExecProcNode(node
=0x140237020, parent = 0x140237478)
["execProcnode.c":307, 0x120090f9c] 15 createfirstrun(node = 0x140237478) ["psort.c":409, 0x1201762fc]
More (n if no)? 16 initialrun(node = 0x140237478) ["psort.c":291, 0x120176024] 17 psort_begin(node = 0x140237478, nkeys
=5159, key = 0x14023cd80)
["psort.c":150, 0x120175e64] 18 ExecSort(node = 0x140237478) ["nodeSort.c":156, 0x12009e1c4] 19 ExecProcNode(node =
0x140237478,parent = 0x140237478)
["execProcnode.c":295, 0x120090f0c] 20 ExecutePlan(estate = 0x140237ea8, plan = 0x140237478, direction =
ForwardScanDirection, destfunc = 0x14001b118) ["execMain.c":985,
0x12008f2bc] 21 ExecutorRun(queryDesc = (nil), estate = 0x140237ea8, limoffset =
0x1, limcount = 0x14001b118) ["execMain.c":360, 0x12008e780] 22 ProcessQueryDesc(queryDesc = 0x140237e78, limoffset =
(nil),
limcount = (nil)) ["pquery.c":334, 0x120123a1c] 23
ProcessQuery(parsetree = 0x1401f3a88, plan = 0x140237478, dest =
536858624) ["pquery.c":377, 0x120123b1c] 24 pg_exec_query_dest(query_string = 0x11fffd7c0 = "select l_orderkey,
sum(l_extendedprice*(1-l_discount)) as revenue, o_orderdate,
o_shippriority from customer, orderfoo, lineitem where c\...",
aclOverride = '^@') ["postgres.c":805, 0x1201217bc] 25 PostgresMain(argv = 0x11ffff808, real_argv = 0x11ffff808)
["postgres.c":703, 0x12012302c] 26 main(argv = 0x11ffff808) ["main.c":103, 0x1200ae28c]
(dbx)
the offending code in nodeHash.c is near line 700 as:
...
if (curtuple == NULL) heapTuple = (HeapTuple) LONGALIGN(ABSADDR(bucket->top)); else heapTuple = (HeapTuple)
LONGALIGN(((char *) curtuple + curtuple->t_len + HEAPTUPLESIZE));
while (heapTuple < (HeapTuple) ABSADDR(bucket->bottom)) {
heapTuple->t_data = (HeapTupleHeader) ((char *) heapTuple + HEAPTUPLESIZE);
inntuple = ExecStoreTuple(heapTuple, /* tuple to store */ hjstate->hj_HashTupleSlot,
/*slot */ InvalidBuffer, /* tuple has no buffer */ false); /* do not
pfreethis tuple */
...
it crashes at the ExecStoreTuple().
Since it gives that unaligned access error beforehand, I suspect the
LONGALIGN() macros in the lines above are the culprits somehow. This
takes me to include/utils/memutils.h which has:
#if (defined(sun) && ! defined(sparc)) || defined(m68k)
#define LONGALIGN(LEN) SHORTALIGN(LEN)
#elif defined (__alpha)
/* * even though "long alignment" should really be on 8-byte boundaries for * linuxalpha, we want the strictest
alignmentto be on 4-byte (int) * boundaries, because otherwise things break when they try to use the * FormData_pg_*
structures. --djm 12/12/96 */
#define LONGALIGN(LEN)\ (((long)(LEN) + (sizeof (int) - 1)) & ~(sizeof (int) -1))
#else
#define LONGALIGN(LEN)\ (((long)(LEN) + (sizeof (long) - 1)) & ~(sizeof (long) -1))
#endif
since I am __alpha (but __osf__, not linux), I get the version with
the sizeof(int) instead of the sizeof(long).
Can someone explain the comment from djm to me (or is djm still
listening somewhere?). At first blush, I suspect that I actually
_want_ it to do the latter version of LONGALIGN(), since my longs
really are 8 bytes. But when I try to do that instead, I am unable to
even run "initdb" - dies with an error like "attribute not
found/invalid" (sorry, scrolled away the window with the actual error
- I can re-create if the exact message would help anyone).
Anyone have suggestions on how I might proceed? Are there known
problems (or known-workings) of HashJoin on 64-bit platforms?
Erik Riedel
Carnegie Mellon University
www.cs.cmu.edu/~riedel
В списке pgsql-hackers по дате отправления: