Обсуждение: BUG #15032: Segmentation fault when running a particular query
The following bug has been logged on the website:
Bug reference: 15032
Logged by: Guo Xiang Tan
Email address: gxtan1990@gmail.com
PostgreSQL version: 10.1
Operating system: Ubuntu 14.04.4 LTS
Description:
## `SELECT version()`
`PostgreSQL 10.1 on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu
5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609, 64-bit`
## Query that results in segmentation fault
```
UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END)))
AS gmean
FROM post_timings
INNER JOIN posts AS p2
ON p2.post_number = post_timings.post_number
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
WHERE (x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS
NULL));
```
## Error in PostgreSQL log file
```
[163]: [15-1] LOG: server process (PID 1983) was terminated by signal 11:
Segmentation fault
[163]: [16-1] DETAIL: Failed process was running: UPDATE posts
SET avg_time = (x.gmean / 1000)
FROM (SELECT post_timings.topic_id,
post_timings.post_number,
round(exp(avg(CASE WHEN msecs > 0 THEN ln(msecs) ELSE 0 END)))
AS gmean
FROM post_timings
INNER JOIN posts AS p2
ON p2.post_number = post_timings.post_number
AND p2.topic_id = post_timings.topic_id
AND p2.user_id <> post_timings.user_id
GROUP BY post_timings.topic_id, post_timings.post_number) AS x
WHERE (x.topic_id = posts.topic_id
AND x.post_number = posts.post_number
AND (posts.avg_time <> (x.gmean / 1000)::int OR posts.avg_time IS
NULL))
;
[163]: [17-1] LOG: terminating any other active server processes
[2003]: [4-1] WARNING: terminating connection because of crash of another
server process
[2003]: [5-1] DETAIL: The postmaster has commanded this server process to
roll back the current transaction and exit, because another server process
exited abnormally and possibly corrupted shared memory.
[2003]: [6-1] HINT: In a moment you should be able to reconnect to the
database and repeat your command.
[2015]: [4-1] WARNING: terminating connection because of crash of another
server process
[2015]: [5-1] DETAIL: The postmaster has commanded this server process to
roll back the current transaction and exit, because another server process
exited abnormally and possibly corrupted shared memory.
```
## pg_dump for relevant tables
```
--
-- PostgreSQL database dump
--
-- Dumped from database version 10.1
-- Dumped by pg_dump version 10.1
SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SET check_function_bodies = false;
SET client_min_messages = warning;
SET row_security = off;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
--
-- Name: posts; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE posts (
id integer NOT NULL,
user_id integer,
topic_id integer NOT NULL,
post_number integer NOT NULL,
raw text NOT NULL,
cooked text NOT NULL,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL,
reply_to_post_number integer,
reply_count integer DEFAULT 0 NOT NULL,
quote_count integer DEFAULT 0 NOT NULL,
deleted_at timestamp without time zone,
off_topic_count integer DEFAULT 0 NOT NULL,
like_count integer DEFAULT 0 NOT NULL,
incoming_link_count integer DEFAULT 0 NOT NULL,
bookmark_count integer DEFAULT 0 NOT NULL,
avg_time integer,
score double precision,
reads integer DEFAULT 0 NOT NULL,
post_type integer DEFAULT 1 NOT NULL,
vote_count integer DEFAULT 0 NOT NULL,
sort_order integer,
last_editor_id integer,
hidden boolean DEFAULT false NOT NULL,
hidden_reason_id integer,
notify_moderators_count integer DEFAULT 0 NOT NULL,
spam_count integer DEFAULT 0 NOT NULL,
illegal_count integer DEFAULT 0 NOT NULL,
inappropriate_count integer DEFAULT 0 NOT NULL,
last_version_at timestamp without time zone NOT NULL,
user_deleted boolean DEFAULT false NOT NULL,
reply_to_user_id integer,
percent_rank double precision DEFAULT 1.0,
notify_user_count integer DEFAULT 0 NOT NULL,
like_score integer DEFAULT 0 NOT NULL,
deleted_by_id integer,
edit_reason character varying(255),
word_count integer,
version integer DEFAULT 1 NOT NULL,
cook_method integer DEFAULT 1 NOT NULL,
wiki boolean DEFAULT false NOT NULL,
baked_at timestamp without time zone,
baked_version integer,
hidden_at timestamp without time zone,
self_edits integer DEFAULT 0 NOT NULL,
reply_quoted boolean DEFAULT false NOT NULL,
via_email boolean DEFAULT false NOT NULL,
raw_email text,
public_version integer DEFAULT 1 NOT NULL,
action_code character varying(255),
image_url character varying
);
--
-- Name: TABLE posts; Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON TABLE posts IS 'If you want to query public posts only, use the
badge_posts view.';
--
-- Name: COLUMN posts.post_number; Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON COLUMN posts.post_number IS 'The position of this post in the
topic. The pair (topic_id, post_number) forms a natural key on the posts
table.';
--
-- Name: COLUMN posts.raw; Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON COLUMN posts.raw IS 'The raw Markdown that the user entered into
the composer.';
--
-- Name: COLUMN posts.cooked; Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON COLUMN posts.cooked IS 'The processed HTML that is presented in a
topic.';
--
-- Name: COLUMN posts.reply_to_post_number; Type: COMMENT; Schema: public;
Owner: -
--
COMMENT ON COLUMN posts.reply_to_post_number IS 'If this post is a reply to
another, this column is the post_number of the post it''s replying to. [FKEY
posts.topic_id, posts.post_number]';
--
-- Name: COLUMN posts.reply_quoted; Type: COMMENT; Schema: public; Owner:
-
--
COMMENT ON COLUMN posts.reply_quoted IS 'This column is true if the post
contains a quote-reply, which causes the in-reply-to indicator to be
absent.';
--
-- Name: topics; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE topics (
id integer NOT NULL,
title character varying(255) NOT NULL,
last_posted_at timestamp without time zone,
created_at timestamp without time zone NOT NULL,
updated_at timestamp without time zone NOT NULL,
views integer DEFAULT 0 NOT NULL,
posts_count integer DEFAULT 0 NOT NULL,
user_id integer,
last_post_user_id integer NOT NULL,
reply_count integer DEFAULT 0 NOT NULL,
featured_user1_id integer,
featured_user2_id integer,
featured_user3_id integer,
avg_time integer,
deleted_at timestamp without time zone,
highest_post_number integer DEFAULT 0 NOT NULL,
image_url character varying(255),
like_count integer DEFAULT 0 NOT NULL,
incoming_link_count integer DEFAULT 0 NOT NULL,
category_id integer,
visible boolean DEFAULT true NOT NULL,
moderator_posts_count integer DEFAULT 0 NOT NULL,
closed boolean DEFAULT false NOT NULL,
archived boolean DEFAULT false NOT NULL,
bumped_at timestamp without time zone NOT NULL,
has_summary boolean DEFAULT false NOT NULL,
vote_count integer DEFAULT 0 NOT NULL,
archetype character varying(255) DEFAULT 'regular'::character varying
NOT NULL,
featured_user4_id integer,
notify_moderators_count integer DEFAULT 0 NOT NULL,
spam_count integer DEFAULT 0 NOT NULL,
pinned_at timestamp without time zone,
score double precision,
percent_rank double precision DEFAULT 1.0 NOT NULL,
subtype character varying(255),
slug character varying(255),
deleted_by_id integer,
participant_count integer DEFAULT 1,
word_count integer,
excerpt character varying(1000),
pinned_globally boolean DEFAULT false NOT NULL,
pinned_until timestamp without time zone,
fancy_title character varying(400),
highest_staff_post_number integer DEFAULT 0 NOT NULL,
featured_link character varying,
CONSTRAINT has_category_id CHECK (((category_id IS NOT NULL) OR
((archetype)::text <> 'regular'::text))),
CONSTRAINT pm_has_no_category CHECK (((category_id IS NULL) OR
((archetype)::text <> 'private_message'::text)))
);
--
-- Name: TABLE topics; Type: COMMENT; Schema: public; Owner: -
--
COMMENT ON TABLE topics IS 'To query public topics only: SELECT ... FROM
topics t LEFT INNER JOIN categories c ON (t.category_id = c.id AND
c.read_restricted = false)';
--
-- Name: post_timings; Type: TABLE; Schema: public; Owner: -
--
CREATE TABLE post_timings (
topic_id integer NOT NULL,
post_number integer NOT NULL,
user_id integer NOT NULL,
msecs integer NOT NULL
);
--
-- Name: posts_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
CREATE SEQUENCE posts_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- Name: posts_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--
ALTER SEQUENCE posts_id_seq OWNED BY posts.id;
--
-- Name: topics_id_seq; Type: SEQUENCE; Schema: public; Owner: -
--
CREATE SEQUENCE topics_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
--
-- Name: topics_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
--
ALTER SEQUENCE topics_id_seq OWNED BY topics.id;
--
-- Name: posts id; Type: DEFAULT; Schema: public; Owner: -
--
ALTER TABLE ONLY posts ALTER COLUMN id SET DEFAULT
nextval('posts_id_seq'::regclass);
--
-- Name: topics id; Type: DEFAULT; Schema: public; Owner: -
--
ALTER TABLE ONLY topics ALTER COLUMN id SET DEFAULT
nextval('topics_id_seq'::regclass);
--
-- Name: topics forum_threads_pkey; Type: CONSTRAINT; Schema: public; Owner:
-
--
ALTER TABLE ONLY topics
ADD CONSTRAINT forum_threads_pkey PRIMARY KEY (id);
--
-- Name: posts posts_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY posts
ADD CONSTRAINT posts_pkey PRIMARY KEY (id);
--
-- Name: idx_posts_created_at_topic_id; Type: INDEX; Schema: public; Owner:
-
--
CREATE INDEX idx_posts_created_at_topic_id ON posts USING btree (created_at,
topic_id) WHERE (deleted_at IS NULL);
--
-- Name: idx_posts_deleted_posts; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX idx_posts_deleted_posts ON posts USING btree (topic_id,
post_number) WHERE (deleted_at IS NOT NULL);
--
-- Name: idx_posts_user_id_deleted_at; Type: INDEX; Schema: public; Owner:
-
--
CREATE INDEX idx_posts_user_id_deleted_at ON posts USING btree (user_id)
WHERE (deleted_at IS NULL);
--
-- Name: idx_topics_front_page; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX idx_topics_front_page ON topics USING btree (deleted_at,
visible, archetype, category_id, id);
--
-- Name: idx_topics_user_id_deleted_at; Type: INDEX; Schema: public; Owner:
-
--
CREATE INDEX idx_topics_user_id_deleted_at ON topics USING btree (user_id)
WHERE (deleted_at IS NULL);
--
-- Name: idxtopicslug; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX idxtopicslug ON topics USING btree (slug) WHERE ((deleted_at IS
NULL) AND (slug IS NOT NULL));
--
-- Name: index_forum_threads_on_bumped_at; Type: INDEX; Schema: public;
Owner: -
--
CREATE INDEX index_forum_threads_on_bumped_at ON topics USING btree
(bumped_at DESC);
--
-- Name: index_post_timings_on_user_id; Type: INDEX; Schema: public; Owner:
-
--
CREATE INDEX index_post_timings_on_user_id ON post_timings USING btree
(user_id);
--
-- Name: index_posts_on_reply_to_post_number; Type: INDEX; Schema: public;
Owner: -
--
CREATE INDEX index_posts_on_reply_to_post_number ON posts USING btree
(reply_to_post_number);
--
-- Name: index_posts_on_topic_id_and_post_number; Type: INDEX; Schema:
public; Owner: -
--
CREATE UNIQUE INDEX index_posts_on_topic_id_and_post_number ON posts USING
btree (topic_id, post_number);
--
-- Name: index_posts_on_user_id_and_created_at; Type: INDEX; Schema: public;
Owner: -
--
CREATE INDEX index_posts_on_user_id_and_created_at ON posts USING btree
(user_id, created_at);
--
-- Name: index_topics_on_created_at_and_visible; Type: INDEX; Schema:
public; Owner: -
--
CREATE INDEX index_topics_on_created_at_and_visible ON topics USING btree
(created_at, visible) WHERE ((deleted_at IS NULL) AND ((archetype)::text <>
'private_message'::text));
--
-- Name: index_topics_on_id_and_deleted_at; Type: INDEX; Schema: public;
Owner: -
--
CREATE INDEX index_topics_on_id_and_deleted_at ON topics USING btree (id,
deleted_at);
--
-- Name: index_topics_on_lower_title; Type: INDEX; Schema: public; Owner:
-
--
CREATE INDEX index_topics_on_lower_title ON topics USING btree
(lower((title)::text));
--
-- Name: index_topics_on_pinned_at; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_topics_on_pinned_at ON topics USING btree (pinned_at)
WHERE (pinned_at IS NOT NULL);
--
-- Name: index_topics_on_pinned_globally; Type: INDEX; Schema: public;
Owner: -
--
CREATE INDEX index_topics_on_pinned_globally ON topics USING btree
(pinned_globally) WHERE pinned_globally;
--
-- Name: post_timings_summary; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX post_timings_summary ON post_timings USING btree (topic_id,
post_number);
--
-- Name: post_timings_unique; Type: INDEX; Schema: public; Owner: -
--
CREATE UNIQUE INDEX post_timings_unique ON post_timings USING btree
(topic_id, post_number, user_id);
--
-- PostgreSQL database dump complete
--
```
## Stack trace after following
https://wiki.edubuntu.org/DebuggingProgramCrash
```
Program received signal SIGSEGV, Segmentation fault.
index_markpos (scan=0x0) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/access/index/indexam.c:373
373 /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/access/index/indexam.c:
No such file or directory.
(gdb) bt
#0 index_markpos (scan=0x0) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/access/index/indexam.c:373
#1 0x000055a812746c68 in ExecMergeJoin (pstate=0x55a8131bc778) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeMergejoin.c:1188
#2 0x000055a81272cf3f in ExecProcNode (node=0x55a8131bc778) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/include/executor/executor.h:250
#3 EvalPlanQualNext (epqstate=epqstate@entry=0x55a81318c518) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:3005
#4 0x000055a81272d342 in EvalPlanQual (estate=estate@entry=0x55a81318c018,
epqstate=epqstate@entry=0x55a81318c518,
relation=relation@entry=0x7f4e4e25ab68, rti=1, lockmode=<optimized out>,
tid=tid@entry=0x7ffd54492330, priorXmax=8959603) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:2521
#5 0x000055a812747af7 in ExecUpdate (mtstate=mtstate@entry=0x55a81318c468,
tupleid=tupleid@entry=0x7ffd54492450, oldtuple=oldtuple@entry=0x0,
slot=<optimized out>, slot@entry=0x55a8131a2f08,
planSlot=planSlot@entry=0x55a81319db60,
epqstate=epqstate@entry=0x55a81318c518, estate=0x55a81318c018, canSetTag=1
'\001') at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeModifyTable.c:1113
#6 0x000055a8127480c5 in ExecModifyTable (pstate=0x55a81318c468) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeModifyTable.c:1770
#7 0x000055a81272a28b in ExecProcNode (node=0x55a81318c468) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/include/executor/executor.h:250
#8 ExecutePlan (execute_once=<optimized out>, dest=0x7f4e4e18d7a8,
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_UPDATE, use_parallel_mode=<optimized out>,
planstate=0x55a81318c468, estate=0x55a81318c018) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:1722
#9 standard_ExecutorRun (queryDesc=0x55a813156838, direction=<optimized
out>, count=0, execute_once=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:363
#10 0x000055a812864f05 in ProcessQuery (plan=<optimized out>,
sourceText=0x55a8130f3b08 "UPDATE posts\n", ' ' <repeats 24 times>, "SET
avg_time = (x.gmean / 1000)\n", ' ' <repeats 24 times>, "FROM (SELECT
post_timings.topic_id,\n", ' ' <repeats 37 times>,
"post_timings.post_number,\n "..., params=0x0, queryEnv=0x0,
dest=0x7f4e4e18d7a8, completionTag=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:161
#11 0x000055a81286515b in PortalRunMulti
(portal=portal@entry=0x55a81318a008, isTopLevel=isTopLevel@entry=1 '\001',
setHoldSnapshot=setHoldSnapshot@entry=0 '\000',
dest=dest@entry=0x7f4e4e18d7a8, altdest=altdest@entry=0x7f4e4e18d7a8,
completionTag=completionTag@entry=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:1286
#12 0x000055a812865e04 in PortalRun (portal=portal@entry=0x55a81318a008,
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=1 '\001',
run_once=run_once@entry=1 '\001', dest=dest@entry=0x7f4e4e18d7a8,
altdest=altdest@entry=0x7f4e4e18d7a8, completionTag=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:799
#13 0x000055a8128618dc in exec_simple_query (query_string=0x55a8130f3b08
"UPDATE posts\n", ' ' <repeats 24 times>, "SET avg_time = (x.gmean /
1000)\n", ' ' <repeats 24 times>, "FROM (SELECT post_timings.topic_id,\n", '
' <repeats 37 times>, "post_timings.post_number,\n "...) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/postgres.c:1099
#14 0x000055a812862df6 in PostgresMain (argc=<optimized out>,
argv=argv@entry=0x55a81309b738, dbname=0x55a813053b78
"boingboing_discourse", username=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/postgres.c:4088
#15 0x000055a81259cfe9 in BackendRun (port=0x55a813092c70) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:4357
#16 BackendStartup (port=0x55a813092c70) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:4029
#17 ServerLoop () at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:1753
#18 0x000055a8127f236b in PostmasterMain (argc=3, argv=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:1361
#19 0x000055a81259e3e5 in main (argc=3, argv=0x55a813051a90) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/main/main.c:228
(gdb) bt full
#0 index_markpos (scan=0x0) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/access/index/indexam.c:373
__func__ = "index_markpos"
#1 0x000055a812746c68 in ExecMergeJoin (pstate=0x55a8131bc778) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeMergejoin.c:1188
node = 0x55a8131bc778
joinqual = 0x55a8131bcac8
otherqual = 0x0
qualResult = <optimized out>
compareResult = <optimized out>
innerPlan = 0x55a8131c8958
innerTupleSlot = <optimized out>
outerPlan = 0x55a8131bdd60
outerTupleSlot = <optimized out>
econtext = 0x55a8131bc888
doFillOuter = 0 '\000'
doFillInner = 0 '\000'
__func__ = "ExecMergeJoin"
#2 0x000055a81272cf3f in ExecProcNode (node=0x55a8131bc778) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/include/executor/executor.h:250
No locals.
#3 EvalPlanQualNext (epqstate=epqstate@entry=0x55a81318c518) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:3005
oldcontext = 0x55a813146198
#4 0x000055a81272d342 in EvalPlanQual (estate=estate@entry=0x55a81318c018,
epqstate=epqstate@entry=0x55a81318c518,
relation=relation@entry=0x7f4e4e25ab68, rti=1, lockmode=<optimized out>,
tid=tid@entry=0x7ffd54492330, priorXmax=8959603) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:2521
slot = <optimized out>
copyTuple = 0x7f4e4e167d68
#5 0x000055a812747af7 in ExecUpdate (mtstate=mtstate@entry=0x55a81318c468,
tupleid=tupleid@entry=0x7ffd54492450, oldtuple=oldtuple@entry=0x0,
slot=<optimized out>, slot@entry=0x55a8131a2f08,
planSlot=planSlot@entry=0x55a81319db60,
epqstate=epqstate@entry=0x55a81318c518, estate=0x55a81318c018, canSetTag=1
'\001') at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeModifyTable.c:1113
epqslot = <optimized out>
lockmode = LockTupleNoKeyExclusive
tuple = <optimized out>
resultRelInfo = 0x55a81318c2b8
resultRelationDesc = 0x7f4e4e25ab68
result = <optimized out>
hufd = {ctid = {ip_blkid = {bi_hi = 2, bi_lo = 29090}, ip_posid =
7}, xmax = 8959603, cmax = 4294967295}
recheckIndexes = 0x0
__func__ = "ExecUpdate"
#6 0x000055a8127480c5 in ExecModifyTable (pstate=0x55a81318c468) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeModifyTable.c:1770
node = 0x55a81318c468
estate = 0x55a81318c018
operation = CMD_UPDATE
saved_resultRelInfo = 0x0
resultRelInfo = 0x55a81318c2b8
subplanstate = 0x55a81318d238
junkfilter = 0x55a8131a50a8
slot = 0x55a8131a2f08
tupleid = 0x7ffd54492450
tuple_ctid = {ip_blkid = {bi_hi = 2, bi_lo = 29090}, ip_posid =
11}
oldtupdata = {t_len = 0, t_self = {ip_blkid = {bi_hi = 0, bi_lo =
0}, ip_posid = 26680}, t_tableOid = 21928, t_data = 0x55a81318c018}
oldtuple = 0x0
__func__ = "ExecModifyTable"
#7 0x000055a81272a28b in ExecProcNode (node=0x55a81318c468) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/include/executor/executor.h:250
No locals.
#8 ExecutePlan (execute_once=<optimized out>, dest=0x7f4e4e18d7a8,
direction=<optimized out>, numberTuples=0, sendTuples=<optimized out>,
operation=CMD_UPDATE, use_parallel_mode=<optimized out>,
planstate=0x55a81318c468, estate=0x55a81318c018) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:1722
slot = <optimized out>
current_tuple_count = 0
#9 standard_ExecutorRun (queryDesc=0x55a813156838, direction=<optimized
out>, count=0, execute_once=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:363
estate = 0x55a81318c018
operation = CMD_UPDATE
dest = 0x7f4e4e18d7a8
sendTuples = <optimized out>
__func__ = "standard_ExecutorRun"
#10 0x000055a812864f05 in ProcessQuery (plan=<optimized out>,
sourceText=0x55a8130f3b08 "UPDATE posts\n", ' ' <repeats 24 times>, "SET
avg_time = (x.gmean / 1000)\n", ' ' <repeats 24 times>, "FROM (SELECT
post_timings.topic_id,\n", ' ' <repeats 37 times>,
"post_timings.post_number,\n "..., params=0x0, queryEnv=0x0,
dest=0x7f4e4e18d7a8, completionTag=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:161
queryDesc = 0x55a813156838
#11 0x000055a81286515b in PortalRunMulti
(portal=portal@entry=0x55a81318a008, isTopLevel=isTopLevel@entry=1 '\001',
setHoldSnapshot=setHoldSnapshot@entry=0 '\000',
dest=dest@entry=0x7f4e4e18d7a8, altdest=altdest@entry=0x7f4e4e18d7a8,
completionTag=completionTag@entry=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:1286
pstmt = 0x7f4e4e18d6c8
active_snapshot_set = 1 '\001'
stmtlist_item = 0x7f4e4e18d758
#12 0x000055a812865e04 in PortalRun (portal=portal@entry=0x55a81318a008,
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=1 '\001',
run_once=run_once@entry=1 '\001', dest=dest@entry=0x7f4e4e18d7a8,
altdest=altdest@entry=0x7f4e4e18d7a8, completionTag=0x7ffd544927f0 "") at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/pquery.c:799
save_exception_stack = 0x7ffd54492a20
save_context_stack = 0x0
local_sigjmp_buf = {{__jmpbuf = {1, -2410647209184299319,
94180362802640, 140726017533936, 94180363247624, 94180362802688,
-2410647209169619255, -8493467030827796791}, __mask_was_saved = 0,
__saved_mask = {__val = {8192, 0, 0, 12099560782854432544, 0,
94180356443286, 64, 140726017533776, 94180363247624, 88, 94180362802640, 2,
139974294427512, 140726017533808, 94180354932849, 2}}}}
result = <optimized out>
nprocessed = <optimized out>
saveTopTransactionResourceOwner = 0x55a8130a1938
saveTopTransactionContext = 0x55a8130a1bd8
saveActivePortal = 0x0
saveResourceOwner = 0x55a8130a1938
savePortalContext = 0x0
saveMemoryContext = 0x55a8130a1bd8
__func__ = "PortalRun"
#13 0x000055a8128618dc in exec_simple_query (query_string=0x55a8130f3b08
"UPDATE posts\n", ' ' <repeats 24 times>, "SET avg_time = (x.gmean /
1000)\n", ' ' <repeats 24 times>, "FROM (SELECT post_timings.topic_id,\n", '
' <repeats 37 times>, "post_timings.post_number,\n "...) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/postgres.c:1099
parsetree = 0x55a81311d5d0
portal = 0x55a81318a008
snapshot_set = <optimized out>
commandTag = <optimized out>
completionTag =
"\000(IT\375\177\000\000\001u\230\022\250U\000\000\060(IT\375\177\000\000(\022\346\022\001\000\000\000\234$\336\022\250U\000\000\234$\336\022\250U\000\000\b;\017\023\250U\000\000\001\000\000\000\000\000\000"
querytree_list = <optimized out>
plantree_list = <optimized out>
receiver = 0x7f4e4e18d7a8
format = 0
dest = DestRemote
parsetree_list = 0x55a81311d620
parsetree_item = 0x55a81311d600
save_log_statement_stats = 0 '\000'
was_logged = 0 '\000'
isTopLevel = 1 '\001'
msec_str =
"\000(IT\375\177\000\000\001u\230\022\250U\000\000\060(IT\375\177\000\000(\022\346\022\001\000\000"
__func__ = "exec_simple_query"
#14 0x000055a812862df6 in PostgresMain (argc=<optimized out>,
argv=argv@entry=0x55a81309b738, dbname=0x55a813053b78
"boingboing_discourse", username=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/tcop/postgres.c:4088
query_string = 0x55a8130f3b08 "UPDATE posts\n", ' ' <repeats 24
times>, "SET avg_time = (x.gmean / 1000)\n", ' ' <repeats 24 times>, "FROM
(SELECT post_timings.topic_id,\n", ' ' <repeats 37 times>,
"post_timings.post_number,\n "...
firstchar = 81
input_message = {data = 0x55a8130f3b08 "UPDATE posts\n", ' '
<repeats 24 times>, "SET avg_time = (x.gmean / 1000)\n", ' ' <repeats 24
times>, "FROM (SELECT post_timings.topic_id,\n", ' ' <repeats 37 times>,
"post_timings.post_number,\n "..., len = 942, maxlen = 1024, cursor =
942}
local_sigjmp_buf = {{__jmpbuf = {140726017534400,
-2410647209056373047, 94180362270520, 1, 94180362270200, 94180362234992,
-2410647208949418295, -8493467028741261623}, __mask_was_saved = 1,
__saved_mask = {__val = {0, 140726017534704, 94180352583639, 94180359415580,
140726017534620, 94180359731456, 94180356159802, 140726017534928, 686, 0,
94180359927584, 94180362234992, 139974255290296, 94180359464352,
206158430256, 140726017534912}}}}
send_ready_for_query = 0 '\000'
disable_idle_in_transaction_timeout = <optimized out>
__func__ = "PostgresMain"
#15 0x000055a81259cfe9 in BackendRun (port=0x55a813092c70) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:4357
ac = 1
secs = 570264982
usecs = 174473
i = 1
av = 0x55a81309b738
maxac = <optimized out>
#16 BackendStartup (port=0x55a813092c70) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:4029
bn = <optimized out>
pid = <optimized out>
#17 ServerLoop () at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:1753
rmask = {fds_bits = {64, 0 <repeats 15 times>}}
selres = <optimized out>
now = <optimized out>
readmask = {fds_bits = {112, 0 <repeats 15 times>}}
last_lockfile_recheck_time = 1516949750
last_touch_time = 1516949630
__func__ = "ServerLoop"
#18 0x000055a8127f236b in PostmasterMain (argc=3, argv=<optimized out>) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/postmaster/postmaster.c:1361
opt = <optimized out>
status = <optimized out>
userDoption = <optimized out>
listen_addr_saved = 1 '\001'
i = <optimized out>
output_config_variable = <optimized out>
__func__ = "PostmasterMain"
#19 0x000055a81259e3e5 in main (argc=3, argv=0x55a813051a90) at
/build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/main/main.c:228
No locals.
```
## Configuration changes
```
shared_buffers = "1GB"
work_mem = "16MB"
maintenance_work_mem = "16MB"
```
Do feel free to let me know if I'm required to provide more information and
I'll be happy to do so.
=?utf-8?q?PG_Bug_reporting_form?= <noreply@postgresql.org> writes:
> ## Query that results in segmentation fault
Unsurprisingly, the given info is not enough to reproduce the crash.
However, looking at the stack trace:
> (gdb) bt
> #0 index_markpos (scan=0x0) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/access/index/indexam.c:373
> #1 0x000055a812746c68 in ExecMergeJoin (pstate=0x55a8131bc778) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeMergejoin.c:1188
> #2 0x000055a81272cf3f in ExecProcNode (node=0x55a8131bc778) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/include/executor/executor.h:250
> #3 EvalPlanQualNext (epqstate=epqstate@entry=0x55a81318c518) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:3005
> #4 0x000055a81272d342 in EvalPlanQual (estate=estate@entry=0x55a81318c018,
> epqstate=epqstate@entry=0x55a81318c518,
> relation=relation@entry=0x7f4e4e25ab68, rti=1, lockmode=<optimized out>,
> tid=tid@entry=0x7ffd54492330, priorXmax=8959603) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/execMain.c:2521
> #5 0x000055a812747af7 in ExecUpdate (mtstate=mtstate@entry=0x55a81318c468,
> tupleid=tupleid@entry=0x7ffd54492450, oldtuple=oldtuple@entry=0x0,
> slot=<optimized out>, slot@entry=0x55a8131a2f08,
> planSlot=planSlot@entry=0x55a81319db60,
> epqstate=epqstate@entry=0x55a81318c518, estate=0x55a81318c018, canSetTag=1
> '\001') at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../src/backend/executor/nodeModifyTable.c:1113
it seems fairly clear that somebody passed a NULL scandesc pointer
to index_markpos. Looking at the only two callers of that function,
this must mean that either an IndexScan's node->iss_ScanDesc or an
IndexOnlyScan's node->ioss_ScanDesc was null. (We don't see
ExecIndexMarkPos in the trace because the compiler optimized the tail
call.) And that leads me to commit 09529a70b, which changed the logic
in those node types to allow initialization of the index scandesc to be
delayed to the first tuple fetch, rather than necessarily performed during
ExecInitNode.
Because this is happening inside an EvalPlanQual, it's unsurprising
that we'd be taking an unusual code path. I believe what happened
was that the IndexScan node returned a jammed-in EPQ tuple on its
first call, and so hadn't opened the scandesc at all, while
ExecMergeJoin would do an ExecMarkPos if the tuple matched (which
it typically would if we'd gotten to EPQ), whereupon kaboom.
It's tempting to think that this is an oversight in commit 09529a70b
and we need to rectify it by something along the lines of teaching
ExecIndexMarkPos and ExecIndexOnlyMarkPos to initialize the scandesc
if needed before calling index_markpos.
However, on further reflection, it seems like this is a bug of far
older standing, to wit that ExecIndexMarkPos/ExecIndexRestrPos
are doing entirely the wrong thing when EPQ is active. It's not
meaningful, or at least not correct, to be messing with the index
scan state at all in that case. Rather, what the scan is supposed
to do is return the single jammed-in EPQ tuple, and what "restore"
ought to mean is "clear my es_epqScanDone flag so that that tuple
can be returned again".
It's not clear to me whether the failure to do that has any real
consequences though. It would only matter if there's more than
one tuple available on the outer side of the mergejoin, which
I think there never would be in an EPQ situation. Still, if there
ever were more outer tuples, the mergejoin would misbehave and maybe
even crash itself (because it probably assumes that restoring to
a point where there had been a tuple would allow it to re-fetch
that tuple successfully).
So what I'm inclined to do is teach the mark/restore infrastructure
to do the right thing with EPQ state when EPQ is active. But I'm not
clear on whether that needs to be back-patched earlier than v10.
regards, tom lane
> Unsurprisingly, the given info is not enough to reproduce the crash.
We could only reproduce this on our production PostgreSQL cluster. I took a full pg_dump and restored into a PostgreSQL cluster (10.1) locally but could not reproduce the seg fault. If it helps, the data in the cluster was restored via pg_dump (10.1) from a PostgreSQL 9.5 cluster.
We could only reproduce this on our production PostgreSQL cluster. I took a full pg_dump and restored into a PostgreSQL cluster (10.1) locally but could not reproduce the seg fault. If it helps, the data in the cluster was restored via pg_dump (10.1) from a PostgreSQL 9.5 cluster.
On Sat, Jan 27, 2018 at 2:45 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
PG Bug reportingform <noreply@postgresql.org> writes:
> ## Query that results in segmentation fault
Unsurprisingly, the given info is not enough to reproduce the crash.
However, looking at the stack trace:
> (gdb) bt
> #0 index_markpos (scan=0x0) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/backend/access/index/ indexam.c:373
> #1 0x000055a812746c68 in ExecMergeJoin (pstate=0x55a8131bc778) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/backend/executor/ nodeMergejoin.c:1188
> #2 0x000055a81272cf3f in ExecProcNode (node=0x55a8131bc778) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/include/executor/executor. h:250
> #3 EvalPlanQualNext (epqstate=epqstate@entry=0x55a81318c518) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/backend/executor/execMain. c:3005
> #4 0x000055a81272d342 in EvalPlanQual (estate=estate@entry=0x55a81318c018,
> epqstate=epqstate@entry=0x55a81318c518,
> relation=relation@entry=0x7f4e4e25ab68, rti=1, lockmode=<optimized out>,
> tid=tid@entry=0x7ffd54492330, priorXmax=8959603) at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/backend/executor/execMain. c:2521
> #5 0x000055a812747af7 in ExecUpdate (mtstate=mtstate@entry=0x55a81318c468,
> tupleid=tupleid@entry=0x7ffd54492450, oldtuple=oldtuple@entry=0x0,
> slot=<optimized out>, slot@entry=0x55a8131a2f08,
> planSlot=planSlot@entry=0x55a81319db60,
> epqstate=epqstate@entry=0x55a81318c518, estate=0x55a81318c018, canSetTag=1
> '\001') at
> /build/postgresql-10-qAeTPy/postgresql-10-10.1/build/../ src/backend/executor/ nodeModifyTable.c:1113
it seems fairly clear that somebody passed a NULL scandesc pointer
to index_markpos. Looking at the only two callers of that function,
this must mean that either an IndexScan's node->iss_ScanDesc or an
IndexOnlyScan's node->ioss_ScanDesc was null. (We don't see
ExecIndexMarkPos in the trace because the compiler optimized the tail
call.) And that leads me to commit 09529a70b, which changed the logic
in those node types to allow initialization of the index scandesc to be
delayed to the first tuple fetch, rather than necessarily performed during
ExecInitNode.
Because this is happening inside an EvalPlanQual, it's unsurprising
that we'd be taking an unusual code path. I believe what happened
was that the IndexScan node returned a jammed-in EPQ tuple on its
first call, and so hadn't opened the scandesc at all, while
ExecMergeJoin would do an ExecMarkPos if the tuple matched (which
it typically would if we'd gotten to EPQ), whereupon kaboom.
It's tempting to think that this is an oversight in commit 09529a70b
and we need to rectify it by something along the lines of teaching
ExecIndexMarkPos and ExecIndexOnlyMarkPos to initialize the scandesc
if needed before calling index_markpos.
However, on further reflection, it seems like this is a bug of far
older standing, to wit that ExecIndexMarkPos/ExecIndexRestrPos
are doing entirely the wrong thing when EPQ is active. It's not
meaningful, or at least not correct, to be messing with the index
scan state at all in that case. Rather, what the scan is supposed
to do is return the single jammed-in EPQ tuple, and what "restore"
ought to mean is "clear my es_epqScanDone flag so that that tuple
can be returned again".
It's not clear to me whether the failure to do that has any real
consequences though. It would only matter if there's more than
one tuple available on the outer side of the mergejoin, which
I think there never would be in an EPQ situation. Still, if there
ever were more outer tuples, the mergejoin would misbehave and maybe
even crash itself (because it probably assumes that restoring to
a point where there had been a tuple would allow it to re-fetch
that tuple successfully).
So what I'm inclined to do is teach the mark/restore infrastructure
to do the right thing with EPQ state when EPQ is active. But I'm not
clear on whether that needs to be back-patched earlier than v10.
regards, tom lane
Guo Xiang Tan <gxtan1990@gmail.com> writes:
>> Unsurprisingly, the given info is not enough to reproduce the crash.
> We could only reproduce this on our production PostgreSQL cluster.
Since the problem occurs during EvalPlanQual, it'd only manifest when
the update touched a concurrently-updated row, which is why it's hard
to reproduce if you aren't deliberately trying to trigger it. But
I've been able to reproduce a crash at the same spot, and have made
a test case using the isolationtester infrastructure (attached).
I'll look into an actual fix tomorrow.
regards, tom lane
diff --git a/src/test/isolation/specs/eval-plan-qual.spec b/src/test/isolation/specs/eval-plan-qual.spec
index 7ff6f6b..53a32e6 100644
*** a/src/test/isolation/specs/eval-plan-qual.spec
--- b/src/test/isolation/specs/eval-plan-qual.spec
*************** setup
*** 21,33 ****
CREATE TABLE table_b (id integer, value text);
INSERT INTO table_a VALUES (1, 'tableAValue');
INSERT INTO table_b VALUES (1, 'tableBValue');
}
teardown
{
DROP TABLE accounts;
DROP TABLE p CASCADE;
! DROP TABLE table_a, table_b;
}
session "s1"
--- 21,36 ----
CREATE TABLE table_b (id integer, value text);
INSERT INTO table_a VALUES (1, 'tableAValue');
INSERT INTO table_b VALUES (1, 'tableBValue');
+
+ CREATE TABLE jointest AS SELECT generate_series(1,10) AS id, 0 AS data;
+ CREATE INDEX ON jointest(id);
}
teardown
{
DROP TABLE accounts;
DROP TABLE p CASCADE;
! DROP TABLE table_a, table_b, jointest;
}
session "s1"
*************** step "updateforss" {
*** 78,83 ****
--- 81,96 ----
UPDATE table_b SET value = 'newTableBValue' WHERE id = 1;
}
+ # these tests exercise mark/restore during EPQ recheck, cf bug #15032
+
+ step "selectjoinforupdate" {
+ set enable_hashjoin to 0;
+ set enable_seqscan to 0;
+ explain (costs off)
+ select * from jointest a join jointest b on a.id=b.id for update;
+ select * from jointest a join jointest b on a.id=b.id for update;
+ }
+
session "s2"
setup { BEGIN ISOLATION LEVEL READ COMMITTED; }
*************** step "readforss" {
*** 104,109 ****
--- 117,123 ----
WHERE ta.id = 1 FOR UPDATE OF ta;
}
step "wrtwcte" { UPDATE table_a SET value = 'tableAValue2' WHERE id = 1; }
+ step "wrjt" { UPDATE jointest SET data = 42 WHERE id = 7; }
step "c2" { COMMIT; }
session "s3"
*************** permutation "wx2" "partiallock" "c2" "c1
*** 135,137 ****
--- 149,152 ----
permutation "wx2" "lockwithvalues" "c2" "c1" "read"
permutation "updateforss" "readforss" "c1" "c2"
permutation "wrtwcte" "readwcte" "c1" "c2"
+ permutation "wrjt" "selectjoinforupdate" "c2" "c1"
I wrote:
> I'll look into an actual fix tomorrow.
I've pushed a fix for this. I concluded that there's no real reason
to touch branches before 10: the index_markpos call is useless in the
case in question, but it doesn't do any more than waste some cycles.
regards, tom lane
Hi Tom,
We've upgraded to 10.2 and are not getting segmentation faults while running the query anymore. Thank you for the fix.
We've upgraded to 10.2 and are not getting segmentation faults while running the query anymore. Thank you for the fix.
On Sun, Jan 28, 2018 at 2:57 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
I wrote:
> I'll look into an actual fix tomorrow.
I've pushed a fix for this. I concluded that there's no real reason
to touch branches before 10: the index_markpos call is useless in the
case in question, but it doesn't do any more than waste some cycles.
regards, tom lane