@@ -184,6 +184,8 @@ pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
184184PgAioHandle *
185185pgaio_io_acquire_nb (struct ResourceOwnerData * resowner , PgAioReturn * ret )
186186{
187+ PgAioHandle * ioh = NULL ;
188+
187189 if (pgaio_my_backend -> num_staged_ios >= PGAIO_SUBMIT_BATCH_SIZE )
188190 {
189191 Assert (pgaio_my_backend -> num_staged_ios == PGAIO_SUBMIT_BATCH_SIZE );
@@ -193,10 +195,17 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
193195 if (pgaio_my_backend -> handed_out_io )
194196 elog (ERROR , "API violation: Only one IO can be handed out" );
195197
198+ /*
199+ * Probably not needed today, as interrupts should not process this IO,
200+ * but...
201+ */
202+ HOLD_INTERRUPTS ();
203+
196204 if (!dclist_is_empty (& pgaio_my_backend -> idle_ios ))
197205 {
198206 dlist_node * ion = dclist_pop_head_node (& pgaio_my_backend -> idle_ios );
199- PgAioHandle * ioh = dclist_container (PgAioHandle , node , ion );
207+
208+ ioh = dclist_container (PgAioHandle , node , ion );
200209
201210 Assert (ioh -> state == PGAIO_HS_IDLE );
202211 Assert (ioh -> owner_procno == MyProcNumber );
@@ -212,11 +221,11 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
212221 ioh -> report_return = ret ;
213222 ret -> result .status = PGAIO_RS_UNKNOWN ;
214223 }
215-
216- return ioh ;
217224 }
218225
219- return NULL ;
226+ RESUME_INTERRUPTS ();
227+
228+ return ioh ;
220229}
221230
222231/*
@@ -233,6 +242,12 @@ pgaio_io_release(PgAioHandle *ioh)
233242 Assert (ioh -> resowner );
234243
235244 pgaio_my_backend -> handed_out_io = NULL ;
245+
246+ /*
247+ * Note that no interrupts are processed between the handed_out_io
248+ * check and the call to reclaim - that's important as otherwise an
249+ * interrupt could have already reclaimed the handle.
250+ */
236251 pgaio_io_reclaim (ioh );
237252 }
238253 else
@@ -251,6 +266,12 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
251266
252267 Assert (ioh -> resowner );
253268
269+ /*
270+ * Otherwise an interrupt, in the middle of releasing the IO, could end up
271+ * trying to wait for the IO, leading to state confusion.
272+ */
273+ HOLD_INTERRUPTS ();
274+
254275 ResourceOwnerForgetAioHandle (ioh -> resowner , & ioh -> resowner_node );
255276 ioh -> resowner = NULL ;
256277
@@ -291,6 +312,8 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
291312 */
292313 if (ioh -> report_return )
293314 ioh -> report_return = NULL ;
315+
316+ RESUME_INTERRUPTS ();
294317}
295318
296319/*
@@ -359,6 +382,13 @@ pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
359382static inline void
360383pgaio_io_update_state (PgAioHandle * ioh , PgAioHandleState new_state )
361384{
385+ /*
386+ * All callers need to have held interrupts in some form, otherwise
387+ * interrupt processing could wait for the IO to complete, while in an
388+ * intermediary state.
389+ */
390+ Assert (!INTERRUPTS_CAN_BE_PROCESSED ());
391+
362392 pgaio_debug_io (DEBUG5 , ioh ,
363393 "updating state to %s" ,
364394 pgaio_io_state_get_name (new_state ));
@@ -396,6 +426,13 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
396426 Assert (pgaio_my_backend -> handed_out_io == ioh );
397427 Assert (pgaio_io_has_target (ioh ));
398428
429+ /*
430+ * Otherwise an interrupt, in the middle of staging and possibly executing
431+ * the IO, could end up trying to wait for the IO, leading to state
432+ * confusion.
433+ */
434+ HOLD_INTERRUPTS ();
435+
399436 ioh -> op = op ;
400437 ioh -> result = 0 ;
401438
@@ -435,6 +472,8 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
435472 pgaio_io_prepare_submit (ioh );
436473 pgaio_io_perform_synchronously (ioh );
437474 }
475+
476+ RESUME_INTERRUPTS ();
438477}
439478
440479bool
@@ -544,8 +583,8 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
544583 && state != PGAIO_HS_COMPLETED_SHARED
545584 && state != PGAIO_HS_COMPLETED_LOCAL )
546585 {
547- elog (PANIC , "waiting for own IO in wrong state: %d " ,
548- state );
586+ elog (PANIC , "waiting for own IO %d in wrong state: %s " ,
587+ pgaio_io_get_id ( ioh ), pgaio_io_get_state_name ( ioh ) );
549588 }
550589 }
551590
@@ -599,7 +638,13 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
599638
600639 case PGAIO_HS_COMPLETED_SHARED :
601640 case PGAIO_HS_COMPLETED_LOCAL :
602- /* see above */
641+
642+ /*
643+ * Note that no interrupts are processed between
644+ * pgaio_io_was_recycled() and this check - that's important
645+ * as otherwise an interrupt could have already reclaimed the
646+ * handle.
647+ */
603648 if (am_owner )
604649 pgaio_io_reclaim (ioh );
605650 return ;
@@ -610,6 +655,11 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
610655/*
611656 * Make IO handle ready to be reused after IO has completed or after the
612657 * handle has been released without being used.
658+ *
659+ * Note that callers need to be careful about only calling this in the right
660+ * state and that no interrupts can be processed between the state check and
661+ * the call to pgaio_io_reclaim(). Otherwise interrupt processing could
662+ * already have reclaimed the handle.
613663 */
614664static void
615665pgaio_io_reclaim (PgAioHandle * ioh )
@@ -618,6 +668,9 @@ pgaio_io_reclaim(PgAioHandle *ioh)
618668 Assert (ioh -> owner_procno == MyProcNumber );
619669 Assert (ioh -> state != PGAIO_HS_IDLE );
620670
671+ /* see comment in function header */
672+ HOLD_INTERRUPTS ();
673+
621674 /*
622675 * It's a bit ugly, but right now the easiest place to put the execution
623676 * of local completion callbacks is this function, as we need to execute
@@ -685,6 +738,8 @@ pgaio_io_reclaim(PgAioHandle *ioh)
685738 * efficient in cases where only a few IOs are used.
686739 */
687740 dclist_push_head (& pgaio_my_backend -> idle_ios , & ioh -> node );
741+
742+ RESUME_INTERRUPTS ();
688743}
689744
690745/*
@@ -700,7 +755,7 @@ pgaio_io_wait_for_free(void)
700755 pgaio_debug (DEBUG2 , "waiting for free IO with %d pending, %d in-flight, %d idle IOs" ,
701756 pgaio_my_backend -> num_staged_ios ,
702757 dclist_count (& pgaio_my_backend -> in_flight_ios ),
703- dclist_is_empty (& pgaio_my_backend -> idle_ios ));
758+ dclist_count (& pgaio_my_backend -> idle_ios ));
704759
705760 /*
706761 * First check if any of our IOs actually have completed - when using
@@ -714,6 +769,11 @@ pgaio_io_wait_for_free(void)
714769
715770 if (ioh -> state == PGAIO_HS_COMPLETED_SHARED )
716771 {
772+ /*
773+ * Note that no interrupts are processed between the state check
774+ * and the call to reclaim - that's important as otherwise an
775+ * interrupt could have already reclaimed the handle.
776+ */
717777 pgaio_io_reclaim (ioh );
718778 reclaimed ++ ;
719779 }
@@ -730,13 +790,17 @@ pgaio_io_wait_for_free(void)
730790 if (pgaio_my_backend -> num_staged_ios > 0 )
731791 pgaio_submit_staged ();
732792
793+ /* possibly some IOs finished during submission */
794+ if (!dclist_is_empty (& pgaio_my_backend -> idle_ios ))
795+ return ;
796+
733797 if (dclist_count (& pgaio_my_backend -> in_flight_ios ) == 0 )
734798 ereport (ERROR ,
735799 errmsg_internal ("no free IOs despite no in-flight IOs" ),
736800 errdetail_internal ("%d pending, %d in-flight, %d idle IOs" ,
737801 pgaio_my_backend -> num_staged_ios ,
738802 dclist_count (& pgaio_my_backend -> in_flight_ios ),
739- dclist_is_empty (& pgaio_my_backend -> idle_ios )));
803+ dclist_count (& pgaio_my_backend -> idle_ios )));
740804
741805 /*
742806 * Wait for the oldest in-flight IO to complete.
@@ -747,6 +811,7 @@ pgaio_io_wait_for_free(void)
747811 {
748812 PgAioHandle * ioh = dclist_head_element (PgAioHandle , node ,
749813 & pgaio_my_backend -> in_flight_ios );
814+ uint64 generation = ioh -> generation ;
750815
751816 switch (ioh -> state )
752817 {
@@ -770,13 +835,24 @@ pgaio_io_wait_for_free(void)
770835 * In a more general case this would be racy, because the
771836 * generation could increase after we read ioh->state above.
772837 * But we are only looking at IOs by the current backend and
773- * the IO can only be recycled by this backend.
838+ * the IO can only be recycled by this backend. Even this is
839+ * only OK because we get the handle's generation before
840+ * potentially processing interrupts, e.g. as part of
841+ * pgaio_debug_io().
774842 */
775- pgaio_io_wait (ioh , ioh -> generation );
843+ pgaio_io_wait (ioh , generation );
776844 break ;
777845
778846 case PGAIO_HS_COMPLETED_SHARED :
779- /* it's possible that another backend just finished this IO */
847+
848+ /*
849+ * It's possible that another backend just finished this IO.
850+ *
851+ * Note that no interrupts are processed between the state
852+ * check and the call to reclaim - that's important as
853+ * otherwise an interrupt could have already reclaimed the
854+ * handle.
855+ */
780856 pgaio_io_reclaim (ioh );
781857 break ;
782858 }
@@ -926,6 +1002,11 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
9261002 if (state == PGAIO_HS_COMPLETED_SHARED ||
9271003 state == PGAIO_HS_COMPLETED_LOCAL )
9281004 {
1005+ /*
1006+ * Note that no interrupts are processed between
1007+ * pgaio_io_was_recycled() and this check - that's important as
1008+ * otherwise an interrupt could have already reclaimed the handle.
1009+ */
9291010 if (am_owner )
9301011 pgaio_io_reclaim (ioh );
9311012 return true;
@@ -1153,11 +1234,14 @@ pgaio_closing_fd(int fd)
11531234 {
11541235 dlist_iter iter ;
11551236 PgAioHandle * ioh = NULL ;
1237+ uint64 generation ;
11561238
11571239 dclist_foreach (iter , & pgaio_my_backend -> in_flight_ios )
11581240 {
11591241 ioh = dclist_container (PgAioHandle , node , iter .cur );
11601242
1243+ generation = ioh -> generation ;
1244+
11611245 if (pgaio_io_uses_fd (ioh , fd ))
11621246 break ;
11631247 else
@@ -1172,7 +1256,7 @@ pgaio_closing_fd(int fd)
11721256 fd , dclist_count (& pgaio_my_backend -> in_flight_ios ));
11731257
11741258 /* see comment in pgaio_io_wait_for_free() about raciness */
1175- pgaio_io_wait (ioh , ioh -> generation );
1259+ pgaio_io_wait (ioh , generation );
11761260 }
11771261 }
11781262}
@@ -1201,13 +1285,14 @@ pgaio_shutdown(int code, Datum arg)
12011285 while (!dclist_is_empty (& pgaio_my_backend -> in_flight_ios ))
12021286 {
12031287 PgAioHandle * ioh = dclist_head_element (PgAioHandle , node , & pgaio_my_backend -> in_flight_ios );
1288+ uint64 generation = ioh -> generation ;
12041289
12051290 pgaio_debug_io (DEBUG2 , ioh ,
12061291 "waiting for IO to complete during shutdown, %d in-flight IOs" ,
12071292 dclist_count (& pgaio_my_backend -> in_flight_ios ));
12081293
12091294 /* see comment in pgaio_io_wait_for_free() about raciness */
1210- pgaio_io_wait (ioh , ioh -> generation );
1295+ pgaio_io_wait (ioh , generation );
12111296 }
12121297
12131298 pgaio_my_backend = NULL ;
0 commit comments