24
24
import threading
25
25
import time
26
26
27
+ from pyomo .common .errors import DeveloperError
27
28
from pyomo .common .log import LoggingIntercept , LogStream
28
29
29
30
_poll_interval = 0.0001
34
35
# polling timeout when waiting to close threads. This will bail on
35
36
# closing threast after a minimum of 13.1 seconds and a worst case of
36
37
# ~(13.1 * #threads) seconds
37
- _poll_timeout = 3 # 15 rounds: 0.0001 * 2**15 == 3.2768
38
- _poll_timeout_deadlock = 200 # 21 rounds: 0.0001 * 2**21 == 209.7152
38
+ _poll_timeout = 1 # 14 rounds: 0.0001 * 2**14 == 1.6384
39
+ _poll_timeout_deadlock = 100 # seconds
39
40
40
41
_noop = lambda : None
41
42
_mswindows = sys .platform .startswith ('win' )
@@ -240,10 +241,11 @@ class capture_output(object):
240
241
241
242
"""
242
243
244
+ startup_shutdown = threading .Lock ()
245
+
243
246
def __init__ (self , output = None , capture_fd = False ):
244
- if output is None :
245
- output = io .StringIO ()
246
247
self .output = output
248
+ self .output_stream = None
247
249
self .old = None
248
250
self .tee = None
249
251
self .capture_fd = capture_fd
@@ -285,13 +287,45 @@ def _exit_context_stack(self, et, ev, tb):
285
287
return FAIL
286
288
287
289
def __enter__ (self ):
290
+ if not capture_output .startup_shutdown .acquire (timeout = _poll_timeout_deadlock ):
291
+ # This situation *shouldn't* happen. If it does, it is
292
+ # unlikely that the user can fix it (or even debug it).
293
+ # Instead they should report it back to us.
294
+ #
295
+ # Breadcrumbs:
296
+ #
297
+ # - The last time we hit this [5/2025], it was because we
298
+ # were using capture_output in a solver's __del__. This
299
+ # led to the GC deleting the solver while another solver
300
+ # was trying to start up / run (so the other solver held
301
+ # the lock, but the GC interrupted that thread and
302
+ # wouldn't let go).
303
+ raise DeveloperError ("Deadlock starting capture_output" )
304
+ try :
305
+ return self ._enter_impl ()
306
+ finally :
307
+ capture_output .startup_shutdown .release ()
308
+
309
+ def __exit__ (self , et , ev , tb ):
310
+ if not capture_output .startup_shutdown .acquire (timeout = _poll_timeout_deadlock ):
311
+ # See comments & breadcrumbs in __enter__() above.
312
+ raise DeveloperError ("Deadlock closing capture_output" )
313
+ try :
314
+ return self ._exit_impl (et , ev , tb )
315
+ finally :
316
+ capture_output .startup_shutdown .release ()
317
+
318
+ def _enter_impl (self ):
288
319
self .old = (sys .stdout , sys .stderr )
289
320
old_fd = []
290
321
for stream in self .old :
291
- stream .flush ()
292
322
try :
293
- old_fd .append (stream .fileno ())
294
- except (AttributeError , OSError ):
323
+ stream .flush ()
324
+ try :
325
+ old_fd .append (stream .fileno ())
326
+ except (AttributeError , OSError ):
327
+ old_fd .append (None )
328
+ except (ValueError , OSError ):
295
329
old_fd .append (None )
296
330
try :
297
331
# We have an issue where we are (very aggressively)
@@ -325,6 +359,8 @@ def __enter__(self):
325
359
326
360
if isinstance (self .output , str ):
327
361
self .output_stream = self ._enter_context (open (self .output , 'w' ))
362
+ elif self .output is None :
363
+ self .output_stream = io .StringIO ()
328
364
else :
329
365
self .output_stream = self .output
330
366
if isinstance (self .output , TeeStream ):
@@ -399,26 +435,28 @@ def __enter__(self):
399
435
buf = buf [0 ]
400
436
return buf
401
437
402
- def __exit__ (self , et , ev , tb ):
438
+ def _exit_impl (self , et , ev , tb ):
403
439
# Check that we were nested correctly
404
440
FAIL = []
405
- if self .tee ._stdout is not None and self .tee .STDOUT is not sys .stdout :
406
- FAIL .append (
407
- 'Captured output (%s) does not match sys.stdout (%s).'
408
- % (self .tee ._stdout , sys .stdout )
409
- )
410
- if self .tee ._stderr is not None and self .tee .STDERR is not sys .stderr :
411
- FAIL .append (
412
- 'Captured output (%s) does not match sys.stderr (%s).'
413
- % (self .tee ._stdout , sys .stdout )
414
- )
441
+ if self .tee is not None :
442
+ if self .tee ._stdout is not None and self .tee .STDOUT is not sys .stdout :
443
+ FAIL .append (
444
+ 'Captured output (%s) does not match sys.stdout (%s).'
445
+ % (self .tee ._stdout , sys .stdout )
446
+ )
447
+ if self .tee ._stderr is not None and self .tee .STDERR is not sys .stderr :
448
+ FAIL .append (
449
+ 'Captured output (%s) does not match sys.stderr (%s).'
450
+ % (self .tee ._stdout , sys .stdout )
451
+ )
415
452
# Exit all context managers. This includes
416
453
# - Restore any file descriptors we commandeered
417
454
# - Close / join the TeeStream
418
455
# - Close any opened files
419
456
FAIL .extend (self ._exit_context_stack (et , ev , tb ))
420
- sys .stdout , sys .stderr = self .old
421
- self .old = None
457
+ if self .old is not None :
458
+ sys .stdout , sys .stderr = self .old
459
+ self .old = None
422
460
self .tee = None
423
461
self .output_stream = None
424
462
if FAIL :
@@ -667,15 +705,18 @@ def close(self, in_exception=False):
667
705
668
706
# Join all stream processing threads
669
707
_poll = _poll_interval
708
+ _timeout = 0.0
670
709
FAIL = False
671
710
while True :
672
711
for th in self ._threads :
673
712
th .join (_poll )
713
+ _timeout += _poll
674
714
self ._threads [:] = [th for th in self ._threads if th .is_alive ()]
675
715
if not self ._threads :
676
716
break
677
- _poll *= 2
678
- if _poll_timeout <= _poll < 2 * _poll_timeout :
717
+ if _poll < _poll_timeout :
718
+ _poll *= 2.0
719
+ if _poll_timeout * 0.5 <= _poll < _poll_timeout :
679
720
if in_exception :
680
721
# We are already processing an exception: no reason
681
722
# to trigger another, nor to deadlock for an
@@ -687,7 +728,7 @@ def close(self, in_exception=False):
687
728
"Significant delay observed waiting to join reader "
688
729
"threads, possible output stream deadlock"
689
730
)
690
- elif _poll >= _poll_timeout_deadlock :
731
+ elif _timeout > _poll_timeout_deadlock :
691
732
logger .error ("TeeStream: deadlock observed joining reader threads" )
692
733
# Defer raising the exception until after we have
693
734
# cleaned things up
@@ -713,7 +754,8 @@ def __exit__(self, et, ev, tb):
713
754
if not self ._enter_count :
714
755
raise RuntimeError ("TeeStream: exiting a context that was not entered" )
715
756
self ._enter_count -= 1
716
- self .close (et is not None )
757
+ if not self ._enter_count :
758
+ self .close (et is not None )
717
759
718
760
def __del__ (self ):
719
761
# Implement __del__ to guarantee that file descriptors are closed
@@ -785,19 +827,18 @@ def _mergedReader(self):
785
827
if _mswindows :
786
828
for handle in list (handles ):
787
829
try :
788
- if handle .flush :
789
- flush = True
790
- handle .flush = False
791
830
pipe = get_osfhandle (handle .read_pipe )
792
831
numAvail = PeekNamedPipe (pipe , 0 )[1 ]
793
832
if numAvail :
794
833
result , new_data = ReadFile (pipe , numAvail , None )
795
834
handle .decoder_buffer += new_data
796
835
break
836
+ elif handle .flush :
837
+ break
797
838
except :
798
839
handles .remove (handle )
799
840
new_data = '' # not None so the poll interval doesn't increase
800
- if new_data is None and not flush :
841
+ if new_data is None and not handle . flush :
801
842
# PeekNamedPipe is non-blocking; to avoid swamping
802
843
# the core, sleep for a "short" amount of time
803
844
time .sleep (_poll )
@@ -822,15 +863,14 @@ def _mergedReader(self):
822
863
else :
823
864
for handle in handles :
824
865
if handle .flush :
825
- new_data = ''
826
866
break
827
867
else :
828
- new_data = None
829
868
continue
830
869
831
- if handle .flush :
832
- flush = True
833
- handle .flush = False
870
+ if handle .flush :
871
+ new_data = ''
872
+ flush = True
873
+ handle .flush = False
834
874
835
875
# At this point, we have new data sitting in the
836
876
# handle.decoder_buffer
0 commit comments