7
7
import threading
8
8
9
9
import requests
10
+ from urllib3 .util .retry import Retry
10
11
11
12
from runpod .serverless .modules .rp_logger import RunPodLogger
12
13
from .worker_state import Jobs , WORKER_ID
@@ -22,8 +23,27 @@ class Heartbeat:
22
23
PING_URL = PING_URL .replace ('$RUNPOD_POD_ID' , WORKER_ID )
23
24
PING_INTERVAL = int (os .environ .get ('RUNPOD_PING_INTERVAL' , 10000 ))// 1000
24
25
25
- _session = requests .Session ()
26
- _session .headers .update ({"Authorization" : f"{ os .environ .get ('RUNPOD_AI_API_KEY' )} " })
26
def __init__(self, pool_connections=100, retries=3) -> None:
    '''
    Initializes the Heartbeat class.

    Creates a per-instance requests session that carries the
    RUNPOD_AI_API_KEY environment value in its Authorization header and
    mounts a retrying, pooled HTTP adapter for both http:// and https://.

    Args:
        pool_connections: Used for both the number of connection pools
            and the maximum size of each pool on the adapter.
        retries: Total number of retries allowed per request.
    '''
    self._session = requests.Session()
    self._session.headers.update(
        {"Authorization": f"{os.environ.get('RUNPOD_AI_API_KEY')}"}
    )

    retry_options = {
        "total": retries,
        "status_forcelist": [429, 500, 502, 503, 504],
        "backoff_factor": 1,
    }
    try:
        # urllib3 >= 1.26 names this option `allowed_methods`; the old
        # `method_whitelist` keyword was removed in urllib3 2.0 and would
        # raise TypeError there.
        retry_strategy = Retry(allowed_methods=["GET"], **retry_options)
    except TypeError:
        # urllib3 < 1.26 only understands the legacy keyword name.
        retry_strategy = Retry(method_whitelist=["GET"], **retry_options)

    adapter = requests.adapters.HTTPAdapter(
        pool_connections=pool_connections,
        pool_maxsize=pool_connections,
        max_retries=retry_strategy,
    )
    # Route both schemes through the same retrying adapter.
    self._session.mount('http://', adapter)
    self._session.mount('https://', adapter)
27
47
28
48
def start_ping (self , test = False ):
29
49
'''
@@ -40,13 +60,8 @@ def ping_loop(self, test=False):
40
60
Sends heartbeat pings to the Runpod server.
41
61
'''
42
62
while True :
43
- try :
44
- self ._send_ping ()
45
- time .sleep (self .PING_INTERVAL )
46
- except requests .RequestException as err :
47
- log .error (f"Ping Error: { err } , attempting to restart ping." )
48
- if test :
49
- return
63
+ self ._send_ping ()
64
+ time .sleep (self .PING_INTERVAL )
50
65
51
66
if test :
52
67
return
@@ -58,9 +73,13 @@ def _send_ping(self):
58
73
job_ids = jobs .get_job_list ()
59
74
ping_params = {'job_id' : job_ids } if job_ids is not None else None
60
75
61
- result = self ._session .get (
62
- self .PING_URL , params = ping_params ,
63
- timeout = self .PING_INTERVAL
64
- )
76
+ try :
77
+ result = self ._session .get (
78
+ self .PING_URL , params = ping_params ,
79
+ timeout = self .PING_INTERVAL
80
+ )
81
+
82
+ log .debug (f"Heartbeat Sent | URL: { self .PING_URL } | Status: { result .status_code } " )
65
83
66
- log .debug (f"Heartbeat Sent | URL: { self .PING_URL } | Status: { result .status_code } " )
84
+ except requests .RequestException as err :
85
+ log .error (f"Ping Request Error: { err } , attempting to restart ping." )
0 commit comments