@@ -38,9 +38,6 @@ def __init__(self, agent_video_track=None, sample_rate=24000, channels=1, chunk_
3838 self .last_batch_time = time .time ()
3939 self .batch_timeout = 0.040 # Force batch processing after 40ms max
4040
41- # Track current audio session
42- self .current_audio_session = None
43-
4441 # WebRTC stats debugging
4542 self .last_stats_time = 0
4643 self .stats_interval = 5.0 # Print stats every 5 seconds
@@ -56,13 +53,6 @@ def __init__(self, agent_video_track=None, sample_rate=24000, channels=1, chunk_
5653 # Fixed timing for consistent audio frame rate
5754 self .target_fps = 50.0 # Target 50 FPS (20ms chunks)
5855
59- # Dynamic chunk sizing
60- self .base_chunk_size_bytes = self .chunk_size_bytes # Store original
61- self .last_network_check = time .time ()
62- self .network_check_interval = 3.0 # Check network every 3 seconds
63- self .recent_rtt_samples = []
64- self .recent_jitter_samples = []
65-
6656 logger .info (
6757 f"🔊 AgentAudioTrack initialized - chunk_size: { self .chunk_size_bytes } bytes (~{ self .chunk_size_bytes // 2 / sample_rate * 1000 :.1f} ms)"
6858 )
@@ -94,113 +84,6 @@ async def _print_debug_stats(self):
9484 f"Buffer empty rate: { buffer_empty_rate :.2%} , Batch: { batch_buffer_size } bytes"
9585 )
9686
97- # Try to get WebRTC stats if peer connection is available
98-
99- if self .peer_connection :
100- try :
101- logger .debug ("🔍 Attempting to get WebRTC stats..." )
102- stats = await self .peer_connection .getStats ()
103- logger .debug (f"📊 Got { len (stats )} WebRTC stats objects" )
104-
105- # Debug: print all stat types we're seeing
106- stat_types = [getattr (stat , "type" , "no-type" ) for stat in stats .values () if hasattr (stat , "type" )]
107- logger .debug (f"📊 Stat types found: { set (stat_types )} " )
108-
109- # Look for relevant audio stats
110- found_audio_stats = False
111- found_network_stats = False
112-
113- for stat in stats .values ():
114- if hasattr (stat , "type" ):
115- # Audio outbound RTP stats
116- if stat .type == "outbound-rtp" and hasattr (stat , "kind" ) and stat .kind == "audio" :
117- found_audio_stats = True
118- logger .debug (
119- f"📡 WebRTC Audio Out - Packets sent: { getattr (stat , 'packetsSent' , 'N/A' )} , "
120- f"Bytes sent: { getattr (stat , 'bytesSent' , 'N/A' )} "
121- )
122- # Network stats from remote inbound RTP (has RTT and jitter)
123- elif stat .type == "remote-inbound-rtp" :
124- found_network_stats = True
125- rtt = getattr (stat , "roundTripTime" , None )
126- jitter = getattr (stat , "jitter" , None )
127- packets_lost = getattr (stat , "packetsLost" , None )
128- if rtt is not None :
129- logger .debug (f"🌐 Network - RTT: { rtt * 1000 :.1f} ms, " f"Jitter: { jitter } , Packets lost: { packets_lost } " )
130- # Collect network samples for chunk size adaptation
131- # DISABLED: self._collect_network_sample(rtt, jitter)
132-
133- if not found_audio_stats :
134- logger .debug ("⚠️ No outbound audio RTP stats found" )
135- if not found_network_stats :
136- logger .debug ("⚠️ No successful candidate-pair stats found" )
137-
138- except Exception as e :
139- logger .info (f"❌ Could not get WebRTC stats: { e } " )
140- else :
141- logger .debug ("⚠️ No peer connection available for stats" )
142-
143- # Note: Removed adaptive timing adjustment as it was causing performance issues
144-
145- # Adaptive chunk sizing based on network conditions
146- # DISABLED: Causes audio jitter due to frequent chunk size changes
147- # self._adjust_chunk_size_based_on_network()
148-
149- def _collect_network_sample (self , rtt , jitter ):
150- """Collect network performance samples for chunk size adaptation"""
151- current_time = time .time ()
152-
153- # Keep recent samples (last 10 seconds worth)
154- self .recent_rtt_samples .append ((current_time , rtt ))
155- self .recent_jitter_samples .append ((current_time , jitter ))
156-
157- # Remove old samples
158- cutoff_time = current_time - 10.0
159- self .recent_rtt_samples = [(t , v ) for t , v in self .recent_rtt_samples if t > cutoff_time ]
160- self .recent_jitter_samples = [(t , v ) for t , v in self .recent_jitter_samples if t > cutoff_time ]
161-
162- def _adjust_chunk_size_based_on_network (self ):
163- """Adjust chunk size based on network conditions"""
164- current_time = time .time ()
165- if current_time - self .last_network_check >= self .network_check_interval :
166- self .last_network_check = current_time
167-
168- if len (self .recent_rtt_samples ) >= 3 and len (self .recent_jitter_samples ) >= 3 :
169- # Calculate network stability metrics
170- rtt_values = [v for _ , v in self .recent_rtt_samples ]
171- jitter_values = [v for _ , v in self .recent_jitter_samples ]
172-
173- avg_rtt = sum (rtt_values ) / len (rtt_values )
174- avg_jitter = sum (jitter_values ) / len (jitter_values )
175- rtt_variance = max (rtt_values ) - min (rtt_values )
176-
177- # Determine optimal chunk size based on network conditions
178- old_chunk_size = self .chunk_size_bytes
179-
180- if avg_jitter > 1500 or rtt_variance > 0.050 : # High jitter or RTT variance
181- # Use larger chunks for stability (40ms)
182- self .chunk_size_bytes = int (self .sample_rate * 0.040 * 2 ) # 40ms chunks
183- reason = f"high jitter ({ avg_jitter :.0f} ) or RTT variance ({ rtt_variance * 1000 :.1f} ms)"
184- elif avg_rtt > 0.080 : # High RTT (>80ms)
185- # Use larger chunks to compensate for latency (30ms)
186- self .chunk_size_bytes = int (self .sample_rate * 0.030 * 2 ) # 30ms chunks
187- reason = f"high RTT ({ avg_rtt * 1000 :.1f} ms)"
188- elif avg_jitter < 100 and avg_rtt < 0.040 : # Excellent network
189- # Use smaller chunks for low latency (15ms)
190- self .chunk_size_bytes = int (self .sample_rate * 0.015 * 2 ) # 15ms chunks
191- reason = f"excellent network (RTT: { avg_rtt * 1000 :.1f} ms, jitter: { avg_jitter :.0f} )"
192- else :
193- # Use default chunk size (20ms)
194- self .chunk_size_bytes = self .base_chunk_size_bytes
195- reason = "balanced network conditions"
196-
197- # Update minimum buffer threshold based on new chunk size
198- self .min_buffer_threshold = self .chunk_size_bytes * 3
199-
200- if old_chunk_size != self .chunk_size_bytes :
201- chunk_duration_ms = (self .chunk_size_bytes // 2 ) / self .sample_rate * 1000
202- logger .info (f"📦 Chunk size adapted: { old_chunk_size } → { self .chunk_size_bytes } bytes " f"({ chunk_duration_ms :.1f} ms) - { reason } " )
203-
20487 async def recv (self ):
20588 """Generate and return audio frames from Nova responses - back to basics"""
20689 try :
@@ -270,7 +153,7 @@ async def recv(self):
270153 # Fixed timing to maintain proper audio frame rate
271154 # For 24kHz audio with 20ms chunks (480 samples), we should target 50 FPS
272155 # Only sleep if we're running at or above target FPS to avoid slowing down low FPS streams
273- target_sleep = 0.020 # 20ms = 50 FPS
156+ target_sleep = 0.015 # 15ms sleep, shorter than the 20ms frame period at 50 FPS
274157
275158 if self .avg_fps >= 50 :
276159 if buffer_was_empty :
@@ -353,7 +236,6 @@ async def stop_current_audio(self):
353236 async with self .buffer_lock :
354237 self .audio_buffer .clear ()
355238 self .batch_buffer .clear () # Clear batch buffer too
356- self .current_audio_session = None
357239 logger .info ("🛑 Audio buffer cleared due to interruption" )
358240
359241 # Reset video throb to idle state
0 commit comments