Skip to content

Commit 49ae96b

Browse files
author
Todd Sharp
committed
update nova audio track
1 parent 7e2b17c commit 49ae96b

File tree

1 file changed

+1
-119
lines changed

1 file changed

+1
-119
lines changed

stages-nova-s2s/agent_audio_track.py

Lines changed: 1 addition & 119 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,6 @@ def __init__(self, agent_video_track=None, sample_rate=24000, channels=1, chunk_
3838
self.last_batch_time = time.time()
3939
self.batch_timeout = 0.040 # Force batch processing after 40ms max
4040

41-
# Track current audio session
42-
self.current_audio_session = None
43-
4441
# WebRTC stats debugging
4542
self.last_stats_time = 0
4643
self.stats_interval = 5.0 # Print stats every 5 seconds
@@ -56,13 +53,6 @@ def __init__(self, agent_video_track=None, sample_rate=24000, channels=1, chunk_
5653
# Fixed timing for consistent audio frame rate
5754
self.target_fps = 50.0 # Target 50 FPS (20ms chunks)
5855

59-
# Dynamic chunk sizing
60-
self.base_chunk_size_bytes = self.chunk_size_bytes # Store original
61-
self.last_network_check = time.time()
62-
self.network_check_interval = 3.0 # Check network every 3 seconds
63-
self.recent_rtt_samples = []
64-
self.recent_jitter_samples = []
65-
6656
logger.info(
6757
f"🔊 AgentAudioTrack initialized - chunk_size: {self.chunk_size_bytes} bytes (~{self.chunk_size_bytes//2/sample_rate*1000:.1f}ms)"
6858
)
@@ -94,113 +84,6 @@ async def _print_debug_stats(self):
9484
f"Buffer empty rate: {buffer_empty_rate:.2%}, Batch: {batch_buffer_size} bytes"
9585
)
9686

97-
# Try to get WebRTC stats if peer connection is available
98-
99-
if self.peer_connection:
100-
try:
101-
logger.debug("🔍 Attempting to get WebRTC stats...")
102-
stats = await self.peer_connection.getStats()
103-
logger.debug(f"📊 Got {len(stats)} WebRTC stats objects")
104-
105-
# Debug: print all stat types we're seeing
106-
stat_types = [getattr(stat, "type", "no-type") for stat in stats.values() if hasattr(stat, "type")]
107-
logger.debug(f"📊 Stat types found: {set(stat_types)}")
108-
109-
# Look for relevant audio stats
110-
found_audio_stats = False
111-
found_network_stats = False
112-
113-
for stat in stats.values():
114-
if hasattr(stat, "type"):
115-
# Audio outbound RTP stats
116-
if stat.type == "outbound-rtp" and hasattr(stat, "kind") and stat.kind == "audio":
117-
found_audio_stats = True
118-
logger.debug(
119-
f"📡 WebRTC Audio Out - Packets sent: {getattr(stat, 'packetsSent', 'N/A')}, "
120-
f"Bytes sent: {getattr(stat, 'bytesSent', 'N/A')}"
121-
)
122-
# Network stats from remote inbound RTP (has RTT and jitter)
123-
elif stat.type == "remote-inbound-rtp":
124-
found_network_stats = True
125-
rtt = getattr(stat, "roundTripTime", None)
126-
jitter = getattr(stat, "jitter", None)
127-
packets_lost = getattr(stat, "packetsLost", None)
128-
if rtt is not None:
129-
logger.debug(f"🌐 Network - RTT: {rtt*1000:.1f}ms, " f"Jitter: {jitter}, Packets lost: {packets_lost}")
130-
# Collect network samples for chunk size adaptation
131-
# DISABLED: self._collect_network_sample(rtt, jitter)
132-
133-
if not found_audio_stats:
134-
logger.debug("⚠️ No outbound audio RTP stats found")
135-
if not found_network_stats:
136-
logger.debug("⚠️ No successful candidate-pair stats found")
137-
138-
except Exception as e:
139-
logger.info(f"❌ Could not get WebRTC stats: {e}")
140-
else:
141-
logger.debug("⚠️ No peer connection available for stats")
142-
143-
# Note: Removed adaptive timing adjustment as it was causing performance issues
144-
145-
# Adaptive chunk sizing based on network conditions
146-
# DISABLED: Causes audio jitter due to frequent chunk size changes
147-
# self._adjust_chunk_size_based_on_network()
148-
149-
def _collect_network_sample(self, rtt, jitter):
150-
"""Collect network performance samples for chunk size adaptation"""
151-
current_time = time.time()
152-
153-
# Keep recent samples (last 10 seconds worth)
154-
self.recent_rtt_samples.append((current_time, rtt))
155-
self.recent_jitter_samples.append((current_time, jitter))
156-
157-
# Remove old samples
158-
cutoff_time = current_time - 10.0
159-
self.recent_rtt_samples = [(t, v) for t, v in self.recent_rtt_samples if t > cutoff_time]
160-
self.recent_jitter_samples = [(t, v) for t, v in self.recent_jitter_samples if t > cutoff_time]
161-
162-
def _adjust_chunk_size_based_on_network(self):
163-
"""Adjust chunk size based on network conditions"""
164-
current_time = time.time()
165-
if current_time - self.last_network_check >= self.network_check_interval:
166-
self.last_network_check = current_time
167-
168-
if len(self.recent_rtt_samples) >= 3 and len(self.recent_jitter_samples) >= 3:
169-
# Calculate network stability metrics
170-
rtt_values = [v for _, v in self.recent_rtt_samples]
171-
jitter_values = [v for _, v in self.recent_jitter_samples]
172-
173-
avg_rtt = sum(rtt_values) / len(rtt_values)
174-
avg_jitter = sum(jitter_values) / len(jitter_values)
175-
rtt_variance = max(rtt_values) - min(rtt_values)
176-
177-
# Determine optimal chunk size based on network conditions
178-
old_chunk_size = self.chunk_size_bytes
179-
180-
if avg_jitter > 1500 or rtt_variance > 0.050: # High jitter or RTT variance
181-
# Use larger chunks for stability (40ms)
182-
self.chunk_size_bytes = int(self.sample_rate * 0.040 * 2) # 40ms chunks
183-
reason = f"high jitter ({avg_jitter:.0f}) or RTT variance ({rtt_variance*1000:.1f}ms)"
184-
elif avg_rtt > 0.080: # High RTT (>80ms)
185-
# Use larger chunks to compensate for latency (30ms)
186-
self.chunk_size_bytes = int(self.sample_rate * 0.030 * 2) # 30ms chunks
187-
reason = f"high RTT ({avg_rtt*1000:.1f}ms)"
188-
elif avg_jitter < 100 and avg_rtt < 0.040: # Excellent network
189-
# Use smaller chunks for low latency (15ms)
190-
self.chunk_size_bytes = int(self.sample_rate * 0.015 * 2) # 15ms chunks
191-
reason = f"excellent network (RTT: {avg_rtt*1000:.1f}ms, jitter: {avg_jitter:.0f})"
192-
else:
193-
# Use default chunk size (20ms)
194-
self.chunk_size_bytes = self.base_chunk_size_bytes
195-
reason = "balanced network conditions"
196-
197-
# Update minimum buffer threshold based on new chunk size
198-
self.min_buffer_threshold = self.chunk_size_bytes * 3
199-
200-
if old_chunk_size != self.chunk_size_bytes:
201-
chunk_duration_ms = (self.chunk_size_bytes // 2) / self.sample_rate * 1000
202-
logger.info(f"📦 Chunk size adapted: {old_chunk_size} → {self.chunk_size_bytes} bytes " f"({chunk_duration_ms:.1f}ms) - {reason}")
203-
20487
async def recv(self):
20588
"""Generate and return audio frames from Nova responses - back to basics"""
20689
try:
@@ -270,7 +153,7 @@ async def recv(self):
270153
# Fixed timing to maintain proper audio frame rate
271154
# For 24kHz audio with 20ms chunks (480 samples), we should target 50 FPS
272155
# Only sleep if we're running at or above target FPS to avoid slowing down low FPS streams
273-
target_sleep = 0.020 # 20ms = 50 FPS
156+
target_sleep = 0.015 # 15ms sleep (shorter than the 20ms frame period at 50 FPS)
274157

275158
if self.avg_fps >= 50:
276159
if buffer_was_empty:
@@ -353,7 +236,6 @@ async def stop_current_audio(self):
353236
async with self.buffer_lock:
354237
self.audio_buffer.clear()
355238
self.batch_buffer.clear() # Clear batch buffer too
356-
self.current_audio_session = None
357239
logger.info("🛑 Audio buffer cleared due to interruption")
358240

359241
# Reset video throb to idle state

0 commit comments

Comments
 (0)