-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add microphone input, strip markdown, fix system prompt #135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,139 @@ | ||
| import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; | ||
| import { renderHook, act } from '@testing-library/react'; | ||
| import { useMicrophone } from '../../hooks/useMicrophone'; | ||
|
|
||
| // Mock getUserMedia | ||
| const mockGetUserMedia = vi.fn(); | ||
| const mockTrackStop = vi.fn(); | ||
|
|
||
| const mockStream = { | ||
| getTracks: () => [{ stop: mockTrackStop }], | ||
| }; | ||
|
|
||
| // Mock AudioContext + ScriptProcessorNode | ||
| const mockDisconnect = vi.fn(); | ||
| const mockConnect = vi.fn(); | ||
| const mockProcessorConnect = vi.fn(); | ||
| const mockClose = vi.fn(); | ||
|
|
||
| let audioProcessHandler: ((e: { inputBuffer: { getChannelData: (ch: number) => Float32Array } }) => void) | null = null; | ||
|
|
||
| const mockProcessor = { | ||
| connect: mockProcessorConnect, | ||
| disconnect: mockDisconnect, | ||
| set onaudioprocess(fn: typeof audioProcessHandler) { | ||
| audioProcessHandler = fn; | ||
| }, | ||
| get onaudioprocess() { | ||
| return audioProcessHandler; | ||
| }, | ||
| }; | ||
|
|
||
| const mockSource = { | ||
| connect: mockConnect, | ||
| }; | ||
|
|
||
| class MockAudioContext { | ||
| sampleRate = 16000; | ||
| close = mockClose; | ||
| destination = {}; | ||
| createMediaStreamSource = vi.fn(() => mockSource); | ||
| createScriptProcessor = vi.fn(() => mockProcessor); | ||
| } | ||
|
|
||
| beforeEach(() => { | ||
| vi.clearAllMocks(); | ||
| audioProcessHandler = null; | ||
| mockGetUserMedia.mockResolvedValue(mockStream); | ||
| vi.stubGlobal('AudioContext', MockAudioContext); | ||
| vi.stubGlobal('navigator', { | ||
| mediaDevices: { getUserMedia: mockGetUserMedia }, | ||
| }); | ||
| }); | ||
|
|
||
| afterEach(() => { | ||
| vi.unstubAllGlobals(); | ||
| }); | ||
|
|
||
| describe('useMicrophone', () => { | ||
| it('initial isRecording is false', () => { | ||
| const { result } = renderHook(() => useMicrophone()); | ||
| expect(result.current.isRecording).toBe(false); | ||
| }); | ||
|
|
||
| it('start requests microphone and sets isRecording to true', async () => { | ||
| const { result } = renderHook(() => useMicrophone()); | ||
| const onData = vi.fn(); | ||
|
|
||
| await act(async () => { | ||
| await result.current.start(onData); | ||
| }); | ||
|
|
||
| expect(mockGetUserMedia).toHaveBeenCalledWith( | ||
| expect.objectContaining({ | ||
| audio: expect.objectContaining({ | ||
| echoCancellation: true, | ||
| noiseSuppression: true, | ||
| }), | ||
| }), | ||
| ); | ||
| expect(result.current.isRecording).toBe(true); | ||
| }); | ||
|
|
||
| it('sends PCM data via onData callback when audio is processed', async () => { | ||
| const { result } = renderHook(() => useMicrophone()); | ||
| const onData = vi.fn(); | ||
|
|
||
| await act(async () => { | ||
| await result.current.start(onData); | ||
| }); | ||
|
|
||
| // Simulate audio processing | ||
| const float32 = new Float32Array([0.5, -0.5, 0, 1.0]); | ||
| act(() => { | ||
| audioProcessHandler?.({ | ||
| inputBuffer: { getChannelData: () => float32 }, | ||
| }); | ||
| }); | ||
|
|
||
| expect(onData).toHaveBeenCalledTimes(1); | ||
| const buffer = onData.mock.calls[0][0] as ArrayBuffer; | ||
| expect(buffer).toBeInstanceOf(ArrayBuffer); | ||
|
|
||
| // Verify Int16 conversion | ||
| const int16 = new Int16Array(buffer); | ||
| expect(int16.length).toBe(4); | ||
| expect(int16[0]).toBeGreaterThan(0); // 0.5 → positive | ||
| expect(int16[1]).toBeLessThan(0); // -0.5 → negative | ||
| }); | ||
|
|
||
| it('stop cleans up resources and sets isRecording to false', async () => { | ||
| const { result } = renderHook(() => useMicrophone()); | ||
| const onData = vi.fn(); | ||
|
|
||
| await act(async () => { | ||
| await result.current.start(onData); | ||
| }); | ||
| expect(result.current.isRecording).toBe(true); | ||
|
|
||
| act(() => { | ||
| result.current.stop(); | ||
| }); | ||
|
|
||
| expect(mockDisconnect).toHaveBeenCalled(); | ||
| expect(mockClose).toHaveBeenCalled(); | ||
| expect(mockTrackStop).toHaveBeenCalled(); | ||
| expect(result.current.isRecording).toBe(false); | ||
| }); | ||
|
|
||
| it('stop is safe to call without start', () => { | ||
| const { result } = renderHook(() => useMicrophone()); | ||
|
|
||
| // Should not throw | ||
| act(() => { | ||
| result.current.stop(); | ||
| }); | ||
|
|
||
| expect(result.current.isRecording).toBe(false); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| import { describe, it, expect } from 'vitest'; | ||
| import { stripMarkdown } from '../../lib/stripMarkdown'; | ||
|
|
||
| describe('stripMarkdown', () => { | ||
| it('strips bold markers', () => { | ||
| expect(stripMarkdown('**hello** world')).toBe('hello world'); | ||
| }); | ||
|
|
||
| it('strips italic markers', () => { | ||
| expect(stripMarkdown('*hello* world')).toBe('hello world'); | ||
| }); | ||
|
|
||
| it('strips bold+italic markers', () => { | ||
| expect(stripMarkdown('***hello*** world')).toBe('hello world'); | ||
| }); | ||
|
|
||
| it('strips headers', () => { | ||
| expect(stripMarkdown('## Hello\nWorld')).toBe('Hello\nWorld'); | ||
| }); | ||
|
|
||
| it('strips inline code', () => { | ||
| expect(stripMarkdown('use `npm install`')).toBe('use npm install'); | ||
| }); | ||
|
|
||
| it('strips links preserving text', () => { | ||
| expect(stripMarkdown('[click here](https://example.com)')).toBe('click here'); | ||
| }); | ||
|
|
||
| it('strips bullet list markers', () => { | ||
| expect(stripMarkdown('- item one\n- item two')).toBe('item one\nitem two'); | ||
| }); | ||
|
|
||
| it('strips blockquotes', () => { | ||
| expect(stripMarkdown('> quoted text')).toBe('quoted text'); | ||
| }); | ||
|
|
||
| it('returns plain text unchanged', () => { | ||
| expect(stripMarkdown('Hello there!')).toBe('Hello there!'); | ||
| }); | ||
|
|
||
| it('handles the exact model output from screenshot', () => { | ||
| const input = '**Initiating Welcome Sequence** I\'ve just received the user\'s connection.'; | ||
| const output = stripMarkdown(input); | ||
| expect(output).not.toContain('**'); | ||
| expect(output).toContain('Initiating Welcome Sequence'); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,8 @@ | |
| import { useCallback, useEffect, useRef, useState } from 'react'; | ||
| import { useWebSocket, ServerMessage } from '../hooks/useWebSocket'; | ||
| import { useAudio } from '../hooks/useAudio'; | ||
| import { useMicrophone } from '../hooks/useMicrophone'; | ||
| import { stripMarkdown } from '../lib/stripMarkdown'; | ||
| import SceneDisplay from '../components/SceneDisplay'; | ||
| import SessionTransition from '../components/SessionTransition'; | ||
| import OnboardingFlow, { type OnboardingStage } from '../components/OnboardingFlow'; | ||
|
|
@@ -37,6 +39,7 @@ export default function Home() { | |
| const [bgmUrl, setBgmUrl] = useState<string | null>(null); | ||
|
|
||
| const { initAudioContext, playPCM, cleanup: cleanupAudio } = useAudio(); | ||
| const mic = useMicrophone(); | ||
|
|
||
| const handleMessage = useCallback((msg: ServerMessage) => { | ||
| switch (msg.type) { | ||
|
|
@@ -58,7 +61,7 @@ export default function Home() { | |
| setOnboardingStage('reunion'); | ||
| break; | ||
| case 'transcript': | ||
| setTranscript(msg.text); | ||
| setTranscript(stripMarkdown(msg.text)); | ||
| break; | ||
| case 'youtube_videos': | ||
| setVideos(msg.videos as YouTubeVideo[]); | ||
|
|
@@ -124,9 +127,16 @@ export default function Home() { | |
| initAudioContext(); | ||
| connect(); | ||
| setStarted(true); | ||
| // Start microphone after a short delay to ensure WebSocket is connected. | ||
| setTimeout(() => { | ||
| mic.start((pcm) => { | ||
| send({ type: 'audio', data: pcm }); | ||
| }); | ||
| }, 500); | ||
|
Comment on lines
+131
to
+135
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The delayed Useful? React with 👍 / 👎. |
||
| }; | ||
|
Comment on lines
+130
to
136
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

WebSocket 연결 전 마이크 시작 경합 조건

500ms 고정 지연은 WebSocket이 연결되었음을 보장하지 않습니다. 네트워크 상태에 따라 연결이 더 오래 걸릴 수 있고, 연결 실패 시에도 마이크가 시작됩니다.

🔧 WebSocket 상태 기반 마이크 시작으로 개선

+ // Start mic when WebSocket connects
+ useEffect(() => {
+ if (started && state === 'connected' && !mic.isRecording) {
+ mic.start((pcm) => {
+ send({ type: 'audio', data: pcm });
+ });
+ }
+ }, [started, state, mic, send]);
+
const handleStart = () => {
initAudioContext();
connect();
setStarted(true);
- // Start microphone after a short delay to ensure WebSocket is connected.
- setTimeout(() => {
- mic.start((pcm) => {
- send({ type: 'audio', data: pcm });
- });
- }, 500);
};

🤖 Prompt for AI Agents |
||
|
|
||
| const handleStop = () => { | ||
| mic.stop(); | ||
| disconnect(); | ||
| cleanupAudio(); | ||
| setStarted(false); | ||
|
|
@@ -384,6 +394,18 @@ export default function Home() { | |
| <span style={{ fontSize: '0.75rem', color: 'var(--color-muted)' }}> | ||
| {state} | ||
| </span> | ||
| {mic.isRecording && ( | ||
| <div | ||
| style={{ | ||
| width: 8, | ||
| height: 8, | ||
| borderRadius: '50%', | ||
| background: '#ef4444', | ||
| animation: 'pulse 1.5s infinite', | ||
| }} | ||
| title="Microphone active" | ||
| /> | ||
| )} | ||
| </div> | ||
|
|
||
| {/* Transcript overlay */} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| import { useCallback, useRef, useState } from 'react'; | ||
|
|
||
| // Target sample rate for Gemini Live API input. | ||
| const TARGET_SAMPLE_RATE = 16000; | ||
|
|
||
| // ScriptProcessorNode buffer size (4096 is a good balance of latency vs. efficiency). | ||
| const BUFFER_SIZE = 4096; | ||
|
|
||
| export function useMicrophone() { | ||
| const streamRef = useRef<MediaStream | null>(null); | ||
| const ctxRef = useRef<AudioContext | null>(null); | ||
| const processorRef = useRef<ScriptProcessorNode | null>(null); | ||
| const [isRecording, setIsRecording] = useState(false); | ||
| const onDataRef = useRef<((pcm: ArrayBuffer) => void) | null>(null); | ||
|
|
||
| const start = useCallback(async (onData: (pcm: ArrayBuffer) => void) => { | ||
| if (streamRef.current) return; // already recording | ||
|
|
||
| onDataRef.current = onData; | ||
|
|
||
| const stream = await navigator.mediaDevices.getUserMedia({ | ||
| audio: { | ||
| echoCancellation: true, | ||
| noiseSuppression: true, | ||
| sampleRate: TARGET_SAMPLE_RATE, | ||
| }, | ||
| }); | ||
| streamRef.current = stream; | ||
|
Comment on lines
+21
to
+28
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

getUserMedia 오류 처리 부재

🛡️ try-catch 또는 에러 상태 추가 권장

+ const [error, setError] = useState<string | null>(null);
+
const start = useCallback(async (onData: (pcm: ArrayBuffer) => void) => {
if (streamRef.current) return; // already recording
onDataRef.current = onData;
- const stream = await navigator.mediaDevices.getUserMedia({
- audio: {
- echoCancellation: true,
- noiseSuppression: true,
- sampleRate: TARGET_SAMPLE_RATE,
- },
- });
+ let stream: MediaStream;
+ try {
+ stream = await navigator.mediaDevices.getUserMedia({
+ audio: {
+ echoCancellation: true,
+ noiseSuppression: true,
+ sampleRate: TARGET_SAMPLE_RATE,
+ },
+ });
+ } catch (err) {
+ setError(err instanceof Error ? err.message : 'Microphone access denied');
+ return;
+ }
+ setError(null);
streamRef.current = stream;

🤖 Prompt for AI Agents |
||
|
|
||
| const ctx = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE }); | ||
| ctxRef.current = ctx; | ||
|
|
||
| const source = ctx.createMediaStreamSource(stream); | ||
| const processor = ctx.createScriptProcessor(BUFFER_SIZE, 1, 1); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

You would need to create a worklet file (e.g., `audio-processor.js`). Here's an example of what the worklet and the updated hook would look like:
class AudioProcessor extends AudioWorkletProcessor {
process(inputs, outputs, parameters) {
const pcm = inputs[0][0];
if (!pcm) return true;
// Convert Float32 [-1, 1] to Int16 [-32768, 32767]
const int16 = new Int16Array(pcm.length);
for (let i = 0; i < pcm.length; i++) {
const s = Math.max(-1, Math.min(1, pcm[i]));
int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
}
this.port.postMessage(int16.buffer, [int16.buffer]);
return true;
}
}
registerProcessor('audio-processor', AudioProcessor);

Updated hook:

const start = useCallback(async (onData: (pcm: ArrayBuffer) => void) => {
if (streamRef.current) return;
onDataRef.current = onData;
const stream = await navigator.mediaDevices.getUserMedia({ audio: { echoCancellation: true, noiseSuppression: true, sampleRate: TARGET_SAMPLE_RATE } });
streamRef.current = stream;
const ctx = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE });
ctxRef.current = ctx;
await ctx.audioWorklet.addModule('/audio-processor.js');
const workletNode = new AudioWorkletNode(ctx, 'audio-processor');
processorRef.current = workletNode; // Store for cleanup
workletNode.port.onmessage = (event) => {
onDataRef.current?.(event.data);
};
const source = ctx.createMediaStreamSource(stream);
source.connect(workletNode);
workletNode.connect(ctx.destination);
setIsRecording(true);
}, []);

Note that |
||
| processorRef.current = processor; | ||
|
|
||
| processor.onaudioprocess = (e) => { | ||
| const float32 = e.inputBuffer.getChannelData(0); | ||
| // Convert Float32 [-1, 1] → Int16 [-32768, 32767] | ||
| const int16 = new Int16Array(float32.length); | ||
| for (let i = 0; i < float32.length; i++) { | ||
| const s = Math.max(-1, Math.min(1, float32[i])); | ||
| int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff; | ||
| } | ||
| onDataRef.current?.(int16.buffer); | ||
| }; | ||
|
|
||
| source.connect(processor); | ||
| processor.connect(ctx.destination); | ||
| setIsRecording(true); | ||
| }, []); | ||
|
|
||
| const stop = useCallback(() => { | ||
| processorRef.current?.disconnect(); | ||
| processorRef.current = null; | ||
|
|
||
| ctxRef.current?.close(); | ||
| ctxRef.current = null; | ||
|
|
||
| streamRef.current?.getTracks().forEach((t) => t.stop()); | ||
| streamRef.current = null; | ||
|
|
||
| onDataRef.current = null; | ||
| setIsRecording(false); | ||
| }, []); | ||
|
|
||
| return { start, stop, isRecording }; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `setTimeout` with a fixed delay to wait for the WebSocket connection is fragile and can lead to race conditions on slower networks. The microphone might start sending data before the connection is established, or the delay might be unnecessarily long.

A more robust approach is to use a `useEffect` hook to react to the WebSocket connection state. This ensures the microphone is started exactly when the connection becomes available.

Please remove this `setTimeout` and add the following `useEffect` to the component: