From c09612439312e4405cfc8f21228d4931ffc4c24e Mon Sep 17 00:00:00 2001 From: bgrozev Date: Tue, 9 Aug 2022 14:19:05 -0400 Subject: [PATCH] feat: Silence detection (#1926) feat: Silence detection. --- .../org/jitsi/videobridge/Conference.java | 20 +++-- .../videobridge/ConferenceSpeechActivity.java | 81 ++++++++++++++----- .../ConferenceSpeechActivityConfig.kt | 11 ++- .../message/BridgeChannelMessage.kt | 7 +- jvb/src/main/resources/reference.conf | 8 ++ .../jitsi/videobridge/SpeechActivityTest.kt | 6 +- pom.xml | 2 +- 7 files changed, 103 insertions(+), 32 deletions(-) diff --git a/jvb/src/main/java/org/jitsi/videobridge/Conference.java b/jvb/src/main/java/org/jitsi/videobridge/Conference.java index d4ac78318e..27446a8482 100644 --- a/jvb/src/main/java/org/jitsi/videobridge/Conference.java +++ b/jvb/src/main/java/org/jitsi/videobridge/Conference.java @@ -500,17 +500,20 @@ private void lastNEndpointsChanged() * dominant speaker. * @param recentSpeakers the list of recent speakers (including the dominant speaker at index 0). */ - private void recentSpeakersChanged(List recentSpeakers, boolean dominantSpeakerChanged) + private void recentSpeakersChanged( + List recentSpeakers, + boolean dominantSpeakerChanged, + boolean silence) { if (!recentSpeakers.isEmpty()) { List recentSpeakersIds = recentSpeakers.stream().map(AbstractEndpoint::getId).collect(Collectors.toList()); logger.info("Recent speakers changed: " + recentSpeakersIds + ", dominant speaker changed: " - + dominantSpeakerChanged); - broadcastMessage(new DominantSpeakerMessage(recentSpeakersIds)); + + dominantSpeakerChanged + " silence:" + silence); + broadcastMessage(new DominantSpeakerMessage(recentSpeakersIds, silence)); - if (dominantSpeakerChanged) + if (dominantSpeakerChanged && !silence) { getVideobridge().getStatistics().totalDominantSpeakerChanges.increment(); if (getEndpointCount() > 2) @@ -1077,7 +1080,7 @@ public void endpointMessageTransportConnected(@NotNull AbstractEndpoint abstract if (!recentSpeakers.isEmpty()) { - endpoint.sendMessage(new DominantSpeakerMessage(recentSpeakers)); + endpoint.sendMessage(new DominantSpeakerMessage(recentSpeakers, speechActivity.isInSilence())); } } } @@ -1522,9 +1525,12 @@ public Object put(String key, Object value) private class SpeechActivityListener implements ConferenceSpeechActivity.Listener { @Override - public void recentSpeakersChanged(List recentSpeakers, boolean dominantSpeakerChanged) + public void recentSpeakersChanged( + List recentSpeakers, + boolean dominantSpeakerChanged, + boolean silence) { - Conference.this.recentSpeakersChanged(recentSpeakers, dominantSpeakerChanged); + Conference.this.recentSpeakersChanged(recentSpeakers, dominantSpeakerChanged, silence); } @Override diff --git a/jvb/src/main/java/org/jitsi/videobridge/ConferenceSpeechActivity.java b/jvb/src/main/java/org/jitsi/videobridge/ConferenceSpeechActivity.java index 6319d09675..527617122a 100644 --- a/jvb/src/main/java/org/jitsi/videobridge/ConferenceSpeechActivity.java +++ b/jvb/src/main/java/org/jitsi/videobridge/ConferenceSpeechActivity.java @@ -54,8 +54,7 @@ public class ConferenceSpeechActivity * The DominantSpeakerIdentification instance which detects/identifies the active/dominant speaker in a * conference. */ - private DominantSpeakerIdentification dominantSpeakerIdentification - = new DominantSpeakerIdentification<>(); + private DominantSpeakerIdentification dominantSpeakerIdentification; /** * The listener to be notified when the dominant speaker or endpoint order changes. @@ -88,7 +87,13 @@ public class ConferenceSpeechActivity */ @NotNull private final RecentSpeakersList recentSpeakers - = new RecentSpeakersList<>(ConferenceSpeechActivityConfig.getConfig().getRecentSpeakersCount() + 1); + = new RecentSpeakersList<>(ConferenceSpeechActivityConfig.config.getRecentSpeakersCount() + 1); + + /** + * Whether we're currently in a period of silence. With silence detection enabled we initialize to `true` because + * (the {@link #dominantSpeakerIdentification} will fire an initial "silence" event and we don't want to act on it. + */ + private boolean inSilence = ConferenceSpeechActivityConfig.config.getEnableSilenceDetection(); /** * The Object used to synchronize the access to the state of this @@ -114,6 +119,14 @@ public ConferenceSpeechActivity(@NotNull Listener listener, Logger parentLogger) new LoggerImpl(ConferenceSpeechActivity.class.getName()) : parentLogger.createChildLogger(ConferenceSpeechActivity.class.getName()); + long silenceTimeoutMs = -1; + if (ConferenceSpeechActivityConfig.config.getEnableSilenceDetection()) + { + silenceTimeoutMs = ConferenceSpeechActivityConfig.config.getSilenceDetectionTimeout().toMillis(); + + } + dominantSpeakerIdentification = new DominantSpeakerIdentification<>(silenceTimeoutMs); + dominantSpeakerIdentification.addActiveSpeakerChangedListener(activeSpeakerChangedListener); int numLoudestToTrack = LoudestConfig.Companion.getRouteLoudestOnly() ? LoudestConfig.Companion.getNumLoudest() : 0; @@ -122,13 +135,18 @@ public ConferenceSpeechActivity(@NotNull Listener listener, Logger parentLogger) LoudestConfig.Companion.getEnergyAlphaPct()); } + boolean isInSilence() + { + return inSilence; + } + /** * Notifies this instance that the underlying {@code dominant speaker identification} has elected a new * active/dominant speaker. * - * @param id the ID of the new active/dominant speaker. + * @param id the ID of the new active/dominant speaker or null if a period of silence began. */ - protected void activeSpeakerChanged(@NotNull String id) + protected void activeSpeakerChanged(@Nullable String id) { final Listener listener = this.listener; if (listener == null) @@ -136,31 +154,49 @@ protected void activeSpeakerChanged(@NotNull String id) return; } - Objects.requireNonNull(id); logger.trace(() -> "The dominant speaker is now " + id + "."); boolean endpointListChanged; + boolean dominantSpeakerChanged; synchronized (syncRoot) { - AbstractEndpoint endpoint - = endpointsBySpeechActivity.stream() - .filter(e -> id.equals(e.getId())) - .findFirst().orElse(null); - // Move this endpoint to the top of our sorted list - if (!endpointsBySpeechActivity.remove(endpoint)) + if (id == null) { - logger.warn("Got active speaker notification for an unknown endpoint: " + id + ", ignoring"); - return; + endpointListChanged = false; + dominantSpeakerChanged = false; + if (!inSilence) + { + inSilence = true; + } } - endpointsBySpeechActivity.add(0, endpoint); + else + { + dominantSpeakerChanged = true; + if (inSilence) + { + inSilence = false; + } - recentSpeakers.promote(endpoint); + AbstractEndpoint endpoint + = endpointsBySpeechActivity.stream() + .filter(e -> id.equals(e.getId())) + .findFirst().orElse(null); + // Move this endpoint to the top of our sorted list + if (!endpointsBySpeechActivity.remove(endpoint)) + { + logger.warn("Got active speaker notification for an unknown endpoint: " + id + ", ignoring"); + return; + } + endpointsBySpeechActivity.add(0, endpoint); + + recentSpeakers.promote(endpoint); - endpointListChanged = updateLastNEndpoints(); + endpointListChanged = updateLastNEndpoints(); + } } TaskPools.IO_POOL.execute(() -> { - listener.recentSpeakersChanged(recentSpeakers.getRecentSpeakers(), true); + listener.recentSpeakersChanged(recentSpeakers.getRecentSpeakers(), dominantSpeakerChanged, inSilence); if (endpointListChanged) { listener.lastNEndpointsChanged(); @@ -321,7 +357,8 @@ public void endpointsChanged(List conferenceEndpoints) TaskPools.IO_POOL.execute(() -> { if (finalRecentSpeakersChanged) { - listener.recentSpeakersChanged(recentSpeakers.getRecentSpeakers(), dominantSpeakerChanged); + listener.recentSpeakersChanged( + recentSpeakers.getRecentSpeakers(), dominantSpeakerChanged, inSilence); } if (finalEndpointsChanged) { @@ -386,8 +423,12 @@ interface Listener * endpoint was removed). * @param recentSpeakers the new list of recent speakers (including the dominant speaker at index 0). * @param dominantSpeakerChanged whether the dominant speaker changed. + * @param silence whether we're in a period of silence */ - void recentSpeakersChanged(List recentSpeakers, boolean dominantSpeakerChanged); + void recentSpeakersChanged( + List recentSpeakers, + boolean dominantSpeakerChanged, + boolean silence); void lastNEndpointsChanged(); } } diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/ConferenceSpeechActivityConfig.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/ConferenceSpeechActivityConfig.kt index f44cac9afd..21b844ebd5 100644 --- a/jvb/src/main/kotlin/org/jitsi/videobridge/ConferenceSpeechActivityConfig.kt +++ b/jvb/src/main/kotlin/org/jitsi/videobridge/ConferenceSpeechActivityConfig.kt @@ -17,14 +17,23 @@ package org.jitsi.videobridge import org.jitsi.config.JitsiConfig import org.jitsi.metaconfig.config +import java.time.Duration class ConferenceSpeechActivityConfig { val recentSpeakersCount: Int by config { "videobridge.speech-activity.recent-speakers-count".from(JitsiConfig.newConfig) } + val enableSilenceDetection: Boolean by config { + "videobridge.speech-activity.enable-silence-detection".from(JitsiConfig.newConfig) + } + + val silenceDetectionTimeout: Duration by config { + "videobridge.speech-activity.silence-detection-timeout".from(JitsiConfig.newConfig) + } + companion object { - @JvmStatic + @JvmField val config = ConferenceSpeechActivityConfig() } } diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/message/BridgeChannelMessage.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/message/BridgeChannelMessage.kt index c19a5a1c55..dcd613c5aa 100644 --- a/jvb/src/main/kotlin/org/jitsi/videobridge/message/BridgeChannelMessage.kt +++ b/jvb/src/main/kotlin/org/jitsi/videobridge/message/BridgeChannelMessage.kt @@ -259,13 +259,16 @@ class LastNMessage(val lastN: Int) : BridgeChannelMessage(TYPE) { @JsonInclude(JsonInclude.Include.NON_NULL) class DominantSpeakerMessage @JvmOverloads constructor( val dominantSpeakerEndpoint: String, - val previousSpeakers: List? = null + val previousSpeakers: List? = null, + val silence: Boolean = false ) : BridgeChannelMessage(TYPE) { /** * Construct a message from a list of speakers with the dominant speaker on top. The list must have at least one * element. */ - constructor(previousSpeakers: List) : this(previousSpeakers[0], previousSpeakers.drop(1)) + constructor(previousSpeakers: List, silence: Boolean) : this( + previousSpeakers[0], previousSpeakers.drop(1), silence + ) companion object { const val TYPE = "DominantSpeakerEndpointChangeEvent" } diff --git a/jvb/src/main/resources/reference.conf b/jvb/src/main/resources/reference.conf index 9c5c4903cc..3158886c1a 100644 --- a/jvb/src/main/resources/reference.conf +++ b/jvb/src/main/resources/reference.conf @@ -301,6 +301,14 @@ videobridge { # The number of speakers to include in the list of recent speakers sent with dominant speaker change # notifications. recent-speakers-count = 10 + + # Whether to enable silence detection. When silence detection is enabled and there is no speech activity for a + # certain time (see silence-detection-timeout below) we fire a "dominant speaker changed" event notifying endpoints + # that we entered a period of silence. + enable-silence-detection = false + + # How long to wait for lack of speech activity before a period of silence begins. + silence-detection-timeout = 3 seconds } loudest { diff --git a/jvb/src/test/kotlin/org/jitsi/videobridge/SpeechActivityTest.kt b/jvb/src/test/kotlin/org/jitsi/videobridge/SpeechActivityTest.kt index 3e3ac86a6c..63c51d41bf 100644 --- a/jvb/src/test/kotlin/org/jitsi/videobridge/SpeechActivityTest.kt +++ b/jvb/src/test/kotlin/org/jitsi/videobridge/SpeechActivityTest.kt @@ -34,7 +34,11 @@ class SpeechActivityTest : ShouldSpec() { private val d = mockEndpoint("d") private val conferenceSpeechActivity = ConferenceSpeechActivity(object : ConferenceSpeechActivity.Listener { override fun lastNEndpointsChanged() {} - override fun recentSpeakersChanged(recentSpeakers: List, dominantSpeakerChanged: Boolean) {} + override fun recentSpeakersChanged( + recentSpeakers: List, + dominantSpeakerChanged: Boolean, + silence: Boolean + ) {} }) init { diff --git a/pom.xml b/pom.xml index da148b9b2f..9ed72ff16d 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ 5.3.0 5.8.2 1.1-115-g332f4e7 - 1.0-119-ga7b23ff + 1.0-123-gb819a87 1.13.1 3.2.2 4.6.0