[+] RNNOISE

This commit is contained in:
acgist
2024-07-23 18:15:49 +08:00
parent a5ccc17ecb
commit 32a3a1053e
8 changed files with 361 additions and 14 deletions

View File

@@ -2,6 +2,9 @@
.idea .idea
.gradle .gradle
media/deps media/deps/webrtc
media/deps/rnnoise/src
media/deps/rnnoise/include
media/deps/libmediasoupclient
local.properties local.properties

View File

@@ -2,19 +2,13 @@ cmake_minimum_required(VERSION 3.22.1)
project(taoyao VERSION 1.0.0 LANGUAGES C CXX) project(taoyao VERSION 1.0.0 LANGUAGES C CXX)
# Debug | Release
#-DCMAKE_BUILD_TYPE=Debug
#set(CMAKE_BUILD_TYPE Debug)
# C编译选项 # C编译选项
set(CMAKE_C_STANDARD 17) set(CMAKE_C_STANDARD 17)
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c17 -O3")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -std=c17 -O0 -g") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -std=c17 -O0 -g")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -std=c17 -O3") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -std=c17 -O3")
# C++编译选项 # C++编译选项
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -O3")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++17 -O0 -g") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++17 -O0 -g")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++17 -O3") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++17 -O3")
@@ -25,12 +19,9 @@ set(
set( set(
SOURCE_FILES SOURCE_FILES
${SOURCE_DIR}/include/Log.hpp
${SOURCE_DIR}/include/MediaManager.hpp
${SOURCE_DIR}/include/Room.hpp
${SOURCE_DIR}/include/RouterCallback.hpp
${SOURCE_DIR}/webrtc/MediaManager.cpp
${SOURCE_DIR}/webrtc/Room.cpp ${SOURCE_DIR}/webrtc/Room.cpp
${SOURCE_DIR}/webrtc/Rnnoise.cpp
${SOURCE_DIR}/webrtc/MediaManager.cpp
${SOURCE_DIR}/webrtc/RouterCallback.cpp ${SOURCE_DIR}/webrtc/RouterCallback.cpp
) )
@@ -50,6 +41,7 @@ endif ()
add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES}) add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES})
add_subdirectory("deps/rnnoise")
add_subdirectory("deps/libmediasoupclient") add_subdirectory("deps/libmediasoupclient")
set_source_files_properties( set_source_files_properties(
@@ -59,6 +51,7 @@ set_source_files_properties(
target_include_directories( target_include_directories(
${PROJECT_NAME} PUBLIC ${PROJECT_NAME} PUBLIC
"${SOURCE_DIR}/include" "${SOURCE_DIR}/include"
"${PROJECT_SOURCE_DIR}/deps/rnnoise/include"
"${PROJECT_SOURCE_DIR}/deps/libmediasoupclient/include" "${PROJECT_SOURCE_DIR}/deps/libmediasoupclient/include"
"${PROJECT_SOURCE_DIR}/deps/libmediasoupclient/deps/libsdptransform/include" "${PROJECT_SOURCE_DIR}/deps/libmediasoupclient/deps/libsdptransform/include"
) )
@@ -67,6 +60,7 @@ target_link_libraries(
${PROJECT_NAME} PUBLIC ${PROJECT_NAME} PUBLIC
log log
android android
rnnoise
OpenSLES OpenSLES
mediasoupclient mediasoupclient
) )

View File

@@ -0,0 +1,30 @@
# Build script for the bundled RNNoise sources (C only), producing the
# shared library target `rnnoise`.
cmake_minimum_required(VERSION 3.22.1)

project(rnnoise VERSION 1.0.0 LANGUAGES C)

# C compile options
set(CMAKE_C_STANDARD 17)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -std=c17 -O0 -g")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -std=c17 -O3")

# Headers under ./include are visible to every source listed below.
include_directories(include)

set(
    SOURCE_FILES
    src/rnn.c
    src/nnet.c
    src/pitch.c
    src/denoise.c
    src/celt_lpc.c
    src/kiss_fft.c
    src/nnet_default.c
    src/rnnoise_tables.c
    src/rnnoise_data.c
    # Alternative data file, disabled. NOTE(review): presumably the smaller
    # weight set; enable it INSTEAD of rnnoise_data.c - confirm before use.
    # src/rnnoise_data_little.c
    src/parse_lpcnet_weights.c
)

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES})

# COMPILE_FLAGS takes a single string value. The flags must be quoted:
# unquoted, PROPERTIES reads its arguments as <name> <value> pairs, so only
# -Wall would be applied and "-Wextra" would become a bogus property name.
set_source_files_properties(
    ${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "-Wall -Wextra -Wpedantic"
)

View File

@@ -0,0 +1,57 @@
# RNNOISE模型
https://github.com/xiph/rnnoise.git
## FFmpeg
```
ffplay -ar 48000 -ac 1 -f s16le input.pcm
ffmpeg.exe -i .\source.wav -ar 48000 -ac 1 -f s16le -c:a pcm_s16le source.raw
ffmpeg.exe -i .\source.ts -vn -ar 48000 -ac 1 -f s16le -c:a pcm_s16le source.raw
```
## 环境
```
sudo apt install python3 python3-pip
vim ~/.pip/pip.conf
---
[global]
index-url=https://pypi.tuna.tsinghua.edu.cn/simple
---
pip3 install tqdm torch
```
## 训练
训练音频必须是`48000`采样率、单声道、`16`位（`s16le`）的`PCM`音频数据
```
# 混合数据mix.pcm
# 噪音数据noise.pcm
# 原始数据speech.pcm
# 克隆仓库
cd /data
git clone http://192.168.8.184:9999/dev/hsx/rnnoise.git
cd rnnoise
# 编译代码
./autogen.sh
./configure
make
# 提取特征
./dump_features speech.pcm noise.pcm features.f32 200000
./script/dump_features_parallel.sh ./dump_features speech.pcm noise.pcm features.f32 200000 8
# 模型训练
python3 train_rnnoise.py --gru-size=32 --cond-size=32 --epochs=15 features.f32 ./
# 导出权重
python3 dump_rnnoise_weights.py --quantize ./checkpoints/rnnoise_1.pth rnnoise_c
# 验证效果
./examples/rnnoise_demo mix.pcm output.pcm
```

View File

@@ -0,0 +1,32 @@
#pragma once
#include "rnnoise.h"
namespace acgist {

/**
 * Noise-suppression configuration.
 *
 * Bundles the PCM format parameters with the two native resources used while
 * denoising: a float sample buffer and the RNNoise state. Both resources are
 * released by the destructor, so an instance must never be copied.
 */
class RnnoiseConfig {

public:
    // Sample bit depth (default 16).
    int bits = 16;
    // Size of one PCM chunk in bytes (default 960); the JNI layer allocates
    // its result byte array with this length.
    int size = 960;
    // Sample rate in Hz (default 48000).
    int rate = 48000;
    // Number of samples handed to the denoiser per call (default 480); the
    // JNI layer sets it to size / 2, i.e. two bytes per sample.
    int rnnoiseSize = 480;
    // Float sample buffer, released with delete[] by the destructor.
    float* rnnoiseData = nullptr;
    // RNNoise state, released with rnnoise_destroy by the destructor.
    DenoiseState* denoiseState = nullptr;

public:
    RnnoiseConfig();
    // Copying would make two objects free the same buffer and state.
    RnnoiseConfig(const RnnoiseConfig&) = delete;
    RnnoiseConfig& operator=(const RnnoiseConfig&) = delete;
    virtual ~RnnoiseConfig();

};

}

View File

@@ -0,0 +1,82 @@
#include "Rnnoise.hpp"
#include "jni.h"
#include "android/log.h"
#include <limits>
#ifndef RNNOISE_TAG
#define RNNOISE_TAG "rnnoise"
#endif
/**
 * Creates a configuration that holds only the in-class default values; the
 * sample buffer and the denoiser state are attached later by the caller.
 */
acgist::RnnoiseConfig::RnnoiseConfig() {
}

/**
 * Releases the sample buffer and the RNNoise state owned by this object.
 */
acgist::RnnoiseConfig::~RnnoiseConfig() {
    delete[] this->rnnoiseData;
    this->rnnoiseData = nullptr;
    // A configuration that never received a denoiser still holds nullptr;
    // do not hand that to the C library.
    if (this->denoiseState != nullptr) {
        rnnoise_destroy(this->denoiseState);
        this->denoiseState = nullptr;
    }
}
/**
 * JNI entry point: allocates the native denoise configuration.
 *
 * @param env       JNI environment (unused)
 * @param processer calling Java object (unused)
 * @param bits      sample bit depth; only 16 is accepted because samples are
 *                  exchanged as two-byte values
 * @param size      PCM chunk size in bytes; must equal two bytes per sample
 *                  of one RNNoise frame
 * @param rate      sample rate, stored for reference only
 *
 * @return the configuration pointer as a jlong handle, or 0 when the
 *         parameters are unsupported or the denoiser could not be created
 */
extern "C" JNIEXPORT jlong JNICALL
Java_com_acgist_taoyao_media_audio_RnnoiseProcesser_Init(
    JNIEnv* env,
    jobject processer,
    jint bits,
    jint size,
    jint rate
) {
    __android_log_print(ANDROID_LOG_DEBUG, RNNOISE_TAG, "加载Rnnoise");
    // rnnoise_process_frame always consumes and produces one full frame, so
    // any other buffer size would be read and written out of bounds.
    const int frameSize = rnnoise_get_frame_size();
    if (bits != 16 || frameSize <= 0 || size != 2 * frameSize) {
        __android_log_print(
            ANDROID_LOG_WARN, RNNOISE_TAG,
            "Unsupported Rnnoise parameters: bits=%d size=%d rate=%d (expected size=%d)",
            bits, size, rate, 2 * frameSize
        );
        return 0;
    }
    acgist::RnnoiseConfig* config = new acgist::RnnoiseConfig();
    config->bits = bits;
    config->size = size;
    config->rate = rate;
    config->rnnoiseSize = frameSize;
    config->rnnoiseData = new float[config->rnnoiseSize];
    config->denoiseState = rnnoise_create(nullptr);
    if (config->denoiseState == nullptr) {
        __android_log_print(ANDROID_LOG_WARN, RNNOISE_TAG, "Failed to create Rnnoise state");
        // The destructor releases the sample buffer allocated above.
        delete config;
        return 0;
    }
    return reinterpret_cast<jlong>(config);
}
/**
 * Converts one denoised float sample to signed 16-bit PCM.
 *
 * The value is saturated BEFORE the conversion: converting an out-of-range
 * float to short is undefined behaviour, and clamping afterwards can never
 * trigger. In-range values are truncated toward zero.
 */
static short rnnoiseToPcm16(float sample) {
    constexpr float lowest = std::numeric_limits<short>::min();
    constexpr float highest = std::numeric_limits<short>::max();
    if (sample != sample) {
        // NaN: emit silence
        return 0;
    }
    if (sample <= lowest) {
        return std::numeric_limits<short>::min();
    }
    if (sample >= highest) {
        return std::numeric_limits<short>::max();
    }
    return static_cast<short>(sample);
}

/**
 * JNI entry point: denoises one chunk of 16-bit little-endian mono PCM.
 *
 * @param env       JNI environment
 * @param processer calling Java object (unused)
 * @param pointer   handle returned by Init
 * @param pcm       input PCM bytes; at least two bytes per frame sample
 *
 * @return a new byte array of config->size bytes holding the denoised PCM;
 *         the input array itself when the handle or the input is unusable;
 *         nullptr when a JNI allocation failed (a Java exception is pending)
 */
extern "C" JNIEXPORT jbyteArray JNICALL
Java_com_acgist_taoyao_media_audio_RnnoiseProcesser_Rnnoise(
    JNIEnv* env,
    jobject processer,
    jlong pointer,
    jbyteArray pcm
) {
    acgist::RnnoiseConfig* config = reinterpret_cast<acgist::RnnoiseConfig*>(pointer);
    if (
        pcm == nullptr ||
        config == nullptr ||
        config->rnnoiseData == nullptr ||
        config->denoiseState == nullptr
    ) {
        return pcm;
    }
    const int sampleCount = config->rnnoiseSize;
    // rnnoise_process_frame touches exactly one frame of floats, and both
    // byte arrays must hold two bytes for each of those samples.
    if (
        sampleCount != rnnoise_get_frame_size() ||
        config->size < 2 * sampleCount ||
        env->GetArrayLength(pcm) < 2 * sampleCount
    ) {
        return pcm;
    }
    jbyte* srcBytes = env->GetByteArrayElements(pcm, nullptr);
    if (srcBytes == nullptr) {
        return nullptr;
    }
    // Assemble each sample from its two bytes (little-endian, matching the
    // output below) instead of casting to short*, which depends on the
    // alignment and byte order of the platform.
    for (int i = 0; i < sampleCount; i++) {
        const unsigned int lo = static_cast<unsigned char>(srcBytes[2 * i]);
        const unsigned int hi = static_cast<unsigned char>(srcBytes[2 * i + 1]);
        config->rnnoiseData[i] = static_cast<short>(static_cast<unsigned short>(lo | (hi << 8)));
    }
    // The input was only read, so there is nothing to copy back.
    env->ReleaseByteArrayElements(pcm, srcBytes, JNI_ABORT);
    rnnoise_process_frame(config->denoiseState, config->rnnoiseData, config->rnnoiseData);
    // The returned local reference is handed to the caller and must not be deleted here.
    jbyteArray result = env->NewByteArray(config->size);
    if (result == nullptr) {
        return nullptr;
    }
    jbyte* dstBytes = env->GetByteArrayElements(result, nullptr);
    if (dstBytes == nullptr) {
        return nullptr;
    }
    for (int i = 0; i < sampleCount; i++) {
        const short v = rnnoiseToPcm16(config->rnnoiseData[i]);
        dstBytes[2 * i] = static_cast<jbyte>(v >> 0);
        dstBytes[2 * i + 1] = static_cast<jbyte>(v >> 8);
    }
    // Mode 0 commits the written bytes to the Java array and frees the buffer.
    env->ReleaseByteArrayElements(result, dstBytes, 0);
    return result;
}
/**
 * JNI entry point: destroys the native denoise configuration.
 *
 * @param env       JNI environment (unused)
 * @param processer calling Java object (unused)
 * @param pointer   handle returned by Init
 */
extern "C" JNIEXPORT void JNICALL
Java_com_acgist_taoyao_media_audio_RnnoiseProcesser_Release(
    JNIEnv* env,
    jobject processer,
    jlong pointer
) {
    __android_log_print(ANDROID_LOG_DEBUG, RNNOISE_TAG, "释放Rnnoise");
    // The handle is the address of the configuration object; deleting it runs its destructor.
    delete reinterpret_cast<acgist::RnnoiseConfig*>(pointer);
}

View File

@@ -0,0 +1,141 @@
package com.acgist.taoyao.media.audio;
import android.util.Log;
/**
 * Rnnoise noise suppression for 16-bit PCM audio.
 *
 * Note: only a 48K sample rate is supported for now; audio with any other
 * sample rate has to be resampled first.
 *
 * https://github.com/xiph/rnnoise.git
 */
public class RnnoiseProcesser {

    /**
     * Staging buffer holding one mono chunk handed to the native code
     */
    private byte[] src;
    /**
     * Pointer to the native denoise configuration; 0 means "not initialised"
     */
    private long pointer;
    /**
     * Whether denoising is enabled
     */
    private final boolean enabled = true;

    private static final int BITS          = 16;
    private static final int RATE          = 48000;
    // Bytes in one mono chunk
    private static final int SIZE_MONO     = 960;
    // Bytes in one stereo chunk (two interleaved channels)
    private static final int SIZE_STEREO   = 1920;
    // The native code always processes a single channel
    private static final int CHANNEL_COUNT = 1;

    /**
     * Initialises the denoiser with the default format.
     *
     * @see #init(int, int, int)
     */
    public final void init() {
        this.init(BITS, SIZE_MONO, RATE);
    }

    /**
     * Initialises the denoiser. When it was already initialised, the previous
     * native object is released first.
     *
     * @param bits sample bit depth
     * @param size chunk size in bytes; must equal the mono chunk size because
     *             the staging buffer passed to the native code has that size
     * @param rate sample rate
     *
     * @see #Init(int, int, int)
     */
    public final void init(int bits, int size, int rate) {
        if(!this.enabled) {
            return;
        }
        if(size != SIZE_MONO) {
            Log.w(RnnoiseProcesser.class.getSimpleName(), String.format("Unsupported denoise chunk size: %d", size));
            return;
        }
        if(this.pointer != 0L) {
            // Do not leak the native object of an earlier init
            final long previous = this.pointer;
            this.pointer = 0L;
            Release(previous);
        }
        this.src     = new byte[SIZE_MONO];
        this.pointer = Init(bits, size, rate);
        Log.i(RnnoiseProcesser.class.getSimpleName(), String.format("配置降噪参数:%d - %d - %d", bits, size, rate));
    }

    /**
     * Denoises the whole array in place.
     *
     * @param pcm PCM data
     *
     * @return the given array
     *
     * @see #rnnoise(int, int, byte[])
     */
    public final byte[] rnnoise(byte[] pcm) {
        if(pcm == null) {
            return null;
        }
        return this.rnnoise(0, pcm.length, pcm);
    }

    /**
     * Denoises pcm[offset, offset + capacity) in place. Only a capacity of
     * one mono chunk or one stereo chunk is processed; for stereo the first
     * channel is denoised and written back to both channels. In every other
     * case the data is left unchanged.
     *
     * @param offset   start position inside pcm
     * @param capacity number of bytes to process
     * @param pcm      PCM data
     *
     * @return the given array
     *
     * @see #Rnnoise(long, byte[])
     */
    public final byte[] rnnoise(final int offset, final int capacity, final byte[] pcm) {
        if(!this.enabled) {
            return pcm;
        }
        if(this.pointer == 0L) {
            Log.w(RnnoiseProcesser.class.getSimpleName(), "降噪对象没有初始成功原样返回");
            return pcm;
        }
        if(pcm == null || offset < 0 || capacity > pcm.length - offset) {
            // The requested range does not fit into the array
            return pcm;
        }
        if(capacity == SIZE_MONO) {
            System.arraycopy(pcm, offset, this.src, 0, SIZE_MONO);
            final byte[] dst = Rnnoise(this.pointer, this.src);
            if(dst == null || dst.length < SIZE_MONO) {
                return pcm;
            }
            System.arraycopy(dst, 0, pcm, offset, SIZE_MONO);
        } else if(capacity == SIZE_STEREO) {
            // Each stereo frame is 4 bytes; its first 2 bytes are the first channel
            final int frames = SIZE_MONO / 2;
            for (int frame = 0; frame < frames; frame++) {
                final int stereo = offset + frame * 4;
                final int mono   = frame * 2;
                this.src[mono]     = pcm[stereo];
                this.src[mono + 1] = pcm[stereo + 1];
            }
            final byte[] dst = Rnnoise(this.pointer, this.src);
            if(dst == null || dst.length < SIZE_MONO) {
                return pcm;
            }
            // Write the denoised channel back to both channels
            for (int frame = 0; frame < frames; frame++) {
                final int stereo = offset + frame * 4;
                final int mono   = frame * 2;
                pcm[stereo]     = dst[mono];
                pcm[stereo + 1] = dst[mono + 1];
                pcm[stereo + 2] = dst[mono];
                pcm[stereo + 3] = dst[mono + 1];
            }
        }
        return pcm;
    }

    /**
     * Releases the native denoise object. Calling it again, or calling
     * rnnoise afterwards, is safe because the pointer is cleared.
     *
     * @see #Release(long)
     */
    public final void release() {
        if(!this.enabled) {
            return;
        }
        Log.i(RnnoiseProcesser.class.getSimpleName(), "释放降噪对象");
        if(this.pointer == 0L) {
            return;
        }
        // Clear the pointer first so the freed native object can never be reused
        final long released = this.pointer;
        this.pointer = 0L;
        Release(released);
    }

    /**
     * Initialises the native denoiser.
     *
     * @param bits sample bit depth
     * @param size data size
     * @param rate sample rate
     *
     * @return pointer to the native denoise configuration
     */
    private native final long Init(int bits, int size, int rate);

    /**
     * Denoises one chunk.
     *
     * @param pointer pointer to the native denoise configuration
     * @param pcm     PCM data
     *
     * @return denoised PCM data
     */
    private native final byte[] Rnnoise(long pointer, byte[] pcm);

    /**
     * Releases native resources.
     *
     * @param pointer pointer to the native denoise configuration
     */
    private native final void Release(long pointer);

}

View File

@@ -24,6 +24,8 @@ import android.os.Process;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import androidx.annotation.RequiresApi; import androidx.annotation.RequiresApi;
import com.acgist.taoyao.media.audio.RnnoiseProcesser;
import org.webrtc.CalledByNative; import org.webrtc.CalledByNative;
import org.webrtc.Logging; import org.webrtc.Logging;
import org.webrtc.ThreadUtils; import org.webrtc.ThreadUtils;
@@ -108,6 +110,7 @@ class WebRtcAudioRecord {
private @Nullable SamplesReadyCallback audioSamplesReadyCallback; private @Nullable SamplesReadyCallback audioSamplesReadyCallback;
private final boolean isAcousticEchoCancelerSupported; private final boolean isAcousticEchoCancelerSupported;
private final boolean isNoiseSuppressorSupported; private final boolean isNoiseSuppressorSupported;
private RnnoiseProcesser rnnoiseProcesser;
/** /**
* 设置录音工具 * 设置录音工具
@@ -159,6 +162,8 @@ class WebRtcAudioRecord {
if (microphoneMute) { if (microphoneMute) {
byteBuffer.clear(); byteBuffer.clear();
byteBuffer.put(emptyBytes); byteBuffer.put(emptyBytes);
} else {
WebRtcAudioRecord.this.rnnoiseProcesser.rnnoise(byteBuffer.arrayOffset(), byteBuffer.capacity(), byteBuffer.array());
} }
// It's possible we've been shut down during the read, and stopRecording() tried and // It's possible we've been shut down during the read, and stopRecording() tried and
// failed to join this thread. To be a bit safer, try to avoid calling any native methods // failed to join this thread. To be a bit safer, try to avoid calling any native methods
@@ -196,6 +201,7 @@ class WebRtcAudioRecord {
try { try {
if (audioRecord != null) { if (audioRecord != null) {
audioRecord.stop(); audioRecord.stop();
WebRtcAudioRecord.this.rnnoiseProcesser.release();
doAudioRecordStateCallback(AUDIO_RECORD_STOP); doAudioRecordStateCallback(AUDIO_RECORD_STOP);
} }
} catch (IllegalStateException e) { } catch (IllegalStateException e) {
@@ -408,6 +414,8 @@ class WebRtcAudioRecord {
} }
} }
try { try {
this.rnnoiseProcesser = new RnnoiseProcesser();
this.rnnoiseProcesser.init();
audioRecord.startRecording(); audioRecord.startRecording();
} catch (IllegalStateException e) { } catch (IllegalStateException e) {
reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_EXCEPTION, reportWebRtcAudioRecordStartError(AudioRecordStartErrorCode.AUDIO_RECORD_START_EXCEPTION,