FunASR 离线文件转写服务开发指南与实践

FunASR 离线文件转写服务开发指南与实践 | 极客日志

# 下载安装脚本
curl -O https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/shell/install_docker.sh
# 执行安装命令
sudo bash install_docker.sh

# 拉取镜像
sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.7
# 创建资源文件夹
mkdir -p ./funasr-runtime-resources/models
# 交互式启动容器
sudo docker run -p 10095:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.7

# 容器内的操作
cd FunASR/runtime
nohup bash run_server.sh \
--download-model-dir /workspace/models \
--vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
--model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx \
--punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
--lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--hotword /workspace/models/hotwords.txt > log.txt 2>&1 &

# 如果您想关闭 ssl，增加参数：--certfile 0
# 如果您想使用 SenseVoiceSmall 模型、时间戳模型或 nn 热词模型进行部署，请将 --model-dir 设置为对应的模型：
# iic/SenseVoiceSmall-onnx
# damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx（时间戳）
# damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx（nn 热词）
# 如果您想在服务端加载热词，请在宿主机文件 ./funasr-runtime-resources/models/hotwords.txt 中配置热词（docker 容器内的映射地址为 /workspace/models/hotwords.txt）：
# 每行一个热词，格式 (热词 权重)：阿里巴巴 20（注：热词理论上无限制，但为了兼顾性能和效果，建议热词长度不超过 10，个数不超过 1k，权重 1~100）
# SenseVoiceSmall-onnx 识别结果中'<|zh|><|NEUTRAL|><|Speech|> '分别为对应的语种、情感、事件信息

root@485ac2db1a1a:/workspace/FunASR/runtime# tail -f log.txt
...
I20260303 11:01:52.311225 56 funasr-wss-server.cpp:516] asr model init finished. listen on port:10095

cd FunASR/runtime
nohup bash run_server.sh \
--download-model-dir /workspace/models \
--vad-dir damo/speech_fsmn_vad_zh-cn-8k-common-onnx \
--model-dir damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1-onnx \
--punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
--lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst-token8358 \
--itn-dir thuduj12/fst_itn_zh \
--hotword /workspace/models/hotwords.txt > log.txt 2>&1 &

wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/funasr_samples.tar.gz

python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav"

Namespace(host='127.0.0.1', port=10095, chunk_size=[5, 10, 5], chunk_interval=10, hotword='', audio_in='../audio/asr_example.wav', audio_fs=16000, send_without_sleep=True, thread_num=1, words_max_print=10000, output_dir=None, ssl=1, use_itn=1, mode='offline')
connect to wss://127.0.0.1:10095
pid0_0: demo: 欢迎大家来体验达摩院推出的语音识别模型。
timestamp: [[880,1120],[1120,1380],[1380,1540],[1540,1780],[1780,2020],[2020,2180],[2180,2480],[2480,2600],[2600,2780],[2780,3040],[3040,3240],[3240,3480],[3480,3699],[3699,3900],[3900,4180],[4180,4420],[4420,4620],[4620,4780],[4780,5195]]
Exception: sent 1000(OK);then received 1000(OK) end

# 启动时的报错信息：
# Traceback (most recent call last):
#   File "/home/jsgx/funasr/samples/python/funasr_wss_client.py", line 4, in <module>
#     import websockets, ssl
# ModuleNotFoundError: No module named 'websockets'
# 1. 安装 websockets
pip install websockets
# 2. 安装兼容性较好的指定版本
pip install websockets==13.1

python3 funasr_wss_client.py --host "127.0.0.1" --port 10095 --mode offline \
--audio_in "../audio/asr_example.wav" --output_dir "./results"

--host 为 FunASR runtime-SDK 服务部署机器 ip，默认为本机 ip（127.0.0.1），如果 client 与服务不在同一台服务器，需要改为部署机器 ip
--port 10095 部署端口号
--mode offline 表示离线文件转写
--audio_in 需要进行转写的音频文件，支持文件路径，文件列表 wav.scp
--thread_num 设置并发发送线程数，默认为 1
--ssl 设置是否开启 ssl 证书校验，默认 1 开启，设置为 0 关闭
--hotword 热词文件，每行一个热词，格式 (热词 权重)：阿里巴巴 20
--use_itn 设置是否使用 itn，默认 1 开启，设置为 0 关闭

./funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path ../audio/asr_example.wav

--server-ip 为 FunASR runtime-SDK 服务部署机器 ip，默认为本机 ip（127.0.0.1），如果 client 与服务不在同一台服务器，需要改为部署机器 ip
--port 10095 部署端口号
--wav-path 需要进行转写的音频文件，支持文件路径
--hotword 热词文件，每行一个热词，格式 (热词 权重)：阿里巴巴 20
--thread-num 设置客户端线程数
--use-itn 设置是否使用 itn，默认 1 开启，设置为 0 关闭

{"is_final":false,"mode":"offline","stamp_sents":[{"end":5195,"punc":"。","start":880,"text_seg":"欢 迎 大 家 来 体 验 达 摩 院 推 出 的 语 音 识 别 模 型","ts_list":[[880,1120],[1120,1380],[1380,1540],[1540,1780],[1780,2020],[2020,2180],[2180,2480],[2480,2600],[2600,2780],[2780,3040],[3040,3240],[3240,3480],[3480,3699],[3699,3900],[3900,4180],[4180,4420],[4420,4620],[4620,4780],[4780,5195]]}],"text":"欢迎大家来体验达摩院推出的语音识别模型。","timestamp":"[[880,1120],[1120,1380],[1380,1540],[1540,1780],[1780,2020],[2020,2180],[2180,2480],[2480,2600],[2600,2780],[2780,3040],[3040,3240],[3240,3480],[3480,3699],[3699,3900],[3900,4180],[4180,4420],[4420,4620],[4620,4780],[4780,5195]]","wav_name":"wav_default_id"}

I20260303 14:01:36.164183 530 websocket-server.cpp:29] on_tls_init called with hdl: 0x7fb98c010ac0
I20260303 14:01:36.164292 530 websocket-server.cpp:30] using TLS mode: Mozilla Intermediate
[2026-03-03 14:01:36][error] handle_transport_init received error: TLS handshake failed
[2026-03-03 14:01:36][info] asio async_shutdown error: asio.ssl:336462231 (shutdown while in init (SSL routines, SSL_shutdown))

# 如果您想关闭 ssl，增加参数：--certfile 0
cd FunASR/runtime
nohup bash run_server.sh \
--download-model-dir /workspace/models \
--vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
--model-dir damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx \
--punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
--lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--certfile 0 \
--hotword /workspace/models/hotwords.txt > log.txt 2>&1 &

FunasrWsClient --host localhost --port 10095 --audio_in ./asr_example.wav --mode offline

# in ubuntu
apt-get install openjdk-11-jdk

cd funasr/runtime/java
# download java lib
make downjar
# compile
make buildwebsocket
# run client
make runclient

# full command refer to Makefile runclient
# usage: FunasrWsClient [-h] [--port PORT] [--host HOST] [--audio_in AUDIO_IN] [--num_threads NUM_THREADS] [--chunk_size CHUNK_SIZE] [--chunk_interval CHUNK_INTERVAL] [--mode MODE]
# Where:
# --host <string> (required) server-ip
# --port <int> (required) port
# --audio_in <string> (required) the wav or pcm file path
# --num_threads <int> thread number for test
# --mode asr mode, support "offline" "online" "2pass"
# example: FunasrWsClient --host localhost --port 8889 --audio_in ./asr_example.wav --num_threads 1 --mode 2pass
# result json, example like: {"mode":"offline","text":"欢迎大家来体验达摩院推出的语音识别模型","wav_name":"javatest"}

cd /workspace/FunASR/runtime
nohup bash run_server.sh \
--download-model-dir /workspace/models \
--model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
--vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
--punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
--lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--certfile ../../../ssl_key/server.crt \
--keyfile ../../../ssl_key/server.key \
--hotword ../../hotwords.txt > log.txt 2>&1 &

--download-model-dir 模型下载地址，通过设置 model ID 从 Modelscope 下载模型
--model-dir modelscope model ID 或者 本地模型路径
--vad-dir modelscope model ID 或者 本地模型路径
--punc-dir modelscope model ID 或者 本地模型路径
--lm-dir modelscope model ID 或者 本地模型路径
--itn-dir modelscope model ID 或者 本地模型路径
--port 服务端监听的端口号，默认为 10095
--decoder-thread-num 服务端线程池个数 (支持的最大并发路数)，脚本会根据服务器线程数自动配置 decoder-thread-num、io-thread-num
--io-thread-num 服务端启动的 IO 线程数
--model-thread-num 每路识别的内部线程数 (控制 ONNX 模型的并行)，默认为 1，其中建议 decoder-thread-num*model-thread-num 等于总线程数
--certfile ssl 的证书文件，默认为：../../../ssl_key/server.crt，如果需要关闭 ssl，参数设置为 0
--keyfile ssl 的密钥文件，默认为：../../../ssl_key/server.key
--hotword 热词文件路径，每行一个热词，格式：热词 权重 (例如：阿里巴巴 20)，如果客户端提供热词，则与客户端提供的热词合并一起使用，服务端热词全局生效，客户端热词只针对对应客户端生效。

# 查看 funasr-wss-server 对应的 PID
ps -x | grep funasr-wss-server
kill -9 PID

# 例如替换 ASR 模型为 damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx，则如下设置参数 --model-dir
--model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
# 设置端口号
--port <port number>
# 设置服务端启动的推理线程数
--decoder-thread-num <decoder thread num>
# 设置服务端启动的 IO 线程数
--io-thread-num <io thread num>
# 关闭 SSL 证书
--certfile 0

// VAD 模型的使用分为 FsmnVadInit 和 FsmnVadInfer 两个步骤：
FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
// 其中：model_path 包含"model-dir"、"quantize"，thread_num 为 onnx 线程数；
FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, 16000);
// 其中：vad_hanlde 为 FsmnVadInit 返回值，wav_file 为音频路径，sampling_rate 为采样率 (默认 16k)

// ASR 模型的使用分为 FunOfflineInit 和 FunOfflineInfer 两个步骤：
FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num);
// 其中：model_path 包含"model-dir"、"quantize"，thread_num 为 onnx 线程数；
FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000);
// 其中：asr_hanlde 为 FunOfflineInit 返回值，wav_file 为音频路径，sampling_rate 为采样率 (默认 16k)

// PUNC 模型的使用分为 CTTransformerInit 和 CTTransformerInfer 两个步骤：
FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num);
// 其中：model_path 包含"model-dir"、"quantize"，thread_num 为 onnx 线程数；
FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL);
// 其中：punc_hanlde 为 CTTransformerInit 返回值，txt_str 为文本

FunASR 离线文件转写服务开发指南与实践

FunASR 离线文件转写服务开发指南（实践篇）

1. 是什么

2. 快速上手

2.1 docker 安装

2.2 镜像启动

2.3 服务端启动

2.4 客户端测试与使用

3. 客户端用法详解

3.1 python-client

3.2 cpp-client

3.3 Html 网页版

3.4 Java-client

3.4.1 Building for Linux/Unix

4. 服务端用法详解

4.1 启动 FunASR 服务

4.2 关闭 FunASR 服务

4.3 修改模型及其他参数

5. 如何定制服务部署

更多推荐文章

相关免费在线工具

FunASR 离线文件转写服务开发指南与实践

FunASR 离线文件转写服务开发指南（实践篇）

1. 是什么

2. 快速上手

2.1 docker 安装

2.2 镜像启动

2.3 服务端启动

2.4 客户端测试与使用

3. 客户端用法详解

3.1 python-client

3.2 cpp-client

3.3 Html 网页版

3.4 Java-client

3.4.1 Building for Linux/Unix

4. 服务端用法详解

4.1 启动 FunASR 服务

4.2 关闭 FunASR 服务

4.3 修改模型及其他参数

5. 如何定制服务部署

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具