label studio引入nemo_asr实现预测标注文本
Published on: 2024-12-25 |
Words: 473 | Reading time: 2min | reading:

label studio引入nemo_asr实现预测标注文本

clone and use

1
git clone https://github.com/HumanSignal/label-studio-ml-backend

install

1
2
3
pip install -r requirements.txt

pip install -e .

start

1
label-studio-ml start ./nemo_asr

coding

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108

# _*_ coding: utf-8 _*_
"""
Time: 2024/12/19 16:59
Author: ZhaoQi Cao(czq)
Version: V 0.1
File: transit_nameko.py
Describe: Batch-transcribes audio files with NeMo ASR and submits the text to Label Studio; written while working with Python at zgxmt. Github link: https://github.com/caozhaoqi
"""
import nemo
import nemo.collections.asr as nemo_asr
import requests
import os
import nemo.collections.asr as nemo_asr

# Tip: to list every pretrained model name that NeMo can load, run
# print(nemo_asr.models.ASRModel.list_available_models())

# Label Studio API and project settings. Each value can be overridden with an
# environment variable; the literals are the defaults used when it is unset.
LABEL_STUDIO_API_URL = os.environ.get("LABEL_STUDIO_API_URL", "http://localhost:8080/api")
PROJECT_ID = int(os.environ.get("LABEL_STUDIO_PROJECT_ID", "12"))  # your Label Studio project ID
# NOTE(review): an access token should not be committed or published with the
# source; prefer setting LABEL_STUDIO_API_KEY and rotating the default below.
API_KEY = os.environ.get("LABEL_STUDIO_API_KEY", "93e8ebc81cc1337c41567aa20113fd74934a4f17")

# Request headers shared by the Label Studio API calls
headers = {
    'Authorization': f"Token {API_KEY}",
    'Content-Type': 'application/json',
}

# Load the pretrained NeMo ASR model once, when the module is executed
asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name="stt_zh_citrinet_1024_gamma_0_25")


# Audio transcription helper
def transcribe_audio(audio_file_path):
    """Transcribe a single audio file with the module-level ASR model.

    Args:
        audio_file_path: Path of the audio file to transcribe.

    Returns:
        The first element of the result returned by ``asr_model.transcribe``
        for a one-item batch containing ``audio_file_path``.
    """
    # The model takes a batch of paths, so wrap the single path in a list
    # and pick the only result back out.
    return asr_model.transcribe([audio_file_path])[0]


# Create a task by uploading the audio file
def create_task(audio_file_path):
    """Upload one audio file to the Label Studio project and return its task ID.

    Args:
        audio_file_path: Path of the audio file to upload.

    Returns:
        The ID of the created task, or None when the upload failed or the
        response did not contain a task ID.
    """
    # The shared headers pin Content-Type to application/json. For a multipart
    # upload requests has to generate that header itself (it carries the
    # boundary), so drop it and keep only the remaining headers.
    upload_headers = {
        name: value
        for name, value in headers.items()
        if name.lower() != 'content-type'
    }

    # The context manager closes the file handle even if the request raises.
    with open(audio_file_path, 'rb') as audio_file:
        response = requests.post(
            f"{LABEL_STUDIO_API_URL}/projects/{PROJECT_ID}/import",
            headers=upload_headers,
            # NOTE(review): the import endpoint is documented to include
            # `task_ids` in its response when this flag is set - confirm
            # against the deployed Label Studio version.
            params={'return_task_ids': 'true'},
            files={'file': audio_file},
        )

    # Accept any 2xx status: a successful creation may answer 201, not 200.
    if not response.ok:
        print(f"Error uploading file: {response.text}")
        return None

    task_data = response.json()
    task_id = None
    if isinstance(task_data, dict):
        # Keep reading `id` as before, then fall back to the `task_ids` list.
        task_id = task_data.get('id')
        if task_id is None:
            task_ids = task_data.get('task_ids') or []
            task_id = task_ids[0] if task_ids else None

    if task_id is None:
        # Report a missing ID instead of crashing with a KeyError.
        print(f"Error uploading file: {response.text}")
        return None

    print(f"Task created with ID: {task_id}")
    return task_id


# Attach the transcription text to the task as an annotation
def update_task_with_transcription(task_id, transcription):
    """Create an annotation on a task that holds the transcribed text.

    Args:
        task_id: ID of the Label Studio task to annotate.
        transcription: Transcribed text (a string, or a list of strings).

    Returns:
        None. The outcome is reported on standard output.
    """
    # A textarea result stores its text as a list of strings.
    text_values = transcription if isinstance(transcription, list) else [transcription]

    # The annotation endpoint takes the result list at the top level; the
    # task-import wrapper ("data" / "annotations") is not used here.
    # NOTE(review): "transcription" and "audio" must match the control and
    # object tag names in the project's labeling config - confirm.
    data = {
        "result": [
            {
                "from_name": "transcription",
                "to_name": "audio",
                "type": "textarea",
                "value": {
                    "text": text_values,
                },
            }
        ]
    }

    response = requests.post(
        f"{LABEL_STUDIO_API_URL}/tasks/{task_id}/annotations/",
        headers=headers,
        json=data,
    )

    # Accept any 2xx status: a successful creation may answer 201, not 200.
    if response.ok:
        print(f"Task {task_id} updated with transcription.")
    else:
        print(f"Error updating task {task_id}: {response.text}")


# Process and submit every audio file in a directory
def process_and_submit_audio_files(audio_files_dir):
    """Transcribe each file in a directory and submit it to Label Studio.

    For every regular file in ``audio_files_dir`` this transcribes the file,
    uploads it as a new task and, when a task ID comes back, attaches the
    transcription to that task.

    Args:
        audio_files_dir: Directory containing the audio files.

    Returns:
        None.
    """
    # Sort the listing so files are processed in a deterministic order.
    for audio_file in sorted(os.listdir(audio_files_dir)):
        audio_file_path = os.path.join(audio_files_dir, audio_file)

        # os.listdir also yields sub-directories; they cannot be opened or
        # transcribed as audio, so skip anything that is not a regular file.
        if not os.path.isfile(audio_file_path):
            continue

        # Get the transcription
        transcription = transcribe_audio(audio_file_path)

        # Create the task by uploading the audio file
        task_id = create_task(audio_file_path)

        if task_id:
            # Attach the transcription to the new task
            update_task_with_transcription(task_id, transcription)



# Directory that holds the audio files to process
audio_files_directory = "./out"

# Run the batch processing over that directory
process_and_submit_audio_files(audio_files_directory)

Prev:
借助深度学习模型实现分离人声与背景声
Next:
在label studio 引入whisper 实现语音转写与说话人分离