Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Huanshere committed Sep 12, 2024
2 parents b3574b7 + 58fe0cf commit 62afd41
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 338 deletions.
273 changes: 0 additions & 273 deletions ALL_IN_ONE.ipynb

This file was deleted.

3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ https://github.com/user-attachments/assets/0f5d5878-bfa5-41e4-ade1-d2b81d925a7d

| 限制 | 当前 | 计划 |
|------|----------|--------------|
| 安装步骤 | 需要一定的代码能力和计算资源 | 将Whisper部分上传到Replicate云 |
| 音频长度 | 仅支持30分钟以内 | 将很快扩展这一限制 |
| 多语言支持 | 英文较准确<br>其他语言(如日语)精度待提高 | 引入针对不同语言的专门模型 |
| 多语言支持 | 英语识别效果较好<br>日语识别效果一般<br>中文识别非常不稳定且容易报错 | 引入针对不同语言的专门模型 |

## 🙏 致谢

Expand Down
22 changes: 10 additions & 12 deletions config.example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
## ======================== 基本设置 ======================== ##
## ======================== Basic Settings ======================== ##

# API 设置 建议使用唯一真神 https://api.wlai.vip/register?aff=TXMB, sonnet 价格仅 10r/1M。
# API Settings. Recommended to use the one true god https://api.wlai.vip/register?aff=TXMB, sonnet price is only 10r/1M.
# 申请令牌时勾选模型`claude-3-5-sonnet-20240620`,渠道建议选`默认渠道1.0`
# When applying for a token, check the model `claude-3-5-sonnet-20240620`, recommended to choose `Default Channel 1.0`
# API Settings
# 为了最好的效果,请使用 claude-3.5-sonnet. 实测 deepseek-coder 也能有较好的效果且性价比高
# For best results, please use claude-3.5-sonnet. In practice, deepseek-coder also performs well at a lower cost.
API_KEY = 'sk-xxx'
BASE_URL = 'https://api2.wlai.vip'
MODEL = ['claude-3-5-sonnet-20240620']
BASE_URL = 'https://api.deepseek.com'
MODEL = ['deepseek-coder']

# Replicate API 设置
# Replicate API settings
# Replicate API settings for using whisperX
REPLICATE_API_TOKEN = "xxx"

# 语言设置,用自然语言描述
Expand Down Expand Up @@ -43,7 +42,7 @@

# Whisper 设置 [whisperx, whisperxapi, whisper_timestamped]
# Whisper settings [whisperx, whisperxapi, whisper_timestamped]
WHISPER_METHOD = 'whisperx'
WHISPER_METHOD = 'whisperxapi'

# 预留给 whisper_timestamped 的模型,英语场景下 medium 甚至比 large-v2 的时间轴还准
# Model reserved for whisper_timestamped; in English scenarios, medium produces even more accurate timestamps than large-v2
Expand Down Expand Up @@ -96,8 +95,8 @@
# Spacy model
# Spacy 模型
SPACY_MODEL_MAP = {
"en": "en_core_web_lg",
"zh": "zh_core_web_lg",
"en": "en_core_web_sm",
"zh": "zh_core_web_sm",
"es": "es_core_news_lg",
"fr": "fr_core_news_lg",
"de": "de_core_news_lg",
Expand Down Expand Up @@ -136,8 +135,7 @@ def get_joiner(language):
elif language in LANGUAGE_SPLIT_WITHOUT_SPACE:
return ""
else:
raise ValueError(f"不支持的语言代码: {language}")
# raise ValueError(f"Unsupported language code: {language}")
raise ValueError(f"Unsupported language code: {language}")


# 配音设置 暂时弃用
Expand Down
19 changes: 15 additions & 4 deletions core/all_whisper_methods/whisperX.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,29 @@ def transcribe_audio(audio_file: str) -> Dict:

def process_transcription(result: Dict) -> pd.DataFrame:
    """Flatten a transcription result into a word-level DataFrame.

    The raw ``result`` is first appended to ``output/log/debug.json`` for
    debugging. Then every word of every segment is turned into one row with
    the columns ``text``, ``start``, ``end`` and ``score``. The ``text`` value
    is the word wrapped in double quotes.

    Words that carry neither ``start`` nor ``end`` cannot form a row of their
    own:

    * if a row already exists, the word is merged into the previous row's
      text (inserted before its closing quote);
    * otherwise it is buffered and prepended to the first word that does
      have timing information.

    Args:
        result: Mapping with a ``'segments'`` list; each segment has a
            ``'words'`` list of dicts containing at least ``'word'`` and
            optionally ``'start'``, ``'end'`` and ``'score'``.

    Returns:
        A DataFrame with one row per timestamped word (empty when there are
        no timestamped words).
    """
    # Local imports keep this block self-contained.
    import json
    import os

    # Save the raw result for debugging. Create the log directory on demand so
    # a fresh checkout does not fail here, and end each dump with a newline so
    # consecutive appended dumps do not run together.
    debug_path = 'output/log/debug.json'
    os.makedirs(os.path.dirname(debug_path), exist_ok=True)
    with open(debug_path, 'a', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=4)
        f.write('\n')

    all_words = []
    # Text of leading words without timestamps, waiting for the first word
    # that has timing information. Accumulated so that none of them is lost.
    pending_prefix = ''

    for segment in result['segments']:
        for word in segment['words']:
            if 'start' not in word and 'end' not in word:
                if all_words:
                    # Merge into the previous word: drop its closing quote,
                    # append this word, then close the quote again.
                    all_words[-1]['text'] = f'{all_words[-1]["text"][:-1]}{word["word"]}"'
                else:
                    pending_prefix += word['word']
                continue

            # A missing start falls back to the previous word's end (or 0 for
            # the very first row); a missing end falls back to the start.
            start = word.get('start', all_words[-1]['end'] if all_words else 0)
            all_words.append({
                'text': f'"{pending_prefix}{word["word"]}"',
                'start': start,
                'end': word.get('end', start),
                'score': word.get('score', 0),
            })
            pending_prefix = ''

    return pd.DataFrame(all_words)

Expand Down
Loading

0 comments on commit 62afd41

Please sign in to comment.