Created
August 12, 2023 11:35
-
-
Save AbduEhab/c24cbaeed80d6093149559352de77abd to your computer and use it in GitHub Desktop.
Whisper quick library lyrics generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import whisper

# Root folders whose audio files should be transcribed.
# Replace with the path to your input folder
input_folders = ['/mnt/d/OST/Land of the Lustrous/Disk 02/temp']
model_name = 'medium'  # Replace with the model name you want to use

print(f'Loading model: {model_name}')
# Loaded once at import time; transcribe_audio_file() reads this global.
model = whisper.load_model(model_name)

for input_folder in input_folders:
    # Normalise Windows-style backslashes to forward slashes in the path.
    input_folder = input_folder.replace('\\', '/')

    # output_folder = 'out'  # Replace with the path to your output folder
    # By default transcripts are written next to the audio they came from.
    output_folder = input_folder
def _format_lrc_timestamp(seconds):
    """Return *seconds* rendered as the ``mm:ss.xx`` body of an LRC time tag."""
    # Work in whole centiseconds so rounding can never yield "60.00" seconds.
    total_centis = int(round(seconds * 100))
    minutes, remainder = divmod(total_centis, 6000)
    secs, centis = divmod(remainder, 100)
    return f'{minutes:02d}:{secs:02d}.{centis:02d}'


def transcribe_audio_file(input_path, output_path, transcriber=None):
    """Transcribe one audio file and write the result as an LRC lyrics file.

    If ``output_path`` already exists the file is left untouched and nothing
    is transcribed, so interrupted runs can be resumed cheaply.

    Args:
        input_path: Path of the audio file to transcribe.
        output_path: Path of the ``.lrc`` file to create.
        transcriber: Optional object exposing ``transcribe(path)`` that
            returns a mapping with a ``'segments'`` list, each segment having
            ``'start'`` (seconds) and ``'text'``. Defaults to the module-level
            ``model``.

    Returns:
        None.
    """
    # if file exists, skip
    if os.path.exists(output_path):
        print(f'Skipping: {input_path}')
        return

    if transcriber is None:
        transcriber = model

    print(f'Transcribing: {input_path}')
    result = transcriber.transcribe(input_path)

    # Explicit UTF-8 so non-ASCII lyrics do not depend on the locale encoding.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        # One "[mm:ss.xx] text" line per segment, as LRC players expect.
        output_file.writelines(
            f"[{_format_lrc_timestamp(segment['start'])}] {segment['text']}\n"
            for segment in result['segments'])
# File name suffixes (lower-case) that are treated as audio input.
_AUDIO_EXTENSIONS = ('.wav', '.flac', '.mp3', '.m4a', '.opus', '.ogg')


def transcribe_audio_files(input_folder, output_folder):
    """Transcribe every audio file located directly inside ``input_folder``.

    Sub-directories are not descended into. A folder is skipped entirely when
    ``output_folder`` contains a marker file named ``skip_lr.txt``.

    Args:
        input_folder: Directory that is scanned for audio files.
        output_folder: Directory that receives one ``<stem>.lrc`` file per
            audio file; created if it does not exist.

    Returns:
        None.
    """
    # if skip file exists, skip
    skip_file = os.path.join(output_folder, 'skip_lr.txt')
    if os.path.exists(skip_file):
        print(f'Skipping: {input_folder}')
        return

    os.makedirs(output_folder, exist_ok=True)

    # Match extensions case-insensitively ("TRACK.MP3") and ignore
    # directories that merely look like audio files; sort for a stable order.
    audio_files = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith(_AUDIO_EXTENSIONS)
        and os.path.isfile(os.path.join(input_folder, name))
    )

    for file_name in audio_files:
        stem = os.path.splitext(file_name)[0]
        transcribe_audio_file(
            os.path.join(input_folder, file_name),
            os.path.join(output_folder, f'{stem}.lrc'))
# Driver: for every configured root, transcribe the root folder and then each
# directory found below it. Transcripts are written into the same folder as
# their audio.
for input_folder in input_folders:
    # Normalise Windows-style backslashes to forward slashes in the path.
    input_folder = input_folder.replace('\\', '/')

    # walk through all child folders (any depth)
    subfolders = []
    for root, dir_names, _ in os.walk(input_folder):
        subfolders.extend(os.path.join(root, name) for name in dir_names)

    # list all folders
    print('Registered folders:')
    print(input_folder)
    for subfolder in subfolders:
        print(subfolder)

    # transcribe the root first, then all files in subfolders
    for folder in [input_folder, *subfolders]:
        transcribe_audio_files(folder, folder)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment