Last active
February 28, 2025 14:23
-
-
Save AcTePuKc/eb50cb0f7efa6bb2c8d4e22cdd01857a to your computer and use it in GitHub Desktop.
Spark-TTS-PySide_Gui - Spark-TTS Studio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Save in the main directory
'''
SparkTTS Studio - GUI Description (Concise)
SparkTTS Studio is a user-friendly interface for text-to-speech synthesis, featuring voice cloning capabilities.
Key Features:
Text Input: Large text area to enter text for speech synthesis.
Voice Sample Loading: Load WAV/MP3 files to enable voice cloning and mimic voice styles.
Gender Selection: Choose "Male," "Female," or "Pick Voice (Gender optional)" for voice synthesis.
Pitch & Speed Control: Sliders to adjust voice pitch and speaking speed.
Interactive Waveform: Visual display of generated audio with playback progress and clickable seeking.
Playback Controls: "Play/Pause," "Stop," and Volume slider for audio playback.
Save Audio: Save generated speech to WAV files.
Multi-Language GUI: Switch UI language between English, Bulgarian, Spanish, French, and Japanese.
Status Bar: Displays messages about model loading, generation progress, and errors.
System Tray: Option to minimize to system tray for background operation.
Workflow Highlights:
Enter Text: Input the text you want to convert to speech.
Optional Voice Cloning: Load a voice sample to clone a voice.
Set Voice Parameters: Adjust gender, pitch, and speed.
Generate Speech: Click "Generate Speech" to synthesize audio.
Playback & Seek: Use playback controls and click on the waveform to navigate audio.
Save Audio: Save the generated audio to a WAV file.
'''
import traceback | |
import sys | |
import os | |
import time | |
import torch | |
import shutil | |
import numpy as np | |
import soundfile as sf | |
from PySide6.QtWidgets import ( | |
QApplication, QWidget, QVBoxLayout, QPushButton, QLabel, | |
QTextEdit, QSlider, QFileDialog, QComboBox, QHBoxLayout, | |
QGroupBox, QProgressBar, QSystemTrayIcon, QMenu, QSizePolicy | |
) | |
from PySide6.QtCore import Qt, QThread, Signal, QPoint, QTimer, QCoreApplication | |
from PySide6.QtMultimedia import QMediaPlayer, QAudioOutput | |
from PySide6.QtGui import QPainter, QColor, QPen, QLinearGradient, QIcon, QAction | |
from cli.SparkTTS import SparkTTS | |
# ------------------- Modern Style Sheet ------------------- | |
# Application-wide Qt style sheet (dark theme), applied in SparkTTSApp.init_ui.
# The stray " | |" residue that trailed every line has been removed; left in
# place it is not valid QSS syntax.
STYLE_SHEET = """
QWidget {
    background-color: #2D2D2D;
    color: #FFFFFF;
    font-family: 'Segoe UI';
    font-size: 12px;
}
QTextEdit {
    background-color: #404040;
    border: 2px solid #505050;
    border-radius: 5px;
    padding: 8px;
    selection-background-color: #3DAEE9;
}
QPushButton {
    background-color: #3DAEE9;
    border: none;
    border-radius: 4px;
    color: white;
    padding: 8px 16px;
    min-width: 80px;
}
QPushButton:hover {
    background-color: #2D9CDB;
}
QPushButton:disabled {
    background-color: #505050;
    color: #808080;
}
QSlider::groove:horizontal {
    height: 6px;
    background: #404040;
    border-radius: 3px;
}
QSlider::handle:horizontal {
    background: #3DAEE9;
    border: 2px solid #2D2D2D;
    width: 16px;
    margin: -6px 0;
    border-radius: 8px;
}
QComboBox {
    background-color: #404040;
    border: 2px solid #505050;
    border-radius: 4px;
    padding: 4px;
    min-width: 100px;
}
QGroupBox {
    border: 2px solid #505050;
    border-radius: 6px;
    margin-top: 10px;
    padding-top: 15px;
    color: #FFFFFF; /* Added to ensure title text is white */
}
QGroupBox::title {
    subcontrol-origin: margin;
    left: 10px;
    padding: 0 5px;
    color: #FFFFFF; /* Added to ensure title text is white */
}
"""
# ------------------- Enhanced Waveform Widget ------------------- | |
class WaveformWidget(QWidget):
    """Widget that paints an audio waveform with a progress overlay and a playhead.

    A mouse press emits ``seek_position_signal`` with a position in
    milliseconds, derived from the click's horizontal position and the
    duration supplied through ``set_audio_duration``.
    """

    # Emitted with the requested seek position in milliseconds.
    seek_position_signal = Signal(int)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.waveform_data = None
        self.playback_progress = 0.0
        self.playhead_progress = 0.0
        # Duration of the loaded audio in ms; 0 disables click-to-seek.
        self.audio_player_duration_ms = 0
        self.setMinimumHeight(100)
        self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        self.setMouseTracking(True)

    def set_waveform(self, data):
        """Store the samples to draw and schedule a repaint."""
        self.waveform_data = data
        self.update()

    def set_playback_progress_overlay(self, progress):
        """Set the shaded overlay extent as a fraction of the width and repaint."""
        self.playback_progress = progress
        self.update()

    def set_playhead_progress(self, progress):
        """Set the playhead position as a fraction of the width and repaint."""
        self.playhead_progress = progress
        self.update()

    def set_audio_duration(self, duration_ms):
        """Record the audio duration (ms) used to translate clicks into seek positions."""
        self.audio_player_duration_ms = duration_ms

    def mousePressEvent(self, event):
        """Emit ``seek_position_signal`` for the clicked horizontal position.

        Nothing is emitted when no waveform is loaded, the duration is
        unknown, or the widget currently has no width.
        """
        if self.waveform_data is None or self.audio_player_duration_ms <= 0:
            return
        width = self.width()
        if width <= 0:
            return
        # Clamp so the emitted position always lies within [0, duration].
        ratio = min(1.0, max(0.0, event.position().x() / width))
        self.seek_position_signal.emit(int(self.audio_player_duration_ms * ratio))

    def paintEvent(self, event):
        """Paint the background gradient, waveform, progress overlay and playhead."""
        painter = QPainter(self)
        try:
            rect = self.rect()
            width = rect.width()
            height = rect.height()

            # Background gradient.
            gradient = QLinearGradient(0, 0, 0, height)
            gradient.setColorAt(0, QColor("#363636"))
            gradient.setColorAt(1, QColor("#2D2D2D"))
            painter.fillRect(rect, gradient)

            samples = self.waveform_data
            if samples is not None and len(samples) > 0 and width > 0:
                # Peak is computed once; the samples are scaled on the fly
                # instead of allocating a normalized copy on every repaint.
                peak = float(np.max(np.abs(samples)))
                scale = 1.0 / peak if peak > 0 else 1.0

                pen = QPen(QColor("#3DAEE9"))
                pen.setWidth(2)
                painter.setPen(pen)

                num_samples = len(samples)
                center_y = height / 2
                for x in range(width):
                    # Map pixel columns proportionally onto the sample range so
                    # the waveform spans the full width, matching the
                    # click-to-seek mapping in mousePressEvent.
                    idx = min(x * num_samples // width, num_samples - 1)
                    amplitude = abs(samples[idx] * scale)
                    half_height = int(amplitude * center_y * 0.95)
                    y1 = max(0, min(int(center_y - half_height), height))
                    y2 = max(0, min(int(center_y + half_height), height))
                    painter.drawLine(x, y1, x, y2)

            # Semi-transparent overlay covering the played portion.
            painter.setCompositionMode(QPainter.CompositionMode_SourceOver)
            progress_width = int(width * self.playback_progress)
            progress_rect = rect.adjusted(0, 0, progress_width - width, 0)
            painter.fillRect(progress_rect, QColor(61, 174, 233, 80))

            # Playhead line; not drawn while progress is zero.
            if self.playhead_progress > 0:
                playhead_x = int(width * self.playhead_progress)
                playhead_pen = QPen(QColor("white"))
                playhead_pen.setWidth(2)
                painter.setPen(playhead_pen)
                painter.drawLine(playhead_x, 0, playhead_x, height)
        finally:
            # Explicitly end painting so the painter is never left active
            # on the widget after paintEvent returns.
            painter.end()
# ------------------- Main Application Class ------------------- | |
class SparkTTSApp(QWidget): | |
def __init__(self, model, device):
    """Create the main window, tray icon and audio playback objects.

    Args:
        model: The TTS model handed to the synthesis worker in run_synthesis.
        device: Device the model was loaded on; shown in the status line.
    """
    super().__init__()
    self.model = model
    self.device = device
    self.voice_sample = None
    self.current_audio_file = None
    self.audio_player_duration_ms = 0
    self.init_ui()
    self.init_tray_icon()
    self.status_label.setText(f"Model loaded on {device}")
    self.audio_player = QMediaPlayer()
    self.audio_output = QAudioOutput()
    self.audio_player.setAudioOutput(self.audio_output)
    # positionChanged carries a position in milliseconds. It is routed only
    # through on_position_changed, which converts it to a 0..1 ratio before
    # updating the playhead; connecting it directly to
    # update_waveform_playhead would feed raw milliseconds to the waveform.
    self.audio_player.positionChanged.connect(self.on_position_changed)
    self.audio_player.durationChanged.connect(self.on_duration_changed)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.update_time_display)
    self.setWindowIcon(QIcon("src/logo.webp"))
    # Clicks on the waveform request a seek to the clicked position.
    self.waveform.seek_position_signal.connect(self.seek_audio)
def init_ui(self):
    """Build the main window layout and all of its widgets.

    The window is assembled from the private ``_build_*`` helpers below.
    Every widget is stored on ``self`` under the same attribute names as
    before, including the alias attributes kept for translation
    (``play_btn_play``, ``volume_label_widget`` and so on).
    """
    self.setWindowTitle("SparkTTS Studio")
    self.setMinimumSize(800, 600)
    self.setStyleSheet(STYLE_SHEET)

    main_layout = QVBoxLayout()
    main_layout.setContentsMargins(20, 20, 20, 20)
    main_layout.setSpacing(15)
    main_layout.addLayout(self._build_language_bar())
    main_layout.addWidget(self._build_text_input_group())
    main_layout.addWidget(self._build_controls_group())
    main_layout.addWidget(self._build_visualization_group())
    main_layout.addLayout(self._build_bottom_panel())

    # Status line at the bottom of the window.
    self.status_label = QLabel("Ready")
    self.status_label_bottom = self.status_label
    self.status_label.setAlignment(Qt.AlignCenter)
    main_layout.addWidget(self.status_label)

    self.setLayout(main_layout)
    self.update_gui_language()  # Initial language setup

def _build_language_bar(self):
    """Return the right-aligned top bar holding the GUI language selector."""
    top_bar_layout = QHBoxLayout()
    top_bar_layout.addStretch()  # Push the label and selector to the right
    # The label is created explicitly instead of being fetched back out of
    # the layout by position.
    self.language_label = QLabel("Language:")
    top_bar_layout.addWidget(self.language_label)
    self.gui_language_selector = QComboBox()
    self.gui_language_selector.addItems(
        ["English", "Bulgarian", "Spanish", "French", "Japanese"])
    # Connected after addItems so populating the box does not fire the slot.
    self.gui_language_selector.currentIndexChanged.connect(
        self.update_gui_language)
    top_bar_layout.addWidget(self.gui_language_selector)
    return top_bar_layout

def _build_text_input_group(self):
    """Return the group box containing the text entry area."""
    self.input_group = QGroupBox("Text Input")
    input_layout = QVBoxLayout()
    self.text_input = QTextEdit()
    self.text_input.setPlaceholderText(
        "Enter text for speech synthesis...")
    input_layout.addWidget(self.text_input)
    self.input_group.setLayout(input_layout)
    return self.input_group

def _build_controls_group(self):
    """Return the group box with the voice sample row and voice parameters."""
    self.controls_group = QGroupBox("Synthesis Controls")
    controls_layout = QVBoxLayout()

    # Voice sample row: load button plus a label describing what is loaded.
    voice_layout = QHBoxLayout()
    self.voice_label = QLabel("No voice sample loaded")
    self.voice_label_status = self.voice_label
    self.voice_btn = QPushButton("Load Voice Sample")
    self.voice_btn_load_voice = self.voice_btn
    self.voice_btn.clicked.connect(self.select_voice_sample)
    self.voice_btn.setIcon(QIcon())
    voice_layout.addWidget(self.voice_btn)
    voice_layout.addWidget(self.voice_label)
    controls_layout.addLayout(voice_layout)

    controls_layout.addLayout(self._build_parameters_row())
    self.controls_group.setLayout(controls_layout)
    return self.controls_group

def _build_parameters_row(self):
    """Return the row holding the gender selector and the pitch/speed sliders."""
    params_layout = QHBoxLayout()

    self.gender_box = QGroupBox("Voice Parameters")
    gender_layout = QVBoxLayout()
    self.gender_selector_label = QLabel("Gender:")
    gender_layout.addWidget(self.gender_selector_label)
    self.gender_selector_label_widget = self.gender_selector_label
    self.gender_selector = QComboBox()
    self.gender_selector.addItems(
        ["Pick Voice (Gender optional)", "Male", "Female"])
    self.gender_selector_items = [
        "Pick Voice (Gender optional)", "Male", "Female"]
    # Connected after addItems so populating the box does not fire the slot.
    self.gender_selector.currentIndexChanged.connect(
        self.on_gender_changed)
    gender_layout.addWidget(self.gender_selector)
    self.gender_box.setLayout(gender_layout)
    params_layout.addWidget(self.gender_box)

    self.pitch_box, self.pitch_slider = self._make_slider_box("Pitch")
    params_layout.addWidget(self.pitch_box)
    self.speed_box, self.speed_slider = self._make_slider_box("Speed")
    params_layout.addWidget(self.speed_box)
    return params_layout

def _make_slider_box(self, title):
    """Return ``(group_box, slider)`` for a horizontal 0..4 slider preset to 2."""
    box = QGroupBox(title)
    layout = QVBoxLayout()
    slider = QSlider(Qt.Horizontal)
    slider.setRange(0, 4)
    slider.setValue(2)
    layout.addWidget(slider)
    box.setLayout(layout)
    return box, slider

def _build_visualization_group(self):
    """Return the group box with the waveform, time labels and playback controls."""
    self.vis_group = QGroupBox("Audio Visualization")
    vis_layout = QVBoxLayout()
    self.waveform = WaveformWidget()
    vis_layout.addWidget(self.waveform)

    # Elapsed time on the left, total time on the right.
    time_layout = QHBoxLayout()
    self.current_time = QLabel("00:00")
    self.total_time = QLabel("00:00")
    time_layout.addWidget(self.current_time)
    time_layout.addStretch()
    time_layout.addWidget(self.total_time)
    vis_layout.addLayout(time_layout)

    playback_layout = QHBoxLayout()
    self.play_btn = QPushButton("Play")
    self.play_btn_play = self.play_btn
    self.play_btn.clicked.connect(self.play_audio)
    self.play_btn.setIcon(QIcon())
    self.pause_btn = QPushButton("Pause")
    self.pause_btn_pause = self.pause_btn
    self.pause_btn.clicked.connect(self.pause_audio)
    self.pause_btn.setIcon(QIcon())
    self.stop_btn = QPushButton("Stop")
    self.play_btn_stop = self.stop_btn
    self.stop_btn.clicked.connect(self.stop_audio)
    self.stop_btn.setIcon(QIcon())
    self.volume_label = QLabel("Volume:")
    self.volume_label_widget = self.volume_label
    self.volume_slider = QSlider(Qt.Horizontal)
    self.volume_slider.setRange(0, 100)
    # The value is set before connecting, so set_volume is not invoked
    # while the window is still being built.
    self.volume_slider.setValue(100)
    self.volume_slider.valueChanged.connect(self.set_volume)
    for widget in (self.play_btn, self.pause_btn, self.stop_btn,
                   self.volume_label, self.volume_slider):
        playback_layout.addWidget(widget)
    vis_layout.addLayout(playback_layout)

    self.vis_group.setLayout(vis_layout)
    return self.vis_group

def _build_bottom_panel(self):
    """Return the bottom row: Generate, Save and Exit buttons plus the progress bar."""
    bottom_layout = QHBoxLayout()
    self.generate_btn = QPushButton("Generate Speech")
    self.generate_btn_generate = self.generate_btn
    self.generate_btn.clicked.connect(self.run_synthesis)
    self.generate_btn.setIcon(QIcon())
    self.generate_btn.setEnabled(False)  # Initially disabled
    self.save_btn = QPushButton("Save Audio")
    self.save_btn_save = self.save_btn
    self.save_btn.clicked.connect(self.save_audio)
    self.save_btn.setIcon(QIcon())
    self.exit_btn = QPushButton("Exit")
    self.exit_btn_main_window = self.exit_btn
    self.exit_btn.clicked.connect(self.quit_app)
    self.exit_btn.setIcon(QIcon())
    self.progress_bar = QProgressBar()
    self.progress_bar.setTextVisible(False)
    self.progress_bar.setFixedHeight(6)
    for widget in (self.generate_btn, self.save_btn, self.exit_btn,
                   self.progress_bar):
        bottom_layout.addWidget(widget)
    return bottom_layout
def init_tray_icon(self):
    """Create the system tray icon with a Show / Exit context menu."""
    self.tray_icon = QSystemTrayIcon(self)
    self.tray_icon.setIcon(QIcon("src/logo.webp"))

    # Keep a reference on the instance: the menu was previously a local
    # with no parent, so nothing in this class kept it alive after the
    # method returned.
    self.tray_menu = QMenu()

    show_action = QAction("Show", self)
    self.show_action_tray = show_action  # Stored for translation
    show_action.triggered.connect(self.show)

    exit_action = QAction("Exit", self)
    self.exit_action_tray = exit_action  # Stored for translation
    # quit_app is used rather than close, because closeEvent only hides the window.
    exit_action.triggered.connect(self.quit_app)

    self.tray_menu.addAction(show_action)
    self.tray_menu.addAction(exit_action)
    self.tray_icon.setContextMenu(self.tray_menu)
    self.tray_icon.show()
def quit_app(self):
    """Exit the application.

    Connected to the Exit button and to the tray menu's Exit action.
    """
    # Hide the tray icon first so it is removed before the process exits.
    self.tray_icon.hide()
    QCoreApplication.quit()
def closeEvent(self, event):
    """Minimize to the system tray instead of closing the window.

    If the tray icon is not visible there would be no way to bring the
    hidden window back, so in that case the close event is accepted.
    """
    if not self.tray_icon.isVisible():
        event.accept()
        return
    self.hide()
    # Ignore the event so the window is only hidden; quit_app performs the real exit.
    event.ignore()
    self.tray_icon.showMessage(
        "SparkTTS Studio",
        "The application is running in the system tray",
        QSystemTrayIcon.Information,
        2000
    )
def set_volume(self, value):
    """Apply the volume slider value (0-100) to the audio output as a 0.0-1.0 fraction."""
    fraction = value / 100
    self.audio_output.setVolume(fraction)
def update_time_display(self):
    """Refresh the elapsed and total time labels while audio is playing.

    Does nothing when the player is not playing. If the player reports a
    non-numeric position or duration, an error is printed and the labels
    are left unchanged.
    """
    player = self.audio_player
    if not player.isPlaying():
        return

    position_ms = player.position()
    duration_ms = player.duration()
    # Position is checked before duration, matching the order of the messages.
    for getter_name, value in (("position", position_ms), ("duration", duration_ms)):
        if not isinstance(value, (int, float)):
            print(f"Error: audio_player.{getter_name}() returned unexpected type: {type(value)}")
            return

    elapsed_minutes, elapsed_seconds = divmod(position_ms // 1000, 60)
    self.current_time.setText(f"{elapsed_minutes:02}:{elapsed_seconds:02}")

    total_seconds = duration_ms // 1000
    if total_seconds > 0:
        total_minutes, remainder = divmod(total_seconds, 60)
        total_text = f"{total_minutes:02}:{remainder:02}"
    else:
        # Fallback when the total duration is not (yet) valid.
        total_text = "00:00"
    self.total_time.setText(total_text)
def on_generation_complete(self, result, elapsed):
    """Handle the synthesis worker's result.

    Args:
        result: An Exception instance when generation failed; otherwise the
            audio samples, which are written to a WAV file and shown in the
            waveform widget.
        elapsed: Generation time, displayed with an "s" suffix in the status line.
    """
    try:
        if isinstance(result, Exception):
            error_message = f"Error during speech generation: {result}. "
            details = str(result)
            if "prompt_speech_path" in details:
                # Error related to the voice sample.
                error_message += "Please load a voice sample or select a gender (if no voice sample is loaded)."
            elif "Gender must be 'male' or 'female' or None" in details:
                # Error related to the gender parameter.
                error_message += "Please select a valid gender (Male or Female) if not using 'Pick Voice (Gender optional)'."
            else:
                error_message += "Please check your input and try again. See console for details."
            self.status_label.setText(error_message)
            print("Full Error Details:")
            # The exception arrives as a value, not inside an except block,
            # so print_exc() would have no active exception to report.
            # Print the traceback carried by the exception object instead.
            traceback.print_exception(type(result), result, result.__traceback__)
        else:
            filename = f"output_{int(time.time())}.wav"
            sf.write(filename, result, samplerate=16000)
            self.current_audio_file = filename
            self.status_label.setText(
                f"Generated in {elapsed:.1f}s | Saved to {filename}")
            self.waveform.set_waveform(result)
    finally:
        # Runs even if saving the file raised, so the UI is never left with
        # the Generate button permanently disabled.
        self.progress_bar.setValue(0)
        self.generate_btn.setEnabled(True)
        self.update_generate_button_state()  # Re-validate and set the final state
def on_position_changed(self, position):
    """Convert the player's position into a 0..1 ratio and refresh the playhead and time labels.

    Nothing is updated while the player reports a non-positive duration.
    """
    total_ms = self.audio_player.duration()
    if not total_ms > 0:
        return
    self.update_waveform_playhead(position / total_ms)
    self.update_time_display()
def update_waveform_playhead(self, progress):
    """Forward the playhead position to the waveform widget.

    The waveform multiplies ``progress`` by its width, so callers should
    pass a fraction of the total duration.
    """
    waveform = self.waveform
    waveform.set_playhead_progress(progress)
def on_duration_changed(self, duration):
    """React to a new media duration (ms): update the total-time label and seek scaling.

    A positive duration also starts the 200 ms display timer; any other
    value resets the label and the stored durations to zero.
    """
    if not duration > 0:
        self.total_time.setText("00:00")
        self.audio_player_duration_ms = 0
        self.waveform.set_audio_duration(0)
        return

    self.timer.start(200)
    minutes, seconds = divmod(duration // 1000, 60)
    self.total_time.setText(f"{minutes:02}:{seconds:02}")
    self.audio_player_duration_ms = duration
    # The waveform needs the duration to map clicks to seek positions.
    self.waveform.set_audio_duration(duration)
def seek_audio(self, position_ms):
    """Seek playback to ``position_ms`` (milliseconds), starting playback if idle.

    Playback is started before the position is applied. Seeking first and
    then calling play_audio() could lose the requested position, because
    play_audio() may (re)load the media source.
    """
    if not self.audio_player.isPlaying():
        self.play_audio()
    self.audio_player.setPosition(max(0, int(position_ms)))
def update_word_count(self):
    """Updates the word count dynamically as the user types."""
    # init_ui does not create a word-count label, so skip the update
    # rather than raise AttributeError when the label is absent.
    label = getattr(self, "word_count_label", None)
    if label is None:
        return
    text = self.text_input.toPlainText().strip()
    word_count = len(text.split()) if text else 0
    label.setText(f"Word Count: {word_count}")
def validate_inputs(self):
    """
    Report whether synthesis has what it needs: a voice sample OR a gender.

    Returns True when a voice sample is loaded, or when the gender selector
    is on anything other than its first entry ("Pick Voice (Gender optional)");
    returns False otherwise.
    """
    has_voice_sample = self.voice_sample is not None
    # The selector is only consulted when no voice sample is loaded.
    return has_voice_sample or self.gender_selector.currentIndex() != 0
def update_generate_button_state(self):
    """Enable or disable 'Generate Speech' according to validate_inputs().

    When the inputs are not valid, the status line explains what is missing.
    """
    ready = self.validate_inputs()
    self.generate_btn.setEnabled(ready)
    if ready:
        return
    self.status_label.setText(
        "Load a voice sample or select gender to enable 'Generate Speech'.")
def on_gender_changed(self, index):
    """Handler for gender selector changes. Updates generate button state.

    ``index`` is the value delivered by the selector's currentIndexChanged
    signal; it is not used here because the button state is recomputed
    from the widgets themselves.
    """
    self.update_generate_button_state()
def reset_voice_sample(self):
    """Forget the loaded voice sample and refresh the dependent UI state."""
    self.voice_label.setText("No voice sample loaded")
    self.voice_sample = None
    # Must run after the sample is cleared so validation sees the new state.
    self.update_generate_button_state()
def select_voice_sample(self):
    """Let the user pick a WAV/MP3 voice sample and remember its path.

    Cancelling the dialog leaves the current sample untouched. The generate
    button state is re-validated in either case.
    """
    chosen_path, _selected_filter = QFileDialog.getOpenFileName(
        self, "Select Voice Sample", "", "Audio Files (*.wav *.mp3)"
    )
    if chosen_path:
        self.voice_sample = chosen_path
        sample_name = os.path.basename(chosen_path)
        self.voice_label.setText(f"Loaded voice sample: {sample_name}")
    # Both outcomes (selection or cancel) end with a re-validation.
    self.update_generate_button_state()
def save_audio(self):
    """Copy the most recently generated audio file to a user-chosen location.

    Shows a status message when there is nothing to save or when the copy
    fails; a failed copy no longer raises out of the button's slot.
    """
    if not (self.current_audio_file and os.path.exists(self.current_audio_file)):
        self.status_label.setText("No audio to save!")
        return
    default_filename = f"SparkTTS_output_{int(time.time())}.wav"
    save_path, _ = QFileDialog.getSaveFileName(
        self, "Save Audio", default_filename, "WAV Files (*.wav)"
    )
    if not save_path:
        return
    try:
        shutil.copy(self.current_audio_file, save_path)
    except OSError as exc:
        # shutil.SameFileError (destination is the generated file itself)
        # is an OSError subclass, as are permission and disk errors.
        self.status_label.setText(f"Could not save audio: {exc}")
        return
    self.status_label.setText(
        f"Audio saved to: {os.path.basename(save_path)}")
def play_audio(self):
    """Toggle playback of the current audio file.

    While playing, this pauses. Otherwise it plays the current file, if it
    exists. The media source is only (re)loaded when it differs from what
    the player already holds, so resuming after a pause continues from the
    paused position instead of restarting.
    """
    from PySide6.QtCore import QUrl  # local import: only needed here

    player = self.audio_player
    if player.isPlaying():
        player.pause()
        self.play_btn.setText("Play")
        return

    audio_path = self.current_audio_file
    if not (audio_path and os.path.exists(audio_path)):
        return

    # An absolute file URL is unambiguous regardless of the working directory.
    source = QUrl.fromLocalFile(os.path.abspath(audio_path))
    if player.source() != source:
        player.setSource(source)
    elif player.mediaStatus() == QMediaPlayer.MediaStatus.EndOfMedia:
        # Same file and playback ran to the end: start over from the beginning.
        player.setPosition(0)
    player.play()
    self.play_btn.setText("Pause")
def pause_audio(self):
    """Pause playback; when nothing is playing, delegate to play_audio() instead."""
    if not self.audio_player.isPlaying():
        self.play_audio()
        return
    self.audio_player.pause()
    # Both attributes are updated, as the original code did.
    for button in (self.play_btn, self.play_btn_play):
        button.setText("Play")
def stop_audio(self):
    """Stop playback and reset the play button, waveform markers and elapsed time."""
    self.audio_player.stop()
    for button in (self.play_btn, self.play_btn_play):
        button.setText("Play")
    waveform = self.waveform
    waveform.set_playback_progress_overlay(0.0)
    waveform.set_playhead_progress(0.0)
    self.current_time.setText("00:00")
def run_synthesis(self):
    """Validate the inputs and start a TTSWorker to generate speech.

    Text longer than 150 words is split into 150-word segments and passed
    to the worker as a list; shorter text is passed as a single string.
    With a voice sample loaded, gender, pitch and speed are passed as None;
    otherwise they come from the selector and sliders.
    """
    text = self.text_input.toPlainText().strip()
    if not text:
        self.status_label.setText("Please enter some text!")
        return
    # Re-check validity in case the button state is stale.
    if not self.validate_inputs():
        self.status_label.setText(
            "Load a voice sample or select gender to generate speech.")
        return

    # Segmentation: limit each segment to 150 words.
    segmentation_threshold = 150
    words = text.split()
    if len(words) > segmentation_threshold:
        text_to_process = [
            ' '.join(words[i:i + segmentation_threshold])
            for i in range(0, len(words), segmentation_threshold)
        ]
        self.status_label.setText("Text too long: processing segments...")
        segment_count = len(text_to_process)
    else:
        text_to_process = text
        segment_count = 1
    self.progress_bar.setMaximum(segment_count)
    self.progress_bar.setValue(0)

    if self.voice_sample is not None:
        prompt = self.voice_sample
        gender = None
        pitch = None
        speed = None
    else:
        prompt = None
        # Derive the gender from the selected index, not the display text:
        # the items are marked for translation, so the visible text is not
        # guaranteed to be "Male"/"Female". Index order follows the
        # addItems call in init_ui (0 = pick voice, 1 = Male, 2 = Female).
        gender = {1: "male", 2: "female"}.get(
            self.gender_selector.currentIndex())
        speed = self.speed_slider.value()
        pitch = self.pitch_slider.value()

    # Disable while generating; on_generation_complete re-enables it.
    self.generate_btn.setEnabled(False)
    self.status_label.setText("Generating speech...")
    self.worker = TTSWorker(
        self.model, text_to_process, prompt, gender, pitch, speed)
    self.worker.progress_update.connect(self.on_generation_progress)
    self.worker.result_ready.connect(self.on_generation_complete)
    self.worker.start()
def on_generation_progress(self, current, total):
    """Show per-segment progress in the status label and the progress bar.

    Args:
        current: 1-based number of the segment that has just finished.
        total: Total number of segments in this run.
    """
    # Same language lookup as update_gui_language(); falls back to English
    # when the selected language has no table.
    selected_lang = self.gui_language_selector.currentText()
    t = self.translations.get(selected_lang, self.translations["English"])
    template = t.get(
        "status_generating_segment",
        "Generating segment {current} / {total}...")
    self.status_label.setText(template.format(current=current, total=total))
    self.progress_bar.setValue(current)
# AI-generated GUI translations, keyed by the language name shown in the
# language selector. Every language table carries the same set of keys, so a
# lookup that works for one language works for all of them. Values containing
# {current}, {total} or {filename} are str.format() templates.
translations = {
    "English": {
        "SparkTTS Studio": "SparkTTS Studio",
        "enter_text": "Enter text for speech synthesis...",
        "language": "Language:",
        "word_count": "Word Count:",
        "load_voice": "Load Voice Sample",
        "reset_voice": "Reset Voice Sample",
        "generate_speech": "Generate Speech",
        "gender": "Gender:",
        "auto": "Pick Voice (Gender optional)",  # Renamed "Auto"
        "male": "Male",
        "female": "Female",
        "pitch_label": "Pitch",
        "speed_label": "Speed",
        "play_button": "Play",
        "play": "Play",  # Same key the other languages use
        "stop": "Stop",
        "save_audio": "Save Audio",
        "model_cuda": "Model loaded on CUDA",
        "pitch": "Pitch",
        "speed": "Speed",
        "text_input_group": "Text Input",
        "synthesis_controls_group": "Synthesis Controls",
        "audio_visualization_group": "Audio Visualization",
        "voice_parameters_group": "Voice Parameters",
        "no_voice_sample_loaded": "No voice sample loaded",
        "volume_label": "Volume:",
        "ready_status": "Ready",
        "tray_show": "Show",
        "tray_exit": "Exit",
        "tray_message_title": "SparkTTS Studio",
        "tray_message_text": "The application is running in the system tray",
        "error_voice_sample_missing": "Please load a voice sample or select a gender (if no voice sample is loaded).",
        "error_gender_invalid": "Please select a valid gender (Male or Female) if not using 'Pick Voice (Gender optional)'.",
        "error_generic": "Please check your input and try again. See console for details.",
        "status_generating_segment": "Generating segment {current} / {total}...",
        "status_generating_speech": "Generating speech...",
        "status_load_voice_sample_enable_generate": "Load a voice sample or select gender to enable 'Generate Speech'.",
        "status_no_audio_to_save": "No audio to save!",
        "status_please_enter_text": "Please enter some text!",
        "status_text_too_long_segments": "Text too long: processing segments...",
        "status_voice_sample_cleared": "Voice sample cleared.",
        "status_loaded_voice_sample": "Loaded voice sample: {filename}",
        "status_audio_saved_to": "Audio saved to: {filename}",
        "play_button_play": "Play",
        "play_button_pause": "Pause",
        "pause_button": "Pause",
        "stop_button": "Stop",
        "generate_button": "Generate Speech",
        "save_audio_button": "Save Audio",
        "reset_voice_sample_status": "Voice sample cleared.",
        "exit_button": "Exit",
    },
    "Bulgarian": {
        "SparkTTS Studio": "SparkTTS Студио",
        "enter_text": "Въведете текст за синтез на реч...",
        "language": "Език:",
        "word_count": "Брой думи:",
        "load_voice": "Зареди гласов файл",
        "reset_voice": "Изчисти гласов файл",
        "generate_speech": "Генерирай реч",
        "gender": "Пол:",
        "auto": "Избери глас",
        "male": "Мъжски",
        "female": "Женски",
        "pitch_label": "Височина на тона",
        "speed_label": "Скорост",
        "play": "Пусни",
        "play_button": "Пусни",  # Same key the English table uses
        "stop": "Спри",
        "save_audio": "Запази аудио",
        "model_cuda": "Моделът е зареден на CUDA",
        "pitch": "Височина на тона",
        "speed": "Скорост",
        "text_input_group": "Въвеждане на текст",
        "synthesis_controls_group": "Контрол на синтеза",
        "audio_visualization_group": "Визуализация на аудио",
        "voice_parameters_group": "Гласови параметри",
        "no_voice_sample_loaded": "Не е зареден гласов файл",
        "volume_label": "Сила на звука:",
        "ready_status": "Готов",
        "tray_show": "Покажи",
        "tray_exit": "Изход",
        "tray_message_title": "SparkTTS Studio",
        "tray_message_text": "Приложението работи в системния трей",
        "error_voice_sample_missing": "Моля, заредете гласов файл или изберете пол (ако не е зареден гласов файл).",
        "error_gender_invalid": "Моля, изберете валиден пол (Мъж или Жена), ако не използвате 'Избери глас (Пол по избор)'.",
        "error_generic": "Моля, проверете въведените данни и опитайте отново. Вижте конзолата за подробности.",
        "status_generating_segment": "Генериране на сегмент {current} / {total}...",
        "status_generating_speech": "Генериране на реч...",
        "status_load_voice_sample_enable_generate": "Заредете гласов файл или изберете пол, за да активирате 'Генерирай реч'.",
        "status_no_audio_to_save": "Няма аудио за запазване!",
        "status_please_enter_text": "Моля, въведете текст!",
        "status_text_too_long_segments": "Текстът е твърде дълъг: обработка на сегменти...",
        "status_voice_sample_cleared": "Гласовият файл е изчистен.",
        "status_loaded_voice_sample": "Зареден гласов файл: {filename}",
        "status_audio_saved_to": "Аудиото е запазено в: {filename}",
        "play_button_play": "Пусни",
        "play_button_pause": "Пауза",
        "stop_button": "Спри",
        "generate_button": "Генерирай реч",
        "save_audio_button": "Запази аудио",
        "reset_voice_sample_status": "Гласовият файл е изчистен.",
        "exit_button": "Затвори",
        "pause_button": "Пауза",
    },
    "Spanish": {
        "SparkTTS Studio": "SparkTTS Studio",
        "enter_text": "Introduzca texto para la síntesis de voz...",
        "language": "Idioma:",
        "word_count": "Recuento de palabras:",
        "load_voice": "Cargar muestra de voz",
        "reset_voice": "Restablecer muestra de voz",
        "generate_speech": "Generar voz",
        "gender": "Género:",
        # Renamed "Auto" - example translation, verify!
        "auto": "Elegir voz (Género opcional)",
        "male": "Masculino",
        "female": "Femenino",
        "pitch_label": "Tono",
        "speed_label": "Velocidad",
        "play": "Reproducir",
        "play_button": "Reproducir",  # Same key the English table uses
        "stop": "Detener",
        "save_audio": "Guardar audio",
        "model_cuda": "Modelo cargado en CUDA",
        "pitch": "Tono",
        "speed": "Velocidad",
        "text_input_group": "Entrada de texto",
        "synthesis_controls_group": "Controles de síntesis",
        "audio_visualization_group": "Visualización de audio",
        "voice_parameters_group": "Parámetros de voz",
        "no_voice_sample_loaded": "No se ha cargado ninguna muestra de voz",
        "volume_label": "Volumen:",
        "ready_status": "Listo",
        "tray_show": "Mostrar",
        "tray_exit": "Salir",
        "tray_message_title": "SparkTTS Studio",
        "tray_message_text": "La aplicación se está ejecutando en la bandeja del sistema",
        "error_voice_sample_missing": "Por favor, cargue una muestra de voz o seleccione un género (si no se carga ninguna muestra de voz).",
        "error_gender_invalid": "Por favor, seleccione un género válido (Masculino o Femenino) si no utiliza 'Elegir voz (Género opcional)'.",
        "error_generic": "Por favor, revise su entrada e inténtelo de nuevo. Consulte la consola para obtener más detalles.",
        "status_generating_segment": "Generando segmento {current} / {total}...",
        "status_generating_speech": "Generando voz...",
        "status_load_voice_sample_enable_generate": "Cargue una muestra de voz o seleccione un género para activar 'Generar voz'.",
        "status_no_audio_to_save": "¡No hay audio para guardar!",
        "status_please_enter_text": "¡Por favor, introduzca algún texto!",
        "status_text_too_long_segments": "Texto demasiado largo: procesando segmentos...",
        "status_voice_sample_cleared": "Muestra de voz borrada.",
        "status_loaded_voice_sample": "Muestra de voz cargada: {filename}",
        "status_audio_saved_to": "Audio guardado en: {filename}",
        "play_button_play": "Reproducir",
        "play_button_pause": "Pausa",
        "stop_button": "Detener",
        "generate_button": "Generar voz",
        "save_audio_button": "Guardar audio",
        "reset_voice_sample_status": "Muestra de voz borrada.",
        "exit_button": "Salir",
        "pause_button": "Pausa",
    },
    "French": {
        "SparkTTS Studio": "SparkTTS Studio",
        "enter_text": "Entrez du texte pour la synthèse vocale...",
        "language": "Langue:",
        "word_count": "Nombre de mots:",
        "load_voice": "Charger un échantillon de voix",
        "reset_voice": "Réinitialiser l'échantillon vocal",
        "generate_speech": "Générer la parole",
        "gender": "Genre:",
        # Renamed "Auto" - example translation, verify!
        "auto": "Choisir une voix (Genre optionnel)",
        "male": "Masculin",
        "female": "Féminin",
        "pitch_label": "Hauteur",
        "speed_label": "Vitesse",
        "play": "Lecture",
        "play_button": "Lecture",  # Same key the English table uses
        "stop": "Arrêter",
        "save_audio": "Enregistrer l'audio",
        "model_cuda": "Modèle chargé sur CUDA",
        "pitch": "Hauteur",
        "speed": "Vitesse",
        "text_input_group": "Saisie de texte",
        "synthesis_controls_group": "Contrôles de synthèse",
        "audio_visualization_group": "Visualisation audio",
        "voice_parameters_group": "Paramètres vocaux",
        "no_voice_sample_loaded": "Aucun échantillon vocal chargé",
        "volume_label": "Volume:",
        "ready_status": "Prêt",
        "tray_show": "Afficher",
        "tray_exit": "Quitter",
        "tray_message_title": "SparkTTS Studio",
        "tray_message_text": "L'application fonctionne dans la barre des tâches",
        "error_voice_sample_missing": "Veuillez charger un échantillon vocal ou sélectionner un genre (si aucun échantillon vocal n'est chargé).",
        "error_gender_invalid": "Veuillez sélectionner un genre valide (Masculin ou Féminin) si vous n'utilisez pas 'Choisir une voix (Genre optionnel)'.",
        "error_generic": "Veuillez vérifier votre saisie et réessayer. Consultez la console pour plus de détails.",
        "status_generating_segment": "Génération du segment {current} / {total}...",
        "status_generating_speech": "Génération de la parole...",
        "status_load_voice_sample_enable_generate": "Chargez un échantillon vocal ou sélectionnez un genre pour activer 'Générer la parole'.",
        "status_no_audio_to_save": "Aucun audio à enregistrer !",
        "status_please_enter_text": "Veuillez saisir du texte !",
        "status_text_too_long_segments": "Texte trop long : traitement des segments...",
        "status_voice_sample_cleared": "Échantillon vocal effacé.",
        "status_loaded_voice_sample": "Échantillon vocal chargé : {filename}",
        "status_audio_saved_to": "Audio enregistré dans : {filename}",
        "play_button_play": "Lecture",
        "play_button_pause": "Pause",
        "stop_button": "Arrêter",
        "generate_button": "Générer la parole",
        "save_audio_button": "Enregistrer l'audio",
        "reset_voice_sample_status": "Échantillon vocal effacé.",
        "exit_button": "Quitter",
        "pause_button": "Pause",
    },
    "Japanese": {
        "SparkTTS Studio": "SparkTTS Studio",
        "enter_text": "音声合成のためのテキストを入力してください…",
        "language": "言語:",
        "word_count": "単語数:",
        "load_voice": "音声サンプルを読み込む",
        "reset_voice": "音声サンプルをリセット",
        "generate_speech": "音声を生成",
        "gender": "性別:",
        # Renamed "Auto" - example translation, verify!
        "auto": "音声を選択 (性別はオプション)",
        "male": "男性",
        "female": "女性",
        "pitch_label": "ピッチ",
        "speed_label": "速度",
        "play": "再生",
        "play_button": "再生",  # Same key the English table uses
        "stop": "停止",
        "save_audio": "音声を保存",
        "model_cuda": "CUDAでモデルが読み込まれました",
        "pitch": "ピッチ",
        "speed": "速度",
        "text_input_group": "テキスト入力",
        "synthesis_controls_group": "合成コントロール",
        "audio_visualization_group": "オーディオ可視化",
        "voice_parameters_group": "音声パラメータ",
        "no_voice_sample_loaded": "音声サンプルはロードされていません",
        "volume_label": "音量:",
        "ready_status": "準備完了",
        "tray_show": "表示",
        "tray_exit": "終了",
        "tray_message_title": "SparkTTS Studio",
        "tray_message_text": "アプリケーションはシステムトレイで実行されています",
        "error_voice_sample_missing": "音声サンプルをロードするか、性別を選択してください(音声サンプルがロードされていない場合)。",
        "error_gender_invalid": "'音声を選択(性別はオプション)'を使用しない場合は、有効な性別(男性または女性)を選択してください。",
        "error_generic": "入力内容を確認して、もう一度お試しください。詳細については、コンソールを参照してください。",
        "status_generating_segment": "セグメント {current} / {total} を生成中...",
        "status_generating_speech": "音声を生成中...",
        "status_load_voice_sample_enable_generate": "音声サンプルをロードするか、性別を選択して「音声を生成」を有効にしてください。",
        "status_no_audio_to_save": "保存するオーディオはありません!",
        "status_please_enter_text": "テキストを入力してください!",
        "status_text_too_long_segments": "テキストが長すぎます: セグメントを処理中...",
        "status_voice_sample_cleared": "音声サンプルをクリアしました。",
        "status_loaded_voice_sample": "音声サンプルをロードしました: {filename}",
        "status_audio_saved_to": "オーディオを保存しました: {filename}",
        "play_button_play": "再生",
        "play_button_pause": "一時停止",
        "stop_button": "停止",
        "generate_button": "音声を生成",
        "save_audio_button": "音声を保存",
        "reset_voice_sample_status": "音声サンプルをクリアしました。",
        "exit_button": "終了",
        "pause_button": "一時停止",
    },
}
def update_gui_language(self):
    """Re-label the translatable widgets for the selected GUI language.

    The language name is read from ``self.gui_language_selector`` and looked
    up in ``self.translations``; a name with no table falls back to English.
    Finishes by calling ``self.update_generate_button_state()``.
    """
    # Get selected language, default to English.
    selected_lang = self.gui_language_selector.currentText()
    t = self.translations.get(selected_lang, self.translations["English"])

    # Window title and language picker.
    self.setWindowTitle(t["SparkTTS Studio"])
    self.language_label.setText(t["language"])

    # "Text Input" group box and its placeholder.
    self.input_group.setTitle(t["text_input_group"])
    self.text_input.setPlaceholderText(t["enter_text"])

    # "Synthesis Controls" group box.
    self.controls_group.setTitle(t["synthesis_controls_group"])
    self.voice_btn_load_voice.setText(t["load_voice"])
    # Only show "No voice sample loaded" while that is actually true;
    # otherwise a language switch would overwrite whatever the label shows
    # for a loaded sample. getattr() covers a call made before the
    # attribute exists.
    if getattr(self, "voice_sample", None) is None:
        self.voice_label_status.setText(t["no_voice_sample_loaded"])

    # "Voice Parameters" group box.
    self.gender_box.setTitle(t["voice_parameters_group"])
    self.gender_selector_label_widget.setText(t["gender"])
    # Gender combo box items, rewritten in place by index:
    # 0 = pick voice, 1 = male, 2 = female.
    for i, item_text in enumerate([t["auto"], t["male"], t["female"]]):
        self.gender_selector.setItemText(i, item_text)
    self.pitch_box.setTitle(t["pitch_label"])
    self.speed_box.setTitle(t["speed_label"])

    # "Audio Visualization" group box and playback controls.
    self.vis_group.setTitle(t["audio_visualization_group"])
    self.volume_label_widget.setText(t["volume_label"])
    # The Play button gets the translated "Play" text. It must not be
    # overwritten with the English literal afterwards, or the button is
    # never localized.
    self.play_btn_play.setText(t["play_button_play"])
    self.play_btn_stop.setText(t["stop_button"])
    self.generate_btn_generate.setText(t["generate_button"])
    self.save_btn_save.setText(t["save_audio_button"])
    self.exit_btn_main_window.setText(t["exit_button"])

    # Bottom status bar "Ready" text.
    self.status_label_bottom.setText(t["ready_status"])

    # Tray message - conditionally translate if it's always shown
    # self.tray_icon.showMessage(
    #     t["tray_message_title"],
    #     t["tray_message_text"],
    #     QSystemTrayIcon.Information,
    #     2000
    # )

    # Update button state after language change.
    self.update_generate_button_state()
# --- TTSWorker class (no changes needed) --- | |
class TTSWorker(QThread):
    """QThread that runs ``model.inference`` for one text or a list of segments.

    The outcome is reported through ``result_ready``; per-segment progress
    through ``progress_update``.
    """

    # Emits (result, generation_time). On failure the result is the caught
    # exception and the time is 0.
    result_ready = Signal(object, float)
    # Emits (current_segment, total_segments), counting from 1.
    progress_update = Signal(int, int)

    def __init__(self, model, text, voice_sample, gender, pitch, speed):
        """Store the synthesis request.

        text: Either a string or a list of strings (segments).
        The remaining arguments are forwarded unchanged to
        ``model.inference`` (``voice_sample`` as ``prompt_speech_path``).
        """
        super().__init__()
        self.model = model
        self.text = text
        self.voice_sample = voice_sample
        self.gender = gender
        self.pitch = pitch
        self.speed = speed

    def _synthesize(self, segment):
        """Run one inference call for ``segment`` with gradients disabled."""
        with torch.no_grad():
            return self.model.inference(
                segment,
                prompt_speech_path=self.voice_sample,
                gender=self.gender,
                pitch=self.pitch,
                speed=self.speed
            )

    def run(self):
        """Synthesize the stored text and emit the result or the exception."""
        started_at = time.time()
        try:
            if isinstance(self.text, list):
                # Segmented input: synthesize piece by piece, report progress
                # after each one, then join along axis 0.
                segment_count = len(self.text)
                pieces = []
                for number, segment in enumerate(self.text, start=1):
                    pieces.append(self._synthesize(segment))
                    self.progress_update.emit(number, segment_count)
                final_wav = np.concatenate(pieces, axis=0)
            else:
                # Single string: one call, reported as segment 1 of 1.
                final_wav = self._synthesize(self.text)
                self.progress_update.emit(1, 1)
            self.result_ready.emit(final_wav, time.time() - started_at)
        except Exception as e:
            # Hand the exception object to the receiver instead of raising
            # inside the worker thread.
            self.result_ready.emit(e, 0)
if __name__ == "__main__":
    # Script entry point: build the Qt application, load the model, show the
    # main window and run the event loop until exit.
    app = QApplication(sys.argv)
    # Apply stylesheet globally for the app.
    app.setStyleSheet(STYLE_SHEET)

    # Use CUDA when torch reports it as available, otherwise the CPU.
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)
    model = SparkTTS("pretrained_models/Spark-TTS-0.5B", device=device)

    # The window receives the upper-cased device type ("CUDA" or "CPU").
    window = SparkTTSApp(model, device.type.upper())
    window.show()

    exit_code = app.exec()
    sys.exit(exit_code)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment