Last active
March 6, 2025 22:17
-
-
Save paulsheldrake/f8f522789e41ea6b419f935aa6b8faea to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Drupal\deepgram\Plugin\AiProvider; | |
use Drupal\ai\Attribute\AiProvider; | |
use Drupal\ai\Base\AiProviderClientBase; | |
use Drupal\ai\Exception\AiBadRequestException; | |
use Drupal\ai\OperationType\GenericType\AudioFile; | |
use Drupal\ai\OperationType\SpeechToText\SpeechToTextInput; | |
use Drupal\ai\OperationType\SpeechToText\SpeechToTextInterface; | |
use Drupal\ai\OperationType\SpeechToText\SpeechToTextOutput; | |
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInput; | |
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInterface; | |
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechOutput; | |
use Drupal\Core\Config\ImmutableConfig; | |
use Drupal\Core\File\FileExists; | |
use Drupal\Core\Plugin\ContainerFactoryPluginInterface; | |
use Drupal\Core\StringTranslation\TranslatableMarkup; | |
use Drupal\deepgram\Deepgram; | |
use Drupal\file\Entity\File; | |
use Symfony\Component\DependencyInjection\ContainerInterface; | |
use Symfony\Component\Yaml\Yaml; | |
/** | |
* Plugin implementation of the 'deepgram' provider. | |
*/ | |
#[AiProvider( | |
id: 'deepgram', | |
label: new TranslatableMarkup('Deepgram'), | |
)] | |
class DeepgramProvider extends AiProviderClientBase implements | |
ContainerFactoryPluginInterface, | |
SpeechToTextInterface, | |
TextToSpeechInterface | |
{ | |
/**
 * Deepgram API client service, injected in create() from 'deepgram.api'.
 *
 * @var \Drupal\deepgram\Deepgram
 */
protected $deepgramClient;

/**
 * Entity type manager, used to obtain the file entity storage.
 *
 * @var \Drupal\Core\Entity\EntityTypeManagerInterface
 */
protected $entityTypeManager;

/**
 * File entities created by generateTemporaryFile().
 *
 * They are deleted again when this provider instance is destructed.
 *
 * @var array
 */
protected $temporaryFiles = [];
/**
 * Removes every temporary file entity this instance has registered.
 */
public function __destruct()
{
  // Each entry was appended by generateTemporaryFile(); delete them in
  // the order they were created.
  array_walk($this->temporaryFiles, static function ($temporary_file): void {
    $temporary_file->delete();
  });
}
/**
 * {@inheritdoc}
 */
public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition): static
{
  // Let the base class build the plugin first, then attach the extra
  // services this provider needs.
  $provider = parent::create($container, $configuration, $plugin_id, $plugin_definition);

  // Property name => service ID in the container.
  $extra_services = [
    'deepgramClient' => 'deepgram.api',
    'entityTypeManager' => 'entity_type.manager',
  ];
  foreach ($extra_services as $property => $service_id) {
    $provider->{$property} = $container->get($service_id);
  }

  return $provider;
}
/**
 * {@inheritdoc}
 */
public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array
{
  // Static catalogue of model ID => human readable label, grouped by the
  // operation type each model serves. $capabilities is not consulted.
  $models_by_operation = [
    'speech_to_text' => [
      'nova-3' => 'Nova 3',
      'nova-2' => 'Nova 2',
      'nova' => 'Nova',
      'base' => 'Base',
    ],
    'text_to_speech' => [
      'aura-asteria-en' => 'Asteria English (US)',
      'aura-luna-en' => 'Luna English (US)',
      'aura-stella-en' => 'Stella English (US)',
      'aura-athena-en' => 'Athena English (UK)',
      'aura-hera-en' => 'Hera English (US)',
      'aura-orion-en' => 'Orion English (US)',
      'aura-arcas-en' => 'Arcas English (US)',
      'aura-perseus-en' => 'Perseus English (US)',
      'aura-angus-en' => 'Angus English (Ireland)',
      'aura-orpheus-en' => 'Orpheus English (US)',
      // The label previously read "Hellos"; the voice is named Helios.
      'aura-helios-en' => 'Helios English (UK)',
      'aura-zeus-en' => 'Zeus English (US)',
    ],
  ];

  // No operation type given: return every model, speech-to-text first.
  if ($operation_type === NULL) {
    return array_merge(...array_values($models_by_operation));
  }

  // Unknown operation types yield an empty list.
  return $models_by_operation[$operation_type] ?? [];
}
/**
 * {@inheritdoc}
 */
public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool
{
  // Without an API key the provider is not configured, so not usable.
  if (!$this->getConfig()->get('api_key')) {
    return FALSE;
  }

  // No (or an empty) operation type means "is the provider usable at all".
  if (!$operation_type) {
    return TRUE;
  }

  // Otherwise it is usable only for the operation types Deepgram supports.
  return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE);
}
/**
 * {@inheritdoc}
 */
public function getSupportedOperationTypes(): array
{
  // Deepgram is wired up for transcription and speech synthesis only.
  $operation_types = ['speech_to_text', 'text_to_speech'];
  return $operation_types;
}
/**
 * {@inheritdoc}
 */
public function getConfig(): ImmutableConfig
{
  // Read-only view of this provider's settings object.
  $settings = $this->configFactory->get('provider_deepgram.settings');
  return $settings;
}
/**
 * {@inheritdoc}
 */
public function getApiDefinition(): array
{
  // The defaults ship as a YAML file inside the deepgram module directory.
  $module_path = $this->moduleHandler->getModule('deepgram')->getPath();
  $definition_file = $module_path . '/definitions/api_defaults.yml';

  return Yaml::parseFile($definition_file);
}
/**
 * {@inheritdoc}
 */
public function getModelSettings(string $model_id, array $generalConfig = []): array
{
  // Model ID lists shared by the toggles below.
  $all_models = ['nova-3', 'nova-2', 'nova', 'base'];
  $nova_models = ['nova-3', 'nova-2', 'nova'];

  // Option name => [label, description, model IDs that support it].
  // Every option is an on/off toggle.
  $toggles = [
    'smart_format' => [
      'Smart Format',
      "Smart Format improves readability by applying additional formatting. When enabled, punctuation and paragraph breaks will be applied as well as formatting of other entities, such as dates, times, and numbers.",
      $all_models,
    ],
    'punctuate' => [
      'Punctuate',
      "Indicates whether to add punctuation and capitalization to the transcript.",
      $all_models,
    ],
    'paragraphs' => [
      'Paragraphs',
      "Indicates whether Deepgram will split audio into paragraphs to improve transcript readability. When paragraphs is set to true, punctuate will also be set to true.",
      $all_models,
    ],
    'profanity_filter' => [
      'Profanity Filter',
      "Indicates whether to remove profanity from the transcript.",
      $all_models,
    ],
    'diarize' => [
      'Diarization',
      "Indicates whether to recognize speaker changes.",
      $all_models,
    ],
    'filler_words' => [
      'Filler Words',
      'Indicates whether Deepgram will transcribe disfluencies in your audio, like "uh" and "um".',
      $nova_models,
    ],
  ];

  foreach ($toggles as $option_name => [$label, $description, $supported_models]) {
    // Skip toggles the requested model does not offer.
    if (!in_array($model_id, $supported_models)) {
      continue;
    }

    // Added as a checkbox whose values are the strings 'true'/'false',
    // defaulting to 'false'. An existing entry of the same name in
    // $generalConfig is overwritten.
    $generalConfig[$option_name] = [
      'label' => $label,
      'description' => $description,
      'type' => 'checkbox',
      'default' => 'false',
      'required' => FALSE,
      'constraints' => [
        'options' => ['true', 'false'],
      ],
    ];
  }

  return $generalConfig;
}
/**
 * {@inheritdoc}
 */
public function setAuthentication(mixed $authentication): void
{
  // Hand the supplied credential to the Deepgram client as its API key.
  $client = $this->deepgramClient;
  $client->setApiKey($authentication);
}
/**
 * {@inheritdoc}
 */
public function speechToText(string|SpeechToTextInput $input, string $model_id, array $tags = []): SpeechToTextOutput
{
  // Normalize the input to raw audio bytes, then stage it as a temporary
  // file entity for the client.
  $binary = $input instanceof SpeechToTextInput ? $input->getBinary() : $input;
  $audio_file = $this->generateTemporaryFile($binary, 'tmp.mp3');

  // The explicitly requested model must win over any 'model' key already
  // present in the stored configuration, so it goes on the left of the
  // array union.
  $configuration = ['model' => $model_id] + $this->configuration;
  $response = $this->deepgramClient->transcribe($audio_file, $configuration);

  // Only the first alternative of the first channel is used.
  $alternative = $response['results']['channels'][0]['alternatives'][0] ?? NULL;
  if (!isset($alternative['transcript'])) {
    throw new AiBadRequestException('No transcription found');
  }

  $text = $alternative['transcript'];
  // Prefer the paragraph-formatted transcript when the response includes
  // one; its line breaks are converted to <br> tags.
  if (isset($alternative['paragraphs']['transcript'])) {
    $text = nl2br($alternative['paragraphs']['transcript']);
  }

  // Fall back to empty metadata when the response carries none.
  return new SpeechToTextOutput(
    $text,
    $response,
    $response['metadata'] ?? []
  );
}
/**
 * {@inheritdoc}
 */
public function textToSpeech(string|TextToSpeechInput $input, string $model_id, array $tags = []): TextToSpeechOutput
{
  // Normalize the input to a plain string.
  $text = $input instanceof TextToSpeechInput ? $input->getText() : $input;

  // The explicitly requested model must win over any 'model' key already
  // present in the stored configuration, so it goes on the left of the
  // array union.
  $configuration = ['model' => $model_id] + $this->configuration;
  $response = $this->deepgramClient->textToSpeech($text, $configuration);

  if (empty($response)) {
    throw new AiBadRequestException('No audio found');
  }

  // Wrap the client response as a single MP3 audio file; the same response
  // is also passed through as the raw output.
  $audio = new AudioFile($response, 'audio/mpeg', 'deepgram.mp3');
  return new TextToSpeechOutput([$audio], $response, []);
}
/**
 * Writes binary data to the temp directory and wraps it in a file entity.
 *
 * The created entity is remembered in $temporaryFiles so the destructor
 * deletes it when this provider instance goes away.
 *
 * @param string $binary
 *   The raw file contents.
 * @param string $filename
 *   The preferred filename. If a file with that name already exists in the
 *   temp directory, a unique name is chosen instead of overwriting it.
 *
 * @return \Drupal\file\Entity\File
 *   The saved, non-permanent (status 0) file entity.
 */
protected function generateTemporaryFile(string $binary, string $filename): File
{
  $tmp_path = $this->fileSystem->getTempDirectory() . '/' . $filename;
  // Rename rather than replace on collision: callers always pass the same
  // filename, so replacing would let overlapping calls overwrite (and later
  // delete) each other's audio.
  $path = $this->fileSystem->saveData($binary, $tmp_path, FileExists::Rename);

  $file = $this->entityTypeManager->getStorage('file')->create([
    'uri' => $path,
    'filename' => $filename,
    'status' => 0,
  ]);
  $file->save();

  // Register the file for destruction when finished.
  $this->temporaryFiles[] = $file;
  return $file;
}
/**
 * Exposes the underlying Deepgram client.
 *
 * This is the same client instance the provider uses for its own
 * transcription and text-to-speech calls.
 *
 * @return \Drupal\deepgram\Deepgram
 *   The Deepgram client.
 */
public function getClient(): Deepgram
{
  $client = $this->deepgramClient;
  return $client;
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment