Skip to content

Instantly share code, notes, and snippets.

@paulsheldrake
Last active March 6, 2025 22:17
Show Gist options
  • Save paulsheldrake/f8f522789e41ea6b419f935aa6b8faea to your computer and use it in GitHub Desktop.
Save paulsheldrake/f8f522789e41ea6b419f935aa6b8faea to your computer and use it in GitHub Desktop.
<?php
namespace Drupal\deepgram\Plugin\AiProvider;
use Drupal\ai\Attribute\AiProvider;
use Drupal\ai\Base\AiProviderClientBase;
use Drupal\ai\Exception\AiBadRequestException;
use Drupal\ai\OperationType\GenericType\AudioFile;
use Drupal\ai\OperationType\SpeechToText\SpeechToTextInput;
use Drupal\ai\OperationType\SpeechToText\SpeechToTextInterface;
use Drupal\ai\OperationType\SpeechToText\SpeechToTextOutput;
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInput;
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInterface;
use Drupal\ai\OperationType\TextToSpeech\TextToSpeechOutput;
use Drupal\Core\Config\ImmutableConfig;
use Drupal\Core\File\FileExists;
use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Drupal\deepgram\Deepgram;
use Drupal\file\Entity\File;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Symfony\Component\Yaml\Yaml;
/**
 * Plugin implementation of the 'deepgram' provider.
 *
 * Wires the Deepgram API client service into the AI module's provider
 * system for two operation types: 'speech_to_text' and 'text_to_speech'.
 */
#[AiProvider(
    id: 'deepgram',
    label: new TranslatableMarkup('Deepgram'),
)]
class DeepgramProvider extends AiProviderClientBase implements
    ContainerFactoryPluginInterface,
    SpeechToTextInterface,
    TextToSpeechInterface
{
    /**
     * The Deepgram Client.
     *
     * @var \Drupal\deepgram\Deepgram
     */
    protected $deepgramClient;

    /**
     * The entity type manager.
     *
     * @var \Drupal\Core\Entity\EntityTypeManagerInterface
     */
    protected $entityTypeManager;

    /**
     * File entities created by generateTemporaryFile() during this request.
     *
     * They are deleted again when the provider object is destroyed.
     *
     * @var \Drupal\file\Entity\File[]
     */
    protected $temporaryFiles = [];

    /**
     * Destructor.
     *
     * Deletes every temporary file entity registered while transcribing.
     */
    public function __destruct()
    {
        foreach ($this->temporaryFiles as $file) {
            $file->delete();
        }
    }

    /**
     * {@inheritdoc}
     */
    public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition): static
    {
        $instance = parent::create($container, $configuration, $plugin_id, $plugin_definition);
        $instance->deepgramClient = $container->get('deepgram.api');
        $instance->entityTypeManager = $container->get('entity_type.manager');
        return $instance;
    }

    /**
     * {@inheritdoc}
     *
     * Returns a hard-coded map of model id => human readable label. When no
     * operation type is given, the models of both operation types are merged.
     */
    public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array
    {
        $models = [];
        if (is_null($operation_type) || $operation_type === 'speech_to_text') {
            $models['nova-3'] = 'Nova 3';
            $models['nova-2'] = 'Nova 2';
            $models['nova'] = 'Nova';
            $models['base'] = 'Base';
        }
        if (is_null($operation_type) || $operation_type === 'text_to_speech') {
            $models['aura-asteria-en'] = 'Asteria English (US)';
            $models['aura-luna-en'] = 'Luna English (US)';
            $models['aura-stella-en'] = 'Stella English (US)';
            $models['aura-athena-en'] = 'Athena English (UK)';
            $models['aura-hera-en'] = 'Hera English (US)';
            $models['aura-orion-en'] = 'Orion English (US)';
            $models['aura-arcas-en'] = 'Arcas English (US)';
            $models['aura-perseus-en'] = 'Perseus English (US)';
            $models['aura-angus-en'] = 'Angus English (Ireland)';
            $models['aura-orpheus-en'] = 'Orpheus English (US)';
            $models['aura-helios-en'] = 'Helios English (UK)';
            $models['aura-zeus-en'] = 'Zeus English (US)';
        }
        return $models;
    }

    /**
     * {@inheritdoc}
     */
    public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool
    {
        // Without an API key the provider is not configured, so not usable.
        if (!$this->getConfig()->get('api_key')) {
            return FALSE;
        }
        // If it is one of the operation types Deepgram supports, it is usable.
        if ($operation_type) {
            return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE);
        }
        return TRUE;
    }

    /**
     * {@inheritdoc}
     */
    public function getSupportedOperationTypes(): array
    {
        return [
            'speech_to_text',
            'text_to_speech',
        ];
    }

    /**
     * {@inheritdoc}
     */
    public function getConfig(): ImmutableConfig
    {
        return $this->configFactory->get('provider_deepgram.settings');
    }

    /**
     * {@inheritdoc}
     *
     * Parses definitions/api_defaults.yml shipped inside the deepgram module.
     */
    public function getApiDefinition(): array
    {
        $module_path = $this->moduleHandler->getModule('deepgram')->getPath();
        return Yaml::parseFile($module_path . '/definitions/api_defaults.yml');
    }

    /**
     * {@inheritdoc}
     *
     * Adds one checkbox setting to $generalConfig for every option in the
     * table below whose 'models' list contains $model_id.
     */
    public function getModelSettings(string $model_id, array $generalConfig = []): array
    {
        // These are all booleans.
        $model_options = [
            'smart_format' => [
                'label' => 'Smart Format',
                'models' => ['nova-3', 'nova-2', 'nova', 'base'],
                'description' => "Smart Format improves readability by applying additional formatting. When enabled, punctuation and paragraph breaks will be applied as well as formatting of other entities, such as dates, times, and numbers.",
            ],
            'punctuate' => [
                'label' => 'Punctuate',
                'models' => ['nova-3', 'nova-2', 'nova', 'base'],
                'description' => "Indicates whether to add punctuation and capitalization to the transcript.",
            ],
            'paragraphs' => [
                'label' => 'Paragraphs',
                'models' => ['nova-3', 'nova-2', 'nova', 'base'],
                'description' => "Indicates whether Deepgram will split audio into paragraphs to improve transcript readability. When paragraphs is set to true, punctuate will also be set to true.",
            ],
            'profanity_filter' => [
                'label' => 'Profanity Filter',
                'models' => ['nova-3', 'nova-2', 'nova', 'base'],
                'description' => "Indicates whether to remove profanity from the transcript.",
            ],
            'diarize' => [
                'label' => 'Diarization',
                'models' => ['nova-3', 'nova-2', 'nova', 'base'],
                'description' => "Indicates whether to recognize speaker changes.",
            ],
            'filler_words' => [
                'label' => 'Filler Words',
                'models' => ['nova-3', 'nova-2', 'nova'],
                'description' => 'Indicates whether Deepgram will transcribe disfluencies in your audio, like "uh" and "um".',
            ],
        ];
        foreach ($model_options as $model_option => $option_details) {
            if (!in_array($model_id, $option_details['models'], TRUE)) {
                continue;
            }
            $generalConfig[$model_option] = [
                'label' => $option_details['label'],
                'description' => $option_details['description'],
                'type' => 'checkbox',
                'default' => 'false',
                'required' => FALSE,
                'constraints' => [
                    'options' => [
                        'true',
                        'false',
                    ],
                ],
            ];
        }
        return $generalConfig;
    }

    /**
     * {@inheritdoc}
     */
    public function setAuthentication(mixed $authentication): void
    {
        // Hand the new API key to the Deepgram client.
        $this->deepgramClient->setApiKey($authentication);
    }

    /**
     * {@inheritdoc}
     *
     * @throws \Drupal\ai\Exception\AiBadRequestException
     *   When the response contains no transcript for the first alternative of
     *   the first channel.
     */
    public function speechToText(string|SpeechToTextInput $input, string $model_id, array $tags = []): SpeechToTextOutput
    {
        // Both input forms end up as raw audio written to a temporary file.
        $binary = $input instanceof SpeechToTextInput ? $input->getBinary() : $input;
        $audio_input = $this->generateTemporaryFile($binary, 'tmp.mp3');

        // The explicitly requested model must win over any 'model' key that
        // may be present in the stored configuration, so it goes on the left
        // of the array union.
        $configuration = ['model' => $model_id] + $this->configuration;
        $response = $this->deepgramClient->transcribe($audio_input, $configuration);

        $alternative = $response['results']['channels'][0]['alternatives'][0] ?? [];
        if (!isset($alternative['transcript'])) {
            throw new AiBadRequestException('No transcription found');
        }
        $result = $alternative['transcript'];
        // Prefer the paragraph-formatted transcript when the response has one,
        // converting its line breaks to <br> tags.
        if (isset($alternative['paragraphs']['transcript'])) {
            $result = nl2br($alternative['paragraphs']['transcript']);
        }
        return new SpeechToTextOutput(
            $result,
            $response,
            $response['metadata'] ?? [],
        );
    }

    /**
     * {@inheritdoc}
     *
     * @throws \Drupal\ai\Exception\AiBadRequestException
     *   When the client returns an empty response.
     */
    public function textToSpeech(string|TextToSpeechInput $input, string $model_id, array $tags = []): TextToSpeechOutput
    {
        // Normalize the input.
        $text = $input instanceof TextToSpeechInput ? $input->getText() : $input;

        // The explicitly requested model takes precedence over configuration.
        $configuration = ['model' => $model_id] + $this->configuration;
        $response = $this->deepgramClient->textToSpeech($text, $configuration);
        if (empty($response)) {
            throw new AiBadRequestException('No audio found');
        }
        $output = new AudioFile($response, 'audio/mpeg', 'deepgram.mp3');
        return new TextToSpeechOutput([$output], $response, []);
    }

    /**
     * Generate a temporary file.
     *
     * Writes the binary into the temporary directory, wraps it in an unsaved
     * status (status 0) file entity, saves that entity and registers it for
     * deletion in the destructor.
     *
     * @param string $binary
     *   The binary.
     * @param string $filename
     *   The filename.
     *
     * @return \Drupal\file\Entity\File
     *   The file.
     */
    protected function generateTemporaryFile(string $binary, string $filename): File
    {
        $tmp_path = $this->fileSystem->getTempDirectory() . '/' . $filename;
        // Rename instead of replace: callers always pass the same filename, so
        // replacing would let two concurrent requests overwrite each other's
        // audio. The path actually used is the one returned by saveData().
        $path = $this->fileSystem->saveData($binary, $tmp_path, FileExists::Rename);
        $fileStorage = $this->entityTypeManager->getStorage('file');
        $file = $fileStorage->create([
            'uri' => $path,
            'filename' => $filename,
            'status' => 0,
        ]);
        $file->save();
        // Set the file for destruction when finished.
        $this->temporaryFiles[] = $file;
        return $file;
    }

    /**
     * Gets the raw client.
     *
     * This is the client for inference.
     *
     * @return \Drupal\deepgram\Deepgram
     *   The Deepgram Client.
     */
    public function getClient(): Deepgram
    {
        return $this->deepgramClient;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment