paulsheldrake · March 6, 2025 22:17
diff --git a/DeepgramProvider.php b/DeepgramProvider.php
 <?php

 namespace Drupal\deepgram\Plugin\AiProvider;

 use Drupal\ai\Attribute\AiProvider;
 use Drupal\ai\Base\AiProviderClientBase;
 use Drupal\ai\Exception\AiBadRequestException;
 use Drupal\ai\OperationType\GenericType\AudioFile;
 use Drupal\ai\OperationType\SpeechToText\SpeechToTextInput;
 use Drupal\ai\OperationType\SpeechToText\SpeechToTextInterface;
 use Drupal\ai\OperationType\SpeechToText\SpeechToTextOutput;
 use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInput;
 use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInterface;
 use Drupal\ai\OperationType\TextToSpeech\TextToSpeechOutput;
 use Drupal\Core\Config\ImmutableConfig;
 use Drupal\Core\File\FileExists;
 use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
 use Drupal\Core\StringTranslation\TranslatableMarkup;
 use Drupal\deepgram\Deepgram;
 use Drupal\file\Entity\File;
 use Symfony\Component\DependencyInjection\ContainerInterface;
 use Symfony\Component\Yaml\Yaml;

 /**
 * Plugin implementation of the 'deepgram' provider.
 */
 #[AiProvider(
  id: 'deepgram',
  label: new TranslatableMarkup('Deepgram'),
 )]
 class DeepgramProvider extends AiProviderClientBase implements
  ContainerFactoryPluginInterface,
  SpeechToTextInterface,
  TextToSpeechInterface
 {

  /**
   * The Deepgram Client.
   *
   * Assigned in create() from the 'deepgram.api' service and used for every
   * transcription and speech synthesis request.
   *
   * @var \Drupal\deepgram\Deepgram
   */
  protected $deepgramClient;

  /**
   * The entity type manager.
   *
   * Assigned in create(); generateTemporaryFile() uses it to obtain the
   * 'file' entity storage.
   *
   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
   */
  protected $entityTypeManager;

  /**
   * The temporary files.
   *
   * File entities registered by generateTemporaryFile(). Each of them is
   * deleted again in __destruct().
   *
   * @var \Drupal\file\Entity\File[]
   */
  protected $temporaryFiles = [];

  /**
   * Removes the temporary files when the provider goes away.
   *
   * Every file entity collected in $this->temporaryFiles gets deleted.
   */
  public function __destruct()
  {
    array_walk(
      $this->temporaryFiles,
      static function ($temporary_file): void {
        $temporary_file->delete();
      }
    );
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition): static
  {
    // The parent builds the plugin; the two services this provider needs in
    // addition are attached afterwards.
    $provider = parent::create($container, $configuration, $plugin_id, $plugin_definition);
    $provider->entityTypeManager = $container->get('entity_type.manager');
    $provider->deepgramClient = $container->get('deepgram.api');

    return $provider;
  }

  /**
   * {@inheritdoc}
   */
  public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array
  {
    // Hard-coded model list (model id => label), grouped by the operation
    // type each model serves.
    $models_by_operation = [
      'speech_to_text' => [
        'nova-3' => 'Nova 3',
        'nova-2' => 'Nova 2',
        'nova' => 'Nova',
        'base' => 'Base',
      ],
      'text_to_speech' => [
        'aura-asteria-en' => 'Asteria English (US)',
        'aura-luna-en' => 'Luna English (US)',
        'aura-stella-en' => 'Stella English (US)',
        'aura-athena-en' => 'Athena English (UK)',
        'aura-hera-en' => 'Hera English (US)',
        'aura-orion-en' => 'Orion English (US)',
        'aura-arcas-en' => 'Arcas English (US)',
        'aura-perseus-en' => 'Perseus English (US)',
        'aura-angus-en' => 'Angus English (Ireland)',
        'aura-orpheus-en' => 'Orpheus English (US)',
        'aura-helios-en' => 'Helios English (UK)',
        'aura-zeus-en' => 'Zeus English (US)',
      ],
    ];

    // No operation type given: return every model, speech-to-text first.
    if ($operation_type === NULL) {
      return $models_by_operation['speech_to_text'] + $models_by_operation['text_to_speech'];
    }

    // An operation type without models yields an empty list.
    return $models_by_operation[$operation_type] ?? [];
  }

  /**
   * {@inheritdoc}
   */
  public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool
  {
    // Without an API key the provider is not configured, so it is not usable.
    if (!$this->getConfig()->get('api_key')) {
      return FALSE;
    }
    // No particular operation type requested: being configured is enough.
    if (!$operation_type) {
      return TRUE;
    }
    // Otherwise it has to be one of the operation types Deepgram supports.
    return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE);
  }

  /**
   * {@inheritdoc}
   */
  public function getSupportedOperationTypes(): array
  {
    // Transcription and speech synthesis are the operations offered here.
    $operation_types = ['speech_to_text', 'text_to_speech'];

    return $operation_types;
  }

  /**
   * {@inheritdoc}
   */
  public function getConfig(): ImmutableConfig
  {
    // Read-only settings object of this provider.
    $settings = $this->configFactory->get('provider_deepgram.settings');

    return $settings;
  }

  /**
   * {@inheritdoc}
   */
  public function getApiDefinition(): array
  {
    // The API defaults are read from a YAML file inside the deepgram module
    // directory.
    $module_path = $this->moduleHandler->getModule('deepgram')->getPath();

    return Yaml::parseFile($module_path . '/definitions/api_defaults.yml');
  }

  /**
   * {@inheritdoc}
   */
  public function getModelSettings(string $model_id, array $generalConfig = []): array
  {
    // Model id lists shared by the options below.
    $every_model = ['nova-3', 'nova-2', 'nova', 'base'];
    $nova_models = ['nova-3', 'nova-2', 'nova'];

    // All options are booleans.
    // Option name => [label, description, models the option is offered for].
    $toggles = [
      'smart_format' => [
        'Smart Format',
        "Smart Format improves readability by applying additional formatting. When enabled, punctuation and paragraph breaks will be applied as well as formatting of other entities, such as dates, times, and numbers.",
        $every_model,
      ],
      'punctuate' => [
        'Punctuate',
        "Indicates whether to add punctuation and capitalization to the transcript.",
        $every_model,
      ],
      'paragraphs' => [
        'Paragraphs',
        "Indicates whether Deepgram will split audio into paragraphs to improve transcript readability. When paragraphs is set to true, punctuate will also be set to true.",
        $every_model,
      ],
      'profanity_filter' => [
        'Profanity Filter',
        "Indicates whether to remove profanity from the transcript.",
        $every_model,
      ],
      'diarize' => [
        'Diarization',
        "Indicates whether to recognize speaker changes.",
        $every_model,
      ],
      'filler_words' => [
        'Filler Words',
        'Indicates whether Deepgram will transcribe disfluencies in your audio, like "uh" and "um".',
        $nova_models,
      ],
    ];

    foreach ($toggles as $option_name => [$label, $description, $supported_models]) {
      // Skip options the requested model does not offer.
      if (!in_array($model_id, $supported_models)) {
        continue;
      }
      // Each option is exposed as an optional checkbox that is off by
      // default.
      $generalConfig[$option_name] = [
        'label' => $label,
        'description' => $description,
        'type' => 'checkbox',
        'default' => 'false',
        'required' => FALSE,
        'constraints' => [
          'options' => ['true', 'false'],
        ],
      ];
    }

    return $generalConfig;
  }

  /**
   * {@inheritdoc}
   */
  public function setAuthentication(mixed $authentication): void
  {
    // Hand the given credential to the Deepgram client as its API key.
    $client = $this->deepgramClient;
    $client->setApiKey($authentication);
  }

  /**
   * {@inheritdoc}
   */
  public function speechToText(string|SpeechToTextInput $input, string $model_id, array $tags = []): SpeechToTextOutput
  {
    // Normalize the input: an input object and a raw string both end up as
    // binary audio written to a temporary file.
    $binary = $input instanceof SpeechToTextInput ? $input->getBinary() : $input;
    $audio_input = $this->generateTemporaryFile($binary, 'tmp.mp3');

    // Start transcribing. The requested model is the left operand because
    // array union keeps the left-hand value, so a 'model' key in the plugin
    // configuration cannot replace the model the caller asked for.
    $configuration = ['model' => $model_id] + $this->configuration;
    $response = $this->deepgramClient->transcribe($audio_input, $configuration);

    // Only the first alternative of the first channel is looked at.
    $alternative = $response['results']['channels'][0]['alternatives'][0] ?? [];
    if (!isset($alternative['transcript'])) {
      throw new AiBadRequestException('No transcription found');
    }

    $result = $alternative['transcript'];
    // Use the paragraph version (newlines turned into <br>) only when its
    // transcript is really present; a 'paragraphs' key alone is not enough.
    if (isset($alternative['paragraphs']['transcript'])) {
      $result = nl2br($alternative['paragraphs']['transcript']);
    }

    return new SpeechToTextOutput(
      $result,
      $response,
      // Nothing above guarantees that metadata exists in the response.
      $response['metadata'] ?? []
    );
  }

  /**
   * {@inheritdoc}
   */
  public function textToSpeech(string|TextToSpeechInput $input, string $model_id, array $tags = []): TextToSpeechOutput
  {
    // Normalize the input to the plain text that should be spoken.
    $text = $input instanceof TextToSpeechInput ? $input->getText() : $input;

    // Start generating a file. The requested model is the left operand
    // because array union keeps the left-hand value, so a 'model' key in the
    // plugin configuration cannot replace the model the caller asked for.
    $configuration = ['model' => $model_id] + $this->configuration;
    $response = $this->deepgramClient->textToSpeech($text, $configuration);
    if (empty($response)) {
      throw new AiBadRequestException('No audio found');
    }

    // The client response is wrapped as an MPEG audio file and also passed
    // on as the raw output.
    $output = new AudioFile($response, 'audio/mpeg', 'deepgram.mp3');
    return new TextToSpeechOutput([$output], $response, []);
  }

  /**
   * Generate a temporary file.
   *
   * Writes the binary into the temporary directory, wraps it in an unsaved
   * status (status 0) file entity, saves that entity and registers it so the
   * destructor deletes it again.
   *
   * @param string $binary
   *   The binary.
   * @param string $filename
   *   The filename. If a file of that name already exists in the temporary
   *   directory, the data is stored under a renamed, unused name instead.
   *
   * @return \Drupal\file\Entity\File
   *   The file.
   */
  protected function generateTemporaryFile(string $binary, string $filename): File
  {
    $tmp_path = $this->fileSystem->getTempDirectory() . '/' . $filename;
    // Rename instead of replace: callers pass a fixed filename, so replacing
    // would let two requests running at the same time overwrite (and later
    // delete) each other's audio.
    $path = $this->fileSystem->saveData($binary, $tmp_path, FileExists::Rename);
    $fileStorage = $this->entityTypeManager->getStorage('file');
    $file = $fileStorage->create([
      'uri' => $path,
      'filename' => $filename,
      'status' => 0,
    ]);
    $file->save();
    // Set the file for destruction when finished.
    $this->temporaryFiles[] = $file;
    return $file;
  }

  /**
   * Returns the underlying Deepgram client.
   *
   * This is the client used for inference.
   *
   * @return \Drupal\deepgram\Deepgram
   *   The Deepgram Client.
   */
  public function getClient(): Deepgram
  {
    $client = $this->deepgramClient;

    return $client;
  }

 }
	<?php

	namespace Drupal\deepgram\Plugin\AiProvider;

	use Drupal\ai\Attribute\AiProvider;
	use Drupal\ai\Base\AiProviderClientBase;
	use Drupal\ai\Exception\AiBadRequestException;
	use Drupal\ai\OperationType\GenericType\AudioFile;
	use Drupal\ai\OperationType\SpeechToText\SpeechToTextInput;
	use Drupal\ai\OperationType\SpeechToText\SpeechToTextInterface;
	use Drupal\ai\OperationType\SpeechToText\SpeechToTextOutput;
	use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInput;
	use Drupal\ai\OperationType\TextToSpeech\TextToSpeechInterface;
	use Drupal\ai\OperationType\TextToSpeech\TextToSpeechOutput;
	use Drupal\Core\Config\ImmutableConfig;
	use Drupal\Core\File\FileExists;
	use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
	use Drupal\Core\StringTranslation\TranslatableMarkup;
	use Drupal\deepgram\Deepgram;
	use Drupal\file\Entity\File;
	use Symfony\Component\DependencyInjection\ContainerInterface;
	use Symfony\Component\Yaml\Yaml;

	/**
	 * Plugin implementation of the 'deepgram' provider.
	 *
	 * Offers speech-to-text and text-to-speech through the Deepgram client.
	 */
	#[AiProvider(
	  id: 'deepgram',
	  label: new TranslatableMarkup('Deepgram'),
	)]
	class DeepgramProvider extends AiProviderClientBase implements
	  ContainerFactoryPluginInterface,
	  SpeechToTextInterface,
	  TextToSpeechInterface
	{

	  /**
	   * The Deepgram Client.
	   *
	   * Assigned in create() from the 'deepgram.api' service.
	   *
	   * @var \Drupal\deepgram\Deepgram
	   */
	  protected $deepgramClient;

	  /**
	   * The entity type manager.
	   *
	   * Assigned in create(); used to obtain the 'file' entity storage.
	   *
	   * @var \Drupal\Core\Entity\EntityTypeManagerInterface
	   */
	  protected $entityTypeManager;

	  /**
	   * The temporary files.
	   *
	   * File entities registered by generateTemporaryFile(); deleted again in
	   * the destructor.
	   *
	   * @var \Drupal\file\Entity\File[]
	   */
	  protected $temporaryFiles = [];

	  /**
	   * Destructor.
	   *
	   * Deletes every temporary file entity created by this instance.
	   */
	  public function __destruct()
	  {
	    foreach ($this->temporaryFiles as $file) {
	      $file->delete();
	    }
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition): static
	  {
	    // The parent builds the plugin; the services this provider needs in
	    // addition are attached afterwards.
	    $instance = parent::create($container, $configuration, $plugin_id, $plugin_definition);
	    $instance->deepgramClient = $container->get('deepgram.api');
	    $instance->entityTypeManager = $container->get('entity_type.manager');
	    return $instance;
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function getConfiguredModels(?string $operation_type = NULL, array $capabilities = []): array
	  {
	    // Hard-coded model list (model id => label), grouped by the operation
	    // type each model serves.
	    $models_by_operation = [
	      'speech_to_text' => [
	        'nova-3' => 'Nova 3',
	        'nova-2' => 'Nova 2',
	        'nova' => 'Nova',
	        'base' => 'Base',
	      ],
	      'text_to_speech' => [
	        'aura-asteria-en' => 'Asteria English (US)',
	        'aura-luna-en' => 'Luna English (US)',
	        'aura-stella-en' => 'Stella English (US)',
	        'aura-athena-en' => 'Athena English (UK)',
	        'aura-hera-en' => 'Hera English (US)',
	        'aura-orion-en' => 'Orion English (US)',
	        'aura-arcas-en' => 'Arcas English (US)',
	        'aura-perseus-en' => 'Perseus English (US)',
	        'aura-angus-en' => 'Angus English (Ireland)',
	        'aura-orpheus-en' => 'Orpheus English (US)',
	        'aura-helios-en' => 'Helios English (UK)',
	        'aura-zeus-en' => 'Zeus English (US)',
	      ],
	    ];

	    // No operation type given: return every model, speech-to-text first.
	    if ($operation_type === NULL) {
	      return $models_by_operation['speech_to_text'] + $models_by_operation['text_to_speech'];
	    }

	    // An operation type without models yields an empty list.
	    return $models_by_operation[$operation_type] ?? [];
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function isUsable(?string $operation_type = NULL, array $capabilities = []): bool
	  {
	    // Without an API key the provider is not configured, so it is not usable.
	    if (!$this->getConfig()->get('api_key')) {
	      return FALSE;
	    }
	    // No particular operation type requested: being configured is enough.
	    if (!$operation_type) {
	      return TRUE;
	    }
	    // Otherwise it has to be one of the operation types Deepgram supports.
	    return in_array($operation_type, $this->getSupportedOperationTypes(), TRUE);
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function getSupportedOperationTypes(): array
	  {
	    return [
	      'speech_to_text',
	      'text_to_speech',
	    ];
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function getConfig(): ImmutableConfig
	  {
	    return $this->configFactory->get('provider_deepgram.settings');
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function getApiDefinition(): array
	  {
	    // The API defaults are read from a YAML file inside the deepgram module
	    // directory.
	    $module_path = $this->moduleHandler->getModule('deepgram')->getPath();
	    return Yaml::parseFile($module_path . '/definitions/api_defaults.yml');
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function getModelSettings(string $model_id, array $generalConfig = []): array
	  {
	    // Model id lists shared by the options below.
	    $every_model = ['nova-3', 'nova-2', 'nova', 'base'];
	    $nova_models = ['nova-3', 'nova-2', 'nova'];

	    // These are all booleans.
	    // Option name => [label, description, models the option is offered for].
	    $model_options = [
	      'smart_format' => [
	        'Smart Format',
	        "Smart Format improves readability by applying additional formatting. When enabled, punctuation and paragraph breaks will be applied as well as formatting of other entities, such as dates, times, and numbers.",
	        $every_model,
	      ],
	      'punctuate' => [
	        'Punctuate',
	        "Indicates whether to add punctuation and capitalization to the transcript.",
	        $every_model,
	      ],
	      'paragraphs' => [
	        'Paragraphs',
	        "Indicates whether Deepgram will split audio into paragraphs to improve transcript readability. When paragraphs is set to true, punctuate will also be set to true.",
	        $every_model,
	      ],
	      'profanity_filter' => [
	        'Profanity Filter',
	        "Indicates whether to remove profanity from the transcript.",
	        $every_model,
	      ],
	      'diarize' => [
	        'Diarization',
	        "Indicates whether to recognize speaker changes.",
	        $every_model,
	      ],
	      'filler_words' => [
	        'Filler Words',
	        'Indicates whether Deepgram will transcribe disfluencies in your audio, like "uh" and "um".',
	        $nova_models,
	      ],
	    ];

	    foreach ($model_options as $model_option => [$label, $description, $supported_models]) {
	      // Skip options the requested model does not offer; model ids are
	      // plain strings, so they are compared strictly.
	      if (!in_array($model_id, $supported_models, TRUE)) {
	        continue;
	      }
	      // Each option is exposed as an optional checkbox that is off by
	      // default.
	      $generalConfig[$model_option] = [
	        'label' => $label,
	        'description' => $description,
	        'type' => 'checkbox',
	        'default' => 'false',
	        'required' => FALSE,
	        'constraints' => [
	          'options' => [
	            'true',
	            'false',
	          ],
	        ],
	      ];
	    }
	    return $generalConfig;
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function setAuthentication(mixed $authentication): void
	  {
	    // Hand the given credential to the Deepgram client as its API key.
	    $this->deepgramClient->setApiKey($authentication);
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function speechToText(string|SpeechToTextInput $input, string $model_id, array $tags = []): SpeechToTextOutput
	  {
	    // Normalize the input: an input object and a raw string both end up as
	    // binary audio written to a temporary file.
	    $binary = $input instanceof SpeechToTextInput ? $input->getBinary() : $input;
	    $audio_input = $this->generateTemporaryFile($binary, 'tmp.mp3');

	    // Start transcribing. The requested model is the left operand because
	    // array union keeps the left-hand value, so a 'model' key in the plugin
	    // configuration cannot replace the model the caller asked for.
	    $configuration = ['model' => $model_id] + $this->configuration;
	    $response = $this->deepgramClient->transcribe($audio_input, $configuration);

	    // Only the first alternative of the first channel is looked at.
	    $alternative = $response['results']['channels'][0]['alternatives'][0] ?? [];
	    if (!isset($alternative['transcript'])) {
	      throw new AiBadRequestException('No transcription found');
	    }

	    $result = $alternative['transcript'];
	    // Use the paragraph version (newlines turned into <br>) only when its
	    // transcript is really present; a 'paragraphs' key alone is not enough.
	    if (isset($alternative['paragraphs']['transcript'])) {
	      $result = nl2br($alternative['paragraphs']['transcript']);
	    }

	    return new SpeechToTextOutput(
	      $result,
	      $response,
	      // Nothing above guarantees that metadata exists in the response.
	      $response['metadata'] ?? []
	    );
	  }

	  /**
	   * {@inheritdoc}
	   */
	  public function textToSpeech(string|TextToSpeechInput $input, string $model_id, array $tags = []): TextToSpeechOutput
	  {
	    // Normalize the input to the plain text that should be spoken.
	    $text = $input instanceof TextToSpeechInput ? $input->getText() : $input;

	    // Start generating a file. The requested model is the left operand
	    // because array union keeps the left-hand value, so a 'model' key in the
	    // plugin configuration cannot replace the model the caller asked for.
	    $configuration = ['model' => $model_id] + $this->configuration;
	    $response = $this->deepgramClient->textToSpeech($text, $configuration);
	    if (empty($response)) {
	      throw new AiBadRequestException('No audio found');
	    }

	    // The client response is wrapped as an MPEG audio file and also passed
	    // on as the raw output.
	    $output = new AudioFile($response, 'audio/mpeg', 'deepgram.mp3');
	    return new TextToSpeechOutput([$output], $response, []);
	  }

	  /**
	   * Generate a temporary file.
	   *
	   * Writes the binary into the temporary directory, wraps it in a status 0
	   * file entity, saves that entity and registers it so the destructor
	   * deletes it again.
	   *
	   * @param string $binary
	   *   The binary.
	   * @param string $filename
	   *   The filename. If a file of that name already exists in the temporary
	   *   directory, the data is stored under a renamed, unused name instead.
	   *
	   * @return \Drupal\file\Entity\File
	   *   The file.
	   */
	  protected function generateTemporaryFile(string $binary, string $filename): File
	  {
	    $tmp_path = $this->fileSystem->getTempDirectory() . '/' . $filename;
	    // Rename instead of replace: callers pass a fixed filename, so replacing
	    // would let two requests running at the same time overwrite (and later
	    // delete) each other's audio.
	    $path = $this->fileSystem->saveData($binary, $tmp_path, FileExists::Rename);
	    $fileStorage = $this->entityTypeManager->getStorage('file');
	    $file = $fileStorage->create([
	      'uri' => $path,
	      'filename' => $filename,
	      'status' => 0,
	    ]);
	    $file->save();
	    // Set the file for destruction when finished.
	    $this->temporaryFiles[] = $file;
	    return $file;
	  }

	  /**
	   * Gets the raw client.
	   *
	   * This is the client for inference.
	   *
	   * @return \Drupal\deepgram\Deepgram
	   *   The Deepgram Client.
	   */
	  public function getClient(): Deepgram
	  {
	    return $this->deepgramClient;
	  }

	}