SpeechToTextManager.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
  5. * SPDX-License-Identifier: AGPL-3.0-or-later
  6. */
  7. namespace OC\SpeechToText;
  8. use InvalidArgumentException;
  9. use OC\AppFramework\Bootstrap\Coordinator;
  10. use OCP\BackgroundJob\IJobList;
  11. use OCP\Files\File;
  12. use OCP\Files\InvalidPathException;
  13. use OCP\Files\NotFoundException;
  14. use OCP\IConfig;
  15. use OCP\IServerContainer;
  16. use OCP\IUserSession;
  17. use OCP\PreConditionNotMetException;
  18. use OCP\SpeechToText\ISpeechToTextManager;
  19. use OCP\SpeechToText\ISpeechToTextProvider;
  20. use OCP\SpeechToText\ISpeechToTextProviderWithId;
  21. use OCP\SpeechToText\ISpeechToTextProviderWithUserId;
  22. use OCP\TaskProcessing\IManager as ITaskProcessingManager;
  23. use OCP\TaskProcessing\Task;
  24. use OCP\TaskProcessing\TaskTypes\AudioToText;
  25. use Psr\Container\ContainerExceptionInterface;
  26. use Psr\Container\NotFoundExceptionInterface;
  27. use Psr\Log\LoggerInterface;
  28. use RuntimeException;
  29. use Throwable;
  30. class SpeechToTextManager implements ISpeechToTextManager {
  31. /** @var ?ISpeechToTextProvider[] */
  32. private ?array $providers = null;
  33. public function __construct(
  34. private IServerContainer $serverContainer,
  35. private Coordinator $coordinator,
  36. private LoggerInterface $logger,
  37. private IJobList $jobList,
  38. private IConfig $config,
  39. private IUserSession $userSession,
  40. private ITaskProcessingManager $taskProcessingManager,
  41. ) {
  42. }
  43. public function getProviders(): array {
  44. $context = $this->coordinator->getRegistrationContext();
  45. if ($context === null) {
  46. return [];
  47. }
  48. if ($this->providers !== null) {
  49. return $this->providers;
  50. }
  51. $this->providers = [];
  52. foreach ($context->getSpeechToTextProviders() as $providerServiceRegistration) {
  53. $class = $providerServiceRegistration->getService();
  54. try {
  55. $this->providers[$class] = $this->serverContainer->get($class);
  56. } catch (NotFoundExceptionInterface|ContainerExceptionInterface|Throwable $e) {
  57. $this->logger->error('Failed to load SpeechToText provider ' . $class, [
  58. 'exception' => $e,
  59. ]);
  60. }
  61. }
  62. return $this->providers;
  63. }
  64. public function hasProviders(): bool {
  65. $context = $this->coordinator->getRegistrationContext();
  66. if ($context === null) {
  67. return false;
  68. }
  69. return !empty($context->getSpeechToTextProviders());
  70. }
  71. public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void {
  72. if (!$this->hasProviders()) {
  73. throw new PreConditionNotMetException('No SpeechToText providers have been registered');
  74. }
  75. try {
  76. $this->jobList->add(TranscriptionJob::class, [
  77. 'fileId' => $file->getId(),
  78. 'owner' => $file->getOwner()->getUID(),
  79. 'userId' => $userId,
  80. 'appId' => $appId,
  81. ]);
  82. } catch (NotFoundException|InvalidPathException $e) {
  83. throw new InvalidArgumentException('Invalid file provided for file transcription: ' . $e->getMessage());
  84. }
  85. }
  86. public function cancelScheduledFileTranscription(File $file, ?string $userId, string $appId): void {
  87. try {
  88. $jobArguments = [
  89. 'fileId' => $file->getId(),
  90. 'owner' => $file->getOwner()->getUID(),
  91. 'userId' => $userId,
  92. 'appId' => $appId,
  93. ];
  94. if (!$this->jobList->has(TranscriptionJob::class, $jobArguments)) {
  95. $this->logger->debug('Failed to cancel a Speech-to-text job for file ' . $file->getId() . '. No related job was found.');
  96. return;
  97. }
  98. $this->jobList->remove(TranscriptionJob::class, $jobArguments);
  99. } catch (NotFoundException|InvalidPathException $e) {
  100. throw new InvalidArgumentException('Invalid file provided to cancel file transcription: ' . $e->getMessage());
  101. }
  102. }
  103. public function transcribeFile(File $file, ?string $userId = null, string $appId = 'core'): string {
  104. // try to run a TaskProcessing core:audio2text task
  105. // this covers scheduling as well because OC\SpeechToText\TranscriptionJob calls this method
  106. try {
  107. if (isset($this->taskProcessingManager->getAvailableTaskTypes()['core:audio2text'])) {
  108. $taskProcessingTask = new Task(
  109. AudioToText::ID,
  110. ['input' => $file->getId()],
  111. $appId,
  112. $userId,
  113. 'from-SpeechToTextManager||' . $file->getId() . '||' . ($userId ?? '') . '||' . $appId,
  114. );
  115. $resultTask = $this->taskProcessingManager->runTask($taskProcessingTask);
  116. if ($resultTask->getStatus() === Task::STATUS_SUCCESSFUL) {
  117. $output = $resultTask->getOutput();
  118. if (isset($output['output']) && is_string($output['output'])) {
  119. return $output['output'];
  120. }
  121. }
  122. }
  123. } catch (Throwable $e) {
  124. throw new RuntimeException('Failed to run a Speech-to-text job from STTManager with TaskProcessing for file ' . $file->getId(), 0, $e);
  125. }
  126. if (!$this->hasProviders()) {
  127. throw new PreConditionNotMetException('No SpeechToText providers have been registered');
  128. }
  129. $providers = $this->getProviders();
  130. $json = $this->config->getAppValue('core', 'ai.stt_provider', '');
  131. if ($json !== '') {
  132. $classNameOrId = json_decode($json, true);
  133. $provider = current(array_filter($providers, function ($provider) use ($classNameOrId) {
  134. if ($provider instanceof ISpeechToTextProviderWithId) {
  135. return $provider->getId() === $classNameOrId;
  136. }
  137. return $provider::class === $classNameOrId;
  138. }));
  139. if ($provider !== false) {
  140. $providers = [$provider];
  141. }
  142. }
  143. foreach ($providers as $provider) {
  144. try {
  145. if ($provider instanceof ISpeechToTextProviderWithUserId) {
  146. $provider->setUserId($this->userSession->getUser()?->getUID());
  147. }
  148. return $provider->transcribeFile($file);
  149. } catch (\Throwable $e) {
  150. $this->logger->info('SpeechToText transcription using provider ' . $provider->getName() . ' failed', ['exception' => $e]);
  151. throw new RuntimeException('SpeechToText transcription using provider "' . $provider->getName() . '" failed: ' . $e->getMessage());
  152. }
  153. }
  154. throw new RuntimeException('Could not transcribe file');
  155. }
  156. }