SpeechToTextManager.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
  5. * SPDX-License-Identifier: AGPL-3.0-or-later
  6. */
  7. namespace OC\SpeechToText;
  8. use InvalidArgumentException;
  9. use OC\AppFramework\Bootstrap\Coordinator;
  10. use OCP\BackgroundJob\IJobList;
  11. use OCP\Files\File;
  12. use OCP\Files\InvalidPathException;
  13. use OCP\Files\NotFoundException;
  14. use OCP\IConfig;
  15. use OCP\IServerContainer;
  16. use OCP\IUserSession;
  17. use OCP\PreConditionNotMetException;
  18. use OCP\SpeechToText\ISpeechToTextManager;
  19. use OCP\SpeechToText\ISpeechToTextProvider;
  20. use OCP\SpeechToText\ISpeechToTextProviderWithId;
  21. use OCP\SpeechToText\ISpeechToTextProviderWithUserId;
  22. use Psr\Container\ContainerExceptionInterface;
  23. use Psr\Container\NotFoundExceptionInterface;
  24. use Psr\Log\LoggerInterface;
  25. use RuntimeException;
  26. use Throwable;
  /**
   * Manager for speech-to-text providers.
   *
   * Resolves the providers registered in the app registration context, schedules
   * and cancels background transcription jobs, and runs transcriptions
   * synchronously through the configured (or first available) provider.
   */
  class SpeechToTextManager implements ISpeechToTextManager {
      /**
       * Provider instances keyed by their service class name; null until the
       * first successful call to getProviders().
       *
       * @var ?ISpeechToTextProvider[]
       */
      private ?array $providers = null;

      public function __construct(
          private IServerContainer $serverContainer,
          private Coordinator $coordinator,
          private LoggerInterface $logger,
          private IJobList $jobList,
          private IConfig $config,
          private IUserSession $userSession,
      ) {
      }

      /**
       * Returns the instantiated speech-to-text providers.
       *
       * Providers are resolved from the server container once and cached on this
       * instance. A provider that cannot be resolved is logged and skipped.
       *
       * @return ISpeechToTextProvider[] Providers keyed by service class name;
       *                                 empty when no registration context exists.
       */
      public function getProviders(): array {
          $context = $this->coordinator->getRegistrationContext();
          if ($context === null) {
              // Deliberately not cached: a later call may find a context.
              return [];
          }

          if ($this->providers !== null) {
              return $this->providers;
          }

          $this->providers = [];
          foreach ($context->getSpeechToTextProviders() as $providerServiceRegistration) {
              $class = $providerServiceRegistration->getService();
              try {
                  $this->providers[$class] = $this->serverContainer->get($class);
              } catch (Throwable $e) {
                  // Throwable already covers the PSR container exceptions.
                  $this->logger->error('Failed to load SpeechToText provider ' . $class, [
                      'exception' => $e,
                  ]);
              }
          }

          return $this->providers;
      }

      /**
       * Tells whether at least one provider is registered.
       *
       * Only the registrations are inspected; no provider is instantiated.
       */
      public function hasProviders(): bool {
          $context = $this->coordinator->getRegistrationContext();
          if ($context === null) {
              return false;
          }

          return !empty($context->getSpeechToTextProviders());
      }

      /**
       * Queues a background job that transcribes the given file.
       *
       * @throws PreConditionNotMetException If no provider has been registered.
       * @throws InvalidArgumentException If a NotFoundException or
       *                                  InvalidPathException is raised while
       *                                  the job is being built or added.
       */
      public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void {
          if (!$this->hasProviders()) {
              throw new PreConditionNotMetException('No SpeechToText providers have been registered');
          }

          try {
              $this->jobList->add(TranscriptionJob::class, $this->buildJobArguments($file, $userId, $appId));
          } catch (NotFoundException|InvalidPathException $e) {
              // Keep the original exception as "previous" so its trace is not lost.
              throw new InvalidArgumentException('Invalid file provided for file transcription: ' . $e->getMessage(), 0, $e);
          }
      }

      /**
       * Removes a previously queued transcription job for the given file.
       *
       * When no job with matching arguments is queued, a debug message is logged
       * and nothing else happens.
       *
       * @throws InvalidArgumentException If a NotFoundException or
       *                                  InvalidPathException is raised while
       *                                  the job is being looked up or removed.
       */
      public function cancelScheduledFileTranscription(File $file, ?string $userId, string $appId): void {
          try {
              $jobArguments = $this->buildJobArguments($file, $userId, $appId);

              if (!$this->jobList->has(TranscriptionJob::class, $jobArguments)) {
                  $this->logger->debug('Failed to cancel a Speech-to-text job for file ' . $file->getId() . '. No related job was found.');
                  return;
              }

              $this->jobList->remove(TranscriptionJob::class, $jobArguments);
          } catch (NotFoundException|InvalidPathException $e) {
              // Keep the original exception as "previous" so its trace is not lost.
              throw new InvalidArgumentException('Invalid file provided to cancel file transcription: ' . $e->getMessage(), 0, $e);
          }
      }

      /**
       * Transcribes the file synchronously and returns the resulting text.
       *
       * If the app config value core/ai.stt_provider names a loaded provider (by
       * id or by class name), only that provider is used; otherwise the first
       * loaded provider is used.
       *
       * @throws PreConditionNotMetException If no provider has been registered.
       * @throws RuntimeException If the chosen provider fails, or if no provider
       *                          instance could be loaded.
       */
      public function transcribeFile(File $file): string {
          if (!$this->hasProviders()) {
              throw new PreConditionNotMetException('No SpeechToText providers have been registered');
          }

          $providers = $this->getProviders();
          $preferred = $this->findConfiguredProvider($providers);
          if ($preferred !== null) {
              $providers = [$preferred];
          }

          // Every iteration either returns or throws, so only the first provider
          // is ever attempted: a failure is reported, not retried with the next one.
          foreach ($providers as $provider) {
              try {
                  if ($provider instanceof ISpeechToTextProviderWithUserId) {
                      $provider->setUserId($this->userSession->getUser()?->getUID());
                  }
                  return $provider->transcribeFile($file);
              } catch (Throwable $e) {
                  $this->logger->info('SpeechToText transcription using provider ' . $provider->getName() . ' failed', ['exception' => $e]);
                  throw new RuntimeException('SpeechToText transcription using provider "' . $provider->getName() . '" failed: ' . $e->getMessage(), 0, $e);
              }
          }

          // Reached only when providers are registered but none could be loaded.
          throw new RuntimeException('Could not transcribe file');
      }

      /**
       * Builds the argument array that identifies a transcription job.
       *
       * Scheduling and cancelling must use identical arguments, because the job
       * list is queried and modified with this exact array.
       *
       * NOTE(review): getOwner() is dereferenced without a null check; confirm
       * it cannot return null for files passed in here.
       *
       * @return array{fileId: mixed, owner: mixed, userId: ?string, appId: string}
       * @throws NotFoundException Not handled here; callers translate it.
       * @throws InvalidPathException Not handled here; callers translate it.
       */
      private function buildJobArguments(File $file, ?string $userId, string $appId): array {
          return [
              'fileId' => $file->getId(),
              'owner' => $file->getOwner()->getUID(),
              'userId' => $userId,
              'appId' => $appId,
          ];
      }

      /**
       * Finds the provider named by the app config value core/ai.stt_provider.
       *
       * The stored value is JSON-decoded and compared against getId() for
       * providers implementing ISpeechToTextProviderWithId, and against the
       * class name for all other providers.
       *
       * @param ISpeechToTextProvider[] $providers Loaded providers to search.
       * @return ?ISpeechToTextProvider The first match, or null when the setting
       *                                is empty or matches no loaded provider.
       */
      private function findConfiguredProvider(array $providers): ?ISpeechToTextProvider {
          $json = $this->config->getAppValue('core', 'ai.stt_provider', '');
          if ($json === '') {
              return null;
          }

          $classNameOrId = json_decode($json, true);
          foreach ($providers as $provider) {
              $identifier = $provider instanceof ISpeechToTextProviderWithId
                  ? $provider->getId()
                  : $provider::class;
              if ($identifier === $classNameOrId) {
                  return $provider;
              }
          }

          return null;
      }
  }