SpeechToTextManager.php 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * @copyright Copyright (c) 2023 Julius Härtl <jus@bitgrid.net>
  5. * @copyright Copyright (c) 2023 Marcel Klehr <mklehr@gmx.net>
  6. *
  7. * @author Julius Härtl <jus@bitgrid.net>
  8. * @author Marcel Klehr <mklehr@gmx.net>
  9. *
  10. * @license GNU AGPL version 3 or any later version
  11. *
  12. * This program is free software: you can redistribute it and/or modify
  13. * it under the terms of the GNU Affero General Public License as
  14. * published by the Free Software Foundation, either version 3 of the
  15. * License, or (at your option) any later version.
  16. *
  17. * This program is distributed in the hope that it will be useful,
  18. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  20. * GNU Affero General Public License for more details.
  21. *
  22. * You should have received a copy of the GNU Affero General Public License
  23. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  24. */
  25. namespace OC\SpeechToText;
  26. use InvalidArgumentException;
  27. use OC\AppFramework\Bootstrap\Coordinator;
  28. use OCP\BackgroundJob\IJobList;
  29. use OCP\Files\File;
  30. use OCP\Files\InvalidPathException;
  31. use OCP\Files\NotFoundException;
  32. use OCP\IConfig;
  33. use OCP\IServerContainer;
  34. use OCP\IUserSession;
  35. use OCP\PreConditionNotMetException;
  36. use OCP\SpeechToText\ISpeechToTextManager;
  37. use OCP\SpeechToText\ISpeechToTextProvider;
  38. use OCP\SpeechToText\ISpeechToTextProviderWithId;
  39. use OCP\SpeechToText\ISpeechToTextProviderWithUserId;
  40. use Psr\Container\ContainerExceptionInterface;
  41. use Psr\Container\NotFoundExceptionInterface;
  42. use Psr\Log\LoggerInterface;
  43. use RuntimeException;
  44. use Throwable;
  /**
   * Manager for speech-to-text transcription.
   *
   * Resolves the providers announced in the registration context, schedules or
   * cancels background transcription jobs, and runs synchronous transcriptions
   * through a provider.
   */
  class SpeechToTextManager implements ISpeechToTextManager {
      /**
       * Provider instances keyed by their registered service name; null until
       * getProviders() has resolved them once.
       *
       * @var ?ISpeechToTextProvider[]
       */
      private ?array $providers = null;

      public function __construct(
          private IServerContainer $serverContainer,
          private Coordinator $coordinator,
          private LoggerInterface $logger,
          private IJobList $jobList,
          private IConfig $config,
          private IUserSession $userSession,
      ) {
      }

      /**
       * Returns the instantiated speech-to-text providers.
       *
       * Instances are fetched from the server container on the first call and
       * cached afterwards. A provider that cannot be resolved is logged and
       * left out instead of aborting the whole lookup.
       *
       * @return ISpeechToTextProvider[] Providers keyed by registered service
       *                                 name; empty when no registration context
       *                                 is available.
       */
      public function getProviders(): array {
          $context = $this->coordinator->getRegistrationContext();
          if ($context === null) {
              return [];
          }

          if ($this->providers !== null) {
              return $this->providers;
          }

          $this->providers = [];
          foreach ($context->getSpeechToTextProviders() as $providerServiceRegistration) {
              $class = $providerServiceRegistration->getService();
              try {
                  $this->providers[$class] = $this->serverContainer->get($class);
              } catch (Throwable $e) {
                  // Throwable also covers the PSR container exceptions, so a
                  // single catch type is sufficient here.
                  $this->logger->error('Failed to load SpeechToText provider ' . $class, [
                      'exception' => $e,
                  ]);
              }
          }

          return $this->providers;
      }

      /**
       * Tells whether at least one speech-to-text provider has been registered.
       *
       * Only the registrations are inspected; no provider is instantiated.
       *
       * @return bool False when there is no registration context or no
       *              provider registration.
       */
      public function hasProviders(): bool {
          $context = $this->coordinator->getRegistrationContext();
          if ($context === null) {
              return false;
          }

          return !empty($context->getSpeechToTextProviders());
      }

      /**
       * Queues a background job that transcribes the given file.
       *
       * @param File $file The file to transcribe.
       * @param ?string $userId User id stored with the job, may be null.
       * @param string $appId App id stored with the job.
       *
       * @throws PreConditionNotMetException When no provider is registered.
       * @throws InvalidArgumentException When the file's id or owner cannot be
       *                                  determined.
       */
      public function scheduleFileTranscription(File $file, ?string $userId, string $appId): void {
          if (!$this->hasProviders()) {
              throw new PreConditionNotMetException('No SpeechToText providers have been registered');
          }

          try {
              $this->jobList->add(TranscriptionJob::class, $this->buildJobArguments($file, $userId, $appId));
          } catch (NotFoundException|InvalidPathException $e) {
              // Keep the original exception as "previous" so its trace is not lost.
              throw new InvalidArgumentException('Invalid file provided for file transcription: ' . $e->getMessage(), 0, $e);
          }
      }

      /**
       * Removes a previously queued transcription job for the given file.
       *
       * When no job with matching arguments is queued, this only writes a
       * debug log entry and returns.
       *
       * @param File $file The file whose transcription should be cancelled.
       * @param ?string $userId User id the job was scheduled with.
       * @param string $appId App id the job was scheduled with.
       *
       * @throws InvalidArgumentException When the file's id or owner cannot be
       *                                  determined.
       */
      public function cancelScheduledFileTranscription(File $file, ?string $userId, string $appId): void {
          try {
              $jobArguments = $this->buildJobArguments($file, $userId, $appId);
              if (!$this->jobList->has(TranscriptionJob::class, $jobArguments)) {
                  $this->logger->debug('Failed to cancel a Speech-to-text job for file ' . $file->getId() . '. No related job was found.');
                  return;
              }

              $this->jobList->remove(TranscriptionJob::class, $jobArguments);
          } catch (NotFoundException|InvalidPathException $e) {
              // Keep the original exception as "previous" so its trace is not lost.
              throw new InvalidArgumentException('Invalid file provided to cancel file transcription: ' . $e->getMessage(), 0, $e);
          }
      }

      /**
       * Transcribes the given file synchronously.
       *
       * If the app value "ai.stt_provider" of "core" is set, it is JSON-decoded
       * and compared against each provider's id (for providers implementing
       * ISpeechToTextProviderWithId) or class name; a matching provider is used
       * exclusively. Without a match, the loaded providers are used in order.
       *
       * Note that the first provider failure is rethrown immediately, so later
       * providers are not attempted after a failure.
       *
       * @param File $file The file to transcribe.
       *
       * @return string The transcription returned by the provider.
       *
       * @throws PreConditionNotMetException When no provider is registered.
       * @throws RuntimeException When the provider fails or no provider could
       *                          be loaded.
       */
      public function transcribeFile(File $file): string {
          if (!$this->hasProviders()) {
              throw new PreConditionNotMetException('No SpeechToText providers have been registered');
          }

          $providers = $this->getProviders();

          $json = $this->config->getAppValue('core', 'ai.stt_provider', '');
          if ($json !== '') {
              $classNameOrId = json_decode($json, true);
              $matching = array_filter($providers, static function ($candidate) use ($classNameOrId): bool {
                  if ($candidate instanceof ISpeechToTextProviderWithId) {
                      return $candidate->getId() === $classNameOrId;
                  }
                  return $candidate::class === $classNameOrId;
              });
              // current() yields false on an empty array, i.e. when the
              // configured provider is not among the loaded ones.
              $preferred = current($matching);
              if ($preferred !== false) {
                  $providers = [$preferred];
              }
          }

          foreach ($providers as $provider) {
              try {
                  if ($provider instanceof ISpeechToTextProviderWithUserId) {
                      $provider->setUserId($this->userSession->getUser()?->getUID());
                  }
                  return $provider->transcribeFile($file);
              } catch (Throwable $e) {
                  $name = $provider->getName();
                  $this->logger->info('SpeechToText transcription using provider ' . $name . ' failed', ['exception' => $e]);
                  // Chain the provider's exception so callers can inspect the root cause.
                  throw new RuntimeException('SpeechToText transcription using provider "' . $name . '" failed: ' . $e->getMessage(), 0, $e);
              }
          }

          // Reached only when the provider list is empty, e.g. every
          // registered provider failed to load.
          throw new RuntimeException('Could not transcribe file');
      }

      /**
       * Builds the argument array identifying a transcription job.
       *
       * Scheduling and cancelling must use identical arguments for the job
       * list to find the job again, hence the single shared builder.
       *
       * @return array{fileId: mixed, owner: mixed, userId: ?string, appId: string}
       *
       * @throws NotFoundException
       * @throws InvalidPathException
       */
      private function buildJobArguments(File $file, ?string $userId, string $appId): array {
          return [
              'fileId' => $file->getId(),
              'owner' => $file->getOwner()->getUID(),
              'userId' => $userId,
              'appId' => $appId,
          ];
      }
  }