LinkReferenceProvider.php 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * @copyright Copyright (c) 2022 Julius Härtl <jus@bitgrid.net>
  5. *
  6. * @author Julius Härtl <jus@bitgrid.net>
  7. *
  8. * @license GNU AGPL version 3 or any later version
  9. *
  10. * This program is free software: you can redistribute it and/or modify
  11. * it under the terms of the GNU Affero General Public License as
  12. * published by the Free Software Foundation, either version 3 of the
  13. * License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU Affero General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Affero General Public License
  21. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  22. */
  23. namespace OC\Collaboration\Reference;
  24. use Fusonic\OpenGraph\Consumer;
  25. use GuzzleHttp\Exception\GuzzleException;
  26. use GuzzleHttp\Psr7\LimitStream;
  27. use GuzzleHttp\Psr7\Utils;
  28. use OC\Security\RateLimiting\Exception\RateLimitExceededException;
  29. use OC\Security\RateLimiting\Limiter;
  30. use OC\SystemConfig;
  31. use OCP\Collaboration\Reference\IReference;
  32. use OCP\Collaboration\Reference\IReferenceProvider;
  33. use OCP\Collaboration\Reference\Reference;
  34. use OCP\Files\AppData\IAppDataFactory;
  35. use OCP\Files\NotFoundException;
  36. use OCP\Http\Client\IClientService;
  37. use OCP\IRequest;
  38. use OCP\IURLGenerator;
  39. use OCP\IUserSession;
  40. use Psr\Log\LoggerInterface;
  41. class LinkReferenceProvider implements IReferenceProvider {
  42. public const MAX_PREVIEW_SIZE = 1024 * 1024;
  43. public const ALLOWED_CONTENT_TYPES = [
  44. 'image/png',
  45. 'image/jpg',
  46. 'image/jpeg',
  47. 'image/gif',
  48. 'image/svg+xml',
  49. 'image/webp'
  50. ];
  51. public function __construct(
  52. private IClientService $clientService,
  53. private LoggerInterface $logger,
  54. private SystemConfig $systemConfig,
  55. private IAppDataFactory $appDataFactory,
  56. private IURLGenerator $urlGenerator,
  57. private Limiter $limiter,
  58. private IUserSession $userSession,
  59. private IRequest $request,
  60. ) {
  61. }
  62. public function matchReference(string $referenceText): bool {
  63. if ($this->systemConfig->getValue('reference_opengraph', true) !== true) {
  64. return false;
  65. }
  66. return (bool)preg_match(IURLGenerator::URL_REGEX, $referenceText);
  67. }
  68. public function resolveReference(string $referenceText): ?IReference {
  69. if ($this->matchReference($referenceText)) {
  70. $reference = new Reference($referenceText);
  71. $this->fetchReference($reference);
  72. return $reference;
  73. }
  74. return null;
  75. }
  76. private function fetchReference(Reference $reference): void {
  77. try {
  78. $user = $this->userSession->getUser();
  79. if ($user) {
  80. $this->limiter->registerUserRequest('opengraph', 10, 120, $user);
  81. } else {
  82. $this->limiter->registerAnonRequest('opengraph', 10, 120, $this->request->getRemoteAddress());
  83. }
  84. } catch (RateLimitExceededException $e) {
  85. return;
  86. }
  87. $client = $this->clientService->newClient();
  88. try {
  89. $headResponse = $client->head($reference->getId(), [ 'timeout' => 10 ]);
  90. } catch (\Exception $e) {
  91. $this->logger->debug('Failed to perform HEAD request to get target metadata', ['exception' => $e]);
  92. return;
  93. }
  94. $linkContentLength = $headResponse->getHeader('Content-Length');
  95. if (is_numeric($linkContentLength) && (int) $linkContentLength > 5 * 1024 * 1024) {
  96. $this->logger->debug('[Head] Skip resolving links pointing to content length > 5 MiB');
  97. return;
  98. }
  99. $linkContentType = $headResponse->getHeader('Content-Type');
  100. $expectedContentType = 'text/html';
  101. $suffixedExpectedContentType = $expectedContentType . ';';
  102. $startsWithSuffixed = str_starts_with($linkContentType, $suffixedExpectedContentType);
  103. // check the header begins with the expected content type
  104. if ($linkContentType !== $expectedContentType && !$startsWithSuffixed) {
  105. $this->logger->debug('Skip resolving links pointing to content type that is not "text/html"');
  106. return;
  107. }
  108. try {
  109. $response = $client->get($reference->getId(), [ 'timeout' => 10, 'stream' => true ]);
  110. } catch (\Exception $e) {
  111. $this->logger->debug('Failed to fetch link for obtaining open graph data', ['exception' => $e]);
  112. return;
  113. }
  114. $body = $response->getBody();
  115. if (is_resource($body)) {
  116. $responseContent = fread($body, 5 * 1024 * 1024);
  117. if (!feof($body)) {
  118. $this->logger->debug('[Get] Skip resolving links pointing to content length > 5 MiB');
  119. return;
  120. }
  121. } else {
  122. $this->logger->error('[Get] Impossible to check content length');
  123. return;
  124. }
  125. // OpenGraph handling
  126. $consumer = new Consumer();
  127. $consumer->useFallbackMode = true;
  128. $object = $consumer->loadHtml($responseContent);
  129. $reference->setUrl($reference->getId());
  130. if ($object->title) {
  131. $reference->setTitle($object->title);
  132. }
  133. if ($object->description) {
  134. $reference->setDescription($object->description);
  135. }
  136. if ($object->images) {
  137. try {
  138. $host = parse_url($object->images[0]->url, PHP_URL_HOST);
  139. if ($host === false || $host === null) {
  140. $this->logger->warning('Could not detect host of open graph image URI for ' . $reference->getId());
  141. } else {
  142. $appData = $this->appDataFactory->get('core');
  143. try {
  144. $folder = $appData->getFolder('opengraph');
  145. } catch (NotFoundException $e) {
  146. $folder = $appData->newFolder('opengraph');
  147. }
  148. $response = $client->get($object->images[0]->url, ['timeout' => 10]);
  149. $contentType = $response->getHeader('Content-Type');
  150. $contentLength = $response->getHeader('Content-Length');
  151. if (in_array($contentType, self::ALLOWED_CONTENT_TYPES, true) && $contentLength < self::MAX_PREVIEW_SIZE) {
  152. $stream = Utils::streamFor($response->getBody());
  153. $bodyStream = new LimitStream($stream, self::MAX_PREVIEW_SIZE, 0);
  154. $reference->setImageContentType($contentType);
  155. $folder->newFile(md5($reference->getId()), $bodyStream->getContents());
  156. $reference->setImageUrl($this->urlGenerator->linkToRouteAbsolute('core.Reference.preview', ['referenceId' => md5($reference->getId())]));
  157. }
  158. }
  159. } catch (GuzzleException $e) {
  160. $this->logger->info('Failed to fetch and store the open graph image for ' . $reference->getId(), ['exception' => $e]);
  161. } catch (\Throwable $e) {
  162. $this->logger->error('Failed to fetch and store the open graph image for ' . $reference->getId(), ['exception' => $e]);
  163. }
  164. }
  165. }
  166. public function getCachePrefix(string $referenceId): string {
  167. return $referenceId;
  168. }
  169. public function getCacheKey(string $referenceId): ?string {
  170. return null;
  171. }
  172. }