LinkReferenceProvider.php 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * @copyright Copyright (c) 2022 Julius Härtl <jus@bitgrid.net>
  5. *
  6. * @author Julius Härtl <jus@bitgrid.net>
  7. *
  8. * @license GNU AGPL version 3 or any later version
  9. *
  10. * This program is free software: you can redistribute it and/or modify
  11. * it under the terms of the GNU Affero General Public License as
  12. * published by the Free Software Foundation, either version 3 of the
  13. * License, or (at your option) any later version.
  14. *
  15. * This program is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU Affero General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Affero General Public License
  21. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  22. */
  23. namespace OC\Collaboration\Reference;
  24. use Fusonic\OpenGraph\Consumer;
  25. use GuzzleHttp\Exception\GuzzleException;
  26. use GuzzleHttp\Psr7\LimitStream;
  27. use GuzzleHttp\Psr7\Utils;
  28. use OC\Security\RateLimiting\Exception\RateLimitExceededException;
  29. use OC\Security\RateLimiting\Limiter;
  30. use OC\SystemConfig;
  31. use OCP\Collaboration\Reference\IReference;
  32. use OCP\Collaboration\Reference\IReferenceProvider;
  33. use OCP\Collaboration\Reference\Reference;
  34. use OCP\Files\AppData\IAppDataFactory;
  35. use OCP\Files\NotFoundException;
  36. use OCP\Http\Client\IClientService;
  37. use OCP\IRequest;
  38. use OCP\IURLGenerator;
  39. use OCP\IUserSession;
  40. use Psr\Log\LoggerInterface;
  41. class LinkReferenceProvider implements IReferenceProvider {
  42. public const MAX_PREVIEW_SIZE = 1024 * 1024;
  43. public const ALLOWED_CONTENT_TYPES = [
  44. 'image/png',
  45. 'image/jpg',
  46. 'image/jpeg',
  47. 'image/gif',
  48. 'image/svg+xml',
  49. 'image/webp'
  50. ];
  51. private IClientService $clientService;
  52. private LoggerInterface $logger;
  53. private SystemConfig $systemConfig;
  54. private IAppDataFactory $appDataFactory;
  55. private IURLGenerator $urlGenerator;
  56. private Limiter $limiter;
  57. private IUserSession $userSession;
  58. private IRequest $request;
  59. public function __construct(IClientService $clientService, LoggerInterface $logger, SystemConfig $systemConfig, IAppDataFactory $appDataFactory, IURLGenerator $urlGenerator, Limiter $limiter, IUserSession $userSession, IRequest $request) {
  60. $this->clientService = $clientService;
  61. $this->logger = $logger;
  62. $this->systemConfig = $systemConfig;
  63. $this->appDataFactory = $appDataFactory;
  64. $this->urlGenerator = $urlGenerator;
  65. $this->limiter = $limiter;
  66. $this->userSession = $userSession;
  67. $this->request = $request;
  68. }
  69. public function matchReference(string $referenceText): bool {
  70. if ($this->systemConfig->getValue('reference_opengraph', true) !== true) {
  71. return false;
  72. }
  73. return (bool)preg_match(IURLGenerator::URL_REGEX, $referenceText);
  74. }
  75. public function resolveReference(string $referenceText): ?IReference {
  76. if ($this->matchReference($referenceText)) {
  77. $reference = new Reference($referenceText);
  78. $this->fetchReference($reference);
  79. return $reference;
  80. }
  81. return null;
  82. }
  83. private function fetchReference(Reference $reference): void {
  84. try {
  85. $user = $this->userSession->getUser();
  86. if ($user) {
  87. $this->limiter->registerUserRequest('opengraph', 10, 120, $user);
  88. } else {
  89. $this->limiter->registerAnonRequest('opengraph', 10, 120, $this->request->getRemoteAddress());
  90. }
  91. } catch (RateLimitExceededException $e) {
  92. return;
  93. }
  94. $client = $this->clientService->newClient();
  95. try {
  96. $headResponse = $client->head($reference->getId(), [ 'timeout' => 10 ]);
  97. } catch (\Exception $e) {
  98. $this->logger->debug('Failed to perform HEAD request to get target metadata', ['exception' => $e]);
  99. return;
  100. }
  101. $linkContentLength = $headResponse->getHeader('Content-Length');
  102. if (is_numeric($linkContentLength) && (int) $linkContentLength > 5 * 1024 * 1024) {
  103. $this->logger->debug('Skip resolving links pointing to content length > 5 MB');
  104. return;
  105. }
  106. $linkContentType = $headResponse->getHeader('Content-Type');
  107. $expectedContentType = 'text/html';
  108. $suffixedExpectedContentType = $expectedContentType . ';';
  109. $startsWithSuffixed = substr($linkContentType, 0, strlen($suffixedExpectedContentType)) === $suffixedExpectedContentType;
  110. // check the header begins with the expected content type
  111. if ($linkContentType !== $expectedContentType && !$startsWithSuffixed) {
  112. $this->logger->debug('Skip resolving links pointing to content type that is not "text/html"');
  113. return;
  114. }
  115. try {
  116. $response = $client->get($reference->getId(), [ 'timeout' => 10 ]);
  117. } catch (\Exception $e) {
  118. $this->logger->debug('Failed to fetch link for obtaining open graph data', ['exception' => $e]);
  119. return;
  120. }
  121. $responseBody = (string)$response->getBody();
  122. // OpenGraph handling
  123. $consumer = new Consumer();
  124. $consumer->useFallbackMode = true;
  125. $object = $consumer->loadHtml($responseBody);
  126. $reference->setUrl($reference->getId());
  127. if ($object->title) {
  128. $reference->setTitle($object->title);
  129. }
  130. if ($object->description) {
  131. $reference->setDescription($object->description);
  132. }
  133. if ($object->images) {
  134. try {
  135. $appData = $this->appDataFactory->get('core');
  136. try {
  137. $folder = $appData->getFolder('opengraph');
  138. } catch (NotFoundException $e) {
  139. $folder = $appData->newFolder('opengraph');
  140. }
  141. $response = $client->get($object->images[0]->url, [ 'timeout' => 10 ]);
  142. $contentType = $response->getHeader('Content-Type');
  143. $contentLength = $response->getHeader('Content-Length');
  144. if (in_array($contentType, self::ALLOWED_CONTENT_TYPES, true) && $contentLength < self::MAX_PREVIEW_SIZE) {
  145. $stream = Utils::streamFor($response->getBody());
  146. $bodyStream = new LimitStream($stream, self::MAX_PREVIEW_SIZE, 0);
  147. $reference->setImageContentType($contentType);
  148. $folder->newFile(md5($reference->getId()), $bodyStream->getContents());
  149. $reference->setImageUrl($this->urlGenerator->linkToRouteAbsolute('core.Reference.preview', ['referenceId' => md5($reference->getId())]));
  150. }
  151. } catch (GuzzleException $e) {
  152. $this->logger->info('Failed to fetch and store the open graph image for ' . $reference->getId(), ['exception' => $e]);
  153. } catch (\Throwable $e) {
  154. $this->logger->error('Failed to fetch and store the open graph image for ' . $reference->getId(), ['exception' => $e]);
  155. }
  156. }
  157. }
  158. public function getCachePrefix(string $referenceId): string {
  159. return $referenceId;
  160. }
  161. public function getCacheKey(string $referenceId): ?string {
  162. return null;
  163. }
  164. }