1
0

BackgroundCleanupJob.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * SPDX-FileCopyrightText: 2018 Nextcloud GmbH and Nextcloud contributors
  5. * SPDX-License-Identifier: AGPL-3.0-or-later
  6. */
  7. namespace OC\Preview;
  8. use OC\Preview\Storage\Root;
  9. use OCP\AppFramework\Utility\ITimeFactory;
  10. use OCP\BackgroundJob\TimedJob;
  11. use OCP\DB\QueryBuilder\IQueryBuilder;
  12. use OCP\Files\IMimeTypeLoader;
  13. use OCP\Files\NotFoundException;
  14. use OCP\Files\NotPermittedException;
  15. use OCP\IDBConnection;
  /**
   * Timed background job that deletes preview folders whose source file is no
   * longer present in the `filecache` table.
   *
   * Two preview layouts are handled: the old one, where the folder named after
   * the file id is a direct child of the preview root, and the new one, where
   * that folder sits below seven single-character sub folders.
   */
  class BackgroundCleanupJob extends TimedJob {
      /** Row limit applied to the join-based cleanup queries when the job is not run from the CLI. */
      private const NON_CLI_BATCH_SIZE = 10;

      /** Number of preview folders looked up per query on sharded setups. */
      private const SHARDED_CHUNK_SIZE = 1000;

      /** LIKE suffix matching seven single-character path segments followed by the preview folder. */
      private const NEW_LAYOUT_LIKE_SUFFIX = '/_/_/_/_/_/_/_/%';

      /**
       * @param ITimeFactory $timeFactory Passed through to the parent job
       * @param IDBConnection $connection Connection used for all `filecache` queries
       * @param Root $previewFolder Preview root; per-file preview folders are looked up through it
       * @param IMimeTypeLoader $mimeTypeLoader Resolves the numeric id of the `httpd/unix-directory` mimetype
       * @param bool $isCLI When false, the join-based (non-sharded) queries are capped at
       *                    NON_CLI_BATCH_SIZE rows; the sharded lookup is not capped
       */
      public function __construct(
          ITimeFactory $timeFactory,
          private IDBConnection $connection,
          private Root $previewFolder,
          private IMimeTypeLoader $mimeTypeLoader,
          private bool $isCLI,
      ) {
          parent::__construct($timeFactory);
          // Run at most once an hour
          $this->setInterval(60 * 60);
          $this->setTimeSensitivity(self::TIME_INSENSITIVE);
      }

      /**
       * Delete the preview folder of every file id reported by getDeletedFiles().
       *
       * Deletion is best effort: a folder that cannot be found or may not be
       * deleted is skipped and the loop continues with the next id.
       *
       * @param mixed $argument Not used by this job
       */
      public function run($argument) {
          foreach ($this->getDeletedFiles() as $fileId) {
              try {
                  $this->previewFolder->getFolder((string)$fileId)->delete();
              } catch (NotFoundException|NotPermittedException) {
                  // Nothing to clean up (or not allowed to): move on to the next id
              }
          }
      }

      /**
       * Yield the names (file ids) of all preview folders without a source file,
       * first from the old layout, then from the new one.
       */
      private function getDeletedFiles(): \Iterator {
          yield from $this->getOldPreviewLocations();
          yield from $this->getNewPreviewLocations();
      }

      /**
       * Yield orphaned preview folders stored directly below the preview root.
       *
       * Yields nothing when the `filecache` table is sharded.
       */
      private function getOldPreviewLocations(): \Iterator {
          if ($this->connection->getShardDefinition('filecache')) {
              // sharding is new enough that we don't need to support this
              return;
          }

          $qb = $this->connection->getQueryBuilder();
          $expr = $qb->expr();
          $qb->select('a.name')
              ->from('filecache', 'a')
              ->leftJoin('a', 'filecache', 'b', $expr->eq(
                  $expr->castColumn('a.name', IQueryBuilder::PARAM_INT), 'b.fileid'
              ))
              ->where(
                  $expr->isNull('b.fileid')
              )->andWhere(
                  $expr->eq('a.storage', $qb->createNamedParameter($this->previewFolder->getStorageId()))
              )->andWhere(
                  $expr->eq('a.parent', $qb->createNamedParameter($this->previewFolder->getId()))
              )->andWhere(
                  // Only names of at least two characters; presumably this skips the
                  // single-character sub folders of the new layout - TODO confirm
                  $expr->like('a.name', $qb->createNamedParameter('__%'))
              );

          yield from $this->fetchOrphanedNames($qb);
      }

      /**
       * Yield orphaned preview folders stored in the new (nested) layout.
       *
       * Yields nothing when the preview root has no `filecache` row.
       */
      private function getNewPreviewLocations(): \Iterator {
          $qb = $this->connection->getQueryBuilder();
          $qb->select('path', 'mimetype')
              ->from('filecache')
              ->where($qb->expr()->eq('fileid', $qb->createNamedParameter($this->previewFolder->getId())));

          $cursor = $qb->executeQuery();
          try {
              $data = $cursor->fetch();
          } finally {
              $cursor->closeCursor();
          }

          // fetch() reports "no row" as false rather than null, so test for a usable row
          if (!is_array($data)) {
              return;
          }

          if ($this->connection->getShardDefinition('filecache')) {
              // Sharded setups avoid the self join: list the preview folders in chunks
              // and check each chunk of ids against filecache separately.
              foreach ($this->getAllPreviewIds($data['path'], self::SHARDED_CHUNK_SIZE) as $chunk) {
                  yield from $this->findMissingSources($chunk);
              }

              return;
          }

          /*
           * This LIKE pattern is the result of the way the new previews are stored.
           * We take the md5 of the name (fileid) and split the first 7 chars. That way
           * there are not a gazillion files in the root of the preview appdata.
           */
          $like = $this->connection->escapeLikeParameter($data['path']) . self::NEW_LAYOUT_LIKE_SUFFIX;

          /*
           * Deleting a file will not delete related previews right away.
           *
           * A delete request is usually an HTTP request.
           * The preview deleting is done by a background job to avoid timeouts.
           *
           * Previews for a file are stored within a folder in appdata_/preview using the fileid as folder name.
           * Preview folders in oc_filecache are identified by a.storage, a.path (cf. $like) and a.mimetype.
           *
           * To find preview folders to delete, we query oc_filecache for a preview folder in app data,
           * matching the preview folder structure, and use the name to left join oc_filecache
           * on a.name = b.fileid. A left join returns all rows from the left table (a),
           * even if there are no matches in the right table (b).
           *
           * If the related file is deleted, b.fileid will be null and the preview folder can be deleted.
           */
          $qb = $this->connection->getQueryBuilder();
          $expr = $qb->expr();
          $qb->select('a.name')
              ->from('filecache', 'a')
              ->leftJoin('a', 'filecache', 'b', $expr->eq(
                  $expr->castColumn('a.name', IQueryBuilder::PARAM_INT), 'b.fileid'
              ))
              ->where(
                  $expr->andX(
                      $expr->eq('a.storage', $qb->createNamedParameter($this->previewFolder->getStorageId())),
                      $expr->isNull('b.fileid'),
                      $expr->like('a.path', $qb->createNamedParameter($like)),
                      $expr->eq('a.mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory')))
                  )
              );

          yield from $this->fetchOrphanedNames($qb);
      }

      /**
       * Execute a prepared "orphaned preview folder" query and yield the `name`
       * column of every row.
       *
       * Outside the CLI the query is limited to NON_CLI_BATCH_SIZE rows. The
       * cursor is closed in a `finally` block so it is also released when the
       * consumer stops iterating early or an exception interrupts the loop.
       *
       * @param IQueryBuilder $qb Fully built query selecting a `name` column
       */
      private function fetchOrphanedNames(IQueryBuilder $qb): \Iterator {
          if (!$this->isCLI) {
              $qb->setMaxResults(self::NON_CLI_BATCH_SIZE);
          }

          $cursor = $qb->executeQuery();
          try {
              while ($row = $cursor->fetch()) {
                  yield $row['name'];
              }
          } finally {
              $cursor->closeCursor();
          }
      }

      /**
       * Yield the names of all new-layout preview folders, cast to int, in chunks.
       *
       * Pages through the rows by ascending `fileid`: each query only returns rows
       * whose `fileid` is greater than the last one seen, until a query comes back empty.
       *
       * @param string $previewRoot `filecache` path of the preview root
       * @param int $chunkSize Maximum number of rows fetched per query
       * @return \Iterator<int, list<int>>
       */
      private function getAllPreviewIds(string $previewRoot, int $chunkSize): \Iterator {
          // See `getNewPreviewLocations` for some more info about the logic here
          $like = $this->connection->escapeLikeParameter($previewRoot) . self::NEW_LAYOUT_LIKE_SUFFIX;

          $qb = $this->connection->getQueryBuilder();
          $expr = $qb->expr();
          $qb->select('name', 'fileid')
              ->from('filecache')
              ->where(
                  $expr->andX(
                      $expr->eq('storage', $qb->createNamedParameter($this->previewFolder->getStorageId())),
                      $expr->like('path', $qb->createNamedParameter($like)),
                      $expr->eq('mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory'))),
                      $expr->gt('fileid', $qb->createParameter('min_id')),
                  )
              )
              ->orderBy('fileid', 'ASC')
              ->setMaxResults($chunkSize);

          $minId = 0;
          while (true) {
              $qb->setParameter('min_id', $minId);

              $result = $qb->executeQuery();
              $rows = $result->fetchAll();
              $result->closeCursor();

              if ($rows === []) {
                  break;
              }

              // Rows are ordered by fileid, so the last row carries the next lower bound
              $minId = $rows[array_key_last($rows)]['fileid'];

              yield array_map(static fn (array $row): int => (int)$row['name'], $rows);
          }
      }

      /**
       * Return those of the given ids that have no row in `filecache`.
       *
       * @param int[] $ids File ids to check
       * @return int[] The ids without a matching `fileid`; keys of $ids are preserved
       */
      private function findMissingSources(array $ids): array {
          if ($ids === []) {
              // Nothing to check; also avoids building an empty IN () list
              return [];
          }

          $qb = $this->connection->getQueryBuilder();
          $qb->select('fileid')
              ->from('filecache')
              ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));

          $result = $qb->executeQuery();
          $found = $result->fetchAll(\PDO::FETCH_COLUMN);
          $result->closeCursor();

          return array_diff($ids, $found);
      }
  }