BackgroundCleanupJob.php 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. <?php
  2. declare(strict_types=1);
  3. /**
  4. * SPDX-FileCopyrightText: 2018 Nextcloud GmbH and Nextcloud contributors
  5. * SPDX-License-Identifier: AGPL-3.0-or-later
  6. */
  7. namespace OC\Preview;
  8. use OC\Preview\Storage\Root;
  9. use OCP\AppFramework\Utility\ITimeFactory;
  10. use OCP\BackgroundJob\TimedJob;
  11. use OCP\DB\QueryBuilder\IQueryBuilder;
  12. use OCP\Files\IMimeTypeLoader;
  13. use OCP\Files\NotFoundException;
  14. use OCP\Files\NotPermittedException;
  15. use OCP\IDBConnection;
  16. class BackgroundCleanupJob extends TimedJob {
  17. /** @var IDBConnection */
  18. private $connection;
  19. /** @var Root */
  20. private $previewFolder;
  21. /** @var bool */
  22. private $isCLI;
  23. /** @var IMimeTypeLoader */
  24. private $mimeTypeLoader;
  25. public function __construct(ITimeFactory $timeFactory,
  26. IDBConnection $connection,
  27. Root $previewFolder,
  28. IMimeTypeLoader $mimeTypeLoader,
  29. bool $isCLI) {
  30. parent::__construct($timeFactory);
  31. // Run at most once an hour
  32. $this->setInterval(3600);
  33. $this->connection = $connection;
  34. $this->previewFolder = $previewFolder;
  35. $this->isCLI = $isCLI;
  36. $this->mimeTypeLoader = $mimeTypeLoader;
  37. }
  38. public function run($argument) {
  39. foreach ($this->getDeletedFiles() as $fileId) {
  40. try {
  41. $preview = $this->previewFolder->getFolder((string)$fileId);
  42. $preview->delete();
  43. } catch (NotFoundException $e) {
  44. // continue
  45. } catch (NotPermittedException $e) {
  46. // continue
  47. }
  48. }
  49. }
  50. private function getDeletedFiles(): \Iterator {
  51. yield from $this->getOldPreviewLocations();
  52. yield from $this->getNewPreviewLocations();
  53. }
  54. private function getOldPreviewLocations(): \Iterator {
  55. if ($this->connection->getShardDefinition('filecache')) {
  56. // sharding is new enough that we don't need to support this
  57. return;
  58. }
  59. $qb = $this->connection->getQueryBuilder();
  60. $qb->select('a.name')
  61. ->from('filecache', 'a')
  62. ->leftJoin('a', 'filecache', 'b', $qb->expr()->eq(
  63. $qb->expr()->castColumn('a.name', IQueryBuilder::PARAM_INT), 'b.fileid'
  64. ))
  65. ->where(
  66. $qb->expr()->isNull('b.fileid')
  67. )->andWhere(
  68. $qb->expr()->eq('a.storage', $qb->createNamedParameter($this->previewFolder->getStorageId()))
  69. )->andWhere(
  70. $qb->expr()->eq('a.parent', $qb->createNamedParameter($this->previewFolder->getId()))
  71. )->andWhere(
  72. $qb->expr()->like('a.name', $qb->createNamedParameter('__%'))
  73. );
  74. if (!$this->isCLI) {
  75. $qb->setMaxResults(10);
  76. }
  77. $cursor = $qb->execute();
  78. while ($row = $cursor->fetch()) {
  79. yield $row['name'];
  80. }
  81. $cursor->closeCursor();
  82. }
  83. private function getNewPreviewLocations(): \Iterator {
  84. $qb = $this->connection->getQueryBuilder();
  85. $qb->select('path', 'mimetype')
  86. ->from('filecache')
  87. ->where($qb->expr()->eq('fileid', $qb->createNamedParameter($this->previewFolder->getId())));
  88. $cursor = $qb->execute();
  89. $data = $cursor->fetch();
  90. $cursor->closeCursor();
  91. if ($data === null) {
  92. return [];
  93. }
  94. if ($this->connection->getShardDefinition('filecache')) {
  95. $chunks = $this->getAllPreviewIds($data['path'], 1000);
  96. foreach ($chunks as $chunk) {
  97. yield from $this->findMissingSources($chunk);
  98. }
  99. return;
  100. }
  101. /*
  102. * This lovely like is the result of the way the new previews are stored
  103. * We take the md5 of the name (fileid) and split the first 7 chars. That way
  104. * there are not a gazillion files in the root of the preview appdata.
  105. */
  106. $like = $this->connection->escapeLikeParameter($data['path']) . '/_/_/_/_/_/_/_/%';
  107. /*
  108. * Deleting a file will not delete related previews right away.
  109. *
  110. * A delete request is usually an HTTP request.
  111. * The preview deleting is done by a background job to avoid timeouts.
  112. *
  113. * Previews for a file are stored within a folder in appdata_/preview using the fileid as folder name.
  114. * Preview folders in oc_filecache are identified by a.storage, a.path (cf. $like) and a.mimetype.
  115. *
  116. * To find preview folders to delete, we query oc_filecache for a preview folder in app data, matching the preview folder structure
  117. * and use the name to left join oc_filecache on a.name = b.fileid. A left join returns all rows from the left table (a),
  118. * even if there are no matches in the right table (b).
  119. *
  120. * If the related file is deleted, b.fileid will be null and the preview folder can be deleted.
  121. */
  122. $qb = $this->connection->getQueryBuilder();
  123. $qb->select('a.name')
  124. ->from('filecache', 'a')
  125. ->leftJoin('a', 'filecache', 'b', $qb->expr()->eq(
  126. $qb->expr()->castColumn('a.name', IQueryBuilder::PARAM_INT), 'b.fileid'
  127. ))
  128. ->where(
  129. $qb->expr()->andX(
  130. $qb->expr()->eq('a.storage', $qb->createNamedParameter($this->previewFolder->getStorageId())),
  131. $qb->expr()->isNull('b.fileid'),
  132. $qb->expr()->like('a.path', $qb->createNamedParameter($like)),
  133. $qb->expr()->eq('a.mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory')))
  134. )
  135. );
  136. if (!$this->isCLI) {
  137. $qb->setMaxResults(10);
  138. }
  139. $cursor = $qb->execute();
  140. while ($row = $cursor->fetch()) {
  141. yield $row['name'];
  142. }
  143. $cursor->closeCursor();
  144. }
  145. private function getAllPreviewIds(string $previewRoot, int $chunkSize): \Iterator {
  146. // See `getNewPreviewLocations` for some more info about the logic here
  147. $like = $this->connection->escapeLikeParameter($previewRoot). '/_/_/_/_/_/_/_/%';
  148. $qb = $this->connection->getQueryBuilder();
  149. $qb->select('name', 'fileid')
  150. ->from('filecache')
  151. ->where(
  152. $qb->expr()->andX(
  153. $qb->expr()->eq('storage', $qb->createNamedParameter($this->previewFolder->getStorageId())),
  154. $qb->expr()->like('path', $qb->createNamedParameter($like)),
  155. $qb->expr()->eq('mimetype', $qb->createNamedParameter($this->mimeTypeLoader->getId('httpd/unix-directory'))),
  156. $qb->expr()->gt('fileid', $qb->createParameter('min_id')),
  157. )
  158. )
  159. ->orderBy('fileid', 'ASC')
  160. ->setMaxResults($chunkSize);
  161. $minId = 0;
  162. while (true) {
  163. $qb->setParameter('min_id', $minId);
  164. $rows = $qb->executeQuery()->fetchAll();
  165. if (count($rows) > 0) {
  166. $minId = $rows[count($rows) - 1]['fileid'];
  167. yield array_map(function ($row) {
  168. return (int)$row['name'];
  169. }, $rows);
  170. } else {
  171. break;
  172. }
  173. }
  174. }
  175. private function findMissingSources(array $ids): array {
  176. $qb = $this->connection->getQueryBuilder();
  177. $qb->select('fileid')
  178. ->from('filecache')
  179. ->where($qb->expr()->in('fileid', $qb->createNamedParameter($ids, IQueryBuilder::PARAM_INT_ARRAY)));
  180. $found = $qb->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
  181. return array_diff($ids, $found);
  182. }
  183. }