ScanFiles.php 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. <?php
  2. /**
  3. * SPDX-FileCopyrightText: 2019-2024 Nextcloud GmbH and Nextcloud contributors
  4. * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
  5. * SPDX-License-Identifier: AGPL-3.0-only
  6. */
  7. namespace OCA\Files\BackgroundJob;
  8. use OC\Files\Utils\Scanner;
  9. use OCP\AppFramework\Utility\ITimeFactory;
  10. use OCP\BackgroundJob\TimedJob;
  11. use OCP\DB\QueryBuilder\IQueryBuilder;
  12. use OCP\EventDispatcher\IEventDispatcher;
  13. use OCP\IConfig;
  14. use OCP\IDBConnection;
  15. use Psr\Log\LoggerInterface;
  16. /**
  17. * Class ScanFiles is a background job used to run the file scanner over the user
  18. * accounts to ensure integrity of the file cache.
  19. *
  20. * @package OCA\Files\BackgroundJob
  21. */
  22. class ScanFiles extends TimedJob {
  23. /** Amount of users that should get scanned per execution */
  24. public const USERS_PER_SESSION = 500;
  25. public function __construct(
  26. private IConfig $config,
  27. private IEventDispatcher $dispatcher,
  28. private LoggerInterface $logger,
  29. private IDBConnection $connection,
  30. ITimeFactory $time,
  31. ) {
  32. parent::__construct($time);
  33. // Run once per 10 minutes
  34. $this->setInterval(60 * 10);
  35. }
  36. protected function runScanner(string $user): void {
  37. try {
  38. $scanner = new Scanner(
  39. $user,
  40. null,
  41. $this->dispatcher,
  42. $this->logger
  43. );
  44. $scanner->backgroundScan('');
  45. } catch (\Exception $e) {
  46. $this->logger->error($e->getMessage(), ['exception' => $e, 'app' => 'files']);
  47. }
  48. \OC_Util::tearDownFS();
  49. }
  50. /**
  51. * Find a storage which have unindexed files and return a user with access to the storage
  52. *
  53. * @return string|false
  54. */
  55. private function getUserToScan() {
  56. if ($this->connection->getShardDefinition('filecache')) {
  57. // for sharded filecache, the "LIMIT" from the normal query doesn't work
  58. // first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted.
  59. // we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted
  60. $query = $this->connection->getQueryBuilder();
  61. $query->select('m.user_id')
  62. ->from('filecache', 'f')
  63. ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
  64. ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
  65. ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
  66. ->setMaxResults(10)
  67. ->groupBy('f.storage')
  68. ->runAcrossAllShards();
  69. $result = $query->executeQuery();
  70. while ($res = $result->fetch()) {
  71. if ($res['user_id']) {
  72. return $res['user_id'];
  73. }
  74. }
  75. // as a fallback, we try a slower approach where we find all mounted storages first
  76. // this is essentially doing the inner join manually
  77. $storages = $this->getAllMountedStorages();
  78. $query = $this->connection->getQueryBuilder();
  79. $query->select('m.user_id')
  80. ->from('filecache', 'f')
  81. ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
  82. ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
  83. ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
  84. ->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storages, IQueryBuilder::PARAM_INT_ARRAY)))
  85. ->setMaxResults(1)
  86. ->runAcrossAllShards();
  87. return $query->executeQuery()->fetchOne();
  88. } else {
  89. $query = $this->connection->getQueryBuilder();
  90. $query->select('m.user_id')
  91. ->from('filecache', 'f')
  92. ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
  93. ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
  94. ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
  95. ->setMaxResults(1)
  96. ->runAcrossAllShards();
  97. return $query->executeQuery()->fetchOne();
  98. }
  99. }
  100. private function getAllMountedStorages(): array {
  101. $query = $this->connection->getQueryBuilder();
  102. $query->selectDistinct('storage_id')
  103. ->from('mounts');
  104. return $query->executeQuery()->fetchAll(\PDO::FETCH_COLUMN);
  105. }
  106. /**
  107. * @param $argument
  108. * @throws \Exception
  109. */
  110. protected function run($argument) {
  111. if ($this->config->getSystemValueBool('files_no_background_scan', false)) {
  112. return;
  113. }
  114. $usersScanned = 0;
  115. $lastUser = '';
  116. $user = $this->getUserToScan();
  117. while ($user && $usersScanned < self::USERS_PER_SESSION && $lastUser !== $user) {
  118. $this->runScanner($user);
  119. $lastUser = $user;
  120. $user = $this->getUserToScan();
  121. $usersScanned += 1;
  122. }
  123. if ($lastUser === $user) {
  124. $this->logger->warning("User $user still has unscanned files after running background scan, background scan might be stopped prematurely");
  125. }
  126. }
  127. }