ScanFiles.php 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. <?php
  2. /**
  3. * SPDX-FileCopyrightText: 2019-2024 Nextcloud GmbH and Nextcloud contributors
  4. * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
  5. * SPDX-License-Identifier: AGPL-3.0-only
  6. */
  7. namespace OCA\Files\BackgroundJob;
  8. use OC\Files\Utils\Scanner;
  9. use OCP\AppFramework\Utility\ITimeFactory;
  10. use OCP\BackgroundJob\TimedJob;
  11. use OCP\DB\QueryBuilder\IQueryBuilder;
  12. use OCP\EventDispatcher\IEventDispatcher;
  13. use OCP\IConfig;
  14. use OCP\IDBConnection;
  15. use Psr\Log\LoggerInterface;
  16. /**
  17. * Class ScanFiles is a background job used to run the file scanner over the user
  18. * accounts to ensure integrity of the file cache.
  19. *
  20. * @package OCA\Files\BackgroundJob
  21. */
  class ScanFiles extends TimedJob {
      private IConfig $config;
      private IEventDispatcher $dispatcher;
      private LoggerInterface $logger;
      private IDBConnection $connection;

      /** Amount of users that should get scanned per execution */
      public const USERS_PER_SESSION = 500;

      /**
       * Maximum number of storage ids bound into a single "IN (...)" list.
       *
       * Some databases limit the size of an expression list (Oracle: 1000 entries),
       * so the list of mounted storages is queried in chunks of this size.
       */
      private const STORAGE_CHUNK_SIZE = 1000;

      public function __construct(
          IConfig $config,
          IEventDispatcher $dispatcher,
          LoggerInterface $logger,
          IDBConnection $connection,
          ITimeFactory $time
      ) {
          parent::__construct($time);
          // Run once per 10 minutes
          $this->setInterval(60 * 10);

          $this->config = $config;
          $this->dispatcher = $dispatcher;
          $this->logger = $logger;
          $this->connection = $connection;
      }

      /**
       * Run a background scan over the files of a single user.
       *
       * Exceptions raised by the scanner are logged and not re-thrown, so one failing
       * user does not abort the whole job. The filesystem is torn down afterwards in
       * every case, including when a non-Exception throwable propagates.
       *
       * @param string $user id of the user whose files should be scanned
       */
      protected function runScanner(string $user): void {
          try {
              $scanner = new Scanner(
                  $user,
                  null,
                  $this->dispatcher,
                  $this->logger
              );
              $scanner->backgroundScan('');
          } catch (\Exception $e) {
              $this->logger->error($e->getMessage(), ['exception' => $e, 'app' => 'files']);
          } finally {
              // always release the filesystem set up for this user before moving on to the next one
              \OC_Util::tearDownFS();
          }
      }

      /**
       * Find a storage which has unindexed files and return a user with access to the storage
       *
       * Unindexed entries are filecache rows with a negative size and a parent greater than -1.
       *
       * @return string|false the id of a user to scan, or false when nothing was found
       */
      private function getUserToScan() {
          if (!$this->connection->getShardDefinition('filecache')) {
              $query = $this->connection->getQueryBuilder();
              $query->select('m.user_id')
                  ->from('filecache', 'f')
                  ->innerJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
                  ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
                  ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
                  ->setMaxResults(1)
                  ->runAcrossAllShards();

              $result = $query->executeQuery();
              $user = $result->fetchOne();
              $result->closeCursor();
              return $user;
          }

          // for sharded filecache, the "LIMIT" from the normal query doesn't work

          // first we try it with a "LEFT JOIN" on mounts, this is fast, but might return a storage that isn't mounted.
          // we also ask for up to 10 results from different storages to increase the odds of finding a result that is mounted
          $query = $this->connection->getQueryBuilder();
          $query->select('m.user_id')
              ->from('filecache', 'f')
              ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
              ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
              ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
              ->setMaxResults(10)
              ->groupBy('f.storage')
              ->runAcrossAllShards();

          $result = $query->executeQuery();
          $user = false;
          while ($row = $result->fetch()) {
              // rows of storages without a mount carry no user id because of the LEFT JOIN
              if ($row['user_id']) {
                  $user = $row['user_id'];
                  break;
              }
          }
          // close the cursor before returning, the loop may stop before all rows were read
          $result->closeCursor();
          if ($user !== false) {
              return $user;
          }

          // as a fallback, we try a slower approach where we find all mounted storages first
          // this is essentially doing the inner join manually
          $storages = $this->getAllMountedStorages();

          // query in chunks to keep the "IN" list bounded; without any mounted storage no query is run at all
          foreach (array_chunk($storages, self::STORAGE_CHUNK_SIZE) as $storageChunk) {
              $query = $this->connection->getQueryBuilder();
              $query->select('m.user_id')
                  ->from('filecache', 'f')
                  ->leftJoin('f', 'mounts', 'm', $query->expr()->eq('m.storage_id', 'f.storage'))
                  ->where($query->expr()->lt('f.size', $query->createNamedParameter(0, IQueryBuilder::PARAM_INT)))
                  ->andWhere($query->expr()->gt('f.parent', $query->createNamedParameter(-1, IQueryBuilder::PARAM_INT)))
                  ->andWhere($query->expr()->in('f.storage', $query->createNamedParameter($storageChunk, IQueryBuilder::PARAM_INT_ARRAY)))
                  ->setMaxResults(1)
                  ->runAcrossAllShards();

              $result = $query->executeQuery();
              $user = $result->fetchOne();
              $result->closeCursor();
              if ($user !== false) {
                  return $user;
              }
          }

          return false;
      }

      /**
       * Collect the distinct storage ids referenced by the mounts table.
       *
       * @return array list of storage ids, empty when there are no mounts
       */
      private function getAllMountedStorages(): array {
          $query = $this->connection->getQueryBuilder();
          $query->selectDistinct('storage_id')
              ->from('mounts');

          $result = $query->executeQuery();
          $storages = $result->fetchAll(\PDO::FETCH_COLUMN);
          $result->closeCursor();
          return $storages;
      }

      /**
       * Scan users with unindexed files, one after another.
       *
       * Does nothing when the "files_no_background_scan" system value is enabled.
       * Otherwise the loop ends when no user is left to scan, when USERS_PER_SESSION
       * users were handled, or when the same user is returned twice in a row.
       *
       * @param $argument
       * @throws \Exception
       */
      protected function run($argument) {
          if ($this->config->getSystemValueBool('files_no_background_scan', false)) {
              return;
          }

          $usersScanned = 0;
          $lastUser = '';
          $user = $this->getUserToScan();
          while ($user && $usersScanned < self::USERS_PER_SESSION && $lastUser !== $user) {
              $this->runScanner($user);
              $lastUser = $user;
              $user = $this->getUserToScan();
              $usersScanned += 1;
          }

          // the same user coming back right after being scanned means the scan left unindexed files behind
          if ($lastUser === $user) {
              $this->logger->warning("User $user still has unscanned files after running background scan, background scan might be stopped prematurely");
          }
      }
  }