Scanner.php 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Ari Selseng <ari@selseng.net>
  6. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  7. * @author Björn Schießle <bjoern@schiessle.org>
  8. * @author Christoph Wurst <christoph@winzerhof-wurst.at>
  9. * @author Daniel Jagszent <daniel@jagszent.de>
  10. * @author Joas Schilling <coding@schilljs.com>
  11. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  12. * @author Lukas Reschke <lukas@statuscode.ch>
  13. * @author Martin Mattel <martin.mattel@diemattels.at>
  14. * @author Morris Jobke <hey@morrisjobke.de>
  15. * @author Owen Winkler <a_github@midnightcircus.com>
  16. * @author Robin Appelman <robin@icewind.nl>
  17. * @author Robin McCorkell <robin@mccorkell.me.uk>
  18. * @author Thomas Müller <thomas.mueller@tmit.eu>
  19. * @author Vincent Petry <vincent@nextcloud.com>
  20. *
  21. * @license AGPL-3.0
  22. *
  23. * This code is free software: you can redistribute it and/or modify
  24. * it under the terms of the GNU Affero General Public License, version 3,
  25. * as published by the Free Software Foundation.
  26. *
  27. * This program is distributed in the hope that it will be useful,
  28. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  29. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  30. * GNU Affero General Public License for more details.
  31. *
  32. * You should have received a copy of the GNU Affero General Public License, version 3,
  33. * along with this program. If not, see <http://www.gnu.org/licenses/>
  34. *
  35. */
  36. namespace OC\Files\Cache;
  37. use Doctrine\DBAL\Exception;
  38. use OCP\Files\Cache\IScanner;
  39. use OCP\Files\ForbiddenException;
  40. use OCP\Files\Storage\IReliableEtagStorage;
  41. use OCP\Lock\ILockingProvider;
  42. use OC\Files\Storage\Wrapper\Encoding;
  43. use OC\Files\Storage\Wrapper\Jail;
  44. use OC\Hooks\BasicEmitter;
  45. use Psr\Log\LoggerInterface;
  46. /**
  47. * Class Scanner
  48. *
  49. * Hooks available in scope \OC\Files\Cache\Scanner:
  50. * - scanFile(string $path, string $storageId)
  51. * - scanFolder(string $path, string $storageId)
  52. * - postScanFile(string $path, string $storageId)
  53. * - postScanFolder(string $path, string $storageId)
  54. *
  55. * @package OC\Files\Cache
  56. */
  57. class Scanner extends BasicEmitter implements IScanner {
  58. /**
  59. * @var \OC\Files\Storage\Storage $storage
  60. */
  61. protected $storage;
  62. /**
  63. * @var string $storageId
  64. */
  65. protected $storageId;
  66. /**
  67. * @var \OC\Files\Cache\Cache $cache
  68. */
  69. protected $cache;
  70. /**
  71. * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
  72. */
  73. protected $cacheActive;
  74. /**
  75. * @var bool $useTransactions whether to use transactions
  76. */
  77. protected $useTransactions = true;
  78. /**
  79. * @var \OCP\Lock\ILockingProvider
  80. */
  81. protected $lockingProvider;
  82. public function __construct(\OC\Files\Storage\Storage $storage) {
  83. $this->storage = $storage;
  84. $this->storageId = $this->storage->getId();
  85. $this->cache = $storage->getCache();
  86. $this->cacheActive = !\OC::$server->getConfig()->getSystemValue('filesystem_cache_readonly', false);
  87. $this->lockingProvider = \OC::$server->getLockingProvider();
  88. }
  89. /**
  90. * Whether to wrap the scanning of a folder in a database transaction
  91. * On default transactions are used
  92. *
  93. * @param bool $useTransactions
  94. */
  95. public function setUseTransactions($useTransactions) {
  96. $this->useTransactions = $useTransactions;
  97. }
  98. /**
  99. * get all the metadata of a file or folder
  100. * *
  101. *
  102. * @param string $path
  103. * @return array|null an array of metadata of the file
  104. */
  105. protected function getData($path) {
  106. $data = $this->storage->getMetaData($path);
  107. if (is_null($data)) {
  108. \OC::$server->get(LoggerInterface::class)->debug("!!! Path '$path' is not accessible or present !!!", ['app' => 'core']);
  109. }
  110. return $data;
  111. }
  112. /**
  113. * scan a single file and store it in the cache
  114. *
  115. * @param string $file
  116. * @param int $reuseExisting
  117. * @param int $parentId
  118. * @param array|null|false $cacheData existing data in the cache for the file to be scanned
  119. * @param bool $lock set to false to disable getting an additional read lock during scanning
  120. * @param null $data the metadata for the file, as returned by the storage
  121. * @return array|null an array of metadata of the scanned file
  122. * @throws \OCP\Lock\LockedException
  123. */
  124. public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true, $data = null) {
  125. if ($file !== '') {
  126. try {
  127. $this->storage->verifyPath(dirname($file), basename($file));
  128. } catch (\Exception $e) {
  129. return null;
  130. }
  131. }
  132. // only proceed if $file is not a partial file, blacklist is handled by the storage
  133. if (!self::isPartialFile($file)) {
  134. //acquire a lock
  135. if ($lock) {
  136. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  137. $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  138. }
  139. }
  140. try {
  141. $data = $data ?? $this->getData($file);
  142. } catch (ForbiddenException $e) {
  143. if ($lock) {
  144. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  145. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  146. }
  147. }
  148. return null;
  149. }
  150. try {
  151. if ($data) {
  152. // pre-emit only if it was a file. By that we avoid counting/treating folders as files
  153. if ($data['mimetype'] !== 'httpd/unix-directory') {
  154. $this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
  155. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
  156. }
  157. $parent = dirname($file);
  158. if ($parent === '.' or $parent === '/') {
  159. $parent = '';
  160. }
  161. if ($parentId === -1) {
  162. $parentId = $this->cache->getParentId($file);
  163. }
  164. // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
  165. if ($file and $parentId === -1) {
  166. $parentData = $this->scanFile($parent);
  167. if (!$parentData) {
  168. return null;
  169. }
  170. $parentId = $parentData['fileid'];
  171. }
  172. if ($parent) {
  173. $data['parent'] = $parentId;
  174. }
  175. if (is_null($cacheData)) {
  176. /** @var CacheEntry $cacheData */
  177. $cacheData = $this->cache->get($file);
  178. }
  179. if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
  180. // prevent empty etag
  181. if (empty($cacheData['etag'])) {
  182. $etag = $data['etag'];
  183. } else {
  184. $etag = $cacheData['etag'];
  185. }
  186. $fileId = $cacheData['fileid'];
  187. $data['fileid'] = $fileId;
  188. // only reuse data if the file hasn't explicitly changed
  189. if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
  190. $data['mtime'] = $cacheData['mtime'];
  191. if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
  192. $data['size'] = $cacheData['size'];
  193. }
  194. if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
  195. $data['etag'] = $etag;
  196. }
  197. }
  198. // Only update metadata that has changed
  199. $newData = array_diff_assoc($data, $cacheData->getData());
  200. } else {
  201. $newData = $data;
  202. $fileId = -1;
  203. }
  204. if (!empty($newData)) {
  205. // Reset the checksum if the data has changed
  206. $newData['checksum'] = '';
  207. $newData['parent'] = $parentId;
  208. $data['fileid'] = $this->addToCache($file, $newData, $fileId);
  209. }
  210. if ($cacheData && isset($cacheData['size'])) {
  211. $data['oldSize'] = $cacheData['size'];
  212. } else {
  213. $data['oldSize'] = 0;
  214. }
  215. if ($cacheData && isset($cacheData['encrypted'])) {
  216. $data['encrypted'] = $cacheData['encrypted'];
  217. }
  218. // post-emit only if it was a file. By that we avoid counting/treating folders as files
  219. if ($data['mimetype'] !== 'httpd/unix-directory') {
  220. $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
  221. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
  222. }
  223. } else {
  224. $this->removeFromCache($file);
  225. }
  226. } catch (\Exception $e) {
  227. if ($lock) {
  228. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  229. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  230. }
  231. }
  232. throw $e;
  233. }
  234. //release the acquired lock
  235. if ($lock) {
  236. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  237. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  238. }
  239. }
  240. if ($data && !isset($data['encrypted'])) {
  241. $data['encrypted'] = false;
  242. }
  243. return $data;
  244. }
  245. return null;
  246. }
  247. protected function removeFromCache($path) {
  248. \OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
  249. $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);
  250. if ($this->cacheActive) {
  251. $this->cache->remove($path);
  252. }
  253. }
  254. /**
  255. * @param string $path
  256. * @param array $data
  257. * @param int $fileId
  258. * @return int the id of the added file
  259. */
  260. protected function addToCache($path, $data, $fileId = -1) {
  261. if (isset($data['scan_permissions'])) {
  262. $data['permissions'] = $data['scan_permissions'];
  263. }
  264. \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
  265. $this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data]);
  266. if ($this->cacheActive) {
  267. if ($fileId !== -1) {
  268. $this->cache->update($fileId, $data);
  269. return $fileId;
  270. } else {
  271. return $this->cache->insert($path, $data);
  272. }
  273. } else {
  274. return -1;
  275. }
  276. }
  277. /**
  278. * @param string $path
  279. * @param array $data
  280. * @param int $fileId
  281. */
  282. protected function updateCache($path, $data, $fileId = -1) {
  283. \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
  284. $this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);
  285. if ($this->cacheActive) {
  286. if ($fileId !== -1) {
  287. $this->cache->update($fileId, $data);
  288. } else {
  289. $this->cache->put($path, $data);
  290. }
  291. }
  292. }
  293. /**
  294. * scan a folder and all it's children
  295. *
  296. * @param string $path
  297. * @param bool $recursive
  298. * @param int $reuse
  299. * @param bool $lock set to false to disable getting an additional read lock during scanning
  300. * @return array|null an array of the meta data of the scanned file or folder
  301. */
  302. public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
  303. if ($reuse === -1) {
  304. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  305. }
  306. if ($lock) {
  307. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  308. $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  309. $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  310. }
  311. }
  312. try {
  313. $data = $this->scanFile($path, $reuse, -1, null, $lock);
  314. if ($data and $data['mimetype'] === 'httpd/unix-directory') {
  315. $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data);
  316. $data['size'] = $size;
  317. }
  318. } finally {
  319. if ($lock) {
  320. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  321. $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  322. $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  323. }
  324. }
  325. }
  326. return $data;
  327. }
  328. /**
  329. * Get the children currently in the cache
  330. *
  331. * @param int $folderId
  332. * @return array[]
  333. */
  334. protected function getExistingChildren($folderId) {
  335. $existingChildren = [];
  336. $children = $this->cache->getFolderContentsById($folderId);
  337. foreach ($children as $child) {
  338. $existingChildren[$child['name']] = $child;
  339. }
  340. return $existingChildren;
  341. }
  342. /**
  343. * scan all the files and folders in a folder
  344. *
  345. * @param string $path
  346. * @param bool $recursive
  347. * @param int $reuse
  348. * @param int $folderId id for the folder to be scanned
  349. * @param bool $lock set to false to disable getting an additional read lock during scanning
  350. * @param array $data the data of the folder before (re)scanning the children
  351. * @return int the size of the scanned folder or -1 if the size is unknown at this stage
  352. */
  353. protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true, array $data = []) {
  354. if ($reuse === -1) {
  355. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  356. }
  357. $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);
  358. $size = 0;
  359. if (!is_null($folderId)) {
  360. $folderId = $this->cache->getId($path);
  361. }
  362. $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);
  363. foreach ($childQueue as $child => $childId) {
  364. $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
  365. if ($childSize === -1) {
  366. $size = -1;
  367. } elseif ($size !== -1) {
  368. $size += $childSize;
  369. }
  370. }
  371. $oldSize = $data['size'] ?? null;
  372. if ($this->cacheActive && $oldSize !== $size) {
  373. $this->cache->update($folderId, ['size' => $size]);
  374. }
  375. $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
  376. return $size;
  377. }
  378. private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
  379. // we put this in it's own function so it cleans up the memory before we start recursing
  380. $existingChildren = $this->getExistingChildren($folderId);
  381. $newChildren = iterator_to_array($this->storage->getDirectoryContent($path));
  382. if (count($existingChildren) === 0 && count($newChildren) === 0) {
  383. // no need to do a transaction
  384. return [];
  385. }
  386. if ($this->useTransactions) {
  387. \OC::$server->getDatabaseConnection()->beginTransaction();
  388. }
  389. $exceptionOccurred = false;
  390. $childQueue = [];
  391. $newChildNames = [];
  392. foreach ($newChildren as $fileMeta) {
  393. $permissions = isset($fileMeta['scan_permissions']) ? $fileMeta['scan_permissions'] : $fileMeta['permissions'];
  394. if ($permissions === 0) {
  395. continue;
  396. }
  397. $originalFile = $fileMeta['name'];
  398. $file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/');
  399. if (trim($originalFile, '/') !== $file) {
  400. // encoding mismatch, might require compatibility wrapper
  401. \OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "'. $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
  402. $this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$path ? $path . '/' . $originalFile : $originalFile]);
  403. // skip this entry
  404. continue;
  405. }
  406. $newChildNames[] = $file;
  407. $child = $path ? $path . '/' . $file : $file;
  408. try {
  409. $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : false;
  410. $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock, $fileMeta);
  411. if ($data) {
  412. if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
  413. $childQueue[$child] = $data['fileid'];
  414. } elseif ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === -1) {
  415. // only recurse into folders which aren't fully scanned
  416. $childQueue[$child] = $data['fileid'];
  417. } elseif ($data['size'] === -1) {
  418. $size = -1;
  419. } elseif ($size !== -1) {
  420. $size += $data['size'];
  421. }
  422. }
  423. } catch (Exception $ex) {
  424. // might happen if inserting duplicate while a scanning
  425. // process is running in parallel
  426. // log and ignore
  427. if ($this->useTransactions) {
  428. \OC::$server->getDatabaseConnection()->rollback();
  429. \OC::$server->getDatabaseConnection()->beginTransaction();
  430. }
  431. \OC::$server->get(LoggerInterface::class)->debug('Exception while scanning file "' . $child . '"', [
  432. 'app' => 'core',
  433. 'exception' => $ex,
  434. ]);
  435. $exceptionOccurred = true;
  436. } catch (\OCP\Lock\LockedException $e) {
  437. if ($this->useTransactions) {
  438. \OC::$server->getDatabaseConnection()->rollback();
  439. }
  440. throw $e;
  441. }
  442. }
  443. $removedChildren = \array_diff(array_keys($existingChildren), $newChildNames);
  444. foreach ($removedChildren as $childName) {
  445. $child = $path ? $path . '/' . $childName : $childName;
  446. $this->removeFromCache($child);
  447. }
  448. if ($this->useTransactions) {
  449. \OC::$server->getDatabaseConnection()->commit();
  450. }
  451. if ($exceptionOccurred) {
  452. // It might happen that the parallel scan process has already
  453. // inserted mimetypes but those weren't available yet inside the transaction
  454. // To make sure to have the updated mime types in such cases,
  455. // we reload them here
  456. \OC::$server->getMimeTypeLoader()->reset();
  457. }
  458. return $childQueue;
  459. }
  460. /**
  461. * check if the file should be ignored when scanning
  462. * NOTE: files with a '.part' extension are ignored as well!
  463. * prevents unfinished put requests to be scanned
  464. *
  465. * @param string $file
  466. * @return boolean
  467. */
  468. public static function isPartialFile($file) {
  469. if (pathinfo($file, PATHINFO_EXTENSION) === 'part') {
  470. return true;
  471. }
  472. if (strpos($file, '.part/') !== false) {
  473. return true;
  474. }
  475. return false;
  476. }
  477. /**
  478. * walk over any folders that are not fully scanned yet and scan them
  479. */
  480. public function backgroundScan() {
  481. if ($this->storage->instanceOfStorage(Jail::class)) {
  482. // for jail storage wrappers (shares, groupfolders) we run the background scan on the source storage
  483. // this is mainly done because the jail wrapper doesn't implement `getIncomplete` (because it would be inefficient).
  484. //
  485. // Running the scan on the source storage might scan more than "needed", but the unscanned files outside the jail will
  486. // have to be scanned at some point anyway.
  487. $unJailedScanner = $this->storage->getUnjailedStorage()->getScanner();
  488. $unJailedScanner->backgroundScan();
  489. } else {
  490. if (!$this->cache->inCache('')) {
  491. // if the storage isn't in the cache yet, just scan the root completely
  492. $this->runBackgroundScanJob(function () {
  493. $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
  494. }, '');
  495. } else {
  496. $lastPath = null;
  497. // find any path marked as unscanned and run the scanner until no more paths are unscanned (or we get stuck)
  498. while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
  499. $this->runBackgroundScanJob(function () use ($path) {
  500. $this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
  501. }, $path);
  502. // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
  503. // to make this possible
  504. $lastPath = $path;
  505. }
  506. }
  507. }
  508. }
  509. private function runBackgroundScanJob(callable $callback, $path) {
  510. try {
  511. $callback();
  512. \OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);
  513. if ($this->cacheActive && $this->cache instanceof Cache) {
  514. $this->cache->correctFolderSize($path, null, true);
  515. }
  516. } catch (\OCP\Files\StorageInvalidException $e) {
  517. // skip unavailable storages
  518. } catch (\OCP\Files\StorageNotAvailableException $e) {
  519. // skip unavailable storages
  520. } catch (\OCP\Files\ForbiddenException $e) {
  521. // skip forbidden storages
  522. } catch (\OCP\Lock\LockedException $e) {
  523. // skip unavailable storages
  524. }
  525. }
  526. /**
  527. * Set whether the cache is affected by scan operations
  528. *
  529. * @param boolean $active The active state of the cache
  530. */
  531. public function setCacheActive($active) {
  532. $this->cacheActive = $active;
  533. }
  534. }