Scanner.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  6. * @author Björn Schießle <bjoern@schiessle.org>
  7. * @author Daniel Jagszent <daniel@jagszent.de>
  8. * @author Joas Schilling <coding@schilljs.com>
  9. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  10. * @author Lukas Reschke <lukas@statuscode.ch>
  11. * @author Martin Mattel <martin.mattel@diemattels.at>
  12. * @author Morris Jobke <hey@morrisjobke.de>
  13. * @author Owen Winkler <a_github@midnightcircus.com>
  14. * @author Robin Appelman <robin@icewind.nl>
  15. * @author Robin McCorkell <robin@mccorkell.me.uk>
  16. * @author Thomas Müller <thomas.mueller@tmit.eu>
  17. * @author Vincent Petry <pvince81@owncloud.com>
  18. *
  19. * @license AGPL-3.0
  20. *
  21. * This code is free software: you can redistribute it and/or modify
  22. * it under the terms of the GNU Affero General Public License, version 3,
  23. * as published by the Free Software Foundation.
  24. *
  25. * This program is distributed in the hope that it will be useful,
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  28. * GNU Affero General Public License for more details.
  29. *
  30. * You should have received a copy of the GNU Affero General Public License, version 3,
  31. * along with this program. If not, see <http://www.gnu.org/licenses/>
  32. *
  33. */
  34. namespace OC\Files\Cache;
  35. use OC\Files\Filesystem;
  36. use OC\Hooks\BasicEmitter;
  37. use OCP\Files\Cache\IScanner;
  38. use OCP\Files\ForbiddenException;
  39. use OCP\ILogger;
  40. use OCP\Lock\ILockingProvider;
  41. /**
  42. * Class Scanner
  43. *
  44. * Hooks available in scope \OC\Files\Cache\Scanner:
  45. * - scanFile(string $path, string $storageId)
  46. * - scanFolder(string $path, string $storageId)
  47. * - postScanFile(string $path, string $storageId)
  48. * - postScanFolder(string $path, string $storageId)
  49. *
  50. * @package OC\Files\Cache
  51. */
  52. class Scanner extends BasicEmitter implements IScanner {
    /**
     * Storage this scanner operates on (assigned in the constructor).
     *
     * @var \OC\Files\Storage\Storage $storage
     */
    protected $storage;

    /**
     * Id of $storage as returned by its getId() method; it is passed along
     * with the scan hooks emitted by this class.
     *
     * @var string $storageId
     */
    protected $storageId;

    /**
     * Cache obtained from $storage->getCache(); scan results are written to it.
     *
     * @var \OC\Files\Cache\Cache $cache
     */
    protected $cache;

    /**
     * Initialised from the negated 'filesystem_cache_readonly' system value
     * and changeable through setCacheActive().
     *
     * @var bool $cacheActive If true, perform cache operations, if false, do not affect cache
     */
    protected $cacheActive;

    /**
     * Changeable through setUseTransactions().
     *
     * @var bool $useTransactions whether to use transactions
     */
    protected $useTransactions = true;

    /**
     * Locking provider handed to the storage whenever a lock is acquired or
     * released during a scan.
     *
     * @var \OCP\Lock\ILockingProvider
     */
    protected $lockingProvider;
  77. public function __construct(\OC\Files\Storage\Storage $storage) {
  78. $this->storage = $storage;
  79. $this->storageId = $this->storage->getId();
  80. $this->cache = $storage->getCache();
  81. $this->cacheActive = !\OC::$server->getConfig()->getSystemValue('filesystem_cache_readonly', false);
  82. $this->lockingProvider = \OC::$server->getLockingProvider();
  83. }
  84. /**
  85. * Whether to wrap the scanning of a folder in a database transaction
  86. * On default transactions are used
  87. *
  88. * @param bool $useTransactions
  89. */
  90. public function setUseTransactions($useTransactions) {
  91. $this->useTransactions = $useTransactions;
  92. }
  93. /**
  94. * get all the metadata of a file or folder
  95. * *
  96. *
  97. * @param string $path
  98. * @return array an array of metadata of the file
  99. */
  100. protected function getData($path) {
  101. $data = $this->storage->getMetaData($path);
  102. if (is_null($data)) {
  103. \OCP\Util::writeLog(Scanner::class, "!!! Path '$path' is not accessible or present !!!", ILogger::DEBUG);
  104. }
  105. return $data;
  106. }
  107. /**
  108. * scan a single file and store it in the cache
  109. *
  110. * @param string $file
  111. * @param int $reuseExisting
  112. * @param int $parentId
  113. * @param array | null $cacheData existing data in the cache for the file to be scanned
  114. * @param bool $lock set to false to disable getting an additional read lock during scanning
  115. * @return array an array of metadata of the scanned file
  116. * @throws \OC\ServerNotAvailableException
  117. * @throws \OCP\Lock\LockedException
  118. */
  119. public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) {
  120. if ($file !== '') {
  121. try {
  122. $this->storage->verifyPath(dirname($file), basename($file));
  123. } catch (\Exception $e) {
  124. return null;
  125. }
  126. }
  127. // only proceed if $file is not a partial file nor a blacklisted file
  128. if (!self::isPartialFile($file) and !Filesystem::isFileBlacklisted($file)) {
  129. //acquire a lock
  130. if ($lock) {
  131. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  132. $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  133. }
  134. }
  135. try {
  136. $data = $this->getData($file);
  137. } catch (ForbiddenException $e) {
  138. if ($lock) {
  139. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  140. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  141. }
  142. }
  143. return null;
  144. }
  145. try {
  146. if ($data) {
  147. // pre-emit only if it was a file. By that we avoid counting/treating folders as files
  148. if ($data['mimetype'] !== 'httpd/unix-directory') {
  149. $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
  150. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
  151. }
  152. $parent = dirname($file);
  153. if ($parent === '.' or $parent === '/') {
  154. $parent = '';
  155. }
  156. if ($parentId === -1) {
  157. $parentId = $this->cache->getParentId($file);
  158. }
  159. // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
  160. if ($file and $parentId === -1) {
  161. $parentData = $this->scanFile($parent);
  162. if (!$parentData) {
  163. return null;
  164. }
  165. $parentId = $parentData['fileid'];
  166. }
  167. if ($parent) {
  168. $data['parent'] = $parentId;
  169. }
  170. if (is_null($cacheData)) {
  171. /** @var CacheEntry $cacheData */
  172. $cacheData = $this->cache->get($file);
  173. }
  174. if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
  175. // prevent empty etag
  176. if (empty($cacheData['etag'])) {
  177. $etag = $data['etag'];
  178. } else {
  179. $etag = $cacheData['etag'];
  180. }
  181. $fileId = $cacheData['fileid'];
  182. $data['fileid'] = $fileId;
  183. // only reuse data if the file hasn't explicitly changed
  184. if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
  185. $data['mtime'] = $cacheData['mtime'];
  186. if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
  187. $data['size'] = $cacheData['size'];
  188. }
  189. if ($reuseExisting & self::REUSE_ETAG) {
  190. $data['etag'] = $etag;
  191. }
  192. }
  193. // Only update metadata that has changed
  194. $newData = array_diff_assoc($data, $cacheData->getData());
  195. } else {
  196. $newData = $data;
  197. $fileId = -1;
  198. }
  199. if (!empty($newData)) {
  200. // Reset the checksum if the data has changed
  201. $newData['checksum'] = '';
  202. $data['fileid'] = $this->addToCache($file, $newData, $fileId);
  203. }
  204. if (isset($cacheData['size'])) {
  205. $data['oldSize'] = $cacheData['size'];
  206. } else {
  207. $data['oldSize'] = 0;
  208. }
  209. if (isset($cacheData['encrypted'])) {
  210. $data['encrypted'] = $cacheData['encrypted'];
  211. }
  212. // post-emit only if it was a file. By that we avoid counting/treating folders as files
  213. if ($data['mimetype'] !== 'httpd/unix-directory') {
  214. $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
  215. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
  216. }
  217. } else {
  218. $this->removeFromCache($file);
  219. }
  220. } catch (\Exception $e) {
  221. if ($lock) {
  222. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  223. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  224. }
  225. }
  226. throw $e;
  227. }
  228. //release the acquired lock
  229. if ($lock) {
  230. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  231. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  232. }
  233. }
  234. if ($data && !isset($data['encrypted'])) {
  235. $data['encrypted'] = false;
  236. }
  237. return $data;
  238. }
  239. return null;
  240. }
  241. protected function removeFromCache($path) {
  242. \OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $path));
  243. $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', array($path));
  244. if ($this->cacheActive) {
  245. $this->cache->remove($path);
  246. }
  247. }
  248. /**
  249. * @param string $path
  250. * @param array $data
  251. * @param int $fileId
  252. * @return int the id of the added file
  253. */
  254. protected function addToCache($path, $data, $fileId = -1) {
  255. if (isset($data['scan_permissions'])) {
  256. $data['permissions'] = $data['scan_permissions'];
  257. }
  258. \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
  259. $this->emit('\OC\Files\Cache\Scanner', 'addToCache', array($path, $this->storageId, $data));
  260. if ($this->cacheActive) {
  261. if ($fileId !== -1) {
  262. $this->cache->update($fileId, $data);
  263. return $fileId;
  264. } else {
  265. return $this->cache->put($path, $data);
  266. }
  267. } else {
  268. return -1;
  269. }
  270. }
  271. /**
  272. * @param string $path
  273. * @param array $data
  274. * @param int $fileId
  275. */
  276. protected function updateCache($path, $data, $fileId = -1) {
  277. \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
  278. $this->emit('\OC\Files\Cache\Scanner', 'updateCache', array($path, $this->storageId, $data));
  279. if ($this->cacheActive) {
  280. if ($fileId !== -1) {
  281. $this->cache->update($fileId, $data);
  282. } else {
  283. $this->cache->put($path, $data);
  284. }
  285. }
  286. }
  287. /**
  288. * scan a folder and all it's children
  289. *
  290. * @param string $path
  291. * @param bool $recursive
  292. * @param int $reuse
  293. * @param bool $lock set to false to disable getting an additional read lock during scanning
  294. * @return array an array of the meta data of the scanned file or folder
  295. */
  296. public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
  297. if ($reuse === -1) {
  298. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  299. }
  300. if ($lock) {
  301. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  302. $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  303. $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  304. }
  305. }
  306. try {
  307. $data = $this->scanFile($path, $reuse, -1, null, $lock);
  308. if ($data and $data['mimetype'] === 'httpd/unix-directory') {
  309. $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock);
  310. $data['size'] = $size;
  311. }
  312. } finally {
  313. if ($lock) {
  314. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  315. $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  316. $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  317. }
  318. }
  319. }
  320. return $data;
  321. }
  322. /**
  323. * Get the children currently in the cache
  324. *
  325. * @param int $folderId
  326. * @return array[]
  327. */
  328. protected function getExistingChildren($folderId) {
  329. $existingChildren = array();
  330. $children = $this->cache->getFolderContentsById($folderId);
  331. foreach ($children as $child) {
  332. $existingChildren[$child['name']] = $child;
  333. }
  334. return $existingChildren;
  335. }
  336. /**
  337. * Get the children from the storage
  338. *
  339. * @param string $folder
  340. * @return string[]
  341. */
  342. protected function getNewChildren($folder) {
  343. $children = array();
  344. if ($dh = $this->storage->opendir($folder)) {
  345. if (is_resource($dh)) {
  346. while (($file = readdir($dh)) !== false) {
  347. if (!Filesystem::isIgnoredDir($file)) {
  348. $children[] = trim(\OC\Files\Filesystem::normalizePath($file), '/');
  349. }
  350. }
  351. }
  352. }
  353. return $children;
  354. }
  355. /**
  356. * scan all the files and folders in a folder
  357. *
  358. * @param string $path
  359. * @param bool $recursive
  360. * @param int $reuse
  361. * @param int $folderId id for the folder to be scanned
  362. * @param bool $lock set to false to disable getting an additional read lock during scanning
  363. * @return int the size of the scanned folder or -1 if the size is unknown at this stage
  364. */
  365. protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true) {
  366. if ($reuse === -1) {
  367. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  368. }
  369. $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
  370. $size = 0;
  371. if (!is_null($folderId)) {
  372. $folderId = $this->cache->getId($path);
  373. }
  374. $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);
  375. foreach ($childQueue as $child => $childId) {
  376. $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
  377. if ($childSize === -1) {
  378. $size = -1;
  379. } else if ($size !== -1) {
  380. $size += $childSize;
  381. }
  382. }
  383. if ($this->cacheActive) {
  384. $this->cache->update($folderId, array('size' => $size));
  385. }
  386. $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
  387. return $size;
  388. }
  389. private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
  390. // we put this in it's own function so it cleans up the memory before we start recursing
  391. $existingChildren = $this->getExistingChildren($folderId);
  392. $newChildren = $this->getNewChildren($path);
  393. if ($this->useTransactions) {
  394. \OC::$server->getDatabaseConnection()->beginTransaction();
  395. }
  396. $exceptionOccurred = false;
  397. $childQueue = [];
  398. foreach ($newChildren as $file) {
  399. $child = $path ? $path . '/' . $file : $file;
  400. try {
  401. $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null;
  402. $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock);
  403. if ($data) {
  404. if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
  405. $childQueue[$child] = $data['fileid'];
  406. } else if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === -1) {
  407. // only recurse into folders which aren't fully scanned
  408. $childQueue[$child] = $data['fileid'];
  409. } else if ($data['size'] === -1) {
  410. $size = -1;
  411. } else if ($size !== -1) {
  412. $size += $data['size'];
  413. }
  414. }
  415. } catch (\Doctrine\DBAL\DBALException $ex) {
  416. // might happen if inserting duplicate while a scanning
  417. // process is running in parallel
  418. // log and ignore
  419. if ($this->useTransactions) {
  420. \OC::$server->getDatabaseConnection()->rollback();
  421. \OC::$server->getDatabaseConnection()->beginTransaction();
  422. }
  423. \OC::$server->getLogger()->logException($ex, [
  424. 'message' => 'Exception while scanning file "' . $child . '"',
  425. 'level' => ILogger::DEBUG,
  426. 'app' => 'core',
  427. ]);
  428. $exceptionOccurred = true;
  429. } catch (\OCP\Lock\LockedException $e) {
  430. if ($this->useTransactions) {
  431. \OC::$server->getDatabaseConnection()->rollback();
  432. }
  433. throw $e;
  434. }
  435. }
  436. $removedChildren = \array_diff(array_keys($existingChildren), $newChildren);
  437. foreach ($removedChildren as $childName) {
  438. $child = $path ? $path . '/' . $childName : $childName;
  439. $this->removeFromCache($child);
  440. }
  441. if ($this->useTransactions) {
  442. \OC::$server->getDatabaseConnection()->commit();
  443. }
  444. if ($exceptionOccurred) {
  445. // It might happen that the parallel scan process has already
  446. // inserted mimetypes but those weren't available yet inside the transaction
  447. // To make sure to have the updated mime types in such cases,
  448. // we reload them here
  449. \OC::$server->getMimeTypeLoader()->reset();
  450. }
  451. return $childQueue;
  452. }
  453. /**
  454. * check if the file should be ignored when scanning
  455. * NOTE: files with a '.part' extension are ignored as well!
  456. * prevents unfinished put requests to be scanned
  457. *
  458. * @param string $file
  459. * @return boolean
  460. */
  461. public static function isPartialFile($file) {
  462. if (pathinfo($file, PATHINFO_EXTENSION) === 'part') {
  463. return true;
  464. }
  465. if (strpos($file, '.part/') !== false) {
  466. return true;
  467. }
  468. return false;
  469. }
  470. /**
  471. * walk over any folders that are not fully scanned yet and scan them
  472. */
  473. public function backgroundScan() {
  474. if (!$this->cache->inCache('')) {
  475. $this->runBackgroundScanJob(function () {
  476. $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
  477. }, '');
  478. } else {
  479. $lastPath = null;
  480. while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
  481. $this->runBackgroundScanJob(function () use ($path) {
  482. $this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
  483. }, $path);
  484. // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
  485. // to make this possible
  486. $lastPath = $path;
  487. }
  488. }
  489. }
  490. private function runBackgroundScanJob(callable $callback, $path) {
  491. try {
  492. $callback();
  493. \OC_Hook::emit('Scanner', 'correctFolderSize', array('path' => $path));
  494. if ($this->cacheActive && $this->cache instanceof Cache) {
  495. $this->cache->correctFolderSize($path, null, true);
  496. }
  497. } catch (\OCP\Files\StorageInvalidException $e) {
  498. // skip unavailable storages
  499. } catch (\OCP\Files\StorageNotAvailableException $e) {
  500. // skip unavailable storages
  501. } catch (\OCP\Files\ForbiddenException $e) {
  502. // skip forbidden storages
  503. } catch (\OCP\Lock\LockedException $e) {
  504. // skip unavailable storages
  505. }
  506. }
  507. /**
  508. * Set whether the cache is affected by scan operations
  509. *
  510. * @param boolean $active The active state of the cache
  511. */
  512. public function setCacheActive($active) {
  513. $this->cacheActive = $active;
  514. }
  515. }