scanner.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  6. * @author Björn Schießle <bjoern@schiessle.org>
  7. * @author Daniel Jagszent <daniel@jagszent.de>
  8. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  9. * @author Lukas Reschke <lukas@statuscode.ch>
  10. * @author Martin Mattel <martin.mattel@diemattels.at>
  11. * @author Michael Gapczynski <GapczynskiM@gmail.com>
  12. * @author Morris Jobke <hey@morrisjobke.de>
  13. * @author Owen Winkler <a_github@midnightcircus.com>
  14. * @author Robin Appelman <robin@icewind.nl>
  15. * @author Robin McCorkell <robin@mccorkell.me.uk>
  16. * @author Roeland Jago Douma <roeland@famdouma.nl>
  17. * @author Thomas Müller <thomas.mueller@tmit.eu>
  18. * @author Vincent Petry <pvince81@owncloud.com>
  19. *
  20. * @license AGPL-3.0
  21. *
  22. * This code is free software: you can redistribute it and/or modify
  23. * it under the terms of the GNU Affero General Public License, version 3,
  24. * as published by the Free Software Foundation.
  25. *
  26. * This program is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU Affero General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU Affero General Public License, version 3,
  32. * along with this program. If not, see <http://www.gnu.org/licenses/>
  33. *
  34. */
  35. namespace OC\Files\Cache;
  36. use OC\Files\Filesystem;
  37. use OC\Hooks\BasicEmitter;
  38. use OCP\Config;
  39. use OCP\Files\Cache\IScanner;
  40. use OCP\Files\Storage\ILockingStorage;
  41. use OCP\Lock\ILockingProvider;
  42. /**
  43. * Class Scanner
  44. *
  45. * Hooks available in scope \OC\Files\Cache\Scanner:
  46. * - scanFile(string $path, string $storageId)
  47. * - scanFolder(string $path, string $storageId)
  48. * - postScanFile(string $path, string $storageId)
  49. * - postScanFolder(string $path, string $storageId)
  50. *
  51. * @package OC\Files\Cache
  52. */
  53. class Scanner extends BasicEmitter implements IScanner {
  54. /**
  55. * @var \OC\Files\Storage\Storage $storage
  56. */
  57. protected $storage;
  58. /**
  59. * @var string $storageId
  60. */
  61. protected $storageId;
  62. /**
  63. * @var \OC\Files\Cache\Cache $cache
  64. */
  65. protected $cache;
  66. /**
  67. * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
  68. */
  69. protected $cacheActive;
  70. /**
  71. * @var bool $useTransactions whether to use transactions
  72. */
  73. protected $useTransactions = true;
  74. /**
  75. * @var \OCP\Lock\ILockingProvider
  76. */
  77. protected $lockingProvider;
  78. public function __construct(\OC\Files\Storage\Storage $storage) {
  79. $this->storage = $storage;
  80. $this->storageId = $this->storage->getId();
  81. $this->cache = $storage->getCache();
  82. $this->cacheActive = !Config::getSystemValue('filesystem_cache_readonly', false);
  83. $this->lockingProvider = \OC::$server->getLockingProvider();
  84. }
  85. /**
  86. * Whether to wrap the scanning of a folder in a database transaction
  87. * On default transactions are used
  88. *
  89. * @param bool $useTransactions
  90. */
  91. public function setUseTransactions($useTransactions) {
  92. $this->useTransactions = $useTransactions;
  93. }
  94. /**
  95. * get all the metadata of a file or folder
  96. * *
  97. *
  98. * @param string $path
  99. * @return array an array of metadata of the file
  100. */
  101. protected function getData($path) {
  102. $data = $this->storage->getMetaData($path);
  103. if (is_null($data)) {
  104. \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not accessible or present !!!", \OCP\Util::DEBUG);
  105. }
  106. return $data;
  107. }
  108. /**
  109. * scan a single file and store it in the cache
  110. *
  111. * @param string $file
  112. * @param int $reuseExisting
  113. * @param int $parentId
  114. * @param array | null $cacheData existing data in the cache for the file to be scanned
  115. * @param bool $lock set to false to disable getting an additional read lock during scanning
  116. * @return array an array of metadata of the scanned file
  117. * @throws \OC\ServerNotAvailableException
  118. * @throws \OCP\Lock\LockedException
  119. */
  120. public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) {
  121. // verify database - e.g. mysql only 3-byte chars
  122. if (preg_match('%(?:
  123. \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
  124. | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
  125. | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
  126. )%xs', $file)) {
  127. // 4-byte characters are not supported in file names
  128. return null;
  129. }
  130. try {
  131. $this->storage->verifyPath(dirname($file), basename($file));
  132. } catch (\Exception $e) {
  133. return null;
  134. }
  135. // only proceed if $file is not a partial file nor a blacklisted file
  136. if (!self::isPartialFile($file) and !Filesystem::isFileBlacklisted($file)) {
  137. //acquire a lock
  138. if ($lock) {
  139. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  140. $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  141. }
  142. }
  143. $data = $this->getData($file);
  144. if ($data) {
  145. // pre-emit only if it was a file. By that we avoid counting/treating folders as files
  146. if ($data['mimetype'] !== 'httpd/unix-directory') {
  147. $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
  148. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
  149. }
  150. $parent = dirname($file);
  151. if ($parent === '.' or $parent === '/') {
  152. $parent = '';
  153. }
  154. if ($parentId === -1) {
  155. $parentId = $this->cache->getId($parent);
  156. }
  157. // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
  158. if ($file and $parentId === -1) {
  159. $parentData = $this->scanFile($parent);
  160. if (!$parentData) {
  161. return null;
  162. }
  163. $parentId = $parentData['fileid'];
  164. }
  165. if ($parent) {
  166. $data['parent'] = $parentId;
  167. }
  168. if (is_null($cacheData)) {
  169. /** @var CacheEntry $cacheData */
  170. $cacheData = $this->cache->get($file);
  171. }
  172. if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
  173. // prevent empty etag
  174. if (empty($cacheData['etag'])) {
  175. $etag = $data['etag'];
  176. } else {
  177. $etag = $cacheData['etag'];
  178. }
  179. $fileId = $cacheData['fileid'];
  180. $data['fileid'] = $fileId;
  181. // only reuse data if the file hasn't explicitly changed
  182. if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
  183. $data['mtime'] = $cacheData['mtime'];
  184. if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
  185. $data['size'] = $cacheData['size'];
  186. }
  187. if ($reuseExisting & self::REUSE_ETAG) {
  188. $data['etag'] = $etag;
  189. }
  190. }
  191. // Only update metadata that has changed
  192. $newData = array_diff_assoc($data, $cacheData->getData());
  193. } else {
  194. $newData = $data;
  195. $fileId = -1;
  196. }
  197. if (!empty($newData)) {
  198. // Reset the checksum if the data has changed
  199. $newData['checksum'] = '';
  200. $data['fileid'] = $this->addToCache($file, $newData, $fileId);
  201. }
  202. if (isset($cacheData['size'])) {
  203. $data['oldSize'] = $cacheData['size'];
  204. } else {
  205. $data['oldSize'] = 0;
  206. }
  207. if (isset($cacheData['encrypted'])) {
  208. $data['encrypted'] = $cacheData['encrypted'];
  209. }
  210. // post-emit only if it was a file. By that we avoid counting/treating folders as files
  211. if ($data['mimetype'] !== 'httpd/unix-directory') {
  212. $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
  213. \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
  214. }
  215. } else {
  216. $this->removeFromCache($file);
  217. }
  218. //release the acquired lock
  219. if ($lock) {
  220. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  221. $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  222. }
  223. }
  224. if ($data && !isset($data['encrypted'])) {
  225. $data['encrypted'] = false;
  226. }
  227. return $data;
  228. }
  229. return null;
  230. }
  231. protected function removeFromCache($path) {
  232. \OC_Hook::emit('Scanner', 'removeFromCache', array('file' => $path));
  233. $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', array($path));
  234. if ($this->cacheActive) {
  235. $this->cache->remove($path);
  236. }
  237. }
  238. /**
  239. * @param string $path
  240. * @param array $data
  241. * @param int $fileId
  242. * @return int the id of the added file
  243. */
  244. protected function addToCache($path, $data, $fileId = -1) {
  245. \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
  246. $this->emit('\OC\Files\Cache\Scanner', 'addToCache', array($path, $this->storageId, $data));
  247. if ($this->cacheActive) {
  248. if ($fileId !== -1) {
  249. $this->cache->update($fileId, $data);
  250. return $fileId;
  251. } else {
  252. return $this->cache->put($path, $data);
  253. }
  254. } else {
  255. return -1;
  256. }
  257. }
  258. /**
  259. * @param string $path
  260. * @param array $data
  261. * @param int $fileId
  262. */
  263. protected function updateCache($path, $data, $fileId = -1) {
  264. \OC_Hook::emit('Scanner', 'addToCache', array('file' => $path, 'data' => $data));
  265. $this->emit('\OC\Files\Cache\Scanner', 'updateCache', array($path, $this->storageId, $data));
  266. if ($this->cacheActive) {
  267. if ($fileId !== -1) {
  268. $this->cache->update($fileId, $data);
  269. } else {
  270. $this->cache->put($path, $data);
  271. }
  272. }
  273. }
  274. /**
  275. * scan a folder and all it's children
  276. *
  277. * @param string $path
  278. * @param bool $recursive
  279. * @param int $reuse
  280. * @param bool $lock set to false to disable getting an additional read lock during scanning
  281. * @return array an array of the meta data of the scanned file or folder
  282. */
  283. public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
  284. if ($reuse === -1) {
  285. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  286. }
  287. if ($lock) {
  288. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  289. $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  290. $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  291. }
  292. }
  293. $data = $this->scanFile($path, $reuse, -1, null, $lock);
  294. if ($data and $data['mimetype'] === 'httpd/unix-directory') {
  295. $size = $this->scanChildren($path, $recursive, $reuse, $data, $lock);
  296. $data['size'] = $size;
  297. }
  298. if ($lock) {
  299. if ($this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
  300. $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
  301. $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  302. }
  303. }
  304. return $data;
  305. }
  306. /**
  307. * Get the children currently in the cache
  308. *
  309. * @param int $folderId
  310. * @return array[]
  311. */
  312. protected function getExistingChildren($folderId) {
  313. $existingChildren = array();
  314. $children = $this->cache->getFolderContentsById($folderId);
  315. foreach ($children as $child) {
  316. $existingChildren[$child['name']] = $child;
  317. }
  318. return $existingChildren;
  319. }
  320. /**
  321. * Get the children from the storage
  322. *
  323. * @param string $folder
  324. * @return string[]
  325. */
  326. protected function getNewChildren($folder) {
  327. $children = array();
  328. if ($dh = $this->storage->opendir($folder)) {
  329. if (is_resource($dh)) {
  330. while (($file = readdir($dh)) !== false) {
  331. if (!Filesystem::isIgnoredDir($file)) {
  332. $children[] = $file;
  333. }
  334. }
  335. }
  336. }
  337. return $children;
  338. }
  339. /**
  340. * scan all the files and folders in a folder
  341. *
  342. * @param string $path
  343. * @param bool $recursive
  344. * @param int $reuse
  345. * @param array $folderData existing cache data for the folder to be scanned
  346. * @param bool $lock set to false to disable getting an additional read lock during scanning
  347. * @return int the size of the scanned folder or -1 if the size is unknown at this stage
  348. */
  349. protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderData = null, $lock = true) {
  350. if ($reuse === -1) {
  351. $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
  352. }
  353. $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
  354. $size = 0;
  355. $childQueue = array();
  356. if (is_array($folderData) and isset($folderData['fileid'])) {
  357. $folderId = $folderData['fileid'];
  358. } else {
  359. $folderId = $this->cache->getId($path);
  360. }
  361. $existingChildren = $this->getExistingChildren($folderId);
  362. $newChildren = $this->getNewChildren($path);
  363. if ($this->useTransactions) {
  364. \OC::$server->getDatabaseConnection()->beginTransaction();
  365. }
  366. $exceptionOccurred = false;
  367. foreach ($newChildren as $file) {
  368. $child = ($path) ? $path . '/' . $file : $file;
  369. try {
  370. $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null;
  371. $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock);
  372. if ($data) {
  373. if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
  374. $childQueue[$child] = $data;
  375. } else if ($data['size'] === -1) {
  376. $size = -1;
  377. } else if ($size !== -1) {
  378. $size += $data['size'];
  379. }
  380. }
  381. } catch (\Doctrine\DBAL\DBALException $ex) {
  382. // might happen if inserting duplicate while a scanning
  383. // process is running in parallel
  384. // log and ignore
  385. \OCP\Util::writeLog('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OCP\Util::DEBUG);
  386. $exceptionOccurred = true;
  387. } catch (\OCP\Lock\LockedException $e) {
  388. if ($this->useTransactions) {
  389. \OC::$server->getDatabaseConnection()->rollback();
  390. }
  391. throw $e;
  392. }
  393. }
  394. $removedChildren = \array_diff(array_keys($existingChildren), $newChildren);
  395. foreach ($removedChildren as $childName) {
  396. $child = ($path) ? $path . '/' . $childName : $childName;
  397. $this->removeFromCache($child);
  398. }
  399. if ($this->useTransactions) {
  400. \OC::$server->getDatabaseConnection()->commit();
  401. }
  402. if ($exceptionOccurred) {
  403. // It might happen that the parallel scan process has already
  404. // inserted mimetypes but those weren't available yet inside the transaction
  405. // To make sure to have the updated mime types in such cases,
  406. // we reload them here
  407. \OC::$server->getMimeTypeLoader()->reset();
  408. }
  409. foreach ($childQueue as $child => $childData) {
  410. $childSize = $this->scanChildren($child, self::SCAN_RECURSIVE, $reuse, $childData, $lock);
  411. if ($childSize === -1) {
  412. $size = -1;
  413. } else if ($size !== -1) {
  414. $size += $childSize;
  415. }
  416. }
  417. if (!is_array($folderData) or !isset($folderData['size']) or $folderData['size'] !== $size) {
  418. $this->updateCache($path, array('size' => $size), $folderId);
  419. }
  420. $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
  421. return $size;
  422. }
  423. /**
  424. * check if the file should be ignored when scanning
  425. * NOTE: files with a '.part' extension are ignored as well!
  426. * prevents unfinished put requests to be scanned
  427. *
  428. * @param string $file
  429. * @return boolean
  430. */
  431. public static function isPartialFile($file) {
  432. if (pathinfo($file, PATHINFO_EXTENSION) === 'part') {
  433. return true;
  434. }
  435. if (strpos($file, '.part/') !== false) {
  436. return true;
  437. }
  438. return false;
  439. }
  440. /**
  441. * walk over any folders that are not fully scanned yet and scan them
  442. */
  443. public function backgroundScan() {
  444. if (!$this->cache->inCache('')) {
  445. $this->runBackgroundScanJob(function () {
  446. $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
  447. }, '');
  448. } else {
  449. $lastPath = null;
  450. while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
  451. $this->runBackgroundScanJob(function() use ($path) {
  452. $this->scan($path, self::SCAN_RECURSIVE, self::REUSE_ETAG);
  453. }, $path);
  454. // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
  455. // to make this possible
  456. $lastPath = $path;
  457. }
  458. }
  459. }
  460. private function runBackgroundScanJob(callable $callback, $path) {
  461. try {
  462. $callback();
  463. \OC_Hook::emit('Scanner', 'correctFolderSize', array('path' => $path));
  464. if ($this->cacheActive && $this->cache instanceof Cache) {
  465. $this->cache->correctFolderSize($path);
  466. }
  467. } catch (\OCP\Files\StorageInvalidException $e) {
  468. // skip unavailable storages
  469. } catch (\OCP\Files\StorageNotAvailableException $e) {
  470. // skip unavailable storages
  471. } catch (\OCP\Files\ForbiddenException $e) {
  472. // skip forbidden storages
  473. } catch (\OCP\Lock\LockedException $e) {
  474. // skip unavailable storages
  475. }
  476. }
  477. /**
  478. * Set whether the cache is affected by scan operations
  479. *
  480. * @param boolean $active The active state of the cache
  481. */
  482. public function setCacheActive($active) {
  483. $this->cacheActive = $active;
  484. }
  485. }