123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630 |
- <?php
- /**
- * SPDX-FileCopyrightText: 2016-2024 Nextcloud GmbH and Nextcloud contributors
- * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
- * SPDX-License-Identifier: AGPL-3.0-only
- */
- namespace OC\Files\Cache;
- use Doctrine\DBAL\Exception;
- use OC\Files\Storage\Wrapper\Encryption;
- use OC\Files\Storage\Wrapper\Jail;
- use OC\Hooks\BasicEmitter;
- use OCP\Files\Cache\IScanner;
- use OCP\Files\ForbiddenException;
- use OCP\Files\NotFoundException;
- use OCP\Files\Storage\IReliableEtagStorage;
- use OCP\IDBConnection;
- use OCP\Lock\ILockingProvider;
- use Psr\Log\LoggerInterface;
- /**
- * Class Scanner
- *
- * Hooks available in scope \OC\Files\Cache\Scanner:
- * - scanFile(string $path, string $storageId)
- * - scanFolder(string $path, string $storageId)
- * - postScanFile(string $path, string $storageId)
- * - postScanFolder(string $path, string $storageId)
- *
- * @package OC\Files\Cache
- */
- class Scanner extends BasicEmitter implements IScanner {
- /**
- * The storage whose files and folders are scanned.
- *
- * @var \OC\Files\Storage\Storage $storage
- */
- protected $storage;
- /**
- * Id of the scanned storage (taken from the storage's getId()), passed along with emitted hooks.
- *
- * @var string $storageId
- */
- protected $storageId;
- /**
- * The cache of the scanned storage (taken from the storage's getCache()).
- *
- * @var \OC\Files\Cache\Cache $cache
- */
- protected $cache;
- /**
- * Initialised from the inverse of the 'filesystem_cache_readonly' system value.
- *
- * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
- */
- protected $cacheActive;
- /**
- * When true, the handling of a folder's direct children is wrapped in a database transaction.
- *
- * @var bool $useTransactions whether to use transactions
- */
- protected $useTransactions = true;
- /**
- * Locking provider handed to the storage when acquiring and releasing locks.
- *
- * @var \OCP\Lock\ILockingProvider
- */
- protected $lockingProvider;
- protected IDBConnection $connection; // database connection used to begin, commit and roll back the per-folder transactions
/**
 * Create a scanner bound to a single storage.
 *
 * The storage id and the cache are taken from the storage itself, while the
 * locking provider and the database connection are fetched from the server
 * container. Cache writes are enabled unless the
 * `filesystem_cache_readonly` system value is true.
 *
 * @param \OC\Files\Storage\Storage $storage the storage to scan
 */
public function __construct(\OC\Files\Storage\Storage $storage) {
	$this->storage = $storage;
	$this->storageId = $storage->getId();
	$this->cache = $storage->getCache();

	$server = \OC::$server;
	$cacheIsReadOnly = $server->getConfig()->getSystemValueBool('filesystem_cache_readonly', false);
	$this->cacheActive = !$cacheIsReadOnly;
	$this->lockingProvider = $server->get(ILockingProvider::class);
	$this->connection = $server->get(IDBConnection::class);
}
/**
 * Toggle whether the scan of a folder's children is wrapped in a database
 * transaction. Transactions are enabled by default.
 *
 * @param bool $useTransactions true to use transactions, false to scan without them
 */
public function setUseTransactions($useTransactions) {
	$this->useTransactions = $useTransactions;
}
/**
 * Fetch the metadata of a file or folder from the storage.
 *
 * A debug message is logged when the storage reports no metadata for the path.
 *
 * @param string $path
 * @return array|null the metadata as returned by the storage, null if there is none
 */
protected function getData($path) {
	$metadata = $this->storage->getMetaData($path);
	if ($metadata === null) {
		$logger = \OC::$server->get(LoggerInterface::class);
		$logger->debug("!!! Path '$path' is not accessible or present !!!", ['app' => 'core']);
	}
	return $metadata;
}
/**
 * scan a single file and store it in the cache
 *
 * Paths rejected by the storage's verifyPath() and partial files are skipped
 * (null is returned). If the storage has no metadata for the file, the file is
 * removed from the cache. If the parent is not in the cache yet, the parent is
 * scanned first.
 *
 * When $lock is true and the storage supports locking, a shared lock on $file
 * is held for the duration of the scan; it is released on every exit path,
 * including early returns and exceptions.
 *
 * @param string $file
 * @param int $reuseExisting a combination of self::REUSE_*, 0 to not reuse any cached data
 * @param int $parentId id of the parent folder, -1 to look it up in the cache
 * @param array|null|false $cacheData existing data in the cache for the file to be scanned
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param array|null $data the metadata for the file, as returned by the storage
 * @return array|null an array of metadata of the scanned file
 * @throws \OCP\Lock\LockedException
 */
public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true, $data = null) {
	if ($file !== '') {
		try {
			$this->storage->verifyPath(dirname($file), basename($file));
		} catch (\Exception $e) {
			return null;
		}
	}

	// only proceed if $file is not a partial file, blacklist is handled by the storage
	if (self::isPartialFile($file)) {
		return null;
	}

	// acquire a lock; remember whether we did so that acquire and release always stay paired
	$locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
	if ($locked) {
		$this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
	}

	try {
		try {
			$data = $data ?? $this->getData($file);
		} catch (ForbiddenException $e) {
			return null;
		}

		if (!$data) {
			// the storage doesn't know the file (anymore)
			$this->removeFromCache($file);
			return $data;
		}

		// pre-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		$parent = dirname($file);
		if ($parent === '.' || $parent === '/') {
			$parent = '';
		}
		if ($parentId === -1) {
			$parentId = $this->cache->getParentId($file);
		}

		// scan the parent if it's not in the cache (id -1) and the current file is not the root folder
		if ($file && $parentId === -1) {
			$parentData = $this->scanFile($parent);
			if (!$parentData) {
				// the lock on $file is released by the finally block below
				return null;
			}
			$parentId = $parentData['fileid'];
		}
		if ($parent) {
			$data['parent'] = $parentId;
		}

		if (is_null($cacheData)) {
			/** @var CacheEntry $cacheData */
			$cacheData = $this->cache->get($file);
		}

		if ($cacheData && $reuseExisting && isset($cacheData['fileid'])) {
			// prevent empty etag
			$etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
			$fileId = $cacheData['fileid'];
			$data['fileid'] = $fileId;

			// only reuse data if the file hasn't explicitly changed
			$mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime'];

			// if the folder is marked as unscanned, never reuse etags
			if ($mtimeUnchanged && $cacheData['size'] !== -1) {
				$data['mtime'] = $cacheData['mtime'];
				if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
					$data['size'] = $cacheData['size'];
				}
				if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
					$data['etag'] = $etag;
				}
			}

			// we only updated unencrypted_size if it's already set
			if ($cacheData['unencrypted_size'] === 0) {
				unset($data['unencrypted_size']);
			}

			// Only update metadata that has changed
			// i.e. get all the values in $data that are not present in the cache already
			$newData = $this->array_diff_assoc_multi($data, $cacheData->getData());

			// make it known to the caller that etag has been changed and needs propagation
			if (isset($newData['etag'])) {
				$data['etag_changed'] = true;
			}
		} else {
			// we only updated unencrypted_size if it's already set
			unset($data['unencrypted_size']);
			$newData = $data;
			$fileId = -1;
		}

		if (!empty($newData)) {
			// Reset the checksum if the data has changed
			$newData['checksum'] = '';
			$newData['parent'] = $parentId;
			$data['fileid'] = $this->addToCache($file, $newData, $fileId);
		}

		$data['oldSize'] = ($cacheData && isset($cacheData['size'])) ? $cacheData['size'] : 0;

		if ($cacheData && isset($cacheData['encrypted'])) {
			$data['encrypted'] = $cacheData['encrypted'];
		}

		// post-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		if (!isset($data['encrypted'])) {
			$data['encrypted'] = false;
		}
		return $data;
	} finally {
		// release the acquired lock, whatever way we leave the scan
		if ($locked) {
			$this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
	}
}
/**
 * Announce the removal of a path through the hooks and, if the cache is
 * active, remove the path from the cache.
 *
 * @param string $path
 */
protected function removeFromCache($path) {
	\OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
	$this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

	if (!$this->cacheActive) {
		return;
	}
	$this->cache->remove($path);
}
/**
 * Store scanned metadata in the cache, either as an update of an existing
 * entry or as a new entry.
 *
 * If the metadata carries `scan_permissions`, that value is used as the
 * `permissions` to store. The hooks are emitted even when the cache is inactive.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the existing cache entry, -1 to insert a new entry
 * @return int the id of the added file, -1 if the cache is not active
 */
protected function addToCache($path, $data, $fileId = -1) {
	if (isset($data['scan_permissions'])) {
		$data['permissions'] = $data['scan_permissions'];
	}

	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data, $fileId]);

	if (!$this->cacheActive) {
		return -1;
	}
	if ($fileId === -1) {
		return $this->cache->insert($path, $data);
	}

	$this->cache->update($fileId, $data);
	return $fileId;
}
/**
 * Write metadata to the cache: update the entry with the given id, or put
 * the data by path when no id is given. Nothing is written when the cache
 * is inactive, but the hooks are still emitted.
 *
 * NOTE(review): the legacy OC_Hook signal emitted here is named 'addToCache',
 * not 'updateCache' - confirm that this is intentional.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the existing cache entry, -1 if unknown
 */
protected function updateCache($path, $data, $fileId = -1) {
	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

	if (!$this->cacheActive) {
		return;
	}

	if ($fileId === -1) {
		$this->cache->put($path, $data);
	} else {
		$this->cache->update($fileId, $data);
	}
}
/**
 * scan a folder and all its children
 *
 * When $lock is true and the storage supports locking, an exclusive lock on
 * 'scanner::' . $path and a shared lock on $path are held while scanning.
 * Both are released again on every exit path; in particular the exclusive
 * lock is released if acquiring the shared lock fails.
 *
 * If the scan throws a NotFoundException, $path is removed from the cache.
 *
 * @param string $path
 * @param bool $recursive
 * @param int $reuse a combination of self::REUSE_*, -1 to pick a default based on $recursive
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @return array|null an array of the meta data of the scanned file or folder
 */
public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
	if ($reuse === -1) {
		$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
	}

	$locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
	if ($locked) {
		$this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
	}

	try {
		// acquired inside the outer try so a failure here still releases the exclusive lock
		if ($locked) {
			$this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}

		try {
			$data = $this->scanFile($path, $reuse, -1, null, $lock);
			if ($data && $data['mimetype'] === 'httpd/unix-directory') {
				$size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']);
				$data['size'] = $size;
			}
		} catch (NotFoundException $e) {
			$this->removeFromCache($path);
			return null;
		} finally {
			if ($locked) {
				$this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
			}
		}
	} finally {
		if ($locked) {
			$this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
		}
	}

	return $data;
}
/**
 * Compares $array1 against $array2 and returns all the values in $array1 that are not in $array2
 * Note this is a one-way check - i.e. we don't care about things that are in $array2 that aren't in $array1
 *
 * Supports multi-dimensional arrays
 * Also checks keys/indexes
 * Comparisons are strict just like array_diff_assoc
 * Order of keys/values does not matter
 *
 * For nested arrays only the differing part of the nested array is returned.
 * If a value is an array in $array1 but not an array in $array2, the whole
 * value from $array1 is reported as a difference.
 *
 * @param array $array1
 * @param array $array2
 * @return array with the differences between $array1 and $array2
 */
protected function array_diff_assoc_multi(array $array1, array $array2) {
	$result = [];

	foreach ($array1 as $key => $value) {
		// if $array2 doesn't have the same key, that's a result
		if (!array_key_exists($key, $array2)) {
			$result[$key] = $value;
			continue;
		}

		$otherValue = $array2[$key];

		// if $array2's value for the same key is different, that's a result
		if (!is_array($value)) {
			if ($otherValue !== $value) {
				$result[$key] = $value;
			}
			continue;
		}

		// an array can never equal a non-array; don't recurse, as that would be a TypeError
		if (!is_array($otherValue)) {
			$result[$key] = $value;
			continue;
		}

		$nestedDiff = $this->array_diff_assoc_multi($value, $otherValue);
		if (!empty($nestedDiff)) {
			$result[$key] = $nestedDiff;
		}
	}

	return $result;
}
/**
 * Load the cached entries of a folder's direct children, indexed by child name.
 *
 * @param int $folderId id of the folder in the cache
 * @return array[] cached child entries, keyed by their 'name'
 */
protected function getExistingChildren($folderId) {
	$childrenByName = [];
	foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
		$name = $entry['name'];
		$childrenByName[$name] = $entry;
	}
	return $childrenByName;
}
/**
 * scan all the files and folders in a folder
 *
 * The direct children are handled first, then every queued sub folder is
 * scanned recursively. Afterwards the folder's size (and a new etag, if the
 * etag of anything below it changed) is written to the cache. For encrypted
 * storages the folder size is recalculated by the cache instead.
 *
 * @param string $path
 * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
 * @param int $reuse a combination of self::REUSE_*
 * @param int $folderId id for the folder to be scanned
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param int|float $oldSize the size of the folder before (re)scanning the children
 * @param bool $etagChanged (by reference) set to true when the etag of any descendant changed
 * @return int|float the size of the scanned folder or -1 if the size is unknown at this stage
 */
protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) {
	if ($reuse === -1) {
		$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
	}
	$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);

	$size = 0;
	$childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged);

	foreach ($childQueue as $child => [$childId, $childSize]) {
		// "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed
		$childEtagChanged = false;
		$childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize, $childEtagChanged);
		// logical (not bitwise) OR so the flag handed back to the caller stays a boolean
		$etagChanged = $etagChanged || $childEtagChanged;

		if ($childSize === -1) {
			$size = -1;
		} elseif ($size !== -1) {
			$size += $childSize;
		}
	}

	// for encrypted storages, we trigger a regular folder size calculation instead of using the calculated size
	// to make sure we also updated the unencrypted-size where applicable
	if ($this->storage->instanceOfStorage(Encryption::class)) {
		$this->cache->calculateFolderSize($path);
	} elseif ($this->cacheActive) {
		$updatedData = [];
		if ($oldSize !== $size) {
			$updatedData['size'] = $size;
		}
		if ($etagChanged) {
			$updatedData['etag'] = uniqid();
		}
		if ($updatedData) {
			$this->cache->update($folderId, $updatedData);
		}
	}

	$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
	return $size;
}
/**
 * Scan the direct children of a folder and synchronise them with the cache.
 *
 * Every entry reported by the storage is scanned with scanFile(); entries with
 * permissions 0 and entries whose name is not normalized are skipped. Cached
 * children that were not (successfully) listed this way are removed from the
 * cache. If transactions are enabled, the work is wrapped in a database
 * transaction, which is rolled back if anything other than a database
 * exception escapes.
 *
 * @param string $path path of the folder
 * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
 * @param int $reuse a combination of self::REUSE_*
 * @param int $folderId id of the folder in the cache
 * @param bool $lock passed on to scanFile()
 * @param int|float $size (by reference) accumulates the size of the children that are not queued, -1 once a size is unknown
 * @param bool $etagChanged (by reference) set to true when the etag of a child changed
 * @return array sub folders still to be scanned: child path => [file id, size]
 * @throws \OCP\Lock\LockedException
 */
private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array {
	// we put this in its own function so it cleans up the memory before we start recursing
	$existingChildren = $this->getExistingChildren($folderId);
	// the iterator keys are not used; not preserving them makes sure entries with colliding keys aren't lost
	$newChildren = iterator_to_array($this->storage->getDirectoryContent($path), false);

	if (count($existingChildren) === 0 && count($newChildren) === 0) {
		// no need to do a transaction
		return [];
	}

	if ($this->useTransactions) {
		$this->connection->beginTransaction();
	}

	$exceptionOccurred = false;
	$childQueue = [];
	$newChildNames = [];

	try {
		foreach ($newChildren as $fileMeta) {
			$permissions = $fileMeta['scan_permissions'] ?? $fileMeta['permissions'];
			if ($permissions === 0) {
				continue;
			}

			$originalFile = $fileMeta['name'];
			$file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/');
			if (trim($originalFile, '/') !== $file) {
				// encoding mismatch, might require compatibility wrapper
				\OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "' . $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
				$this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$path ? $path . '/' . $originalFile : $originalFile]);
				// skip this entry
				continue;
			}

			$newChildNames[] = $file;
			$child = $path ? $path . '/' . $file : $file;

			try {
				$existingData = $existingChildren[$file] ?? false;
				$data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock, $fileMeta);
				if ($data) {
					if ($data['mimetype'] === 'httpd/unix-directory' && $recursive === self::SCAN_RECURSIVE) {
						$childQueue[$child] = [$data['fileid'], $data['size']];
					} elseif ($data['mimetype'] === 'httpd/unix-directory' && $recursive === self::SCAN_RECURSIVE_INCOMPLETE && $data['size'] === -1) {
						// only recurse into folders which aren't fully scanned
						$childQueue[$child] = [$data['fileid'], $data['size']];
					} elseif ($data['size'] === -1) {
						$size = -1;
					} elseif ($size !== -1) {
						$size += $data['size'];
					}

					if (isset($data['etag_changed']) && $data['etag_changed']) {
						$etagChanged = true;
					}
				}
			} catch (Exception $ex) {
				// might happen if inserting duplicate while a scanning
				// process is running in parallel
				// log and ignore
				if ($this->useTransactions) {
					$this->connection->rollback();
					$this->connection->beginTransaction();
				}
				\OC::$server->get(LoggerInterface::class)->debug('Exception while scanning file "' . $child . '"', [
					'app' => 'core',
					'exception' => $ex,
				]);
				$exceptionOccurred = true;
			}
		}

		$removedChildren = \array_diff(array_keys($existingChildren), $newChildNames);
		foreach ($removedChildren as $childName) {
			$child = $path ? $path . '/' . $childName : $childName;
			$this->removeFromCache($child);
		}
	} catch (\Throwable $e) {
		// never leave the transaction open behind us (locked files, unavailable storages, ...)
		if ($this->useTransactions) {
			$this->connection->rollback();
		}
		throw $e;
	}

	if ($this->useTransactions) {
		$this->connection->commit();
	}

	if ($exceptionOccurred) {
		// It might happen that the parallel scan process has already
		// inserted mimetypes but those weren't available yet inside the transaction
		// To make sure to have the updated mime types in such cases,
		// we reload them here
		\OC::$server->getMimeTypeLoader()->reset();
	}

	return $childQueue;
}
/**
 * Tell whether a path belongs to a partial upload and must be ignored by the scanner.
 *
 * A path is partial when its extension is 'part' or when it contains '.part/',
 * i.e. when it lies below a folder whose name ends in '.part'. This keeps
 * unfinished put requests from being scanned.
 *
 * @param string $file
 * @return boolean true if the file should be ignored
 */
public static function isPartialFile($file) {
	$hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';

	return $hasPartExtension || str_contains($file, '.part/');
}
/**
 * Scan whatever has not been fully scanned yet.
 *
 * - Jailed storages delegate to the scanner of their unjailed storage.
 * - A storage whose root is not in the cache gets a full recursive scan.
 * - Otherwise the paths the cache reports as incomplete are scanned one after
 *   the other, until there are none left or the same path is reported twice in a row.
 */
public function backgroundScan() {
	if ($this->storage->instanceOfStorage(Jail::class)) {
		// for jail storage wrappers (shares, groupfolders) we run the background scan on the source storage
		// this is mainly done because the jail wrapper doesn't implement `getIncomplete` (because it would be inefficient).
		//
		// Running the scan on the source storage might scan more than "needed", but the unscanned files outside the jail will
		// have to be scanned at some point anyway.
		$sourceStorage = $this->storage->getUnjailedStorage();
		$sourceStorage->getScanner()->backgroundScan();
		return;
	}

	if (!$this->cache->inCache('')) {
		// if the storage isn't in the cache yet, just scan the root completely
		$scanRoot = function () {
			$this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
		};
		$this->runBackgroundScanJob($scanRoot, '');
		return;
	}

	// find any path marked as unscanned and run the scanner until no more paths are unscanned (or we get stuck)
	$previousPath = null;
	while (true) {
		$path = $this->cache->getIncomplete();
		if ($path === false || $path === $previousPath) {
			break;
		}

		$scanIncomplete = function () use ($path) {
			$this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
		};
		$this->runBackgroundScanJob($scanIncomplete, $path);

		// FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
		// to make this possible
		$previousPath = $path;
	}
}
/**
 * Run one background scan job and correct the folder size of the scanned path afterwards.
 *
 * Invalid, unavailable, forbidden or locked storages are skipped silently;
 * any other exception is passed on to the caller.
 *
 * @param callable $callback performs the actual scan
 * @param string $path the path that is scanned by the callback
 */
protected function runBackgroundScanJob(callable $callback, $path) {
	try {
		$callback();
		\OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);

		$canCorrectSize = $this->cacheActive && $this->cache instanceof Cache;
		if ($canCorrectSize) {
			$this->cache->correctFolderSize($path, null, true);
		}
	} catch (
		\OCP\Files\StorageInvalidException
		| \OCP\Files\StorageNotAvailableException
		| \OCP\Files\ForbiddenException
		| \OCP\Lock\LockedException $e
	) {
		// skip unavailable, forbidden or locked storages
	}
}
/**
 * Enable or disable cache writes for this scanner. While inactive, scan
 * operations leave the cache untouched.
 *
 * @param boolean $active true to let scans modify the cache, false to leave it alone
 */
public function setCacheActive($active) {
	$this->cacheActive = $active;
}
- }
|