You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

520 lines
16 KiB

10 years ago
10 years ago
10 years ago
10 years ago
12 years ago
11 years ago
11 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  6. * @author Björn Schießle <bjoern@schiessle.org>
  7. * @author Daniel Jagszent <daniel@jagszent.de>
  8. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  9. * @author Lukas Reschke <lukas@statuscode.ch>
  10. * @author Martin Mattel <martin.mattel@diemattels.at>
  11. * @author Michael Gapczynski <GapczynskiM@gmail.com>
  12. * @author Morris Jobke <hey@morrisjobke.de>
  13. * @author Owen Winkler <a_github@midnightcircus.com>
  14. * @author Robin Appelman <robin@icewind.nl>
  15. * @author Robin McCorkell <robin@mccorkell.me.uk>
  16. * @author Roeland Jago Douma <roeland@famdouma.nl>
  17. * @author Thomas Müller <thomas.mueller@tmit.eu>
  18. * @author Vincent Petry <pvince81@owncloud.com>
  19. *
  20. * @license AGPL-3.0
  21. *
  22. * This code is free software: you can redistribute it and/or modify
  23. * it under the terms of the GNU Affero General Public License, version 3,
  24. * as published by the Free Software Foundation.
  25. *
  26. * This program is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU Affero General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU Affero General Public License, version 3,
  32. * along with this program. If not, see <http://www.gnu.org/licenses/>
  33. *
  34. */
  35. namespace OC\Files\Cache;
  36. use OC\Files\Filesystem;
  37. use OC\Hooks\BasicEmitter;
  38. use OCP\Config;
  39. use OCP\Files\Cache\IScanner;
  40. use OCP\Files\ForbiddenException;
  41. use OCP\Files\Storage\ILockingStorage;
  42. use OCP\Lock\ILockingProvider;
  43. /**
  44. * Class Scanner
  45. *
  46. * Hooks available in scope \OC\Files\Cache\Scanner:
  47. * - scanFile(string $path, string $storageId)
  48. * - scanFolder(string $path, string $storageId)
  49. * - postScanFile(string $path, string $storageId)
  50. * - postScanFolder(string $path, string $storageId)
  51. *
  52. * @package OC\Files\Cache
  53. */
  54. class Scanner extends BasicEmitter implements IScanner {
  55. /**
  56. * @var \OC\Files\Storage\Storage $storage
  57. */
  58. protected $storage;
  59. /**
  60. * @var string $storageId
  61. */
  62. protected $storageId;
  63. /**
  64. * @var \OC\Files\Cache\Cache $cache
  65. */
  66. protected $cache;
  67. /**
  68. * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
  69. */
  70. protected $cacheActive;
  71. /**
  72. * @var bool $useTransactions whether to use transactions
  73. */
  74. protected $useTransactions = true;
  75. /**
  76. * @var \OCP\Lock\ILockingProvider
  77. */
  78. protected $lockingProvider;
  /**
   * Bind the scanner to a storage and pick up the collaborators it needs.
   *
   * The cache is treated as writable unless the system config value
   * 'filesystem_cache_readonly' is set.
   *
   * @param \OC\Files\Storage\Storage $storage storage to scan
   */
  public function __construct(\OC\Files\Storage\Storage $storage) {
      $this->storage = $storage;
      $this->storageId = $storage->getId();
      $this->cache = $storage->getCache();

      $cacheIsReadOnly = Config::getSystemValue('filesystem_cache_readonly', false);
      $this->cacheActive = !$cacheIsReadOnly;

      $this->lockingProvider = \OC::$server->getLockingProvider();
  }
  /**
   * Choose whether scanning the children of a folder is wrapped in a
   * database transaction. Transactions are enabled by default.
   *
   * @param bool $useTransactions true to wrap folder scans in a transaction
   */
  public function setUseTransactions($useTransactions) {
      $this->useTransactions = $useTransactions;
  }
  /**
   * Fetch the metadata of a file or folder from the storage.
   *
   * A debug log entry is written when the storage reports no metadata
   * (null) for the path; the null is still handed back to the caller.
   *
   * @param string $path
   * @return array|null metadata of the file, or null if the storage returned none
   */
  protected function getData($path) {
      $metaData = $this->storage->getMetaData($path);
      if ($metaData === null) {
          \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not accessible or present !!!", \OCP\Util::DEBUG);
      }
      return $metaData;
  }
  /**
   * Scan a single file and store it in the cache.
   *
   * Partial files and blacklisted files are skipped and yield null. When the
   * storage reports no metadata for the file, it is removed from the cache.
   *
   * The shared lock taken for the scan is released on every exit path,
   * including when the storage denies access (ForbiddenException) or when an
   * exception is raised while the cache is updated.
   *
   * @param string $file
   * @param int $reuseExisting bitmask of self::REUSE_* flags
   * @param int $parentId id of the parent folder, -1 to look it up
   * @param array | null $cacheData existing data in the cache for the file to be scanned
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return array|null an array of metadata of the scanned file, null if the file was skipped
   * @throws \OC\ServerNotAvailableException
   * @throws \OCP\Lock\LockedException
   */
  public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) {
      // only proceed if $file is not a partial file nor a blacklisted file
      if (self::isPartialFile($file) || Filesystem::isFileBlacklisted($file)) {
          return null;
      }

      // the lock is only taken when requested and when the storage supports locking
      $locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
      if ($locked) {
          $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
      }

      $data = null;
      try {
          $forbidden = false;
          try {
              $data = $this->getData($file);
          } catch (ForbiddenException $e) {
              // access denied: leave the cache untouched and report nothing
              $forbidden = true;
          }

          if (!$forbidden) {
              if ($data) {
                  // pre-emit only if it was a file. By that we avoid counting/treating folders as files
                  if ($data['mimetype'] !== 'httpd/unix-directory') {
                      $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
                      \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
                  }

                  $parent = dirname($file);
                  if ($parent === '.' or $parent === '/') {
                      $parent = '';
                  }
                  if ($parentId === -1) {
                      $parentId = $this->cache->getId($parent);
                  }

                  // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
                  if ($file and $parentId === -1) {
                      $parentData = $this->scanFile($parent);
                      $parentId = $parentData['fileid'];
                  }
                  if ($parent) {
                      $data['parent'] = $parentId;
                  }
                  if (is_null($cacheData)) {
                      /** @var CacheEntry $cacheData */
                      $cacheData = $this->cache->get($file);
                  }

                  if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
                      // prevent empty etag
                      $etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
                      $fileId = $cacheData['fileid'];
                      $data['fileid'] = $fileId;
                      // only reuse data if the file hasn't explicitly changed
                      if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
                          $data['mtime'] = $cacheData['mtime'];
                          if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
                              $data['size'] = $cacheData['size'];
                          }
                          if ($reuseExisting & self::REUSE_ETAG) {
                              $data['etag'] = $etag;
                          }
                      }
                      // Only update metadata that has changed
                      $newData = array_diff_assoc($data, $cacheData->getData());
                  } else {
                      $newData = $data;
                      $fileId = -1;
                  }

                  if (!empty($newData)) {
                      // Reset the checksum if the data has changed
                      $newData['checksum'] = '';
                      $data['fileid'] = $this->addToCache($file, $newData, $fileId);
                  }

                  $data['oldSize'] = isset($cacheData['size']) ? $cacheData['size'] : 0;

                  if (isset($cacheData['encrypted'])) {
                      $data['encrypted'] = $cacheData['encrypted'];
                  }

                  // post-emit only if it was a file. By that we avoid counting/treating folders as files
                  if ($data['mimetype'] !== 'httpd/unix-directory') {
                      $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
                      \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
                  }
              } else {
                  // the storage has no metadata for this path any more
                  $this->removeFromCache($file);
              }
          }
      } catch (\Exception $e) {
          // never leave the shared lock behind when the scan is aborted
          if ($locked) {
              $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
          }
          throw $e;
      }

      // release the acquired lock
      if ($locked) {
          $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
      }

      if ($data && !isset($data['encrypted'])) {
          $data['encrypted'] = false;
      }
      return $data;
  }
  /**
   * Announce the removal of a path and, if the cache is active, drop it
   * from the cache.
   *
   * @param string $path
   */
  protected function removeFromCache($path) {
      \OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
      $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

      if (!$this->cacheActive) {
          return;
      }
      $this->cache->remove($path);
  }
  /**
   * Announce new metadata for a path and write it to the cache.
   *
   * An existing entry is updated when $fileId is given, otherwise a new
   * entry is inserted. Nothing is written while the cache is inactive.
   *
   * @param string $path
   * @param array $data
   * @param int $fileId id of the existing cache entry, -1 if there is none
   * @return int the id of the added file, -1 if the cache is inactive
   */
  protected function addToCache($path, $data, $fileId = -1) {
      \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
      $this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data]);

      if (!$this->cacheActive) {
          return -1;
      }
      if ($fileId === -1) {
          return $this->cache->put($path, $data);
      }
      $this->cache->update($fileId, $data);
      return $fileId;
  }
  /**
   * Announce changed metadata for a path and write it to the cache.
   *
   * The legacy hook is emitted under the name 'addToCache', the emitter
   * event under 'updateCache'. Nothing is written while the cache is inactive.
   *
   * @param string $path
   * @param array $data
   * @param int $fileId id of the existing cache entry, -1 to insert by path
   */
  protected function updateCache($path, $data, $fileId = -1) {
      \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
      $this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

      if (!$this->cacheActive) {
          return;
      }
      if ($fileId === -1) {
          $this->cache->put($path, $data);
      } else {
          $this->cache->update($fileId, $data);
      }
  }
  /**
   * Scan a folder and all its children.
   *
   * While scanning, an exclusive 'scanner::<path>' lock and a shared lock on
   * the path itself are held (if $lock is set and the storage supports
   * locking). Both locks are released again on every exit path, including
   * when the scan is aborted by an exception.
   *
   * @param string $path
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to derive it from $recursive
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return array an array of the meta data of the scanned file or folder
   */
  public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
      }

      $locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
      if ($locked) {
          $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
          try {
              $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
          } catch (\Exception $e) {
              // the path lock could not be taken: give the scanner lock back
              $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
              throw $e;
          }
      }

      try {
          $data = $this->scanFile($path, $reuse, -1, null, $lock);
          if ($data and $data['mimetype'] === 'httpd/unix-directory') {
              $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock);
              $data['size'] = $size;
          }
      } catch (\Exception $e) {
          // an aborted scan must not leave its locks behind
          if ($locked) {
              $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
              $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
          }
          throw $e;
      }

      if ($locked) {
          $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
          $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
      }
      return $data;
  }
  /**
   * Collect the cache entries of a folder's children, keyed by child name.
   *
   * @param int $folderId
   * @return array[] cached child entries indexed by their 'name'
   */
  protected function getExistingChildren($folderId) {
      $byName = [];
      foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
          $byName[$entry['name']] = $entry;
      }
      return $byName;
  }
  /**
   * Get the children from the storage.
   *
   * Entries rejected by Filesystem::isIgnoredDir() are left out. The
   * directory handle is closed again once all entries have been read.
   *
   * @param string $folder
   * @return string[] normalized names of the entries found in the folder
   */
  protected function getNewChildren($folder) {
      $children = array();
      $dh = $this->storage->opendir($folder);
      if (!is_resource($dh)) {
          // folder could not be opened: report no children
          return $children;
      }

      while (($file = readdir($dh)) !== false) {
          if (!Filesystem::isIgnoredDir($file)) {
              $children[] = trim(\OC\Files\Filesystem::normalizePath($file), '/');
          }
      }
      // free the directory handle instead of leaking it until the request ends
      closedir($dh);

      return $children;
  }
  /**
   * Scan all the files and folders in a folder.
   *
   * @param string $path
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to derive it from $recursive
   * @param int $folderId id for the folder to be scanned, looked up from the cache when not known
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return int the size of the scanned folder or -1 if the size is unknown at this stage
   */
  protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true) {
      if ($reuse === -1) {
          $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
      }
      $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
      $size = 0;

      // Resolve the folder id only when the caller could not supply one.
      // -1 is treated as unknown as well: addToCache() hands back -1 while
      // the cache is inactive, so scanFile() may report that as 'fileid'.
      if (is_null($folderId) || $folderId === -1) {
          $folderId = $this->cache->getId($path);
      }

      $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);

      foreach ($childQueue as $child => $childId) {
          $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
          if ($childSize === -1) {
              // one unknown sub folder makes the whole folder size unknown
              $size = -1;
          } else if ($size !== -1) {
              $size += $childSize;
          }
      }

      if ($this->cacheActive) {
          $this->cache->update($folderId, array('size' => $size));
      }
      $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
      return $size;
  }
  /**
   * Scan the direct children of a folder and drop cache entries of children
   * that no longer exist in the storage.
   *
   * Kept in its own function so its local data is freed before the caller
   * starts recursing into sub folders.
   *
   * When transactions are enabled the whole pass runs inside one database
   * transaction. It is committed on success and rolled back when any
   * exception aborts the pass; database exceptions raised while scanning a
   * single child are logged and skipped instead.
   *
   * @param string $path folder whose children are scanned
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags
   * @param int $folderId cache id of the folder
   * @param bool $lock whether scanFile() should take a read lock per child
   * @param int $size running folder size, updated in place; -1 once it is unknown
   * @return array sub folders still to be scanned, as path => file id
   * @throws \OCP\Lock\LockedException
   */
  private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
      $existingChildren = $this->getExistingChildren($folderId);
      $newChildren = $this->getNewChildren($path);

      if ($this->useTransactions) {
          \OC::$server->getDatabaseConnection()->beginTransaction();
      }

      $exceptionOccurred = false;
      $childQueue = [];
      try {
          foreach ($newChildren as $file) {
              $child = ($path) ? $path . '/' . $file : $file;
              try {
                  $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null;
                  $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock);
                  if ($data) {
                      if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
                          $childQueue[$child] = $data['fileid'];
                      } else if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === -1) {
                          // only recurse into folders which aren't fully scanned
                          $childQueue[$child] = $data['fileid'];
                      } else if ($data['size'] === -1) {
                          $size = -1;
                      } else if ($size !== -1) {
                          $size += $data['size'];
                      }
                  }
              } catch (\Doctrine\DBAL\DBALException $ex) {
                  // might happen if inserting duplicate while a scanning
                  // process is running in parallel
                  // log and ignore
                  \OCP\Util::writeLog('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OCP\Util::DEBUG);
                  $exceptionOccurred = true;
              }
          }

          $removedChildren = \array_diff(array_keys($existingChildren), $newChildren);
          foreach ($removedChildren as $childName) {
              $child = ($path) ? $path . '/' . $childName : $childName;
              $this->removeFromCache($child);
          }
      } catch (\Exception $e) {
          // do not leave the transaction open when the pass is aborted
          if ($this->useTransactions) {
              \OC::$server->getDatabaseConnection()->rollback();
          }
          throw $e;
      }

      if ($this->useTransactions) {
          \OC::$server->getDatabaseConnection()->commit();
      }
      if ($exceptionOccurred) {
          // It might happen that the parallel scan process has already
          // inserted mimetypes but those weren't available yet inside the transaction
          // To make sure to have the updated mime types in such cases,
          // we reload them here
          \OC::$server->getMimeTypeLoader()->reset();
      }
      return $childQueue;
  }
  /**
   * Tell whether a path must be skipped by the scanner because it belongs
   * to an unfinished upload.
   *
   * A path counts as partial when its extension is 'part' or when it lies
   * below a directory whose name ends in '.part'.
   *
   * @param string $file
   * @return boolean true if the file is a partial file
   */
  public static function isPartialFile($file) {
      $hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';
      return $hasPartExtension || strpos($file, '.part/') !== false;
  }
  /**
   * Walk over any folders that are not fully scanned yet and scan them.
   *
   * If the root is not in the cache at all, the whole storage is scanned.
   * Otherwise incomplete folders reported by the cache are scanned one by
   * one until there are none left or the cache reports the same path twice
   * in a row.
   */
  public function backgroundScan() {
      if (!$this->cache->inCache('')) {
          $this->runBackgroundScanJob(function () {
              $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
          }, '');
          return;
      }

      $previousPath = null;
      while (true) {
          $path = $this->cache->getIncomplete();
          if ($path === false || $path === $previousPath) {
              break;
          }
          $this->runBackgroundScanJob(function () use ($path) {
              $this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
          }, $path);
          // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
          // to make this possible
          $previousPath = $path;
      }
  }
  /**
   * Run one background scan step and correct the folder size afterwards.
   *
   * Invalid, unavailable, forbidden or locked storages are skipped silently.
   *
   * @param callable $callback performs the actual scan
   * @param string $path path whose folder size is corrected after the scan
   */
  private function runBackgroundScanJob(callable $callback, $path) {
      try {
          $callback();
          \OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);
          $canCorrectSize = $this->cacheActive && $this->cache instanceof Cache;
          if ($canCorrectSize) {
              $this->cache->correctFolderSize($path);
          }
      } catch (\OCP\Files\StorageInvalidException $invalid) {
          // skip unavailable storages
      } catch (\OCP\Files\StorageNotAvailableException $notAvailable) {
          // skip unavailable storages
      } catch (\OCP\Files\ForbiddenException $forbidden) {
          // skip forbidden storages
      } catch (\OCP\Lock\LockedException $locked) {
          // skip unavailable storages
      }
  }
  /**
   * Switch cache writes of the scanner on or off.
   *
   * @param boolean $active true to let scan operations modify the cache, false to leave it untouched
   */
  public function setCacheActive($active) {
      $this->cacheActive = $active;
  }
  479. }