You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

530 lines
17 KiB

10 years ago
10 years ago
10 years ago
10 years ago
11 years ago
11 years ago
11 years ago
12 years ago
11 years ago
11 years ago
11 years ago
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2016, ownCloud, Inc.
  4. *
  5. * @author Arthur Schiwon <blizzz@arthur-schiwon.de>
  6. * @author Björn Schießle <bjoern@schiessle.org>
  7. * @author Daniel Jagszent <daniel@jagszent.de>
  8. * @author Jörn Friedrich Dreyer <jfd@butonic.de>
  9. * @author Lukas Reschke <lukas@statuscode.ch>
  10. * @author Martin Mattel <martin.mattel@diemattels.at>
  11. * @author Michael Gapczynski <GapczynskiM@gmail.com>
  12. * @author Morris Jobke <hey@morrisjobke.de>
  13. * @author Owen Winkler <a_github@midnightcircus.com>
  14. * @author Robin Appelman <robin@icewind.nl>
  15. * @author Robin McCorkell <robin@mccorkell.me.uk>
  16. * @author Roeland Jago Douma <roeland@famdouma.nl>
  17. * @author Thomas Müller <thomas.mueller@tmit.eu>
  18. * @author Vincent Petry <pvince81@owncloud.com>
  19. *
  20. * @license AGPL-3.0
  21. *
  22. * This code is free software: you can redistribute it and/or modify
  23. * it under the terms of the GNU Affero General Public License, version 3,
  24. * as published by the Free Software Foundation.
  25. *
  26. * This program is distributed in the hope that it will be useful,
  27. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. * GNU Affero General Public License for more details.
  30. *
  31. * You should have received a copy of the GNU Affero General Public License, version 3,
  32. * along with this program. If not, see <http://www.gnu.org/licenses/>
  33. *
  34. */
  35. namespace OC\Files\Cache;
  36. use OC\Files\Filesystem;
  37. use OC\Hooks\BasicEmitter;
  38. use OCP\Config;
  39. use OCP\Files\Cache\IScanner;
  40. use OCP\Files\ForbiddenException;
  41. use OCP\Files\Storage\ILockingStorage;
  42. use OCP\Lock\ILockingProvider;
  43. /**
  44. * Class Scanner
  45. *
  46. * Hooks available in scope \OC\Files\Cache\Scanner:
  47. * - scanFile(string $path, string $storageId)
  48. * - scanFolder(string $path, string $storageId)
  49. * - postScanFile(string $path, string $storageId)
  50. * - postScanFolder(string $path, string $storageId)
  51. *
  52. * @package OC\Files\Cache
  53. */
  54. class Scanner extends BasicEmitter implements IScanner {
  55. /**
  56. * @var \OC\Files\Storage\Storage $storage
  57. */
  58. protected $storage;
  59. /**
  60. * @var string $storageId
  61. */
  62. protected $storageId;
  63. /**
  64. * @var \OC\Files\Cache\Cache $cache
  65. */
  66. protected $cache;
  67. /**
  68. * @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache
  69. */
  70. protected $cacheActive;
  71. /**
  72. * @var bool $useTransactions whether to use transactions
  73. */
  74. protected $useTransactions = true;
  75. /**
  76. * @var \OCP\Lock\ILockingProvider
  77. */
  78. protected $lockingProvider;
  /**
   * Bind the scanner to a storage.
   *
   * Remembers the storage, its id and its cache object, enables cache writes
   * unless the 'filesystem_cache_readonly' system value is set, and fetches the
   * locking provider from the server container.
   *
   * @param \OC\Files\Storage\Storage $storage the storage to scan
   */
  public function __construct(\OC\Files\Storage\Storage $storage) {
    $this->storage = $storage;
    $this->storageId = $storage->getId();
    $this->cache = $storage->getCache();

    $cacheIsReadOnly = Config::getSystemValue('filesystem_cache_readonly', false);
    $this->cacheActive = !$cacheIsReadOnly;

    $this->lockingProvider = \OC::$server->getLockingProvider();
  }
  /**
   * Choose whether the scanning of a folder is wrapped in a database transaction.
   * Transactions are used by default.
   *
   * @param bool $useTransactions true to wrap folder scans in a transaction
   */
  public function setUseTransactions($useTransactions) {
    $this->useTransactions = $useTransactions;
  }
  /**
   * Fetch the metadata of a file or folder from the storage.
   *
   * A debug log entry is written when the storage returns null for the path;
   * the null is still handed back to the caller.
   *
   * @param string $path
   * @return array an array of metadata of the file
   */
  protected function getData($path) {
    $metaData = $this->storage->getMetaData($path);
    if ($metaData === null) {
      \OCP\Util::writeLog('OC\Files\Cache\Scanner', "!!! Path '$path' is not accessible or present !!!", \OCP\Util::DEBUG);
    }
    return $metaData;
  }
  /**
   * scan a single file and store it in the cache
   *
   * When $lock is true and the storage supports locking, a shared lock on $file
   * is held while the metadata is collected and written. The lock is released
   * on every exit path, including early returns and exceptions.
   *
   * @param string $file
   * @param int $reuseExisting bitmask of self::REUSE_* flags, 0 to not reuse cached data
   * @param int $parentId id of the parent folder, -1 to look it up in the cache
   * @param array | null $cacheData existing data in the cache for the file to be scanned
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return array|null metadata of the scanned file; null when the path fails verification,
   *                    is a partial or blacklisted file, is forbidden, or its parent could
   *                    not be scanned
   * @throws \OC\ServerNotAvailableException
   * @throws \OCP\Lock\LockedException
   */
  public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true) {
    if ($file !== '') {
      try {
        $this->storage->verifyPath(dirname($file), basename($file));
      } catch (\Exception $e) {
        return null;
      }
    }

    // only proceed if $file is not a partial file nor a blacklisted file
    if (self::isPartialFile($file) or Filesystem::isFileBlacklisted($file)) {
      return null;
    }

    $holdsLock = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
    if ($holdsLock) {
      $this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
    }

    try {
      $data = $this->scanFileWhileLocked($file, $reuseExisting, $parentId, $cacheData);
    } catch (\Exception $e) {
      // release on failure too, then let the exception propagate unchanged
      if ($holdsLock) {
        $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
      }
      throw $e;
    }

    if ($holdsLock) {
      $this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
    }
    return $data;
  }

  /**
   * Collect the storage metadata of $file and synchronise it with the cache.
   * Locking is handled by the caller (scanFile).
   *
   * @param string $file
   * @param int $reuseExisting
   * @param int $parentId
   * @param array | null $cacheData
   * @return array|null the metadata, or a falsy value when the file is forbidden,
   *                    not present on the storage or its parent could not be scanned
   */
  private function scanFileWhileLocked($file, $reuseExisting, $parentId, $cacheData) {
    try {
      $data = $this->getData($file);
    } catch (ForbiddenException $e) {
      return null;
    }

    if (!$data) {
      // the storage has nothing for this path: drop it from the cache as well
      $this->removeFromCache($file);
      return $data;
    }

    // pre-emit only if it was a file. By that we avoid counting/treating folders as files
    if ($data['mimetype'] !== 'httpd/unix-directory') {
      $this->emit('\OC\Files\Cache\Scanner', 'scanFile', array($file, $this->storageId));
      \OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', array('path' => $file, 'storage' => $this->storageId));
    }

    $parent = dirname($file);
    if ($parent === '.' or $parent === '/') {
      $parent = '';
    }
    if ($parentId === -1) {
      $parentId = $this->cache->getId($parent);
    }

    // scan the parent if it's not in the cache (id -1) and the current file is not the root folder
    if ($file and $parentId === -1) {
      $parentData = $this->scanFile($parent);
      if (!$parentData) {
        return null;
      }
      $parentId = $parentData['fileid'];
    }
    if ($parent) {
      $data['parent'] = $parentId;
    }

    if (is_null($cacheData)) {
      /** @var CacheEntry $cacheData */
      $cacheData = $this->cache->get($file);
    }

    if ($cacheData and $reuseExisting and isset($cacheData['fileid'])) {
      // prevent empty etag
      if (empty($cacheData['etag'])) {
        $etag = $data['etag'];
      } else {
        $etag = $cacheData['etag'];
      }
      $fileId = $cacheData['fileid'];
      $data['fileid'] = $fileId;
      // only reuse data if the file hasn't explicitly changed
      if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
        $data['mtime'] = $cacheData['mtime'];
        if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
          $data['size'] = $cacheData['size'];
        }
        if ($reuseExisting & self::REUSE_ETAG) {
          $data['etag'] = $etag;
        }
      }
      // Only update metadata that has changed
      $newData = array_diff_assoc($data, $cacheData->getData());
    } else {
      $newData = $data;
      $fileId = -1;
    }

    if (!empty($newData)) {
      // Reset the checksum if the data has changed
      $newData['checksum'] = '';
      $data['fileid'] = $this->addToCache($file, $newData, $fileId);
    }

    if (isset($cacheData['size'])) {
      $data['oldSize'] = $cacheData['size'];
    } else {
      $data['oldSize'] = 0;
    }
    if (isset($cacheData['encrypted'])) {
      $data['encrypted'] = $cacheData['encrypted'];
    }

    // post-emit only if it was a file. By that we avoid counting/treating folders as files
    if ($data['mimetype'] !== 'httpd/unix-directory') {
      $this->emit('\OC\Files\Cache\Scanner', 'postScanFile', array($file, $this->storageId));
      \OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', array('path' => $file, 'storage' => $this->storageId));
    }

    if (!isset($data['encrypted'])) {
      $data['encrypted'] = false;
    }
    return $data;
  }
  /**
   * Announce that a path is being removed and, when the cache is active,
   * delete it from the cache. Both hooks fire even if the cache is inactive.
   *
   * @param string $path
   */
  protected function removeFromCache($path) {
    \OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
    $this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

    if (!$this->cacheActive) {
      return;
    }
    $this->cache->remove($path);
  }
  /**
   * Emit the addToCache hooks and write the entry to the cache.
   *
   * An existing entry (any $fileId other than -1) is updated in place, otherwise
   * a new entry is put under $path. Nothing is written while the cache is
   * inactive; -1 is returned in that case.
   *
   * @param string $path
   * @param array $data
   * @param int $fileId
   * @return int the id of the added file
   */
  protected function addToCache($path, $data, $fileId = -1) {
    \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
    $this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data]);

    if (!$this->cacheActive) {
      return -1;
    }
    if ($fileId === -1) {
      return $this->cache->put($path, $data);
    }
    $this->cache->update($fileId, $data);
    return $fileId;
  }
  /**
   * Emit the hooks for a cache update and write the entry to the cache.
   *
   * The legacy OC_Hook signal fired here is named 'addToCache', while the
   * emitter signal is 'updateCache'. Nothing is written while the cache is
   * inactive.
   *
   * @param string $path
   * @param array $data
   * @param int $fileId id of the entry to update, -1 to put the entry by path
   */
  protected function updateCache($path, $data, $fileId = -1) {
    \OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
    $this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

    if (!$this->cacheActive) {
      return;
    }
    if ($fileId === -1) {
      $this->cache->put($path, $data);
      return;
    }
    $this->cache->update($fileId, $data);
  }
  /**
   * scan a folder and all its children
   *
   * When $lock is true and the storage supports locking, an exclusive
   * 'scanner::<path>' lock and a shared lock on the path itself are held for the
   * duration of the scan. Both are released again on success as well as when the
   * scan aborts with an exception.
   *
   * @param string $path
   * @param bool $recursive
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick a default based on $recursive
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return array an array of the meta data of the scanned file or folder
   */
  public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
    if ($reuse === -1) {
      $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
    }

    $holdsLocks = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
    if ($holdsLocks) {
      $this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
      try {
        $this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
      } catch (\Exception $e) {
        // the shared lock was refused: do not keep the scanner lock behind
        $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
        throw $e;
      }
    }

    try {
      $data = $this->scanFile($path, $reuse, -1, null, $lock);
      if ($data and $data['mimetype'] === 'httpd/unix-directory') {
        $size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock);
        $data['size'] = $size;
      }
    } catch (\Exception $e) {
      // release on failure too, then let the exception propagate unchanged
      if ($holdsLocks) {
        $this->releaseScanLocks($path);
      }
      throw $e;
    }

    if ($holdsLocks) {
      $this->releaseScanLocks($path);
    }
    return $data;
  }

  /**
   * Release the two locks taken by scan(), in reverse order of acquisition.
   *
   * @param string $path
   */
  private function releaseScanLocks($path) {
    $this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
    $this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
  }
  /**
   * Load the children of a folder that are currently in the cache.
   *
   * @param int $folderId
   * @return array[] the cached child entries, keyed by their 'name'
   */
  protected function getExistingChildren($folderId) {
    $byName = [];
    foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
      $byName[$entry['name']] = $entry;
    }
    return $byName;
  }
  /**
   * Get the children from the storage
   *
   * Entries for which Filesystem::isIgnoredDir() is true are skipped; every other
   * name is normalized and stripped of leading and trailing slashes. An empty
   * list is returned when the folder cannot be opened.
   *
   * @param string $folder
   * @return string[]
   */
  protected function getNewChildren($folder) {
    $children = array();
    $dh = $this->storage->opendir($folder);
    if (!is_resource($dh)) {
      return $children;
    }
    while (($file = readdir($dh)) !== false) {
      if (!Filesystem::isIgnoredDir($file)) {
        $children[] = trim(\OC\Files\Filesystem::normalizePath($file), '/');
      }
    }
    // free the directory handle instead of waiting for request shutdown
    closedir($dh);
    return $children;
  }
  /**
   * scan all the files and folders in a folder
   *
   * Emits 'scanFolder' before and 'postScanFolder' after the folder is handled.
   * Sub folders queued by handleChildren() are scanned recursively and their
   * sizes are added up; the resulting size is written to the cache entry of the
   * folder when the cache is active.
   *
   * @param string $path
   * @param bool $recursive
   * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick a default based on $recursive
   * @param int $folderId id for the folder to be scanned; looked up from the cache by
   *                      path when it is null or -1
   * @param bool $lock set to false to disable getting an additional read lock during scanning
   * @return int the size of the scanned folder or -1 if the size is unknown at this stage
   */
  protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true) {
    if ($reuse === -1) {
      $reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
    }
    $this->emit('\OC\Files\Cache\Scanner', 'scanFolder', array($path, $this->storageId));
    $size = 0;

    // Only hit the cache when the caller could not tell us the id. The previous
    // condition was inverted: a known id was looked up again while a missing one
    // stayed null.
    if (is_null($folderId) or $folderId === -1) {
      $folderId = $this->cache->getId($path);
    }

    $childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);

    foreach ($childQueue as $child => $childId) {
      $childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
      if ($childSize === -1) {
        // one unknown child makes the size of the whole folder unknown
        $size = -1;
      } else if ($size !== -1) {
        $size += $childSize;
      }
    }

    if ($this->cacheActive) {
      $this->cache->update($folderId, array('size' => $size));
    }
    $this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', array($path, $this->storageId));
    return $size;
  }
  /**
   * Scan the direct children of a folder and remove vanished ones from the cache.
   *
   * Every child found on the storage is passed to scanFile(). Children that only
   * exist in the cache are removed from it. When transactions are enabled the
   * whole pass runs inside one database transaction, which is rolled back if an
   * exception other than a DBALException escapes from scanning a child.
   *
   * @param string $path folder to handle
   * @param bool $recursive one of the self::SCAN_* modes
   * @param int $reuse bitmask of self::REUSE_* flags
   * @param int $folderId cache id of $path
   * @param bool $lock forwarded to scanFile()
   * @param int $size running size of the folder, passed by reference; becomes -1 as
   *                  soon as a child that is not queued has an unknown size
   * @return array sub folders still to be scanned, as path => file id
   * @throws \OCP\Lock\LockedException
   */
  private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
    // we put this in its own function so it cleans up the memory before we start recursing
    $existingChildren = $this->getExistingChildren($folderId);
    $newChildren = $this->getNewChildren($path);

    if ($this->useTransactions) {
      \OC::$server->getDatabaseConnection()->beginTransaction();
    }

    $exceptionOccurred = false;
    $childQueue = [];

    foreach ($newChildren as $file) {
      $child = ($path) ? $path . '/' . $file : $file;
      try {
        $existingData = isset($existingChildren[$file]) ? $existingChildren[$file] : null;
        $data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock);
        if ($data) {
          if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
            $childQueue[$child] = $data['fileid'];
          } else if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === -1) {
            // only recurse into folders which aren't fully scanned
            $childQueue[$child] = $data['fileid'];
          } else if ($data['size'] === -1) {
            $size = -1;
          } else if ($size !== -1) {
            $size += $data['size'];
          }
        }
      } catch (\Doctrine\DBAL\DBALException $ex) {
        // might happen if inserting duplicate while a scanning
        // process is running in parallel
        // log and ignore
        \OCP\Util::writeLog('core', 'Exception while scanning file "' . $child . '": ' . $ex->getMessage(), \OCP\Util::DEBUG);
        $exceptionOccurred = true;
      } catch (\Exception $e) {
        // Any other failure (LockedException included) aborts the scan of this
        // folder: do not leave the transaction opened above dangling.
        if ($this->useTransactions) {
          \OC::$server->getDatabaseConnection()->rollback();
        }
        throw $e;
      }
    }

    // entries that are cached but no longer present on the storage
    $removedChildren = \array_diff(array_keys($existingChildren), $newChildren);
    foreach ($removedChildren as $childName) {
      $child = ($path) ? $path . '/' . $childName : $childName;
      $this->removeFromCache($child);
    }

    if ($this->useTransactions) {
      \OC::$server->getDatabaseConnection()->commit();
    }

    if ($exceptionOccurred) {
      // It might happen that the parallel scan process has already
      // inserted mimetypes but those weren't available yet inside the transaction
      // To make sure to have the updated mime types in such cases,
      // we reload them here
      \OC::$server->getMimeTypeLoader()->reset();
    }
    return $childQueue;
  }
  /**
   * Tell whether a path must be skipped by the scanner because it is a partial file.
   *
   * A path counts as partial when its extension is 'part' or when it contains
   * the sequence '.part/', i.e. it lies below a folder whose name ends in '.part'.
   * This keeps unfinished put requests from being scanned.
   *
   * @param string $file
   * @return boolean
   */
  public static function isPartialFile($file) {
    $hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';
    return $hasPartExtension || strpos($file, '.part/') !== false;
  }
  /**
   * walk over any folders that are not fully scanned yet and scan them
   *
   * If the root is not in the cache yet, the whole storage is scanned recursively.
   * Otherwise the cache is asked for incomplete paths and each one is scanned,
   * until it reports none or reports the same path twice in a row.
   */
  public function backgroundScan() {
    if (!$this->cache->inCache('')) {
      $this->runBackgroundScanJob(function () {
        $this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
      }, '');
      return;
    }

    $previousPath = null;
    while (true) {
      $path = $this->cache->getIncomplete();
      if ($path === false || $path === $previousPath) {
        break;
      }
      $this->runBackgroundScanJob(function () use ($path) {
        $this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
      }, $path);
      // FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
      // to make this possible
      $previousPath = $path;
    }
  }
  /**
   * Run one background scan step and correct the folder size afterwards.
   *
   * After the callback the 'correctFolderSize' hook is emitted and, if the cache
   * is active and is a Cache instance, the folder size of $path is corrected.
   * Invalid or unavailable storages, forbidden paths and lock conflicts are
   * skipped silently so the background job can move on.
   *
   * @param callable $callback performs the actual scan
   * @param string $path the path the callback scans
   */
  private function runBackgroundScanJob(callable $callback, $path) {
    try {
      $callback();
      \OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);
      if (!$this->cacheActive) {
        return;
      }
      if ($this->cache instanceof Cache) {
        $this->cache->correctFolderSize($path);
      }
    } catch (\OCP\Files\StorageInvalidException $invalid) {
      // skip unavailable storages
    } catch (\OCP\Files\StorageNotAvailableException $unavailable) {
      // skip unavailable storages
    } catch (\OCP\Files\ForbiddenException $forbidden) {
      // skip forbidden storages
    } catch (\OCP\Lock\LockedException $locked) {
      // skip paths that are locked right now
    }
  }
  /**
   * Switch cache writes of the scanner on or off.
   *
   * While inactive, scan operations still run and emit their hooks but leave
   * the cache untouched.
   *
   * @param boolean $active The active state of the cache
   */
  public function setCacheActive($active) {
    $this->cacheActive = $active;
  }
  489. }