scrub.c

/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/blkdev.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies their checksums. In case a bad
 * checksum is found or the extent cannot be read, good data will be written
 * back if any can be found.
 *
 * Future enhancements:
 * - To enhance the performance, better read-ahead strategies for the
 *   extent-tree can be employed.
 * - In case an unrepairable extent is encountered, track which files are
 *   affected and report them
 * - In case of a read error on files with nodatasum, map the file and read
 *   the extent to trigger a writeback of the good copy
 * - track and record media errors, throw out bad devices
 * - add a mode to also read unallocated space
 * - make the prefetch cancellable
 */

struct scrub_bio;
struct scrub_page;
struct scrub_dev;

static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_checksum(struct btrfs_work *work);
static int scrub_checksum_data(struct scrub_dev *sdev,
			       struct scrub_page *spag, void *buffer);
static int scrub_checksum_tree_block(struct scrub_dev *sdev,
				     struct scrub_page *spag, u64 logical,
				     void *buffer);
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
static void scrub_fixup_end_io(struct bio *bio, int err);
static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
			  struct page *page);
static void scrub_fixup(struct scrub_bio *sbio, int ix);

#define SCRUB_PAGES_PER_BIO	16	/* 64k per bio */
#define SCRUB_BIOS_PER_DEV	16	/* 1 MB per device in flight */

struct scrub_page {
	u64 flags;  /* extent flags */
	u64 generation;
	u64 mirror_num;
	int have_csum;
	u8 csum[BTRFS_CSUM_SIZE];
};

struct scrub_bio {
	int index;
	struct scrub_dev *sdev;
	struct bio *bio;
	int err;
	u64 logical;
	u64 physical;
	struct scrub_page spag[SCRUB_PAGES_PER_BIO];
	u64 count;
	int next_free;
	struct btrfs_work work;
};

struct scrub_dev {
	struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
	struct btrfs_device *dev;
	int first_free;
	int curr;
	atomic_t in_flight;
	spinlock_t list_lock;
	wait_queue_head_t list_wait;
	u16 csum_size;
	struct list_head csum_list;
	atomic_t cancel_req;
	int readonly;
	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t stat_lock;
};

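/* empty the csum_list and free every queued btrfs_ordered_sum */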
static void scrub_free_csums(struct scrub_dev *sdev)
{
	while (!list_empty(&sdev->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sdev->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

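/*
 * free the pages attached to a bio (each distinct page only once) and
 * drop the bio itself
 */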
static void scrub_free_bio(struct bio *bio)
{
	int i;
	struct page *last_page = NULL;

	if (!bio)
		return;

	for (i = 0; i < bio->bi_vcnt; ++i) {
		if (bio->bi_io_vec[i].bv_page == last_page)
			continue;
		last_page = bio->bi_io_vec[i].bv_page;
		__free_page(last_page);
	}
	bio_put(bio);
}

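/* tear down a scrub_dev: free its bios, any queued csums and the struct itself */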
static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
{
	int i;

	if (!sdev)
		return;

	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct scrub_bio *sbio = sdev->bios[i];

		if (!sbio)
			break;

		scrub_free_bio(sbio->bio);
		kfree(sbio);
	}

	scrub_free_csums(sdev);
	kfree(sdev);
}

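/*
 * allocate a scrub_dev for one device and pre-allocate its pool of
 * SCRUB_BIOS_PER_DEV scrub_bios, chained into a simple free list
 */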
static noinline_for_stack
struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
{
	struct scrub_dev *sdev;
	int i;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;

	sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
	if (!sdev)
		goto nomem;
	sdev->dev = dev;
	for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sdev->bios[i] = sbio;

		sbio->index = i;
		sbio->sdev = sdev;
		sbio->count = 0;
		sbio->work.func = scrub_checksum;

		if (i != SCRUB_BIOS_PER_DEV-1)
			sdev->bios[i]->next_free = i + 1;
		else
			sdev->bios[i]->next_free = -1;
	}
	sdev->first_free = 0;
	sdev->curr = -1;
	atomic_set(&sdev->in_flight, 0);
	atomic_set(&sdev->cancel_req, 0);
	sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
	INIT_LIST_HEAD(&sdev->csum_list);

	spin_lock_init(&sdev->list_lock);
	spin_lock_init(&sdev->stat_lock);
	init_waitqueue_head(&sdev->list_wait);
	return sdev;

nomem:
	scrub_free_dev(sdev);
	return ERR_PTR(-ENOMEM);
}

/*
 * scrub_recheck_error gets called when either verification of the page
 * failed or the bio failed to read, e.g. with EIO. In the latter case,
 * recheck_error gets called for every page in the bio, even though only
 * one may be bad
 */
static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
{
	struct scrub_dev *sdev = sbio->sdev;
	u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;

	if (sbio->err) {
		if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
				   sbio->bio->bi_io_vec[ix].bv_page) == 0) {
			if (scrub_fixup_check(sbio, ix) == 0)
				return 0;
		}
	}

	spin_lock(&sdev->stat_lock);
	++sdev->stat.read_errors;
	spin_unlock(&sdev->stat_lock);

	scrub_fixup(sbio, ix);
	return 1;
}

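/*
 * re-verify a single page in place; dispatches on the extent flags to the
 * data or tree-block checksum helpers and returns 0 if the page checks out
 */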
static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
{
	int ret = 1;
	struct page *page;
	void *buffer;
	u64 flags = sbio->spag[ix].flags;

	page = sbio->bio->bi_io_vec[ix].bv_page;
	buffer = kmap_atomic(page, KM_USER0);
	if (flags & BTRFS_EXTENT_FLAG_DATA) {
		ret = scrub_checksum_data(sbio->sdev,
					  sbio->spag + ix, buffer);
	} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = scrub_checksum_tree_block(sbio->sdev,
						sbio->spag + ix,
						sbio->logical + ix * PAGE_SIZE,
						buffer);
	} else {
		WARN_ON(1);
	}
	kunmap_atomic(buffer, KM_USER0);

	return ret;
}

static void scrub_fixup_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

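/*
 * try to repair one bad page: look up the other mirrors via btrfs_map_block,
 * read until a copy passes scrub_fixup_check and, unless the scrub runs
 * read-only, write the good copy back to the bad location
 */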
static void scrub_fixup(struct scrub_bio *sbio, int ix)
{
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
	struct btrfs_multi_bio *multi = NULL;
	u64 logical = sbio->logical + ix * PAGE_SIZE;
	u64 length;
	int i;
	int ret;
	DECLARE_COMPLETION_ONSTACK(complete);

	if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (sbio->spag[ix].have_csum == 0)) {
		/*
		 * nodatasum, don't try to fix anything
		 * FIXME: we can do better, open the inode and trigger a
		 * writeback
		 */
		goto uncorrectable;
	}

	length = PAGE_SIZE;
	ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
			      &multi, 0);
	if (ret || !multi || length < PAGE_SIZE) {
		printk(KERN_ERR
		       "scrub_fixup: btrfs_map_block failed us for %llu\n",
		       (unsigned long long)logical);
		WARN_ON(1);
		return;
	}

	if (multi->num_stripes == 1)
		/* there aren't any replicas */
		goto uncorrectable;

	/*
	 * first find a good copy
	 */
	for (i = 0; i < multi->num_stripes; ++i) {
		if (i == sbio->spag[ix].mirror_num)
			continue;

		if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
				   multi->stripes[i].physical >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page)) {
			/* I/O-error, this is not a good copy */
			continue;
		}

		if (scrub_fixup_check(sbio, ix) == 0)
			break;
	}
	if (i == multi->num_stripes)
		goto uncorrectable;

	if (!sdev->readonly) {
		/*
		 * bi_io_vec[ix].bv_page now contains good data, write it back
		 */
		if (scrub_fixup_io(WRITE, sdev->dev->bdev,
				   (sbio->physical + ix * PAGE_SIZE) >> 9,
				   sbio->bio->bi_io_vec[ix].bv_page)) {
			/* I/O-error, writeback failed, give up */
			goto uncorrectable;
		}
	}

	kfree(multi);
	spin_lock(&sdev->stat_lock);
	++sdev->stat.corrected_errors;
	spin_unlock(&sdev->stat_lock);

	if (printk_ratelimit())
		printk(KERN_ERR "btrfs: fixed up at %llu\n",
		       (unsigned long long)logical);
	return;

uncorrectable:
	kfree(multi);
	spin_lock(&sdev->stat_lock);
	++sdev->stat.uncorrectable_errors;
	spin_unlock(&sdev->stat_lock);

	if (printk_ratelimit())
		printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
		       (unsigned long long)logical);
}

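/*
 * synchronous single-page read or write used by the fixup path;
 * returns nonzero if the bio did not complete uptodate
 */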
static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
			  struct page *page)
{
	struct bio *bio = NULL;
	int ret;
	DECLARE_COMPLETION_ONSTACK(complete);

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_end_io = scrub_fixup_end_io;
	bio->bi_private = &complete;
	submit_bio(rw, bio);

	/* this will also unplug the queue */
	wait_for_completion(&complete);

	ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_put(bio);
	return ret;
}

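/* bio completion: record the error and hand verification off to the scrub worker */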
static void scrub_bio_end_io(struct bio *bio, int err)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;

	sbio->err = err;
	sbio->bio = bio;

	btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
}

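/*
 * worker function: verify every page of a completed bio (or recheck all of
 * them after an I/O error), then put the scrub_bio back on the free list
 */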
static void scrub_checksum(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_dev *sdev = sbio->sdev;
	struct page *page;
	void *buffer;
	int i;
	u64 flags;
	u64 logical;
	int ret;

	if (sbio->err) {
		ret = 0;
		for (i = 0; i < sbio->count; ++i)
			ret |= scrub_recheck_error(sbio, i);
		if (!ret) {
			spin_lock(&sdev->stat_lock);
			++sdev->stat.unverified_errors;
			spin_unlock(&sdev->stat_lock);
		}

		sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
		sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
		sbio->bio->bi_phys_segments = 0;
		sbio->bio->bi_idx = 0;

		for (i = 0; i < sbio->count; i++) {
			struct bio_vec *bi;
			bi = &sbio->bio->bi_io_vec[i];
			bi->bv_offset = 0;
			bi->bv_len = PAGE_SIZE;
		}
		goto out;
	}
	for (i = 0; i < sbio->count; ++i) {
		page = sbio->bio->bi_io_vec[i].bv_page;
		buffer = kmap_atomic(page, KM_USER0);
		flags = sbio->spag[i].flags;
		logical = sbio->logical + i * PAGE_SIZE;
		ret = 0;
		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			ret = scrub_checksum_data(sdev, sbio->spag + i, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = scrub_checksum_tree_block(sdev, sbio->spag + i,
							logical, buffer);
		} else if (flags & BTRFS_EXTENT_FLAG_SUPER) {
			BUG_ON(i);
			(void)scrub_checksum_super(sbio, buffer);
		} else {
			WARN_ON(1);
		}
		kunmap_atomic(buffer, KM_USER0);
		if (ret) {
			ret = scrub_recheck_error(sbio, i);
			if (!ret) {
				spin_lock(&sdev->stat_lock);
				++sdev->stat.unverified_errors;
				spin_unlock(&sdev->stat_lock);
			}
		}
	}

out:
	scrub_free_bio(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sdev->list_lock);
	sbio->next_free = sdev->first_free;
	sdev->first_free = sbio->index;
	spin_unlock(&sdev->list_lock);
	atomic_dec(&sdev->in_flight);
	wake_up(&sdev->list_wait);
}

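/*
 * verify one data page against the csum stored in its scrub_page and
 * update the statistics
 */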
static int scrub_checksum_data(struct scrub_dev *sdev,
			       struct scrub_page *spag, void *buffer)
{
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;
	struct btrfs_root *root = sdev->dev->dev_root;

	if (!spag->have_csum)
		return 0;

	crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, spag->csum, sdev->csum_size))
		fail = 1;

	spin_lock(&sdev->stat_lock);
	++sdev->stat.data_extents_scrubbed;
	sdev->stat.data_bytes_scrubbed += PAGE_SIZE;
	if (fail)
		++sdev->stat.csum_errors;
	spin_unlock(&sdev->stat_lock);

	return fail;
}

static int scrub_checksum_tree_block(struct scrub_dev *sdev,
				     struct scrub_page *spag, u64 logical,
				     void *buffer)
{
	struct btrfs_header *h;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;
	int crc_fail = 0;

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	h = (struct btrfs_header *)buffer;

	if (logical != le64_to_cpu(h->bytenr))
		++fail;

	if (spag->generation != le64_to_cpu(h->generation))
		++fail;

	if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
		++fail;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		++fail;

	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
			      PAGE_SIZE - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, sdev->csum_size))
		++crc_fail;

	spin_lock(&sdev->stat_lock);
	++sdev->stat.tree_extents_scrubbed;
	sdev->stat.tree_bytes_scrubbed += PAGE_SIZE;
	if (crc_fail)
		++sdev->stat.csum_errors;
	if (fail)
		++sdev->stat.verify_errors;
	spin_unlock(&sdev->stat_lock);

	return fail || crc_fail;
}

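/*
 * verify a super block copy: bytenr, generation, fsid and checksum;
 * errors are only counted, the block is rewritten at the next commit
 */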
static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
{
	struct btrfs_super_block *s;
	u64 logical;
	struct scrub_dev *sdev = sbio->sdev;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	int fail = 0;

	s = (struct btrfs_super_block *)buffer;
	logical = sbio->logical;

	if (logical != le64_to_cpu(s->bytenr))
		++fail;

	if (sbio->spag[0].generation != le64_to_cpu(s->generation))
		++fail;

	if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
		++fail;

	crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc,
			      PAGE_SIZE - BTRFS_CSUM_SIZE);
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, s->csum, sbio->sdev->csum_size))
		++fail;

	if (fail) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sdev->stat_lock);
		++sdev->stat.super_errors;
		spin_unlock(&sdev->stat_lock);
	}

	return fail;
}

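/*
 * turn the currently assembled scrub_bio into a real bio with freshly
 * allocated pages and submit it as a single read
 */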
static int scrub_submit(struct scrub_dev *sdev)
{
	struct scrub_bio *sbio;
	struct bio *bio;
	int i;

	if (sdev->curr == -1)
		return 0;

	sbio = sdev->bios[sdev->curr];

	bio = bio_alloc(GFP_NOFS, sbio->count);
	if (!bio)
		goto nomem;

	bio->bi_private = sbio;
	bio->bi_end_io = scrub_bio_end_io;
	bio->bi_bdev = sdev->dev->bdev;
	bio->bi_sector = sbio->physical >> 9;

	for (i = 0; i < sbio->count; ++i) {
		struct page *page;
		int ret;

		page = alloc_page(GFP_NOFS);
		if (!page)
			goto nomem;

		ret = bio_add_page(bio, page, PAGE_SIZE, 0);
		if (!ret) {
			__free_page(page);
			goto nomem;
		}
	}

	sbio->err = 0;
	sdev->curr = -1;
	atomic_inc(&sdev->in_flight);

	submit_bio(READ, bio);

	return 0;

nomem:
	scrub_free_bio(bio);

	return -ENOMEM;
}

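/*
 * queue one page for scrubbing: grab a free scrub_bio (waiting if none is
 * available) and flush it once it is full, non-contiguous or forced
 */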
static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
		      u64 physical, u64 flags, u64 gen, u64 mirror_num,
		      u8 *csum, int force)
{
	struct scrub_bio *sbio;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sdev->curr == -1) {
		spin_lock(&sdev->list_lock);
		sdev->curr = sdev->first_free;
		if (sdev->curr != -1) {
			sdev->first_free = sdev->bios[sdev->curr]->next_free;
			sdev->bios[sdev->curr]->next_free = -1;
			sdev->bios[sdev->curr]->count = 0;
			spin_unlock(&sdev->list_lock);
		} else {
			spin_unlock(&sdev->list_lock);
			wait_event(sdev->list_wait, sdev->first_free != -1);
		}
	}
	sbio = sdev->bios[sdev->curr];
	if (sbio->count == 0) {
		sbio->physical = physical;
		sbio->logical = logical;
	} else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
		   sbio->logical + sbio->count * PAGE_SIZE != logical) {
		int ret;

		ret = scrub_submit(sdev);
		if (ret)
			return ret;
		goto again;
	}
	sbio->spag[sbio->count].flags = flags;
	sbio->spag[sbio->count].generation = gen;
	sbio->spag[sbio->count].have_csum = 0;
	sbio->spag[sbio->count].mirror_num = mirror_num;
	if (csum) {
		sbio->spag[sbio->count].have_csum = 1;
		memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
	}
	++sbio->count;
	if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
		int ret;

		ret = scrub_submit(sdev);
		if (ret)
			return ret;
	}

	return 0;
}

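/*
 * look up the checksum for a logical address in the prefetched csum_list,
 * discarding entries that lie entirely before it
 */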
static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
			   u8 *csum)
{
	struct btrfs_ordered_sum *sum = NULL;
	int ret = 0;
	unsigned long i;
	unsigned long num_sectors;
	u32 sectorsize = sdev->dev->dev_root->sectorsize;

	while (!list_empty(&sdev->csum_list)) {
		sum = list_first_entry(&sdev->csum_list,
				       struct btrfs_ordered_sum, list);
		if (sum->bytenr > logical)
			return 0;
		if (sum->bytenr + sum->len > logical)
			break;

		++sdev->stat.csum_discards;
		list_del(&sum->list);
		kfree(sum);
		sum = NULL;
	}
	if (!sum)
		return 0;

	num_sectors = sum->len / sectorsize;
	for (i = 0; i < num_sectors; ++i) {
		if (sum->sums[i].bytenr == logical) {
			memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
			ret = 1;
			break;
		}
	}
	if (ret && i == num_sectors - 1) {
		list_del(&sum->list);
		kfree(sum);
	}
	return ret;
}

/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
			u64 physical, u64 flags, u64 gen, u64 mirror_num)
{
	int ret;
	u8 csum[BTRFS_CSUM_SIZE];

	while (len) {
		u64 l = min_t(u64, len, PAGE_SIZE);
		int have_csum = 0;

		if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
			have_csum = scrub_find_csum(sdev, logical, l, csum);
			if (have_csum == 0)
				++sdev->stat.no_csum;
		}
		ret = scrub_page(sdev, logical, l, physical, flags, gen,
				 mirror_num, have_csum ? csum : NULL, 0);
		if (ret)
			return ret;
		len -= l;
		logical += l;
		physical += l;
	}
	return 0;
}

static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
	struct map_lookup *map, int num, u64 base, u64 length)
{
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
	struct btrfs_root *root = fs_info->extent_root;
	struct btrfs_root *csum_root = fs_info->csum_root;
	struct btrfs_extent_item *extent;
	struct blk_plug plug;
	u64 flags;
	int ret;
	int slot;
	int i;
	u64 nstripes;
	int start_stripe;
	struct extent_buffer *l;
	struct btrfs_key key;
	u64 physical;
	u64 logical;
	u64 generation;
	u64 mirror_num;

	u64 increment = map->stripe_len;
	u64 offset;

	nstripes = length;
	offset = 0;
	do_div(nstripes, map->stripe_len);
	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		offset = map->stripe_len * num;
		increment = map->stripe_len * map->num_stripes;
		mirror_num = 0;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;
		offset = map->stripe_len * (num / map->sub_stripes);
		increment = map->stripe_len * factor;
		mirror_num = num % map->sub_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		increment = map->stripe_len;
		mirror_num = num % map->num_stripes;
	} else {
		increment = map->stripe_len;
		mirror_num = 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * find all extents for each stripe and just read them to get
	 * them into the page cache
	 * FIXME: we can do better. build a more intelligent prefetching
	 */
	logical = base + offset;
	physical = map->stripes[num].physical;
	ret = 0;
	for (i = 0; i < nstripes; ++i) {
		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out_noplug;

		/*
		 * we might miss half an extent here, but that doesn't matter,
		 * as it's only the prefetch
		 */
		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out_noplug;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.objectid >= logical + map->stripe_len)
				break;

			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
		cond_resched();
	}

	/*
	 * collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB
	 */
	start_stripe = 0;
	blk_start_plug(&plug);
again:
	logical = base + offset + start_stripe * increment;
	for (i = start_stripe; i < nstripes; ++i) {
		ret = btrfs_lookup_csums_range(csum_root, logical,
					       logical + map->stripe_len - 1,
					       &sdev->csum_list, 1);
		if (ret)
			goto out;

		logical += increment;
		cond_resched();
	}
	/*
	 * now find all extents for each stripe and scrub them
	 */
	logical = base + offset + start_stripe * increment;
	physical = map->stripes[num].physical + start_stripe * map->stripe_len;
	ret = 0;
	for (i = start_stripe; i < nstripes; ++i) {
		/*
		 * canceled?
		 */
		if (atomic_read(&fs_info->scrub_cancel_req) ||
		    atomic_read(&sdev->cancel_req)) {
			ret = -ECANCELED;
			goto out;
		}
		/*
		 * check to see if we have to pause
		 */
		if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
			scrub_submit(sdev);
			wait_event(sdev->list_wait,
				   atomic_read(&sdev->in_flight) == 0);
			atomic_inc(&fs_info->scrubs_paused);
			wake_up(&fs_info->scrub_pause_wait);
			mutex_lock(&fs_info->scrub_lock);
			while (atomic_read(&fs_info->scrub_pause_req)) {
				mutex_unlock(&fs_info->scrub_lock);
				wait_event(fs_info->scrub_pause_wait,
				   atomic_read(&fs_info->scrub_pause_req) == 0);
				mutex_lock(&fs_info->scrub_lock);
			}
			atomic_dec(&fs_info->scrubs_paused);
			mutex_unlock(&fs_info->scrub_lock);
			wake_up(&fs_info->scrub_pause_wait);
			scrub_free_csums(sdev);
			start_stripe = i;
			goto again;
		}

		key.objectid = logical;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = (u64)0;

		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = btrfs_previous_item(root, path, 0,
						  BTRFS_EXTENT_ITEM_KEY);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
				btrfs_release_path(path);
				ret = btrfs_search_slot(NULL, root, &key,
							path, 0, 0);
				if (ret < 0)
					goto out;
			}
		}

		while (1) {
			l = path->nodes[0];
			slot = path->slots[0];
			if (slot >= btrfs_header_nritems(l)) {
				ret = btrfs_next_leaf(root, path);
				if (ret == 0)
					continue;
				if (ret < 0)
					goto out;

				break;
			}
			btrfs_item_key_to_cpu(l, &key, slot);

			if (key.objectid + key.offset <= logical)
				goto next;

			if (key.objectid >= logical + map->stripe_len)
				break;

			if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
				goto next;

			extent = btrfs_item_ptr(l, slot,
						struct btrfs_extent_item);
			flags = btrfs_extent_flags(l, extent);
			generation = btrfs_extent_generation(l, extent);

			if (key.objectid < logical &&
			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
				printk(KERN_ERR
				       "btrfs scrub: tree block %llu spanning "
				       "stripes, ignored. logical=%llu\n",
				       (unsigned long long)key.objectid,
				       (unsigned long long)logical);
				goto next;
			}

			/*
			 * trim extent to this stripe
			 */
			if (key.objectid < logical) {
				key.offset -= logical - key.objectid;
				key.objectid = logical;
			}
			if (key.objectid + key.offset >
			    logical + map->stripe_len) {
				key.offset = logical + map->stripe_len -
					     key.objectid;
			}

			ret = scrub_extent(sdev, key.objectid, key.offset,
					   key.objectid - logical + physical,
					   flags, generation, mirror_num);
			if (ret)
				goto out;

next:
			path->slots[0]++;
		}
		btrfs_release_path(path);
		logical += increment;
		physical += map->stripe_len;
		spin_lock(&sdev->stat_lock);
		sdev->stat.last_physical = physical;
		spin_unlock(&sdev->stat_lock);
	}
	/* push queued extents */
	scrub_submit(sdev);

out:
	blk_finish_plug(&plug);
out_noplug:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}

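/* scrub every stripe of the given chunk that resides on this device */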
static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
	u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
{
	struct btrfs_mapping_tree *map_tree =
		&sdev->dev->dev_root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int i;
	int ret = -EINVAL;

	read_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
	read_unlock(&map_tree->map_tree.lock);

	if (!em)
		return -EINVAL;

	map = (struct map_lookup *)em->bdev;
	if (em->start != chunk_offset)
		goto out;

	if (em->len < length)
		goto out;

	for (i = 0; i < map->num_stripes; ++i) {
		if (map->stripes[i].dev == sdev->dev) {
			ret = scrub_stripe(sdev, map, i, chunk_offset, length);
			if (ret)
				goto out;
		}
	}
out:
	free_extent_map(em);

	return ret;
}

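/*
 * walk all dev extents of this device in [start, end) and scrub the
 * chunks they belong to
 */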
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_root *root = sdev->dev->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 length;
	u64 chunk_tree;
	u64 chunk_objectid;
	u64 chunk_offset;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group_cache *cache;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = sdev->dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret)
					break;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.objectid != sdev->dev->devid)
			break;

		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		if (found_key.offset + length <= start) {
			key.offset = found_key.offset + length;
			btrfs_release_path(path);
			continue;
		}

		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
		if (!cache) {
			ret = -ENOENT;
			break;
		}
		ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
				  chunk_offset, length);
		btrfs_put_block_group(cache);
		if (ret)
			break;

		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	/*
	 * ret can still be 1 from search_slot or next_leaf,
	 * that's not an error
	 */
	return ret < 0 ? ret : 0;
}

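/* queue all super block copies of this device for verification */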
static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
{
	int i;
	u64 bytenr;
	u64 gen;
	int ret;
	struct btrfs_device *device = sdev->dev;
	struct btrfs_root *root = device->dev_root;

	gen = root->fs_info->last_trans_committed;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
			break;

		ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr,
				 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
		if (ret)
			return ret;
	}
	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

	return 0;
}

/*
 * get a reference count on fs_info->scrub_workers. start worker if necessary
 */
static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (fs_info->scrub_workers_refcnt == 0) {
		btrfs_init_workers(&fs_info->scrub_workers, "scrub",
			   fs_info->thread_pool_size, &fs_info->generic_worker);
		fs_info->scrub_workers.idle_thresh = 4;
		btrfs_start_workers(&fs_info->scrub_workers, 1);
	}
	++fs_info->scrub_workers_refcnt;
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (--fs_info->scrub_workers_refcnt == 0)
		btrfs_stop_workers(&fs_info->scrub_workers);
	WARN_ON(fs_info->scrub_workers_refcnt < 0);
	mutex_unlock(&fs_info->scrub_lock);
}

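/*
 * scrub one device: verify the super blocks first, then all allocated
 * chunks; progress and error counts are returned via *progress
 */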
int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
		    struct btrfs_scrub_progress *progress, int readonly)
{
	struct scrub_dev *sdev;
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;
	struct btrfs_device *dev;

	if (btrfs_fs_closing(root->fs_info))
		return -EINVAL;

	/*
	 * check some assumptions
	 */
	if (root->sectorsize != PAGE_SIZE ||
	    root->sectorsize != root->leafsize ||
	    root->sectorsize != root->nodesize) {
		printk(KERN_ERR "btrfs_scrub: size assumptions fail\n");
		return -EINVAL;
	}

	ret = scrub_workers_get(root);
	if (ret)
		return ret;

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (!dev || dev->missing) {
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -ENODEV;
	}
	mutex_lock(&fs_info->scrub_lock);

	if (!dev->in_fs_metadata) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -ENODEV;
	}

	if (dev->scrub_device) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return -EINPROGRESS;
	}
	sdev = scrub_setup_dev(dev);
	if (IS_ERR(sdev)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(root);
		return PTR_ERR(sdev);
	}
	sdev->readonly = readonly;
	dev->scrub_device = sdev;

	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	down_read(&fs_info->scrub_super_lock);
	ret = scrub_supers(sdev);
	up_read(&fs_info->scrub_super_lock);

	if (!ret)
		ret = scrub_enumerate_chunks(sdev, start, end);

	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);

	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	if (progress)
		memcpy(progress, &sdev->stat, sizeof(*progress));

	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_device = NULL;
	mutex_unlock(&fs_info->scrub_lock);

	scrub_free_dev(sdev);
	scrub_workers_put(root);

	return ret;
}

int btrfs_scrub_pause(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrub_pause_req);
	while (atomic_read(&fs_info->scrubs_paused) !=
	       atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_paused) ==
			   atomic_read(&fs_info->scrubs_running));
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_continue(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	atomic_dec(&fs_info->scrub_pause_req);
	wake_up(&fs_info->scrub_pause_wait);
	return 0;
}

int btrfs_scrub_pause_super(struct btrfs_root *root)
{
	down_write(&root->fs_info->scrub_super_lock);
	return 0;
}

int btrfs_scrub_continue_super(struct btrfs_root *root)
{
	up_write(&root->fs_info->scrub_super_lock);
	return 0;
}

int btrfs_scrub_cancel(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	if (!atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}

	atomic_inc(&fs_info->scrub_cancel_req);
	while (atomic_read(&fs_info->scrubs_running)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrubs_running) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
	atomic_dec(&fs_info->scrub_cancel_req);
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct scrub_dev *sdev;

	mutex_lock(&fs_info->scrub_lock);
	sdev = dev->scrub_device;
	if (!sdev) {
		mutex_unlock(&fs_info->scrub_lock);
		return -ENOTCONN;
	}
	atomic_inc(&sdev->cancel_req);
	while (dev->scrub_device) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   dev->scrub_device == NULL);
		mutex_lock(&fs_info->scrub_lock);
	}
	mutex_unlock(&fs_info->scrub_lock);

	return 0;
}

int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_device *dev;
	int ret;

	/*
	 * we have to hold the device_list_mutex here so the device
	 * does not go away in cancel_dev. FIXME: find a better solution
	 */
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (!dev) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		return -ENODEV;
	}
	ret = btrfs_scrub_cancel_dev(root, dev);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return ret;
}

int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
			 struct btrfs_scrub_progress *progress)
{
	struct btrfs_device *dev;
	struct scrub_dev *sdev = NULL;

	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(root, devid, NULL, NULL);
	if (dev)
		sdev = dev->scrub_device;
	if (sdev)
		memcpy(progress, &sdev->stat, sizeof(*progress));
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
}