super.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634
  1. /*
  2. * fs/logfs/super.c
  3. *
  4. * As should be obvious for Linux kernel code, license is GPLv2
  5. *
  6. * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
  7. *
  8. * Generally contains mount/umount code and also serves as a dump area for
  9. * any functions that don't fit elsewhere and neither justify a file of their
  10. * own.
  11. */
  12. #include "logfs.h"
  13. #include <linux/bio.h>
  14. #include <linux/mtd/mtd.h>
  15. #include <linux/statfs.h>
  16. #include <linux/buffer_head.h>
  17. static DEFINE_MUTEX(emergency_mutex);
  18. static struct page *emergency_page;
  19. struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
  20. {
  21. filler_t *filler = (filler_t *)mapping->a_ops->readpage;
  22. struct page *page;
  23. int err;
  24. page = read_cache_page(mapping, index, filler, NULL);
  25. if (page)
  26. return page;
  27. /* No more pages available, switch to emergency page */
  28. printk(KERN_INFO"Logfs: Using emergency page\n");
  29. mutex_lock(&emergency_mutex);
  30. err = filler(NULL, emergency_page);
  31. if (err) {
  32. mutex_unlock(&emergency_mutex);
  33. printk(KERN_EMERG"Logfs: Error reading emergency page\n");
  34. return ERR_PTR(err);
  35. }
  36. return emergency_page;
  37. }
  38. void emergency_read_end(struct page *page)
  39. {
  40. if (page == emergency_page)
  41. mutex_unlock(&emergency_mutex);
  42. else
  43. page_cache_release(page);
  44. }
  45. static void dump_segfile(struct super_block *sb)
  46. {
  47. struct logfs_super *super = logfs_super(sb);
  48. struct logfs_segment_entry se;
  49. u32 segno;
  50. for (segno = 0; segno < super->s_no_segs; segno++) {
  51. logfs_get_segment_entry(sb, segno, &se);
  52. printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
  53. be32_to_cpu(se.valid));
  54. if (++segno < super->s_no_segs) {
  55. logfs_get_segment_entry(sb, segno, &se);
  56. printk(" %6x %8x", be32_to_cpu(se.ec_level),
  57. be32_to_cpu(se.valid));
  58. }
  59. if (++segno < super->s_no_segs) {
  60. logfs_get_segment_entry(sb, segno, &se);
  61. printk(" %6x %8x", be32_to_cpu(se.ec_level),
  62. be32_to_cpu(se.valid));
  63. }
  64. if (++segno < super->s_no_segs) {
  65. logfs_get_segment_entry(sb, segno, &se);
  66. printk(" %6x %8x", be32_to_cpu(se.ec_level),
  67. be32_to_cpu(se.valid));
  68. }
  69. printk("\n");
  70. }
  71. }
  72. /*
  73. * logfs_crash_dump - dump debug information to device
  74. *
  75. * The LogFS superblock only occupies part of a segment. This function will
  76. * write as much debug information as it can gather into the spare space.
  77. */
  78. void logfs_crash_dump(struct super_block *sb)
  79. {
  80. dump_segfile(sb);
  81. }
  82. /*
  83. * TODO: move to lib/string.c
  84. */
  85. /**
  86. * memchr_inv - Find a character in an area of memory.
  87. * @s: The memory area
  88. * @c: The byte to search for
  89. * @n: The size of the area.
  90. *
  91. * returns the address of the first character other than @c, or %NULL
  92. * if the whole buffer contains just @c.
  93. */
  94. void *memchr_inv(const void *s, int c, size_t n)
  95. {
  96. const unsigned char *p = s;
  97. while (n-- != 0)
  98. if ((unsigned char)c != *p++)
  99. return (void *)(p - 1);
  100. return NULL;
  101. }
  102. /*
  103. * FIXME: There should be a reserve for root, similar to ext2.
  104. */
  105. int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
  106. {
  107. struct super_block *sb = dentry->d_sb;
  108. struct logfs_super *super = logfs_super(sb);
  109. stats->f_type = LOGFS_MAGIC_U32;
  110. stats->f_bsize = sb->s_blocksize;
  111. stats->f_blocks = super->s_size >> LOGFS_BLOCK_BITS >> 3;
  112. stats->f_bfree = super->s_free_bytes >> sb->s_blocksize_bits;
  113. stats->f_bavail = super->s_free_bytes >> sb->s_blocksize_bits;
  114. stats->f_files = 0;
  115. stats->f_ffree = 0;
  116. stats->f_namelen = LOGFS_MAX_NAMELEN;
  117. return 0;
  118. }
  119. static int logfs_sb_set(struct super_block *sb, void *_super)
  120. {
  121. struct logfs_super *super = _super;
  122. sb->s_fs_info = super;
  123. sb->s_mtd = super->s_mtd;
  124. sb->s_bdev = super->s_bdev;
  125. return 0;
  126. }
  127. static int logfs_sb_test(struct super_block *sb, void *_super)
  128. {
  129. struct logfs_super *super = _super;
  130. struct mtd_info *mtd = super->s_mtd;
  131. if (mtd && sb->s_mtd == mtd)
  132. return 1;
  133. if (super->s_bdev && sb->s_bdev == super->s_bdev)
  134. return 1;
  135. return 0;
  136. }
  137. static void set_segment_header(struct logfs_segment_header *sh, u8 type,
  138. u8 level, u32 segno, u32 ec)
  139. {
  140. sh->pad = 0;
  141. sh->type = type;
  142. sh->level = level;
  143. sh->segno = cpu_to_be32(segno);
  144. sh->ec = cpu_to_be32(ec);
  145. sh->gec = cpu_to_be64(segno);
  146. sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
  147. }
  148. static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
  149. u32 segno, u32 ec)
  150. {
  151. struct logfs_super *super = logfs_super(sb);
  152. struct logfs_segment_header *sh = &ds->ds_sh;
  153. int i;
  154. memset(ds, 0, sizeof(*ds));
  155. set_segment_header(sh, SEG_SUPER, 0, segno, ec);
  156. ds->ds_ifile_levels = super->s_ifile_levels;
  157. ds->ds_iblock_levels = super->s_iblock_levels;
  158. ds->ds_data_levels = super->s_data_levels; /* XXX: Remove */
  159. ds->ds_segment_shift = super->s_segshift;
  160. ds->ds_block_shift = sb->s_blocksize_bits;
  161. ds->ds_write_shift = super->s_writeshift;
  162. ds->ds_filesystem_size = cpu_to_be64(super->s_size);
  163. ds->ds_segment_size = cpu_to_be32(super->s_segsize);
  164. ds->ds_bad_seg_reserve = cpu_to_be32(super->s_bad_seg_reserve);
  165. ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
  166. ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
  167. ds->ds_feature_compat = cpu_to_be64(super->s_feature_compat);
  168. ds->ds_feature_flags = cpu_to_be64(super->s_feature_flags);
  169. ds->ds_root_reserve = cpu_to_be64(super->s_root_reserve);
  170. ds->ds_speed_reserve = cpu_to_be64(super->s_speed_reserve);
  171. journal_for_each(i)
  172. ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
  173. ds->ds_magic = cpu_to_be64(LOGFS_MAGIC);
  174. ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
  175. LOGFS_SEGMENT_HEADERSIZE + 12);
  176. }
  177. static int write_one_sb(struct super_block *sb,
  178. struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
  179. {
  180. struct logfs_super *super = logfs_super(sb);
  181. struct logfs_disk_super *ds;
  182. struct logfs_segment_entry se;
  183. struct page *page;
  184. u64 ofs;
  185. u32 ec, segno;
  186. int err;
  187. page = find_sb(sb, &ofs);
  188. if (!page)
  189. return -EIO;
  190. ds = page_address(page);
  191. segno = seg_no(sb, ofs);
  192. logfs_get_segment_entry(sb, segno, &se);
  193. ec = be32_to_cpu(se.ec_level) >> 4;
  194. ec++;
  195. logfs_set_segment_erased(sb, segno, ec, 0);
  196. logfs_write_ds(sb, ds, segno, ec);
  197. err = super->s_devops->write_sb(sb, page);
  198. page_cache_release(page);
  199. return err;
  200. }
  201. int logfs_write_sb(struct super_block *sb)
  202. {
  203. struct logfs_super *super = logfs_super(sb);
  204. int err;
  205. /* First superblock */
  206. err = write_one_sb(sb, super->s_devops->find_first_sb);
  207. if (err)
  208. return err;
  209. /* Last superblock */
  210. err = write_one_sb(sb, super->s_devops->find_last_sb);
  211. if (err)
  212. return err;
  213. return 0;
  214. }
  215. static int ds_cmp(const void *ds0, const void *ds1)
  216. {
  217. size_t len = sizeof(struct logfs_disk_super);
  218. /* We know the segment headers differ, so ignore them */
  219. len -= LOGFS_SEGMENT_HEADERSIZE;
  220. ds0 += LOGFS_SEGMENT_HEADERSIZE;
  221. ds1 += LOGFS_SEGMENT_HEADERSIZE;
  222. return memcmp(ds0, ds1, len);
  223. }
  224. static int logfs_recover_sb(struct super_block *sb)
  225. {
  226. struct logfs_super *super = logfs_super(sb);
  227. struct logfs_disk_super _ds0, *ds0 = &_ds0;
  228. struct logfs_disk_super _ds1, *ds1 = &_ds1;
  229. int err, valid0, valid1;
  230. /* read first superblock */
  231. err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
  232. if (err)
  233. return err;
  234. /* read last superblock */
  235. err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
  236. if (err)
  237. return err;
  238. valid0 = logfs_check_ds(ds0) == 0;
  239. valid1 = logfs_check_ds(ds1) == 0;
  240. if (!valid0 && valid1) {
  241. printk(KERN_INFO"First superblock is invalid - fixing.\n");
  242. return write_one_sb(sb, super->s_devops->find_first_sb);
  243. }
  244. if (valid0 && !valid1) {
  245. printk(KERN_INFO"Last superblock is invalid - fixing.\n");
  246. return write_one_sb(sb, super->s_devops->find_last_sb);
  247. }
  248. if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
  249. printk(KERN_INFO"Superblocks don't match - fixing.\n");
  250. return write_one_sb(sb, super->s_devops->find_last_sb);
  251. }
  252. /* If neither is valid now, something's wrong. Didn't we properly
  253. * check them before?!? */
  254. BUG_ON(!valid0 && !valid1);
  255. return 0;
  256. }
  257. static int logfs_make_writeable(struct super_block *sb)
  258. {
  259. int err;
  260. /* Repair any broken superblock copies */
  261. err = logfs_recover_sb(sb);
  262. if (err)
  263. return err;
  264. /* Check areas for trailing unaccounted data */
  265. err = logfs_check_areas(sb);
  266. if (err)
  267. return err;
  268. err = logfs_open_segfile(sb);
  269. if (err)
  270. return err;
  271. /* Do one GC pass before any data gets dirtied */
  272. logfs_gc_pass(sb);
  273. /* after all initializations are done, replay the journal
  274. * for rw-mounts, if necessary */
  275. err = logfs_replay_journal(sb);
  276. if (err)
  277. return err;
  278. return 0;
  279. }
  280. static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
  281. {
  282. struct inode *rootdir;
  283. int err;
  284. /* root dir */
  285. rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
  286. if (IS_ERR(rootdir))
  287. goto fail;
  288. sb->s_root = d_alloc_root(rootdir);
  289. if (!sb->s_root)
  290. goto fail;
  291. /* FIXME: check for read-only mounts */
  292. err = logfs_make_writeable(sb);
  293. if (err)
  294. goto fail2;
  295. log_super("LogFS: Finished mounting\n");
  296. simple_set_mnt(mnt, sb);
  297. return 0;
  298. fail2:
  299. iput(rootdir);
  300. fail:
  301. iput(logfs_super(sb)->s_master_inode);
  302. return -EIO;
  303. }
  304. int logfs_check_ds(struct logfs_disk_super *ds)
  305. {
  306. struct logfs_segment_header *sh = &ds->ds_sh;
  307. if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
  308. return -EINVAL;
  309. if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
  310. return -EINVAL;
  311. if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
  312. LOGFS_SEGMENT_HEADERSIZE + 12))
  313. return -EINVAL;
  314. return 0;
  315. }
  316. static struct page *find_super_block(struct super_block *sb)
  317. {
  318. struct logfs_super *super = logfs_super(sb);
  319. struct page *first, *last;
  320. first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
  321. if (!first || IS_ERR(first))
  322. return NULL;
  323. last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
  324. if (!last || IS_ERR(first)) {
  325. page_cache_release(first);
  326. return NULL;
  327. }
  328. if (!logfs_check_ds(page_address(first))) {
  329. page_cache_release(last);
  330. return first;
  331. }
  332. /* First one didn't work, try the second superblock */
  333. if (!logfs_check_ds(page_address(last))) {
  334. page_cache_release(first);
  335. return last;
  336. }
  337. /* Neither worked, sorry folks */
  338. page_cache_release(first);
  339. page_cache_release(last);
  340. return NULL;
  341. }
  342. static int __logfs_read_sb(struct super_block *sb)
  343. {
  344. struct logfs_super *super = logfs_super(sb);
  345. struct page *page;
  346. struct logfs_disk_super *ds;
  347. int i;
  348. page = find_super_block(sb);
  349. if (!page)
  350. return -EIO;
  351. ds = page_address(page);
  352. super->s_size = be64_to_cpu(ds->ds_filesystem_size);
  353. super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
  354. super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
  355. super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
  356. super->s_segsize = 1 << ds->ds_segment_shift;
  357. super->s_segmask = (1 << ds->ds_segment_shift) - 1;
  358. super->s_segshift = ds->ds_segment_shift;
  359. sb->s_blocksize = 1 << ds->ds_block_shift;
  360. sb->s_blocksize_bits = ds->ds_block_shift;
  361. super->s_writesize = 1 << ds->ds_write_shift;
  362. super->s_writeshift = ds->ds_write_shift;
  363. super->s_no_segs = super->s_size >> super->s_segshift;
  364. super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
  365. super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
  366. super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
  367. super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
  368. super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
  369. journal_for_each(i)
  370. super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
  371. super->s_ifile_levels = ds->ds_ifile_levels;
  372. super->s_iblock_levels = ds->ds_iblock_levels;
  373. super->s_data_levels = ds->ds_data_levels;
  374. super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
  375. + super->s_data_levels;
  376. page_cache_release(page);
  377. return 0;
  378. }
  379. static int logfs_read_sb(struct super_block *sb)
  380. {
  381. struct logfs_super *super = logfs_super(sb);
  382. int ret;
  383. super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
  384. if (!super->s_btree_pool)
  385. return -ENOMEM;
  386. btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
  387. btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
  388. ret = logfs_init_mapping(sb);
  389. if (ret)
  390. return ret;
  391. ret = __logfs_read_sb(sb);
  392. if (ret)
  393. return ret;
  394. mutex_init(&super->s_dirop_mutex);
  395. mutex_init(&super->s_object_alias_mutex);
  396. INIT_LIST_HEAD(&super->s_freeing_list);
  397. ret = logfs_init_rw(sb);
  398. if (ret)
  399. return ret;
  400. ret = logfs_init_areas(sb);
  401. if (ret)
  402. return ret;
  403. ret = logfs_init_gc(sb);
  404. if (ret)
  405. return ret;
  406. ret = logfs_init_journal(sb);
  407. if (ret)
  408. return ret;
  409. return 0;
  410. }
  411. static void logfs_kill_sb(struct super_block *sb)
  412. {
  413. struct logfs_super *super = logfs_super(sb);
  414. log_super("LogFS: Start unmounting\n");
  415. /* Alias entries slow down mount, so evict as many as possible */
  416. sync_filesystem(sb);
  417. logfs_write_anchor(super->s_master_inode);
  418. /*
  419. * From this point on alias entries are simply dropped - and any
  420. * writes to the object store are considered bugs.
  421. */
  422. super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
  423. log_super("LogFS: Now in shutdown\n");
  424. generic_shutdown_super(sb);
  425. BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
  426. logfs_cleanup_gc(sb);
  427. logfs_cleanup_journal(sb);
  428. logfs_cleanup_areas(sb);
  429. logfs_cleanup_rw(sb);
  430. super->s_devops->put_device(sb);
  431. mempool_destroy(super->s_btree_pool);
  432. mempool_destroy(super->s_alias_pool);
  433. kfree(super);
  434. log_super("LogFS: Finished unmounting\n");
  435. }
  436. int logfs_get_sb_device(struct file_system_type *type, int flags,
  437. struct mtd_info *mtd, struct block_device *bdev,
  438. const struct logfs_device_ops *devops, struct vfsmount *mnt)
  439. {
  440. struct logfs_super *super;
  441. struct super_block *sb;
  442. int err = -ENOMEM;
  443. static int mount_count;
  444. log_super("LogFS: Start mount %x\n", mount_count++);
  445. super = kzalloc(sizeof(*super), GFP_KERNEL);
  446. if (!super)
  447. goto err0;
  448. super->s_mtd = mtd;
  449. super->s_bdev = bdev;
  450. err = -EINVAL;
  451. sb = sget(type, logfs_sb_test, logfs_sb_set, super);
  452. if (IS_ERR(sb))
  453. goto err0;
  454. if (sb->s_root) {
  455. /* Device is already in use */
  456. err = 0;
  457. simple_set_mnt(mnt, sb);
  458. goto err0;
  459. }
  460. super->s_devops = devops;
  461. /*
  462. * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
  463. * only covers 16TB and the upper 8TB are used for indirect blocks.
  464. * On 64bit system we could bump up the limit, but that would make
  465. * the filesystem incompatible with 32bit systems.
  466. */
  467. sb->s_maxbytes = (1ull << 43) - 1;
  468. sb->s_op = &logfs_super_operations;
  469. sb->s_flags = flags | MS_NOATIME;
  470. err = logfs_read_sb(sb);
  471. if (err)
  472. goto err1;
  473. sb->s_flags |= MS_ACTIVE;
  474. err = logfs_get_sb_final(sb, mnt);
  475. if (err)
  476. goto err1;
  477. return 0;
  478. err1:
  479. up_write(&sb->s_umount);
  480. deactivate_super(sb);
  481. return err;
  482. err0:
  483. kfree(super);
  484. //devops->put_device(sb);
  485. return err;
  486. }
  487. static int logfs_get_sb(struct file_system_type *type, int flags,
  488. const char *devname, void *data, struct vfsmount *mnt)
  489. {
  490. ulong mtdnr;
  491. if (!devname)
  492. return logfs_get_sb_bdev(type, flags, devname, mnt);
  493. if (strncmp(devname, "mtd", 3))
  494. return logfs_get_sb_bdev(type, flags, devname, mnt);
  495. {
  496. char *garbage;
  497. mtdnr = simple_strtoul(devname+3, &garbage, 0);
  498. if (*garbage)
  499. return -EINVAL;
  500. }
  501. return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
  502. }
  503. static struct file_system_type logfs_fs_type = {
  504. .owner = THIS_MODULE,
  505. .name = "logfs",
  506. .get_sb = logfs_get_sb,
  507. .kill_sb = logfs_kill_sb,
  508. .fs_flags = FS_REQUIRES_DEV,
  509. };
  510. static int __init logfs_init(void)
  511. {
  512. int ret;
  513. emergency_page = alloc_pages(GFP_KERNEL, 0);
  514. if (!emergency_page)
  515. return -ENOMEM;
  516. ret = logfs_compr_init();
  517. if (ret)
  518. goto out1;
  519. ret = logfs_init_inode_cache();
  520. if (ret)
  521. goto out2;
  522. return register_filesystem(&logfs_fs_type);
  523. out2:
  524. logfs_compr_exit();
  525. out1:
  526. __free_pages(emergency_page, 0);
  527. return ret;
  528. }
  529. static void __exit logfs_exit(void)
  530. {
  531. unregister_filesystem(&logfs_fs_type);
  532. logfs_destroy_inode_cache();
  533. logfs_compr_exit();
  534. __free_pages(emergency_page, 0);
  535. }
  536. module_init(logfs_init);
  537. module_exit(logfs_exit);
  538. MODULE_LICENSE("GPL v2");
  539. MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
  540. MODULE_DESCRIPTION("scalable flash filesystem");