zfs.c 56 KB


  1. /*
  2. *
  3. * ZFS filesystem ported to u-boot by
  4. * Jorgen Lundman <lundman at lundman.net>
  5. *
  6. * GRUB -- GRand Unified Bootloader
  7. * Copyright (C) 1999,2000,2001,2002,2003,2004
  8. * Free Software Foundation, Inc.
  9. * Copyright 2004 Sun Microsystems, Inc.
  10. *
  11. * GRUB is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * GRUB is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
  23. *
  24. */
  25. #include <common.h>
  26. #include <malloc.h>
  27. #include <linux/stat.h>
  28. #include <linux/time.h>
  29. #include <linux/ctype.h>
  30. #include <asm/byteorder.h>
  31. #include "zfs_common.h"
  32. block_dev_desc_t *zfs_dev_desc;
  33. /*
  34. * The zfs plug-in routines for GRUB are:
  35. *
  36. * zfs_mount() - locates a valid uberblock of the root pool and reads
  37. * in its MOS at the memory address MOS.
  38. *
  39. * zfs_open() - locates a plain file object by following the MOS
  40. * and places its dnode at the memory address DNODE.
  41. *
  42. * zfs_read() - read in the data blocks pointed by the DNODE.
  43. *
  44. */
  45. #include <zfs/zfs.h>
  46. #include <zfs/zio.h>
  47. #include <zfs/dnode.h>
  48. #include <zfs/uberblock_impl.h>
  49. #include <zfs/vdev_impl.h>
  50. #include <zfs/zio_checksum.h>
  51. #include <zfs/zap_impl.h>
  52. #include <zfs/zap_leaf.h>
  53. #include <zfs/zfs_znode.h>
  54. #include <zfs/dmu.h>
  55. #include <zfs/dmu_objset.h>
  56. #include <zfs/sa_impl.h>
  57. #include <zfs/dsl_dir.h>
  58. #include <zfs/dsl_dataset.h>
  59. #define ZPOOL_PROP_BOOTFS "bootfs"
  60. /*
  61. * For nvlist manipulation. (from nvpair.h)
  62. */
  63. #define NV_ENCODE_NATIVE 0
  64. #define NV_ENCODE_XDR 1
  65. #define NV_BIG_ENDIAN 0
  66. #define NV_LITTLE_ENDIAN 1
  67. #define DATA_TYPE_UINT64 8
  68. #define DATA_TYPE_STRING 9
  69. #define DATA_TYPE_NVLIST 19
  70. #define DATA_TYPE_NVLIST_ARRAY 20
  71. /*
  72. * Macros to get fields in a bp or DVA.
  73. */
  74. #define P2PHASE(x, align) ((x) & ((align) - 1))
  75. #define DVA_OFFSET_TO_PHYS_SECTOR(offset) \
  76. ((offset + VDEV_LABEL_START_SIZE) >> SPA_MINBLOCKSHIFT)
  77. /*
  78. * return x rounded down to an align boundary
  79. * eg, P2ALIGN(1200, 1024) == 1024 (1*align)
  80. * eg, P2ALIGN(1024, 1024) == 1024 (1*align)
  81. * eg, P2ALIGN(0x1234, 0x100) == 0x1200 (0x12*align)
  82. * eg, P2ALIGN(0x5600, 0x100) == 0x5600 (0x56*align)
  83. */
  84. #define P2ALIGN(x, align) ((x) & -(align))
  85. /*
  86. * FAT ZAP data structures
  87. */
  88. #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
  89. #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
  90. #define CHAIN_END 0xffff /* end of the chunk chain */
  91. /*
  92. * The amount of space within the chunk available for the array is:
  93. * chunk size - space for type (1) - space for next pointer (2)
  94. */
  95. #define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3)
  96. #define ZAP_LEAF_HASH_SHIFT(bs) (bs - 5)
  97. #define ZAP_LEAF_HASH_NUMENTRIES(bs) (1 << ZAP_LEAF_HASH_SHIFT(bs))
  98. #define LEAF_HASH(bs, h) \
  99. ((ZAP_LEAF_HASH_NUMENTRIES(bs)-1) & \
  100. ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(bs)-l->l_hdr.lh_prefix_len)))
  101. /*
  102. * The amount of space available for chunks is:
  103. * block size shift - hash entry size (2) * number of hash
  104. * entries - header space (2*chunksize)
  105. */
  106. #define ZAP_LEAF_NUMCHUNKS(bs) \
  107. (((1<<bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(bs)) / \
  108. ZAP_LEAF_CHUNKSIZE - 2)
  109. /*
  110. * The chunks start immediately after the hash table. The end of the
  111. * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a
  112. * chunk_t.
  113. */
  114. #define ZAP_LEAF_CHUNK(l, bs, idx) \
  115. ((zap_leaf_chunk_t *)(l->l_hash + ZAP_LEAF_HASH_NUMENTRIES(bs)))[idx]
  116. #define ZAP_LEAF_ENTRY(l, bs, idx) (&ZAP_LEAF_CHUNK(l, bs, idx).l_entry)
  117. /*
  118. * Decompression Entry - lzjb
  119. */
  120. #ifndef NBBY
  121. #define NBBY 8
  122. #endif
/*
 * Common signature of a decompression routine: expand the s_len bytes at
 * s_start into the d_len-byte buffer at d_start.  zio_read() treats a
 * non-zero return value as an error code.
 */
typedef int zfs_decomp_func_t(void *s_start, void *d_start,
			      uint32_t s_len, uint32_t d_len);

/* One row of decomp_table. */
typedef struct decomp_entry {
	char *name;			/* printed in "not supported" messages */
	zfs_decomp_func_t *decomp_func;	/* NULL when no decompressor exists */
} decomp_entry_t;

/* A dnode together with the endianness used to decode its fields. */
typedef struct dnode_end {
	dnode_phys_t dn;
	zfs_endian_t endian;
} dnode_end_t;

/* State shared by the routines in this file. */
struct zfs_data {
	/* cache for a file block of the currently zfs_open()-ed file */
	char *file_buf;
	uint64_t file_start;
	uint64_t file_end;

	/* XXX: ashift is per vdev, not per pool. We currently only ever touch
	 * a single vdev, but when/if raid-z or stripes are supported, this
	 * may need revision.
	 */
	uint64_t vdev_ashift;
	/* uberblocks with a txg below this value are rejected */
	uint64_t label_txg;
	uint64_t pool_guid;

	/* cache for a dnode block */
	dnode_phys_t *dnode_buf;
	dnode_phys_t *dnode_mdn;
	uint64_t dnode_start;
	uint64_t dnode_end;
	zfs_endian_t dnode_endian;

	uberblock_t current_uberblock;

	dnode_end_t mos;
	dnode_end_t mdn;
	dnode_end_t dnode;

	/* base sector used when computing uberblock offsets (find_bestub) */
	uint64_t vdev_phys_sector;

	/* NOTE(review): presumably the directory-listing callback and its
	 * scratch info; neither is referenced in this part of the file. */
	int (*userhook)(const char *, const struct zfs_dirhook_info *);
	struct zfs_dirhook_info *dirinfo;
};
  159. static int
  160. zlib_decompress(void *s, void *d,
  161. uint32_t slen, uint32_t dlen)
  162. {
  163. if (zlib_decompress(s, d, slen, dlen) < 0)
  164. return ZFS_ERR_BAD_FS;
  165. return ZFS_ERR_NONE;
  166. }
/*
 * Decompression dispatch table, indexed by the compression id that
 * zio_read() extracts from bits 32-39 of blk_prop.  A NULL decomp_func
 * means no decompressor is available; zio_read() rejects such blocks
 * unless the id is ZIO_COMPRESS_OFF.
 */
static decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] = {
	{"inherit", NULL},		/* ZIO_COMPRESS_INHERIT */
	{"on", lzjb_decompress},	/* ZIO_COMPRESS_ON */
	{"off", NULL},			/* ZIO_COMPRESS_OFF */
	{"lzjb", lzjb_decompress},	/* ZIO_COMPRESS_LZJB */
	{"empty", NULL},		/* ZIO_COMPRESS_EMPTY */
	{"gzip-1", zlib_decompress},	/* ZIO_COMPRESS_GZIP1 */
	{"gzip-2", zlib_decompress},	/* ZIO_COMPRESS_GZIP2 */
	{"gzip-3", zlib_decompress},	/* ZIO_COMPRESS_GZIP3 */
	{"gzip-4", zlib_decompress},	/* ZIO_COMPRESS_GZIP4 */
	{"gzip-5", zlib_decompress},	/* ZIO_COMPRESS_GZIP5 */
	{"gzip-6", zlib_decompress},	/* ZIO_COMPRESS_GZIP6 */
	{"gzip-7", zlib_decompress},	/* ZIO_COMPRESS_GZIP7 */
	{"gzip-8", zlib_decompress},	/* ZIO_COMPRESS_GZIP8 */
	{"gzip-9", zlib_decompress},	/* ZIO_COMPRESS_GZIP9 */
};
/*
 * Forward declarations: zio_read_data() and zio_read_gang() call each
 * other, and uberblock_verify() uses zio_read() before its definition.
 */
static int zio_read_data(blkptr_t *bp, zfs_endian_t endian,
			 void *buf, struct zfs_data *data);
static int
zio_read(blkptr_t *bp, zfs_endian_t endian, void **buf,
	 size_t *size, struct zfs_data *data);
  188. /*
  189. * Our own version of log2(). Same thing as highbit()-1.
  190. */
  191. static int
  192. zfs_log2(uint64_t num)
  193. {
  194. int i = 0;
  195. while (num > 1) {
  196. i++;
  197. num = num >> 1;
  198. }
  199. return i;
  200. }
  201. /* Checksum Functions */
  202. static void
  203. zio_checksum_off(const void *buf __attribute__ ((unused)),
  204. uint64_t size __attribute__ ((unused)),
  205. zfs_endian_t endian __attribute__ ((unused)),
  206. zio_cksum_t *zcp)
  207. {
  208. ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
  209. }
/*
 * Checksum Table and Values
 *
 * Indexed by checksum id (zio_read_data() takes it from bits 40-47 of
 * blk_prop).  The first column is the checksum routine; a NULL routine
 * makes zio_checksum_verify() reject the block as unsupported.  The last
 * column is the algorithm name.
 * NOTE(review): the two integer columns are presumably the "correctable"
 * and "embedded checksum" (ci_eck) flags -- confirm against the
 * zio_checksum_info_t declaration.
 */
static zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
	{NULL, 0, 0, "inherit"},
	{NULL, 0, 0, "on"},
	{zio_checksum_off, 0, 0, "off"},
	{zio_checksum_SHA256, 1, 1, "label"},
	{zio_checksum_SHA256, 1, 1, "gang_header"},
	{NULL, 0, 0, "zilog"},
	{fletcher_2_endian, 0, 0, "fletcher2"},
	{fletcher_4_endian, 1, 0, "fletcher4"},
	{zio_checksum_SHA256, 1, 0, "SHA256"},
	{NULL, 0, 0, "zilog2"},
};
  223. /*
  224. * zio_checksum_verify: Provides support for checksum verification.
  225. *
  226. * Fletcher2, Fletcher4, and SHA256 are supported.
  227. *
  228. */
  229. static int
  230. zio_checksum_verify(zio_cksum_t zc, uint32_t checksum,
  231. zfs_endian_t endian, char *buf, int size)
  232. {
  233. zio_eck_t *zec = (zio_eck_t *) (buf + size) - 1;
  234. zio_checksum_info_t *ci = &zio_checksum_table[checksum];
  235. zio_cksum_t actual_cksum, expected_cksum;
  236. if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func == NULL) {
  237. printf("zfs unknown checksum function %d\n", checksum);
  238. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  239. }
  240. if (ci->ci_eck) {
  241. expected_cksum = zec->zec_cksum;
  242. zec->zec_cksum = zc;
  243. ci->ci_func(buf, size, endian, &actual_cksum);
  244. zec->zec_cksum = expected_cksum;
  245. zc = expected_cksum;
  246. } else {
  247. ci->ci_func(buf, size, endian, &actual_cksum);
  248. }
  249. if ((actual_cksum.zc_word[0] != zc.zc_word[0])
  250. || (actual_cksum.zc_word[1] != zc.zc_word[1])
  251. || (actual_cksum.zc_word[2] != zc.zc_word[2])
  252. || (actual_cksum.zc_word[3] != zc.zc_word[3])) {
  253. return ZFS_ERR_BAD_FS;
  254. }
  255. return ZFS_ERR_NONE;
  256. }
  257. /*
  258. * vdev_uberblock_compare takes two uberblock structures and returns an integer
  259. * indicating the more recent of the two.
  260. * Return Value = 1 if ub2 is more recent
  261. * Return Value = -1 if ub1 is more recent
  262. * The most recent uberblock is determined using its transaction number and
  263. * timestamp. The uberblock with the highest transaction number is
  264. * considered "newer". If the transaction numbers of the two blocks match, the
  265. * timestamps are compared to determine the "newer" of the two.
  266. */
  267. static int
  268. vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
  269. {
  270. zfs_endian_t ub1_endian, ub2_endian;
  271. if (zfs_to_cpu64(ub1->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC)
  272. ub1_endian = LITTLE_ENDIAN;
  273. else
  274. ub1_endian = BIG_ENDIAN;
  275. if (zfs_to_cpu64(ub2->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC)
  276. ub2_endian = LITTLE_ENDIAN;
  277. else
  278. ub2_endian = BIG_ENDIAN;
  279. if (zfs_to_cpu64(ub1->ub_txg, ub1_endian)
  280. < zfs_to_cpu64(ub2->ub_txg, ub2_endian))
  281. return -1;
  282. if (zfs_to_cpu64(ub1->ub_txg, ub1_endian)
  283. > zfs_to_cpu64(ub2->ub_txg, ub2_endian))
  284. return 1;
  285. if (zfs_to_cpu64(ub1->ub_timestamp, ub1_endian)
  286. < zfs_to_cpu64(ub2->ub_timestamp, ub2_endian))
  287. return -1;
  288. if (zfs_to_cpu64(ub1->ub_timestamp, ub1_endian)
  289. > zfs_to_cpu64(ub2->ub_timestamp, ub2_endian))
  290. return 1;
  291. return 0;
  292. }
  293. /*
  294. * Three pieces of information are needed to verify an uberblock: the magic
  295. * number, the version number, and the checksum.
  296. *
  297. * Currently Implemented: version number, magic number, label txg
  298. * Need to Implement: checksum
  299. *
  300. */
  301. static int
  302. uberblock_verify(uberblock_t *uber, int offset, struct zfs_data *data)
  303. {
  304. int err;
  305. zfs_endian_t endian = UNKNOWN_ENDIAN;
  306. zio_cksum_t zc;
  307. if (uber->ub_txg < data->label_txg) {
  308. debug("ignoring partially written label: uber_txg < label_txg %llu %llu\n",
  309. uber->ub_txg, data->label_txg);
  310. return ZFS_ERR_BAD_FS;
  311. }
  312. if (zfs_to_cpu64(uber->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
  313. && zfs_to_cpu64(uber->ub_version, LITTLE_ENDIAN) > 0
  314. && zfs_to_cpu64(uber->ub_version, LITTLE_ENDIAN) <= SPA_VERSION)
  315. endian = LITTLE_ENDIAN;
  316. if (zfs_to_cpu64(uber->ub_magic, BIG_ENDIAN) == UBERBLOCK_MAGIC
  317. && zfs_to_cpu64(uber->ub_version, BIG_ENDIAN) > 0
  318. && zfs_to_cpu64(uber->ub_version, BIG_ENDIAN) <= SPA_VERSION)
  319. endian = BIG_ENDIAN;
  320. if (endian == UNKNOWN_ENDIAN) {
  321. printf("invalid uberblock magic\n");
  322. return ZFS_ERR_BAD_FS;
  323. }
  324. memset(&zc, 0, sizeof(zc));
  325. zc.zc_word[0] = cpu_to_zfs64(offset, endian);
  326. err = zio_checksum_verify(zc, ZIO_CHECKSUM_LABEL, endian,
  327. (char *) uber, UBERBLOCK_SIZE(data->vdev_ashift));
  328. if (!err) {
  329. /* Check that the data pointed by the rootbp is usable. */
  330. void *osp = NULL;
  331. size_t ospsize;
  332. err = zio_read(&uber->ub_rootbp, endian, &osp, &ospsize, data);
  333. free(osp);
  334. if (!err && ospsize < OBJSET_PHYS_SIZE_V14) {
  335. printf("uberblock rootbp points to invalid data\n");
  336. return ZFS_ERR_BAD_FS;
  337. }
  338. }
  339. return err;
  340. }
  341. /*
  342. * Find the best uberblock.
  343. * Return:
  344. * Success - Pointer to the best uberblock.
  345. * Failure - NULL
  346. */
  347. static uberblock_t *find_bestub(char *ub_array, struct zfs_data *data)
  348. {
  349. const uint64_t sector = data->vdev_phys_sector;
  350. uberblock_t *ubbest = NULL;
  351. uberblock_t *ubnext;
  352. unsigned int i, offset, pickedub = 0;
  353. int err = ZFS_ERR_NONE;
  354. const unsigned int UBCOUNT = UBERBLOCK_COUNT(data->vdev_ashift);
  355. const uint64_t UBBYTES = UBERBLOCK_SIZE(data->vdev_ashift);
  356. for (i = 0; i < UBCOUNT; i++) {
  357. ubnext = (uberblock_t *) (i * UBBYTES + ub_array);
  358. offset = (sector << SPA_MINBLOCKSHIFT) + VDEV_PHYS_SIZE + (i * UBBYTES);
  359. err = uberblock_verify(ubnext, offset, data);
  360. if (err)
  361. continue;
  362. if (ubbest == NULL || vdev_uberblock_compare(ubnext, ubbest) > 0) {
  363. ubbest = ubnext;
  364. pickedub = i;
  365. }
  366. }
  367. if (ubbest)
  368. debug("zfs Found best uberblock at idx %d, txg %llu\n",
  369. pickedub, (unsigned long long) ubbest->ub_txg);
  370. return ubbest;
  371. }
  372. static inline size_t
  373. get_psize(blkptr_t *bp, zfs_endian_t endian)
  374. {
  375. return (((zfs_to_cpu64((bp)->blk_prop, endian) >> 16) & 0xffff) + 1)
  376. << SPA_MINBLOCKSHIFT;
  377. }
  378. static uint64_t
  379. dva_get_offset(dva_t *dva, zfs_endian_t endian)
  380. {
  381. return zfs_to_cpu64((dva)->dva_word[1],
  382. endian) << SPA_MINBLOCKSHIFT;
  383. }
  384. /*
  385. * Read a block of data based on the gang block address dva,
  386. * and put its data in buf.
  387. *
  388. */
  389. static int
  390. zio_read_gang(blkptr_t *bp, zfs_endian_t endian, dva_t *dva, void *buf,
  391. struct zfs_data *data)
  392. {
  393. zio_gbh_phys_t *zio_gb;
  394. uint64_t offset, sector;
  395. unsigned i;
  396. int err;
  397. zio_cksum_t zc;
  398. memset(&zc, 0, sizeof(zc));
  399. zio_gb = malloc(SPA_GANGBLOCKSIZE);
  400. if (!zio_gb)
  401. return ZFS_ERR_OUT_OF_MEMORY;
  402. offset = dva_get_offset(dva, endian);
  403. sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
  404. /* read in the gang block header */
  405. err = zfs_devread(sector, 0, SPA_GANGBLOCKSIZE, (char *) zio_gb);
  406. if (err) {
  407. free(zio_gb);
  408. return err;
  409. }
  410. /* XXX */
  411. /* self checksuming the gang block header */
  412. ZIO_SET_CHECKSUM(&zc, DVA_GET_VDEV(dva),
  413. dva_get_offset(dva, endian), bp->blk_birth, 0);
  414. err = zio_checksum_verify(zc, ZIO_CHECKSUM_GANG_HEADER, endian,
  415. (char *) zio_gb, SPA_GANGBLOCKSIZE);
  416. if (err) {
  417. free(zio_gb);
  418. return err;
  419. }
  420. endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
  421. for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
  422. if (zio_gb->zg_blkptr[i].blk_birth == 0)
  423. continue;
  424. err = zio_read_data(&zio_gb->zg_blkptr[i], endian, buf, data);
  425. if (err) {
  426. free(zio_gb);
  427. return err;
  428. }
  429. buf = (char *) buf + get_psize(&zio_gb->zg_blkptr[i], endian);
  430. }
  431. free(zio_gb);
  432. return ZFS_ERR_NONE;
  433. }
  434. /*
  435. * Read in a block of raw data to buf.
  436. */
  437. static int
  438. zio_read_data(blkptr_t *bp, zfs_endian_t endian, void *buf,
  439. struct zfs_data *data)
  440. {
  441. int i, psize;
  442. int err = ZFS_ERR_NONE;
  443. psize = get_psize(bp, endian);
  444. /* pick a good dva from the block pointer */
  445. for (i = 0; i < SPA_DVAS_PER_BP; i++) {
  446. uint64_t offset, sector;
  447. if (bp->blk_dva[i].dva_word[0] == 0 && bp->blk_dva[i].dva_word[1] == 0)
  448. continue;
  449. if ((zfs_to_cpu64(bp->blk_dva[i].dva_word[1], endian)>>63) & 1) {
  450. err = zio_read_gang(bp, endian, &bp->blk_dva[i], buf, data);
  451. } else {
  452. /* read in a data block */
  453. offset = dva_get_offset(&bp->blk_dva[i], endian);
  454. sector = DVA_OFFSET_TO_PHYS_SECTOR(offset);
  455. err = zfs_devread(sector, 0, psize, buf);
  456. }
  457. if (!err) {
  458. /*Check the underlying checksum before we rule this DVA as "good"*/
  459. uint32_t checkalgo = (zfs_to_cpu64((bp)->blk_prop, endian) >> 40) & 0xff;
  460. err = zio_checksum_verify(bp->blk_cksum, checkalgo, endian, buf, psize);
  461. if (!err)
  462. return ZFS_ERR_NONE;
  463. }
  464. /* If read failed or checksum bad, reset the error. Hopefully we've got some more DVA's to try.*/
  465. }
  466. if (!err) {
  467. printf("couldn't find a valid DVA\n");
  468. err = ZFS_ERR_BAD_FS;
  469. }
  470. return err;
  471. }
  472. /*
  473. * Read in a block of data, verify its checksum, decompress if needed,
  474. * and put the uncompressed data in buf.
  475. */
  476. static int
  477. zio_read(blkptr_t *bp, zfs_endian_t endian, void **buf,
  478. size_t *size, struct zfs_data *data)
  479. {
  480. size_t lsize, psize;
  481. unsigned int comp;
  482. char *compbuf = NULL;
  483. int err;
  484. *buf = NULL;
  485. comp = (zfs_to_cpu64((bp)->blk_prop, endian)>>32) & 0xff;
  486. lsize = (BP_IS_HOLE(bp) ? 0 :
  487. (((zfs_to_cpu64((bp)->blk_prop, endian) & 0xffff) + 1)
  488. << SPA_MINBLOCKSHIFT));
  489. psize = get_psize(bp, endian);
  490. if (size)
  491. *size = lsize;
  492. if (comp >= ZIO_COMPRESS_FUNCTIONS) {
  493. printf("compression algorithm %u not supported\n", (unsigned int) comp);
  494. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  495. }
  496. if (comp != ZIO_COMPRESS_OFF && decomp_table[comp].decomp_func == NULL) {
  497. printf("compression algorithm %s not supported\n", decomp_table[comp].name);
  498. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  499. }
  500. if (comp != ZIO_COMPRESS_OFF) {
  501. compbuf = malloc(psize);
  502. if (!compbuf)
  503. return ZFS_ERR_OUT_OF_MEMORY;
  504. } else {
  505. compbuf = *buf = malloc(lsize);
  506. }
  507. err = zio_read_data(bp, endian, compbuf, data);
  508. if (err) {
  509. free(compbuf);
  510. *buf = NULL;
  511. return err;
  512. }
  513. if (comp != ZIO_COMPRESS_OFF) {
  514. *buf = malloc(lsize);
  515. if (!*buf) {
  516. free(compbuf);
  517. return ZFS_ERR_OUT_OF_MEMORY;
  518. }
  519. err = decomp_table[comp].decomp_func(compbuf, *buf, psize, lsize);
  520. free(compbuf);
  521. if (err) {
  522. free(*buf);
  523. *buf = NULL;
  524. return err;
  525. }
  526. }
  527. return ZFS_ERR_NONE;
  528. }
  529. /*
  530. * Get the block from a block id.
  531. * push the block onto the stack.
  532. *
  533. */
  534. static int
  535. dmu_read(dnode_end_t *dn, uint64_t blkid, void **buf,
  536. zfs_endian_t *endian_out, struct zfs_data *data)
  537. {
  538. int idx, level;
  539. blkptr_t *bp_array = dn->dn.dn_blkptr;
  540. int epbs = dn->dn.dn_indblkshift - SPA_BLKPTRSHIFT;
  541. blkptr_t *bp;
  542. void *tmpbuf = 0;
  543. zfs_endian_t endian;
  544. int err = ZFS_ERR_NONE;
  545. bp = malloc(sizeof(blkptr_t));
  546. if (!bp)
  547. return ZFS_ERR_OUT_OF_MEMORY;
  548. endian = dn->endian;
  549. for (level = dn->dn.dn_nlevels - 1; level >= 0; level--) {
  550. idx = (blkid >> (epbs * level)) & ((1 << epbs) - 1);
  551. *bp = bp_array[idx];
  552. if (bp_array != dn->dn.dn_blkptr) {
  553. free(bp_array);
  554. bp_array = 0;
  555. }
  556. if (BP_IS_HOLE(bp)) {
  557. size_t size = zfs_to_cpu16(dn->dn.dn_datablkszsec,
  558. dn->endian)
  559. << SPA_MINBLOCKSHIFT;
  560. *buf = malloc(size);
  561. if (*buf) {
  562. err = ZFS_ERR_OUT_OF_MEMORY;
  563. break;
  564. }
  565. memset(*buf, 0, size);
  566. endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
  567. break;
  568. }
  569. if (level == 0) {
  570. err = zio_read(bp, endian, buf, 0, data);
  571. endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
  572. break;
  573. }
  574. err = zio_read(bp, endian, &tmpbuf, 0, data);
  575. endian = (zfs_to_cpu64(bp->blk_prop, endian) >> 63) & 1;
  576. if (err)
  577. break;
  578. bp_array = tmpbuf;
  579. }
  580. if (bp_array != dn->dn.dn_blkptr)
  581. free(bp_array);
  582. if (endian_out)
  583. *endian_out = endian;
  584. free(bp);
  585. return err;
  586. }
  587. /*
  588. * mzap_lookup: Looks up property described by "name" and returns the value
  589. * in "value".
  590. */
  591. static int
  592. mzap_lookup(mzap_phys_t *zapobj, zfs_endian_t endian,
  593. int objsize, char *name, uint64_t * value)
  594. {
  595. int i, chunks;
  596. mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
  597. chunks = objsize / MZAP_ENT_LEN - 1;
  598. for (i = 0; i < chunks; i++) {
  599. if (strcmp(mzap_ent[i].mze_name, name) == 0) {
  600. *value = zfs_to_cpu64(mzap_ent[i].mze_value, endian);
  601. return ZFS_ERR_NONE;
  602. }
  603. }
  604. printf("couldn't find '%s'\n", name);
  605. return ZFS_ERR_FILE_NOT_FOUND;
  606. }
  607. static int
  608. mzap_iterate(mzap_phys_t *zapobj, zfs_endian_t endian, int objsize,
  609. int (*hook)(const char *name,
  610. uint64_t val,
  611. struct zfs_data *data),
  612. struct zfs_data *data)
  613. {
  614. int i, chunks;
  615. mzap_ent_phys_t *mzap_ent = zapobj->mz_chunk;
  616. chunks = objsize / MZAP_ENT_LEN - 1;
  617. for (i = 0; i < chunks; i++) {
  618. if (hook(mzap_ent[i].mze_name,
  619. zfs_to_cpu64(mzap_ent[i].mze_value, endian),
  620. data))
  621. return 1;
  622. }
  623. return 0;
  624. }
  625. static uint64_t
  626. zap_hash(uint64_t salt, const char *name)
  627. {
  628. static uint64_t table[256];
  629. const uint8_t *cp;
  630. uint8_t c;
  631. uint64_t crc = salt;
  632. if (table[128] == 0) {
  633. uint64_t *ct;
  634. int i, j;
  635. for (i = 0; i < 256; i++) {
  636. for (ct = table + i, *ct = i, j = 8; j > 0; j--)
  637. *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
  638. }
  639. }
  640. for (cp = (const uint8_t *) name; (c = *cp) != '\0'; cp++)
  641. crc = (crc >> 8) ^ table[(crc ^ c) & 0xFF];
  642. /*
  643. * Only use 28 bits, since we need 4 bits in the cookie for the
  644. * collision differentiator. We MUST use the high bits, since
  645. * those are the onces that we first pay attention to when
  646. * chosing the bucket.
  647. */
  648. crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1);
  649. return crc;
  650. }
  651. /*
  652. * Only to be used on 8-bit arrays.
  653. * array_len is actual len in bytes (not encoded le_value_length).
  654. * buf is null-terminated.
  655. */
  656. /* XXX */
  657. static int
  658. zap_leaf_array_equal(zap_leaf_phys_t *l, zfs_endian_t endian,
  659. int blksft, int chunk, int array_len, const char *buf)
  660. {
  661. int bseen = 0;
  662. while (bseen < array_len) {
  663. struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
  664. int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
  665. if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
  666. return 0;
  667. if (memcmp(la->la_array, buf + bseen, toread) != 0)
  668. break;
  669. chunk = zfs_to_cpu16(la->la_next, endian);
  670. bseen += toread;
  671. }
  672. return (bseen == array_len);
  673. }
  674. /* XXX */
  675. static int
  676. zap_leaf_array_get(zap_leaf_phys_t *l, zfs_endian_t endian, int blksft,
  677. int chunk, int array_len, char *buf)
  678. {
  679. int bseen = 0;
  680. while (bseen < array_len) {
  681. struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, blksft, chunk).l_array;
  682. int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
  683. if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft))
  684. /* Don't use errno because this error is to be ignored. */
  685. return ZFS_ERR_BAD_FS;
  686. memcpy(buf + bseen, la->la_array, toread);
  687. chunk = zfs_to_cpu16(la->la_next, endian);
  688. bseen += toread;
  689. }
  690. return ZFS_ERR_NONE;
  691. }
  692. /*
  693. * Given a zap_leaf_phys_t, walk thru the zap leaf chunks to get the
  694. * value for the property "name".
  695. *
  696. */
  697. /* XXX */
  698. static int
  699. zap_leaf_lookup(zap_leaf_phys_t *l, zfs_endian_t endian,
  700. int blksft, uint64_t h,
  701. const char *name, uint64_t *value)
  702. {
  703. uint16_t chunk;
  704. struct zap_leaf_entry *le;
  705. /* Verify if this is a valid leaf block */
  706. if (zfs_to_cpu64(l->l_hdr.lh_block_type, endian) != ZBT_LEAF) {
  707. printf("invalid leaf type\n");
  708. return ZFS_ERR_BAD_FS;
  709. }
  710. if (zfs_to_cpu32(l->l_hdr.lh_magic, endian) != ZAP_LEAF_MAGIC) {
  711. printf("invalid leaf magic\n");
  712. return ZFS_ERR_BAD_FS;
  713. }
  714. for (chunk = zfs_to_cpu16(l->l_hash[LEAF_HASH(blksft, h)], endian);
  715. chunk != CHAIN_END; chunk = le->le_next) {
  716. if (chunk >= ZAP_LEAF_NUMCHUNKS(blksft)) {
  717. printf("invalid chunk number\n");
  718. return ZFS_ERR_BAD_FS;
  719. }
  720. le = ZAP_LEAF_ENTRY(l, blksft, chunk);
  721. /* Verify the chunk entry */
  722. if (le->le_type != ZAP_CHUNK_ENTRY) {
  723. printf("invalid chunk entry\n");
  724. return ZFS_ERR_BAD_FS;
  725. }
  726. if (zfs_to_cpu64(le->le_hash, endian) != h)
  727. continue;
  728. if (zap_leaf_array_equal(l, endian, blksft,
  729. zfs_to_cpu16(le->le_name_chunk, endian),
  730. zfs_to_cpu16(le->le_name_length, endian),
  731. name)) {
  732. struct zap_leaf_array *la;
  733. if (le->le_int_size != 8 || le->le_value_length != 1) {
  734. printf("invalid leaf chunk entry\n");
  735. return ZFS_ERR_BAD_FS;
  736. }
  737. /* get the uint64_t property value */
  738. la = &ZAP_LEAF_CHUNK(l, blksft, le->le_value_chunk).l_array;
  739. *value = be64_to_cpu(la->la_array64);
  740. return ZFS_ERR_NONE;
  741. }
  742. }
  743. printf("couldn't find '%s'\n", name);
  744. return ZFS_ERR_FILE_NOT_FOUND;
  745. }
  746. /* Verify if this is a fat zap header block */
  747. static int
  748. zap_verify(zap_phys_t *zap)
  749. {
  750. if (zap->zap_magic != (uint64_t) ZAP_MAGIC) {
  751. printf("bad ZAP magic\n");
  752. return ZFS_ERR_BAD_FS;
  753. }
  754. if (zap->zap_flags != 0) {
  755. printf("bad ZAP flags\n");
  756. return ZFS_ERR_BAD_FS;
  757. }
  758. if (zap->zap_salt == 0) {
  759. printf("bad ZAP salt\n");
  760. return ZFS_ERR_BAD_FS;
  761. }
  762. return ZFS_ERR_NONE;
  763. }
  764. /*
  765. * Fat ZAP lookup
  766. *
  767. */
  768. /* XXX */
  769. static int
  770. fzap_lookup(dnode_end_t *zap_dnode, zap_phys_t *zap,
  771. char *name, uint64_t *value, struct zfs_data *data)
  772. {
  773. void *l;
  774. uint64_t hash, idx, blkid;
  775. int blksft = zfs_log2(zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
  776. zap_dnode->endian) << DNODE_SHIFT);
  777. int err;
  778. zfs_endian_t leafendian;
  779. err = zap_verify(zap);
  780. if (err)
  781. return err;
  782. hash = zap_hash(zap->zap_salt, name);
  783. /* get block id from index */
  784. if (zap->zap_ptrtbl.zt_numblks != 0) {
  785. printf("external pointer tables not supported\n");
  786. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  787. }
  788. idx = ZAP_HASH_IDX(hash, zap->zap_ptrtbl.zt_shift);
  789. blkid = ((uint64_t *) zap)[idx + (1 << (blksft - 3 - 1))];
  790. /* Get the leaf block */
  791. if ((1U << blksft) < sizeof(zap_leaf_phys_t)) {
  792. printf("ZAP leaf is too small\n");
  793. return ZFS_ERR_BAD_FS;
  794. }
  795. err = dmu_read(zap_dnode, blkid, &l, &leafendian, data);
  796. if (err)
  797. return err;
  798. err = zap_leaf_lookup(l, leafendian, blksft, hash, name, value);
  799. free(l);
  800. return err;
  801. }
  802. /* XXX */
  803. static int
  804. fzap_iterate(dnode_end_t *zap_dnode, zap_phys_t *zap,
  805. int (*hook)(const char *name,
  806. uint64_t val,
  807. struct zfs_data *data),
  808. struct zfs_data *data)
  809. {
  810. zap_leaf_phys_t *l;
  811. void *l_in;
  812. uint64_t idx, blkid;
  813. uint16_t chunk;
  814. int blksft = zfs_log2(zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
  815. zap_dnode->endian) << DNODE_SHIFT);
  816. int err;
  817. zfs_endian_t endian;
  818. if (zap_verify(zap))
  819. return 0;
  820. /* get block id from index */
  821. if (zap->zap_ptrtbl.zt_numblks != 0) {
  822. printf("external pointer tables not supported\n");
  823. return 0;
  824. }
  825. /* Get the leaf block */
  826. if ((1U << blksft) < sizeof(zap_leaf_phys_t)) {
  827. printf("ZAP leaf is too small\n");
  828. return 0;
  829. }
  830. for (idx = 0; idx < zap->zap_ptrtbl.zt_numblks; idx++) {
  831. blkid = ((uint64_t *) zap)[idx + (1 << (blksft - 3 - 1))];
  832. err = dmu_read(zap_dnode, blkid, &l_in, &endian, data);
  833. l = l_in;
  834. if (err)
  835. continue;
  836. /* Verify if this is a valid leaf block */
  837. if (zfs_to_cpu64(l->l_hdr.lh_block_type, endian) != ZBT_LEAF) {
  838. free(l);
  839. continue;
  840. }
  841. if (zfs_to_cpu32(l->l_hdr.lh_magic, endian) != ZAP_LEAF_MAGIC) {
  842. free(l);
  843. continue;
  844. }
  845. for (chunk = 0; chunk < ZAP_LEAF_NUMCHUNKS(blksft); chunk++) {
  846. char *buf;
  847. struct zap_leaf_array *la;
  848. struct zap_leaf_entry *le;
  849. uint64_t val;
  850. le = ZAP_LEAF_ENTRY(l, blksft, chunk);
  851. /* Verify the chunk entry */
  852. if (le->le_type != ZAP_CHUNK_ENTRY)
  853. continue;
  854. buf = malloc(zfs_to_cpu16(le->le_name_length, endian)
  855. + 1);
  856. if (zap_leaf_array_get(l, endian, blksft, le->le_name_chunk,
  857. le->le_name_length, buf)) {
  858. free(buf);
  859. continue;
  860. }
  861. buf[le->le_name_length] = 0;
  862. if (le->le_int_size != 8
  863. || zfs_to_cpu16(le->le_value_length, endian) != 1)
  864. continue;
  865. /* get the uint64_t property value */
  866. la = &ZAP_LEAF_CHUNK(l, blksft, le->le_value_chunk).l_array;
  867. val = be64_to_cpu(la->la_array64);
  868. if (hook(buf, val, data))
  869. return 1;
  870. free(buf);
  871. }
  872. }
  873. return 0;
  874. }
  875. /*
  876. * Read in the data of a zap object and find the value for a matching
  877. * property name.
  878. *
  879. */
  880. static int
  881. zap_lookup(dnode_end_t *zap_dnode, char *name, uint64_t *val,
  882. struct zfs_data *data)
  883. {
  884. uint64_t block_type;
  885. int size;
  886. void *zapbuf;
  887. int err;
  888. zfs_endian_t endian;
  889. /* Read in the first block of the zap object data. */
  890. size = zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec,
  891. zap_dnode->endian) << SPA_MINBLOCKSHIFT;
  892. err = dmu_read(zap_dnode, 0, &zapbuf, &endian, data);
  893. if (err)
  894. return err;
  895. block_type = zfs_to_cpu64(*((uint64_t *) zapbuf), endian);
  896. if (block_type == ZBT_MICRO) {
  897. err = (mzap_lookup(zapbuf, endian, size, name, val));
  898. free(zapbuf);
  899. return err;
  900. } else if (block_type == ZBT_HEADER) {
  901. /* this is a fat zap */
  902. err = (fzap_lookup(zap_dnode, zapbuf, name, val, data));
  903. free(zapbuf);
  904. return err;
  905. }
  906. printf("unknown ZAP type\n");
  907. return ZFS_ERR_BAD_FS;
  908. }
  909. static int
  910. zap_iterate(dnode_end_t *zap_dnode,
  911. int (*hook)(const char *name, uint64_t val,
  912. struct zfs_data *data),
  913. struct zfs_data *data)
  914. {
  915. uint64_t block_type;
  916. int size;
  917. void *zapbuf;
  918. int err;
  919. int ret;
  920. zfs_endian_t endian;
  921. /* Read in the first block of the zap object data. */
  922. size = zfs_to_cpu16(zap_dnode->dn.dn_datablkszsec, zap_dnode->endian) << SPA_MINBLOCKSHIFT;
  923. err = dmu_read(zap_dnode, 0, &zapbuf, &endian, data);
  924. if (err)
  925. return 0;
  926. block_type = zfs_to_cpu64(*((uint64_t *) zapbuf), endian);
  927. if (block_type == ZBT_MICRO) {
  928. ret = mzap_iterate(zapbuf, endian, size, hook, data);
  929. free(zapbuf);
  930. return ret;
  931. } else if (block_type == ZBT_HEADER) {
  932. /* this is a fat zap */
  933. ret = fzap_iterate(zap_dnode, zapbuf, hook, data);
  934. free(zapbuf);
  935. return ret;
  936. }
  937. printf("unknown ZAP type\n");
  938. return 0;
  939. }
  940. /*
  941. * Get the dnode of an object number from the metadnode of an object set.
  942. *
  943. * Input
  944. * mdn - metadnode to get the object dnode
  945. * objnum - object number for the object dnode
  946. * buf - data buffer that holds the returning dnode
  947. */
  948. static int
  949. dnode_get(dnode_end_t *mdn, uint64_t objnum, uint8_t type,
  950. dnode_end_t *buf, struct zfs_data *data)
  951. {
  952. uint64_t blkid, blksz; /* the block id this object dnode is in */
  953. int epbs; /* shift of number of dnodes in a block */
  954. int idx; /* index within a block */
  955. void *dnbuf;
  956. int err;
  957. zfs_endian_t endian;
  958. blksz = zfs_to_cpu16(mdn->dn.dn_datablkszsec,
  959. mdn->endian) << SPA_MINBLOCKSHIFT;
  960. epbs = zfs_log2(blksz) - DNODE_SHIFT;
  961. blkid = objnum >> epbs;
  962. idx = objnum & ((1 << epbs) - 1);
  963. if (data->dnode_buf != NULL && memcmp(data->dnode_mdn, mdn,
  964. sizeof(*mdn)) == 0
  965. && objnum >= data->dnode_start && objnum < data->dnode_end) {
  966. memmove(&(buf->dn), &(data->dnode_buf)[idx], DNODE_SIZE);
  967. buf->endian = data->dnode_endian;
  968. if (type && buf->dn.dn_type != type) {
  969. printf("incorrect dnode type: %02X != %02x\n", buf->dn.dn_type, type);
  970. return ZFS_ERR_BAD_FS;
  971. }
  972. return ZFS_ERR_NONE;
  973. }
  974. err = dmu_read(mdn, blkid, &dnbuf, &endian, data);
  975. if (err)
  976. return err;
  977. free(data->dnode_buf);
  978. free(data->dnode_mdn);
  979. data->dnode_mdn = malloc(sizeof(*mdn));
  980. if (!data->dnode_mdn) {
  981. data->dnode_buf = 0;
  982. } else {
  983. memcpy(data->dnode_mdn, mdn, sizeof(*mdn));
  984. data->dnode_buf = dnbuf;
  985. data->dnode_start = blkid << epbs;
  986. data->dnode_end = (blkid + 1) << epbs;
  987. data->dnode_endian = endian;
  988. }
  989. memmove(&(buf->dn), (dnode_phys_t *) dnbuf + idx, DNODE_SIZE);
  990. buf->endian = endian;
  991. if (type && buf->dn.dn_type != type) {
  992. printf("incorrect dnode type\n");
  993. return ZFS_ERR_BAD_FS;
  994. }
  995. return ZFS_ERR_NONE;
  996. }
  997. /*
  998. * Get the file dnode for a given file name where mdn is the meta dnode
  999. * for this ZFS object set. When found, place the file dnode in dn.
  1000. * The 'path' argument will be mangled.
  1001. *
  1002. */
  1003. static int
  1004. dnode_get_path(dnode_end_t *mdn, const char *path_in, dnode_end_t *dn,
  1005. struct zfs_data *data)
  1006. {
  1007. uint64_t objnum, version;
  1008. char *cname, ch;
  1009. int err = ZFS_ERR_NONE;
  1010. char *path, *path_buf;
  1011. struct dnode_chain {
  1012. struct dnode_chain *next;
  1013. dnode_end_t dn;
  1014. };
  1015. struct dnode_chain *dnode_path = 0, *dn_new, *root;
  1016. dn_new = malloc(sizeof(*dn_new));
  1017. if (!dn_new)
  1018. return ZFS_ERR_OUT_OF_MEMORY;
  1019. dn_new->next = 0;
  1020. dnode_path = root = dn_new;
  1021. err = dnode_get(mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
  1022. &(dnode_path->dn), data);
  1023. if (err) {
  1024. free(dn_new);
  1025. return err;
  1026. }
  1027. err = zap_lookup(&(dnode_path->dn), ZPL_VERSION_STR, &version, data);
  1028. if (err) {
  1029. free(dn_new);
  1030. return err;
  1031. }
  1032. if (version > ZPL_VERSION) {
  1033. free(dn_new);
  1034. printf("too new ZPL version\n");
  1035. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  1036. }
  1037. err = zap_lookup(&(dnode_path->dn), ZFS_ROOT_OBJ, &objnum, data);
  1038. if (err) {
  1039. free(dn_new);
  1040. return err;
  1041. }
  1042. err = dnode_get(mdn, objnum, 0, &(dnode_path->dn), data);
  1043. if (err) {
  1044. free(dn_new);
  1045. return err;
  1046. }
  1047. path = path_buf = strdup(path_in);
  1048. if (!path_buf) {
  1049. free(dn_new);
  1050. return ZFS_ERR_OUT_OF_MEMORY;
  1051. }
  1052. while (1) {
  1053. /* skip leading slashes */
  1054. while (*path == '/')
  1055. path++;
  1056. if (!*path)
  1057. break;
  1058. /* get the next component name */
  1059. cname = path;
  1060. while (*path && *path != '/')
  1061. path++;
  1062. /* Skip dot. */
  1063. if (cname + 1 == path && cname[0] == '.')
  1064. continue;
  1065. /* Handle double dot. */
  1066. if (cname + 2 == path && cname[0] == '.' && cname[1] == '.') {
  1067. if (dn_new->next) {
  1068. dn_new = dnode_path;
  1069. dnode_path = dn_new->next;
  1070. free(dn_new);
  1071. } else {
  1072. printf("can't resolve ..\n");
  1073. err = ZFS_ERR_FILE_NOT_FOUND;
  1074. break;
  1075. }
  1076. continue;
  1077. }
  1078. ch = *path;
  1079. *path = 0; /* ensure null termination */
  1080. if (dnode_path->dn.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) {
  1081. free(path_buf);
  1082. printf("not a directory\n");
  1083. return ZFS_ERR_BAD_FILE_TYPE;
  1084. }
  1085. err = zap_lookup(&(dnode_path->dn), cname, &objnum, data);
  1086. if (err)
  1087. break;
  1088. dn_new = malloc(sizeof(*dn_new));
  1089. if (!dn_new) {
  1090. err = ZFS_ERR_OUT_OF_MEMORY;
  1091. break;
  1092. }
  1093. dn_new->next = dnode_path;
  1094. dnode_path = dn_new;
  1095. objnum = ZFS_DIRENT_OBJ(objnum);
  1096. err = dnode_get(mdn, objnum, 0, &(dnode_path->dn), data);
  1097. if (err)
  1098. break;
  1099. *path = ch;
  1100. }
  1101. if (!err)
  1102. memcpy(dn, &(dnode_path->dn), sizeof(*dn));
  1103. while (dnode_path) {
  1104. dn_new = dnode_path->next;
  1105. free(dnode_path);
  1106. dnode_path = dn_new;
  1107. }
  1108. free(path_buf);
  1109. return err;
  1110. }
  1111. /*
  1112. * Given a MOS metadnode, get the metadnode of a given filesystem name (fsname),
  1113. * e.g. pool/rootfs, or a given object number (obj), e.g. the object number
  1114. * of pool/rootfs.
  1115. *
  1116. * If no fsname and no obj are given, return the DSL_DIR metadnode.
  1117. * If fsname is given, return its metadnode and its matching object number.
  1118. * If only obj is given, return the metadnode for this object number.
  1119. *
  1120. */
  1121. static int
  1122. get_filesystem_dnode(dnode_end_t *mosmdn, char *fsname,
  1123. dnode_end_t *mdn, struct zfs_data *data)
  1124. {
  1125. uint64_t objnum;
  1126. int err;
  1127. err = dnode_get(mosmdn, DMU_POOL_DIRECTORY_OBJECT,
  1128. DMU_OT_OBJECT_DIRECTORY, mdn, data);
  1129. if (err)
  1130. return err;
  1131. err = zap_lookup(mdn, DMU_POOL_ROOT_DATASET, &objnum, data);
  1132. if (err)
  1133. return err;
  1134. err = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, data);
  1135. if (err)
  1136. return err;
  1137. while (*fsname) {
  1138. uint64_t childobj;
  1139. char *cname, ch;
  1140. while (*fsname == '/')
  1141. fsname++;
  1142. if (!*fsname || *fsname == '@')
  1143. break;
  1144. cname = fsname;
  1145. while (*fsname && !isspace(*fsname) && *fsname != '/')
  1146. fsname++;
  1147. ch = *fsname;
  1148. *fsname = 0;
  1149. childobj = zfs_to_cpu64((((dsl_dir_phys_t *) DN_BONUS(&mdn->dn)))->dd_child_dir_zapobj, mdn->endian);
  1150. err = dnode_get(mosmdn, childobj,
  1151. DMU_OT_DSL_DIR_CHILD_MAP, mdn, data);
  1152. if (err)
  1153. return err;
  1154. err = zap_lookup(mdn, cname, &objnum, data);
  1155. if (err)
  1156. return err;
  1157. err = dnode_get(mosmdn, objnum, DMU_OT_DSL_DIR, mdn, data);
  1158. if (err)
  1159. return err;
  1160. *fsname = ch;
  1161. }
  1162. return ZFS_ERR_NONE;
  1163. }
  1164. static int
  1165. make_mdn(dnode_end_t *mdn, struct zfs_data *data)
  1166. {
  1167. void *osp;
  1168. blkptr_t *bp;
  1169. size_t ospsize;
  1170. int err;
  1171. bp = &(((dsl_dataset_phys_t *) DN_BONUS(&mdn->dn))->ds_bp);
  1172. err = zio_read(bp, mdn->endian, &osp, &ospsize, data);
  1173. if (err)
  1174. return err;
  1175. if (ospsize < OBJSET_PHYS_SIZE_V14) {
  1176. free(osp);
  1177. printf("too small osp\n");
  1178. return ZFS_ERR_BAD_FS;
  1179. }
  1180. mdn->endian = (zfs_to_cpu64(bp->blk_prop, mdn->endian)>>63) & 1;
  1181. memmove((char *) &(mdn->dn),
  1182. (char *) &((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
  1183. free(osp);
  1184. return ZFS_ERR_NONE;
  1185. }
  1186. static int
  1187. dnode_get_fullpath(const char *fullpath, dnode_end_t *mdn,
  1188. uint64_t *mdnobj, dnode_end_t *dn, int *isfs,
  1189. struct zfs_data *data)
  1190. {
  1191. char *fsname, *snapname;
  1192. const char *ptr_at, *filename;
  1193. uint64_t headobj;
  1194. int err;
  1195. ptr_at = strchr(fullpath, '@');
  1196. if (!ptr_at) {
  1197. *isfs = 1;
  1198. filename = 0;
  1199. snapname = 0;
  1200. fsname = strdup(fullpath);
  1201. } else {
  1202. const char *ptr_slash = strchr(ptr_at, '/');
  1203. *isfs = 0;
  1204. fsname = malloc(ptr_at - fullpath + 1);
  1205. if (!fsname)
  1206. return ZFS_ERR_OUT_OF_MEMORY;
  1207. memcpy(fsname, fullpath, ptr_at - fullpath);
  1208. fsname[ptr_at - fullpath] = 0;
  1209. if (ptr_at[1] && ptr_at[1] != '/') {
  1210. snapname = malloc(ptr_slash - ptr_at);
  1211. if (!snapname) {
  1212. free(fsname);
  1213. return ZFS_ERR_OUT_OF_MEMORY;
  1214. }
  1215. memcpy(snapname, ptr_at + 1, ptr_slash - ptr_at - 1);
  1216. snapname[ptr_slash - ptr_at - 1] = 0;
  1217. } else {
  1218. snapname = 0;
  1219. }
  1220. if (ptr_slash)
  1221. filename = ptr_slash;
  1222. else
  1223. filename = "/";
  1224. printf("zfs fsname = '%s' snapname='%s' filename = '%s'\n",
  1225. fsname, snapname, filename);
  1226. }
  1227. err = get_filesystem_dnode(&(data->mos), fsname, dn, data);
  1228. if (err) {
  1229. free(fsname);
  1230. free(snapname);
  1231. return err;
  1232. }
  1233. headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&dn->dn))->dd_head_dataset_obj, dn->endian);
  1234. err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, mdn, data);
  1235. if (err) {
  1236. free(fsname);
  1237. free(snapname);
  1238. return err;
  1239. }
  1240. if (snapname) {
  1241. uint64_t snapobj;
  1242. snapobj = zfs_to_cpu64(((dsl_dataset_phys_t *) DN_BONUS(&mdn->dn))->ds_snapnames_zapobj, mdn->endian);
  1243. err = dnode_get(&(data->mos), snapobj,
  1244. DMU_OT_DSL_DS_SNAP_MAP, mdn, data);
  1245. if (!err)
  1246. err = zap_lookup(mdn, snapname, &headobj, data);
  1247. if (!err)
  1248. err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, mdn, data);
  1249. if (err) {
  1250. free(fsname);
  1251. free(snapname);
  1252. return err;
  1253. }
  1254. }
  1255. if (mdnobj)
  1256. *mdnobj = headobj;
  1257. make_mdn(mdn, data);
  1258. if (*isfs) {
  1259. free(fsname);
  1260. free(snapname);
  1261. return ZFS_ERR_NONE;
  1262. }
  1263. err = dnode_get_path(mdn, filename, dn, data);
  1264. free(fsname);
  1265. free(snapname);
  1266. return err;
  1267. }
  1268. /*
  1269. * For a given XDR packed nvlist, verify the first 4 bytes and move on.
  1270. *
  1271. * An XDR packed nvlist is encoded as (comments from nvs_xdr_create) :
  1272. *
  1273. * encoding method/host endian (4 bytes)
  1274. * nvl_version (4 bytes)
  1275. * nvl_nvflag (4 bytes)
  1276. * encoded nvpairs:
  1277. * encoded size of the nvpair (4 bytes)
  1278. * decoded size of the nvpair (4 bytes)
  1279. * name string size (4 bytes)
  1280. * name string data (sizeof(NV_ALIGN4(string))
  1281. * data type (4 bytes)
  1282. * # of elements in the nvpair (4 bytes)
  1283. * data
  1284. * 2 zero's for the last nvpair
  1285. * (end of the entire list) (8 bytes)
  1286. *
  1287. */
  1288. static int
  1289. nvlist_find_value(char *nvlist, char *name, int valtype, char **val,
  1290. size_t *size_out, size_t *nelm_out)
  1291. {
  1292. int name_len, type, encode_size;
  1293. char *nvpair, *nvp_name;
  1294. /* Verify if the 1st and 2nd byte in the nvlist are valid. */
  1295. /* NOTE: independently of what endianness header announces all
  1296. subsequent values are big-endian. */
  1297. if (nvlist[0] != NV_ENCODE_XDR || (nvlist[1] != NV_LITTLE_ENDIAN
  1298. && nvlist[1] != NV_BIG_ENDIAN)) {
  1299. printf("zfs incorrect nvlist header\n");
  1300. return ZFS_ERR_BAD_FS;
  1301. }
  1302. /* skip the header, nvl_version, and nvl_nvflag */
  1303. nvlist = nvlist + 4 * 3;
  1304. /*
  1305. * Loop thru the nvpair list
  1306. * The XDR representation of an integer is in big-endian byte order.
  1307. */
  1308. while ((encode_size = be32_to_cpu(*(uint32_t *) nvlist))) {
  1309. int nelm;
  1310. nvpair = nvlist + 4 * 2; /* skip the encode/decode size */
  1311. name_len = be32_to_cpu(*(uint32_t *) nvpair);
  1312. nvpair += 4;
  1313. nvp_name = nvpair;
  1314. nvpair = nvpair + ((name_len + 3) & ~3); /* align */
  1315. type = be32_to_cpu(*(uint32_t *) nvpair);
  1316. nvpair += 4;
  1317. nelm = be32_to_cpu(*(uint32_t *) nvpair);
  1318. if (nelm < 1) {
  1319. printf("empty nvpair\n");
  1320. return ZFS_ERR_BAD_FS;
  1321. }
  1322. nvpair += 4;
  1323. if ((strncmp(nvp_name, name, name_len) == 0) && type == valtype) {
  1324. *val = nvpair;
  1325. *size_out = encode_size;
  1326. if (nelm_out)
  1327. *nelm_out = nelm;
  1328. return 1;
  1329. }
  1330. nvlist += encode_size; /* goto the next nvpair */
  1331. }
  1332. return 0;
  1333. }
  1334. int
  1335. zfs_nvlist_lookup_uint64(char *nvlist, char *name, uint64_t *out)
  1336. {
  1337. char *nvpair;
  1338. size_t size;
  1339. int found;
  1340. found = nvlist_find_value(nvlist, name, DATA_TYPE_UINT64, &nvpair, &size, 0);
  1341. if (!found)
  1342. return 0;
  1343. if (size < sizeof(uint64_t)) {
  1344. printf("invalid uint64\n");
  1345. return ZFS_ERR_BAD_FS;
  1346. }
  1347. *out = be64_to_cpu(*(uint64_t *) nvpair);
  1348. return 1;
  1349. }
  1350. char *
  1351. zfs_nvlist_lookup_string(char *nvlist, char *name)
  1352. {
  1353. char *nvpair;
  1354. char *ret;
  1355. size_t slen;
  1356. size_t size;
  1357. int found;
  1358. found = nvlist_find_value(nvlist, name, DATA_TYPE_STRING, &nvpair, &size, 0);
  1359. if (!found)
  1360. return 0;
  1361. if (size < 4) {
  1362. printf("invalid string\n");
  1363. return 0;
  1364. }
  1365. slen = be32_to_cpu(*(uint32_t *) nvpair);
  1366. if (slen > size - 4)
  1367. slen = size - 4;
  1368. ret = malloc(slen + 1);
  1369. if (!ret)
  1370. return 0;
  1371. memcpy(ret, nvpair + 4, slen);
  1372. ret[slen] = 0;
  1373. return ret;
  1374. }
  1375. char *
  1376. zfs_nvlist_lookup_nvlist(char *nvlist, char *name)
  1377. {
  1378. char *nvpair;
  1379. char *ret;
  1380. size_t size;
  1381. int found;
  1382. found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
  1383. &size, 0);
  1384. if (!found)
  1385. return 0;
  1386. ret = calloc(1, size + 3 * sizeof(uint32_t));
  1387. if (!ret)
  1388. return 0;
  1389. memcpy(ret, nvlist, sizeof(uint32_t));
  1390. memcpy(ret + sizeof(uint32_t), nvpair, size);
  1391. return ret;
  1392. }
  1393. int
  1394. zfs_nvlist_lookup_nvlist_array_get_nelm(char *nvlist, char *name)
  1395. {
  1396. char *nvpair;
  1397. size_t nelm, size;
  1398. int found;
  1399. found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
  1400. &size, &nelm);
  1401. if (!found)
  1402. return -1;
  1403. return nelm;
  1404. }
  1405. char *
  1406. zfs_nvlist_lookup_nvlist_array(char *nvlist, char *name,
  1407. size_t index)
  1408. {
  1409. char *nvpair, *nvpairptr;
  1410. int found;
  1411. char *ret;
  1412. size_t size;
  1413. unsigned i;
  1414. size_t nelm;
  1415. found = nvlist_find_value(nvlist, name, DATA_TYPE_NVLIST, &nvpair,
  1416. &size, &nelm);
  1417. if (!found)
  1418. return 0;
  1419. if (index >= nelm) {
  1420. printf("trying to lookup past nvlist array\n");
  1421. return 0;
  1422. }
  1423. nvpairptr = nvpair;
  1424. for (i = 0; i < index; i++) {
  1425. uint32_t encode_size;
  1426. /* skip the header, nvl_version, and nvl_nvflag */
  1427. nvpairptr = nvpairptr + 4 * 2;
  1428. while (nvpairptr < nvpair + size
  1429. && (encode_size = be32_to_cpu(*(uint32_t *) nvpairptr)))
  1430. nvlist += encode_size; /* goto the next nvpair */
  1431. nvlist = nvlist + 4 * 2; /* skip the ending 2 zeros - 8 bytes */
  1432. }
  1433. if (nvpairptr >= nvpair + size
  1434. || nvpairptr + be32_to_cpu(*(uint32_t *) (nvpairptr + 4 * 2))
  1435. >= nvpair + size) {
  1436. printf("incorrect nvlist array\n");
  1437. return 0;
  1438. }
  1439. ret = calloc(1, be32_to_cpu(*(uint32_t *) (nvpairptr + 4 * 2))
  1440. + 3 * sizeof(uint32_t));
  1441. if (!ret)
  1442. return 0;
  1443. memcpy(ret, nvlist, sizeof(uint32_t));
  1444. memcpy(ret + sizeof(uint32_t), nvpairptr, size);
  1445. return ret;
  1446. }
  1447. static int
  1448. int_zfs_fetch_nvlist(struct zfs_data *data, char **nvlist)
  1449. {
  1450. int err;
  1451. *nvlist = malloc(VDEV_PHYS_SIZE);
  1452. /* Read in the vdev name-value pair list (112K). */
  1453. err = zfs_devread(data->vdev_phys_sector, 0, VDEV_PHYS_SIZE, *nvlist);
  1454. if (err) {
  1455. free(*nvlist);
  1456. *nvlist = 0;
  1457. return err;
  1458. }
  1459. return ZFS_ERR_NONE;
  1460. }
  1461. /*
  1462. * Check the disk label information and retrieve needed vdev name-value pairs.
  1463. *
  1464. */
  1465. static int
  1466. check_pool_label(struct zfs_data *data)
  1467. {
  1468. uint64_t pool_state;
  1469. char *nvlist; /* for the pool */
  1470. char *vdevnvlist; /* for the vdev */
  1471. uint64_t diskguid;
  1472. uint64_t version;
  1473. int found;
  1474. int err;
  1475. err = int_zfs_fetch_nvlist(data, &nvlist);
  1476. if (err)
  1477. return err;
  1478. found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_STATE,
  1479. &pool_state);
  1480. if (!found) {
  1481. free(nvlist);
  1482. printf("zfs pool state not found\n");
  1483. return ZFS_ERR_BAD_FS;
  1484. }
  1485. if (pool_state == POOL_STATE_DESTROYED) {
  1486. free(nvlist);
  1487. printf("zpool is marked as destroyed\n");
  1488. return ZFS_ERR_BAD_FS;
  1489. }
  1490. data->label_txg = 0;
  1491. found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_TXG,
  1492. &data->label_txg);
  1493. if (!found) {
  1494. free(nvlist);
  1495. printf("zfs pool txg not found\n");
  1496. return ZFS_ERR_BAD_FS;
  1497. }
  1498. /* not an active device */
  1499. if (data->label_txg == 0) {
  1500. free(nvlist);
  1501. printf("zpool is not active\n");
  1502. return ZFS_ERR_BAD_FS;
  1503. }
  1504. found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_VERSION,
  1505. &version);
  1506. if (!found) {
  1507. free(nvlist);
  1508. printf("zpool config version not found\n");
  1509. return ZFS_ERR_BAD_FS;
  1510. }
  1511. if (version > SPA_VERSION) {
  1512. free(nvlist);
  1513. printf("SPA version too new %llu > %llu\n",
  1514. (unsigned long long) version,
  1515. (unsigned long long) SPA_VERSION);
  1516. return ZFS_ERR_NOT_IMPLEMENTED_YET;
  1517. }
  1518. vdevnvlist = zfs_nvlist_lookup_nvlist(nvlist, ZPOOL_CONFIG_VDEV_TREE);
  1519. if (!vdevnvlist) {
  1520. free(nvlist);
  1521. printf("ZFS config vdev tree not found\n");
  1522. return ZFS_ERR_BAD_FS;
  1523. }
  1524. found = zfs_nvlist_lookup_uint64(vdevnvlist, ZPOOL_CONFIG_ASHIFT,
  1525. &data->vdev_ashift);
  1526. free(vdevnvlist);
  1527. if (!found) {
  1528. free(nvlist);
  1529. printf("ZPOOL config ashift not found\n");
  1530. return ZFS_ERR_BAD_FS;
  1531. }
  1532. found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_GUID, &diskguid);
  1533. if (!found) {
  1534. free(nvlist);
  1535. printf("ZPOOL config guid not found\n");
  1536. return ZFS_ERR_BAD_FS;
  1537. }
  1538. found = zfs_nvlist_lookup_uint64(nvlist, ZPOOL_CONFIG_POOL_GUID, &data->pool_guid);
  1539. if (!found) {
  1540. free(nvlist);
  1541. printf("ZPOOL config pool guid not found\n");
  1542. return ZFS_ERR_BAD_FS;
  1543. }
  1544. free(nvlist);
  1545. printf("ZFS Pool GUID: %llu (%016llx) Label: GUID: %llu (%016llx), txg: %llu, SPA v%llu, ashift: %llu\n",
  1546. (unsigned long long) data->pool_guid,
  1547. (unsigned long long) data->pool_guid,
  1548. (unsigned long long) diskguid,
  1549. (unsigned long long) diskguid,
  1550. (unsigned long long) data->label_txg,
  1551. (unsigned long long) version,
  1552. (unsigned long long) data->vdev_ashift);
  1553. return ZFS_ERR_NONE;
  1554. }
  1555. /*
  1556. * vdev_label_start returns the physical disk offset (in bytes) of
  1557. * label "l".
  1558. */
  1559. static uint64_t vdev_label_start(uint64_t psize, int l)
  1560. {
  1561. return (l * sizeof(vdev_label_t) + (l < VDEV_LABELS / 2 ?
  1562. 0 : psize -
  1563. VDEV_LABELS * sizeof(vdev_label_t)));
  1564. }
  1565. void
  1566. zfs_unmount(struct zfs_data *data)
  1567. {
  1568. free(data->dnode_buf);
  1569. free(data->dnode_mdn);
  1570. free(data->file_buf);
  1571. free(data);
  1572. }
  1573. /*
  1574. * zfs_mount() locates a valid uberblock of the root pool and read in its MOS
  1575. * to the memory address MOS.
  1576. *
  1577. */
  1578. struct zfs_data *
  1579. zfs_mount(device_t dev)
  1580. {
  1581. struct zfs_data *data = 0;
  1582. int label = 0, bestlabel = -1;
  1583. char *ub_array;
  1584. uberblock_t *ubbest;
  1585. uberblock_t *ubcur = NULL;
  1586. void *osp = 0;
  1587. size_t ospsize;
  1588. int err;
  1589. data = malloc(sizeof(*data));
  1590. if (!data)
  1591. return 0;
  1592. memset(data, 0, sizeof(*data));
  1593. ub_array = malloc(VDEV_UBERBLOCK_RING);
  1594. if (!ub_array) {
  1595. zfs_unmount(data);
  1596. return 0;
  1597. }
  1598. ubbest = malloc(sizeof(*ubbest));
  1599. if (!ubbest) {
  1600. zfs_unmount(data);
  1601. return 0;
  1602. }
  1603. memset(ubbest, 0, sizeof(*ubbest));
  1604. /*
  1605. * some eltorito stacks don't give us a size and
  1606. * we end up setting the size to MAXUINT, further
  1607. * some of these devices stop working once a single
  1608. * read past the end has been issued. Checking
  1609. * for a maximum part_length and skipping the backup
  1610. * labels at the end of the slice/partition/device
  1611. * avoids breaking down on such devices.
  1612. */
  1613. const int vdevnum =
  1614. dev->part_length == 0 ?
  1615. VDEV_LABELS / 2 : VDEV_LABELS;
  1616. /* Size in bytes of the device (disk or partition) aligned to label size*/
  1617. uint64_t device_size =
  1618. dev->part_length << SECTOR_BITS;
  1619. const uint64_t alignedbytes =
  1620. P2ALIGN(device_size, (uint64_t) sizeof(vdev_label_t));
  1621. for (label = 0; label < vdevnum; label++) {
  1622. uint64_t labelstartbytes = vdev_label_start(alignedbytes, label);
  1623. uint64_t labelstart = labelstartbytes >> SECTOR_BITS;
  1624. debug("zfs reading label %d at sector %llu (byte %llu)\n",
  1625. label, (unsigned long long) labelstart,
  1626. (unsigned long long) labelstartbytes);
  1627. data->vdev_phys_sector = labelstart +
  1628. ((VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE) >> SECTOR_BITS);
  1629. err = check_pool_label(data);
  1630. if (err) {
  1631. printf("zfs error checking label %d\n", label);
  1632. continue;
  1633. }
  1634. /* Read in the uberblock ring (128K). */
  1635. err = zfs_devread(data->vdev_phys_sector +
  1636. (VDEV_PHYS_SIZE >> SECTOR_BITS),
  1637. 0, VDEV_UBERBLOCK_RING, ub_array);
  1638. if (err) {
  1639. printf("zfs error reading uberblock ring for label %d\n", label);
  1640. continue;
  1641. }
  1642. ubcur = find_bestub(ub_array, data);
  1643. if (!ubcur) {
  1644. printf("zfs No good uberblocks found in label %d\n", label);
  1645. continue;
  1646. }
  1647. if (vdev_uberblock_compare(ubcur, ubbest) > 0) {
  1648. /* Looks like the block is good, so use it.*/
  1649. memcpy(ubbest, ubcur, sizeof(*ubbest));
  1650. bestlabel = label;
  1651. debug("zfs Current best uberblock found in label %d\n", label);
  1652. }
  1653. }
  1654. free(ub_array);
  1655. /* We zero'd the structure to begin with. If we never assigned to it,
  1656. magic will still be zero. */
  1657. if (!ubbest->ub_magic) {
  1658. printf("couldn't find a valid ZFS label\n");
  1659. zfs_unmount(data);
  1660. free(ubbest);
  1661. return 0;
  1662. }
  1663. debug("zfs ubbest %p in label %d\n", ubbest, bestlabel);
  1664. zfs_endian_t ub_endian =
  1665. zfs_to_cpu64(ubbest->ub_magic, LITTLE_ENDIAN) == UBERBLOCK_MAGIC
  1666. ? LITTLE_ENDIAN : BIG_ENDIAN;
  1667. debug("zfs endian set to %s\n", !ub_endian ? "big" : "little");
  1668. err = zio_read(&ubbest->ub_rootbp, ub_endian, &osp, &ospsize, data);
  1669. if (err) {
  1670. printf("couldn't zio_read object directory\n");
  1671. zfs_unmount(data);
  1672. free(ubbest);
  1673. return 0;
  1674. }
  1675. if (ospsize < OBJSET_PHYS_SIZE_V14) {
  1676. printf("osp too small\n");
  1677. zfs_unmount(data);
  1678. free(osp);
  1679. free(ubbest);
  1680. return 0;
  1681. }
  1682. /* Got the MOS. Save it at the memory addr MOS. */
  1683. memmove(&(data->mos.dn), &((objset_phys_t *) osp)->os_meta_dnode, DNODE_SIZE);
  1684. data->mos.endian =
  1685. (zfs_to_cpu64(ubbest->ub_rootbp.blk_prop, ub_endian) >> 63) & 1;
  1686. memmove(&(data->current_uberblock), ubbest, sizeof(uberblock_t));
  1687. free(osp);
  1688. free(ubbest);
  1689. return data;
  1690. }
  1691. int
  1692. zfs_fetch_nvlist(device_t dev, char **nvlist)
  1693. {
  1694. struct zfs_data *zfs;
  1695. int err;
  1696. zfs = zfs_mount(dev);
  1697. if (!zfs)
  1698. return ZFS_ERR_BAD_FS;
  1699. err = int_zfs_fetch_nvlist(zfs, nvlist);
  1700. zfs_unmount(zfs);
  1701. return err;
  1702. }
  1703. static int
  1704. zfs_label(device_t device, char **label)
  1705. {
  1706. char *nvlist;
  1707. int err;
  1708. struct zfs_data *data;
  1709. data = zfs_mount(device);
  1710. if (!data)
  1711. return ZFS_ERR_BAD_FS;
  1712. err = int_zfs_fetch_nvlist(data, &nvlist);
  1713. if (err) {
  1714. zfs_unmount(data);
  1715. return err;
  1716. }
  1717. *label = zfs_nvlist_lookup_string(nvlist, ZPOOL_CONFIG_POOL_NAME);
  1718. free(nvlist);
  1719. zfs_unmount(data);
  1720. return ZFS_ERR_NONE;
  1721. }
  1722. static int
  1723. zfs_uuid(device_t device, char **uuid)
  1724. {
  1725. struct zfs_data *data;
  1726. data = zfs_mount(device);
  1727. if (!data)
  1728. return ZFS_ERR_BAD_FS;
  1729. *uuid = malloc(17); /* %016llx + nil */
  1730. if (!*uuid)
  1731. return ZFS_ERR_OUT_OF_MEMORY;
  1732. /* *uuid = xasprintf ("%016llx", (long long unsigned) data->pool_guid);*/
  1733. snprintf(*uuid, 17, "%016llx", (long long unsigned) data->pool_guid);
  1734. zfs_unmount(data);
  1735. return ZFS_ERR_NONE;
  1736. }
  1737. /*
  1738. * zfs_open() locates a file in the rootpool by following the
  1739. * MOS and places the dnode of the file in the memory address DNODE.
  1740. */
  1741. int
  1742. zfs_open(struct zfs_file *file, const char *fsfilename)
  1743. {
  1744. struct zfs_data *data;
  1745. int err;
  1746. int isfs;
  1747. data = zfs_mount(file->device);
  1748. if (!data)
  1749. return ZFS_ERR_BAD_FS;
  1750. err = dnode_get_fullpath(fsfilename, &(data->mdn), 0,
  1751. &(data->dnode), &isfs, data);
  1752. if (err) {
  1753. zfs_unmount(data);
  1754. return err;
  1755. }
  1756. if (isfs) {
  1757. zfs_unmount(data);
  1758. printf("Missing @ or / separator\n");
  1759. return ZFS_ERR_FILE_NOT_FOUND;
  1760. }
  1761. /* We found the dnode for this file. Verify if it is a plain file. */
  1762. if (data->dnode.dn.dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
  1763. zfs_unmount(data);
  1764. printf("not a file\n");
  1765. return ZFS_ERR_BAD_FILE_TYPE;
  1766. }
  1767. /* get the file size and set the file position to 0 */
  1768. /*
  1769. * For DMU_OT_SA we will need to locate the SIZE attribute
  1770. * attribute, which could be either in the bonus buffer
  1771. * or the "spill" block.
  1772. */
  1773. if (data->dnode.dn.dn_bonustype == DMU_OT_SA) {
  1774. void *sahdrp;
  1775. int hdrsize;
  1776. if (data->dnode.dn.dn_bonuslen != 0) {
  1777. sahdrp = (sa_hdr_phys_t *) DN_BONUS(&data->dnode.dn);
  1778. } else if (data->dnode.dn.dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
  1779. blkptr_t *bp = &data->dnode.dn.dn_spill;
  1780. err = zio_read(bp, data->dnode.endian, &sahdrp, NULL, data);
  1781. if (err)
  1782. return err;
  1783. } else {
  1784. printf("filesystem is corrupt :(\n");
  1785. return ZFS_ERR_BAD_FS;
  1786. }
  1787. hdrsize = SA_HDR_SIZE(((sa_hdr_phys_t *) sahdrp));
  1788. file->size = *(uint64_t *) ((char *) sahdrp + hdrsize + SA_SIZE_OFFSET);
  1789. } else {
  1790. file->size = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&data->dnode.dn))->zp_size, data->dnode.endian);
  1791. }
  1792. file->data = data;
  1793. file->offset = 0;
  1794. return ZFS_ERR_NONE;
  1795. }
  1796. uint64_t
  1797. zfs_read(zfs_file_t file, char *buf, uint64_t len)
  1798. {
  1799. struct zfs_data *data = (struct zfs_data *) file->data;
  1800. int blksz, movesize;
  1801. uint64_t length;
  1802. int64_t red;
  1803. int err;
  1804. if (data->file_buf == NULL) {
  1805. data->file_buf = malloc(SPA_MAXBLOCKSIZE);
  1806. if (!data->file_buf)
  1807. return -1;
  1808. data->file_start = data->file_end = 0;
  1809. }
  1810. /*
  1811. * If offset is in memory, move it into the buffer provided and return.
  1812. */
  1813. if (file->offset >= data->file_start
  1814. && file->offset + len <= data->file_end) {
  1815. memmove(buf, data->file_buf + file->offset - data->file_start,
  1816. len);
  1817. return len;
  1818. }
  1819. blksz = zfs_to_cpu16(data->dnode.dn.dn_datablkszsec,
  1820. data->dnode.endian) << SPA_MINBLOCKSHIFT;
  1821. /*
  1822. * Entire Dnode is too big to fit into the space available. We
  1823. * will need to read it in chunks. This could be optimized to
  1824. * read in as large a chunk as there is space available, but for
  1825. * now, this only reads in one data block at a time.
  1826. */
  1827. length = len;
  1828. red = 0;
  1829. while (length) {
  1830. void *t;
  1831. /*
  1832. * Find requested blkid and the offset within that block.
  1833. */
  1834. uint64_t blkid = (file->offset + red) / blksz;
  1835. free(data->file_buf);
  1836. data->file_buf = 0;
  1837. err = dmu_read(&(data->dnode), blkid, &t,
  1838. 0, data);
  1839. data->file_buf = t;
  1840. if (err)
  1841. return -1;
  1842. data->file_start = blkid * blksz;
  1843. data->file_end = data->file_start + blksz;
  1844. movesize = MIN(length, data->file_end - (int) file->offset - red);
  1845. memmove(buf, data->file_buf + file->offset + red
  1846. - data->file_start, movesize);
  1847. buf += movesize;
  1848. length -= movesize;
  1849. red += movesize;
  1850. }
  1851. return len;
  1852. }
  1853. int
  1854. zfs_close(zfs_file_t file)
  1855. {
  1856. zfs_unmount((struct zfs_data *) file->data);
  1857. return ZFS_ERR_NONE;
  1858. }
  1859. int
  1860. zfs_getmdnobj(device_t dev, const char *fsfilename,
  1861. uint64_t *mdnobj)
  1862. {
  1863. struct zfs_data *data;
  1864. int err;
  1865. int isfs;
  1866. data = zfs_mount(dev);
  1867. if (!data)
  1868. return ZFS_ERR_BAD_FS;
  1869. err = dnode_get_fullpath(fsfilename, &(data->mdn), mdnobj,
  1870. &(data->dnode), &isfs, data);
  1871. zfs_unmount(data);
  1872. return err;
  1873. }
  1874. static void
  1875. fill_fs_info(struct zfs_dirhook_info *info,
  1876. dnode_end_t mdn, struct zfs_data *data)
  1877. {
  1878. int err;
  1879. dnode_end_t dn;
  1880. uint64_t objnum;
  1881. uint64_t headobj;
  1882. memset(info, 0, sizeof(*info));
  1883. info->dir = 1;
  1884. if (mdn.dn.dn_type == DMU_OT_DSL_DIR) {
  1885. headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&mdn.dn))->dd_head_dataset_obj, mdn.endian);
  1886. err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, &mdn, data);
  1887. if (err) {
  1888. printf("zfs failed here 1\n");
  1889. return;
  1890. }
  1891. }
  1892. make_mdn(&mdn, data);
  1893. err = dnode_get(&mdn, MASTER_NODE_OBJ, DMU_OT_MASTER_NODE,
  1894. &dn, data);
  1895. if (err) {
  1896. printf("zfs failed here 2\n");
  1897. return;
  1898. }
  1899. err = zap_lookup(&dn, ZFS_ROOT_OBJ, &objnum, data);
  1900. if (err) {
  1901. printf("zfs failed here 3\n");
  1902. return;
  1903. }
  1904. err = dnode_get(&mdn, objnum, 0, &dn, data);
  1905. if (err) {
  1906. printf("zfs failed here 4\n");
  1907. return;
  1908. }
  1909. info->mtimeset = 1;
  1910. info->mtime = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&dn.dn))->zp_mtime[0], dn.endian);
  1911. return;
  1912. }
  1913. static int iterate_zap(const char *name, uint64_t val, struct zfs_data *data)
  1914. {
  1915. struct zfs_dirhook_info info;
  1916. dnode_end_t dn;
  1917. memset(&info, 0, sizeof(info));
  1918. dnode_get(&(data->mdn), val, 0, &dn, data);
  1919. info.mtimeset = 1;
  1920. info.mtime = zfs_to_cpu64(((znode_phys_t *) DN_BONUS(&dn.dn))->zp_mtime[0], dn.endian);
  1921. info.dir = (dn.dn.dn_type == DMU_OT_DIRECTORY_CONTENTS);
  1922. debug("zfs type=%d, name=%s\n",
  1923. (int)dn.dn.dn_type, (char *)name);
  1924. if (!data->userhook)
  1925. return 0;
  1926. return data->userhook(name, &info);
  1927. }
  1928. static int iterate_zap_fs(const char *name, uint64_t val, struct zfs_data *data)
  1929. {
  1930. struct zfs_dirhook_info info;
  1931. dnode_end_t mdn;
  1932. int err;
  1933. err = dnode_get(&(data->mos), val, 0, &mdn, data);
  1934. if (err)
  1935. return 0;
  1936. if (mdn.dn.dn_type != DMU_OT_DSL_DIR)
  1937. return 0;
  1938. fill_fs_info(&info, mdn, data);
  1939. if (!data->userhook)
  1940. return 0;
  1941. return data->userhook(name, &info);
  1942. }
  1943. static int iterate_zap_snap(const char *name, uint64_t val, struct zfs_data *data)
  1944. {
  1945. struct zfs_dirhook_info info;
  1946. char *name2;
  1947. int ret = 0;
  1948. dnode_end_t mdn;
  1949. int err;
  1950. err = dnode_get(&(data->mos), val, 0, &mdn, data);
  1951. if (err)
  1952. return 0;
  1953. if (mdn.dn.dn_type != DMU_OT_DSL_DATASET)
  1954. return 0;
  1955. fill_fs_info(&info, mdn, data);
  1956. name2 = malloc(strlen(name) + 2);
  1957. name2[0] = '@';
  1958. memcpy(name2 + 1, name, strlen(name) + 1);
  1959. if (data->userhook)
  1960. ret = data->userhook(name2, &info);
  1961. free(name2);
  1962. return ret;
  1963. }
  1964. int
  1965. zfs_ls(device_t device, const char *path,
  1966. int (*hook)(const char *, const struct zfs_dirhook_info *))
  1967. {
  1968. struct zfs_data *data;
  1969. int err;
  1970. int isfs;
  1971. #if 0
  1972. char *label = NULL;
  1973. zfs_label(device, &label);
  1974. if (label)
  1975. printf("ZPOOL label '%s'\n",
  1976. label);
  1977. #endif
  1978. data = zfs_mount(device);
  1979. if (!data)
  1980. return ZFS_ERR_BAD_FS;
  1981. data->userhook = hook;
  1982. err = dnode_get_fullpath(path, &(data->mdn), 0, &(data->dnode), &isfs, data);
  1983. if (err) {
  1984. zfs_unmount(data);
  1985. return err;
  1986. }
  1987. if (isfs) {
  1988. uint64_t childobj, headobj;
  1989. uint64_t snapobj;
  1990. dnode_end_t dn;
  1991. struct zfs_dirhook_info info;
  1992. fill_fs_info(&info, data->dnode, data);
  1993. hook("@", &info);
  1994. childobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&data->dnode.dn))->dd_child_dir_zapobj, data->dnode.endian);
  1995. headobj = zfs_to_cpu64(((dsl_dir_phys_t *) DN_BONUS(&data->dnode.dn))->dd_head_dataset_obj, data->dnode.endian);
  1996. err = dnode_get(&(data->mos), childobj,
  1997. DMU_OT_DSL_DIR_CHILD_MAP, &dn, data);
  1998. if (err) {
  1999. zfs_unmount(data);
  2000. return err;
  2001. }
  2002. zap_iterate(&dn, iterate_zap_fs, data);
  2003. err = dnode_get(&(data->mos), headobj, DMU_OT_DSL_DATASET, &dn, data);
  2004. if (err) {
  2005. zfs_unmount(data);
  2006. return err;
  2007. }
  2008. snapobj = zfs_to_cpu64(((dsl_dataset_phys_t *) DN_BONUS(&dn.dn))->ds_snapnames_zapobj, dn.endian);
  2009. err = dnode_get(&(data->mos), snapobj,
  2010. DMU_OT_DSL_DS_SNAP_MAP, &dn, data);
  2011. if (err) {
  2012. zfs_unmount(data);
  2013. return err;
  2014. }
  2015. zap_iterate(&dn, iterate_zap_snap, data);
  2016. } else {
  2017. if (data->dnode.dn.dn_type != DMU_OT_DIRECTORY_CONTENTS) {
  2018. zfs_unmount(data);
  2019. printf("not a directory\n");
  2020. return ZFS_ERR_BAD_FILE_TYPE;
  2021. }
  2022. zap_iterate(&(data->dnode), iterate_zap, data);
  2023. }
  2024. zfs_unmount(data);
  2025. return ZFS_ERR_NONE;
  2026. }