/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"

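/*
 * In-memory mirror of the on-disk chunk mapping: a map_lookup describes
 * one logical chunk and the per-device stripes backing it.  It is
 * stashed (cast) into the bdev field of an extent_map, so the extent
 * map tree can translate logical addresses without extra lookups.
 */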
struct stripe {
	struct btrfs_device *dev;
	u64 physical;
};

struct map_lookup {
	u64 type;
	int io_align;
	int io_width;
	int stripe_len;
	int sector_size;
	int num_stripes;
	struct stripe stripes[];
};

#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct stripe) * (n)))

static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);

int btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;
	struct list_head *uuid_cur;
	struct list_head *devices_cur;
	struct btrfs_device *dev;

	list_for_each(uuid_cur, &fs_uuids) {
		fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
					list);
		while(!list_empty(&fs_devices->devices)) {
			devices_cur = fs_devices->devices.next;
			dev = list_entry(devices_cur, struct btrfs_device,
					 dev_list);
			printk("uuid cleanup finds %s\n", dev->name);
			if (dev->bdev) {
				printk("closing\n");
				close_bdev_excl(dev->bdev);
			}
			list_del(&dev->dev_list);
			/* the name was kstrdup'd when the device was scanned */
			kfree(dev->name);
			kfree(dev);
		}
	}
	return 0;
}

static struct btrfs_device *__find_device(struct list_head *head, u64 devid)
{
	struct btrfs_device *dev;
	struct list_head *cur;

	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		if (dev->devid == devid)
			return dev;
	}
	return NULL;
}

static struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
	struct list_head *cur;
	struct btrfs_fs_devices *fs_devices;

	list_for_each(cur, &fs_uuids) {
		fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}
	return NULL;
}

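/*
 * Record a scanned device in the global fs_uuids list, creating the
 * btrfs_fs_devices entry for its filesystem uuid on first sight.  The
 * device carrying the highest generation becomes the "latest" device
 * (its super block wins at mount), and the smallest devid seen is
 * tracked as the lowest device.
 */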
static int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		fs_devices->lowest_devid = (u64)-1;
		fs_devices->num_devices = 0;
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid);
	}
	if (!device) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		list_add(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;
	}

	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	if (fs_devices->lowest_devid > devid) {
		fs_devices->lowest_devid = devid;
		printk("lowest devid now %Lu\n", devid);
	}
	*fs_devices_ret = fs_devices;
	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;

	mutex_lock(&uuid_mutex);
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (device->bdev) {
			close_bdev_excl(device->bdev);
			printk("close devices closes %s\n", device->name);
		}
		device->bdev = NULL;
	}
	mutex_unlock(&uuid_mutex);
	return 0;
}

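/*
 * Open every scanned device of a filesystem exclusively.  On any
 * failure the whole set is closed again, so callers see either all
 * devices open or none of them.
 */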
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       int flags, void *holder)
{
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;
	int ret;

	mutex_lock(&uuid_mutex);
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		bdev = open_bdev_excl(device->name, flags, holder);
		printk("opening %s devid %Lu\n", device->name, device->devid);
		if (IS_ERR(bdev)) {
			printk("open %s failed\n", device->name);
			ret = PTR_ERR(bdev);
			goto fail;
		}
		if (device->devid == fs_devices->latest_devid)
			fs_devices->latest_bdev = bdev;
		if (device->devid == fs_devices->lowest_devid) {
			fs_devices->lowest_bdev = bdev;
			printk("lowest bdev %s\n", device->name);
		}
		device->bdev = bdev;
	}
	mutex_unlock(&uuid_mutex);
	return 0;
fail:
	mutex_unlock(&uuid_mutex);
	btrfs_close_devices(fs_devices);
	return ret;
}

int btrfs_scan_one_device(const char *path, int flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;

	mutex_lock(&uuid_mutex);

	printk("scan one opens %s\n", path);
	bdev = open_bdev_excl(path, flags, holder);
	if (IS_ERR(bdev)) {
		printk("open failed\n");
		ret = PTR_ERR(bdev);
		goto error;
	}

	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
	if (!bh) {
		ret = -EIO;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("no btrfs found on %s\n", path);
		ret = -ENOENT;
		goto error_brelse;
	}
	devid = le64_to_cpu(disk_super->dev_item.devid);
	printk("found device %Lu on %s\n", devid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

error_brelse:
	brelse(bh);
error_close:
	close_bdev_excl(bdev);
	printk("scan one closes bdev %s\n", path);
error:
	mutex_unlock(&uuid_mutex);
	return ret;
}

/*
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 */
static int find_free_dev_extent(struct btrfs_trans_handle *trans,
				struct btrfs_device *device,
				struct btrfs_path *path,
				u64 num_bytes, u64 *start)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	u64 hole_size = 0;
	u64 last_byte = 0;
	u64 search_start = 0;
	u64 search_end = device->total_bytes;
	int ret;
	int slot = 0;
	int start_found;
	struct extent_buffer *l;

	start_found = 0;
	path->reada = 2;

	/* FIXME use last free of some kind */

	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = max((u64)1024 * 1024, search_start);
	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = btrfs_previous_item(root, path, 0, key.type);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
no_more_items:
			if (!start_found) {
				if (search_start >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				*start = search_start;
				start_found = 1;
				goto check_pending;
			}
			*start = last_byte > search_start ?
				last_byte : search_start;
			if (search_end <= *start) {
				ret = -ENOSPC;
				goto error;
			}
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);
		if (key.objectid < device->devid)
			goto next;
		if (key.objectid > device->devid)
			goto no_more_items;
		if (key.offset >= search_start && key.offset > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			hole_size = key.offset - last_byte;
			if (key.offset > last_byte &&
			    hole_size >= num_bytes) {
				*start = last_byte;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		start_found = 1;
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(*start < search_start);
	if (*start + num_bytes > search_end) {
		ret = -ENOSPC;
		goto error;
	}
	/* check for pending inserts here */
	return 0;

error:
	btrfs_release_path(root, path);
	return ret;
}

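/*
 * Reserve num_bytes of raw space on a device: find a hole with
 * find_free_dev_extent() and record it as a DEV_EXTENT item keyed by
 * (devid, *start) in the device's tree.
 */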
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_device *device,
			   u64 owner, u64 num_bytes, u64 *start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
	if (ret)
		goto err;

	key.objectid = device->devid;
	key.offset = *start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	BUG_ON(ret);

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_owner(leaf, extent, owner);
	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
err:
	btrfs_free_path(path);
	return ret;
}

static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = (u64)-1;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
	if (ret) {
		*objectid = 0;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.objectid + found_key.offset;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
			   u64 *objectid)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*objectid = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_release_path(root, path);
	return ret;
}

/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
int btrfs_add_device(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;
	u64 free_devid;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_next_devid(root, path, &free_devid);
	if (ret)
		goto out;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = free_devid;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
	device->devid = free_devid;
	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_update_device(struct btrfs_trans_handle *trans,
			struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	root = device->dev_root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}

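/*
 * System chunks are duplicated into the sys_chunk_array of the super
 * block so the chunk tree itself can be located at mount time, before
 * any tree can be read.  Append one key/chunk pair to that array.
 */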
int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_key *key,
			   struct btrfs_chunk *chunk, int item_size)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key disk_key;
	u32 array_size;
	u8 *ptr;

	array_size = btrfs_super_sys_array_size(super_copy);
	/* the disk key is written ahead of the chunk, account for it too */
	if (array_size + item_size + sizeof(disk_key)
	    > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
		return -EFBIG;

	ptr = super_copy->sys_chunk_array + array_size;
	btrfs_cpu_key_to_disk(&disk_key, key);
	memcpy(ptr, &disk_key, sizeof(disk_key));
	ptr += sizeof(disk_key);
	memcpy(ptr, chunk, item_size);
	item_size += sizeof(disk_key);
	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
	return 0;
}

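/*
 * Allocate a new chunk of logical address space: pick num_stripes
 * devices with enough free room (shrinking calc_size once if nothing
 * fits), reserve a dev extent on each, insert the chunk item into the
 * chunk tree and mirror the mapping into the in-memory extent map
 * tree so btrfs_map_block() can use it immediately.
 */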
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 *start,
		      u64 *num_bytes, u64 type)
{
	u64 dev_offset;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_stripe *stripes;
	struct btrfs_device *device = NULL;
	struct btrfs_chunk *chunk;
	struct list_head private_devs;
	struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
	struct list_head *cur;
	struct extent_map_tree *em_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 physical;
	u64 calc_size = 1024 * 1024 * 1024;
	u64 avail;
	u64 max_avail = 0;
	int num_stripes = 1;
	int looped = 0;
	int ret;
	int index;
	int stripe_len = 64 * 1024;
	struct btrfs_key key;

	if (list_empty(dev_list))
		return -ENOSPC;

	if (type & BTRFS_BLOCK_GROUP_RAID0)
		num_stripes = btrfs_super_num_devices(&info->super_copy);
	if (type & BTRFS_BLOCK_GROUP_DATA)
		stripe_len = 64 * 1024;
	if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
		stripe_len = 32 * 1024;
again:
	INIT_LIST_HEAD(&private_devs);
	cur = dev_list->next;
	index = 0;
	/* build a private list of devices we will allocate from */
	while (index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		avail = device->total_bytes - device->bytes_used;
		cur = cur->next;
		if (avail > max_avail)
			max_avail = avail;
		if (avail >= calc_size) {
			list_move_tail(&device->dev_list, &private_devs);
			index++;
		}
		if (cur == dev_list)
			break;
	}
	if (index < num_stripes) {
		list_splice(&private_devs, dev_list);
		if (!looped && max_avail > 0) {
			looped = 1;
			calc_size = max_avail;
			goto again;
		}
		return -ENOSPC;
	}

	ret = find_next_chunk(chunk_root, &key.objectid);
	if (ret)
		return ret;

	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
	if (!chunk)
		return -ENOMEM;

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		kfree(chunk);
		return -ENOMEM;
	}

	stripes = &chunk->stripe;

	*num_bytes = calc_size * num_stripes;
	index = 0;
	while (index < num_stripes) {
		BUG_ON(list_empty(&private_devs));
		cur = private_devs.next;
		device = list_entry(cur, struct btrfs_device, dev_list);
		list_move_tail(&device->dev_list, dev_list);
		ret = btrfs_alloc_dev_extent(trans, device,
					     key.objectid,
					     calc_size, &dev_offset);
		BUG_ON(ret);
		printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
		device->bytes_used += calc_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);
		map->stripes[index].dev = device;
		map->stripes[index].physical = dev_offset;
		btrfs_set_stack_stripe_devid(stripes + index, device->devid);
		btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
		physical = dev_offset;
		index++;
	}
	BUG_ON(!list_empty(&private_devs));

	/* key.objectid was set above */
	key.offset = *num_bytes;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
	btrfs_set_stack_chunk_type(chunk, type);
	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
	map->sector_size = extent_root->sectorsize;
	map->stripe_len = stripe_len;
	map->io_align = stripe_len;
	map->io_width = stripe_len;
	map->type = type;
	map->num_stripes = num_stripes;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
	BUG_ON(ret);
	*start = key.objectid;

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		/* don't leak the cpu copies of the chunk and the mapping */
		kfree(chunk);
		kfree(map);
		return -ENOMEM;
	}
	em->bdev = (struct block_device *)map;
	em->start = key.objectid;
	em->len = key.offset;
	em->block_start = 0;

	kfree(chunk);

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	BUG_ON(ret);
	spin_unlock(&em_tree->lock);
	free_extent_map(em);
	return ret;
}

void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}

void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
{
	struct extent_map *em;

	while (1) {
		spin_lock(&tree->map_tree.lock);
		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
		if (em)
			remove_extent_mapping(&tree->map_tree, em);
		spin_unlock(&tree->map_tree.lock);
		if (!em)
			break;
		kfree(em->bdev);
		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
}

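/*
 * Translate a logical address into (device, physical) using the chunk
 * mapping.  As a worked example (values chosen for illustration),
 * take a RAID0 chunk with stripe_len 64K and two stripes: a logical
 * offset of 200K into the chunk gives stripe_nr = 200K / 64K = 3 and
 * stripe_offset = 200K - 3 * 64K = 8K; then stripe_index = 3 % 2 = 1
 * and stripe_nr becomes 3 / 2 = 1, so the result is
 * stripes[1].physical + 1 * 64K + 8K on stripes[1].dev.
 */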
int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
		    u64 logical, u64 *phys, u64 *length,
		    struct btrfs_device **dev)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	u64 offset;
	u64 stripe_offset;
	u64 stripe_nr;
	int stripe_index;

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, *length);
	BUG_ON(!em);

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	offset = logical - em->start;

	stripe_nr = offset;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block
	 */
	do_div(stripe_nr, map->stripe_len);

	stripe_offset = stripe_nr * map->stripe_len;
	BUG_ON(offset < stripe_offset);

	/* stripe_offset is the offset of this block in its stripe */
	stripe_offset = offset - stripe_offset;

	/*
	 * after this do_div call, stripe_nr is the number of stripes
	 * on this device we have to walk to find the data, and
	 * stripe_index is the number of our device in the stripe array
	 */
	stripe_index = do_div(stripe_nr, map->num_stripes);

	BUG_ON(stripe_index >= map->num_stripes);

	*phys = map->stripes[stripe_index].physical + stripe_offset +
		stripe_nr * map->stripe_len;

	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
		/* we limit the length of each bio to what fits in a stripe */
		*length = min_t(u64, em->len - offset,
				map->stripe_len - stripe_offset);
	} else {
		*length = em->len - offset;
	}
	*dev = map->stripes[stripe_index].dev;
	free_extent_map(em);
	spin_unlock(&em_tree->lock);
	return 0;
}

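/*
 * Remap a bio to the physical location of its logical range and send
 * it down to the real device.  Callers are expected to keep each bio
 * within a single stripe; btrfs_map_block() clamps the mapped length,
 * and a mapping shorter than the bio is a fatal bug here.
 */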
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
{
	struct btrfs_mapping_tree *map_tree;
	struct btrfs_device *dev;
	u64 logical = (u64)bio->bi_sector << 9;
	u64 physical;
	u64 length = 0;
	u64 map_length;
	struct bio_vec *bvec;
	int i;
	int ret;

	bio_for_each_segment(bvec, bio, i) {
		length += bvec->bv_len;
	}
	map_tree = &root->fs_info->mapping_tree;
	map_length = length;
	ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
	if (map_length < length) {
		printk("mapping failed logical %Lu bio len %Lu physical %Lu "
		       "len %Lu\n", logical, length, physical, map_length);
		BUG();
	}
	bio->bi_sector = physical >> 9;
	bio->bi_bdev = dev->bdev;
	submit_bio(rw, bio);
	return 0;
}

struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
{
	struct list_head *head = &root->fs_info->fs_devices->devices;

	return __find_device(head, devid);
}

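/*
 * Build the in-memory map_lookup for one chunk item and add it to the
 * mapping tree, skipping chunks that are already mapped.  Every stripe
 * must resolve to a device that has already been read in.
 */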
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
			  struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	int num_stripes;
	int ret;
	int i;

	logical = key->objectid;
	length = key->offset;
	spin_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);

	/* already mapped? */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		spin_unlock(&map_tree->map_tree.lock);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}
	spin_unlock(&map_tree->map_tree.lock);

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	em->bdev = (struct block_device *)map;
	em->start = logical;
	em->len = length;
	em->block_start = 0;

	map->num_stripes = num_stripes;
	map->io_width = btrfs_chunk_io_width(leaf, chunk);
	map->io_align = btrfs_chunk_io_align(leaf, chunk);
	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	map->type = btrfs_chunk_type(leaf, chunk);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		map->stripes[i].dev = btrfs_find_device(root, devid);
		if (!map->stripes[i].dev) {
			kfree(map);
			free_extent_map(em);
			return -EIO;
		}
	}

	spin_lock(&map_tree->map_tree.lock);
	ret = add_extent_mapping(&map_tree->map_tree, em);
	BUG_ON(ret);
	spin_unlock(&map_tree->map_tree.lock);
	free_extent_map(em);
	return 0;
}

static int fill_device_from_item(struct extent_buffer *leaf,
				 struct btrfs_dev_item *dev_item,
				 struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
	return 0;
}

static int read_one_dev(struct btrfs_root *root,
			struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_device *device;
	u64 devid;
	int ret;

	devid = btrfs_device_id(leaf, dev_item);
	device = btrfs_find_device(root, devid);
	if (!device) {
		printk("warning devid %Lu not found already\n", devid);
		/* zero the struct so bdev and name start out NULL */
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			return -ENOMEM;
		list_add(&device->dev_list,
			 &root->fs_info->fs_devices->devices);
	}

	fill_device_from_item(leaf, dev_item, device);
	device->dev_root = root->fs_info->dev_root;
	ret = 0;
#if 0
	ret = btrfs_open_device(device);
	if (ret) {
		kfree(device);
	}
#endif
	return ret;
}

int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
{
	struct btrfs_dev_item *dev_item;

	dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
						     dev_item);
	return read_one_dev(root, buf, dev_item);
}

int btrfs_read_sys_array(struct btrfs_root *root)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct extent_buffer *sb = root->fs_info->sb_buffer;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u8 *ptr;
	unsigned long sb_ptr;
	u32 cur;
	int ret;

	array_size = btrfs_super_sys_array_size(super_copy);

	/*
	 * the sys_chunk_array holds the chunks needed to bootstrap the
	 * chunk tree.  Walk each disk key / chunk pair and add it to the
	 * mapping tree; only chunk items are expected in the array.
	 */
	ptr = super_copy->sys_chunk_array;
	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);
		ptr += len;
		sb_ptr += len;
		cur += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_ptr;
			ret = read_one_chunk(root, &key, sb, chunk);
			BUG_ON(ret);
			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			len = btrfs_chunk_item_size(num_stripes);
		} else {
			BUG();
		}
		ptr += len;
		sb_ptr += len;
		cur += len;
	}
	return 0;
}

int btrfs_read_chunk_tree(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first we search for all of the device items, and then we
	 * read in all of the chunk items.  This way we can create chunk
	 * mappings that reference all of the devices that are found
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
again:
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
			if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
				break;
			if (found_key.type == BTRFS_DEV_ITEM_KEY) {
				struct btrfs_dev_item *dev_item;
				dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
				ret = read_one_dev(root, leaf, dev_item);
				BUG_ON(ret);
			}
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(root, &found_key, leaf, chunk);
		}
		path->slots[0]++;
	}
	if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
		key.objectid = 0;
		btrfs_release_path(root, path);
		goto again;
	}

	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}