genhd.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. /*
  2. * gendisk handling
  3. */
  4. #include <linux/config.h>
  5. #include <linux/module.h>
  6. #include <linux/fs.h>
  7. #include <linux/genhd.h>
  8. #include <linux/kernel.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/init.h>
  11. #include <linux/spinlock.h>
  12. #include <linux/seq_file.h>
  13. #include <linux/slab.h>
  14. #include <linux/kmod.h>
  15. #include <linux/kobj_map.h>
  16. #include <linux/buffer_head.h>
/* Number of buckets in the major_names[] hash table below. */
#define MAX_PROBE_HASH 255 /* random */
/* The block subsystem; it is declared further down with decl_subsys(). */
static struct subsystem block_subsys;
/*
 * Semaphore taken around accesses to major_names[] and by the seq_file
 * iterators that walk block_subsys.kset.list; it is also handed to
 * kobj_map_init() in genhd_device_init().
 */
static DECLARE_MUTEX(block_subsys_sem);
/*
 * Can be deleted altogether. Later.
 *
 * Registry of block major numbers: major_names[] is a hash table whose
 * buckets are singly linked chains of blk_major_name entries.  The bucket
 * for a major is chosen by major_to_index().
 */
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;			/* registered major number */
	char name[16];			/* name given at registration (copied with strlcpy) */
} *major_names[MAX_PROBE_HASH];
  29. /* index in the above - for now: assume no multimajor ranges */
  30. static inline int major_to_index(int major)
  31. {
  32. return major % MAX_PROBE_HASH;
  33. }
  34. #ifdef CONFIG_PROC_FS
  35. /* get block device names in somewhat random order */
  36. int get_blkdev_list(char *p)
  37. {
  38. struct blk_major_name *n;
  39. int i, len;
  40. len = sprintf(p, "\nBlock devices:\n");
  41. down(&block_subsys_sem);
  42. for (i = 0; i < ARRAY_SIZE(major_names); i++) {
  43. for (n = major_names[i]; n; n = n->next)
  44. len += sprintf(p+len, "%3d %s\n",
  45. n->major, n->name);
  46. }
  47. up(&block_subsys_sem);
  48. return len;
  49. }
  50. #endif
  51. int register_blkdev(unsigned int major, const char *name)
  52. {
  53. struct blk_major_name **n, *p;
  54. int index, ret = 0;
  55. down(&block_subsys_sem);
  56. /* temporary */
  57. if (major == 0) {
  58. for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
  59. if (major_names[index] == NULL)
  60. break;
  61. }
  62. if (index == 0) {
  63. printk("register_blkdev: failed to get major for %s\n",
  64. name);
  65. ret = -EBUSY;
  66. goto out;
  67. }
  68. major = index;
  69. ret = major;
  70. }
  71. p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
  72. if (p == NULL) {
  73. ret = -ENOMEM;
  74. goto out;
  75. }
  76. p->major = major;
  77. strlcpy(p->name, name, sizeof(p->name));
  78. p->next = NULL;
  79. index = major_to_index(major);
  80. for (n = &major_names[index]; *n; n = &(*n)->next) {
  81. if ((*n)->major == major)
  82. break;
  83. }
  84. if (!*n)
  85. *n = p;
  86. else
  87. ret = -EBUSY;
  88. if (ret < 0) {
  89. printk("register_blkdev: cannot get major %d for %s\n",
  90. major, name);
  91. kfree(p);
  92. }
  93. out:
  94. up(&block_subsys_sem);
  95. return ret;
  96. }
  97. EXPORT_SYMBOL(register_blkdev);
  98. /* todo: make void - error printk here */
  99. int unregister_blkdev(unsigned int major, const char *name)
  100. {
  101. struct blk_major_name **n;
  102. struct blk_major_name *p = NULL;
  103. int index = major_to_index(major);
  104. int ret = 0;
  105. down(&block_subsys_sem);
  106. for (n = &major_names[index]; *n; n = &(*n)->next)
  107. if ((*n)->major == major)
  108. break;
  109. if (!*n || strcmp((*n)->name, name))
  110. ret = -EINVAL;
  111. else {
  112. p = *n;
  113. *n = p->next;
  114. }
  115. up(&block_subsys_sem);
  116. kfree(p);
  117. return ret;
  118. }
  119. EXPORT_SYMBOL(unregister_blkdev);
/*
 * Map from device numbers to kobjects for block devices.  Created in
 * genhd_device_init(); filled through blk_register_region() and queried
 * by get_gendisk().
 */
static struct kobj_map *bdev_map;
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 *
 * Thin wrapper: every argument is passed unchanged to kobj_map() on
 * bdev_map.  The value returned by kobj_map() is not propagated.
 *
 * @dev:    first device number of the region
 * @range:  number of consecutive device numbers
 * @module: passed through to kobj_map()
 * @probe:  callback passed through to kobj_map()
 * @lock:   callback passed through to kobj_map()
 * @data:   opaque pointer passed through to kobj_map()
 */
void blk_register_region(dev_t dev, unsigned long range, struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
	kobj_map(bdev_map, dev, range, module, probe, lock, data);
}
EXPORT_SYMBOL(blk_register_region);
/*
 * Remove the region dev..(dev+range-1) from bdev_map by calling
 * kobj_unmap() with the same @dev and @range.
 */
void blk_unregister_region(dev_t dev, unsigned long range)
{
	kobj_unmap(bdev_map, dev, range);
}
EXPORT_SYMBOL(blk_unregister_region);
  138. static struct kobject *exact_match(dev_t dev, int *part, void *data)
  139. {
  140. struct gendisk *p = data;
  141. return &p->kobj;
  142. }
  143. static int exact_lock(dev_t dev, void *data)
  144. {
  145. struct gendisk *p = data;
  146. if (!get_disk(p))
  147. return -1;
  148. return 0;
  149. }
  150. /**
  151. * add_disk - add partitioning information to kernel list
  152. * @disk: per-device partitioning information
  153. *
  154. * This function registers the partitioning information in @disk
  155. * with the kernel.
  156. */
  157. void add_disk(struct gendisk *disk)
  158. {
  159. disk->flags |= GENHD_FL_UP;
  160. blk_register_region(MKDEV(disk->major, disk->first_minor),
  161. disk->minors, NULL, exact_match, exact_lock, disk);
  162. register_disk(disk);
  163. blk_register_queue(disk);
  164. }
  165. EXPORT_SYMBOL(add_disk);
EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
/*
 * Undo the registrations made by add_disk(): unregister the disk's queue,
 * then release its device number region (disk->minors numbers starting at
 * major:first_minor).
 */
void unlink_gendisk(struct gendisk *disk)
{
	blk_unregister_queue(disk);
	blk_unregister_region(MKDEV(disk->major, disk->first_minor),
			      disk->minors);
}
/* Convert a pointer to a gendisk's embedded kobject back to the gendisk. */
#define to_disk(obj) container_of(obj,struct gendisk,kobj)
  174. /**
  175. * get_gendisk - get partitioning information for a given device
  176. * @dev: device to get partitioning information for
  177. *
  178. * This function gets the structure containing partitioning
  179. * information for the given device @dev.
  180. */
  181. struct gendisk *get_gendisk(dev_t dev, int *part)
  182. {
  183. struct kobject *kobj = kobj_lookup(bdev_map, dev, part);
  184. return kobj ? to_disk(kobj) : NULL;
  185. }
  186. #ifdef CONFIG_PROC_FS
  187. /* iterator */
  188. static void *part_start(struct seq_file *part, loff_t *pos)
  189. {
  190. struct list_head *p;
  191. loff_t l = *pos;
  192. down(&block_subsys_sem);
  193. list_for_each(p, &block_subsys.kset.list)
  194. if (!l--)
  195. return list_entry(p, struct gendisk, kobj.entry);
  196. return NULL;
  197. }
  198. static void *part_next(struct seq_file *part, void *v, loff_t *pos)
  199. {
  200. struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
  201. ++*pos;
  202. return p==&block_subsys.kset.list ? NULL :
  203. list_entry(p, struct gendisk, kobj.entry);
  204. }
/* seq_file stop hook: releases the semaphore taken in part_start(). */
static void part_stop(struct seq_file *part, void *v)
{
	up(&block_subsys_sem);
}
  209. static int show_partition(struct seq_file *part, void *v)
  210. {
  211. struct gendisk *sgp = v;
  212. int n;
  213. char buf[BDEVNAME_SIZE];
  214. if (&sgp->kobj.entry == block_subsys.kset.list.next)
  215. seq_puts(part, "major minor #blocks name\n\n");
  216. /* Don't show non-partitionable removeable devices or empty devices */
  217. if (!get_capacity(sgp) ||
  218. (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE)))
  219. return 0;
  220. if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
  221. return 0;
  222. /* show the full disk and all non-0 size partitions of it */
  223. seq_printf(part, "%4d %4d %10llu %s\n",
  224. sgp->major, sgp->first_minor,
  225. (unsigned long long)get_capacity(sgp) >> 1,
  226. disk_name(sgp, 0, buf));
  227. for (n = 0; n < sgp->minors - 1; n++) {
  228. if (!sgp->part[n])
  229. continue;
  230. if (sgp->part[n]->nr_sects == 0)
  231. continue;
  232. seq_printf(part, "%4d %4d %10llu %s\n",
  233. sgp->major, n + 1 + sgp->first_minor,
  234. (unsigned long long)sgp->part[n]->nr_sects >> 1 ,
  235. disk_name(sgp, n + 1, buf));
  236. }
  237. return 0;
  238. }
/*
 * seq_file operations for the partition listing.  part_start() takes
 * block_subsys_sem and part_stop() releases it.
 */
struct seq_operations partitions_op = {
	.start =part_start,
	.next = part_next,
	.stop = part_stop,
	.show = show_partition
};
  245. #endif
  246. extern int blk_dev_init(void);
  247. static struct kobject *base_probe(dev_t dev, int *part, void *data)
  248. {
  249. if (request_module("block-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0)
  250. /* Make old-style 2.4 aliases work */
  251. request_module("block-major-%d", MAJOR(dev));
  252. return NULL;
  253. }
/*
 * Subsystem init: creates bdev_map with base_probe() as the default probe
 * and block_subsys_sem as its semaphore, calls blk_dev_init(), and
 * registers block_subsys.  The results of these calls are not checked;
 * the function always returns 0.
 */
static int __init genhd_device_init(void)
{
	bdev_map = kobj_map_init(base_probe, &block_subsys_sem);
	blk_dev_init();
	subsystem_register(&block_subsys);
	return 0;
}
subsys_initcall(genhd_device_init);
  262. /*
  263. * kobject & sysfs bindings for block devices
  264. */
  265. static ssize_t disk_attr_show(struct kobject *kobj, struct attribute *attr,
  266. char *page)
  267. {
  268. struct gendisk *disk = to_disk(kobj);
  269. struct disk_attribute *disk_attr =
  270. container_of(attr,struct disk_attribute,attr);
  271. ssize_t ret = -EIO;
  272. if (disk_attr->show)
  273. ret = disk_attr->show(disk,page);
  274. return ret;
  275. }
/* sysfs operations for disks: only show is provided, no store. */
static struct sysfs_ops disk_sysfs_ops = {
	.show = &disk_attr_show,
};
/*
 * "dev" attribute: formats the disk's base device number (major and
 * first_minor) into @page with print_dev_t() and returns its result.
 */
static ssize_t disk_dev_read(struct gendisk * disk, char *page)
{
	dev_t base = MKDEV(disk->major, disk->first_minor);
	return print_dev_t(page, base);
}
/* "range" attribute: prints disk->minors as a decimal number. */
static ssize_t disk_range_read(struct gendisk * disk, char *page)
{
	return sprintf(page, "%d\n", disk->minors);
}
  288. static ssize_t disk_removable_read(struct gendisk * disk, char *page)
  289. {
  290. return sprintf(page, "%d\n",
  291. (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  292. }
  293. static ssize_t disk_size_read(struct gendisk * disk, char *page)
  294. {
  295. return sprintf(page, "%llu\n", (unsigned long long)get_capacity(disk));
  296. }
/*
 * "stat" attribute: brings the disk's statistics up to date with
 * disk_round_stats() (called with preemption disabled) and prints eleven
 * fields on one line, in this order: reads, read merges, read sectors,
 * read ticks, writes, write merges, write sectors, write ticks,
 * in-flight count, io ticks, time in queue.  The tick values are
 * converted with jiffies_to_msecs() before printing.
 */
static ssize_t disk_stats_read(struct gendisk * disk, char *page)
{
	preempt_disable();
	disk_round_stats(disk);
	preempt_enable();
	return sprintf(page,
		"%8u %8u %8llu %8u "
		"%8u %8u %8llu %8u "
		"%8u %8u %8u"
		"\n",
		disk_stat_read(disk, reads), disk_stat_read(disk, read_merges),
		(unsigned long long)disk_stat_read(disk, read_sectors),
		jiffies_to_msecs(disk_stat_read(disk, read_ticks)),
		disk_stat_read(disk, writes),
		disk_stat_read(disk, write_merges),
		(unsigned long long)disk_stat_read(disk, write_sectors),
		jiffies_to_msecs(disk_stat_read(disk, write_ticks)),
		disk->in_flight,
		jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
		jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
}
/*
 * Read-only (S_IRUGO) sysfs attributes of a disk.  Each one pairs a file
 * name with the *_read() helper that produces its contents.
 */
static struct disk_attribute disk_attr_dev = {
	.attr = {.name = "dev", .mode = S_IRUGO },
	.show = disk_dev_read
};
static struct disk_attribute disk_attr_range = {
	.attr = {.name = "range", .mode = S_IRUGO },
	.show = disk_range_read
};
static struct disk_attribute disk_attr_removable = {
	.attr = {.name = "removable", .mode = S_IRUGO },
	.show = disk_removable_read
};
static struct disk_attribute disk_attr_size = {
	.attr = {.name = "size", .mode = S_IRUGO },
	.show = disk_size_read
};
static struct disk_attribute disk_attr_stat = {
	.attr = {.name = "stat", .mode = S_IRUGO },
	.show = disk_stats_read
};
/* NULL-terminated attribute list referenced by ktype_block.default_attrs. */
static struct attribute * default_attrs[] = {
	&disk_attr_dev.attr,
	&disk_attr_range.attr,
	&disk_attr_removable.attr,
	&disk_attr_size.attr,
	&disk_attr_stat.attr,
	NULL,
};
/*
 * kobject release hook for a gendisk (see ktype_block): frees
 * disk->random, the partition pointer array, the disk statistics and
 * finally the gendisk itself.
 */
static void disk_release(struct kobject * kobj)
{
	struct gendisk *disk = to_disk(kobj);
	kfree(disk->random);
	kfree(disk->part);
	free_disk_stats(disk);
	kfree(disk);
}
/*
 * kobject type of whole disks: release hook, sysfs show dispatcher and
 * the default attribute set defined above.
 */
static struct kobj_type ktype_block = {
	.release = disk_release,
	.sysfs_ops = &disk_sysfs_ops,
	.default_attrs = default_attrs,
};
  359. extern struct kobj_type ktype_part;
  360. static int block_hotplug_filter(struct kset *kset, struct kobject *kobj)
  361. {
  362. struct kobj_type *ktype = get_ktype(kobj);
  363. return ((ktype == &ktype_block) || (ktype == &ktype_part));
  364. }
  365. static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
  366. int num_envp, char *buffer, int buffer_size)
  367. {
  368. struct kobj_type *ktype = get_ktype(kobj);
  369. struct device *physdev;
  370. struct gendisk *disk;
  371. struct hd_struct *part;
  372. int length = 0;
  373. int i = 0;
  374. if (ktype == &ktype_block) {
  375. disk = container_of(kobj, struct gendisk, kobj);
  376. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  377. &length, "MINOR=%u", disk->first_minor);
  378. } else if (ktype == &ktype_part) {
  379. disk = container_of(kobj->parent, struct gendisk, kobj);
  380. part = container_of(kobj, struct hd_struct, kobj);
  381. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  382. &length, "MINOR=%u",
  383. disk->first_minor + part->partno);
  384. } else
  385. return 0;
  386. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length,
  387. "MAJOR=%u", disk->major);
  388. /* add physical device, backing this device */
  389. physdev = disk->driverfs_dev;
  390. if (physdev) {
  391. char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
  392. add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
  393. &length, "PHYSDEVPATH=%s", path);
  394. kfree(path);
  395. if (physdev->bus)
  396. add_hotplug_env_var(envp, num_envp, &i,
  397. buffer, buffer_size, &length,
  398. "PHYSDEVBUS=%s",
  399. physdev->bus->name);
  400. if (physdev->driver)
  401. add_hotplug_env_var(envp, num_envp, &i,
  402. buffer, buffer_size, &length,
  403. "PHYSDEVDRIVER=%s",
  404. physdev->driver->name);
  405. }
  406. /* terminate, set to next free slot, shrink available space */
  407. envp[i] = NULL;
  408. envp = &envp[i];
  409. num_envp -= i;
  410. buffer = &buffer[length];
  411. buffer_size -= length;
  412. return 0;
  413. }
/* Hotplug callbacks of the block subsystem: event filter and env builder. */
static struct kset_hotplug_ops block_hotplug_ops = {
	.filter = block_hotplug_filter,
	.hotplug = block_hotplug,
};
/*
 * declare block_subsys.
 * It is set up with ktype_block and the hotplug callbacks above.
 */
static decl_subsys(block, &ktype_block, &block_hotplug_ops);
  420. /*
  421. * aggregate disk stat collector. Uses the same stats that the sysfs
  422. * entries do, above, but makes them available through one seq_file.
  423. * Watching a few disks may be efficient through sysfs, but watching
  424. * all of them will be more efficient through this interface.
  425. *
  426. * The output looks suspiciously like /proc/partitions with a bunch of
  427. * extra fields.
  428. */
  429. /* iterator */
  430. static void *diskstats_start(struct seq_file *part, loff_t *pos)
  431. {
  432. loff_t k = *pos;
  433. struct list_head *p;
  434. down(&block_subsys_sem);
  435. list_for_each(p, &block_subsys.kset.list)
  436. if (!k--)
  437. return list_entry(p, struct gendisk, kobj.entry);
  438. return NULL;
  439. }
  440. static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
  441. {
  442. struct list_head *p = ((struct gendisk *)v)->kobj.entry.next;
  443. ++*pos;
  444. return p==&block_subsys.kset.list ? NULL :
  445. list_entry(p, struct gendisk, kobj.entry);
  446. }
/* seq_file stop hook: releases the semaphore taken in diskstats_start(). */
static void diskstats_stop(struct seq_file *part, void *v)
{
	up(&block_subsys_sem);
}
  451. static int diskstats_show(struct seq_file *s, void *v)
  452. {
  453. struct gendisk *gp = v;
  454. char buf[BDEVNAME_SIZE];
  455. int n = 0;
  456. /*
  457. if (&sgp->kobj.entry == block_subsys.kset.list.next)
  458. seq_puts(s, "major minor name"
  459. " rio rmerge rsect ruse wio wmerge "
  460. "wsect wuse running use aveq"
  461. "\n\n");
  462. */
  463. preempt_disable();
  464. disk_round_stats(gp);
  465. preempt_enable();
  466. seq_printf(s, "%4d %4d %s %u %u %llu %u %u %u %llu %u %u %u %u\n",
  467. gp->major, n + gp->first_minor, disk_name(gp, n, buf),
  468. disk_stat_read(gp, reads), disk_stat_read(gp, read_merges),
  469. (unsigned long long)disk_stat_read(gp, read_sectors),
  470. jiffies_to_msecs(disk_stat_read(gp, read_ticks)),
  471. disk_stat_read(gp, writes), disk_stat_read(gp, write_merges),
  472. (unsigned long long)disk_stat_read(gp, write_sectors),
  473. jiffies_to_msecs(disk_stat_read(gp, write_ticks)),
  474. gp->in_flight,
  475. jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
  476. jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
  477. /* now show all non-0 size partitions of it */
  478. for (n = 0; n < gp->minors - 1; n++) {
  479. struct hd_struct *hd = gp->part[n];
  480. if (hd && hd->nr_sects)
  481. seq_printf(s, "%4d %4d %s %u %u %u %u\n",
  482. gp->major, n + gp->first_minor + 1,
  483. disk_name(gp, n + 1, buf),
  484. hd->reads, hd->read_sectors,
  485. hd->writes, hd->write_sectors);
  486. }
  487. return 0;
  488. }
/*
 * seq_file operations for the aggregate disk statistics.
 * diskstats_start() takes block_subsys_sem and diskstats_stop()
 * releases it.
 */
struct seq_operations diskstats_op = {
	.start = diskstats_start,
	.next = diskstats_next,
	.stop = diskstats_stop,
	.show = diskstats_show
};
  495. struct gendisk *alloc_disk(int minors)
  496. {
  497. struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  498. if (disk) {
  499. memset(disk, 0, sizeof(struct gendisk));
  500. if (!init_disk_stats(disk)) {
  501. kfree(disk);
  502. return NULL;
  503. }
  504. if (minors > 1) {
  505. int size = (minors - 1) * sizeof(struct hd_struct *);
  506. disk->part = kmalloc(size, GFP_KERNEL);
  507. if (!disk->part) {
  508. kfree(disk);
  509. return NULL;
  510. }
  511. memset(disk->part, 0, size);
  512. }
  513. disk->minors = minors;
  514. kobj_set_kset_s(disk,block_subsys);
  515. kobject_init(&disk->kobj);
  516. rand_initialize_disk(disk);
  517. }
  518. return disk;
  519. }
  520. EXPORT_SYMBOL(alloc_disk);
  521. struct kobject *get_disk(struct gendisk *disk)
  522. {
  523. struct module *owner;
  524. struct kobject *kobj;
  525. if (!disk->fops)
  526. return NULL;
  527. owner = disk->fops->owner;
  528. if (owner && !try_module_get(owner))
  529. return NULL;
  530. kobj = kobject_get(&disk->kobj);
  531. if (kobj == NULL) {
  532. module_put(owner);
  533. return NULL;
  534. }
  535. return kobj;
  536. }
  537. EXPORT_SYMBOL(get_disk);
  538. void put_disk(struct gendisk *disk)
  539. {
  540. if (disk)
  541. kobject_put(&disk->kobj);
  542. }
  543. EXPORT_SYMBOL(put_disk);
  544. void set_device_ro(struct block_device *bdev, int flag)
  545. {
  546. if (bdev->bd_contains != bdev)
  547. bdev->bd_part->policy = flag;
  548. else
  549. bdev->bd_disk->policy = flag;
  550. }
  551. EXPORT_SYMBOL(set_device_ro);
  552. void set_disk_ro(struct gendisk *disk, int flag)
  553. {
  554. int i;
  555. disk->policy = flag;
  556. for (i = 0; i < disk->minors - 1; i++)
  557. if (disk->part[i]) disk->part[i]->policy = flag;
  558. }
  559. EXPORT_SYMBOL(set_disk_ro);
  560. int bdev_read_only(struct block_device *bdev)
  561. {
  562. if (!bdev)
  563. return 0;
  564. else if (bdev->bd_contains != bdev)
  565. return bdev->bd_part->policy;
  566. else
  567. return bdev->bd_disk->policy;
  568. }
  569. EXPORT_SYMBOL(bdev_read_only);
  570. int invalidate_partition(struct gendisk *disk, int index)
  571. {
  572. int res = 0;
  573. struct block_device *bdev = bdget_disk(disk, index);
  574. if (bdev) {
  575. fsync_bdev(bdev);
  576. res = __invalidate_device(bdev);
  577. bdput(bdev);
  578. }
  579. return res;
  580. }
  581. EXPORT_SYMBOL(invalidate_partition);