genhd.c 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799
  1. /*
  2. * gendisk handling
  3. */
  4. #include <linux/module.h>
  5. #include <linux/fs.h>
  6. #include <linux/genhd.h>
  7. #include <linux/kdev_t.h>
  8. #include <linux/kernel.h>
  9. #include <linux/blkdev.h>
  10. #include <linux/init.h>
  11. #include <linux/spinlock.h>
  12. #include <linux/proc_fs.h>
  13. #include <linux/seq_file.h>
  14. #include <linux/slab.h>
  15. #include <linux/kmod.h>
  16. #include <linux/kobj_map.h>
  17. #include <linux/mutex.h>
  18. #include <linux/idr.h>
  19. #include <linux/log2.h>
  20. #include "blk.h"
  /*
   * block_class_lock is held around every walk or update of major_names[]
   * in this file and is also the lock handed to kobj_map_init() for bdev_map.
   */
  21. static DEFINE_MUTEX(block_class_lock);
  /* Top-level "block" kobject; created in genhd_device_init() when !sysfs_deprecated. */
  22. struct kobject *block_depr;
  23. /* for extended dynamic devt allocation, currently only one major is used */
  24. #define MAX_EXT_DEVT (1 << MINORBITS)
  25. /* For extended devt allocation. ext_devt_mutex prevents look up
  26. * results from going away underneath its user.
  27. */
  28. static DEFINE_MUTEX(ext_devt_mutex);
  /* Maps an extended-devt index to its struct hd_struct (see blk_alloc_devt()). */
  29. static DEFINE_IDR(ext_devt_idr);
  30. static struct device_type disk_type;
  /*
   * Forward declarations of the disk event helpers; add_disk() calls
   * disk_add_events() and del_gendisk() calls disk_del_events().
   */
  31. static void disk_add_events(struct gendisk *disk);
  32. static void disk_del_events(struct gendisk *disk);
  33. static void disk_release_events(struct gendisk *disk);
  34. /**
  35. * disk_get_part - get partition
  36. * @disk: disk to look partition from
  37. * @partno: partition number
  38. *
  39. * Look for partition @partno from @disk. If found, increment
  40. * reference count and return it.
  41. *
  42. * CONTEXT:
  43. * Don't care.
  44. *
  45. * RETURNS:
  46. * Pointer to the found partition on success, NULL if not found.
  47. */
  48. struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  49. {
  50. struct hd_struct *part = NULL;
  51. struct disk_part_tbl *ptbl;
  52. if (unlikely(partno < 0))
  53. return NULL;
  54. rcu_read_lock();
  55. ptbl = rcu_dereference(disk->part_tbl);
  56. if (likely(partno < ptbl->len)) {
  57. part = rcu_dereference(ptbl->part[partno]);
  58. if (part)
  59. get_device(part_to_dev(part));
  60. }
  61. rcu_read_unlock();
  62. return part;
  63. }
  64. EXPORT_SYMBOL_GPL(disk_get_part);
  65. /**
  66. * disk_part_iter_init - initialize partition iterator
  67. * @piter: iterator to initialize
  68. * @disk: disk to iterate over
  69. * @flags: DISK_PITER_* flags
  70. *
  71. * Initialize @piter so that it iterates over partitions of @disk.
  72. *
  73. * CONTEXT:
  74. * Don't care.
  75. */
  76. void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  77. unsigned int flags)
  78. {
  79. struct disk_part_tbl *ptbl;
  80. rcu_read_lock();
  81. ptbl = rcu_dereference(disk->part_tbl);
  82. piter->disk = disk;
  83. piter->part = NULL;
  84. if (flags & DISK_PITER_REVERSE)
  85. piter->idx = ptbl->len - 1;
  86. else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
  87. piter->idx = 0;
  88. else
  89. piter->idx = 1;
  90. piter->flags = flags;
  91. rcu_read_unlock();
  92. }
  93. EXPORT_SYMBOL_GPL(disk_part_iter_init);
  94. /**
  95. * disk_part_iter_next - proceed iterator to the next partition and return it
  96. * @piter: iterator of interest
  97. *
  98. * Proceed @piter to the next partition and return it.
  99. *
  100. * CONTEXT:
  101. * Don't care.
  102. */
  103. struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
  104. {
  105. struct disk_part_tbl *ptbl;
  106. int inc, end;
  107. /* put the last partition */
  108. disk_put_part(piter->part);
  109. piter->part = NULL;
  110. /* get part_tbl */
  111. rcu_read_lock();
  112. ptbl = rcu_dereference(piter->disk->part_tbl);
  113. /* determine iteration parameters */
  114. if (piter->flags & DISK_PITER_REVERSE) {
  115. inc = -1;
  116. if (piter->flags & (DISK_PITER_INCL_PART0 |
  117. DISK_PITER_INCL_EMPTY_PART0))
  118. end = -1;
  119. else
  120. end = 0;
  121. } else {
  122. inc = 1;
  123. end = ptbl->len;
  124. }
  125. /* iterate to the next partition */
  126. for (; piter->idx != end; piter->idx += inc) {
  127. struct hd_struct *part;
  128. part = rcu_dereference(ptbl->part[piter->idx]);
  129. if (!part)
  130. continue;
  131. if (!part->nr_sects &&
  132. !(piter->flags & DISK_PITER_INCL_EMPTY) &&
  133. !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
  134. piter->idx == 0))
  135. continue;
  136. get_device(part_to_dev(part));
  137. piter->part = part;
  138. piter->idx += inc;
  139. break;
  140. }
  141. rcu_read_unlock();
  142. return piter->part;
  143. }
  144. EXPORT_SYMBOL_GPL(disk_part_iter_next);
  145. /**
  146. * disk_part_iter_exit - finish up partition iteration
  147. * @piter: iter of interest
  148. *
  149. * Called when iteration is over. Cleans up @piter.
  150. *
  151. * CONTEXT:
  152. * Don't care.
  153. */
  154. void disk_part_iter_exit(struct disk_part_iter *piter)
  155. {
  156. disk_put_part(piter->part);
  157. piter->part = NULL;
  158. }
  159. EXPORT_SYMBOL_GPL(disk_part_iter_exit);
  160. static inline int sector_in_part(struct hd_struct *part, sector_t sector)
  161. {
  162. return part->start_sect <= sector &&
  163. sector < part->start_sect + part->nr_sects;
  164. }
  165. /**
  166. * disk_map_sector_rcu - map sector to partition
  167. * @disk: gendisk of interest
  168. * @sector: sector to map
  169. *
  170. * Find out which partition @sector maps to on @disk. This is
  171. * primarily used for stats accounting.
  172. *
  173. * CONTEXT:
  174. * RCU read locked. The returned partition pointer is valid only
  175. * while preemption is disabled.
  176. *
  177. * RETURNS:
  178. * Found partition on success, part0 is returned if no partition matches
  179. */
  180. struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
  181. {
  182. struct disk_part_tbl *ptbl;
  183. struct hd_struct *part;
  184. int i;
  185. ptbl = rcu_dereference(disk->part_tbl);
  186. part = rcu_dereference(ptbl->last_lookup);
  187. if (part && sector_in_part(part, sector))
  188. return part;
  189. for (i = 1; i < ptbl->len; i++) {
  190. part = rcu_dereference(ptbl->part[i]);
  191. if (part && sector_in_part(part, sector)) {
  192. rcu_assign_pointer(ptbl->last_lookup, part);
  193. return part;
  194. }
  195. }
  196. return &disk->part0;
  197. }
  198. EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
  199. /*
  200. * Can be deleted altogether. Later.
  201. *
  * Table of registered block majors.  register_blkdev() picks the slot
  * with major_to_index() and appends to that slot's singly linked chain;
  * unregister_blkdev() unlinks and frees the entry.  All users in this
  * file walk the table while holding block_class_lock.
  202. */
  203. static struct blk_major_name {
  204. struct blk_major_name *next; /* next entry in the same slot's chain */
  205. int major; /* registered major number */
  206. char name[16]; /* driver name, copied (and truncated) with strlcpy() */
  207. } *major_names[BLKDEV_MAJOR_HASH_SIZE];
  208. /* index in the above - for now: assume no multimajor ranges */
  209. static inline int major_to_index(unsigned major)
  210. {
  211. return major % BLKDEV_MAJOR_HASH_SIZE;
  212. }
  213. #ifdef CONFIG_PROC_FS
  214. void blkdev_show(struct seq_file *seqf, off_t offset)
  215. {
  216. struct blk_major_name *dp;
  217. if (offset < BLKDEV_MAJOR_HASH_SIZE) {
  218. mutex_lock(&block_class_lock);
  219. for (dp = major_names[offset]; dp; dp = dp->next)
  220. seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
  221. mutex_unlock(&block_class_lock);
  222. }
  223. }
  224. #endif /* CONFIG_PROC_FS */
  225. /**
  226. * register_blkdev - register a new block device
  227. *
  228. * @major: the requested major device number [1..255]. If @major=0, try to
  229. * allocate any unused major number.
  230. * @name: the name of the new block device as a zero terminated string
  231. *
  232. * The @name must be unique within the system.
  233. *
  234. * The return value depends on the @major input parameter.
  235. * - if a major device number was requested in range [1..255] then the
  236. * function returns zero on success, or a negative error code
  237. * - if any unused major number was requested with @major=0 parameter
  238. * then the return value is the allocated major number in range
  239. * [1..255] or a negative error code otherwise
  240. */
  241. int register_blkdev(unsigned int major, const char *name)
  242. {
  243. struct blk_major_name **n, *p;
  244. int index, ret = 0;
  245. mutex_lock(&block_class_lock);
  246. /* temporary */
  247. if (major == 0) {
  248. for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
  249. if (major_names[index] == NULL)
  250. break;
  251. }
  252. if (index == 0) {
  253. printk("register_blkdev: failed to get major for %s\n",
  254. name);
  255. ret = -EBUSY;
  256. goto out;
  257. }
  258. major = index;
  259. ret = major;
  260. }
  261. p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
  262. if (p == NULL) {
  263. ret = -ENOMEM;
  264. goto out;
  265. }
  266. p->major = major;
  267. strlcpy(p->name, name, sizeof(p->name));
  268. p->next = NULL;
  269. index = major_to_index(major);
  270. for (n = &major_names[index]; *n; n = &(*n)->next) {
  271. if ((*n)->major == major)
  272. break;
  273. }
  274. if (!*n)
  275. *n = p;
  276. else
  277. ret = -EBUSY;
  278. if (ret < 0) {
  279. printk("register_blkdev: cannot get major %d for %s\n",
  280. major, name);
  281. kfree(p);
  282. }
  283. out:
  284. mutex_unlock(&block_class_lock);
  285. return ret;
  286. }
  287. EXPORT_SYMBOL(register_blkdev);
  288. void unregister_blkdev(unsigned int major, const char *name)
  289. {
  290. struct blk_major_name **n;
  291. struct blk_major_name *p = NULL;
  292. int index = major_to_index(major);
  293. mutex_lock(&block_class_lock);
  294. for (n = &major_names[index]; *n; n = &(*n)->next)
  295. if ((*n)->major == major)
  296. break;
  297. if (!*n || strcmp((*n)->name, name)) {
  298. WARN_ON(1);
  299. } else {
  300. p = *n;
  301. *n = p->next;
  302. }
  303. mutex_unlock(&block_class_lock);
  304. kfree(p);
  305. }
  306. EXPORT_SYMBOL(unregister_blkdev);
  /*
   * Device-number map behind blk_register_region(), blk_unregister_region()
   * and the non-extended path of get_gendisk(); set up in genhd_device_init().
   */
  307. static struct kobj_map *bdev_map;
  /**
   * blk_mangle_minor - scatter minor numbers apart
   * @minor: minor number to mangle
   *
   * With CONFIG_DEBUG_BLOCK_EXT_DEVT the low MINORBITS bits of @minor are
   * mirrored (bit i swaps with bit MINORBITS - 1 - i), so consecutive
   * inputs land far apart.  Mangling twice gives the original value.
   * Without that option the value is returned unchanged.
   *
   * RETURNS:
   * Mangled value.
   *
   * CONTEXT:
   * Don't care.
   */
  static int blk_mangle_minor(int minor)
  {
  #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
      int lo_pos;

      for (lo_pos = 0; lo_pos < MINORBITS / 2; lo_pos++) {
          int hi_pos = MINORBITS - 1 - lo_pos;
          int lo_bit = minor & (1 << lo_pos);
          int hi_bit = minor & (1 << hi_pos);
          int gap = hi_pos - lo_pos;

          /* drop both bits, then put each one back in the other's place */
          minor ^= lo_bit | hi_bit;
          minor |= (lo_bit << gap) | (hi_bit >> gap);
      }
  #endif
      return minor;
  }
  337. /**
  338. * blk_alloc_devt - allocate a dev_t for a partition
  339. * @part: partition to allocate dev_t for
  340. * @devt: out parameter for resulting dev_t
  341. *
  342. * Allocate a dev_t for block device.
  343. *
  344. * RETURNS:
  345. * 0 on success, allocated dev_t is returned in *@devt. -errno on
  346. * failure.
  347. *
  348. * CONTEXT:
  349. * Might sleep.
  350. */
  351. int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
  352. {
  353. struct gendisk *disk = part_to_disk(part);
  354. int idx, rc;
  355. /* in consecutive minor range? */
  356. if (part->partno < disk->minors) {
  357. *devt = MKDEV(disk->major, disk->first_minor + part->partno);
  358. return 0;
  359. }
  360. /* allocate ext devt */
  361. do {
  362. if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
  363. return -ENOMEM;
  364. rc = idr_get_new(&ext_devt_idr, part, &idx);
  365. } while (rc == -EAGAIN);
  366. if (rc)
  367. return rc;
  368. if (idx > MAX_EXT_DEVT) {
  369. idr_remove(&ext_devt_idr, idx);
  370. return -EBUSY;
  371. }
  372. *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
  373. return 0;
  374. }
  375. /**
  376. * blk_free_devt - free a dev_t
  377. * @devt: dev_t to free
  378. *
  379. * Free @devt which was allocated using blk_alloc_devt().
  380. *
  381. * CONTEXT:
  382. * Might sleep.
  383. */
  384. void blk_free_devt(dev_t devt)
  385. {
  386. might_sleep();
  387. if (devt == MKDEV(0, 0))
  388. return;
  389. if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
  390. mutex_lock(&ext_devt_mutex);
  391. idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
  392. mutex_unlock(&ext_devt_mutex);
  393. }
  394. }
  395. static char *bdevt_str(dev_t devt, char *buf)
  396. {
  397. if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
  398. char tbuf[BDEVT_SIZE];
  399. snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
  400. snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
  401. } else
  402. snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
  403. return buf;
  404. }
  405. /*
  406. * Register device numbers dev..(dev+range-1)
  407. * range must be nonzero
  408. * The hash chain is sorted on range, so that subranges can override.
  409. */
  410. void blk_register_region(dev_t devt, unsigned long range, struct module *module,
  411. struct kobject *(*probe)(dev_t, int *, void *),
  412. int (*lock)(dev_t, void *), void *data)
  413. {
  414. kobj_map(bdev_map, devt, range, module, probe, lock, data);
  415. }
  416. EXPORT_SYMBOL(blk_register_region);
  417. void blk_unregister_region(dev_t devt, unsigned long range)
  418. {
  419. kobj_unmap(bdev_map, devt, range);
  420. }
  421. EXPORT_SYMBOL(blk_unregister_region);
  422. static struct kobject *exact_match(dev_t devt, int *partno, void *data)
  423. {
  424. struct gendisk *p = data;
  425. return &disk_to_dev(p)->kobj;
  426. }
  427. static int exact_lock(dev_t devt, void *data)
  428. {
  429. struct gendisk *p = data;
  430. if (!get_disk(p))
  431. return -1;
  432. return 0;
  433. }
  434. static void register_disk(struct gendisk *disk)
  435. {
  436. struct device *ddev = disk_to_dev(disk);
  437. struct block_device *bdev;
  438. struct disk_part_iter piter;
  439. struct hd_struct *part;
  440. int err;
  441. ddev->parent = disk->driverfs_dev;
  442. dev_set_name(ddev, disk->disk_name);
  443. /* delay uevents, until we scanned partition table */
  444. dev_set_uevent_suppress(ddev, 1);
  445. if (device_add(ddev))
  446. return;
  447. if (!sysfs_deprecated) {
  448. err = sysfs_create_link(block_depr, &ddev->kobj,
  449. kobject_name(&ddev->kobj));
  450. if (err) {
  451. device_del(ddev);
  452. return;
  453. }
  454. }
  455. disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
  456. disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
  457. /* No minors to use for partitions */
  458. if (!disk_part_scan_enabled(disk))
  459. goto exit;
  460. /* No such device (e.g., media were just removed) */
  461. if (!get_capacity(disk))
  462. goto exit;
  463. bdev = bdget_disk(disk, 0);
  464. if (!bdev)
  465. goto exit;
  466. bdev->bd_invalidated = 1;
  467. err = blkdev_get(bdev, FMODE_READ, NULL);
  468. if (err < 0)
  469. goto exit;
  470. blkdev_put(bdev, FMODE_READ);
  471. exit:
  472. /* announce disk after possible partitions are created */
  473. dev_set_uevent_suppress(ddev, 0);
  474. kobject_uevent(&ddev->kobj, KOBJ_ADD);
  475. /* announce possible partitions */
  476. disk_part_iter_init(&piter, disk, 0);
  477. while ((part = disk_part_iter_next(&piter)))
  478. kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
  479. disk_part_iter_exit(&piter);
  480. }
  481. /**
  482. * add_disk - add partitioning information to kernel list
  483. * @disk: per-device partitioning information
  484. *
  485. * This function registers the partitioning information in @disk
  486. * with the kernel.
  487. *
  488. * FIXME: error handling
  489. */
  490. void add_disk(struct gendisk *disk)
  491. {
  492. struct backing_dev_info *bdi;
  493. dev_t devt;
  494. int retval;
  495. /* minors == 0 indicates to use ext devt from part0 and should
  496. * be accompanied with EXT_DEVT flag. Make sure all
  497. * parameters make sense.
  498. */
  499. WARN_ON(disk->minors && !(disk->major || disk->first_minor));
  500. WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
  501. disk->flags |= GENHD_FL_UP;
  502. retval = blk_alloc_devt(&disk->part0, &devt);
  503. if (retval) {
  504. WARN_ON(1);
  505. return;
  506. }
  507. disk_to_dev(disk)->devt = devt;
  508. /* ->major and ->first_minor aren't supposed to be
  509. * dereferenced from here on, but set them just in case.
  510. */
  511. disk->major = MAJOR(devt);
  512. disk->first_minor = MINOR(devt);
  513. /* Register BDI before referencing it from bdev */
  514. bdi = &disk->queue->backing_dev_info;
  515. bdi_register_dev(bdi, disk_devt(disk));
  516. blk_register_region(disk_devt(disk), disk->minors, NULL,
  517. exact_match, exact_lock, disk);
  518. register_disk(disk);
  519. blk_register_queue(disk);
  520. /*
  521. * Take an extra ref on queue which will be put on disk_release()
  522. * so that it sticks around as long as @disk is there.
  523. */
  524. WARN_ON_ONCE(!blk_get_queue(disk->queue));
  525. retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
  526. "bdi");
  527. WARN_ON(retval);
  528. disk_add_events(disk);
  529. }
  530. EXPORT_SYMBOL(add_disk);
  531. void del_gendisk(struct gendisk *disk)
  532. {
  533. struct disk_part_iter piter;
  534. struct hd_struct *part;
  535. disk_del_events(disk);
  536. /* invalidate stuff */
  537. disk_part_iter_init(&piter, disk,
  538. DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
  539. while ((part = disk_part_iter_next(&piter))) {
  540. invalidate_partition(disk, part->partno);
  541. delete_partition(disk, part->partno);
  542. }
  543. disk_part_iter_exit(&piter);
  544. invalidate_partition(disk, 0);
  545. blk_free_devt(disk_to_dev(disk)->devt);
  546. set_capacity(disk, 0);
  547. disk->flags &= ~GENHD_FL_UP;
  548. sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
  549. bdi_unregister(&disk->queue->backing_dev_info);
  550. blk_unregister_queue(disk);
  551. blk_unregister_region(disk_devt(disk), disk->minors);
  552. part_stat_set_all(&disk->part0, 0);
  553. disk->part0.stamp = 0;
  554. kobject_put(disk->part0.holder_dir);
  555. kobject_put(disk->slave_dir);
  556. disk->driverfs_dev = NULL;
  557. if (!sysfs_deprecated)
  558. sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
  559. device_del(disk_to_dev(disk));
  560. }
  561. EXPORT_SYMBOL(del_gendisk);
  562. /**
  563. * get_gendisk - get partitioning information for a given device
  564. * @devt: device to get partitioning information for
  565. * @partno: returned partition index
  566. *
  567. * This function gets the structure containing partitioning
  568. * information for the given device @devt.
  569. */
  570. struct gendisk *get_gendisk(dev_t devt, int *partno)
  571. {
  572. struct gendisk *disk = NULL;
  573. if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
  574. struct kobject *kobj;
  575. kobj = kobj_lookup(bdev_map, devt, partno);
  576. if (kobj)
  577. disk = dev_to_disk(kobj_to_dev(kobj));
  578. } else {
  579. struct hd_struct *part;
  580. mutex_lock(&ext_devt_mutex);
  581. part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
  582. if (part && get_disk(part_to_disk(part))) {
  583. *partno = part->partno;
  584. disk = part_to_disk(part);
  585. }
  586. mutex_unlock(&ext_devt_mutex);
  587. }
  588. return disk;
  589. }
  590. EXPORT_SYMBOL(get_gendisk);
  591. /**
  592. * bdget_disk - do bdget() by gendisk and partition number
  593. * @disk: gendisk of interest
  594. * @partno: partition number
  595. *
  596. * Find partition @partno from @disk, do bdget() on it.
  597. *
  598. * CONTEXT:
  599. * Don't care.
  600. *
  601. * RETURNS:
  602. * Resulting block_device on success, NULL on failure.
  603. */
  604. struct block_device *bdget_disk(struct gendisk *disk, int partno)
  605. {
  606. struct hd_struct *part;
  607. struct block_device *bdev = NULL;
  608. part = disk_get_part(disk, partno);
  609. if (part)
  610. bdev = bdget(part_devt(part));
  611. disk_put_part(part);
  612. return bdev;
  613. }
  614. EXPORT_SYMBOL(bdget_disk);
  615. /*
  616. * print a full list of all partitions - intended for places where the root
  617. * filesystem can't be mounted and thus to give the victim some idea of what
  618. * went wrong
  619. */
  620. void __init printk_all_partitions(void)
  621. {
  622. struct class_dev_iter iter;
  623. struct device *dev;
  624. class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
  625. while ((dev = class_dev_iter_next(&iter))) {
  626. struct gendisk *disk = dev_to_disk(dev);
  627. struct disk_part_iter piter;
  628. struct hd_struct *part;
  629. char name_buf[BDEVNAME_SIZE];
  630. char devt_buf[BDEVT_SIZE];
  631. u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1];
  632. /*
  633. * Don't show empty devices or things that have been
  634. * suppressed
  635. */
  636. if (get_capacity(disk) == 0 ||
  637. (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
  638. continue;
  639. /*
  640. * Note, unlike /proc/partitions, I am showing the
  641. * numbers in hex - the same format as the root=
  642. * option takes.
  643. */
  644. disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
  645. while ((part = disk_part_iter_next(&piter))) {
  646. bool is_part0 = part == &disk->part0;
  647. uuid[0] = 0;
  648. if (part->info)
  649. part_unpack_uuid(part->info->uuid, uuid);
  650. printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
  651. bdevt_str(part_devt(part), devt_buf),
  652. (unsigned long long)part->nr_sects >> 1,
  653. disk_name(disk, part->partno, name_buf), uuid);
  654. if (is_part0) {
  655. if (disk->driverfs_dev != NULL &&
  656. disk->driverfs_dev->driver != NULL)
  657. printk(" driver: %s\n",
  658. disk->driverfs_dev->driver->name);
  659. else
  660. printk(" (driver?)\n");
  661. } else
  662. printk("\n");
  663. }
  664. disk_part_iter_exit(&piter);
  665. }
  666. class_dev_iter_exit(&iter);
  667. }
  668. #ifdef CONFIG_PROC_FS
  669. /* iterator */
  670. static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
  671. {
  672. loff_t skip = *pos;
  673. struct class_dev_iter *iter;
  674. struct device *dev;
  675. iter = kmalloc(sizeof(*iter), GFP_KERNEL);
  676. if (!iter)
  677. return ERR_PTR(-ENOMEM);
  678. seqf->private = iter;
  679. class_dev_iter_init(iter, &block_class, NULL, &disk_type);
  680. do {
  681. dev = class_dev_iter_next(iter);
  682. if (!dev)
  683. return NULL;
  684. } while (skip--);
  685. return dev_to_disk(dev);
  686. }
  687. static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
  688. {
  689. struct device *dev;
  690. (*pos)++;
  691. dev = class_dev_iter_next(seqf->private);
  692. if (dev)
  693. return dev_to_disk(dev);
  694. return NULL;
  695. }
  696. static void disk_seqf_stop(struct seq_file *seqf, void *v)
  697. {
  698. struct class_dev_iter *iter = seqf->private;
  699. /* stop is called even after start failed :-( */
  700. if (iter) {
  701. class_dev_iter_exit(iter);
  702. kfree(iter);
  703. }
  704. }
  705. static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
  706. {
  707. static void *p;
  708. p = disk_seqf_start(seqf, pos);
  709. if (!IS_ERR_OR_NULL(p) && !*pos)
  710. seq_puts(seqf, "major minor #blocks name\n\n");
  711. return p;
  712. }
  713. static int show_partition(struct seq_file *seqf, void *v)
  714. {
  715. struct gendisk *sgp = v;
  716. struct disk_part_iter piter;
  717. struct hd_struct *part;
  718. char buf[BDEVNAME_SIZE];
  719. /* Don't show non-partitionable removeable devices or empty devices */
  720. if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
  721. (sgp->flags & GENHD_FL_REMOVABLE)))
  722. return 0;
  723. if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
  724. return 0;
  725. /* show the full disk and all non-0 size partitions of it */
  726. disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
  727. while ((part = disk_part_iter_next(&piter)))
  728. seq_printf(seqf, "%4d %7d %10llu %s\n",
  729. MAJOR(part_devt(part)), MINOR(part_devt(part)),
  730. (unsigned long long)part->nr_sects >> 1,
  731. disk_name(sgp, part->partno, buf));
  732. disk_part_iter_exit(&piter);
  733. return 0;
  734. }
  735. static const struct seq_operations partitions_op = {
  736. .start = show_partition_start,
  737. .next = disk_seqf_next,
  738. .stop = disk_seqf_stop,
  739. .show = show_partition
  740. };
  741. static int partitions_open(struct inode *inode, struct file *file)
  742. {
  743. return seq_open(file, &partitions_op);
  744. }
  745. static const struct file_operations proc_partitions_operations = {
  746. .open = partitions_open,
  747. .read = seq_read,
  748. .llseek = seq_lseek,
  749. .release = seq_release,
  750. };
  751. #endif
  752. static struct kobject *base_probe(dev_t devt, int *partno, void *data)
  753. {
  754. if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
  755. /* Make old-style 2.4 aliases work */
  756. request_module("block-major-%d", MAJOR(devt));
  757. return NULL;
  758. }
  759. static int __init genhd_device_init(void)
  760. {
  761. int error;
  762. block_class.dev_kobj = sysfs_dev_block_kobj;
  763. error = class_register(&block_class);
  764. if (unlikely(error))
  765. return error;
  766. bdev_map = kobj_map_init(base_probe, &block_class_lock);
  767. blk_dev_init();
  768. register_blkdev(BLOCK_EXT_MAJOR, "blkext");
  769. /* create top-level block dir */
  770. if (!sysfs_deprecated)
  771. block_depr = kobject_create_and_add("block", NULL);
  772. return 0;
  773. }
  774. subsys_initcall(genhd_device_init);
  775. static ssize_t disk_range_show(struct device *dev,
  776. struct device_attribute *attr, char *buf)
  777. {
  778. struct gendisk *disk = dev_to_disk(dev);
  779. return sprintf(buf, "%d\n", disk->minors);
  780. }
  781. static ssize_t disk_ext_range_show(struct device *dev,
  782. struct device_attribute *attr, char *buf)
  783. {
  784. struct gendisk *disk = dev_to_disk(dev);
  785. return sprintf(buf, "%d\n", disk_max_parts(disk));
  786. }
  787. static ssize_t disk_removable_show(struct device *dev,
  788. struct device_attribute *attr, char *buf)
  789. {
  790. struct gendisk *disk = dev_to_disk(dev);
  791. return sprintf(buf, "%d\n",
  792. (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
  793. }
  794. static ssize_t disk_ro_show(struct device *dev,
  795. struct device_attribute *attr, char *buf)
  796. {
  797. struct gendisk *disk = dev_to_disk(dev);
  798. return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
  799. }
  800. static ssize_t disk_capability_show(struct device *dev,
  801. struct device_attribute *attr, char *buf)
  802. {
  803. struct gendisk *disk = dev_to_disk(dev);
  804. return sprintf(buf, "%x\n", disk->flags);
  805. }
  806. static ssize_t disk_alignment_offset_show(struct device *dev,
  807. struct device_attribute *attr,
  808. char *buf)
  809. {
  810. struct gendisk *disk = dev_to_disk(dev);
  811. return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
  812. }
  813. static ssize_t disk_discard_alignment_show(struct device *dev,
  814. struct device_attribute *attr,
  815. char *buf)
  816. {
  817. struct gendisk *disk = dev_to_disk(dev);
  818. return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
  819. }
  /* Read-only per-disk sysfs attributes; collected in disk_attrs[]. */
  static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
  static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
  static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
  static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
  static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
  static DEVICE_ATTR(alignment_offset, S_IRUGO,
                     disk_alignment_offset_show, NULL);
  static DEVICE_ATTR(discard_alignment, S_IRUGO,
                     disk_discard_alignment_show, NULL);
  static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
  static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
  static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);

  /* Writable fault-injection attributes, built only when configured. */
  #ifdef CONFIG_FAIL_MAKE_REQUEST
  static struct device_attribute dev_attr_fail =
      __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
  #endif
  #ifdef CONFIG_FAIL_IO_TIMEOUT
  static struct device_attribute dev_attr_fail_timeout =
      __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
             part_timeout_store);
  #endif
  840. static struct attribute *disk_attrs[] = {
  841. &dev_attr_range.attr,
  842. &dev_attr_ext_range.attr,
  843. &dev_attr_removable.attr,
  844. &dev_attr_ro.attr,
  845. &dev_attr_size.attr,
  846. &dev_attr_alignment_offset.attr,
  847. &dev_attr_discard_alignment.attr,
  848. &dev_attr_capability.attr,
  849. &dev_attr_stat.attr,
  850. &dev_attr_inflight.attr,
  851. #ifdef CONFIG_FAIL_MAKE_REQUEST
  852. &dev_attr_fail.attr,
  853. #endif
  854. #ifdef CONFIG_FAIL_IO_TIMEOUT
  855. &dev_attr_fail_timeout.attr,
  856. #endif
  857. NULL
  858. };
  859. static struct attribute_group disk_attr_group = {
  860. .attrs = disk_attrs,
  861. };
  862. static const struct attribute_group *disk_attr_groups[] = {
  863. &disk_attr_group,
  864. NULL
  865. };
  866. /**
  867. * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
  868. * @disk: disk to replace part_tbl for
  869. * @new_ptbl: new part_tbl to install
  870. *
  871. * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
  872. * original ptbl is freed using RCU callback.
  873. *
  874. * LOCKING:
  875. * Matching bd_mutx locked.
  876. */
  877. static void disk_replace_part_tbl(struct gendisk *disk,
  878. struct disk_part_tbl *new_ptbl)
  879. {
  880. struct disk_part_tbl *old_ptbl = disk->part_tbl;
  881. rcu_assign_pointer(disk->part_tbl, new_ptbl);
  882. if (old_ptbl) {
  883. rcu_assign_pointer(old_ptbl->last_lookup, NULL);
  884. kfree_rcu(old_ptbl, rcu_head);
  885. }
  886. }
  887. /**
  888. * disk_expand_part_tbl - expand disk->part_tbl
  889. * @disk: disk to expand part_tbl for
  890. * @partno: expand such that this partno can fit in
  891. *
  892. * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl
  893. * uses RCU to allow unlocked dereferencing for stats and other stuff.
  894. *
  895. * LOCKING:
  896. * Matching bd_mutex locked, might sleep.
  897. *
  898. * RETURNS:
  899. * 0 on success, -errno on failure.
  900. */
  901. int disk_expand_part_tbl(struct gendisk *disk, int partno)
  902. {
  903. struct disk_part_tbl *old_ptbl = disk->part_tbl;
  904. struct disk_part_tbl *new_ptbl;
  905. int len = old_ptbl ? old_ptbl->len : 0;
  906. int target = partno + 1;
  907. size_t size;
  908. int i;
  909. /* disk_max_parts() is zero during initialization, ignore if so */
  910. if (disk_max_parts(disk) && target > disk_max_parts(disk))
  911. return -EINVAL;
  912. if (target <= len)
  913. return 0;
  914. size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
  915. new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
  916. if (!new_ptbl)
  917. return -ENOMEM;
  918. new_ptbl->len = target;
  919. for (i = 0; i < len; i++)
  920. rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
  921. disk_replace_part_tbl(disk, new_ptbl);
  922. return 0;
  923. }
  924. static void disk_release(struct device *dev)
  925. {
  926. struct gendisk *disk = dev_to_disk(dev);
  927. disk_release_events(disk);
  928. kfree(disk->random);
  929. disk_replace_part_tbl(disk, NULL);
  930. free_part_stats(&disk->part0);
  931. free_part_info(&disk->part0);
  932. if (disk->queue)
  933. blk_put_queue(disk->queue);
  934. kfree(disk);
  935. }
  936. struct class block_class = {
  937. .name = "block",
  938. };
  939. static char *block_devnode(struct device *dev, umode_t *mode)
  940. {
  941. struct gendisk *disk = dev_to_disk(dev);
  942. if (disk->devnode)
  943. return disk->devnode(disk, mode);
  944. return NULL;
  945. }
  946. static struct device_type disk_type = {
  947. .name = "disk",
  948. .groups = disk_attr_groups,
  949. .release = disk_release,
  950. .devnode = block_devnode,
  951. };
  952. #ifdef CONFIG_PROC_FS
  953. /*
  954. * aggregate disk stat collector. Uses the same stats that the sysfs
  955. * entries do, above, but makes them available through one seq_file.
  956. *
  957. * The output looks suspiciously like /proc/partitions with a bunch of
  958. * extra fields.
  959. */
  960. static int diskstats_show(struct seq_file *seqf, void *v)
  961. {
  962. struct gendisk *gp = v;
  963. struct disk_part_iter piter;
  964. struct hd_struct *hd;
  965. char buf[BDEVNAME_SIZE];
  966. int cpu;
  967. /*
  968. if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
  969. seq_puts(seqf, "major minor name"
  970. " rio rmerge rsect ruse wio wmerge "
  971. "wsect wuse running use aveq"
  972. "\n\n");
  973. */
  974. disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
  975. while ((hd = disk_part_iter_next(&piter))) {
  976. cpu = part_stat_lock();
  977. part_round_stats(cpu, hd);
  978. part_stat_unlock();
  979. seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
  980. "%u %lu %lu %lu %u %u %u %u\n",
  981. MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
  982. disk_name(gp, hd->partno, buf),
  983. part_stat_read(hd, ios[READ]),
  984. part_stat_read(hd, merges[READ]),
  985. part_stat_read(hd, sectors[READ]),
  986. jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
  987. part_stat_read(hd, ios[WRITE]),
  988. part_stat_read(hd, merges[WRITE]),
  989. part_stat_read(hd, sectors[WRITE]),
  990. jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
  991. part_in_flight(hd),
  992. jiffies_to_msecs(part_stat_read(hd, io_ticks)),
  993. jiffies_to_msecs(part_stat_read(hd, time_in_queue))
  994. );
  995. }
  996. disk_part_iter_exit(&piter);
  997. return 0;
  998. }
  999. static const struct seq_operations diskstats_op = {
  1000. .start = disk_seqf_start,
  1001. .next = disk_seqf_next,
  1002. .stop = disk_seqf_stop,
  1003. .show = diskstats_show
  1004. };
  1005. static int diskstats_open(struct inode *inode, struct file *file)
  1006. {
  1007. return seq_open(file, &diskstats_op);
  1008. }
  1009. static const struct file_operations proc_diskstats_operations = {
  1010. .open = diskstats_open,
  1011. .read = seq_read,
  1012. .llseek = seq_lseek,
  1013. .release = seq_release,
  1014. };
  1015. static int __init proc_genhd_init(void)
  1016. {
  1017. proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
  1018. proc_create("partitions", 0, NULL, &proc_partitions_operations);
  1019. return 0;
  1020. }
  1021. module_init(proc_genhd_init);
  1022. #endif /* CONFIG_PROC_FS */
  1023. dev_t blk_lookup_devt(const char *name, int partno)
  1024. {
  1025. dev_t devt = MKDEV(0, 0);
  1026. struct class_dev_iter iter;
  1027. struct device *dev;
  1028. class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
  1029. while ((dev = class_dev_iter_next(&iter))) {
  1030. struct gendisk *disk = dev_to_disk(dev);
  1031. struct hd_struct *part;
  1032. if (strcmp(dev_name(dev), name))
  1033. continue;
  1034. if (partno < disk->minors) {
  1035. /* We need to return the right devno, even
  1036. * if the partition doesn't exist yet.
  1037. */
  1038. devt = MKDEV(MAJOR(dev->devt),
  1039. MINOR(dev->devt) + partno);
  1040. break;
  1041. }
  1042. part = disk_get_part(disk, partno);
  1043. if (part) {
  1044. devt = part_devt(part);
  1045. disk_put_part(part);
  1046. break;
  1047. }
  1048. disk_put_part(part);
  1049. }
  1050. class_dev_iter_exit(&iter);
  1051. return devt;
  1052. }
  1053. EXPORT_SYMBOL(blk_lookup_devt);
  /* Allocates a gendisk via alloc_disk_node() with a node id of -1. */
  struct gendisk *alloc_disk(int minors)
  {
      struct gendisk *disk = alloc_disk_node(minors, -1);

      return disk;
  }
  EXPORT_SYMBOL(alloc_disk);
  1059. struct gendisk *alloc_disk_node(int minors, int node_id)
  1060. {
  1061. struct gendisk *disk;
  1062. disk = kmalloc_node(sizeof(struct gendisk),
  1063. GFP_KERNEL | __GFP_ZERO, node_id);
  1064. if (disk) {
  1065. if (!init_part_stats(&disk->part0)) {
  1066. kfree(disk);
  1067. return NULL;
  1068. }
  1069. disk->node_id = node_id;
  1070. if (disk_expand_part_tbl(disk, 0)) {
  1071. free_part_stats(&disk->part0);
  1072. kfree(disk);
  1073. return NULL;
  1074. }
  1075. disk->part_tbl->part[0] = &disk->part0;
  1076. hd_ref_init(&disk->part0);
  1077. disk->minors = minors;
  1078. rand_initialize_disk(disk);
  1079. disk_to_dev(disk)->class = &block_class;
  1080. disk_to_dev(disk)->type = &disk_type;
  1081. device_initialize(disk_to_dev(disk));
  1082. }
  1083. return disk;
  1084. }
  1085. EXPORT_SYMBOL(alloc_disk_node);
  1086. struct kobject *get_disk(struct gendisk *disk)
  1087. {
  1088. struct module *owner;
  1089. struct kobject *kobj;
  1090. if (!disk->fops)
  1091. return NULL;
  1092. owner = disk->fops->owner;
  1093. if (owner && !try_module_get(owner))
  1094. return NULL;
  1095. kobj = kobject_get(&disk_to_dev(disk)->kobj);
  1096. if (kobj == NULL) {
  1097. module_put(owner);
  1098. return NULL;
  1099. }
  1100. return kobj;
  1101. }
  1102. EXPORT_SYMBOL(get_disk);
  1103. void put_disk(struct gendisk *disk)
  1104. {
  1105. if (disk)
  1106. kobject_put(&disk_to_dev(disk)->kobj);
  1107. }
  1108. EXPORT_SYMBOL(put_disk);
  1109. static void set_disk_ro_uevent(struct gendisk *gd, int ro)
  1110. {
  1111. char event[] = "DISK_RO=1";
  1112. char *envp[] = { event, NULL };
  1113. if (!ro)
  1114. event[8] = '0';
  1115. kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
  1116. }
  1117. void set_device_ro(struct block_device *bdev, int flag)
  1118. {
  1119. bdev->bd_part->policy = flag;
  1120. }
  1121. EXPORT_SYMBOL(set_device_ro);
  1122. void set_disk_ro(struct gendisk *disk, int flag)
  1123. {
  1124. struct disk_part_iter piter;
  1125. struct hd_struct *part;
  1126. if (disk->part0.policy != flag) {
  1127. set_disk_ro_uevent(disk, flag);
  1128. disk->part0.policy = flag;
  1129. }
  1130. disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
  1131. while ((part = disk_part_iter_next(&piter)))
  1132. part->policy = flag;
  1133. disk_part_iter_exit(&piter);
  1134. }
  1135. EXPORT_SYMBOL(set_disk_ro);
  1136. int bdev_read_only(struct block_device *bdev)
  1137. {
  1138. if (!bdev)
  1139. return 0;
  1140. return bdev->bd_part->policy;
  1141. }
  1142. EXPORT_SYMBOL(bdev_read_only);
  1143. int invalidate_partition(struct gendisk *disk, int partno)
  1144. {
  1145. int res = 0;
  1146. struct block_device *bdev = bdget_disk(disk, partno);
  1147. if (bdev) {
  1148. fsync_bdev(bdev);
  1149. res = __invalidate_device(bdev, true);
  1150. bdput(bdev);
  1151. }
  1152. return res;
  1153. }
  1154. EXPORT_SYMBOL(invalidate_partition);
  1155. /*
  1156. * Disk events - monitor disk events like media change and eject request.
  1157. */
  1158. struct disk_events {
  1159. struct list_head node; /* all disk_event's */
  1160. struct gendisk *disk; /* the associated disk */
  1161. spinlock_t lock;
  1162. struct mutex block_mutex; /* protects blocking */
  1163. int block; /* event blocking depth */
  1164. unsigned int pending; /* events already sent out */
  1165. unsigned int clearing; /* events being cleared */
  1166. long poll_msecs; /* interval, -1 for default */
  1167. struct delayed_work dwork;
  1168. };
  1169. static const char *disk_events_strs[] = {
  1170. [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
  1171. [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
  1172. };
  1173. static char *disk_uevents[] = {
  1174. [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
  1175. [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
  1176. };
  /* list of all disk_events, guarded by disk_events_mutex */
  static DEFINE_MUTEX(disk_events_mutex);
  static LIST_HEAD(disk_events);

  /* default poll interval in msecs; 0 disables in-kernel polling */
  static unsigned long disk_events_dfl_poll_msecs;
  1182. static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
  1183. {
  1184. struct disk_events *ev = disk->ev;
  1185. long intv_msecs = 0;
  1186. /*
  1187. * If device-specific poll interval is set, always use it. If
  1188. * the default is being used, poll iff there are events which
  1189. * can't be monitored asynchronously.
  1190. */
  1191. if (ev->poll_msecs >= 0)
  1192. intv_msecs = ev->poll_msecs;
  1193. else if (disk->events & ~disk->async_events)
  1194. intv_msecs = disk_events_dfl_poll_msecs;
  1195. return msecs_to_jiffies(intv_msecs);
  1196. }
  1197. /**
  1198. * disk_block_events - block and flush disk event checking
  1199. * @disk: disk to block events for
  1200. *
  1201. * On return from this function, it is guaranteed that event checking
  1202. * isn't in progress and won't happen until unblocked by
  1203. * disk_unblock_events(). Events blocking is counted and the actual
  1204. * unblocking happens after the matching number of unblocks are done.
  1205. *
  1206. * Note that this intentionally does not block event checking from
  1207. * disk_clear_events().
  1208. *
  1209. * CONTEXT:
  1210. * Might sleep.
  1211. */
  1212. void disk_block_events(struct gendisk *disk)
  1213. {
  1214. struct disk_events *ev = disk->ev;
  1215. unsigned long flags;
  1216. bool cancel;
  1217. if (!ev)
  1218. return;
  1219. /*
  1220. * Outer mutex ensures that the first blocker completes canceling
  1221. * the event work before further blockers are allowed to finish.
  1222. */
  1223. mutex_lock(&ev->block_mutex);
  1224. spin_lock_irqsave(&ev->lock, flags);
  1225. cancel = !ev->block++;
  1226. spin_unlock_irqrestore(&ev->lock, flags);
  1227. if (cancel)
  1228. cancel_delayed_work_sync(&disk->ev->dwork);
  1229. mutex_unlock(&ev->block_mutex);
  1230. }
  1231. static void __disk_unblock_events(struct gendisk *disk, bool check_now)
  1232. {
  1233. struct disk_events *ev = disk->ev;
  1234. unsigned long intv;
  1235. unsigned long flags;
  1236. spin_lock_irqsave(&ev->lock, flags);
  1237. if (WARN_ON_ONCE(ev->block <= 0))
  1238. goto out_unlock;
  1239. if (--ev->block)
  1240. goto out_unlock;
  1241. /*
  1242. * Not exactly a latency critical operation, set poll timer
  1243. * slack to 25% and kick event check.
  1244. */
  1245. intv = disk_events_poll_jiffies(disk);
  1246. set_timer_slack(&ev->dwork.timer, intv / 4);
  1247. if (check_now)
  1248. queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
  1249. else if (intv)
  1250. queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
  1251. out_unlock:
  1252. spin_unlock_irqrestore(&ev->lock, flags);
  1253. }
  1254. /**
  1255. * disk_unblock_events - unblock disk event checking
  1256. * @disk: disk to unblock events for
  1257. *
  1258. * Undo disk_block_events(). When the block count reaches zero, it
  1259. * starts events polling if configured.
  1260. *
  1261. * CONTEXT:
  1262. * Don't care. Safe to call from irq context.
  1263. */
  1264. void disk_unblock_events(struct gendisk *disk)
  1265. {
  1266. if (disk->ev)
  1267. __disk_unblock_events(disk, false);
  1268. }
  1269. /**
  1270. * disk_flush_events - schedule immediate event checking and flushing
  1271. * @disk: disk to check and flush events for
  1272. * @mask: events to flush
  1273. *
  1274. * Schedule immediate event checking on @disk if not blocked. Events in
  1275. * @mask are scheduled to be cleared from the driver. Note that this
  1276. * doesn't clear the events from @disk->ev.
  1277. *
  1278. * CONTEXT:
  1279. * If @mask is non-zero must be called with bdev->bd_mutex held.
  1280. */
  1281. void disk_flush_events(struct gendisk *disk, unsigned int mask)
  1282. {
  1283. struct disk_events *ev = disk->ev;
  1284. if (!ev)
  1285. return;
  1286. spin_lock_irq(&ev->lock);
  1287. ev->clearing |= mask;
  1288. if (!ev->block) {
  1289. cancel_delayed_work(&ev->dwork);
  1290. queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
  1291. }
  1292. spin_unlock_irq(&ev->lock);
  1293. }
  1294. /**
  1295. * disk_clear_events - synchronously check, clear and return pending events
  1296. * @disk: disk to fetch and clear events from
  1297. * @mask: mask of events to be fetched and clearted
  1298. *
  1299. * Disk events are synchronously checked and pending events in @mask
  1300. * are cleared and returned. This ignores the block count.
  1301. *
  1302. * CONTEXT:
  1303. * Might sleep.
  1304. */
  1305. unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
  1306. {
  1307. const struct block_device_operations *bdops = disk->fops;
  1308. struct disk_events *ev = disk->ev;
  1309. unsigned int pending;
  1310. if (!ev) {
  1311. /* for drivers still using the old ->media_changed method */
  1312. if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
  1313. bdops->media_changed && bdops->media_changed(disk))
  1314. return DISK_EVENT_MEDIA_CHANGE;
  1315. return 0;
  1316. }
  1317. /* tell the workfn about the events being cleared */
  1318. spin_lock_irq(&ev->lock);
  1319. ev->clearing |= mask;
  1320. spin_unlock_irq(&ev->lock);
  1321. /* uncondtionally schedule event check and wait for it to finish */
  1322. disk_block_events(disk);
  1323. queue_delayed_work(system_nrt_wq, &ev->dwork, 0);
  1324. flush_delayed_work(&ev->dwork);
  1325. __disk_unblock_events(disk, false);
  1326. /* then, fetch and clear pending events */
  1327. spin_lock_irq(&ev->lock);
  1328. WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */
  1329. pending = ev->pending & mask;
  1330. ev->pending &= ~mask;
  1331. spin_unlock_irq(&ev->lock);
  1332. return pending;
  1333. }
  1334. static void disk_events_workfn(struct work_struct *work)
  1335. {
  1336. struct delayed_work *dwork = to_delayed_work(work);
  1337. struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
  1338. struct gendisk *disk = ev->disk;
  1339. char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
  1340. unsigned int clearing = ev->clearing;
  1341. unsigned int events;
  1342. unsigned long intv;
  1343. int nr_events = 0, i;
  1344. /* check events */
  1345. events = disk->fops->check_events(disk, clearing);
  1346. /* accumulate pending events and schedule next poll if necessary */
  1347. spin_lock_irq(&ev->lock);
  1348. events &= ~ev->pending;
  1349. ev->pending |= events;
  1350. ev->clearing &= ~clearing;
  1351. intv = disk_events_poll_jiffies(disk);
  1352. if (!ev->block && intv)
  1353. queue_delayed_work(system_nrt_wq, &ev->dwork, intv);
  1354. spin_unlock_irq(&ev->lock);
  1355. /*
  1356. * Tell userland about new events. Only the events listed in
  1357. * @disk->events are reported. Unlisted events are processed the
  1358. * same internally but never get reported to userland.
  1359. */
  1360. for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
  1361. if (events & disk->events & (1 << i))
  1362. envp[nr_events++] = disk_uevents[i];
  1363. if (nr_events)
  1364. kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
  1365. }
  1366. /*
  1367. * A disk events enabled device has the following sysfs nodes under
  1368. * its /sys/block/X/ directory.
  1369. *
  1370. * events : list of all supported events
  1371. * events_async : list of events which can be detected w/o polling
  1372. * events_poll_msecs : polling interval, 0: disable, -1: system default
  1373. */
  1374. static ssize_t __disk_events_show(unsigned int events, char *buf)
  1375. {
  1376. const char *delim = "";
  1377. ssize_t pos = 0;
  1378. int i;
  1379. for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
  1380. if (events & (1 << i)) {
  1381. pos += sprintf(buf + pos, "%s%s",
  1382. delim, disk_events_strs[i]);
  1383. delim = " ";
  1384. }
  1385. if (pos)
  1386. pos += sprintf(buf + pos, "\n");
  1387. return pos;
  1388. }
  1389. static ssize_t disk_events_show(struct device *dev,
  1390. struct device_attribute *attr, char *buf)
  1391. {
  1392. struct gendisk *disk = dev_to_disk(dev);
  1393. return __disk_events_show(disk->events, buf);
  1394. }
  1395. static ssize_t disk_events_async_show(struct device *dev,
  1396. struct device_attribute *attr, char *buf)
  1397. {
  1398. struct gendisk *disk = dev_to_disk(dev);
  1399. return __disk_events_show(disk->async_events, buf);
  1400. }
  1401. static ssize_t disk_events_poll_msecs_show(struct device *dev,
  1402. struct device_attribute *attr,
  1403. char *buf)
  1404. {
  1405. struct gendisk *disk = dev_to_disk(dev);
  1406. return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
  1407. }
  1408. static ssize_t disk_events_poll_msecs_store(struct device *dev,
  1409. struct device_attribute *attr,
  1410. const char *buf, size_t count)
  1411. {
  1412. struct gendisk *disk = dev_to_disk(dev);
  1413. long intv;
  1414. if (!count || !sscanf(buf, "%ld", &intv))
  1415. return -EINVAL;
  1416. if (intv < 0 && intv != -1)
  1417. return -EINVAL;
  1418. disk_block_events(disk);
  1419. disk->ev->poll_msecs = intv;
  1420. __disk_unblock_events(disk, true);
  1421. return count;
  1422. }
  1423. static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
  1424. static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
  1425. static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
  1426. disk_events_poll_msecs_show,
  1427. disk_events_poll_msecs_store);
  1428. static const struct attribute *disk_events_attrs[] = {
  1429. &dev_attr_events.attr,
  1430. &dev_attr_events_async.attr,
  1431. &dev_attr_events_poll_msecs.attr,
  1432. NULL,
  1433. };
  1434. /*
  1435. * The default polling interval can be specified by the kernel
  1436. * parameter block.events_dfl_poll_msecs which defaults to 0
  1437. * (disable). This can also be modified runtime by writing to
  1438. * /sys/module/block/events_dfl_poll_msecs.
  1439. */
  1440. static int disk_events_set_dfl_poll_msecs(const char *val,
  1441. const struct kernel_param *kp)
  1442. {
  1443. struct disk_events *ev;
  1444. int ret;
  1445. ret = param_set_ulong(val, kp);
  1446. if (ret < 0)
  1447. return ret;
  1448. mutex_lock(&disk_events_mutex);
  1449. list_for_each_entry(ev, &disk_events, node)
  1450. disk_flush_events(ev->disk, 0);
  1451. mutex_unlock(&disk_events_mutex);
  1452. return 0;
  1453. }
  1454. static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
  1455. .set = disk_events_set_dfl_poll_msecs,
  1456. .get = param_get_ulong,
  1457. };
  1458. #undef MODULE_PARAM_PREFIX
  1459. #define MODULE_PARAM_PREFIX "block."
  1460. module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
  1461. &disk_events_dfl_poll_msecs, 0644);
  1462. /*
  1463. * disk_{add|del|release}_events - initialize and destroy disk_events.
  1464. */
  1465. static void disk_add_events(struct gendisk *disk)
  1466. {
  1467. struct disk_events *ev;
  1468. if (!disk->fops->check_events)
  1469. return;
  1470. ev = kzalloc(sizeof(*ev), GFP_KERNEL);
  1471. if (!ev) {
  1472. pr_warn("%s: failed to initialize events\n", disk->disk_name);
  1473. return;
  1474. }
  1475. if (sysfs_create_files(&disk_to_dev(disk)->kobj,
  1476. disk_events_attrs) < 0) {
  1477. pr_warn("%s: failed to create sysfs files for events\n",
  1478. disk->disk_name);
  1479. kfree(ev);
  1480. return;
  1481. }
  1482. disk->ev = ev;
  1483. INIT_LIST_HEAD(&ev->node);
  1484. ev->disk = disk;
  1485. spin_lock_init(&ev->lock);
  1486. mutex_init(&ev->block_mutex);
  1487. ev->block = 1;
  1488. ev->poll_msecs = -1;
  1489. INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
  1490. mutex_lock(&disk_events_mutex);
  1491. list_add_tail(&ev->node, &disk_events);
  1492. mutex_unlock(&disk_events_mutex);
  1493. /*
  1494. * Block count is initialized to 1 and the following initial
  1495. * unblock kicks it into action.
  1496. */
  1497. __disk_unblock_events(disk, true);
  1498. }
  1499. static void disk_del_events(struct gendisk *disk)
  1500. {
  1501. if (!disk->ev)
  1502. return;
  1503. disk_block_events(disk);
  1504. mutex_lock(&disk_events_mutex);
  1505. list_del_init(&disk->ev->node);
  1506. mutex_unlock(&disk_events_mutex);
  1507. sysfs_remove_files(&disk_to_dev(disk)->kobj, disk_events_attrs);
  1508. }
  1509. static void disk_release_events(struct gendisk *disk)
  1510. {
  1511. /* the block count should be 1 from disk_del_events() */
  1512. WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
  1513. kfree(disk->ev);
  1514. }