/*
 * fs/inotify.c - inode-based file event notifications
 *
 * Authors:
 *	John McCutchan	<ttb@tentacle.dhs.org>
 *	Robert Love	<rml@novell.com>
 *
 * Copyright (C) 2005 John McCutchan
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/writeback.h>
#include <linux/inotify.h>

#include <asm/ioctls.h>

static atomic_t inotify_cookie;

static kmem_cache_t *watch_cachep;
static kmem_cache_t *event_cachep;

static struct vfsmount *inotify_mnt;

/* these are configurable via /proc/sys/fs/inotify/ */
int inotify_max_user_instances;
int inotify_max_user_watches;
int inotify_max_queued_events;

/*
 * Lock ordering:
 *
 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
 * iprune_sem (synchronize shrink_icache_memory())
 * inode_lock (protects the super_block->s_inodes list)
 * inode->inotify_sem (protects inode->inotify_watches and watches->i_list)
 * inotify_dev->sem (protects inotify_device and watches->d_list)
 */
/*
 * Lifetimes of the three main data structures--inotify_device, inode, and
 * inotify_watch--are managed by reference count.
 *
 * inotify_device: Lifetime is from inotify_init() until release.  Additional
 * references can bump the count via get_inotify_dev() and drop the count via
 * put_inotify_dev().
 *
 * inotify_watch: Lifetime is from create_watch() to destroy_watch().
 * Additional references can bump the count via get_inotify_watch() and drop
 * the count via put_inotify_watch().
 *
 * inode: Pinned so long as the inode is associated with a watch, from
 * create_watch() to put_inotify_watch().
 */
/*
 * struct inotify_device - represents an inotify instance
 *
 * This structure is protected by the semaphore 'sem'.
 */
struct inotify_device {
        wait_queue_head_t       wq;             /* wait queue for i/o */
        struct idr              idr;            /* idr mapping wd -> watch */
        struct semaphore        sem;            /* protects this bad boy */
        struct list_head        events;         /* list of queued events */
        struct list_head        watches;        /* list of watches */
        atomic_t                count;          /* reference count */
        struct user_struct      *user;          /* user who opened this dev */
        unsigned int            queue_size;     /* size of the queue (bytes) */
        unsigned int            event_count;    /* number of pending events */
        unsigned int            max_events;     /* maximum number of events */
        u32                     last_wd;        /* the last wd allocated */
};

/*
 * struct inotify_kernel_event - An inotify event, originating from a watch and
 * queued for user-space.  A list of these is attached to each instance of the
 * device.  In read(), this list is walked and all events that can fit in the
 * buffer are returned.
 *
 * Protected by dev->sem of the device in which we are queued.
 */
struct inotify_kernel_event {
        struct inotify_event    event;  /* the user-space event */
        struct list_head        list;   /* entry in inotify_device's list */
        char                    *name;  /* filename, if any */
};

/*
 * struct inotify_watch - represents a watch request on a specific inode
 *
 * d_list is protected by dev->sem of the associated watch->dev.
 * i_list and mask are protected by inode->inotify_sem of the associated inode.
 * dev, inode, and wd are never written to once the watch is created.
 */
struct inotify_watch {
        struct list_head        d_list; /* entry in inotify_device's list */
        struct list_head        i_list; /* entry in inode's list */
        atomic_t                count;  /* reference count */
        struct inotify_device   *dev;   /* associated device */
        struct inode            *inode; /* associated inode */
        s32                     wd;     /* watch descriptor */
        u32                     mask;   /* event mask for this watch */
};
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

static int zero;

ctl_table inotify_table[] = {
        {
                .ctl_name       = INOTIFY_MAX_USER_INSTANCES,
                .procname       = "max_user_instances",
                .data           = &inotify_max_user_instances,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
                .extra1         = &zero,
        },
        {
                .ctl_name       = INOTIFY_MAX_USER_WATCHES,
                .procname       = "max_user_watches",
                .data           = &inotify_max_user_watches,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
                .extra1         = &zero,
        },
        {
                .ctl_name       = INOTIFY_MAX_QUEUED_EVENTS,
                .procname       = "max_queued_events",
                .data           = &inotify_max_queued_events,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec_minmax,
                .strategy       = &sysctl_intvec,
                .extra1         = &zero
        },
        { .ctl_name = 0 }
};
#endif /* CONFIG_SYSCTL */
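
/*
 * With this table hooked up under fs/inotify (it is exposed via
 * /proc/sys/fs/inotify/, as noted above), the limits can be tuned at run
 * time, for example:
 *
 *	echo 16384 > /proc/sys/fs/inotify/max_user_watches
 */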

static inline void get_inotify_dev(struct inotify_device *dev)
{
        atomic_inc(&dev->count);
}

static inline void put_inotify_dev(struct inotify_device *dev)
{
        if (atomic_dec_and_test(&dev->count)) {
                atomic_dec(&dev->user->inotify_devs);
                free_uid(dev->user);
                kfree(dev);
        }
}

static inline void get_inotify_watch(struct inotify_watch *watch)
{
        atomic_inc(&watch->count);
}

/*
 * put_inotify_watch - decrements the ref count on a given watch.  cleans up
 * the watch and its references if the count reaches zero.
 */
static inline void put_inotify_watch(struct inotify_watch *watch)
{
        if (atomic_dec_and_test(&watch->count)) {
                put_inotify_dev(watch->dev);
                iput(watch->inode);
                kmem_cache_free(watch_cachep, watch);
        }
}

/*
 * kernel_event - create a new kernel event with the given parameters
 *
 * This function can sleep.
 */
static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
                                                  const char *name)
{
        struct inotify_kernel_event *kevent;

        kevent = kmem_cache_alloc(event_cachep, GFP_KERNEL);
        if (unlikely(!kevent))
                return NULL;

        /* we hand this out to user-space, so zero it just in case */
        memset(&kevent->event, 0, sizeof(struct inotify_event));

        kevent->event.wd = wd;
        kevent->event.mask = mask;
        kevent->event.cookie = cookie;

        INIT_LIST_HEAD(&kevent->list);

        if (name) {
                size_t len, rem, event_size = sizeof(struct inotify_event);

                /*
                 * We need to pad the filename so as to properly align an
                 * array of inotify_event structures.  Because the structure is
                 * small and the common case is a small filename, we just round
                 * up to the next multiple of the structure's sizeof.  This is
                 * simple and safe for all architectures.
                 */
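                /*
                 * Worked example (assuming a 16-byte struct inotify_event):
                 * for the name "foo", len is 4 and rem is 12, so the name is
                 * stored in a zero-padded 16-byte buffer and event.len
                 * becomes 16.
                 */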
                len = strlen(name) + 1;
                rem = event_size - len;
                if (len > event_size) {
                        rem = event_size - (len % event_size);
                        if (len % event_size == 0)
                                rem = 0;
                }

                kevent->name = kmalloc(len + rem, GFP_KERNEL);
                if (unlikely(!kevent->name)) {
                        kmem_cache_free(event_cachep, kevent);
                        return NULL;
                }
                memcpy(kevent->name, name, len);
                if (rem)
                        memset(kevent->name + len, 0, rem);
                kevent->event.len = len + rem;
        } else {
                kevent->event.len = 0;
                kevent->name = NULL;
        }

        return kevent;
}

/*
 * inotify_dev_get_event - return the next event in the given dev's queue
 *
 * Caller must hold dev->sem.
 */
static inline struct inotify_kernel_event *
inotify_dev_get_event(struct inotify_device *dev)
{
        return list_entry(dev->events.next, struct inotify_kernel_event, list);
}

/*
 * inotify_dev_queue_event - add a new event to the given device
 *
 * Caller must hold dev->sem.  Can sleep (calls kernel_event()).
 */
static void inotify_dev_queue_event(struct inotify_device *dev,
                                    struct inotify_watch *watch, u32 mask,
                                    u32 cookie, const char *name)
{
        struct inotify_kernel_event *kevent, *last;

        /* coalescing: drop this event if it is a dupe of the previous */
        last = inotify_dev_get_event(dev);
        if (last && last->event.mask == mask && last->event.wd == watch->wd &&
                        last->event.cookie == cookie) {
                const char *lastname = last->name;

                if (!name && !lastname)
                        return;
                if (name && lastname && !strcmp(lastname, name))
                        return;
        }

        /* the queue overflowed and we already sent the Q_OVERFLOW event */
        if (unlikely(dev->event_count > dev->max_events))
                return;

        /* if the queue overflows, we need to notify user space */
        if (unlikely(dev->event_count == dev->max_events))
                kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
        else
                kevent = kernel_event(watch->wd, mask, cookie, name);

        if (unlikely(!kevent))
                return;

        /* queue the event and wake up anyone waiting */
        dev->event_count++;
        dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
        list_add_tail(&kevent->list, &dev->events);
        wake_up_interruptible(&dev->wq);
}

/*
 * remove_kevent - cleans up and ultimately frees the given kevent
 *
 * Caller must hold dev->sem.
 */
static void remove_kevent(struct inotify_device *dev,
                          struct inotify_kernel_event *kevent)
{
        list_del(&kevent->list);

        dev->event_count--;
        dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;

        kfree(kevent->name);
        kmem_cache_free(event_cachep, kevent);
}

/*
 * inotify_dev_event_dequeue - destroy an event on the given device
 *
 * Caller must hold dev->sem.
 */
static void inotify_dev_event_dequeue(struct inotify_device *dev)
{
        if (!list_empty(&dev->events)) {
                struct inotify_kernel_event *kevent;

                kevent = inotify_dev_get_event(dev);
                remove_kevent(dev, kevent);
        }
}

/*
 * inotify_dev_get_wd - returns the next WD for use by the given dev
 *
 * Callers must hold dev->sem.  This function can sleep.
 */
static int inotify_dev_get_wd(struct inotify_device *dev,
                              struct inotify_watch *watch)
{
        int ret;
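
        /*
         * idr_pre_get() preallocates idr layers, and idr_get_new_above()
         * reports -EAGAIN when that preallocation turns out to be
         * insufficient, so keep retrying until the wd is allocated or the
         * allocation fails outright.
         */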
        do {
                if (unlikely(!idr_pre_get(&dev->idr, GFP_KERNEL)))
                        return -ENOSPC;
                ret = idr_get_new_above(&dev->idr, watch, dev->last_wd+1, &watch->wd);
        } while (ret == -EAGAIN);

        return ret;
}

/*
 * find_inode - resolve a user-given path to a specific inode and fill in the
 * given nameidata
 */
static int find_inode(const char __user *dirname, struct nameidata *nd)
{
        int error;

        error = __user_walk(dirname, LOOKUP_FOLLOW, nd);
        if (error)
                return error;
        /* you can only watch an inode if you have read permissions on it */
        error = permission(nd->dentry->d_inode, MAY_READ, NULL);
        if (error)
                path_release(nd);
        return error;
}

/*
 * create_watch - creates a watch on the given device.
 *
 * Callers must hold dev->sem.  Calls inotify_dev_get_wd() so may sleep.
 * Both 'dev' and 'inode' (by way of nameidata) need to be pinned.
 */
static struct inotify_watch *create_watch(struct inotify_device *dev,
                                          u32 mask, struct inode *inode)
{
        struct inotify_watch *watch;
        int ret;

        if (atomic_read(&dev->user->inotify_watches) >=
                        inotify_max_user_watches)
                return ERR_PTR(-ENOSPC);

        watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
        if (unlikely(!watch))
                return ERR_PTR(-ENOMEM);

        ret = inotify_dev_get_wd(dev, watch);
        if (unlikely(ret)) {
                kmem_cache_free(watch_cachep, watch);
                return ERR_PTR(ret);
        }

        dev->last_wd = watch->wd;
        watch->mask = mask;
        atomic_set(&watch->count, 0);
        INIT_LIST_HEAD(&watch->d_list);
        INIT_LIST_HEAD(&watch->i_list);

        /* save a reference to device and bump the count to make it official */
        get_inotify_dev(dev);
        watch->dev = dev;

        /*
         * Save a reference to the inode and bump the ref count to make it
         * official.  We hold a reference to nameidata, which makes this safe.
         */
        watch->inode = igrab(inode);

        /* bump our own count, corresponding to our entry in dev->watches */
        get_inotify_watch(watch);

        atomic_inc(&dev->user->inotify_watches);

        return watch;
}

/*
 * inode_find_dev - find the watch associated with the given inode and dev
 *
 * Callers must hold inode->inotify_sem.
 */
static struct inotify_watch *inode_find_dev(struct inode *inode,
                                            struct inotify_device *dev)
{
        struct inotify_watch *watch;

        list_for_each_entry(watch, &inode->inotify_watches, i_list) {
                if (watch->dev == dev)
                        return watch;
        }

        return NULL;
}

/*
 * remove_watch_no_event - remove_watch() without the IN_IGNORED event.
 */
static void remove_watch_no_event(struct inotify_watch *watch,
                                  struct inotify_device *dev)
{
        list_del(&watch->i_list);
        list_del(&watch->d_list);

        atomic_dec(&dev->user->inotify_watches);
        idr_remove(&dev->idr, watch->wd);
        put_inotify_watch(watch);
}

/*
 * remove_watch - Remove a watch from both the device and the inode.  Sends
 * the IN_IGNORED event to the given device signifying that the inode is no
 * longer watched.
 *
 * Callers must hold both inode->inotify_sem and dev->sem.  We drop the
 * lists' reference on the watch here; the watch's reference on the inode is
 * only released (via iput()) once the watch's final reference goes away in
 * put_inotify_watch().
 */
static void remove_watch(struct inotify_watch *watch, struct inotify_device *dev)
{
        inotify_dev_queue_event(dev, watch, IN_IGNORED, 0, NULL);
        remove_watch_no_event(watch, dev);
}

/*
 * inotify_inode_watched - returns nonzero if there are watches on this inode
 * and zero otherwise.  We call this lockless; we do not care if we race.
 */
static inline int inotify_inode_watched(struct inode *inode)
{
        return !list_empty(&inode->inotify_watches);
}

/* Kernel API */

/**
 * inotify_inode_queue_event - queue an event to all watches on this inode
 * @inode: inode event is originating from
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 */
void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
                               const char *name)
{
        struct inotify_watch *watch, *next;

        if (!inotify_inode_watched(inode))
                return;

        down(&inode->inotify_sem);
        list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
                u32 watch_mask = watch->mask;
                if (watch_mask & mask) {
                        struct inotify_device *dev = watch->dev;
                        get_inotify_watch(watch);
                        down(&dev->sem);
                        inotify_dev_queue_event(dev, watch, mask, cookie, name);
                        if (watch_mask & IN_ONESHOT)
                                remove_watch_no_event(watch, dev);
                        up(&dev->sem);
                        put_inotify_watch(watch);
                }
        }
        up(&inode->inotify_sem);
}
EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
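
/*
 * Illustrative only (the real callers live in the VFS, not in this file):
 * a directory-modifying path would typically invoke the hook above as
 *
 *	inotify_inode_queue_event(dir, IN_CREATE, 0, dentry->d_name.name);
 *
 * so that every watch on 'dir' whose mask includes IN_CREATE is queued an
 * event carrying the new child's name.
 */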

/**
 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
 * @dentry: the dentry in question, we queue against this dentry's parent
 * @mask: event mask describing this event
 * @cookie: cookie for synchronization, or zero
 * @name: filename, if any
 */
void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
                                       u32 cookie, const char *name)
{
        struct dentry *parent;
        struct inode *inode;

        spin_lock(&dentry->d_lock);
        parent = dentry->d_parent;
        inode = parent->d_inode;

        if (inotify_inode_watched(inode)) {
                dget(parent);
                spin_unlock(&dentry->d_lock);
                inotify_inode_queue_event(inode, mask, cookie, name);
                dput(parent);
        } else
                spin_unlock(&dentry->d_lock);
}
EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);

/**
 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
 */
u32 inotify_get_cookie(void)
{
        return atomic_inc_return(&inotify_cookie);
}
EXPORT_SYMBOL_GPL(inotify_get_cookie);

/**
 * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
 * @list: list of inodes being unmounted (sb->s_inodes)
 *
 * Called with inode_lock held, protecting the unmounting super block's list
 * of inodes, and with iprune_sem held, keeping shrink_icache_memory() at bay.
 * We temporarily drop inode_lock, however, and CAN block.
 */
void inotify_unmount_inodes(struct list_head *list)
{
        struct inode *inode, *next_i, *need_iput = NULL;

        list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
                struct inotify_watch *watch, *next_w;
                struct inode *need_iput_tmp;
                struct list_head *watches;

                /*
                 * If i_count is zero, the inode cannot have any watches and
                 * doing an __iget/iput with MS_ACTIVE clear would actually
                 * evict all inodes with zero i_count from icache which is
                 * unnecessarily violent and may in fact be illegal to do.
                 */
                if (!atomic_read(&inode->i_count))
                        continue;

                /*
                 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, or
                 * I_WILL_FREE which is fine because by that point the inode
                 * cannot have any associated watches.
                 */
                if (inode->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))
                        continue;

                need_iput_tmp = need_iput;
                need_iput = NULL;
                /* In case the remove_watch() drops a reference. */
                if (inode != need_iput_tmp)
                        __iget(inode);
                else
                        need_iput_tmp = NULL;
                /* In case the dropping of a reference would nuke next_i. */
                if ((&next_i->i_sb_list != list) &&
                                atomic_read(&next_i->i_count) &&
                                !(next_i->i_state & (I_CLEAR | I_FREEING |
                                        I_WILL_FREE))) {
                        __iget(next_i);
                        need_iput = next_i;
                }

                /*
                 * We can safely drop inode_lock here because we hold
                 * references on both inode and next_i.  Also no new inodes
                 * will be added since the umount has begun.  Finally,
                 * iprune_sem keeps shrink_icache_memory() away.
                 */
                spin_unlock(&inode_lock);

                if (need_iput_tmp)
                        iput(need_iput_tmp);

                /* for each watch, send IN_UNMOUNT and then remove it */
                down(&inode->inotify_sem);
                watches = &inode->inotify_watches;
                list_for_each_entry_safe(watch, next_w, watches, i_list) {
                        struct inotify_device *dev = watch->dev;
                        down(&dev->sem);
                        inotify_dev_queue_event(dev, watch, IN_UNMOUNT, 0, NULL);
                        remove_watch(watch, dev);
                        up(&dev->sem);
                }
                up(&inode->inotify_sem);
                iput(inode);

                spin_lock(&inode_lock);
        }
}
EXPORT_SYMBOL_GPL(inotify_unmount_inodes);

/**
 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
 * @inode: inode that is about to be removed
 */
void inotify_inode_is_dead(struct inode *inode)
{
        struct inotify_watch *watch, *next;

        down(&inode->inotify_sem);
        list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
                struct inotify_device *dev = watch->dev;
                down(&dev->sem);
                remove_watch(watch, dev);
                up(&dev->sem);
        }
        up(&inode->inotify_sem);
}
EXPORT_SYMBOL_GPL(inotify_inode_is_dead);

/* Device Interface */

static unsigned int inotify_poll(struct file *file, poll_table *wait)
{
        struct inotify_device *dev = file->private_data;
        int ret = 0;

        poll_wait(file, &dev->wq, wait);
        down(&dev->sem);
        if (!list_empty(&dev->events))
                ret = POLLIN | POLLRDNORM;
        up(&dev->sem);

        return ret;
}

static ssize_t inotify_read(struct file *file, char __user *buf,
                            size_t count, loff_t *pos)
{
        size_t event_size = sizeof (struct inotify_event);
        struct inotify_device *dev;
        char __user *start;
        int ret;
        DEFINE_WAIT(wait);

        start = buf;
        dev = file->private_data;
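
        /* block (unless O_NONBLOCK) until an event is queued or a signal arrives */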
        while (1) {
                int events;

                prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);

                down(&dev->sem);
                events = !list_empty(&dev->events);
                up(&dev->sem);
                if (events) {
                        ret = 0;
                        break;
                }

                if (file->f_flags & O_NONBLOCK) {
                        ret = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        ret = -EINTR;
                        break;
                }

                schedule();
        }

        finish_wait(&dev->wq, &wait);
        if (ret)
                return ret;

        down(&dev->sem);
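        /* copy as many whole events (header plus padded name) as fit in buf */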
        while (1) {
                struct inotify_kernel_event *kevent;

                ret = buf - start;
                if (list_empty(&dev->events))
                        break;

                kevent = inotify_dev_get_event(dev);
                if (event_size + kevent->event.len > count)
                        break;

                if (copy_to_user(buf, &kevent->event, event_size)) {
                        ret = -EFAULT;
                        break;
                }
                buf += event_size;
                count -= event_size;

                if (kevent->name) {
                        if (copy_to_user(buf, kevent->name, kevent->event.len)) {
                                ret = -EFAULT;
                                break;
                        }
                        buf += kevent->event.len;
                        count -= kevent->event.len;
                }

                remove_kevent(dev, kevent);
        }
        up(&dev->sem);

        return ret;
}

static int inotify_release(struct inode *ignored, struct file *file)
{
        struct inotify_device *dev = file->private_data;

        /*
         * Destroy all of the watches on this device.  Unfortunately, not very
         * pretty.  We cannot do a simple iteration over the list, because we
         * do not know the inode until we iterate to the watch.  But we need to
         * hold inode->inotify_sem before dev->sem.  The following works.
         */
        while (1) {
                struct inotify_watch *watch;
                struct list_head *watches;
                struct inode *inode;

                down(&dev->sem);
                watches = &dev->watches;
                if (list_empty(watches)) {
                        up(&dev->sem);
                        break;
                }
                watch = list_entry(watches->next, struct inotify_watch, d_list);
                get_inotify_watch(watch);
                up(&dev->sem);

                inode = watch->inode;
                down(&inode->inotify_sem);
                down(&dev->sem);
                remove_watch_no_event(watch, dev);
                up(&dev->sem);
                up(&inode->inotify_sem);
                put_inotify_watch(watch);
        }

        /* destroy all of the events on this device */
        down(&dev->sem);
        while (!list_empty(&dev->events))
                inotify_dev_event_dequeue(dev);
        up(&dev->sem);

        /* free this device: the put matching the get in inotify_init() */
        put_inotify_dev(dev);

        return 0;
}

/*
 * inotify_ignore - remove a given wd from this inotify instance.
 *
 * Can sleep.
 */
static int inotify_ignore(struct inotify_device *dev, s32 wd)
{
        struct inotify_watch *watch;
        struct inode *inode;

        down(&dev->sem);
        watch = idr_find(&dev->idr, wd);
        if (unlikely(!watch)) {
                up(&dev->sem);
                return -EINVAL;
        }
        get_inotify_watch(watch);
        inode = watch->inode;
        up(&dev->sem);

        down(&inode->inotify_sem);
        down(&dev->sem);

        /* make sure that we did not race */
        watch = idr_find(&dev->idr, wd);
        if (likely(watch))
                remove_watch(watch, dev);

        up(&dev->sem);
        up(&inode->inotify_sem);
        put_inotify_watch(watch);

        return 0;
}

static long inotify_ioctl(struct file *file, unsigned int cmd,
                          unsigned long arg)
{
        struct inotify_device *dev;
        void __user *p;
        int ret = -ENOTTY;

        dev = file->private_data;
        p = (void __user *) arg;

        switch (cmd) {
        case FIONREAD:
                ret = put_user(dev->queue_size, (int __user *) p);
                break;
        }

        return ret;
}

static struct file_operations inotify_fops = {
        .poll           = inotify_poll,
        .read           = inotify_read,
        .release        = inotify_release,
        .unlocked_ioctl = inotify_ioctl,
        .compat_ioctl   = inotify_ioctl,
};
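
/*
 * Note there is no .open here: inotify file objects come into existence only
 * via sys_inotify_init() below, which installs these operations directly on a
 * file backed by the private inotifyfs mount.
 */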

asmlinkage long sys_inotify_init(void)
{
        struct inotify_device *dev;
        struct user_struct *user;
        struct file *filp;
        int fd, ret;

        fd = get_unused_fd();
        if (fd < 0)
                return fd;

        filp = get_empty_filp();
        if (!filp) {
                ret = -ENFILE;
                goto out_put_fd;
        }

        user = get_uid(current->user);
        if (unlikely(atomic_read(&user->inotify_devs) >=
                        inotify_max_user_instances)) {
                ret = -EMFILE;
                goto out_free_uid;
        }

        dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL);
        if (unlikely(!dev)) {
                ret = -ENOMEM;
                goto out_free_uid;
        }
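
        /*
         * Hand-roll the file: it is backed by the root dentry and inode of
         * the private inotifyfs mount and carries the device in private_data.
         */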
        filp->f_op = &inotify_fops;
        filp->f_vfsmnt = mntget(inotify_mnt);
        filp->f_dentry = dget(inotify_mnt->mnt_root);
        filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
        filp->f_mode = FMODE_READ;
        filp->f_flags = O_RDONLY;
        filp->private_data = dev;

        idr_init(&dev->idr);
        INIT_LIST_HEAD(&dev->events);
        INIT_LIST_HEAD(&dev->watches);
        init_waitqueue_head(&dev->wq);
        sema_init(&dev->sem, 1);
        dev->event_count = 0;
        dev->queue_size = 0;
        dev->max_events = inotify_max_queued_events;
        dev->user = user;
        dev->last_wd = 0;
        atomic_set(&dev->count, 0);

        get_inotify_dev(dev);
        atomic_inc(&user->inotify_devs);
        fd_install(fd, filp);

        return fd;
out_free_uid:
        free_uid(user);
        put_filp(filp);
out_put_fd:
        put_unused_fd(fd);
        return ret;
}

asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
{
        struct inotify_watch *watch, *old;
        struct inode *inode;
        struct inotify_device *dev;
        struct nameidata nd;
        struct file *filp;
        int ret, fput_needed;

        filp = fget_light(fd, &fput_needed);
        if (unlikely(!filp))
                return -EBADF;

        /* verify that this is indeed an inotify instance */
        if (unlikely(filp->f_op != &inotify_fops)) {
                ret = -EINVAL;
                goto fput_and_out;
        }

        ret = find_inode(path, &nd);
        if (unlikely(ret))
                goto fput_and_out;

        /* inode held in place by reference to nd; dev by fget on fd */
        inode = nd.dentry->d_inode;
        dev = filp->private_data;

        down(&inode->inotify_sem);
        down(&dev->sem);

        /* don't let user-space set invalid bits: we don't want flags set */
        mask &= IN_ALL_EVENTS;
        if (unlikely(!mask)) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * Handle the case of re-adding a watch on an (inode,dev) pair that we
         * are already watching.  We just update the mask and return its wd.
         */
        old = inode_find_dev(inode, dev);
        if (unlikely(old)) {
                old->mask = mask;
                ret = old->wd;
                goto out;
        }

        watch = create_watch(dev, mask, inode);
        if (unlikely(IS_ERR(watch))) {
                ret = PTR_ERR(watch);
                goto out;
        }

        /* Add the watch to the device's and the inode's list */
        list_add(&watch->d_list, &dev->watches);
        list_add(&watch->i_list, &inode->inotify_watches);
        ret = watch->wd;
out:
        up(&dev->sem);
        up(&inode->inotify_sem);
        path_release(&nd);
fput_and_out:
        fput_light(filp, fput_needed);
        return ret;
}

asmlinkage long sys_inotify_rm_watch(int fd, u32 wd)
{
        struct file *filp;
        struct inotify_device *dev;
        int ret, fput_needed;

        filp = fget_light(fd, &fput_needed);
        if (unlikely(!filp))
                return -EBADF;

        /* verify that this is indeed an inotify instance */
        if (unlikely(filp->f_op != &inotify_fops)) {
                ret = -EINVAL;
                goto out;
        }

        dev = filp->private_data;
        ret = inotify_ignore(dev, wd);

out:
        fput_light(filp, fput_needed);
        return ret;
}
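
/*
 * Illustrative user-space usage of the three system calls above (via library
 * wrappers or syscall(2); not part of this file):
 *
 *	int fd = inotify_init();
 *	int wd = inotify_add_watch(fd, "/etc", IN_MODIFY | IN_CREATE);
 *	...
 *	read(fd, buf, sizeof(buf));	(returns packed struct inotify_event
 *					 records, each followed by its padded
 *					 name when event.len is nonzero)
 *	inotify_rm_watch(fd, wd);
 */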

static struct super_block *
inotify_get_sb(struct file_system_type *fs_type, int flags,
               const char *dev_name, void *data)
{
        return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA);
}

static struct file_system_type inotify_fs_type = {
        .name           = "inotifyfs",
        .get_sb         = inotify_get_sb,
        .kill_sb        = kill_anon_super,
};

/*
 * inotify_setup - Our initialization function.  Note that we cannot return
 * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
 * must result in panic().
 */
static int __init inotify_setup(void)
{
        int ret;

        ret = register_filesystem(&inotify_fs_type);
        if (unlikely(ret))
                panic("inotify: register_filesystem returned %d!\n", ret);

        inotify_mnt = kern_mount(&inotify_fs_type);
        if (IS_ERR(inotify_mnt))
                panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));

        inotify_max_queued_events = 16384;
        inotify_max_user_instances = 128;
        inotify_max_user_watches = 8192;

        atomic_set(&inotify_cookie, 0);

        watch_cachep = kmem_cache_create("inotify_watch_cache",
                                         sizeof(struct inotify_watch),
                                         0, SLAB_PANIC, NULL, NULL);
        event_cachep = kmem_cache_create("inotify_event_cache",
                                         sizeof(struct inotify_kernel_event),
                                         0, SLAB_PANIC, NULL, NULL);

        return 0;
}

module_init(inotify_setup);