slow-work.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977
  1. /* Worker thread pool for slow items, such as filesystem lookups or mkdirs
  2. *
  3. * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public Licence
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the Licence, or (at your option) any later version.
  10. *
  11. * See Documentation/slow-work.txt
  12. */
  13. #include <linux/module.h>
  14. #include <linux/slow-work.h>
  15. #include <linux/kthread.h>
  16. #include <linux/freezer.h>
  17. #include <linux/wait.h>
  18. #include <linux/proc_fs.h>
  19. #include "slow-work.h"
  20. static void slow_work_cull_timeout(unsigned long);
  21. static void slow_work_oom_timeout(unsigned long);
  22. #ifdef CONFIG_SYSCTL
  23. static int slow_work_min_threads_sysctl(struct ctl_table *, int,
  24. void __user *, size_t *, loff_t *);
  25. static int slow_work_max_threads_sysctl(struct ctl_table *, int ,
  26. void __user *, size_t *, loff_t *);
  27. #endif
  28. /*
  29. * The pool of threads has at least min threads in it as long as someone is
  30. * using the facility, and may have as many as max.
  31. *
  32. * A portion of the pool may be processing very slow operations.
  33. */
  34. static unsigned slow_work_min_threads = 2;
  35. static unsigned slow_work_max_threads = 4;
  36. static unsigned vslow_work_proportion = 50; /* % of threads that may process
  37. * very slow work */
  38. #ifdef CONFIG_SYSCTL
  39. static const int slow_work_min_min_threads = 2;
  40. static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
  41. static const int slow_work_min_vslow = 1;
  42. static const int slow_work_max_vslow = 99;
  43. ctl_table slow_work_sysctls[] = {
  44. {
  45. .ctl_name = CTL_UNNUMBERED,
  46. .procname = "min-threads",
  47. .data = &slow_work_min_threads,
  48. .maxlen = sizeof(unsigned),
  49. .mode = 0644,
  50. .proc_handler = slow_work_min_threads_sysctl,
  51. .extra1 = (void *) &slow_work_min_min_threads,
  52. .extra2 = &slow_work_max_threads,
  53. },
  54. {
  55. .ctl_name = CTL_UNNUMBERED,
  56. .procname = "max-threads",
  57. .data = &slow_work_max_threads,
  58. .maxlen = sizeof(unsigned),
  59. .mode = 0644,
  60. .proc_handler = slow_work_max_threads_sysctl,
  61. .extra1 = &slow_work_min_threads,
  62. .extra2 = (void *) &slow_work_max_max_threads,
  63. },
  64. {
  65. .ctl_name = CTL_UNNUMBERED,
  66. .procname = "vslow-percentage",
  67. .data = &vslow_work_proportion,
  68. .maxlen = sizeof(unsigned),
  69. .mode = 0644,
  70. .proc_handler = &proc_dointvec_minmax,
  71. .extra1 = (void *) &slow_work_min_vslow,
  72. .extra2 = (void *) &slow_work_max_vslow,
  73. },
  74. { .ctl_name = 0 }
  75. };
  76. #endif
  77. /*
  78. * The active state of the thread pool
  79. */
  80. static atomic_t slow_work_thread_count;
  81. static atomic_t vslow_work_executing_count;
  82. static bool slow_work_may_not_start_new_thread;
  83. static bool slow_work_cull; /* cull a thread due to lack of activity */
  84. static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
  85. static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
  86. static struct slow_work slow_work_new_thread; /* new thread starter */
  87. /*
  88. * slow work ID allocation (use slow_work_queue_lock)
  89. */
  90. static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  91. /*
  92. * Unregistration tracking to prevent put_ref() from disappearing during module
  93. * unload
  94. */
  95. #ifdef CONFIG_MODULES
  96. static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
  97. static struct module *slow_work_unreg_module;
  98. static struct slow_work *slow_work_unreg_work_item;
  99. static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
  100. static DEFINE_MUTEX(slow_work_unreg_sync_lock);
  101. #endif
  102. /*
  103. * Data for tracking currently executing items for indication through /proc
  104. */
  105. #ifdef CONFIG_SLOW_WORK_PROC
  106. struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
  107. pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
  108. DEFINE_RWLOCK(slow_work_execs_lock);
  109. #endif
  110. /*
  111. * The queues of work items and the lock governing access to them. These are
  112. * shared between all the CPUs. It doesn't make sense to have per-CPU queues
  113. * as the number of threads bears no relation to the number of CPUs.
  114. *
  115. * There are two queues of work items: one for slow work items, and one for
  116. * very slow work items.
  117. */
  118. LIST_HEAD(slow_work_queue);
  119. LIST_HEAD(vslow_work_queue);
  120. DEFINE_SPINLOCK(slow_work_queue_lock);
  121. /*
  122. * The thread controls. A variable used to signal to the threads that they
  123. * should exit when the queue is empty, a waitqueue used by the threads to wait
  124. * for signals, and a completion set by the last thread to exit.
  125. */
  126. static bool slow_work_threads_should_exit;
  127. static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
  128. static DECLARE_COMPLETION(slow_work_last_thread_exited);
  129. /*
  130. * The number of users of the thread pool and its lock. Whilst this is zero we
  131. * have no threads hanging around, and when this reaches zero, we wait for all
  132. * active or queued work items to complete and kill all the threads we do have.
  133. */
  134. static int slow_work_user_count;
  135. static DEFINE_MUTEX(slow_work_user_lock);
  136. static inline int slow_work_get_ref(struct slow_work *work)
  137. {
  138. if (work->ops->get_ref)
  139. return work->ops->get_ref(work);
  140. return 0;
  141. }
  142. static inline void slow_work_put_ref(struct slow_work *work)
  143. {
  144. if (work->ops->put_ref)
  145. work->ops->put_ref(work);
  146. }
  147. /*
  148. * Calculate the maximum number of active threads in the pool that are
  149. * permitted to process very slow work items.
  150. *
  151. * The answer is rounded up to at least 1, but may not equal or exceed the
  152. * maximum number of the threads in the pool. This means we always have at
  153. * least one thread that can process slow work items, and we always have at
  154. * least one thread that won't get tied up doing so.
  155. */
  156. static unsigned slow_work_calc_vsmax(void)
  157. {
  158. unsigned vsmax;
  159. vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
  160. vsmax /= 100;
  161. vsmax = max(vsmax, 1U);
  162. return min(vsmax, slow_work_max_threads - 1);
  163. }
  164. /*
  165. * Attempt to execute stuff queued on a slow thread. Return true if we managed
  166. * it, false if there was nothing to do.
  167. */
  168. static noinline bool slow_work_execute(int id)
  169. {
  170. #ifdef CONFIG_MODULES
  171. struct module *module;
  172. #endif
  173. struct slow_work *work = NULL;
  174. unsigned vsmax;
  175. bool very_slow;
  176. vsmax = slow_work_calc_vsmax();
  177. /* see if we can schedule a new thread to be started if we're not
  178. * keeping up with the work */
  179. if (!waitqueue_active(&slow_work_thread_wq) &&
  180. (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
  181. atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
  182. !slow_work_may_not_start_new_thread)
  183. slow_work_enqueue(&slow_work_new_thread);
  184. /* find something to execute */
  185. spin_lock_irq(&slow_work_queue_lock);
  186. if (!list_empty(&vslow_work_queue) &&
  187. atomic_read(&vslow_work_executing_count) < vsmax) {
  188. work = list_entry(vslow_work_queue.next,
  189. struct slow_work, link);
  190. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  191. BUG();
  192. list_del_init(&work->link);
  193. atomic_inc(&vslow_work_executing_count);
  194. very_slow = true;
  195. } else if (!list_empty(&slow_work_queue)) {
  196. work = list_entry(slow_work_queue.next,
  197. struct slow_work, link);
  198. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  199. BUG();
  200. list_del_init(&work->link);
  201. very_slow = false;
  202. } else {
  203. very_slow = false; /* avoid the compiler warning */
  204. }
  205. #ifdef CONFIG_MODULES
  206. if (work)
  207. slow_work_thread_processing[id] = work->owner;
  208. #endif
  209. if (work) {
  210. slow_work_mark_time(work);
  211. slow_work_begin_exec(id, work);
  212. }
  213. spin_unlock_irq(&slow_work_queue_lock);
  214. if (!work)
  215. return false;
  216. if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
  217. BUG();
  218. /* don't execute if the work is in the process of being cancelled */
  219. if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
  220. work->ops->execute(work);
  221. if (very_slow)
  222. atomic_dec(&vslow_work_executing_count);
  223. clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
  224. /* wake up anyone waiting for this work to be complete */
  225. wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
  226. slow_work_end_exec(id, work);
  227. /* if someone tried to enqueue the item whilst we were executing it,
  228. * then it'll be left unenqueued to avoid multiple threads trying to
  229. * execute it simultaneously
  230. *
  231. * there is, however, a race between us testing the pending flag and
  232. * getting the spinlock, and between the enqueuer setting the pending
  233. * flag and getting the spinlock, so we use a deferral bit to tell us
  234. * if the enqueuer got there first
  235. */
  236. if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
  237. spin_lock_irq(&slow_work_queue_lock);
  238. if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
  239. test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
  240. goto auto_requeue;
  241. spin_unlock_irq(&slow_work_queue_lock);
  242. }
  243. /* sort out the race between module unloading and put_ref() */
  244. slow_work_put_ref(work);
  245. #ifdef CONFIG_MODULES
  246. module = slow_work_thread_processing[id];
  247. slow_work_thread_processing[id] = NULL;
  248. smp_mb();
  249. if (slow_work_unreg_work_item == work ||
  250. slow_work_unreg_module == module)
  251. wake_up_all(&slow_work_unreg_wq);
  252. #endif
  253. return true;
  254. auto_requeue:
  255. /* we must complete the enqueue operation
  256. * - we transfer our ref on the item back to the appropriate queue
  257. * - don't wake another thread up as we're awake already
  258. */
  259. slow_work_mark_time(work);
  260. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  261. list_add_tail(&work->link, &vslow_work_queue);
  262. else
  263. list_add_tail(&work->link, &slow_work_queue);
  264. spin_unlock_irq(&slow_work_queue_lock);
  265. slow_work_thread_processing[id] = NULL;
  266. return true;
  267. }
  268. /**
  269. * slow_work_enqueue - Schedule a slow work item for processing
  270. * @work: The work item to queue
  271. *
  272. * Schedule a slow work item for processing. If the item is already undergoing
  273. * execution, this guarantees not to re-enter the execution routine until the
  274. * first execution finishes.
  275. *
  276. * The item is pinned by this function as it retains a reference to it, managed
  277. * through the item operations. The item is unpinned once it has been
  278. * executed.
  279. *
  280. * An item may hog the thread that is running it for a relatively large amount
  281. * of time, sufficient, for example, to perform several lookup, mkdir, create
  282. * and setxattr operations. It may sleep on I/O and may sleep to obtain locks.
  283. *
  284. * Conversely, if a number of items are awaiting processing, it may take some
  285. * time before any given item is given attention. The number of threads in the
  286. * pool may be increased to deal with demand, but only up to a limit.
  287. *
  288. * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
  289. * the very slow queue, from which only a portion of the threads will be
  290. * allowed to pick items to execute. This ensures that very slow items won't
  291. * overly block ones that are just ordinarily slow.
  292. *
  293. * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
  294. * attempted queued)
  295. */
  296. int slow_work_enqueue(struct slow_work *work)
  297. {
  298. unsigned long flags;
  299. int ret;
  300. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  301. return -ECANCELED;
  302. BUG_ON(slow_work_user_count <= 0);
  303. BUG_ON(!work);
  304. BUG_ON(!work->ops);
  305. /* when honouring an enqueue request, we only promise that we will run
  306. * the work function in the future; we do not promise to run it once
  307. * per enqueue request
  308. *
  309. * we use the PENDING bit to merge together repeat requests without
  310. * having to disable IRQs and take the spinlock, whilst still
  311. * maintaining our promise
  312. */
  313. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  314. spin_lock_irqsave(&slow_work_queue_lock, flags);
  315. if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
  316. goto cancelled;
  317. /* we promise that we will not attempt to execute the work
  318. * function in more than one thread simultaneously
  319. *
  320. * this, however, leaves us with a problem if we're asked to
  321. * enqueue the work whilst someone is executing the work
  322. * function as simply queueing the work immediately means that
  323. * another thread may try executing it whilst it is already
  324. * under execution
  325. *
  326. * to deal with this, we set the ENQ_DEFERRED bit instead of
  327. * enqueueing, and the thread currently executing the work
  328. * function will enqueue the work item when the work function
  329. * returns and it has cleared the EXECUTING bit
  330. */
  331. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  332. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  333. } else {
  334. ret = slow_work_get_ref(work);
  335. if (ret < 0)
  336. goto failed;
  337. slow_work_mark_time(work);
  338. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  339. list_add_tail(&work->link, &vslow_work_queue);
  340. else
  341. list_add_tail(&work->link, &slow_work_queue);
  342. wake_up(&slow_work_thread_wq);
  343. }
  344. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  345. }
  346. return 0;
  347. cancelled:
  348. ret = -ECANCELED;
  349. failed:
  350. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  351. return ret;
  352. }
  353. EXPORT_SYMBOL(slow_work_enqueue);
  354. static int slow_work_wait(void *word)
  355. {
  356. schedule();
  357. return 0;
  358. }
  359. /**
  360. * slow_work_cancel - Cancel a slow work item
  361. * @work: The work item to cancel
  362. *
  363. * This function will cancel a previously enqueued work item. If we cannot
  364. * cancel the work item, it is guarenteed to have run when this function
  365. * returns.
  366. */
  367. void slow_work_cancel(struct slow_work *work)
  368. {
  369. bool wait = true, put = false;
  370. set_bit(SLOW_WORK_CANCELLING, &work->flags);
  371. smp_mb();
  372. /* if the work item is a delayed work item with an active timer, we
  373. * need to wait for the timer to finish _before_ getting the spinlock,
  374. * lest we deadlock against the timer routine
  375. *
  376. * the timer routine will leave DELAYED set if it notices the
  377. * CANCELLING flag in time
  378. */
  379. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  380. struct delayed_slow_work *dwork =
  381. container_of(work, struct delayed_slow_work, work);
  382. del_timer_sync(&dwork->timer);
  383. }
  384. spin_lock_irq(&slow_work_queue_lock);
  385. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  386. /* the timer routine aborted or never happened, so we are left
  387. * holding the timer's reference on the item and should just
  388. * drop the pending flag and wait for any ongoing execution to
  389. * finish */
  390. struct delayed_slow_work *dwork =
  391. container_of(work, struct delayed_slow_work, work);
  392. BUG_ON(timer_pending(&dwork->timer));
  393. BUG_ON(!list_empty(&work->link));
  394. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  395. put = true;
  396. clear_bit(SLOW_WORK_PENDING, &work->flags);
  397. } else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
  398. !list_empty(&work->link)) {
  399. /* the link in the pending queue holds a reference on the item
  400. * that we will need to release */
  401. list_del_init(&work->link);
  402. wait = false;
  403. put = true;
  404. clear_bit(SLOW_WORK_PENDING, &work->flags);
  405. } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
  406. /* the executor is holding our only reference on the item, so
  407. * we merely need to wait for it to finish executing */
  408. clear_bit(SLOW_WORK_PENDING, &work->flags);
  409. }
  410. spin_unlock_irq(&slow_work_queue_lock);
  411. /* the EXECUTING flag is set by the executor whilst the spinlock is set
  412. * and before the item is dequeued - so assuming the above doesn't
  413. * actually dequeue it, simply waiting for the EXECUTING flag to be
  414. * released here should be sufficient */
  415. if (wait)
  416. wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
  417. TASK_UNINTERRUPTIBLE);
  418. clear_bit(SLOW_WORK_CANCELLING, &work->flags);
  419. if (put)
  420. slow_work_put_ref(work);
  421. }
  422. EXPORT_SYMBOL(slow_work_cancel);
  423. /*
  424. * Handle expiry of the delay timer, indicating that a delayed slow work item
  425. * should now be queued if not cancelled
  426. */
  427. static void delayed_slow_work_timer(unsigned long data)
  428. {
  429. struct slow_work *work = (struct slow_work *) data;
  430. unsigned long flags;
  431. bool queued = false, put = false;
  432. spin_lock_irqsave(&slow_work_queue_lock, flags);
  433. if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
  434. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  435. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  436. /* we discard the reference the timer was holding in
  437. * favour of the one the executor holds */
  438. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  439. put = true;
  440. } else {
  441. slow_work_mark_time(work);
  442. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  443. list_add_tail(&work->link, &vslow_work_queue);
  444. else
  445. list_add_tail(&work->link, &slow_work_queue);
  446. queued = true;
  447. }
  448. }
  449. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  450. if (put)
  451. slow_work_put_ref(work);
  452. if (queued)
  453. wake_up(&slow_work_thread_wq);
  454. }
  455. /**
  456. * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
  457. * @dwork: The delayed work item to queue
  458. * @delay: When to start executing the work, in jiffies from now
  459. *
  460. * This is similar to slow_work_enqueue(), but it adds a delay before the work
  461. * is actually queued for processing.
  462. *
  463. * The item can have delayed processing requested on it whilst it is being
  464. * executed. The delay will begin immediately, and if it expires before the
  465. * item finishes executing, the item will be placed back on the queue when it
  466. * has done executing.
  467. */
  468. int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
  469. unsigned long delay)
  470. {
  471. struct slow_work *work = &dwork->work;
  472. unsigned long flags;
  473. int ret;
  474. if (delay == 0)
  475. return slow_work_enqueue(&dwork->work);
  476. BUG_ON(slow_work_user_count <= 0);
  477. BUG_ON(!work);
  478. BUG_ON(!work->ops);
  479. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  480. return -ECANCELED;
  481. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  482. spin_lock_irqsave(&slow_work_queue_lock, flags);
  483. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  484. goto cancelled;
  485. /* the timer holds a reference whilst it is pending */
  486. ret = work->ops->get_ref(work);
  487. if (ret < 0)
  488. goto cant_get_ref;
  489. if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
  490. BUG();
  491. dwork->timer.expires = jiffies + delay;
  492. dwork->timer.data = (unsigned long) work;
  493. dwork->timer.function = delayed_slow_work_timer;
  494. add_timer(&dwork->timer);
  495. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  496. }
  497. return 0;
  498. cancelled:
  499. ret = -ECANCELED;
  500. cant_get_ref:
  501. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  502. return ret;
  503. }
  504. EXPORT_SYMBOL(delayed_slow_work_enqueue);
  505. /*
  506. * Schedule a cull of the thread pool at some time in the near future
  507. */
  508. static void slow_work_schedule_cull(void)
  509. {
  510. mod_timer(&slow_work_cull_timer,
  511. round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
  512. }
  513. /*
  514. * Worker thread culling algorithm
  515. */
  516. static bool slow_work_cull_thread(void)
  517. {
  518. unsigned long flags;
  519. bool do_cull = false;
  520. spin_lock_irqsave(&slow_work_queue_lock, flags);
  521. if (slow_work_cull) {
  522. slow_work_cull = false;
  523. if (list_empty(&slow_work_queue) &&
  524. list_empty(&vslow_work_queue) &&
  525. atomic_read(&slow_work_thread_count) >
  526. slow_work_min_threads) {
  527. slow_work_schedule_cull();
  528. do_cull = true;
  529. }
  530. }
  531. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  532. return do_cull;
  533. }
  534. /*
  535. * Determine if there is slow work available for dispatch
  536. */
  537. static inline bool slow_work_available(int vsmax)
  538. {
  539. return !list_empty(&slow_work_queue) ||
  540. (!list_empty(&vslow_work_queue) &&
  541. atomic_read(&vslow_work_executing_count) < vsmax);
  542. }
  543. /*
  544. * Worker thread dispatcher
  545. */
  546. static int slow_work_thread(void *_data)
  547. {
  548. int vsmax, id;
  549. DEFINE_WAIT(wait);
  550. set_freezable();
  551. set_user_nice(current, -5);
  552. /* allocate ourselves an ID */
  553. spin_lock_irq(&slow_work_queue_lock);
  554. id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  555. BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
  556. __set_bit(id, slow_work_ids);
  557. slow_work_set_thread_pid(id, current->pid);
  558. spin_unlock_irq(&slow_work_queue_lock);
  559. sprintf(current->comm, "kslowd%03u", id);
  560. for (;;) {
  561. vsmax = vslow_work_proportion;
  562. vsmax *= atomic_read(&slow_work_thread_count);
  563. vsmax /= 100;
  564. prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
  565. TASK_INTERRUPTIBLE);
  566. if (!freezing(current) &&
  567. !slow_work_threads_should_exit &&
  568. !slow_work_available(vsmax) &&
  569. !slow_work_cull)
  570. schedule();
  571. finish_wait(&slow_work_thread_wq, &wait);
  572. try_to_freeze();
  573. vsmax = vslow_work_proportion;
  574. vsmax *= atomic_read(&slow_work_thread_count);
  575. vsmax /= 100;
  576. if (slow_work_available(vsmax) && slow_work_execute(id)) {
  577. cond_resched();
  578. if (list_empty(&slow_work_queue) &&
  579. list_empty(&vslow_work_queue) &&
  580. atomic_read(&slow_work_thread_count) >
  581. slow_work_min_threads)
  582. slow_work_schedule_cull();
  583. continue;
  584. }
  585. if (slow_work_threads_should_exit)
  586. break;
  587. if (slow_work_cull && slow_work_cull_thread())
  588. break;
  589. }
  590. spin_lock_irq(&slow_work_queue_lock);
  591. slow_work_set_thread_pid(id, 0);
  592. __clear_bit(id, slow_work_ids);
  593. spin_unlock_irq(&slow_work_queue_lock);
  594. if (atomic_dec_and_test(&slow_work_thread_count))
  595. complete_and_exit(&slow_work_last_thread_exited, 0);
  596. return 0;
  597. }
  598. /*
  599. * Handle thread cull timer expiration
  600. */
  601. static void slow_work_cull_timeout(unsigned long data)
  602. {
  603. slow_work_cull = true;
  604. wake_up(&slow_work_thread_wq);
  605. }
  606. /*
  607. * Start a new slow work thread
  608. */
  609. static void slow_work_new_thread_execute(struct slow_work *work)
  610. {
  611. struct task_struct *p;
  612. if (slow_work_threads_should_exit)
  613. return;
  614. if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
  615. return;
  616. if (!mutex_trylock(&slow_work_user_lock))
  617. return;
  618. slow_work_may_not_start_new_thread = true;
  619. atomic_inc(&slow_work_thread_count);
  620. p = kthread_run(slow_work_thread, NULL, "kslowd");
  621. if (IS_ERR(p)) {
  622. printk(KERN_DEBUG "Slow work thread pool: OOM\n");
  623. if (atomic_dec_and_test(&slow_work_thread_count))
  624. BUG(); /* we're running on a slow work thread... */
  625. mod_timer(&slow_work_oom_timer,
  626. round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
  627. } else {
  628. /* ratelimit the starting of new threads */
  629. mod_timer(&slow_work_oom_timer, jiffies + 1);
  630. }
  631. mutex_unlock(&slow_work_user_lock);
  632. }
  633. static const struct slow_work_ops slow_work_new_thread_ops = {
  634. .owner = THIS_MODULE,
  635. .execute = slow_work_new_thread_execute,
  636. #ifdef CONFIG_SLOW_WORK_PROC
  637. .desc = slow_work_new_thread_desc,
  638. #endif
  639. };
  640. /*
  641. * post-OOM new thread start suppression expiration
  642. */
  643. static void slow_work_oom_timeout(unsigned long data)
  644. {
  645. slow_work_may_not_start_new_thread = false;
  646. }
  647. #ifdef CONFIG_SYSCTL
  648. /*
  649. * Handle adjustment of the minimum number of threads
  650. */
  651. static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  652. void __user *buffer,
  653. size_t *lenp, loff_t *ppos)
  654. {
  655. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  656. int n;
  657. if (ret == 0) {
  658. mutex_lock(&slow_work_user_lock);
  659. if (slow_work_user_count > 0) {
  660. /* see if we need to start or stop threads */
  661. n = atomic_read(&slow_work_thread_count) -
  662. slow_work_min_threads;
  663. if (n < 0 && !slow_work_may_not_start_new_thread)
  664. slow_work_enqueue(&slow_work_new_thread);
  665. else if (n > 0)
  666. slow_work_schedule_cull();
  667. }
  668. mutex_unlock(&slow_work_user_lock);
  669. }
  670. return ret;
  671. }
  672. /*
  673. * Handle adjustment of the maximum number of threads
  674. */
  675. static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
  676. void __user *buffer,
  677. size_t *lenp, loff_t *ppos)
  678. {
  679. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  680. int n;
  681. if (ret == 0) {
  682. mutex_lock(&slow_work_user_lock);
  683. if (slow_work_user_count > 0) {
  684. /* see if we need to stop threads */
  685. n = slow_work_max_threads -
  686. atomic_read(&slow_work_thread_count);
  687. if (n < 0)
  688. slow_work_schedule_cull();
  689. }
  690. mutex_unlock(&slow_work_user_lock);
  691. }
  692. return ret;
  693. }
  694. #endif /* CONFIG_SYSCTL */
  695. /**
  696. * slow_work_register_user - Register a user of the facility
  697. * @module: The module about to make use of the facility
  698. *
  699. * Register a user of the facility, starting up the initial threads if there
  700. * aren't any other users at this point. This will return 0 if successful, or
  701. * an error if not.
  702. */
  703. int slow_work_register_user(struct module *module)
  704. {
  705. struct task_struct *p;
  706. int loop;
  707. mutex_lock(&slow_work_user_lock);
  708. if (slow_work_user_count == 0) {
  709. printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
  710. init_completion(&slow_work_last_thread_exited);
  711. slow_work_threads_should_exit = false;
  712. slow_work_init(&slow_work_new_thread,
  713. &slow_work_new_thread_ops);
  714. slow_work_may_not_start_new_thread = false;
  715. slow_work_cull = false;
  716. /* start the minimum number of threads */
  717. for (loop = 0; loop < slow_work_min_threads; loop++) {
  718. atomic_inc(&slow_work_thread_count);
  719. p = kthread_run(slow_work_thread, NULL, "kslowd");
  720. if (IS_ERR(p))
  721. goto error;
  722. }
  723. printk(KERN_NOTICE "Slow work thread pool: Ready\n");
  724. }
  725. slow_work_user_count++;
  726. mutex_unlock(&slow_work_user_lock);
  727. return 0;
  728. error:
  729. if (atomic_dec_and_test(&slow_work_thread_count))
  730. complete(&slow_work_last_thread_exited);
  731. if (loop > 0) {
  732. printk(KERN_ERR "Slow work thread pool:"
  733. " Aborting startup on ENOMEM\n");
  734. slow_work_threads_should_exit = true;
  735. wake_up_all(&slow_work_thread_wq);
  736. wait_for_completion(&slow_work_last_thread_exited);
  737. printk(KERN_ERR "Slow work thread pool: Aborted\n");
  738. }
  739. mutex_unlock(&slow_work_user_lock);
  740. return PTR_ERR(p);
  741. }
  742. EXPORT_SYMBOL(slow_work_register_user);
  743. /*
  744. * wait for all outstanding items from the calling module to complete
  745. * - note that more items may be queued whilst we're waiting
  746. */
  747. static void slow_work_wait_for_items(struct module *module)
  748. {
  749. DECLARE_WAITQUEUE(myself, current);
  750. struct slow_work *work;
  751. int loop;
  752. mutex_lock(&slow_work_unreg_sync_lock);
  753. add_wait_queue(&slow_work_unreg_wq, &myself);
  754. for (;;) {
  755. spin_lock_irq(&slow_work_queue_lock);
  756. /* first of all, we wait for the last queued item in each list
  757. * to be processed */
  758. list_for_each_entry_reverse(work, &vslow_work_queue, link) {
  759. if (work->owner == module) {
  760. set_current_state(TASK_UNINTERRUPTIBLE);
  761. slow_work_unreg_work_item = work;
  762. goto do_wait;
  763. }
  764. }
  765. list_for_each_entry_reverse(work, &slow_work_queue, link) {
  766. if (work->owner == module) {
  767. set_current_state(TASK_UNINTERRUPTIBLE);
  768. slow_work_unreg_work_item = work;
  769. goto do_wait;
  770. }
  771. }
  772. /* then we wait for the items being processed to finish */
  773. slow_work_unreg_module = module;
  774. smp_mb();
  775. for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
  776. if (slow_work_thread_processing[loop] == module)
  777. goto do_wait;
  778. }
  779. spin_unlock_irq(&slow_work_queue_lock);
  780. break; /* okay, we're done */
  781. do_wait:
  782. spin_unlock_irq(&slow_work_queue_lock);
  783. schedule();
  784. slow_work_unreg_work_item = NULL;
  785. slow_work_unreg_module = NULL;
  786. }
  787. remove_wait_queue(&slow_work_unreg_wq, &myself);
  788. mutex_unlock(&slow_work_unreg_sync_lock);
  789. }
  790. /**
  791. * slow_work_unregister_user - Unregister a user of the facility
  792. * @module: The module whose items should be cleared
  793. *
  794. * Unregister a user of the facility, killing all the threads if this was the
  795. * last one.
  796. *
  797. * This waits for all the work items belonging to the nominated module to go
  798. * away before proceeding.
  799. */
  800. void slow_work_unregister_user(struct module *module)
  801. {
  802. /* first of all, wait for all outstanding items from the calling module
  803. * to complete */
  804. if (module)
  805. slow_work_wait_for_items(module);
  806. /* then we can actually go about shutting down the facility if need
  807. * be */
  808. mutex_lock(&slow_work_user_lock);
  809. BUG_ON(slow_work_user_count <= 0);
  810. slow_work_user_count--;
  811. if (slow_work_user_count == 0) {
  812. printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
  813. slow_work_threads_should_exit = true;
  814. del_timer_sync(&slow_work_cull_timer);
  815. del_timer_sync(&slow_work_oom_timer);
  816. wake_up_all(&slow_work_thread_wq);
  817. wait_for_completion(&slow_work_last_thread_exited);
  818. printk(KERN_NOTICE "Slow work thread pool:"
  819. " Shut down complete\n");
  820. }
  821. mutex_unlock(&slow_work_user_lock);
  822. }
  823. EXPORT_SYMBOL(slow_work_unregister_user);
  824. /*
  825. * Initialise the slow work facility
  826. */
  827. static int __init init_slow_work(void)
  828. {
  829. unsigned nr_cpus = num_possible_cpus();
  830. if (slow_work_max_threads < nr_cpus)
  831. slow_work_max_threads = nr_cpus;
  832. #ifdef CONFIG_SYSCTL
  833. if (slow_work_max_max_threads < nr_cpus * 2)
  834. slow_work_max_max_threads = nr_cpus * 2;
  835. #endif
  836. #ifdef CONFIG_SLOW_WORK_PROC
  837. proc_create("slow_work_rq", S_IFREG | 0400, NULL,
  838. &slow_work_runqueue_fops);
  839. #endif
  840. return 0;
  841. }
  842. subsys_initcall(init_slow_work);