slow-work.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /* Worker thread pool for slow items, such as filesystem lookups or mkdirs
  2. *
  3. * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public Licence
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the Licence, or (at your option) any later version.
  10. *
  11. * See Documentation/slow-work.txt
  12. */
  13. #include <linux/module.h>
  14. #include <linux/slow-work.h>
  15. #include <linux/kthread.h>
  16. #include <linux/freezer.h>
  17. #include <linux/wait.h>
  /* Threads are culled this long after the pool runs out of things to do */
  #define SLOW_WORK_CULL_TIMEOUT (5 * HZ)

  /* After an OOM, no new threads may be started for this long */
  #define SLOW_WORK_OOM_TIMEOUT (5 * HZ)

  /* Absolute maximum number of slow-work threads */
  #define SLOW_WORK_THREAD_LIMIT 255

  /* Timer expiry handlers */
  static void slow_work_cull_timeout(unsigned long);
  static void slow_work_oom_timeout(unsigned long);

  #ifdef CONFIG_SYSCTL
  /* Handlers for the min-threads and max-threads sysctl entries */
  static int slow_work_min_threads_sysctl(struct ctl_table *, int,
                                          void __user *, size_t *, loff_t *);
  static int slow_work_max_threads_sysctl(struct ctl_table *, int,
                                          void __user *, size_t *, loff_t *);
  #endif
  /*
   * Pool sizing.  Whilst the facility has users the pool holds at least
   * slow_work_min_threads threads and may grow to slow_work_max_threads.
   *
   * Only a percentage of the pool (vslow_work_proportion) may be busy with
   * very slow work at any one time.
   */
  static unsigned slow_work_min_threads = 2;
  static unsigned slow_work_max_threads = 4;
  static unsigned vslow_work_proportion = 50;

  #ifdef CONFIG_SYSCTL
  /* Bounds handed to the sysctl range checks through extra1/extra2 */
  static const int slow_work_min_min_threads = 2;
  static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
  static const int slow_work_min_vslow = 1;
  static const int slow_work_max_vslow = 99;

  /*
   * The sysctl table: min-threads is bounded above by the current max-threads
   * and max-threads is bounded below by the current min-threads.
   */
  ctl_table slow_work_sysctls[] = {
      {
          .ctl_name     = CTL_UNNUMBERED,
          .procname     = "min-threads",
          .data         = &slow_work_min_threads,
          .maxlen       = sizeof(unsigned),
          .mode         = 0644,
          .proc_handler = slow_work_min_threads_sysctl,
          .extra1       = (void *) &slow_work_min_min_threads,
          .extra2       = &slow_work_max_threads,
      },
      {
          .ctl_name     = CTL_UNNUMBERED,
          .procname     = "max-threads",
          .data         = &slow_work_max_threads,
          .maxlen       = sizeof(unsigned),
          .mode         = 0644,
          .proc_handler = slow_work_max_threads_sysctl,
          .extra1       = &slow_work_min_threads,
          .extra2       = (void *) &slow_work_max_max_threads,
      },
      {
          .ctl_name     = CTL_UNNUMBERED,
          .procname     = "vslow-percentage",
          .data         = &vslow_work_proportion,
          .maxlen       = sizeof(unsigned),
          .mode         = 0644,
          .proc_handler = &proc_dointvec_minmax,
          .extra1       = (void *) &slow_work_min_vslow,
          .extra2       = (void *) &slow_work_max_vslow,
      },
      { .ctl_name = 0 }
  };
  #endif
  80. /*
  81. * The active state of the thread pool
  82. */
  83. static atomic_t slow_work_thread_count;
  84. static atomic_t vslow_work_executing_count;
  85. static bool slow_work_may_not_start_new_thread;
  86. static bool slow_work_cull; /* cull a thread due to lack of activity */
  87. static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
  88. static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
  89. static struct slow_work slow_work_new_thread; /* new thread starter */
  90. /*
  91. * slow work ID allocation (use slow_work_queue_lock)
  92. */
  93. static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  94. /*
  95. * Unregistration tracking to prevent put_ref() from disappearing during module
  96. * unload
  97. */
  98. #ifdef CONFIG_MODULES
  99. static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
  100. static struct module *slow_work_unreg_module;
  101. static struct slow_work *slow_work_unreg_work_item;
  102. static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
  103. static DEFINE_MUTEX(slow_work_unreg_sync_lock);
  104. #endif
  105. /*
  106. * The queues of work items and the lock governing access to them. These are
  107. * shared between all the CPUs. It doesn't make sense to have per-CPU queues
  108. * as the number of threads bears no relation to the number of CPUs.
  109. *
  110. * There are two queues of work items: one for slow work items, and one for
  111. * very slow work items.
  112. */
  113. static LIST_HEAD(slow_work_queue);
  114. static LIST_HEAD(vslow_work_queue);
  115. static DEFINE_SPINLOCK(slow_work_queue_lock);
  116. /*
  117. * The thread controls. A variable used to signal to the threads that they
  118. * should exit when the queue is empty, a waitqueue used by the threads to wait
  119. * for signals, and a completion set by the last thread to exit.
  120. */
  121. static bool slow_work_threads_should_exit;
  122. static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
  123. static DECLARE_COMPLETION(slow_work_last_thread_exited);
  124. /*
  125. * The number of users of the thread pool and its lock. Whilst this is zero we
  126. * have no threads hanging around, and when this reaches zero, we wait for all
  127. * active or queued work items to complete and kill all the threads we do have.
  128. */
  129. static int slow_work_user_count;
  130. static DEFINE_MUTEX(slow_work_user_lock);
  131. static inline int slow_work_get_ref(struct slow_work *work)
  132. {
  133. if (work->ops->get_ref)
  134. return work->ops->get_ref(work);
  135. return 0;
  136. }
  137. static inline void slow_work_put_ref(struct slow_work *work)
  138. {
  139. if (work->ops->put_ref)
  140. work->ops->put_ref(work);
  141. }
  142. /*
  143. * Calculate the maximum number of active threads in the pool that are
  144. * permitted to process very slow work items.
  145. *
  146. * The answer is rounded up to at least 1, but may not equal or exceed the
  147. * maximum number of the threads in the pool. This means we always have at
  148. * least one thread that can process slow work items, and we always have at
  149. * least one thread that won't get tied up doing so.
  150. */
  151. static unsigned slow_work_calc_vsmax(void)
  152. {
  153. unsigned vsmax;
  154. vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
  155. vsmax /= 100;
  156. vsmax = max(vsmax, 1U);
  157. return min(vsmax, slow_work_max_threads - 1);
  158. }
  159. /*
  160. * Attempt to execute stuff queued on a slow thread. Return true if we managed
  161. * it, false if there was nothing to do.
  162. */
  163. static bool slow_work_execute(int id)
  164. {
  165. #ifdef CONFIG_MODULES
  166. struct module *module;
  167. #endif
  168. struct slow_work *work = NULL;
  169. unsigned vsmax;
  170. bool very_slow;
  171. vsmax = slow_work_calc_vsmax();
  172. /* see if we can schedule a new thread to be started if we're not
  173. * keeping up with the work */
  174. if (!waitqueue_active(&slow_work_thread_wq) &&
  175. (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
  176. atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
  177. !slow_work_may_not_start_new_thread)
  178. slow_work_enqueue(&slow_work_new_thread);
  179. /* find something to execute */
  180. spin_lock_irq(&slow_work_queue_lock);
  181. if (!list_empty(&vslow_work_queue) &&
  182. atomic_read(&vslow_work_executing_count) < vsmax) {
  183. work = list_entry(vslow_work_queue.next,
  184. struct slow_work, link);
  185. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  186. BUG();
  187. list_del_init(&work->link);
  188. atomic_inc(&vslow_work_executing_count);
  189. very_slow = true;
  190. } else if (!list_empty(&slow_work_queue)) {
  191. work = list_entry(slow_work_queue.next,
  192. struct slow_work, link);
  193. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  194. BUG();
  195. list_del_init(&work->link);
  196. very_slow = false;
  197. } else {
  198. very_slow = false; /* avoid the compiler warning */
  199. }
  200. #ifdef CONFIG_MODULES
  201. if (work)
  202. slow_work_thread_processing[id] = work->owner;
  203. #endif
  204. spin_unlock_irq(&slow_work_queue_lock);
  205. if (!work)
  206. return false;
  207. if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
  208. BUG();
  209. /* don't execute if the work is in the process of being cancelled */
  210. if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
  211. work->ops->execute(work);
  212. if (very_slow)
  213. atomic_dec(&vslow_work_executing_count);
  214. clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
  215. /* wake up anyone waiting for this work to be complete */
  216. wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
  217. /* if someone tried to enqueue the item whilst we were executing it,
  218. * then it'll be left unenqueued to avoid multiple threads trying to
  219. * execute it simultaneously
  220. *
  221. * there is, however, a race between us testing the pending flag and
  222. * getting the spinlock, and between the enqueuer setting the pending
  223. * flag and getting the spinlock, so we use a deferral bit to tell us
  224. * if the enqueuer got there first
  225. */
  226. if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
  227. spin_lock_irq(&slow_work_queue_lock);
  228. if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
  229. test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
  230. goto auto_requeue;
  231. spin_unlock_irq(&slow_work_queue_lock);
  232. }
  233. /* sort out the race between module unloading and put_ref() */
  234. slow_work_put_ref(work);
  235. #ifdef CONFIG_MODULES
  236. module = slow_work_thread_processing[id];
  237. slow_work_thread_processing[id] = NULL;
  238. smp_mb();
  239. if (slow_work_unreg_work_item == work ||
  240. slow_work_unreg_module == module)
  241. wake_up_all(&slow_work_unreg_wq);
  242. #endif
  243. return true;
  244. auto_requeue:
  245. /* we must complete the enqueue operation
  246. * - we transfer our ref on the item back to the appropriate queue
  247. * - don't wake another thread up as we're awake already
  248. */
  249. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  250. list_add_tail(&work->link, &vslow_work_queue);
  251. else
  252. list_add_tail(&work->link, &slow_work_queue);
  253. spin_unlock_irq(&slow_work_queue_lock);
  254. slow_work_thread_processing[id] = NULL;
  255. return true;
  256. }
  257. /**
  258. * slow_work_enqueue - Schedule a slow work item for processing
  259. * @work: The work item to queue
  260. *
  261. * Schedule a slow work item for processing. If the item is already undergoing
  262. * execution, this guarantees not to re-enter the execution routine until the
  263. * first execution finishes.
  264. *
  265. * The item is pinned by this function as it retains a reference to it, managed
  266. * through the item operations. The item is unpinned once it has been
  267. * executed.
  268. *
  269. * An item may hog the thread that is running it for a relatively large amount
  270. * of time, sufficient, for example, to perform several lookup, mkdir, create
  271. * and setxattr operations. It may sleep on I/O and may sleep to obtain locks.
  272. *
  273. * Conversely, if a number of items are awaiting processing, it may take some
  274. * time before any given item is given attention. The number of threads in the
  275. * pool may be increased to deal with demand, but only up to a limit.
  276. *
  277. * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
  278. * the very slow queue, from which only a portion of the threads will be
  279. * allowed to pick items to execute. This ensures that very slow items won't
  280. * overly block ones that are just ordinarily slow.
  281. *
  282. * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
  283. * attempted queued)
  284. */
  285. int slow_work_enqueue(struct slow_work *work)
  286. {
  287. unsigned long flags;
  288. int ret;
  289. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  290. return -ECANCELED;
  291. BUG_ON(slow_work_user_count <= 0);
  292. BUG_ON(!work);
  293. BUG_ON(!work->ops);
  294. /* when honouring an enqueue request, we only promise that we will run
  295. * the work function in the future; we do not promise to run it once
  296. * per enqueue request
  297. *
  298. * we use the PENDING bit to merge together repeat requests without
  299. * having to disable IRQs and take the spinlock, whilst still
  300. * maintaining our promise
  301. */
  302. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  303. spin_lock_irqsave(&slow_work_queue_lock, flags);
  304. if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
  305. goto cancelled;
  306. /* we promise that we will not attempt to execute the work
  307. * function in more than one thread simultaneously
  308. *
  309. * this, however, leaves us with a problem if we're asked to
  310. * enqueue the work whilst someone is executing the work
  311. * function as simply queueing the work immediately means that
  312. * another thread may try executing it whilst it is already
  313. * under execution
  314. *
  315. * to deal with this, we set the ENQ_DEFERRED bit instead of
  316. * enqueueing, and the thread currently executing the work
  317. * function will enqueue the work item when the work function
  318. * returns and it has cleared the EXECUTING bit
  319. */
  320. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  321. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  322. } else {
  323. ret = slow_work_get_ref(work);
  324. if (ret < 0)
  325. goto failed;
  326. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  327. list_add_tail(&work->link, &vslow_work_queue);
  328. else
  329. list_add_tail(&work->link, &slow_work_queue);
  330. wake_up(&slow_work_thread_wq);
  331. }
  332. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  333. }
  334. return 0;
  335. cancelled:
  336. ret = -ECANCELED;
  337. failed:
  338. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  339. return ret;
  340. }
  341. EXPORT_SYMBOL(slow_work_enqueue);
  /*
   * Sleep action passed to wait_on_bit() by slow_work_cancel(): give up the
   * CPU once and report success.  The @word argument is not used.
   */
  static int slow_work_wait(void *word)
  {
      schedule();

      return 0;
  }
  347. /**
  348. * slow_work_cancel - Cancel a slow work item
  349. * @work: The work item to cancel
  350. *
  351. * This function will cancel a previously enqueued work item. If we cannot
  352. * cancel the work item, it is guarenteed to have run when this function
  353. * returns.
  354. */
  355. void slow_work_cancel(struct slow_work *work)
  356. {
  357. bool wait = true, put = false;
  358. set_bit(SLOW_WORK_CANCELLING, &work->flags);
  359. smp_mb();
  360. /* if the work item is a delayed work item with an active timer, we
  361. * need to wait for the timer to finish _before_ getting the spinlock,
  362. * lest we deadlock against the timer routine
  363. *
  364. * the timer routine will leave DELAYED set if it notices the
  365. * CANCELLING flag in time
  366. */
  367. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  368. struct delayed_slow_work *dwork =
  369. container_of(work, struct delayed_slow_work, work);
  370. del_timer_sync(&dwork->timer);
  371. }
  372. spin_lock_irq(&slow_work_queue_lock);
  373. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  374. /* the timer routine aborted or never happened, so we are left
  375. * holding the timer's reference on the item and should just
  376. * drop the pending flag and wait for any ongoing execution to
  377. * finish */
  378. struct delayed_slow_work *dwork =
  379. container_of(work, struct delayed_slow_work, work);
  380. BUG_ON(timer_pending(&dwork->timer));
  381. BUG_ON(!list_empty(&work->link));
  382. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  383. put = true;
  384. clear_bit(SLOW_WORK_PENDING, &work->flags);
  385. } else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
  386. !list_empty(&work->link)) {
  387. /* the link in the pending queue holds a reference on the item
  388. * that we will need to release */
  389. list_del_init(&work->link);
  390. wait = false;
  391. put = true;
  392. clear_bit(SLOW_WORK_PENDING, &work->flags);
  393. } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
  394. /* the executor is holding our only reference on the item, so
  395. * we merely need to wait for it to finish executing */
  396. clear_bit(SLOW_WORK_PENDING, &work->flags);
  397. }
  398. spin_unlock_irq(&slow_work_queue_lock);
  399. /* the EXECUTING flag is set by the executor whilst the spinlock is set
  400. * and before the item is dequeued - so assuming the above doesn't
  401. * actually dequeue it, simply waiting for the EXECUTING flag to be
  402. * released here should be sufficient */
  403. if (wait)
  404. wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
  405. TASK_UNINTERRUPTIBLE);
  406. clear_bit(SLOW_WORK_CANCELLING, &work->flags);
  407. if (put)
  408. slow_work_put_ref(work);
  409. }
  410. EXPORT_SYMBOL(slow_work_cancel);
  411. /*
  412. * Handle expiry of the delay timer, indicating that a delayed slow work item
  413. * should now be queued if not cancelled
  414. */
  415. static void delayed_slow_work_timer(unsigned long data)
  416. {
  417. struct slow_work *work = (struct slow_work *) data;
  418. unsigned long flags;
  419. bool queued = false, put = false;
  420. spin_lock_irqsave(&slow_work_queue_lock, flags);
  421. if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
  422. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  423. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  424. /* we discard the reference the timer was holding in
  425. * favour of the one the executor holds */
  426. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  427. put = true;
  428. } else {
  429. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  430. list_add_tail(&work->link, &vslow_work_queue);
  431. else
  432. list_add_tail(&work->link, &slow_work_queue);
  433. queued = true;
  434. }
  435. }
  436. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  437. if (put)
  438. slow_work_put_ref(work);
  439. if (queued)
  440. wake_up(&slow_work_thread_wq);
  441. }
  442. /**
  443. * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
  444. * @dwork: The delayed work item to queue
  445. * @delay: When to start executing the work, in jiffies from now
  446. *
  447. * This is similar to slow_work_enqueue(), but it adds a delay before the work
  448. * is actually queued for processing.
  449. *
  450. * The item can have delayed processing requested on it whilst it is being
  451. * executed. The delay will begin immediately, and if it expires before the
  452. * item finishes executing, the item will be placed back on the queue when it
  453. * has done executing.
  454. */
  455. int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
  456. unsigned long delay)
  457. {
  458. struct slow_work *work = &dwork->work;
  459. unsigned long flags;
  460. int ret;
  461. if (delay == 0)
  462. return slow_work_enqueue(&dwork->work);
  463. BUG_ON(slow_work_user_count <= 0);
  464. BUG_ON(!work);
  465. BUG_ON(!work->ops);
  466. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  467. return -ECANCELED;
  468. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  469. spin_lock_irqsave(&slow_work_queue_lock, flags);
  470. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  471. goto cancelled;
  472. /* the timer holds a reference whilst it is pending */
  473. ret = work->ops->get_ref(work);
  474. if (ret < 0)
  475. goto cant_get_ref;
  476. if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
  477. BUG();
  478. dwork->timer.expires = jiffies + delay;
  479. dwork->timer.data = (unsigned long) work;
  480. dwork->timer.function = delayed_slow_work_timer;
  481. add_timer(&dwork->timer);
  482. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  483. }
  484. return 0;
  485. cancelled:
  486. ret = -ECANCELED;
  487. cant_get_ref:
  488. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  489. return ret;
  490. }
  491. EXPORT_SYMBOL(delayed_slow_work_enqueue);
  492. /*
  493. * Schedule a cull of the thread pool at some time in the near future
  494. */
  495. static void slow_work_schedule_cull(void)
  496. {
  497. mod_timer(&slow_work_cull_timer,
  498. round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
  499. }
  500. /*
  501. * Worker thread culling algorithm
  502. */
  503. static bool slow_work_cull_thread(void)
  504. {
  505. unsigned long flags;
  506. bool do_cull = false;
  507. spin_lock_irqsave(&slow_work_queue_lock, flags);
  508. if (slow_work_cull) {
  509. slow_work_cull = false;
  510. if (list_empty(&slow_work_queue) &&
  511. list_empty(&vslow_work_queue) &&
  512. atomic_read(&slow_work_thread_count) >
  513. slow_work_min_threads) {
  514. slow_work_schedule_cull();
  515. do_cull = true;
  516. }
  517. }
  518. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  519. return do_cull;
  520. }
  521. /*
  522. * Determine if there is slow work available for dispatch
  523. */
  524. static inline bool slow_work_available(int vsmax)
  525. {
  526. return !list_empty(&slow_work_queue) ||
  527. (!list_empty(&vslow_work_queue) &&
  528. atomic_read(&vslow_work_executing_count) < vsmax);
  529. }
  530. /*
  531. * Worker thread dispatcher
  532. */
  533. static int slow_work_thread(void *_data)
  534. {
  535. int vsmax, id;
  536. DEFINE_WAIT(wait);
  537. set_freezable();
  538. set_user_nice(current, -5);
  539. /* allocate ourselves an ID */
  540. spin_lock_irq(&slow_work_queue_lock);
  541. id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  542. BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
  543. __set_bit(id, slow_work_ids);
  544. spin_unlock_irq(&slow_work_queue_lock);
  545. sprintf(current->comm, "kslowd%03u", id);
  546. for (;;) {
  547. vsmax = vslow_work_proportion;
  548. vsmax *= atomic_read(&slow_work_thread_count);
  549. vsmax /= 100;
  550. prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
  551. TASK_INTERRUPTIBLE);
  552. if (!freezing(current) &&
  553. !slow_work_threads_should_exit &&
  554. !slow_work_available(vsmax) &&
  555. !slow_work_cull)
  556. schedule();
  557. finish_wait(&slow_work_thread_wq, &wait);
  558. try_to_freeze();
  559. vsmax = vslow_work_proportion;
  560. vsmax *= atomic_read(&slow_work_thread_count);
  561. vsmax /= 100;
  562. if (slow_work_available(vsmax) && slow_work_execute(id)) {
  563. cond_resched();
  564. if (list_empty(&slow_work_queue) &&
  565. list_empty(&vslow_work_queue) &&
  566. atomic_read(&slow_work_thread_count) >
  567. slow_work_min_threads)
  568. slow_work_schedule_cull();
  569. continue;
  570. }
  571. if (slow_work_threads_should_exit)
  572. break;
  573. if (slow_work_cull && slow_work_cull_thread())
  574. break;
  575. }
  576. spin_lock_irq(&slow_work_queue_lock);
  577. __clear_bit(id, slow_work_ids);
  578. spin_unlock_irq(&slow_work_queue_lock);
  579. if (atomic_dec_and_test(&slow_work_thread_count))
  580. complete_and_exit(&slow_work_last_thread_exited, 0);
  581. return 0;
  582. }
  583. /*
  584. * Handle thread cull timer expiration
  585. */
  586. static void slow_work_cull_timeout(unsigned long data)
  587. {
  588. slow_work_cull = true;
  589. wake_up(&slow_work_thread_wq);
  590. }
  591. /*
  592. * Start a new slow work thread
  593. */
  594. static void slow_work_new_thread_execute(struct slow_work *work)
  595. {
  596. struct task_struct *p;
  597. if (slow_work_threads_should_exit)
  598. return;
  599. if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
  600. return;
  601. if (!mutex_trylock(&slow_work_user_lock))
  602. return;
  603. slow_work_may_not_start_new_thread = true;
  604. atomic_inc(&slow_work_thread_count);
  605. p = kthread_run(slow_work_thread, NULL, "kslowd");
  606. if (IS_ERR(p)) {
  607. printk(KERN_DEBUG "Slow work thread pool: OOM\n");
  608. if (atomic_dec_and_test(&slow_work_thread_count))
  609. BUG(); /* we're running on a slow work thread... */
  610. mod_timer(&slow_work_oom_timer,
  611. round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
  612. } else {
  613. /* ratelimit the starting of new threads */
  614. mod_timer(&slow_work_oom_timer, jiffies + 1);
  615. }
  616. mutex_unlock(&slow_work_user_lock);
  617. }
  618. static const struct slow_work_ops slow_work_new_thread_ops = {
  619. .owner = THIS_MODULE,
  620. .execute = slow_work_new_thread_execute,
  621. };
  622. /*
  623. * post-OOM new thread start suppression expiration
  624. */
  625. static void slow_work_oom_timeout(unsigned long data)
  626. {
  627. slow_work_may_not_start_new_thread = false;
  628. }
  629. #ifdef CONFIG_SYSCTL
  630. /*
  631. * Handle adjustment of the minimum number of threads
  632. */
  633. static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  634. void __user *buffer,
  635. size_t *lenp, loff_t *ppos)
  636. {
  637. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  638. int n;
  639. if (ret == 0) {
  640. mutex_lock(&slow_work_user_lock);
  641. if (slow_work_user_count > 0) {
  642. /* see if we need to start or stop threads */
  643. n = atomic_read(&slow_work_thread_count) -
  644. slow_work_min_threads;
  645. if (n < 0 && !slow_work_may_not_start_new_thread)
  646. slow_work_enqueue(&slow_work_new_thread);
  647. else if (n > 0)
  648. slow_work_schedule_cull();
  649. }
  650. mutex_unlock(&slow_work_user_lock);
  651. }
  652. return ret;
  653. }
  654. /*
  655. * Handle adjustment of the maximum number of threads
  656. */
  657. static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
  658. void __user *buffer,
  659. size_t *lenp, loff_t *ppos)
  660. {
  661. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  662. int n;
  663. if (ret == 0) {
  664. mutex_lock(&slow_work_user_lock);
  665. if (slow_work_user_count > 0) {
  666. /* see if we need to stop threads */
  667. n = slow_work_max_threads -
  668. atomic_read(&slow_work_thread_count);
  669. if (n < 0)
  670. slow_work_schedule_cull();
  671. }
  672. mutex_unlock(&slow_work_user_lock);
  673. }
  674. return ret;
  675. }
  676. #endif /* CONFIG_SYSCTL */
  677. /**
  678. * slow_work_register_user - Register a user of the facility
  679. * @module: The module about to make use of the facility
  680. *
  681. * Register a user of the facility, starting up the initial threads if there
  682. * aren't any other users at this point. This will return 0 if successful, or
  683. * an error if not.
  684. */
  685. int slow_work_register_user(struct module *module)
  686. {
  687. struct task_struct *p;
  688. int loop;
  689. mutex_lock(&slow_work_user_lock);
  690. if (slow_work_user_count == 0) {
  691. printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
  692. init_completion(&slow_work_last_thread_exited);
  693. slow_work_threads_should_exit = false;
  694. slow_work_init(&slow_work_new_thread,
  695. &slow_work_new_thread_ops);
  696. slow_work_may_not_start_new_thread = false;
  697. slow_work_cull = false;
  698. /* start the minimum number of threads */
  699. for (loop = 0; loop < slow_work_min_threads; loop++) {
  700. atomic_inc(&slow_work_thread_count);
  701. p = kthread_run(slow_work_thread, NULL, "kslowd");
  702. if (IS_ERR(p))
  703. goto error;
  704. }
  705. printk(KERN_NOTICE "Slow work thread pool: Ready\n");
  706. }
  707. slow_work_user_count++;
  708. mutex_unlock(&slow_work_user_lock);
  709. return 0;
  710. error:
  711. if (atomic_dec_and_test(&slow_work_thread_count))
  712. complete(&slow_work_last_thread_exited);
  713. if (loop > 0) {
  714. printk(KERN_ERR "Slow work thread pool:"
  715. " Aborting startup on ENOMEM\n");
  716. slow_work_threads_should_exit = true;
  717. wake_up_all(&slow_work_thread_wq);
  718. wait_for_completion(&slow_work_last_thread_exited);
  719. printk(KERN_ERR "Slow work thread pool: Aborted\n");
  720. }
  721. mutex_unlock(&slow_work_user_lock);
  722. return PTR_ERR(p);
  723. }
  724. EXPORT_SYMBOL(slow_work_register_user);
  /*
   * wait for all outstanding items from the calling module to complete
   * - note that more items may be queued whilst we're waiting
   *
   * The tracking state this relies on only exists when CONFIG_MODULES is
   * set, so without it this function does nothing.
   */
  static void slow_work_wait_for_items(struct module *module)
  {
  #ifdef CONFIG_MODULES
      DECLARE_WAITQUEUE(myself, current);
      struct slow_work *work;
      int loop;

      /* only one unregistration may use the tracking variables at a time */
      mutex_lock(&slow_work_unreg_sync_lock);
      add_wait_queue(&slow_work_unreg_wq, &myself);

      for (;;) {
          spin_lock_irq(&slow_work_queue_lock);

          /* first of all, we wait for the last queued item in each list
           * to be processed */
          list_for_each_entry_reverse(work, &vslow_work_queue, link) {
              if (work->owner == module) {
                  set_current_state(TASK_UNINTERRUPTIBLE);
                  slow_work_unreg_work_item = work;
                  goto do_wait;
              }
          }
          list_for_each_entry_reverse(work, &slow_work_queue, link) {
              if (work->owner == module) {
                  set_current_state(TASK_UNINTERRUPTIBLE);
                  slow_work_unreg_work_item = work;
                  goto do_wait;
              }
          }

          /* then we wait for the items being processed to finish
           *
           * NOTE(review): the task state is not changed on this path, so
           * the schedule() below looks like it returns promptly and this
           * phase polls rather than sleeps - confirm that is intended */
          slow_work_unreg_module = module;
          smp_mb();
          for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
              if (slow_work_thread_processing[loop] == module)
                  goto do_wait;
          }
          spin_unlock_irq(&slow_work_queue_lock);
          break; /* okay, we're done */

      do_wait:
          spin_unlock_irq(&slow_work_queue_lock);
          schedule();
          slow_work_unreg_work_item = NULL;
          slow_work_unreg_module = NULL;
      }

      remove_wait_queue(&slow_work_unreg_wq, &myself);
      mutex_unlock(&slow_work_unreg_sync_lock);
  #endif /* CONFIG_MODULES */
  }
  772. /**
  773. * slow_work_unregister_user - Unregister a user of the facility
  774. * @module: The module whose items should be cleared
  775. *
  776. * Unregister a user of the facility, killing all the threads if this was the
  777. * last one.
  778. *
  779. * This waits for all the work items belonging to the nominated module to go
  780. * away before proceeding.
  781. */
  782. void slow_work_unregister_user(struct module *module)
  783. {
  784. /* first of all, wait for all outstanding items from the calling module
  785. * to complete */
  786. if (module)
  787. slow_work_wait_for_items(module);
  788. /* then we can actually go about shutting down the facility if need
  789. * be */
  790. mutex_lock(&slow_work_user_lock);
  791. BUG_ON(slow_work_user_count <= 0);
  792. slow_work_user_count--;
  793. if (slow_work_user_count == 0) {
  794. printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
  795. slow_work_threads_should_exit = true;
  796. del_timer_sync(&slow_work_cull_timer);
  797. del_timer_sync(&slow_work_oom_timer);
  798. wake_up_all(&slow_work_thread_wq);
  799. wait_for_completion(&slow_work_last_thread_exited);
  800. printk(KERN_NOTICE "Slow work thread pool:"
  801. " Shut down complete\n");
  802. }
  803. mutex_unlock(&slow_work_user_lock);
  804. }
  805. EXPORT_SYMBOL(slow_work_unregister_user);
  806. /*
  807. * Initialise the slow work facility
  808. */
  809. static int __init init_slow_work(void)
  810. {
  811. unsigned nr_cpus = num_possible_cpus();
  812. if (slow_work_max_threads < nr_cpus)
  813. slow_work_max_threads = nr_cpus;
  814. #ifdef CONFIG_SYSCTL
  815. if (slow_work_max_max_threads < nr_cpus * 2)
  816. slow_work_max_max_threads = nr_cpus * 2;
  817. #endif
  818. return 0;
  819. }
  820. subsys_initcall(init_slow_work);