/* slow-work.c */
  1. /* Worker thread pool for slow items, such as filesystem lookups or mkdirs
  2. *
  3. * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public Licence
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the Licence, or (at your option) any later version.
  10. *
  11. * See Documentation/slow-work.txt
  12. */
  13. #include <linux/module.h>
  14. #include <linux/slow-work.h>
  15. #include <linux/kthread.h>
  16. #include <linux/freezer.h>
  17. #include <linux/wait.h>
  18. #include <linux/debugfs.h>
  19. #include "slow-work.h"
  /*
   * Forward declarations: the timer callbacks are referenced by the
   * DEFINE_TIMER() initialisers and the sysctl handlers by the sysctl table
   * before their definitions appear.
   */
  static void slow_work_cull_timeout(unsigned long data);
  static void slow_work_oom_timeout(unsigned long data);

  #ifdef CONFIG_SYSCTL
  static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  					  void __user *buffer,
  					  size_t *lenp, loff_t *ppos);

  static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
  					  void __user *buffer,
  					  size_t *lenp, loff_t *ppos);
  #endif
  /*
   * Pool sizing.
   *
   * As long as someone is using the facility the pool holds at least
   * slow_work_min_threads threads and may grow to slow_work_max_threads.
   *
   * vslow_work_proportion is the percentage of the threads that may be
   * processing very slow work at any one time.
   */
  static unsigned slow_work_min_threads = 2;
  static unsigned slow_work_max_threads = 4;
  static unsigned vslow_work_proportion = 50;
  #ifdef CONFIG_SYSCTL
  /* Bounds handed to the sysctl handlers through .extra1/.extra2 */
  static const int slow_work_min_min_threads = 2;
  static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
  static const int slow_work_min_vslow = 1;
  static const int slow_work_max_vslow = 99;

  /*
   * Tunables for the thread pool.  min-threads is bounded above by the current
   * max-threads value and max-threads is bounded below by the current
   * min-threads value, so the two can never cross.
   */
  ctl_table slow_work_sysctls[] = {
  	{
  		.procname	= "min-threads",
  		.data		= &slow_work_min_threads,
  		.maxlen		= sizeof(unsigned),
  		.mode		= 0644,
  		.proc_handler	= slow_work_min_threads_sysctl,
  		.extra1		= (void *) &slow_work_min_min_threads,
  		.extra2		= &slow_work_max_threads,
  	},
  	{
  		.procname	= "max-threads",
  		.data		= &slow_work_max_threads,
  		.maxlen		= sizeof(unsigned),
  		.mode		= 0644,
  		.proc_handler	= slow_work_max_threads_sysctl,
  		.extra1		= &slow_work_min_threads,
  		.extra2		= (void *) &slow_work_max_max_threads,
  	},
  	{
  		.procname	= "vslow-percentage",
  		.data		= &vslow_work_proportion,
  		.maxlen		= sizeof(unsigned),
  		.mode		= 0644,
  		.proc_handler	= proc_dointvec_minmax,
  		.extra1		= (void *) &slow_work_min_vslow,
  		.extra2		= (void *) &slow_work_max_vslow,
  	},
  	{}	/* terminator */
  };
  #endif
  74. /*
  75. * The active state of the thread pool
  76. */
  77. static atomic_t slow_work_thread_count;
  78. static atomic_t vslow_work_executing_count;
  79. static bool slow_work_may_not_start_new_thread;
  80. static bool slow_work_cull; /* cull a thread due to lack of activity */
  81. static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
  82. static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
  83. static struct slow_work slow_work_new_thread; /* new thread starter */
  84. /*
  85. * slow work ID allocation (use slow_work_queue_lock)
  86. */
  87. static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  /*
   * Unregistration tracking to prevent put_ref() from disappearing during
   * module unload
   */
  #ifdef CONFIG_MODULES
  static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
  static struct module *slow_work_unreg_module;
  static struct slow_work *slow_work_unreg_work_item;
  static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
  static DEFINE_MUTEX(slow_work_unreg_sync_lock);

  /*
   * Note the owner of the item that thread @id is about to process.  Nothing
   * is recorded if the thread found no work.
   */
  static void slow_work_set_thread_processing(int id, struct slow_work *work)
  {
  	if (!work)
  		return;

  	slow_work_thread_processing[id] = work->owner;
  }

  /*
   * Thread @id has finished with @work: empty its slot and wake up anyone
   * waiting on slow_work_unreg_wq if the item or its owner matches the
   * recorded unregistration target.
   */
  static void slow_work_done_thread_processing(int id, struct slow_work *work)
  {
  	struct module *owner = slow_work_thread_processing[id];

  	slow_work_thread_processing[id] = NULL;

  	/* full barrier between emptying the slot and sampling the
  	 * unregistration targets */
  	smp_mb();

  	if (slow_work_unreg_work_item != work &&
  	    slow_work_unreg_module != owner)
  		return;

  	wake_up_all(&slow_work_unreg_wq);
  }

  /*
   * Empty thread @id's slot without waking anyone up.
   */
  static void slow_work_clear_thread_processing(int id)
  {
  	slow_work_thread_processing[id] = NULL;
  }
  #else
  /* no modules, so nothing to track */
  static void slow_work_set_thread_processing(int id, struct slow_work *work) {}
  static void slow_work_done_thread_processing(int id, struct slow_work *work) {}
  static void slow_work_clear_thread_processing(int id) {}
  #endif
  /*
   * Per-thread record of the item currently being executed and of the thread's
   * PID, kept for indication through /proc
   */
  #ifdef CONFIG_SLOW_WORK_DEBUG
  struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
  pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
  DEFINE_RWLOCK(slow_work_execs_lock);
  #endif
  /*
   * The work item queues and the lock that governs access to them.  They are
   * shared between all the CPUs; per-CPU queues would make no sense as the
   * number of threads bears no relation to the number of CPUs.
   *
   * Slow work items and very slow work items are kept on separate queues.
   */
  LIST_HEAD(slow_work_queue);
  LIST_HEAD(vslow_work_queue);
  DEFINE_SPINLOCK(slow_work_queue_lock);

  /*
   * One wait queue per work queue, pinged when a work item is placed on an
   * empty queue.  These allow work items that are hogging a thread by sleeping
   * in a way that could be deferred to yield their thread and enqueue
   * themselves.
   */
  static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation);
  static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);
  148. /*
  149. * The thread controls. A variable used to signal to the threads that they
  150. * should exit when the queue is empty, a waitqueue used by the threads to wait
  151. * for signals, and a completion set by the last thread to exit.
  152. */
  153. static bool slow_work_threads_should_exit;
  154. static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
  155. static DECLARE_COMPLETION(slow_work_last_thread_exited);
  156. /*
  157. * The number of users of the thread pool and its lock. Whilst this is zero we
  158. * have no threads hanging around, and when this reaches zero, we wait for all
  159. * active or queued work items to complete and kill all the threads we do have.
  160. */
  161. static int slow_work_user_count;
  162. static DEFINE_MUTEX(slow_work_user_lock);
  163. static inline int slow_work_get_ref(struct slow_work *work)
  164. {
  165. if (work->ops->get_ref)
  166. return work->ops->get_ref(work);
  167. return 0;
  168. }
  169. static inline void slow_work_put_ref(struct slow_work *work)
  170. {
  171. if (work->ops->put_ref)
  172. work->ops->put_ref(work);
  173. }
  174. /*
  175. * Calculate the maximum number of active threads in the pool that are
  176. * permitted to process very slow work items.
  177. *
  178. * The answer is rounded up to at least 1, but may not equal or exceed the
  179. * maximum number of the threads in the pool. This means we always have at
  180. * least one thread that can process slow work items, and we always have at
  181. * least one thread that won't get tied up doing so.
  182. */
  183. static unsigned slow_work_calc_vsmax(void)
  184. {
  185. unsigned vsmax;
  186. vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
  187. vsmax /= 100;
  188. vsmax = max(vsmax, 1U);
  189. return min(vsmax, slow_work_max_threads - 1);
  190. }
  191. /*
  192. * Attempt to execute stuff queued on a slow thread. Return true if we managed
  193. * it, false if there was nothing to do.
  194. */
  195. static noinline bool slow_work_execute(int id)
  196. {
  197. struct slow_work *work = NULL;
  198. unsigned vsmax;
  199. bool very_slow;
  200. vsmax = slow_work_calc_vsmax();
  201. /* see if we can schedule a new thread to be started if we're not
  202. * keeping up with the work */
  203. if (!waitqueue_active(&slow_work_thread_wq) &&
  204. (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
  205. atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
  206. !slow_work_may_not_start_new_thread)
  207. slow_work_enqueue(&slow_work_new_thread);
  208. /* find something to execute */
  209. spin_lock_irq(&slow_work_queue_lock);
  210. if (!list_empty(&vslow_work_queue) &&
  211. atomic_read(&vslow_work_executing_count) < vsmax) {
  212. work = list_entry(vslow_work_queue.next,
  213. struct slow_work, link);
  214. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  215. BUG();
  216. list_del_init(&work->link);
  217. atomic_inc(&vslow_work_executing_count);
  218. very_slow = true;
  219. } else if (!list_empty(&slow_work_queue)) {
  220. work = list_entry(slow_work_queue.next,
  221. struct slow_work, link);
  222. if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
  223. BUG();
  224. list_del_init(&work->link);
  225. very_slow = false;
  226. } else {
  227. very_slow = false; /* avoid the compiler warning */
  228. }
  229. slow_work_set_thread_processing(id, work);
  230. if (work) {
  231. slow_work_mark_time(work);
  232. slow_work_begin_exec(id, work);
  233. }
  234. spin_unlock_irq(&slow_work_queue_lock);
  235. if (!work)
  236. return false;
  237. if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
  238. BUG();
  239. /* don't execute if the work is in the process of being cancelled */
  240. if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
  241. work->ops->execute(work);
  242. if (very_slow)
  243. atomic_dec(&vslow_work_executing_count);
  244. clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
  245. /* wake up anyone waiting for this work to be complete */
  246. wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
  247. slow_work_end_exec(id, work);
  248. /* if someone tried to enqueue the item whilst we were executing it,
  249. * then it'll be left unenqueued to avoid multiple threads trying to
  250. * execute it simultaneously
  251. *
  252. * there is, however, a race between us testing the pending flag and
  253. * getting the spinlock, and between the enqueuer setting the pending
  254. * flag and getting the spinlock, so we use a deferral bit to tell us
  255. * if the enqueuer got there first
  256. */
  257. if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
  258. spin_lock_irq(&slow_work_queue_lock);
  259. if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
  260. test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
  261. goto auto_requeue;
  262. spin_unlock_irq(&slow_work_queue_lock);
  263. }
  264. /* sort out the race between module unloading and put_ref() */
  265. slow_work_put_ref(work);
  266. slow_work_done_thread_processing(id, work);
  267. return true;
  268. auto_requeue:
  269. /* we must complete the enqueue operation
  270. * - we transfer our ref on the item back to the appropriate queue
  271. * - don't wake another thread up as we're awake already
  272. */
  273. slow_work_mark_time(work);
  274. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
  275. list_add_tail(&work->link, &vslow_work_queue);
  276. else
  277. list_add_tail(&work->link, &slow_work_queue);
  278. spin_unlock_irq(&slow_work_queue_lock);
  279. slow_work_clear_thread_processing(id);
  280. return true;
  281. }
  282. /**
  283. * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work
  284. * work: The work item under execution that wants to sleep
  285. * _timeout: Scheduler sleep timeout
  286. *
  287. * Allow a requeueable work item to sleep on a slow-work processor thread until
  288. * that thread is needed to do some other work or the sleep is interrupted by
  289. * some other event.
  290. *
  291. * The caller must set up a wake up event before calling this and must have set
  292. * the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and tested its own
  293. * condition before calling this function as no test is made here.
  294. *
  295. * False is returned if there is nothing on the queue; true is returned if the
  296. * work item should be requeued
  297. */
  298. bool slow_work_sleep_till_thread_needed(struct slow_work *work,
  299. signed long *_timeout)
  300. {
  301. wait_queue_head_t *wfo_wq;
  302. struct list_head *queue;
  303. DEFINE_WAIT(wait);
  304. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
  305. wfo_wq = &vslow_work_queue_waits_for_occupation;
  306. queue = &vslow_work_queue;
  307. } else {
  308. wfo_wq = &slow_work_queue_waits_for_occupation;
  309. queue = &slow_work_queue;
  310. }
  311. if (!list_empty(queue))
  312. return true;
  313. add_wait_queue_exclusive(wfo_wq, &wait);
  314. if (list_empty(queue))
  315. *_timeout = schedule_timeout(*_timeout);
  316. finish_wait(wfo_wq, &wait);
  317. return !list_empty(queue);
  318. }
  319. EXPORT_SYMBOL(slow_work_sleep_till_thread_needed);
  320. /**
  321. * slow_work_enqueue - Schedule a slow work item for processing
  322. * @work: The work item to queue
  323. *
  324. * Schedule a slow work item for processing. If the item is already undergoing
  325. * execution, this guarantees not to re-enter the execution routine until the
  326. * first execution finishes.
  327. *
  328. * The item is pinned by this function as it retains a reference to it, managed
  329. * through the item operations. The item is unpinned once it has been
  330. * executed.
  331. *
  332. * An item may hog the thread that is running it for a relatively large amount
  333. * of time, sufficient, for example, to perform several lookup, mkdir, create
  334. * and setxattr operations. It may sleep on I/O and may sleep to obtain locks.
  335. *
  336. * Conversely, if a number of items are awaiting processing, it may take some
  337. * time before any given item is given attention. The number of threads in the
  338. * pool may be increased to deal with demand, but only up to a limit.
  339. *
  340. * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
  341. * the very slow queue, from which only a portion of the threads will be
  342. * allowed to pick items to execute. This ensures that very slow items won't
  343. * overly block ones that are just ordinarily slow.
  344. *
  345. * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
  346. * attempted queued)
  347. */
  348. int slow_work_enqueue(struct slow_work *work)
  349. {
  350. wait_queue_head_t *wfo_wq;
  351. struct list_head *queue;
  352. unsigned long flags;
  353. int ret;
  354. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  355. return -ECANCELED;
  356. BUG_ON(slow_work_user_count <= 0);
  357. BUG_ON(!work);
  358. BUG_ON(!work->ops);
  359. /* when honouring an enqueue request, we only promise that we will run
  360. * the work function in the future; we do not promise to run it once
  361. * per enqueue request
  362. *
  363. * we use the PENDING bit to merge together repeat requests without
  364. * having to disable IRQs and take the spinlock, whilst still
  365. * maintaining our promise
  366. */
  367. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  368. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
  369. wfo_wq = &vslow_work_queue_waits_for_occupation;
  370. queue = &vslow_work_queue;
  371. } else {
  372. wfo_wq = &slow_work_queue_waits_for_occupation;
  373. queue = &slow_work_queue;
  374. }
  375. spin_lock_irqsave(&slow_work_queue_lock, flags);
  376. if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
  377. goto cancelled;
  378. /* we promise that we will not attempt to execute the work
  379. * function in more than one thread simultaneously
  380. *
  381. * this, however, leaves us with a problem if we're asked to
  382. * enqueue the work whilst someone is executing the work
  383. * function as simply queueing the work immediately means that
  384. * another thread may try executing it whilst it is already
  385. * under execution
  386. *
  387. * to deal with this, we set the ENQ_DEFERRED bit instead of
  388. * enqueueing, and the thread currently executing the work
  389. * function will enqueue the work item when the work function
  390. * returns and it has cleared the EXECUTING bit
  391. */
  392. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  393. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  394. } else {
  395. ret = slow_work_get_ref(work);
  396. if (ret < 0)
  397. goto failed;
  398. slow_work_mark_time(work);
  399. list_add_tail(&work->link, queue);
  400. wake_up(&slow_work_thread_wq);
  401. /* if someone who could be requeued is sleeping on a
  402. * thread, then ask them to yield their thread */
  403. if (work->link.prev == queue)
  404. wake_up(wfo_wq);
  405. }
  406. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  407. }
  408. return 0;
  409. cancelled:
  410. ret = -ECANCELED;
  411. failed:
  412. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  413. return ret;
  414. }
  415. EXPORT_SYMBOL(slow_work_enqueue);
  /*
   * Sleep action handed to wait_on_bit(): just give up the CPU.  @word is not
   * used.  Always returns 0.
   */
  static int slow_work_wait(void *word)
  {
  	schedule();

  	return 0;
  }
  421. /**
  422. * slow_work_cancel - Cancel a slow work item
  423. * @work: The work item to cancel
  424. *
  425. * This function will cancel a previously enqueued work item. If we cannot
  426. * cancel the work item, it is guarenteed to have run when this function
  427. * returns.
  428. */
  429. void slow_work_cancel(struct slow_work *work)
  430. {
  431. bool wait = true, put = false;
  432. set_bit(SLOW_WORK_CANCELLING, &work->flags);
  433. smp_mb();
  434. /* if the work item is a delayed work item with an active timer, we
  435. * need to wait for the timer to finish _before_ getting the spinlock,
  436. * lest we deadlock against the timer routine
  437. *
  438. * the timer routine will leave DELAYED set if it notices the
  439. * CANCELLING flag in time
  440. */
  441. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  442. struct delayed_slow_work *dwork =
  443. container_of(work, struct delayed_slow_work, work);
  444. del_timer_sync(&dwork->timer);
  445. }
  446. spin_lock_irq(&slow_work_queue_lock);
  447. if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
  448. /* the timer routine aborted or never happened, so we are left
  449. * holding the timer's reference on the item and should just
  450. * drop the pending flag and wait for any ongoing execution to
  451. * finish */
  452. struct delayed_slow_work *dwork =
  453. container_of(work, struct delayed_slow_work, work);
  454. BUG_ON(timer_pending(&dwork->timer));
  455. BUG_ON(!list_empty(&work->link));
  456. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  457. put = true;
  458. clear_bit(SLOW_WORK_PENDING, &work->flags);
  459. } else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
  460. !list_empty(&work->link)) {
  461. /* the link in the pending queue holds a reference on the item
  462. * that we will need to release */
  463. list_del_init(&work->link);
  464. wait = false;
  465. put = true;
  466. clear_bit(SLOW_WORK_PENDING, &work->flags);
  467. } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
  468. /* the executor is holding our only reference on the item, so
  469. * we merely need to wait for it to finish executing */
  470. clear_bit(SLOW_WORK_PENDING, &work->flags);
  471. }
  472. spin_unlock_irq(&slow_work_queue_lock);
  473. /* the EXECUTING flag is set by the executor whilst the spinlock is set
  474. * and before the item is dequeued - so assuming the above doesn't
  475. * actually dequeue it, simply waiting for the EXECUTING flag to be
  476. * released here should be sufficient */
  477. if (wait)
  478. wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
  479. TASK_UNINTERRUPTIBLE);
  480. clear_bit(SLOW_WORK_CANCELLING, &work->flags);
  481. if (put)
  482. slow_work_put_ref(work);
  483. }
  484. EXPORT_SYMBOL(slow_work_cancel);
  485. /*
  486. * Handle expiry of the delay timer, indicating that a delayed slow work item
  487. * should now be queued if not cancelled
  488. */
  489. static void delayed_slow_work_timer(unsigned long data)
  490. {
  491. wait_queue_head_t *wfo_wq;
  492. struct list_head *queue;
  493. struct slow_work *work = (struct slow_work *) data;
  494. unsigned long flags;
  495. bool queued = false, put = false, first = false;
  496. if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
  497. wfo_wq = &vslow_work_queue_waits_for_occupation;
  498. queue = &vslow_work_queue;
  499. } else {
  500. wfo_wq = &slow_work_queue_waits_for_occupation;
  501. queue = &slow_work_queue;
  502. }
  503. spin_lock_irqsave(&slow_work_queue_lock, flags);
  504. if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
  505. clear_bit(SLOW_WORK_DELAYED, &work->flags);
  506. if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
  507. /* we discard the reference the timer was holding in
  508. * favour of the one the executor holds */
  509. set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
  510. put = true;
  511. } else {
  512. slow_work_mark_time(work);
  513. list_add_tail(&work->link, queue);
  514. queued = true;
  515. if (work->link.prev == queue)
  516. first = true;
  517. }
  518. }
  519. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  520. if (put)
  521. slow_work_put_ref(work);
  522. if (first)
  523. wake_up(wfo_wq);
  524. if (queued)
  525. wake_up(&slow_work_thread_wq);
  526. }
  527. /**
  528. * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
  529. * @dwork: The delayed work item to queue
  530. * @delay: When to start executing the work, in jiffies from now
  531. *
  532. * This is similar to slow_work_enqueue(), but it adds a delay before the work
  533. * is actually queued for processing.
  534. *
  535. * The item can have delayed processing requested on it whilst it is being
  536. * executed. The delay will begin immediately, and if it expires before the
  537. * item finishes executing, the item will be placed back on the queue when it
  538. * has done executing.
  539. */
  540. int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
  541. unsigned long delay)
  542. {
  543. struct slow_work *work = &dwork->work;
  544. unsigned long flags;
  545. int ret;
  546. if (delay == 0)
  547. return slow_work_enqueue(&dwork->work);
  548. BUG_ON(slow_work_user_count <= 0);
  549. BUG_ON(!work);
  550. BUG_ON(!work->ops);
  551. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  552. return -ECANCELED;
  553. if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
  554. spin_lock_irqsave(&slow_work_queue_lock, flags);
  555. if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
  556. goto cancelled;
  557. /* the timer holds a reference whilst it is pending */
  558. ret = slow_work_get_ref(work);
  559. if (ret < 0)
  560. goto cant_get_ref;
  561. if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
  562. BUG();
  563. dwork->timer.expires = jiffies + delay;
  564. dwork->timer.data = (unsigned long) work;
  565. dwork->timer.function = delayed_slow_work_timer;
  566. add_timer(&dwork->timer);
  567. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  568. }
  569. return 0;
  570. cancelled:
  571. ret = -ECANCELED;
  572. cant_get_ref:
  573. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  574. return ret;
  575. }
  576. EXPORT_SYMBOL(delayed_slow_work_enqueue);
  577. /*
  578. * Schedule a cull of the thread pool at some time in the near future
  579. */
  580. static void slow_work_schedule_cull(void)
  581. {
  582. mod_timer(&slow_work_cull_timer,
  583. round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT));
  584. }
  585. /*
  586. * Worker thread culling algorithm
  587. */
  588. static bool slow_work_cull_thread(void)
  589. {
  590. unsigned long flags;
  591. bool do_cull = false;
  592. spin_lock_irqsave(&slow_work_queue_lock, flags);
  593. if (slow_work_cull) {
  594. slow_work_cull = false;
  595. if (list_empty(&slow_work_queue) &&
  596. list_empty(&vslow_work_queue) &&
  597. atomic_read(&slow_work_thread_count) >
  598. slow_work_min_threads) {
  599. slow_work_schedule_cull();
  600. do_cull = true;
  601. }
  602. }
  603. spin_unlock_irqrestore(&slow_work_queue_lock, flags);
  604. return do_cull;
  605. }
  606. /*
  607. * Determine if there is slow work available for dispatch
  608. */
  609. static inline bool slow_work_available(int vsmax)
  610. {
  611. return !list_empty(&slow_work_queue) ||
  612. (!list_empty(&vslow_work_queue) &&
  613. atomic_read(&vslow_work_executing_count) < vsmax);
  614. }
  615. /*
  616. * Worker thread dispatcher
  617. */
  618. static int slow_work_thread(void *_data)
  619. {
  620. int vsmax, id;
  621. DEFINE_WAIT(wait);
  622. set_freezable();
  623. set_user_nice(current, -5);
  624. /* allocate ourselves an ID */
  625. spin_lock_irq(&slow_work_queue_lock);
  626. id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
  627. BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
  628. __set_bit(id, slow_work_ids);
  629. slow_work_set_thread_pid(id, current->pid);
  630. spin_unlock_irq(&slow_work_queue_lock);
  631. sprintf(current->comm, "kslowd%03u", id);
  632. for (;;) {
  633. vsmax = vslow_work_proportion;
  634. vsmax *= atomic_read(&slow_work_thread_count);
  635. vsmax /= 100;
  636. prepare_to_wait_exclusive(&slow_work_thread_wq, &wait,
  637. TASK_INTERRUPTIBLE);
  638. if (!freezing(current) &&
  639. !slow_work_threads_should_exit &&
  640. !slow_work_available(vsmax) &&
  641. !slow_work_cull)
  642. schedule();
  643. finish_wait(&slow_work_thread_wq, &wait);
  644. try_to_freeze();
  645. vsmax = vslow_work_proportion;
  646. vsmax *= atomic_read(&slow_work_thread_count);
  647. vsmax /= 100;
  648. if (slow_work_available(vsmax) && slow_work_execute(id)) {
  649. cond_resched();
  650. if (list_empty(&slow_work_queue) &&
  651. list_empty(&vslow_work_queue) &&
  652. atomic_read(&slow_work_thread_count) >
  653. slow_work_min_threads)
  654. slow_work_schedule_cull();
  655. continue;
  656. }
  657. if (slow_work_threads_should_exit)
  658. break;
  659. if (slow_work_cull && slow_work_cull_thread())
  660. break;
  661. }
  662. spin_lock_irq(&slow_work_queue_lock);
  663. slow_work_set_thread_pid(id, 0);
  664. __clear_bit(id, slow_work_ids);
  665. spin_unlock_irq(&slow_work_queue_lock);
  666. if (atomic_dec_and_test(&slow_work_thread_count))
  667. complete_and_exit(&slow_work_last_thread_exited, 0);
  668. return 0;
  669. }
  670. /*
  671. * Handle thread cull timer expiration
  672. */
  673. static void slow_work_cull_timeout(unsigned long data)
  674. {
  675. slow_work_cull = true;
  676. wake_up(&slow_work_thread_wq);
  677. }
  678. /*
  679. * Start a new slow work thread
  680. */
  681. static void slow_work_new_thread_execute(struct slow_work *work)
  682. {
  683. struct task_struct *p;
  684. if (slow_work_threads_should_exit)
  685. return;
  686. if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
  687. return;
  688. if (!mutex_trylock(&slow_work_user_lock))
  689. return;
  690. slow_work_may_not_start_new_thread = true;
  691. atomic_inc(&slow_work_thread_count);
  692. p = kthread_run(slow_work_thread, NULL, "kslowd");
  693. if (IS_ERR(p)) {
  694. printk(KERN_DEBUG "Slow work thread pool: OOM\n");
  695. if (atomic_dec_and_test(&slow_work_thread_count))
  696. BUG(); /* we're running on a slow work thread... */
  697. mod_timer(&slow_work_oom_timer,
  698. round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT));
  699. } else {
  700. /* ratelimit the starting of new threads */
  701. mod_timer(&slow_work_oom_timer, jiffies + 1);
  702. }
  703. mutex_unlock(&slow_work_user_lock);
  704. }
  705. static const struct slow_work_ops slow_work_new_thread_ops = {
  706. .owner = THIS_MODULE,
  707. .execute = slow_work_new_thread_execute,
  708. #ifdef CONFIG_SLOW_WORK_DEBUG
  709. .desc = slow_work_new_thread_desc,
  710. #endif
  711. };
  712. /*
  713. * post-OOM new thread start suppression expiration
  714. */
  715. static void slow_work_oom_timeout(unsigned long data)
  716. {
  717. slow_work_may_not_start_new_thread = false;
  718. }
  719. #ifdef CONFIG_SYSCTL
  720. /*
  721. * Handle adjustment of the minimum number of threads
  722. */
  723. static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  724. void __user *buffer,
  725. size_t *lenp, loff_t *ppos)
  726. {
  727. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  728. int n;
  729. if (ret == 0) {
  730. mutex_lock(&slow_work_user_lock);
  731. if (slow_work_user_count > 0) {
  732. /* see if we need to start or stop threads */
  733. n = atomic_read(&slow_work_thread_count) -
  734. slow_work_min_threads;
  735. if (n < 0 && !slow_work_may_not_start_new_thread)
  736. slow_work_enqueue(&slow_work_new_thread);
  737. else if (n > 0)
  738. slow_work_schedule_cull();
  739. }
  740. mutex_unlock(&slow_work_user_lock);
  741. }
  742. return ret;
  743. }
  744. /*
  745. * Handle adjustment of the maximum number of threads
  746. */
  747. static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
  748. void __user *buffer,
  749. size_t *lenp, loff_t *ppos)
  750. {
  751. int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
  752. int n;
  753. if (ret == 0) {
  754. mutex_lock(&slow_work_user_lock);
  755. if (slow_work_user_count > 0) {
  756. /* see if we need to stop threads */
  757. n = slow_work_max_threads -
  758. atomic_read(&slow_work_thread_count);
  759. if (n < 0)
  760. slow_work_schedule_cull();
  761. }
  762. mutex_unlock(&slow_work_user_lock);
  763. }
  764. return ret;
  765. }
  766. #endif /* CONFIG_SYSCTL */
  767. /**
  768. * slow_work_register_user - Register a user of the facility
  769. * @module: The module about to make use of the facility
  770. *
  771. * Register a user of the facility, starting up the initial threads if there
  772. * aren't any other users at this point. This will return 0 if successful, or
  773. * an error if not.
  774. */
  775. int slow_work_register_user(struct module *module)
  776. {
  777. struct task_struct *p;
  778. int loop;
  779. mutex_lock(&slow_work_user_lock);
  780. if (slow_work_user_count == 0) {
  781. printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
  782. init_completion(&slow_work_last_thread_exited);
  783. slow_work_threads_should_exit = false;
  784. slow_work_init(&slow_work_new_thread,
  785. &slow_work_new_thread_ops);
  786. slow_work_may_not_start_new_thread = false;
  787. slow_work_cull = false;
  788. /* start the minimum number of threads */
  789. for (loop = 0; loop < slow_work_min_threads; loop++) {
  790. atomic_inc(&slow_work_thread_count);
  791. p = kthread_run(slow_work_thread, NULL, "kslowd");
  792. if (IS_ERR(p))
  793. goto error;
  794. }
  795. printk(KERN_NOTICE "Slow work thread pool: Ready\n");
  796. }
  797. slow_work_user_count++;
  798. mutex_unlock(&slow_work_user_lock);
  799. return 0;
  800. error:
  801. if (atomic_dec_and_test(&slow_work_thread_count))
  802. complete(&slow_work_last_thread_exited);
  803. if (loop > 0) {
  804. printk(KERN_ERR "Slow work thread pool:"
  805. " Aborting startup on ENOMEM\n");
  806. slow_work_threads_should_exit = true;
  807. wake_up_all(&slow_work_thread_wq);
  808. wait_for_completion(&slow_work_last_thread_exited);
  809. printk(KERN_ERR "Slow work thread pool: Aborted\n");
  810. }
  811. mutex_unlock(&slow_work_user_lock);
  812. return PTR_ERR(p);
  813. }
  814. EXPORT_SYMBOL(slow_work_register_user);
  /*
   * Wait for all outstanding items from the calling module to complete
   * - note that more items may be queued whilst we're waiting
   * - compiles to an empty function when CONFIG_MODULES is not set
   * - waiters are serialised by slow_work_unreg_sync_lock, so only one caller
   *   at a time publishes slow_work_unreg_work_item / slow_work_unreg_module
   * - presumably the worker threads check those two variables and wake
   *   slow_work_unreg_wq; that side is not visible here
   */
  static void slow_work_wait_for_items(struct module *module)
  {
  #ifdef CONFIG_MODULES
      DECLARE_WAITQUEUE(waiter, current);
      struct slow_work *item;
      int slot;

      mutex_lock(&slow_work_unreg_sync_lock);
      add_wait_queue(&slow_work_unreg_wq, &waiter);

      for (;;) {
          spin_lock_irq(&slow_work_queue_lock);

          /* first of all, we wait for the last queued item in each list
           * to be processed; the lists are walked tail-first so the
           * first match is the most recently queued one */
          list_for_each_entry_reverse(item, &vslow_work_queue, link) {
              if (item->owner != module)
                  continue;
              set_current_state(TASK_UNINTERRUPTIBLE);
              slow_work_unreg_work_item = item;
              goto do_wait;
          }
          list_for_each_entry_reverse(item, &slow_work_queue, link) {
              if (item->owner != module)
                  continue;
              set_current_state(TASK_UNINTERRUPTIBLE);
              slow_work_unreg_work_item = item;
              goto do_wait;
          }

          /* then we wait for the items being processed to finish; the
           * module is published before the per-thread slots are read.
           * NOTE(review): this path reaches do_wait without changing
           * the task state */
          slow_work_unreg_module = module;
          smp_mb();
          slot = 0;
          while (slot < SLOW_WORK_THREAD_LIMIT &&
                 slow_work_thread_processing[slot] != module)
              slot++;

          if (slot >= SLOW_WORK_THREAD_LIMIT) {
              /* nothing queued and nothing in progress */
              spin_unlock_irq(&slow_work_queue_lock);
              break; /* okay, we're done */
          }

      do_wait:
          spin_unlock_irq(&slow_work_queue_lock);
          schedule();
          /* withdraw what we published before rescanning from scratch */
          slow_work_unreg_work_item = NULL;
          slow_work_unreg_module = NULL;
      }

      remove_wait_queue(&slow_work_unreg_wq, &waiter);
      mutex_unlock(&slow_work_unreg_sync_lock);
  #endif /* CONFIG_MODULES */
  }
  864. /**
  865. * slow_work_unregister_user - Unregister a user of the facility
  866. * @module: The module whose items should be cleared
  867. *
  868. * Unregister a user of the facility, killing all the threads if this was the
  869. * last one.
  870. *
  871. * This waits for all the work items belonging to the nominated module to go
  872. * away before proceeding.
  873. */
  874. void slow_work_unregister_user(struct module *module)
  875. {
  876. /* first of all, wait for all outstanding items from the calling module
  877. * to complete */
  878. if (module)
  879. slow_work_wait_for_items(module);
  880. /* then we can actually go about shutting down the facility if need
  881. * be */
  882. mutex_lock(&slow_work_user_lock);
  883. BUG_ON(slow_work_user_count <= 0);
  884. slow_work_user_count--;
  885. if (slow_work_user_count == 0) {
  886. printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
  887. slow_work_threads_should_exit = true;
  888. del_timer_sync(&slow_work_cull_timer);
  889. del_timer_sync(&slow_work_oom_timer);
  890. wake_up_all(&slow_work_thread_wq);
  891. wait_for_completion(&slow_work_last_thread_exited);
  892. printk(KERN_NOTICE "Slow work thread pool:"
  893. " Shut down complete\n");
  894. }
  895. mutex_unlock(&slow_work_user_lock);
  896. }
  897. EXPORT_SYMBOL(slow_work_unregister_user);
  898. /*
  899. * Initialise the slow work facility
  900. */
  901. static int __init init_slow_work(void)
  902. {
  903. unsigned nr_cpus = num_possible_cpus();
  904. if (slow_work_max_threads < nr_cpus)
  905. slow_work_max_threads = nr_cpus;
  906. #ifdef CONFIG_SYSCTL
  907. if (slow_work_max_max_threads < nr_cpus * 2)
  908. slow_work_max_max_threads = nr_cpus * 2;
  909. #endif
  910. #ifdef CONFIG_SLOW_WORK_DEBUG
  911. {
  912. struct dentry *dbdir;
  913. dbdir = debugfs_create_dir("slow_work", NULL);
  914. if (dbdir && !IS_ERR(dbdir))
  915. debugfs_create_file("runqueue", S_IFREG | 0400, dbdir,
  916. NULL, &slow_work_runqueue_fops);
  917. }
  918. #endif
  919. return 0;
  920. }
  921. subsys_initcall(init_slow_work);