radix-tree.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492
  1. /*
  2. * Copyright (C) 2001 Momchil Velikov
  3. * Portions Copyright (C) 2001 Christoph Hellwig
  4. * Copyright (C) 2005 SGI, Christoph Lameter
  5. * Copyright (C) 2006 Nick Piggin
  6. * Copyright (C) 2012 Konstantin Khlebnikov
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License as
  10. * published by the Free Software Foundation; either version 2, or (at
  11. * your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21. */
  22. #include <linux/errno.h>
  23. #include <linux/init.h>
  24. #include <linux/kernel.h>
  25. #include <linux/export.h>
  26. #include <linux/radix-tree.h>
  27. #include <linux/percpu.h>
  28. #include <linux/slab.h>
  29. #include <linux/notifier.h>
  30. #include <linux/cpu.h>
  31. #include <linux/string.h>
  32. #include <linux/bitops.h>
  33. #include <linux/rcupdate.h>
  34. #include <linux/hardirq.h> /* in_interrupt() */
  /*
   * RADIX_TREE_MAP_SHIFT is the number of index bits resolved by one level
   * of the tree.  Kernel builds pick 4 or 6 depending on CONFIG_BASE_SMALL;
   * builds without __KERNEL__ use 3.
   */
  #ifdef __KERNEL__
  #define RADIX_TREE_MAP_SHIFT    (CONFIG_BASE_SMALL ? 4 : 6)
  #else
  #define RADIX_TREE_MAP_SHIFT    3    /* For more stressful testing */
  #endif

  /* Number of slots in a node, and the mask that extracts a slot offset. */
  #define RADIX_TREE_MAP_SIZE     (1UL << RADIX_TREE_MAP_SHIFT)
  #define RADIX_TREE_MAP_MASK     (RADIX_TREE_MAP_SIZE - 1)

  /* Number of unsigned longs needed to hold one tag bit per slot. */
  #define RADIX_TREE_TAG_LONGS    \
      DIV_ROUND_UP(RADIX_TREE_MAP_SIZE, BITS_PER_LONG)
  44. struct radix_tree_node {
  45. unsigned int height; /* Height from the bottom */
  46. unsigned int count;
  47. union {
  48. struct radix_tree_node *parent; /* Used when ascending tree */
  49. struct rcu_head rcu_head; /* Used when freeing node */
  50. };
  51. void __rcu *slots[RADIX_TREE_MAP_SIZE];
  52. unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
  53. };
  54. #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
  55. #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
  56. RADIX_TREE_MAP_SHIFT))
  57. /*
  58. * The height_to_maxindex array needs to be one deeper than the maximum
  59. * path as height 0 holds only 1 entry.
  60. */
  61. static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
  62. /*
  63. * Radix tree node cache.
  64. */
  65. static struct kmem_cache *radix_tree_node_cachep;
  66. /*
  67. * The radix tree is variable-height, so an insert operation not only has
  68. * to build the branch to its corresponding item, it also has to build the
  69. * branch to existing items if the size has to be increased (by
  70. * radix_tree_extend).
  71. *
  72. * The worst case is a zero height tree with just a single item at index 0,
  73. * and then inserting an item at index ULONG_MAX. This requires 2 new branches
  74. * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
  75. * Hence:
  76. */
  77. #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
  78. /*
  79. * Per-cpu pool of preloaded nodes
  80. */
  81. struct radix_tree_preload {
  82. int nr;
  83. struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
  84. };
  85. static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
  86. static inline void *ptr_to_indirect(void *ptr)
  87. {
  88. return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
  89. }
  90. static inline void *indirect_to_ptr(void *ptr)
  91. {
  92. return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
  93. }
  94. static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
  95. {
  96. return root->gfp_mask & __GFP_BITS_MASK;
  97. }
  98. static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
  99. int offset)
  100. {
  101. __set_bit(offset, node->tags[tag]);
  102. }
  103. static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
  104. int offset)
  105. {
  106. __clear_bit(offset, node->tags[tag]);
  107. }
  108. static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
  109. int offset)
  110. {
  111. return test_bit(offset, node->tags[tag]);
  112. }
  113. static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
  114. {
  115. root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
  116. }
  117. static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
  118. {
  119. root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
  120. }
  121. static inline void root_tag_clear_all(struct radix_tree_root *root)
  122. {
  123. root->gfp_mask &= __GFP_BITS_MASK;
  124. }
  125. static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
  126. {
  127. return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
  128. }
  129. /*
  130. * Returns 1 if any slot in the node has this tag set.
  131. * Otherwise returns 0.
  132. */
  133. static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
  134. {
  135. int idx;
  136. for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
  137. if (node->tags[tag][idx])
  138. return 1;
  139. }
  140. return 0;
  141. }
  142. /**
  143. * radix_tree_find_next_bit - find the next set bit in a memory region
  144. *
  145. * @addr: The address to base the search on
  146. * @size: The bitmap size in bits
  147. * @offset: The bitnumber to start searching at
  148. *
  149. * Unrollable variant of find_next_bit() for constant size arrays.
  150. * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
  151. * Returns next bit offset, or size if nothing found.
  152. */
  153. static __always_inline unsigned long
  154. radix_tree_find_next_bit(const unsigned long *addr,
  155. unsigned long size, unsigned long offset)
  156. {
  157. if (!__builtin_constant_p(size))
  158. return find_next_bit(addr, size, offset);
  159. if (offset < size) {
  160. unsigned long tmp;
  161. addr += offset / BITS_PER_LONG;
  162. tmp = *addr >> (offset % BITS_PER_LONG);
  163. if (tmp)
  164. return __ffs(tmp) + offset;
  165. offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
  166. while (offset < size) {
  167. tmp = *++addr;
  168. if (tmp)
  169. return __ffs(tmp) + offset;
  170. offset += BITS_PER_LONG;
  171. }
  172. }
  173. return size;
  174. }
  175. /*
  176. * This assumes that the caller has performed appropriate preallocation, and
  177. * that the caller has pinned this thread of control to the current CPU.
  178. */
  179. static struct radix_tree_node *
  180. radix_tree_node_alloc(struct radix_tree_root *root)
  181. {
  182. struct radix_tree_node *ret = NULL;
  183. gfp_t gfp_mask = root_gfp_mask(root);
  184. /*
  185. * Preload code isn't irq safe and it doesn't make sence to use
  186. * preloading in the interrupt anyway as all the allocations have to
  187. * be atomic. So just do normal allocation when in interrupt.
  188. */
  189. if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) {
  190. struct radix_tree_preload *rtp;
  191. /*
  192. * Provided the caller has preloaded here, we will always
  193. * succeed in getting a node here (and never reach
  194. * kmem_cache_alloc)
  195. */
  196. rtp = &__get_cpu_var(radix_tree_preloads);
  197. if (rtp->nr) {
  198. ret = rtp->nodes[rtp->nr - 1];
  199. rtp->nodes[rtp->nr - 1] = NULL;
  200. rtp->nr--;
  201. }
  202. }
  203. if (ret == NULL)
  204. ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  205. BUG_ON(radix_tree_is_indirect_ptr(ret));
  206. return ret;
  207. }
  208. static void radix_tree_node_rcu_free(struct rcu_head *head)
  209. {
  210. struct radix_tree_node *node =
  211. container_of(head, struct radix_tree_node, rcu_head);
  212. int i;
  213. /*
  214. * must only free zeroed nodes into the slab. radix_tree_shrink
  215. * can leave us with a non-NULL entry in the first slot, so clear
  216. * that here to make sure.
  217. */
  218. for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
  219. tag_clear(node, i, 0);
  220. node->slots[0] = NULL;
  221. node->count = 0;
  222. kmem_cache_free(radix_tree_node_cachep, node);
  223. }
  224. static inline void
  225. radix_tree_node_free(struct radix_tree_node *node)
  226. {
  227. call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
  228. }
  229. /*
  230. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  231. * ensure that the addition of a single element in the tree cannot fail. On
  232. * success, return zero, with preemption disabled. On error, return -ENOMEM
  233. * with preemption not disabled.
  234. *
  235. * To make use of this facility, the radix tree must be initialised without
  236. * __GFP_WAIT being passed to INIT_RADIX_TREE().
  237. */
  238. static int __radix_tree_preload(gfp_t gfp_mask)
  239. {
  240. struct radix_tree_preload *rtp;
  241. struct radix_tree_node *node;
  242. int ret = -ENOMEM;
  243. preempt_disable();
  244. rtp = &__get_cpu_var(radix_tree_preloads);
  245. while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
  246. preempt_enable();
  247. node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
  248. if (node == NULL)
  249. goto out;
  250. preempt_disable();
  251. rtp = &__get_cpu_var(radix_tree_preloads);
  252. if (rtp->nr < ARRAY_SIZE(rtp->nodes))
  253. rtp->nodes[rtp->nr++] = node;
  254. else
  255. kmem_cache_free(radix_tree_node_cachep, node);
  256. }
  257. ret = 0;
  258. out:
  259. return ret;
  260. }
  261. /*
  262. * Load up this CPU's radix_tree_node buffer with sufficient objects to
  263. * ensure that the addition of a single element in the tree cannot fail. On
  264. * success, return zero, with preemption disabled. On error, return -ENOMEM
  265. * with preemption not disabled.
  266. *
  267. * To make use of this facility, the radix tree must be initialised without
  268. * __GFP_WAIT being passed to INIT_RADIX_TREE().
  269. */
  270. int radix_tree_preload(gfp_t gfp_mask)
  271. {
  272. /* Warn on non-sensical use... */
  273. WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT));
  274. return __radix_tree_preload(gfp_mask);
  275. }
  276. EXPORT_SYMBOL(radix_tree_preload);
  277. /*
  278. * The same as above function, except we don't guarantee preloading happens.
  279. * We do it, if we decide it helps. On success, return zero with preemption
  280. * disabled. On error, return -ENOMEM with preemption not disabled.
  281. */
  282. int radix_tree_maybe_preload(gfp_t gfp_mask)
  283. {
  284. if (gfp_mask & __GFP_WAIT)
  285. return __radix_tree_preload(gfp_mask);
  286. /* Preloading doesn't help anything with this gfp mask, skip it */
  287. preempt_disable();
  288. return 0;
  289. }
  290. EXPORT_SYMBOL(radix_tree_maybe_preload);
  291. /*
  292. * Return the maximum key which can be store into a
  293. * radix tree with height HEIGHT.
  294. */
  295. static inline unsigned long radix_tree_maxindex(unsigned int height)
  296. {
  297. return height_to_maxindex[height];
  298. }
  299. /*
  300. * Extend a radix tree so it can store key @index.
  301. */
  302. static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
  303. {
  304. struct radix_tree_node *node;
  305. struct radix_tree_node *slot;
  306. unsigned int height;
  307. int tag;
  308. /* Figure out what the height should be. */
  309. height = root->height + 1;
  310. while (index > radix_tree_maxindex(height))
  311. height++;
  312. if (root->rnode == NULL) {
  313. root->height = height;
  314. goto out;
  315. }
  316. do {
  317. unsigned int newheight;
  318. if (!(node = radix_tree_node_alloc(root)))
  319. return -ENOMEM;
  320. /* Propagate the aggregated tag info into the new root */
  321. for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
  322. if (root_tag_get(root, tag))
  323. tag_set(node, tag, 0);
  324. }
  325. /* Increase the height. */
  326. newheight = root->height+1;
  327. node->height = newheight;
  328. node->count = 1;
  329. node->parent = NULL;
  330. slot = root->rnode;
  331. if (newheight > 1) {
  332. slot = indirect_to_ptr(slot);
  333. slot->parent = node;
  334. }
  335. node->slots[0] = slot;
  336. node = ptr_to_indirect(node);
  337. rcu_assign_pointer(root->rnode, node);
  338. root->height = newheight;
  339. } while (height > root->height);
  340. out:
  341. return 0;
  342. }
  343. /**
  344. * radix_tree_insert - insert into a radix tree
  345. * @root: radix tree root
  346. * @index: index key
  347. * @item: item to insert
  348. *
  349. * Insert an item into the radix tree at position @index.
  350. */
  351. int radix_tree_insert(struct radix_tree_root *root,
  352. unsigned long index, void *item)
  353. {
  354. struct radix_tree_node *node = NULL, *slot;
  355. unsigned int height, shift;
  356. int offset;
  357. int error;
  358. BUG_ON(radix_tree_is_indirect_ptr(item));
  359. /* Make sure the tree is high enough. */
  360. if (index > radix_tree_maxindex(root->height)) {
  361. error = radix_tree_extend(root, index);
  362. if (error)
  363. return error;
  364. }
  365. slot = indirect_to_ptr(root->rnode);
  366. height = root->height;
  367. shift = (height-1) * RADIX_TREE_MAP_SHIFT;
  368. offset = 0; /* uninitialised var warning */
  369. while (height > 0) {
  370. if (slot == NULL) {
  371. /* Have to add a child node. */
  372. if (!(slot = radix_tree_node_alloc(root)))
  373. return -ENOMEM;
  374. slot->height = height;
  375. slot->parent = node;
  376. if (node) {
  377. rcu_assign_pointer(node->slots[offset], slot);
  378. node->count++;
  379. } else
  380. rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
  381. }
  382. /* Go a level down */
  383. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  384. node = slot;
  385. slot = node->slots[offset];
  386. shift -= RADIX_TREE_MAP_SHIFT;
  387. height--;
  388. }
  389. if (slot != NULL)
  390. return -EEXIST;
  391. if (node) {
  392. node->count++;
  393. rcu_assign_pointer(node->slots[offset], item);
  394. BUG_ON(tag_get(node, 0, offset));
  395. BUG_ON(tag_get(node, 1, offset));
  396. } else {
  397. rcu_assign_pointer(root->rnode, item);
  398. BUG_ON(root_tag_get(root, 0));
  399. BUG_ON(root_tag_get(root, 1));
  400. }
  401. return 0;
  402. }
  403. EXPORT_SYMBOL(radix_tree_insert);
  404. /*
  405. * is_slot == 1 : search for the slot.
  406. * is_slot == 0 : search for the node.
  407. */
  408. static void *radix_tree_lookup_element(struct radix_tree_root *root,
  409. unsigned long index, int is_slot)
  410. {
  411. unsigned int height, shift;
  412. struct radix_tree_node *node, **slot;
  413. node = rcu_dereference_raw(root->rnode);
  414. if (node == NULL)
  415. return NULL;
  416. if (!radix_tree_is_indirect_ptr(node)) {
  417. if (index > 0)
  418. return NULL;
  419. return is_slot ? (void *)&root->rnode : node;
  420. }
  421. node = indirect_to_ptr(node);
  422. height = node->height;
  423. if (index > radix_tree_maxindex(height))
  424. return NULL;
  425. shift = (height-1) * RADIX_TREE_MAP_SHIFT;
  426. do {
  427. slot = (struct radix_tree_node **)
  428. (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
  429. node = rcu_dereference_raw(*slot);
  430. if (node == NULL)
  431. return NULL;
  432. shift -= RADIX_TREE_MAP_SHIFT;
  433. height--;
  434. } while (height > 0);
  435. return is_slot ? (void *)slot : indirect_to_ptr(node);
  436. }
  /**
   * radix_tree_lookup_slot - lookup a slot in a radix tree
   * @root: radix tree root
   * @index: index key
   *
   * Returns: the slot corresponding to the position @index in the
   * radix tree @root, or NULL if there is none. This is useful for
   * update-if-exists operations.
   *
   * This function can be called under rcu_read_lock iff the slot is not
   * modified by radix_tree_replace_slot, otherwise it must be called
   * exclusive from other writers. Any dereference of the slot must be done
   * using radix_tree_deref_slot.
   */
  void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
  {
      void *slot = radix_tree_lookup_element(root, index, 1);

      return (void **)slot;
  }
  EXPORT_SYMBOL(radix_tree_lookup_slot);
  /**
   * radix_tree_lookup - perform lookup operation on a radix tree
   * @root: radix tree root
   * @index: index key
   *
   * Lookup the item at the position @index in the radix tree @root.
   * Returns the item, or NULL if nothing is stored at @index.
   *
   * This function can be called under rcu_read_lock, however the caller
   * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
   * them safely). No RCU barriers are required to access or modify the
   * returned item, however.
   */
  void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
  {
      void *item = radix_tree_lookup_element(root, index, 0);

      return item;
  }
  EXPORT_SYMBOL(radix_tree_lookup);
  472. /**
  473. * radix_tree_tag_set - set a tag on a radix tree node
  474. * @root: radix tree root
  475. * @index: index key
  476. * @tag: tag index
  477. *
  478. * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
  479. * corresponding to @index in the radix tree. From
  480. * the root all the way down to the leaf node.
  481. *
  482. * Returns the address of the tagged item. Setting a tag on a not-present
  483. * item is a bug.
  484. */
  485. void *radix_tree_tag_set(struct radix_tree_root *root,
  486. unsigned long index, unsigned int tag)
  487. {
  488. unsigned int height, shift;
  489. struct radix_tree_node *slot;
  490. height = root->height;
  491. BUG_ON(index > radix_tree_maxindex(height));
  492. slot = indirect_to_ptr(root->rnode);
  493. shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
  494. while (height > 0) {
  495. int offset;
  496. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  497. if (!tag_get(slot, tag, offset))
  498. tag_set(slot, tag, offset);
  499. slot = slot->slots[offset];
  500. BUG_ON(slot == NULL);
  501. shift -= RADIX_TREE_MAP_SHIFT;
  502. height--;
  503. }
  504. /* set the root's tag bit */
  505. if (slot && !root_tag_get(root, tag))
  506. root_tag_set(root, tag);
  507. return slot;
  508. }
  509. EXPORT_SYMBOL(radix_tree_tag_set);
  510. /**
  511. * radix_tree_tag_clear - clear a tag on a radix tree node
  512. * @root: radix tree root
  513. * @index: index key
  514. * @tag: tag index
  515. *
  516. * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
  517. * corresponding to @index in the radix tree. If
  518. * this causes the leaf node to have no tags set then clear the tag in the
  519. * next-to-leaf node, etc.
  520. *
  521. * Returns the address of the tagged item on success, else NULL. ie:
  522. * has the same return value and semantics as radix_tree_lookup().
  523. */
  524. void *radix_tree_tag_clear(struct radix_tree_root *root,
  525. unsigned long index, unsigned int tag)
  526. {
  527. struct radix_tree_node *node = NULL;
  528. struct radix_tree_node *slot = NULL;
  529. unsigned int height, shift;
  530. int uninitialized_var(offset);
  531. height = root->height;
  532. if (index > radix_tree_maxindex(height))
  533. goto out;
  534. shift = height * RADIX_TREE_MAP_SHIFT;
  535. slot = indirect_to_ptr(root->rnode);
  536. while (shift) {
  537. if (slot == NULL)
  538. goto out;
  539. shift -= RADIX_TREE_MAP_SHIFT;
  540. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  541. node = slot;
  542. slot = slot->slots[offset];
  543. }
  544. if (slot == NULL)
  545. goto out;
  546. while (node) {
  547. if (!tag_get(node, tag, offset))
  548. goto out;
  549. tag_clear(node, tag, offset);
  550. if (any_tag_set(node, tag))
  551. goto out;
  552. index >>= RADIX_TREE_MAP_SHIFT;
  553. offset = index & RADIX_TREE_MAP_MASK;
  554. node = node->parent;
  555. }
  556. /* clear the root's tag bit */
  557. if (root_tag_get(root, tag))
  558. root_tag_clear(root, tag);
  559. out:
  560. return slot;
  561. }
  562. EXPORT_SYMBOL(radix_tree_tag_clear);
  563. /**
  564. * radix_tree_tag_get - get a tag on a radix tree node
  565. * @root: radix tree root
  566. * @index: index key
  567. * @tag: tag index (< RADIX_TREE_MAX_TAGS)
  568. *
  569. * Return values:
  570. *
  571. * 0: tag not present or not set
  572. * 1: tag set
  573. *
  574. * Note that the return value of this function may not be relied on, even if
  575. * the RCU lock is held, unless tag modification and node deletion are excluded
  576. * from concurrency.
  577. */
  578. int radix_tree_tag_get(struct radix_tree_root *root,
  579. unsigned long index, unsigned int tag)
  580. {
  581. unsigned int height, shift;
  582. struct radix_tree_node *node;
  583. /* check the root's tag bit */
  584. if (!root_tag_get(root, tag))
  585. return 0;
  586. node = rcu_dereference_raw(root->rnode);
  587. if (node == NULL)
  588. return 0;
  589. if (!radix_tree_is_indirect_ptr(node))
  590. return (index == 0);
  591. node = indirect_to_ptr(node);
  592. height = node->height;
  593. if (index > radix_tree_maxindex(height))
  594. return 0;
  595. shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
  596. for ( ; ; ) {
  597. int offset;
  598. if (node == NULL)
  599. return 0;
  600. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  601. if (!tag_get(node, tag, offset))
  602. return 0;
  603. if (height == 1)
  604. return 1;
  605. node = rcu_dereference_raw(node->slots[offset]);
  606. shift -= RADIX_TREE_MAP_SHIFT;
  607. height--;
  608. }
  609. }
  610. EXPORT_SYMBOL(radix_tree_tag_get);
  611. /**
  612. * radix_tree_next_chunk - find next chunk of slots for iteration
  613. *
  614. * @root: radix tree root
  615. * @iter: iterator state
  616. * @flags: RADIX_TREE_ITER_* flags and tag index
  617. * Returns: pointer to chunk first slot, or NULL if iteration is over
  618. */
  619. void **radix_tree_next_chunk(struct radix_tree_root *root,
  620. struct radix_tree_iter *iter, unsigned flags)
  621. {
  622. unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
  623. struct radix_tree_node *rnode, *node;
  624. unsigned long index, offset;
  625. if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
  626. return NULL;
  627. /*
  628. * Catch next_index overflow after ~0UL. iter->index never overflows
  629. * during iterating; it can be zero only at the beginning.
  630. * And we cannot overflow iter->next_index in a single step,
  631. * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
  632. *
  633. * This condition also used by radix_tree_next_slot() to stop
  634. * contiguous iterating, and forbid swithing to the next chunk.
  635. */
  636. index = iter->next_index;
  637. if (!index && iter->index)
  638. return NULL;
  639. rnode = rcu_dereference_raw(root->rnode);
  640. if (radix_tree_is_indirect_ptr(rnode)) {
  641. rnode = indirect_to_ptr(rnode);
  642. } else if (rnode && !index) {
  643. /* Single-slot tree */
  644. iter->index = 0;
  645. iter->next_index = 1;
  646. iter->tags = 1;
  647. return (void **)&root->rnode;
  648. } else
  649. return NULL;
  650. restart:
  651. shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
  652. offset = index >> shift;
  653. /* Index outside of the tree */
  654. if (offset >= RADIX_TREE_MAP_SIZE)
  655. return NULL;
  656. node = rnode;
  657. while (1) {
  658. if ((flags & RADIX_TREE_ITER_TAGGED) ?
  659. !test_bit(offset, node->tags[tag]) :
  660. !node->slots[offset]) {
  661. /* Hole detected */
  662. if (flags & RADIX_TREE_ITER_CONTIG)
  663. return NULL;
  664. if (flags & RADIX_TREE_ITER_TAGGED)
  665. offset = radix_tree_find_next_bit(
  666. node->tags[tag],
  667. RADIX_TREE_MAP_SIZE,
  668. offset + 1);
  669. else
  670. while (++offset < RADIX_TREE_MAP_SIZE) {
  671. if (node->slots[offset])
  672. break;
  673. }
  674. index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
  675. index += offset << shift;
  676. /* Overflow after ~0UL */
  677. if (!index)
  678. return NULL;
  679. if (offset == RADIX_TREE_MAP_SIZE)
  680. goto restart;
  681. }
  682. /* This is leaf-node */
  683. if (!shift)
  684. break;
  685. node = rcu_dereference_raw(node->slots[offset]);
  686. if (node == NULL)
  687. goto restart;
  688. shift -= RADIX_TREE_MAP_SHIFT;
  689. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  690. }
  691. /* Update the iterator state */
  692. iter->index = index;
  693. iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;
  694. /* Construct iter->tags bit-mask from node->tags[tag] array */
  695. if (flags & RADIX_TREE_ITER_TAGGED) {
  696. unsigned tag_long, tag_bit;
  697. tag_long = offset / BITS_PER_LONG;
  698. tag_bit = offset % BITS_PER_LONG;
  699. iter->tags = node->tags[tag][tag_long] >> tag_bit;
  700. /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
  701. if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
  702. /* Pick tags from next element */
  703. if (tag_bit)
  704. iter->tags |= node->tags[tag][tag_long + 1] <<
  705. (BITS_PER_LONG - tag_bit);
  706. /* Clip chunk size, here only BITS_PER_LONG tags */
  707. iter->next_index = index + BITS_PER_LONG;
  708. }
  709. }
  710. return node->slots + offset;
  711. }
  712. EXPORT_SYMBOL(radix_tree_next_chunk);
  713. /**
  714. * radix_tree_range_tag_if_tagged - for each item in given range set given
  715. * tag if item has another tag set
  716. * @root: radix tree root
  717. * @first_indexp: pointer to a starting index of a range to scan
  718. * @last_index: last index of a range to scan
  719. * @nr_to_tag: maximum number items to tag
  720. * @iftag: tag index to test
  721. * @settag: tag index to set if tested tag is set
  722. *
  723. * This function scans range of radix tree from first_index to last_index
  724. * (inclusive). For each item in the range if iftag is set, the function sets
  725. * also settag. The function stops either after tagging nr_to_tag items or
  726. * after reaching last_index.
  727. *
  728. * The tags must be set from the leaf level only and propagated back up the
  729. * path to the root. We must do this so that we resolve the full path before
  730. * setting any tags on intermediate nodes. If we set tags as we descend, then
  731. * we can get to the leaf node and find that the index that has the iftag
  732. * set is outside the range we are scanning. This reults in dangling tags and
  733. * can lead to problems with later tag operations (e.g. livelocks on lookups).
  734. *
  735. * The function returns number of leaves where the tag was set and sets
  736. * *first_indexp to the first unscanned index.
  737. * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
  738. * be prepared to handle that.
  739. */
  740. unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
  741. unsigned long *first_indexp, unsigned long last_index,
  742. unsigned long nr_to_tag,
  743. unsigned int iftag, unsigned int settag)
  744. {
  745. unsigned int height = root->height;
  746. struct radix_tree_node *node = NULL;
  747. struct radix_tree_node *slot;
  748. unsigned int shift;
  749. unsigned long tagged = 0;
  750. unsigned long index = *first_indexp;
  751. last_index = min(last_index, radix_tree_maxindex(height));
  752. if (index > last_index)
  753. return 0;
  754. if (!nr_to_tag)
  755. return 0;
  756. if (!root_tag_get(root, iftag)) {
  757. *first_indexp = last_index + 1;
  758. return 0;
  759. }
  760. if (height == 0) {
  761. *first_indexp = last_index + 1;
  762. root_tag_set(root, settag);
  763. return 1;
  764. }
  765. shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
  766. slot = indirect_to_ptr(root->rnode);
  767. for (;;) {
  768. unsigned long upindex;
  769. int offset;
  770. offset = (index >> shift) & RADIX_TREE_MAP_MASK;
  771. if (!slot->slots[offset])
  772. goto next;
  773. if (!tag_get(slot, iftag, offset))
  774. goto next;
  775. if (shift) {
  776. /* Go down one level */
  777. shift -= RADIX_TREE_MAP_SHIFT;
  778. node = slot;
  779. slot = slot->slots[offset];
  780. continue;
  781. }
  782. /* tag the leaf */
  783. tagged++;
  784. tag_set(slot, settag, offset);
  785. /* walk back up the path tagging interior nodes */
  786. upindex = index;
  787. while (node) {
  788. upindex >>= RADIX_TREE_MAP_SHIFT;
  789. offset = upindex & RADIX_TREE_MAP_MASK;
  790. /* stop if we find a node with the tag already set */
  791. if (tag_get(node, settag, offset))
  792. break;
  793. tag_set(node, settag, offset);
  794. node = node->parent;
  795. }
  796. /*
  797. * Small optimization: now clear that node pointer.
  798. * Since all of this slot's ancestors now have the tag set
  799. * from setting it above, we have no further need to walk
  800. * back up the tree setting tags, until we update slot to
  801. * point to another radix_tree_node.
  802. */
  803. node = NULL;
  804. next:
  805. /* Go to next item at level determined by 'shift' */
  806. index = ((index >> shift) + 1) << shift;
  807. /* Overflow can happen when last_index is ~0UL... */
  808. if (index > last_index || !index)
  809. break;
  810. if (tagged >= nr_to_tag)
  811. break;
  812. while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
  813. /*
  814. * We've fully scanned this node. Go up. Because
  815. * last_index is guaranteed to be in the tree, what
  816. * we do below cannot wander astray.
  817. */
  818. slot = slot->parent;
  819. shift += RADIX_TREE_MAP_SHIFT;
  820. }
  821. }
  822. /*
  823. * We need not to tag the root tag if there is no tag which is set with
  824. * settag within the range from *first_indexp to last_index.
  825. */
  826. if (tagged > 0)
  827. root_tag_set(root, settag);
  828. *first_indexp = index;
  829. return tagged;
  830. }
  831. EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
  832. /**
  833. * radix_tree_next_hole - find the next hole (not-present entry)
  834. * @root: tree root
  835. * @index: index key
  836. * @max_scan: maximum range to search
  837. *
  838. * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
  839. * indexed hole.
  840. *
  841. * Returns: the index of the hole if found, otherwise returns an index
  842. * outside of the set specified (in which case 'return - index >= max_scan'
  843. * will be true). In rare cases of index wrap-around, 0 will be returned.
  844. *
  845. * radix_tree_next_hole may be called under rcu_read_lock. However, like
  846. * radix_tree_gang_lookup, this will not atomically search a snapshot of
  847. * the tree at a single point in time. For example, if a hole is created
  848. * at index 5, then subsequently a hole is created at index 10,
  849. * radix_tree_next_hole covering both indexes may return 10 if called
  850. * under rcu_read_lock.
  851. */
  852. unsigned long radix_tree_next_hole(struct radix_tree_root *root,
  853. unsigned long index, unsigned long max_scan)
  854. {
  855. unsigned long i;
  856. for (i = 0; i < max_scan; i++) {
  857. if (!radix_tree_lookup(root, index))
  858. break;
  859. index++;
  860. if (index == 0)
  861. break;
  862. }
  863. return index;
  864. }
  865. EXPORT_SYMBOL(radix_tree_next_hole);
  866. /**
  867. * radix_tree_prev_hole - find the prev hole (not-present entry)
  868. * @root: tree root
  869. * @index: index key
  870. * @max_scan: maximum range to search
  871. *
  872. * Search backwards in the range [max(index-max_scan+1, 0), index]
  873. * for the first hole.
  874. *
  875. * Returns: the index of the hole if found, otherwise returns an index
  876. * outside of the set specified (in which case 'index - return >= max_scan'
  877. * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
  878. *
  879. * radix_tree_next_hole may be called under rcu_read_lock. However, like
  880. * radix_tree_gang_lookup, this will not atomically search a snapshot of
  881. * the tree at a single point in time. For example, if a hole is created
  882. * at index 10, then subsequently a hole is created at index 5,
  883. * radix_tree_prev_hole covering both indexes may return 5 if called under
  884. * rcu_read_lock.
  885. */
  886. unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
  887. unsigned long index, unsigned long max_scan)
  888. {
  889. unsigned long i;
  890. for (i = 0; i < max_scan; i++) {
  891. if (!radix_tree_lookup(root, index))
  892. break;
  893. index--;
  894. if (index == ULONG_MAX)
  895. break;
  896. }
  897. return index;
  898. }
  899. EXPORT_SYMBOL(radix_tree_prev_hole);
  900. /**
  901. * radix_tree_gang_lookup - perform multiple lookup on a radix tree
  902. * @root: radix tree root
  903. * @results: where the results of the lookup are placed
  904. * @first_index: start the lookup from this key
  905. * @max_items: place up to this many items at *results
  906. *
  907. * Performs an index-ascending scan of the tree for present items. Places
  908. * them at *@results and returns the number of items which were placed at
  909. * *@results.
  910. *
  911. * The implementation is naive.
  912. *
  913. * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
  914. * rcu_read_lock. In this case, rather than the returned results being
  915. * an atomic snapshot of the tree at a single point in time, the semantics
  916. * of an RCU protected gang lookup are as though multiple radix_tree_lookups
  917. * have been issued in individual locks, and results stored in 'results'.
  918. */
  919. unsigned int
  920. radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
  921. unsigned long first_index, unsigned int max_items)
  922. {
  923. struct radix_tree_iter iter;
  924. void **slot;
  925. unsigned int ret = 0;
  926. if (unlikely(!max_items))
  927. return 0;
  928. radix_tree_for_each_slot(slot, root, &iter, first_index) {
  929. results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
  930. if (!results[ret])
  931. continue;
  932. if (++ret == max_items)
  933. break;
  934. }
  935. return ret;
  936. }
  937. EXPORT_SYMBOL(radix_tree_gang_lookup);
  938. /**
  939. * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
  940. * @root: radix tree root
  941. * @results: where the results of the lookup are placed
  942. * @indices: where their indices should be placed (but usually NULL)
  943. * @first_index: start the lookup from this key
  944. * @max_items: place up to this many items at *results
  945. *
  946. * Performs an index-ascending scan of the tree for present items. Places
  947. * their slots at *@results and returns the number of items which were
  948. * placed at *@results.
  949. *
  950. * The implementation is naive.
  951. *
  952. * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
  953. * be dereferenced with radix_tree_deref_slot, and if using only RCU
  954. * protection, radix_tree_deref_slot may fail requiring a retry.
  955. */
  956. unsigned int
  957. radix_tree_gang_lookup_slot(struct radix_tree_root *root,
  958. void ***results, unsigned long *indices,
  959. unsigned long first_index, unsigned int max_items)
  960. {
  961. struct radix_tree_iter iter;
  962. void **slot;
  963. unsigned int ret = 0;
  964. if (unlikely(!max_items))
  965. return 0;
  966. radix_tree_for_each_slot(slot, root, &iter, first_index) {
  967. results[ret] = slot;
  968. if (indices)
  969. indices[ret] = iter.index;
  970. if (++ret == max_items)
  971. break;
  972. }
  973. return ret;
  974. }
  975. EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
  976. /**
  977. * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
  978. * based on a tag
  979. * @root: radix tree root
  980. * @results: where the results of the lookup are placed
  981. * @first_index: start the lookup from this key
  982. * @max_items: place up to this many items at *results
  983. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  984. *
  985. * Performs an index-ascending scan of the tree for present items which
  986. * have the tag indexed by @tag set. Places the items at *@results and
  987. * returns the number of items which were placed at *@results.
  988. */
  989. unsigned int
  990. radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
  991. unsigned long first_index, unsigned int max_items,
  992. unsigned int tag)
  993. {
  994. struct radix_tree_iter iter;
  995. void **slot;
  996. unsigned int ret = 0;
  997. if (unlikely(!max_items))
  998. return 0;
  999. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1000. results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
  1001. if (!results[ret])
  1002. continue;
  1003. if (++ret == max_items)
  1004. break;
  1005. }
  1006. return ret;
  1007. }
  1008. EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
  1009. /**
  1010. * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
  1011. * radix tree based on a tag
  1012. * @root: radix tree root
  1013. * @results: where the results of the lookup are placed
  1014. * @first_index: start the lookup from this key
  1015. * @max_items: place up to this many items at *results
  1016. * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
  1017. *
  1018. * Performs an index-ascending scan of the tree for present items which
  1019. * have the tag indexed by @tag set. Places the slots at *@results and
  1020. * returns the number of slots which were placed at *@results.
  1021. */
  1022. unsigned int
  1023. radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
  1024. unsigned long first_index, unsigned int max_items,
  1025. unsigned int tag)
  1026. {
  1027. struct radix_tree_iter iter;
  1028. void **slot;
  1029. unsigned int ret = 0;
  1030. if (unlikely(!max_items))
  1031. return 0;
  1032. radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
  1033. results[ret] = slot;
  1034. if (++ret == max_items)
  1035. break;
  1036. }
  1037. return ret;
  1038. }
  1039. EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  1040. #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
  1041. #include <linux/sched.h> /* for cond_resched() */
  1042. /*
  1043. * This linear search is at present only useful to shmem_unuse_inode().
  1044. */
  1045. static unsigned long __locate(struct radix_tree_node *slot, void *item,
  1046. unsigned long index, unsigned long *found_index)
  1047. {
  1048. unsigned int shift, height;
  1049. unsigned long i;
  1050. height = slot->height;
  1051. shift = (height-1) * RADIX_TREE_MAP_SHIFT;
  1052. for ( ; height > 1; height--) {
  1053. i = (index >> shift) & RADIX_TREE_MAP_MASK;
  1054. for (;;) {
  1055. if (slot->slots[i] != NULL)
  1056. break;
  1057. index &= ~((1UL << shift) - 1);
  1058. index += 1UL << shift;
  1059. if (index == 0)
  1060. goto out; /* 32-bit wraparound */
  1061. i++;
  1062. if (i == RADIX_TREE_MAP_SIZE)
  1063. goto out;
  1064. }
  1065. shift -= RADIX_TREE_MAP_SHIFT;
  1066. slot = rcu_dereference_raw(slot->slots[i]);
  1067. if (slot == NULL)
  1068. goto out;
  1069. }
  1070. /* Bottom level: check items */
  1071. for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
  1072. if (slot->slots[i] == item) {
  1073. *found_index = index + i;
  1074. index = 0;
  1075. goto out;
  1076. }
  1077. }
  1078. index += RADIX_TREE_MAP_SIZE;
  1079. out:
  1080. return index;
  1081. }
  1082. /**
  1083. * radix_tree_locate_item - search through radix tree for item
  1084. * @root: radix tree root
  1085. * @item: item to be found
  1086. *
  1087. * Returns index where item was found, or -1 if not found.
  1088. * Caller must hold no lock (since this time-consuming function needs
  1089. * to be preemptible), and must check afterwards if item is still there.
  1090. */
  1091. unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
  1092. {
  1093. struct radix_tree_node *node;
  1094. unsigned long max_index;
  1095. unsigned long cur_index = 0;
  1096. unsigned long found_index = -1;
  1097. do {
  1098. rcu_read_lock();
  1099. node = rcu_dereference_raw(root->rnode);
  1100. if (!radix_tree_is_indirect_ptr(node)) {
  1101. rcu_read_unlock();
  1102. if (node == item)
  1103. found_index = 0;
  1104. break;
  1105. }
  1106. node = indirect_to_ptr(node);
  1107. max_index = radix_tree_maxindex(node->height);
  1108. if (cur_index > max_index)
  1109. break;
  1110. cur_index = __locate(node, item, cur_index, &found_index);
  1111. rcu_read_unlock();
  1112. cond_resched();
  1113. } while (cur_index != 0 && cur_index <= max_index);
  1114. return found_index;
  1115. }
  1116. #else
  1117. unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
  1118. {
  1119. return -1;
  1120. }
  1121. #endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root:		radix tree root
 *
 *	While the node at the top of the tree has exactly one child and that
 *	child sits in slot 0, make the child the new top of the tree, lower
 *	root->height by one and free the old top node.
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		struct radix_tree_node *slot;

		/* With height > 0 the root must reference a node, not an item */
		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = indirect_to_ptr(to_free);

		/*
		 * The candidate node has more than one child, or its child
		 * is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		slot = to_free->slots[0];
		if (root->height > 1) {
			/* Child is itself a node: it becomes the parentless top */
			slot->parent = NULL;
			slot = ptr_to_indirect(slot);
		}
		root->rnode = slot;
		root->height--;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page is 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		if (root->height == 0)
			*((unsigned long *)&to_free->slots[0]) |=
						RADIX_TREE_INDIRECT_PTR;

		radix_tree_node_free(to_free);
	}
}
/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int uninitialized_var(offset);

	height = root->height;
	/* Index beyond radix_tree_maxindex() for this height: return NULL */
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		/* Height 0: the entry (if any) hangs directly off the root */
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = indirect_to_ptr(slot);
	shift = height * RADIX_TREE_MAP_SHIFT;

	/*
	 * Descend to the bottom level.  On exit 'node' is the bottom-level
	 * node, 'offset' the position of @index inside it and 'slot' the
	 * entry stored there.
	 */
	do {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	} while (shift);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the item to be deleted.
	 * This way of doing it would be inefficient, but seldom is any set.
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(node, tag, offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (node) {
		node->slots[offset] = NULL;
		node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (node->count) {
			/*
			 * This node is still in use, so stop here; if it is
			 * the top node, try to reduce the tree height.
			 */
			if (node == indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = node;

		/* Move up: compute this node's position within its parent */
		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	/* Every node on the path became empty: reset the root */
	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);
  1257. /**
  1258. * radix_tree_tagged - test whether any items in the tree are tagged
  1259. * @root: radix tree root
  1260. * @tag: tag to test
  1261. */
  1262. int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
  1263. {
  1264. return root_tag_get(root, tag);
  1265. }
  1266. EXPORT_SYMBOL(radix_tree_tagged);
  1267. static void
  1268. radix_tree_node_ctor(void *node)
  1269. {
  1270. memset(node, 0, sizeof(struct radix_tree_node));
  1271. }
  1272. static __init unsigned long __maxindex(unsigned int height)
  1273. {
  1274. unsigned int width = height * RADIX_TREE_MAP_SHIFT;
  1275. int shift = RADIX_TREE_INDEX_BITS - width;
  1276. if (shift < 0)
  1277. return ~0UL;
  1278. if (shift >= BITS_PER_LONG)
  1279. return 0UL;
  1280. return ~0UL >> shift;
  1281. }
  1282. static __init void radix_tree_init_maxindex(void)
  1283. {
  1284. unsigned int i;
  1285. for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
  1286. height_to_maxindex[i] = __maxindex(i);
  1287. }
  1288. static int radix_tree_callback(struct notifier_block *nfb,
  1289. unsigned long action,
  1290. void *hcpu)
  1291. {
  1292. int cpu = (long)hcpu;
  1293. struct radix_tree_preload *rtp;
  1294. /* Free per-cpu pool of perloaded nodes */
  1295. if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
  1296. rtp = &per_cpu(radix_tree_preloads, cpu);
  1297. while (rtp->nr) {
  1298. kmem_cache_free(radix_tree_node_cachep,
  1299. rtp->nodes[rtp->nr-1]);
  1300. rtp->nodes[rtp->nr-1] = NULL;
  1301. rtp->nr--;
  1302. }
  1303. }
  1304. return NOTIFY_OK;
  1305. }
  1306. void __init radix_tree_init(void)
  1307. {
  1308. radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
  1309. sizeof(struct radix_tree_node), 0,
  1310. SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
  1311. radix_tree_node_ctor);
  1312. radix_tree_init_maxindex();
  1313. hotcpu_notifier(radix_tree_callback, 0);
  1314. }