radix-tree.c
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>

#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
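
/*
 * Worked example of the radix layout (illustrative only): with the
 * default RADIX_TREE_MAP_SHIFT of 6, each node covers 64 slots and an
 * index is consumed 6 bits at a time from the top of the tree down.
 * The hypothetical helper below sketches how a lookup derives the slot
 * offset at each level; it mirrors the "(index >> shift) & MASK" steps
 * used throughout this file and is not part of the kernel API.
 */
#if 0	/* illustrative sketch, not compiled */
static unsigned int example_offset(unsigned long index, unsigned int height)
{
	/* shift for the topmost level of a tree of this height */
	unsigned int shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	/*
	 * e.g. index 0x2d3 (723) in a height-2 tree: level-1 offset is
	 * (723 >> 6) & 63 = 11, leaf offset is 723 & 63 = 19.
	 */
	return (index >> shift) & RADIX_TREE_MAP_MASK;
}
#endif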
struct radix_tree_node {
	unsigned int	height;		/* Height from the bottom */
	unsigned int	count;
	union {
		struct radix_tree_node *parent;	/* Used when ascending tree */
		struct rcu_head	rcu_head;	/* Used when freeing node */
	};
	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
static inline void *ptr_to_indirect(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
}

static inline void *indirect_to_ptr(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
}
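
/*
 * Illustrative note on the indirect-pointer encoding (a sketch, not
 * kernel API): root->rnode either points directly at a user item (a
 * one-item tree of height 0) or at a radix_tree_node with the low
 * RADIX_TREE_INDIRECT_PTR bit set.  Lookups test that bit to decide
 * which case they are in, as the hypothetical check below shows.
 */
#if 0	/* illustrative sketch, not compiled */
static int example_is_internal(void *entry)
{
	/* the same test radix_tree_is_indirect_ptr() performs */
	return (unsigned long)entry & RADIX_TREE_INDIRECT_PTR;
}
#endif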
static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;

	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}
/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	if (!(gfp_mask & __GFP_WAIT)) {
		struct radix_tree_preload *rtp;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
	if (ret == NULL)
		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);

	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}
static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	int i;

	/*
	 * must only free zeroed nodes into the slab. radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
		tag_clear(node, i, 0);
	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}
/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
EXPORT_SYMBOL(radix_tree_preload);
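
/*
 * Usage sketch for the preload protocol (illustrative; the lock and
 * item are hypothetical, and the tree is assumed to have been
 * initialised without __GFP_WAIT, e.g. with GFP_ATOMIC): preload with
 * a sleeping allocation while unlocked, insert under the lock, then
 * re-enable preemption with radix_tree_preload_end().
 */
#if 0	/* illustrative sketch, not compiled */
static int example_insert(struct radix_tree_root *root, unsigned long index,
			  void *item, spinlock_t *lock)
{
	int error = radix_tree_preload(GFP_KERNEL);

	if (error)
		return error;		/* preemption is NOT disabled here */

	spin_lock(lock);
	error = radix_tree_insert(root, index, item);
	spin_unlock(lock);

	radix_tree_preload_end();	/* re-enables preemption */
	return error;
}
#endif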
/*
 *	Return the maximum key which can be stored into a
 *	radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}
/*
 *	Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	struct radix_tree_node *slot;
	unsigned int height;
	int tag;

	/* Figure out what the height should be.  */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;
		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		/* Increase the height.  */
		newheight = root->height+1;
		node->height = newheight;
		node->count = 1;
		node->parent = NULL;
		slot = root->rnode;
		if (newheight > 1) {
			slot = indirect_to_ptr(slot);
			slot->parent = node;
		}
		node->slots[0] = slot;
		node = ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}
/**
 *	radix_tree_insert    -    insert into a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *	@item:		item to insert
 *
 *	Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift;
	int offset;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	/* Make sure the tree is high enough.  */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

	slot = indirect_to_ptr(root->rnode);

	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (height > 0) {
		if (slot == NULL) {
			/* Have to add a child node.  */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->height = height;
			slot->parent = node;
			if (node) {
				rcu_assign_pointer(node->slots[offset], slot);
				node->count++;
			} else
				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = node->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot != NULL)
		return -EEXIST;

	if (node) {
		node->count++;
		rcu_assign_pointer(node->slots[offset], item);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
		rcu_assign_pointer(root->rnode, item);
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);
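
/*
 * Round-trip usage sketch (illustrative; the tree and item below are
 * hypothetical): a statically initialised tree, a single insert, a
 * lookup, and a delete.  -EEXIST from radix_tree_insert() means the
 * index was already occupied.
 */
#if 0	/* illustrative sketch, not compiled */
static RADIX_TREE(example_tree, GFP_KERNEL);	/* declares and initialises */

static void example_round_trip(void *item)
{
	if (radix_tree_insert(&example_tree, 42, item))
		return;				/* -ENOMEM or -EEXIST */
	WARN_ON(radix_tree_lookup(&example_tree, 42) != item);
	WARN_ON(radix_tree_delete(&example_tree, 42) != item);
}
#endif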
/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
				unsigned long index, int is_slot)
{
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;
		return is_slot ? (void *)&root->rnode : node;
	}
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return is_slot ? (void *)slot : indirect_to_ptr(node);
}
/**
 *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Returns:  the slot corresponding to the position @index in the
 *	radix tree @root. This is useful for update-if-exists operations.
 *
 *	This function can be called under rcu_read_lock iff the slot is not
 *	modified by radix_tree_replace_slot, otherwise it must be called
 *	exclusive from other writers. Any dereference of the slot must be done
 *	using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	return (void **)radix_tree_lookup_element(root, index, 1);
}
EXPORT_SYMBOL(radix_tree_lookup_slot);
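
/*
 * Update-if-exists sketch (illustrative; the names are hypothetical):
 * look the slot up once, then swap the item in place with
 * radix_tree_replace_slot() while holding the tree lock, avoiding a
 * delete + insert pair.
 */
#if 0	/* illustrative sketch, not compiled */
static int example_replace(struct radix_tree_root *root, unsigned long index,
			   void *new_item)
{
	void **slot = radix_tree_lookup_slot(root, index);

	if (!slot)
		return -ENOENT;		/* nothing stored at @index */
	radix_tree_replace_slot(slot, new_item);
	return 0;
}
#endif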
/**
 *	radix_tree_lookup    -    perform lookup operation on a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Lookup the item at the position @index in the radix tree @root.
 *
 *	This function can be called under rcu_read_lock, however the caller
 *	must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *	them safely). No RCU barriers are required to access or modify the
 *	returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_lookup_element(root, index, 0);
}
EXPORT_SYMBOL(radix_tree_lookup);
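
/*
 * Lockless-reader sketch (illustrative): a lookup under
 * rcu_read_lock() needs no tree lock, but the caller must take its own
 * reference (here a hypothetical get_ref()) before dropping the RCU
 * read lock, since the item may be deleted concurrently.
 */
#if 0	/* illustrative sketch, not compiled */
static void *example_rcu_lookup(struct radix_tree_root *root,
				unsigned long index)
{
	void *item;

	rcu_read_lock();
	item = radix_tree_lookup(root, index);
	if (item)
		get_ref(item);	/* hypothetical per-item refcount */
	rcu_read_unlock();
	return item;
}
#endif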
/**
 *	radix_tree_tag_set - set a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree, from the root all the
 *	way down to the leaf node.
 *
 *	Returns the address of the tagged item.  Setting a tag on a not-present
 *	item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);
/**
 *	radix_tree_tag_clear - clear a tag on a radix tree node
 *	@root:		radix tree root
 *	@index:		index key
 *	@tag:		tag index
 *
 *	Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *	corresponding to @index in the radix tree.  If this causes
 *	the leaf node to have no tags set then clear the tag in the
 *	next-to-leaf node, etc.
 *
 *	Returns the address of the tagged item on success, else NULL; i.e. it
 *	has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = height * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	while (shift) {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	}

	if (slot == NULL)
		goto out;

	while (node) {
		if (!tag_get(node, tag, offset))
			goto out;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			goto out;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);
/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:		radix tree root
 * @index:		index key
 * @tag:		tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(node, tag, offset))
			return 0;
		if (height == 1)
			return 1;
		node = rcu_dereference_raw(node->slots[offset]);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);
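
/*
 * Tag usage sketch (illustrative; EXAMPLE_TAG_DIRTY is a hypothetical
 * tag index, analogous to how the page cache uses PAGECACHE_TAG_DIRTY):
 * tag an existing entry, query the tag, and clear it again.  Tags only
 * make sense on present items.
 */
#if 0	/* illustrative sketch, not compiled */
#define EXAMPLE_TAG_DIRTY	0	/* must be < RADIX_TREE_MAX_TAGS */

static void example_mark_dirty(struct radix_tree_root *root,
			       unsigned long index)
{
	/* caller must hold the tree lock and know the item is present */
	radix_tree_tag_set(root, index, EXAMPLE_TAG_DIRTY);
	WARN_ON(!radix_tree_tag_get(root, index, EXAMPLE_TAG_DIRTY));
	radix_tree_tag_clear(root, index, EXAMPLE_TAG_DIRTY);
}
#endif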
/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				   tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number of items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from first_index to
 * last_index (inclusive).  For each item in the range that has iftag set,
 * the function also sets settag.  The function stops either after tagging
 * nr_to_tag items or after reaching last_index.
 *
 * The tags must be set from the leaf level only and propagated back up the
 * path to the root. We must do this so that we resolve the full path before
 * setting any tags on intermediate nodes. If we set tags as we descend, then
 * we can get to the leaf node and find that the index that has the iftag
 * set is outside the range we are scanning. This results in dangling tags and
 * can lead to problems with later tag operations (e.g. livelocks on lookups).
 *
 * The function returns the number of leaves where the tag was set and sets
 * *first_indexp to the first unscanned index.
 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
 * be prepared to handle that.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height;
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot;
	unsigned int shift;
	unsigned long tagged = 0;
	unsigned long index = *first_indexp;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	for (;;) {
		unsigned long upindex;
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		if (shift) {
			/* Go down one level */
			shift -= RADIX_TREE_MAP_SHIFT;
			node = slot;
			slot = slot->slots[offset];
			continue;
		}

		/* tag the leaf */
		tagged++;
		tag_set(slot, settag, offset);

		/* walk back up the path tagging interior nodes */
		upindex = index;
		while (node) {
			upindex >>= RADIX_TREE_MAP_SHIFT;
			offset = upindex & RADIX_TREE_MAP_MASK;

			/* stop if we find a node with the tag already set */
			if (tag_get(node, settag, offset))
				break;
			tag_set(node, settag, offset);
			node = node->parent;
		}

		/*
		 * Small optimization: now clear that node pointer.
		 * Since all of this slot's ancestors now have the tag set
		 * from setting it above, we have no further need to walk
		 * back up the tree setting tags, until we update slot to
		 * point to another radix_tree_node.
		 */
		node = NULL;

next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		/* Overflow can happen when last_index is ~0UL... */
		if (index > last_index || !index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node. Go up. Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = slot->parent;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * There is no need to set the root tag unless at least one entry
	 * in the range [*first_indexp, last_index] was actually tagged
	 * with settag.
	 */
	if (tagged > 0)
		root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
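
/*
 * Usage sketch (illustrative, loosely modelled on what writeback does
 * with its dirty/towrite tags; the tag indices 0 and 1 here are
 * hypothetical): promote every iftag entry in a range to settag, in
 * bounded batches so a tree lock need not be held for too long.
 */
#if 0	/* illustrative sketch, not compiled */
static void example_tag_range(struct radix_tree_root *root,
			      unsigned long start, unsigned long end)
{
	unsigned long index = start;

	while (index <= end) {
		unsigned long tagged;

		/* tag at most 1024 entries per pass; updates @index */
		tagged = radix_tree_range_tag_if_tagged(root, &index, end,
							1024, 0, 1);
		if (!tagged || !index)	/* done, or index wrapped */
			break;
		cond_resched();		/* a real caller would drop the
					   tree lock around this */
	}
}
#endif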
/**
 *	radix_tree_next_hole    -    find the next hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 *	indexed hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'return - index >= max_scan'
 *	will be true). In rare cases of index wrap-around, 0 will be returned.
 *
 *	radix_tree_next_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of
 *	the tree at a single point in time. For example, if a hole is created
 *	at index 5, then subsequently a hole is created at index 10,
 *	radix_tree_next_hole covering both indexes may return 10 if called
 *	under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);
/**
 *	radix_tree_prev_hole    -    find the prev hole (not-present entry)
 *	@root:		tree root
 *	@index:		index key
 *	@max_scan:	maximum range to search
 *
 *	Search backwards in the range [max(index-max_scan+1, 0), index]
 *	for the first hole.
 *
 *	Returns: the index of the hole if found, otherwise returns an index
 *	outside of the set specified (in which case 'index - return >= max_scan'
 *	will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 *
 *	radix_tree_prev_hole may be called under rcu_read_lock. However, like
 *	radix_tree_gang_lookup, this will not atomically search a snapshot of
 *	the tree at a single point in time. For example, if a hole is created
 *	at index 10, then subsequently a hole is created at index 5,
 *	radix_tree_prev_hole covering both indexes may return 5 if called under
 *	rcu_read_lock.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);
static unsigned int
__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
	unsigned long index, unsigned int max_items, unsigned long *next_index)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: grab some items */
	for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
		if (slot->slots[i]) {
			results[nr_found] = &(slot->slots[i]);
			if (indices)
				indices[nr_found] = index;
			if (++nr_found == max_items) {
				index++;
				goto out;
			}
		}
		index++;
	}
out:
	*next_index = index;
	return nr_found;
}
/**
 *	radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	them at *@results and returns the number of items which were placed at
 *	*@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *	rcu_read_lock. In this case, rather than the returned results being
 *	an atomic snapshot of the tree at a single point in time, the semantics
 *	of an RCU protected gang lookup are as though multiple radix_tree_lookups
 *	have been issued in individual locks, and results stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, (void ***)results + ret, NULL,
				cur_index, max_items - ret, &next_index);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] =
				indirect_to_ptr(rcu_dereference_raw(slot));
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);
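
/*
 * Batched-iteration sketch (illustrative; struct example_item, its
 * index field, the batch size and the visit() callback are all
 * hypothetical): walk every present item in ascending order, a fixed
 * batch at a time, continuing one past the last item found, the way
 * page-cache walkers continue from page->index + 1.
 */
#if 0	/* illustrative sketch, not compiled */
#define EXAMPLE_BATCH	16

struct example_item {
	unsigned long index;	/* hypothetical: item knows its own index */
};

static void example_for_each(struct radix_tree_root *root,
			     void (*visit)(struct example_item *))
{
	struct example_item *batch[EXAMPLE_BATCH];
	unsigned long start = 0;
	unsigned int i, nr;

	do {
		nr = radix_tree_gang_lookup(root, (void **)batch, start,
					    EXAMPLE_BATCH);
		for (i = 0; i < nr; i++)
			visit(batch[i]);
		if (!nr)
			break;
		start = batch[nr - 1]->index + 1;
		if (!start)		/* wrapped past ULONG_MAX */
			break;
	} while (nr == EXAMPLE_BATCH);
}
#endif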
/**
 *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@indices:	where their indices should be placed (but usually NULL)
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *
 *	Performs an index-ascending scan of the tree for present items.  Places
 *	their slots at *@results and returns the number of items which were
 *	placed at *@results.
 *
 *	The implementation is naive.
 *
 *	Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
 *	be dereferenced with radix_tree_deref_slot, and if using only RCU
 *	protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root,
			void ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	unsigned long max_index;
	struct radix_tree_node *node;
	unsigned long cur_index = first_index;
	unsigned int ret;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		if (indices)
			indices[0] = 0;
		return 1;
	}
	node = indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup(node, results + ret,
				indices ? indices + ret : NULL,
				cur_index, max_items - ret, &next_index);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
/*
 * FIXME: the two tag_get()s here should use find_next_bit() instead of
 * open-coding the search.
 */
static unsigned int
__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
	unsigned int max_items, unsigned long *next_index, unsigned int tag)
{
	unsigned int nr_found = 0;
	unsigned int shift, height;

	height = slot->height;
	if (height == 0)
		goto out;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;

		for (;;) {
			if (tag_get(slot, tag, i))
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}
		height--;
		if (height == 0) {	/* Bottom level: grab some items */
			unsigned long j = index & RADIX_TREE_MAP_MASK;

			for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
				index++;
				if (!tag_get(slot, tag, j))
					continue;
				/*
				 * Even though the tag was found set, we need to
				 * recheck that we have a non-NULL node, because
				 * if this lookup is lockless, it may have been
				 * subsequently deleted.
				 *
				 * Similar care must be taken in any place that
				 * looks up ->slots[x] without a lock (ie. can't
				 * rely on its value remaining the same).
				 */
				if (slot->slots[j]) {
					results[nr_found++] = &(slot->slots[j]);
					if (nr_found == max_items)
						goto out;
				}
			}
		}
		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			break;
	}
out:
	*next_index = index;
	return nr_found;
}
/**
 *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *	                             based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the items at *@results and
 *	returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = node;
		return 1;
	}
	node = indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int nr_found, slots_found, i;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, (void ***)results + ret,
				cur_index, max_items - ret, &next_index, tag);
		nr_found = 0;
		for (i = 0; i < slots_found; i++) {
			struct radix_tree_node *slot;
			slot = *(((void ***)results)[ret + i]);
			if (!slot)
				continue;
			results[ret + nr_found] =
				indirect_to_ptr(rcu_dereference_raw(slot));
			nr_found++;
		}
		ret += nr_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
/**
 *	radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *					  radix tree based on a tag
 *	@root:		radix tree root
 *	@results:	where the results of the lookup are placed
 *	@first_index:	start the lookup from this key
 *	@max_items:	place up to this many items at *results
 *	@tag:		the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *	Performs an index-ascending scan of the tree for present items which
 *	have the tag indexed by @tag set.  Places the slots at *@results and
 *	returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = first_index;
	unsigned int ret;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (!node)
		return 0;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (first_index > 0)
			return 0;
		results[0] = (void **)&root->rnode;
		return 1;
	}
	node = indirect_to_ptr(node);

	max_index = radix_tree_maxindex(node->height);

	ret = 0;
	while (ret < max_items) {
		unsigned int slots_found;
		unsigned long next_index;	/* Index of next search */

		if (cur_index > max_index)
			break;
		slots_found = __lookup_tag(node, results + ret,
				cur_index, max_items - ret, &next_index, tag);
		ret += slots_found;
		if (next_index == 0)
			break;
		cur_index = next_index;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)

#include <linux/sched.h> /* for cond_resched() */

/*
 * This linear search is at present only useful to shmem_unuse_inode().
 */
static unsigned long __locate(struct radix_tree_node *slot, void *item,
			      unsigned long index, unsigned long *found_index)
{
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: check items */
	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		if (slot->slots[i] == item) {
			*found_index = index + i;
			index = 0;
			goto out;
		}
	}
	index += RADIX_TREE_MAP_SIZE;
out:
	return index;
}

/**
 *	radix_tree_locate_item - search through radix tree for item
 *	@root:		radix tree root
 *	@item:		item to be found
 *
 *	Returns index where item was found, or -1 if not found.
 *	Caller must hold no lock (since this time-consuming function needs
 *	to be preemptible), and must check afterwards if item is still there.
 */
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = 0;
	unsigned long found_index = -1;

	do {
		rcu_read_lock();
		node = rcu_dereference_raw(root->rnode);
		if (!radix_tree_is_indirect_ptr(node)) {
			rcu_read_unlock();
			if (node == item)
				found_index = 0;
			break;
		}

		node = indirect_to_ptr(node);
		max_index = radix_tree_maxindex(node->height);
		if (cur_index > max_index) {
			/* don't leak the RCU read lock on this exit path */
			rcu_read_unlock();
			break;
		}

		cur_index = __locate(node, item, cur_index, &found_index);
		rcu_read_unlock();
		cond_resched();
	} while (cur_index != 0 && cur_index <= max_index);

	return found_index;
}
#else
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	return -1;
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
/**
 *	radix_tree_shrink    -    shrink height of a radix tree to minimal
 *	@root:		radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		struct radix_tree_node *slot;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = indirect_to_ptr(to_free);

		/*
		 * If the candidate node has more than one child, or its
		 * child is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		slot = to_free->slots[0];
		if (root->height > 1) {
			slot->parent = NULL;
			slot = ptr_to_indirect(slot);
		}
		root->rnode = slot;
		root->height--;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If the item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means it
		 * was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		if (root->height == 0)
			*((unsigned long *)&to_free->slots[0]) |=
						RADIX_TREE_INDIRECT_PTR;

		radix_tree_node_free(to_free);
	}
}
/**
 *	radix_tree_delete    -    delete an item from a radix tree
 *	@root:		radix tree root
 *	@index:		index key
 *
 *	Remove the item at @index from the radix tree rooted at @root.
 *
 *	Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = indirect_to_ptr(slot);

	shift = height * RADIX_TREE_MAP_SHIFT;

	do {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	} while (shift);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the item to be deleted.
	 * This is inefficient, but tags are seldom set.
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(node, tag, offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (node) {
		node->slots[offset] = NULL;
		node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (node->count) {
			if (node == indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = node;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);
/**
 *	radix_tree_tagged - test whether any items in the tree are tagged
 *	@root:		radix tree root
 *	@tag:		tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *node)
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}
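
/*
 * Worked values (illustrative): on a 64-bit kernel with
 * RADIX_TREE_MAP_SHIFT == 6, __maxindex() yields
 *
 *	height 0  -> 0		(a single direct entry)
 *	height 1  -> 63		(2^6 - 1)
 *	height 2  -> 4095	(2^12 - 1)
 *	...
 *	height 11 -> ~0UL	(66 bits would exceed the 64-bit index)
 */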
static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}
static int radix_tree_callback(struct notifier_block *nfb,
			       unsigned long action,
			       void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;

	/* Free per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			kmem_cache_free(radix_tree_node_cachep,
					rtp->nodes[rtp->nr-1]);
			rtp->nodes[rtp->nr-1] = NULL;
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}