/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/radix-tree.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/rcupdate.h>

#ifdef __KERNEL__
#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
#else
#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
#endif

#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)

#define RADIX_TREE_TAG_LONGS	\
	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct radix_tree_node {
	unsigned int	height;		/* Height from the bottom */
	unsigned int	count;
	union {
		struct radix_tree_node *parent;	/* Used when ascending tree */
		struct rcu_head	rcu_head;	/* Used when freeing node */
	};
	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

/*
 * The height_to_maxindex array needs to be one deeper than the maximum
 * path as height 0 holds only 1 entry.
 */
static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;

/*
 * Radix tree node cache.
 */
static struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)

/*
 * Per-cpu pool of preloaded nodes
 */
struct radix_tree_preload {
	int nr;
	struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE];
};
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };

static inline void *ptr_to_indirect(void *ptr)
{
	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
}

static inline void *indirect_to_ptr(void *ptr)
{
	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
}

static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
{
	return root->gfp_mask & __GFP_BITS_MASK;
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	__clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
		int offset)
{
	return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
{
	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
	root->gfp_mask &= __GFP_BITS_MASK;
}

static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
{
	return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
{
	int idx;

	for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
		if (node->tags[tag][idx])
			return 1;
	}
	return 0;
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @addr: The address to base the search on
 * @size: The bitmap size in bits
 * @offset: The bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(const unsigned long *addr,
			 unsigned long size, unsigned long offset)
{
	if (!__builtin_constant_p(size))
		return find_next_bit(addr, size, offset);

	if (offset < size) {
		unsigned long tmp;

		addr += offset / BITS_PER_LONG;
		tmp = *addr >> (offset % BITS_PER_LONG);
		if (tmp)
			return __ffs(tmp) + offset;
		offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
		while (offset < size) {
			tmp = *++addr;
			if (tmp)
				return __ffs(tmp) + offset;
			offset += BITS_PER_LONG;
		}
	}
	return size;
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(struct radix_tree_root *root)
{
	struct radix_tree_node *ret = NULL;
	gfp_t gfp_mask = root_gfp_mask(root);

	if (!(gfp_mask & __GFP_WAIT)) {
		struct radix_tree_preload *rtp;

		/*
		 * Provided the caller has preloaded here, we will always
		 * succeed in getting a node here (and never reach
		 * kmem_cache_alloc)
		 */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr) {
			ret = rtp->nodes[rtp->nr - 1];
			rtp->nodes[rtp->nr - 1] = NULL;
			rtp->nr--;
		}
	}
	if (ret == NULL)
		ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);

	BUG_ON(radix_tree_is_indirect_ptr(ret));
	return ret;
}

static void radix_tree_node_rcu_free(struct rcu_head *head)
{
	struct radix_tree_node *node =
			container_of(head, struct radix_tree_node, rcu_head);
	int i;

	/*
	 * must only free zeroed nodes into the slab. radix_tree_shrink
	 * can leave us with a non-NULL entry in the first slot, so clear
	 * that here to make sure.
	 */
	for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
		tag_clear(node, i, 0);

	node->slots[0] = NULL;
	node->count = 0;

	kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_WAIT being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
	struct radix_tree_preload *rtp;
	struct radix_tree_node *node;
	int ret = -ENOMEM;

	preempt_disable();
	rtp = &__get_cpu_var(radix_tree_preloads);
	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			kmem_cache_free(radix_tree_node_cachep, node);
	}
	ret = 0;
out:
	return ret;
}
EXPORT_SYMBOL(radix_tree_preload);
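
/*
 * Example: the usual preload idiom, sketched for illustration (the tree,
 * lock and function names below are placeholders, not symbols defined in
 * this file).  Nodes are allocated up front while sleeping is still
 * allowed, then the insertion itself runs under a spinlock with
 * preemption disabled:
 *
 *	static RADIX_TREE(my_tree, GFP_ATOMIC);	(no __GFP_WAIT in the mask)
 *	static DEFINE_SPINLOCK(my_lock);
 *
 *	int my_insert(unsigned long index, void *item)
 *	{
 *		int err = radix_tree_preload(GFP_KERNEL);
 *		if (err)
 *			return err;
 *		spin_lock(&my_lock);
 *		err = radix_tree_insert(&my_tree, index, item);
 *		spin_unlock(&my_lock);
 *		radix_tree_preload_end();	(re-enables preemption)
 *		return err;
 *	}
 */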

/*
 * Return the maximum key which can be stored into a
 * radix tree with height HEIGHT.
 */
static inline unsigned long radix_tree_maxindex(unsigned int height)
{
	return height_to_maxindex[height];
}

/*
 * Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node;
	struct radix_tree_node *slot;
	unsigned int height;
	int tag;

	/* Figure out what the height should be. */
	height = root->height + 1;
	while (index > radix_tree_maxindex(height))
		height++;

	if (root->rnode == NULL) {
		root->height = height;
		goto out;
	}

	do {
		unsigned int newheight;

		if (!(node = radix_tree_node_alloc(root)))
			return -ENOMEM;

		/* Propagate the aggregated tag info into the new root */
		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
			if (root_tag_get(root, tag))
				tag_set(node, tag, 0);
		}

		/* Increase the height. */
		newheight = root->height+1;
		node->height = newheight;
		node->count = 1;
		node->parent = NULL;
		slot = root->rnode;
		if (newheight > 1) {
			slot = indirect_to_ptr(slot);
			slot->parent = node;
		}
		node->slots[0] = slot;
		node = ptr_to_indirect(node);
		rcu_assign_pointer(root->rnode, node);
		root->height = newheight;
	} while (height > root->height);
out:
	return 0;
}

/**
 * radix_tree_insert - insert into a radix tree
 * @root:	radix tree root
 * @index:	index key
 * @item:	item to insert
 *
 * Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
			unsigned long index, void *item)
{
	struct radix_tree_node *node = NULL, *slot;
	unsigned int height, shift;
	int offset;
	int error;

	BUG_ON(radix_tree_is_indirect_ptr(item));

	/* Make sure the tree is high enough. */
	if (index > radix_tree_maxindex(root->height)) {
		error = radix_tree_extend(root, index);
		if (error)
			return error;
	}

	slot = indirect_to_ptr(root->rnode);

	height = root->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	offset = 0;			/* uninitialised var warning */
	while (height > 0) {
		if (slot == NULL) {
			/* Have to add a child node. */
			if (!(slot = radix_tree_node_alloc(root)))
				return -ENOMEM;
			slot->height = height;
			slot->parent = node;
			if (node) {
				rcu_assign_pointer(node->slots[offset], slot);
				node->count++;
			} else
				rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
		}

		/* Go a level down */
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = node->slots[offset];
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	if (slot != NULL)
		return -EEXIST;

	if (node) {
		node->count++;
		rcu_assign_pointer(node->slots[offset], item);
		BUG_ON(tag_get(node, 0, offset));
		BUG_ON(tag_get(node, 1, offset));
	} else {
		rcu_assign_pointer(root->rnode, item);
		BUG_ON(root_tag_get(root, 0));
		BUG_ON(root_tag_get(root, 1));
	}

	return 0;
}
EXPORT_SYMBOL(radix_tree_insert);
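
/*
 * Example, as a sketch: with a tree whose gfp_mask allows sleeping, no
 * preloading is needed, but modifications must still be serialised, e.g.
 * with a mutex (my_tree and my_mutex are placeholder names):
 *
 *	static RADIX_TREE(my_tree, GFP_KERNEL);
 *	static DEFINE_MUTEX(my_mutex);
 *
 *	mutex_lock(&my_mutex);
 *	err = radix_tree_insert(&my_tree, index, item);
 *	mutex_unlock(&my_mutex);
 *	if (err == -EEXIST)
 *		...	(the slot at @index was already occupied)
 */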

/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
				unsigned long index, int is_slot)
{
	unsigned int height, shift;
	struct radix_tree_node *node, **slot;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return NULL;

	if (!radix_tree_is_indirect_ptr(node)) {
		if (index > 0)
			return NULL;
		return is_slot ? (void *)&root->rnode : node;
	}
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return NULL;

	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	do {
		slot = (struct radix_tree_node **)
			(node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
		node = rcu_dereference_raw(*slot);
		if (node == NULL)
			return NULL;

		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	} while (height > 0);

	return is_slot ? (void *)slot : indirect_to_ptr(node);
}

/**
 * radix_tree_lookup_slot - lookup a slot in a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Returns: the slot corresponding to the position @index in the
 * radix tree @root. This is useful for update-if-exists operations.
 *
 * This function can be called under rcu_read_lock iff the slot is not
 * modified by radix_tree_replace_slot, otherwise it must be called
 * exclusive from other writers. Any dereference of the slot must be done
 * using radix_tree_deref_slot.
 */
void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
{
	return (void **)radix_tree_lookup_element(root, index, 1);
}
EXPORT_SYMBOL(radix_tree_lookup_slot);
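
/*
 * Example of the update-if-exists pattern this enables, sketched with
 * placeholder names; the replacement must be excluded from other writers,
 * e.g. by the same lock that guards insertions and deletions:
 *
 *	void **slot;
 *
 *	spin_lock(&my_lock);
 *	slot = radix_tree_lookup_slot(&my_tree, index);
 *	if (slot)
 *		radix_tree_replace_slot(slot, new_item);
 *	spin_unlock(&my_lock);
 */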

/**
 * radix_tree_lookup - perform lookup operation on a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Lookup the item at the position @index in the radix tree @root.
 *
 * This function can be called under rcu_read_lock, however the caller
 * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 * them safely). No RCU barriers are required to access or modify the
 * returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
	return radix_tree_lookup_element(root, index, 0);
}
EXPORT_SYMBOL(radix_tree_lookup);
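
/*
 * Example of an RCU-protected lookup (a sketch; my_tree is a placeholder,
 * and item lifetime must be handled by the caller, e.g. with a reference
 * count taken before rcu_read_unlock, or with RCU-deferred freeing):
 *
 *	rcu_read_lock();
 *	item = radix_tree_lookup(&my_tree, index);
 *	if (item)
 *		...	(use the item, or grab a reference to it)
 *	rcu_read_unlock();
 */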

/**
 * radix_tree_tag_set - set a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index
 *
 * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree, from the root
 * all the way down to the leaf node.
 *
 * Returns the address of the tagged item.  Setting a tag on a not-present
 * item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *slot;

	height = root->height;
	BUG_ON(index > radix_tree_maxindex(height));

	slot = indirect_to_ptr(root->rnode);
	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	while (height > 0) {
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(slot, tag, offset))
			tag_set(slot, tag, offset);
		slot = slot->slots[offset];
		BUG_ON(slot == NULL);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}

	/* set the root's tag bit */
	if (slot && !root_tag_get(root, tag))
		root_tag_set(root, tag);

	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_set);
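
/*
 * Example: tags attach one of RADIX_TREE_MAX_TAGS flag bits to an existing
 * item, the way the page cache marks dirty pages.  A sketch with
 * placeholder names (tag 0 is an arbitrary choice):
 *
 *	spin_lock(&my_lock);
 *	radix_tree_tag_set(&my_tree, index, 0);
 *	spin_unlock(&my_lock);
 *	...
 *	if (radix_tree_tag_get(&my_tree, index, 0))
 *		...	(the item at @index carries tag 0)
 */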

/**
 * radix_tree_tag_clear - clear a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index
 *
 * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 * corresponding to @index in the radix tree.  If
 * this causes the leaf node to have no tags set then clear the tag in the
 * next-to-leaf node, etc.
 *
 * Returns the address of the tagged item on success, else NULL.  ie:
 * has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	unsigned int height, shift;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	shift = height * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	while (shift) {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	}

	if (slot == NULL)
		goto out;

	while (node) {
		if (!tag_get(node, tag, offset))
			goto out;
		tag_clear(node, tag, offset);
		if (any_tag_set(node, tag))
			goto out;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	/* clear the root's tag bit */
	if (root_tag_get(root, tag))
		root_tag_clear(root, tag);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:	radix tree root
 * @index:	index key
 * @tag:	tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(struct radix_tree_root *root,
			unsigned long index, unsigned int tag)
{
	unsigned int height, shift;
	struct radix_tree_node *node;

	/* check the root's tag bit */
	if (!root_tag_get(root, tag))
		return 0;

	node = rcu_dereference_raw(root->rnode);
	if (node == NULL)
		return 0;

	if (!radix_tree_is_indirect_ptr(node))
		return (index == 0);
	node = indirect_to_ptr(node);

	height = node->height;
	if (index > radix_tree_maxindex(height))
		return 0;

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;

	for ( ; ; ) {
		int offset;

		if (node == NULL)
			return 0;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!tag_get(node, tag, offset))
			return 0;
		if (height == 1)
			return 1;
		node = rcu_dereference_raw(node->slots[offset]);
		shift -= RADIX_TREE_MAP_SHIFT;
		height--;
	}
}
EXPORT_SYMBOL(radix_tree_tag_get);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:	radix tree root
 * @iter:	iterator state
 * @flags:	RADIX_TREE_ITER_* flags and tag index
 * Returns:	pointer to chunk first slot, or NULL if iteration is over
 */
void **radix_tree_next_chunk(struct radix_tree_root *root,
			     struct radix_tree_iter *iter, unsigned flags)
{
	unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
	struct radix_tree_node *rnode, *node;
	unsigned long index, offset;

	if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
		return NULL;

	/*
	 * Catch next_index overflow after ~0UL. iter->index never overflows
	 * during iterating; it can be zero only at the beginning.
	 * And we cannot overflow iter->next_index in a single step,
	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
	 *
	 * This condition is also used by radix_tree_next_slot() to stop
	 * contiguous iterating, and to forbid switching to the next chunk.
	 */
	index = iter->next_index;
	if (!index && iter->index)
		return NULL;

	rnode = rcu_dereference_raw(root->rnode);
	if (radix_tree_is_indirect_ptr(rnode)) {
		rnode = indirect_to_ptr(rnode);
	} else if (rnode && !index) {
		/* Single-slot tree */
		iter->index = 0;
		iter->next_index = 1;
		iter->tags = 1;
		return (void **)&root->rnode;
	} else
		return NULL;

restart:
	shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT;
	offset = index >> shift;

	/* Index outside of the tree */
	if (offset >= RADIX_TREE_MAP_SIZE)
		return NULL;

	node = rnode;
	while (1) {
		if ((flags & RADIX_TREE_ITER_TAGGED) ?
				!test_bit(offset, node->tags[tag]) :
				!node->slots[offset]) {
			/* Hole detected */
			if (flags & RADIX_TREE_ITER_CONTIG)
				return NULL;

			if (flags & RADIX_TREE_ITER_TAGGED)
				offset = radix_tree_find_next_bit(
						node->tags[tag],
						RADIX_TREE_MAP_SIZE,
						offset + 1);
			else
				while (++offset < RADIX_TREE_MAP_SIZE) {
					if (node->slots[offset])
						break;
				}
			index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
			index += offset << shift;
			/* Overflow after ~0UL */
			if (!index)
				return NULL;
			if (offset == RADIX_TREE_MAP_SIZE)
				goto restart;
		}

		/* This is a leaf node */
		if (!shift)
			break;

		node = rcu_dereference_raw(node->slots[offset]);
		if (node == NULL)
			goto restart;
		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
	}

	/* Update the iterator state */
	iter->index = index;
	iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1;

	/* Construct iter->tags bit-mask from node->tags[tag] array */
	if (flags & RADIX_TREE_ITER_TAGGED) {
		unsigned tag_long, tag_bit;

		tag_long = offset / BITS_PER_LONG;
		tag_bit  = offset % BITS_PER_LONG;
		iter->tags = node->tags[tag][tag_long] >> tag_bit;
		/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
		if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
			/* Pick tags from next element */
			if (tag_bit)
				iter->tags |= node->tags[tag][tag_long + 1] <<
						(BITS_PER_LONG - tag_bit);
			/* Clip chunk size, here only BITS_PER_LONG tags */
			iter->next_index = index + BITS_PER_LONG;
		}
	}

	return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
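
/*
 * Example: radix_tree_next_chunk() is rarely called directly; it backs the
 * iterator macros in <linux/radix-tree.h>.  A sketch of an RCU-side walk
 * with placeholder names (the retry handling is shown in its simplest
 * form, restarting the whole walk):
 *
 *	struct radix_tree_iter iter;
 *	void **slot;
 *	void *item;
 *
 *	rcu_read_lock();
 *	radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *		item = radix_tree_deref_slot(slot);
 *		if (unlikely(!item))
 *			continue;	(raced with a deletion)
 *		if (radix_tree_deref_retry(item))
 *			goto restart;	(raced with a grow/shrink; re-walk)
 *		...	(iter.index is the item's index)
 *	}
 *	rcu_read_unlock();
 */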

/**
 * radix_tree_range_tag_if_tagged - for each item in given range set given
 *				    tag if item has another tag set
 * @root:		radix tree root
 * @first_indexp:	pointer to a starting index of a range to scan
 * @last_index:		last index of a range to scan
 * @nr_to_tag:		maximum number of items to tag
 * @iftag:		tag index to test
 * @settag:		tag index to set if tested tag is set
 *
 * This function scans the range of the radix tree from first_index to
 * last_index (inclusive).  For each item in the range on which iftag is
 * set, the function also sets settag. The function stops either after
 * tagging nr_to_tag items or after reaching last_index.
 *
 * The tags must be set from the leaf level only and propagated back up the
 * path to the root. We must do this so that we resolve the full path before
 * setting any tags on intermediate nodes. If we set tags as we descend, then
 * we can get to the leaf node and find that the index that has the iftag
 * set is outside the range we are scanning. This results in dangling tags and
 * can lead to problems with later tag operations (e.g. livelocks on lookups).
 *
 * The function returns the number of leaves where the tag was set and sets
 * *first_indexp to the first unscanned index.
 * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
 * be prepared to handle that.
 */
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
		unsigned long *first_indexp, unsigned long last_index,
		unsigned long nr_to_tag,
		unsigned int iftag, unsigned int settag)
{
	unsigned int height = root->height;
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot;
	unsigned int shift;
	unsigned long tagged = 0;
	unsigned long index = *first_indexp;

	last_index = min(last_index, radix_tree_maxindex(height));
	if (index > last_index)
		return 0;
	if (!nr_to_tag)
		return 0;
	if (!root_tag_get(root, iftag)) {
		*first_indexp = last_index + 1;
		return 0;
	}
	if (height == 0) {
		*first_indexp = last_index + 1;
		root_tag_set(root, settag);
		return 1;
	}

	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
	slot = indirect_to_ptr(root->rnode);

	for (;;) {
		unsigned long upindex;
		int offset;

		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		if (!slot->slots[offset])
			goto next;
		if (!tag_get(slot, iftag, offset))
			goto next;
		if (shift) {
			/* Go down one level */
			shift -= RADIX_TREE_MAP_SHIFT;
			node = slot;
			slot = slot->slots[offset];
			continue;
		}

		/* tag the leaf */
		tagged++;
		tag_set(slot, settag, offset);

		/* walk back up the path tagging interior nodes */
		upindex = index;
		while (node) {
			upindex >>= RADIX_TREE_MAP_SHIFT;
			offset = upindex & RADIX_TREE_MAP_MASK;

			/* stop if we find a node with the tag already set */
			if (tag_get(node, settag, offset))
				break;
			tag_set(node, settag, offset);
			node = node->parent;
		}

		/*
		 * Small optimization: now clear that node pointer.
		 * Since all of this slot's ancestors now have the tag set
		 * from setting it above, we have no further need to walk
		 * back up the tree setting tags, until we update slot to
		 * point to another radix_tree_node.
		 */
		node = NULL;

next:
		/* Go to next item at level determined by 'shift' */
		index = ((index >> shift) + 1) << shift;
		/* Overflow can happen when last_index is ~0UL... */
		if (index > last_index || !index)
			break;
		if (tagged >= nr_to_tag)
			break;
		while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
			/*
			 * We've fully scanned this node. Go up. Because
			 * last_index is guaranteed to be in the tree, what
			 * we do below cannot wander astray.
			 */
			slot = slot->parent;
			shift += RADIX_TREE_MAP_SHIFT;
		}
	}
	/*
	 * There is no need to set the root tag if no item within the range
	 * from *first_indexp to last_index was tagged with settag.
	 */
	if (tagged > 0)
		root_tag_set(root, settag);
	*first_indexp = index;

	return tagged;
}
EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
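
/*
 * Example: the batched retagging loop used by writeback is the canonical
 * caller (cf. tag_pages_for_writeback() in mm/page-writeback.c); sketched
 * here with a placeholder batch size of 1024:
 *
 *	unsigned long start = 0, tagged;
 *
 *	do {
 *		spin_lock_irq(&mapping->tree_lock);
 *		tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
 *				&start, end, 1024,
 *				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 *		spin_unlock_irq(&mapping->tree_lock);
 *		cond_resched();
 *	} while (tagged >= 1024 && start);
 *
 * The 'start' test in the loop condition handles the documented wrap of
 * *first_indexp to 0 when the scan runs up to index ULONG_MAX.
 */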

/**
 * radix_tree_next_hole - find the next hole (not-present entry)
 * @root:	tree root
 * @index:	index key
 * @max_scan:	maximum range to search
 *
 * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
 * indexed hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'return - index >= max_scan'
 * will be true). In rare cases of index wrap-around, 0 will be returned.
 *
 * radix_tree_next_hole may be called under rcu_read_lock. However, like
 * radix_tree_gang_lookup, this will not atomically search a snapshot of
 * the tree at a single point in time. For example, if a hole is created
 * at index 5, then subsequently a hole is created at index 10,
 * radix_tree_next_hole covering both indexes may return 10 if called
 * under rcu_read_lock.
 */
unsigned long radix_tree_next_hole(struct radix_tree_root *root,
				unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index++;
		if (index == 0)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_next_hole);

/**
 * radix_tree_prev_hole - find the prev hole (not-present entry)
 * @root:	tree root
 * @index:	index key
 * @max_scan:	maximum range to search
 *
 * Search backwards in the range [max(index-max_scan+1, 0), index]
 * for the first hole.
 *
 * Returns: the index of the hole if found, otherwise returns an index
 * outside of the set specified (in which case 'index - return >= max_scan'
 * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
 *
 * radix_tree_prev_hole may be called under rcu_read_lock. However, like
 * radix_tree_gang_lookup, this will not atomically search a snapshot of
 * the tree at a single point in time. For example, if a hole is created
 * at index 10, then subsequently a hole is created at index 5,
 * radix_tree_prev_hole covering both indexes may return 5 if called under
 * rcu_read_lock.
 */
unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
				   unsigned long index, unsigned long max_scan)
{
	unsigned long i;

	for (i = 0; i < max_scan; i++) {
		if (!radix_tree_lookup(root, index))
			break;
		index--;
		if (index == ULONG_MAX)
			break;
	}

	return index;
}
EXPORT_SYMBOL(radix_tree_prev_hole);

/**
 * radix_tree_gang_lookup - perform multiple lookup on a radix tree
 * @root:	radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * them at *@results and returns the number of items which were placed at
 * *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 * rcu_read_lock. In this case, rather than the returned results being
 * an atomic snapshot of the tree at a single point in time, the semantics
 * of an RCU protected gang lookup are as though multiple radix_tree_lookups
 * have been issued in individual locks, and results stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
		if (!results[ret])
			continue;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);
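
/*
 * Example (a sketch with placeholder names): gather up to 16 items in one
 * short RCU read-side pass, then process them outside the critical
 * section:
 *
 *	void *items[16];
 *	unsigned int i, nr;
 *
 *	rcu_read_lock();
 *	nr = radix_tree_gang_lookup(&my_tree, items, first_index,
 *				    ARRAY_SIZE(items));
 *	rcu_read_unlock();
 *	for (i = 0; i < nr; i++)
 *		...	(process items[i])
 */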

/**
 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
 * @root:	radix tree root
 * @results:	where the results of the lookup are placed
 * @indices:	where their indices should be placed (but usually NULL)
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 *
 * Performs an index-ascending scan of the tree for present items.  Places
 * their slots at *@results and returns the number of items which were
 * placed at *@results.
 *
 * The implementation is naive.
 *
 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
 * be dereferenced with radix_tree_deref_slot, and if using only RCU
 * protection, radix_tree_deref_slot may fail requiring a retry.
 */
unsigned int
radix_tree_gang_lookup_slot(struct radix_tree_root *root,
			void ***results, unsigned long *indices,
			unsigned long first_index, unsigned int max_items)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_slot(slot, root, &iter, first_index) {
		results[ret] = slot;
		if (indices)
			indices[ret] = iter.index;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_slot);

/**
 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *				based on a tag
 * @root:	radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 * @tag:	the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the items at *@results and
 * returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
		if (!results[ret])
			continue;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *				     radix tree based on a tag
 * @root:	radix tree root
 * @results:	where the results of the lookup are placed
 * @first_index:	start the lookup from this key
 * @max_items:	place up to this many items at *results
 * @tag:	the tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Performs an index-ascending scan of the tree for present items which
 * have the tag indexed by @tag set.  Places the slots at *@results and
 * returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
		unsigned long first_index, unsigned int max_items,
		unsigned int tag)
{
	struct radix_tree_iter iter;
	void **slot;
	unsigned int ret = 0;

	if (unlikely(!max_items))
		return 0;

	radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
		results[ret] = slot;
		if (++ret == max_items)
			break;
	}

	return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

#if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP)
#include <linux/sched.h> /* for cond_resched() */

/*
 * This linear search is at present only useful to shmem_unuse_inode().
 */
static unsigned long __locate(struct radix_tree_node *slot, void *item,
			      unsigned long index, unsigned long *found_index)
{
	unsigned int shift, height;
	unsigned long i;

	height = slot->height;
	shift = (height-1) * RADIX_TREE_MAP_SHIFT;

	for ( ; height > 1; height--) {
		i = (index >> shift) & RADIX_TREE_MAP_MASK;
		for (;;) {
			if (slot->slots[i] != NULL)
				break;
			index &= ~((1UL << shift) - 1);
			index += 1UL << shift;
			if (index == 0)
				goto out;	/* 32-bit wraparound */
			i++;
			if (i == RADIX_TREE_MAP_SIZE)
				goto out;
		}

		shift -= RADIX_TREE_MAP_SHIFT;
		slot = rcu_dereference_raw(slot->slots[i]);
		if (slot == NULL)
			goto out;
	}

	/* Bottom level: check items */
	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
		if (slot->slots[i] == item) {
			*found_index = index + i;
			index = 0;
			goto out;
		}
	}
	index += RADIX_TREE_MAP_SIZE;
out:
	return index;
}

/**
 * radix_tree_locate_item - search through radix tree for item
 * @root:	radix tree root
 * @item:	item to be found
 *
 * Returns index where item was found, or -1 if not found.
 * Caller must hold no lock (since this time-consuming function needs
 * to be preemptible), and must check afterwards if item is still there.
 */
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	struct radix_tree_node *node;
	unsigned long max_index;
	unsigned long cur_index = 0;
	unsigned long found_index = -1;

	do {
		rcu_read_lock();
		node = rcu_dereference_raw(root->rnode);
		if (!radix_tree_is_indirect_ptr(node)) {
			rcu_read_unlock();
			if (node == item)
				found_index = 0;
			break;
		}

		node = indirect_to_ptr(node);
		max_index = radix_tree_maxindex(node->height);
		if (cur_index > max_index)
			break;

		cur_index = __locate(node, item, cur_index, &found_index);
		rcu_read_unlock();
		cond_resched();
	} while (cur_index != 0 && cur_index <= max_index);

	return found_index;
}
#else
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
{
	return -1;
}
#endif /* CONFIG_SHMEM && CONFIG_SWAP */

/**
 * radix_tree_shrink - shrink height of a radix tree to minimal
 * @root:	radix tree root
 */
static inline void radix_tree_shrink(struct radix_tree_root *root)
{
	/* try to shrink tree height */
	while (root->height > 0) {
		struct radix_tree_node *to_free = root->rnode;
		struct radix_tree_node *slot;

		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
		to_free = indirect_to_ptr(to_free);

		/*
		 * If the candidate node has more than one child, or its
		 * child is not at the leftmost slot, we cannot shrink.
		 */
		if (to_free->count != 1)
			break;
		if (!to_free->slots[0])
			break;

		/*
		 * We don't need rcu_assign_pointer(), since we are simply
		 * moving the node from one part of the tree to another: if it
		 * was safe to dereference the old pointer to it
		 * (to_free->slots[0]), it will be safe to dereference the new
		 * one (root->rnode) as far as dependent read barriers go.
		 */
		slot = to_free->slots[0];
		if (root->height > 1) {
			slot->parent = NULL;
			slot = ptr_to_indirect(slot);
		}
		root->rnode = slot;
		root->height--;

		/*
		 * We have a dilemma here. The node's slot[0] must not be
		 * NULLed in case there are concurrent lookups expecting to
		 * find the item. However if this was a bottom-level node,
		 * then it may be subject to the slot pointer being visible
		 * to callers dereferencing it. If item corresponding to
		 * slot[0] is subsequently deleted, these callers would expect
		 * their slot to become empty sooner or later.
		 *
		 * For example, lockless pagecache will look up a slot, deref
		 * the page pointer, and if the page has 0 refcount it means
		 * it was concurrently deleted from pagecache so try the deref
		 * again. Fortunately there is already a requirement for logic
		 * to retry the entire slot lookup -- the indirect pointer
		 * problem (replacing direct root node with an indirect pointer
		 * also results in a stale slot). So tag the slot as indirect
		 * to force callers to retry.
		 */
		if (root->height == 0)
			*((unsigned long *)&to_free->slots[0]) |=
						RADIX_TREE_INDIRECT_PTR;

		radix_tree_node_free(to_free);
	}
}

/**
 * radix_tree_delete - delete an item from a radix tree
 * @root:	radix tree root
 * @index:	index key
 *
 * Remove the item at @index from the radix tree rooted at @root.
 *
 * Returns the address of the deleted item, or NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
	struct radix_tree_node *node = NULL;
	struct radix_tree_node *slot = NULL;
	struct radix_tree_node *to_free;
	unsigned int height, shift;
	int tag;
	int uninitialized_var(offset);

	height = root->height;
	if (index > radix_tree_maxindex(height))
		goto out;

	slot = root->rnode;
	if (height == 0) {
		root_tag_clear_all(root);
		root->rnode = NULL;
		goto out;
	}
	slot = indirect_to_ptr(slot);
	shift = height * RADIX_TREE_MAP_SHIFT;

	do {
		if (slot == NULL)
			goto out;

		shift -= RADIX_TREE_MAP_SHIFT;
		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
		node = slot;
		slot = slot->slots[offset];
	} while (shift);

	if (slot == NULL)
		goto out;

	/*
	 * Clear all tags associated with the item to be deleted.
	 * This way of doing it would be inefficient, but seldom is any set.
	 */
	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
		if (tag_get(node, tag, offset))
			radix_tree_tag_clear(root, index, tag);
	}

	to_free = NULL;
	/* Now free the nodes we do not need anymore */
	while (node) {
		node->slots[offset] = NULL;
		node->count--;
		/*
		 * Queue the node for deferred freeing after the
		 * last reference to it disappears (set NULL, above).
		 */
		if (to_free)
			radix_tree_node_free(to_free);

		if (node->count) {
			if (node == indirect_to_ptr(root->rnode))
				radix_tree_shrink(root);
			goto out;
		}

		/* Node with zero slots in use so free it */
		to_free = node;

		index >>= RADIX_TREE_MAP_SHIFT;
		offset = index & RADIX_TREE_MAP_MASK;
		node = node->parent;
	}

	root_tag_clear_all(root);
	root->height = 0;
	root->rnode = NULL;
	if (to_free)
		radix_tree_node_free(to_free);

out:
	return slot;
}
EXPORT_SYMBOL(radix_tree_delete);
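
/*
 * Example: deletion returns the removed item, which the caller typically
 * frees only after an RCU grace period so that concurrent lookups never
 * see freed memory.  A sketch with placeholder names, assuming the item
 * embeds a struct rcu_head named 'rcu':
 *
 *	spin_lock(&my_lock);
 *	item = radix_tree_delete(&my_tree, index);
 *	spin_unlock(&my_lock);
 *	if (item)
 *		kfree_rcu(item, rcu);
 */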

/**
 * radix_tree_tagged - test whether any items in the tree are tagged
 * @root:	radix tree root
 * @tag:	tag to test
 */
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
{
	return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

static void
radix_tree_node_ctor(void *node)
{
	memset(node, 0, sizeof(struct radix_tree_node));
}

static __init unsigned long __maxindex(unsigned int height)
{
	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
	int shift = RADIX_TREE_INDEX_BITS - width;

	if (shift < 0)
		return ~0UL;
	if (shift >= BITS_PER_LONG)
		return 0UL;
	return ~0UL >> shift;
}

static __init void radix_tree_init_maxindex(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
		height_to_maxindex[i] = __maxindex(i);
}

static int radix_tree_callback(struct notifier_block *nfb,
			       unsigned long action,
			       void *hcpu)
{
	int cpu = (long)hcpu;
	struct radix_tree_preload *rtp;

	/* Free per-cpu pool of preloaded nodes */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		rtp = &per_cpu(radix_tree_preloads, cpu);
		while (rtp->nr) {
			kmem_cache_free(radix_tree_node_cachep,
					rtp->nodes[rtp->nr-1]);
			rtp->nodes[rtp->nr-1] = NULL;
			rtp->nr--;
		}
	}
	return NOTIFY_OK;
}

void __init radix_tree_init(void)
{
	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
			sizeof(struct radix_tree_node), 0,
			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
			radix_tree_node_ctor);
	radix_tree_init_maxindex();
	hotcpu_notifier(radix_tree_callback, 0);
}