ip_vs_dh.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
/*
 * IPVS:        Destination Hashing scheduling module
 *
 * Version:     $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              Inspired by the consistent hashing scheduler patch from
 *              Thomas Proell <proellt@gmx.de>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
/*
 * The dh algorithm selects a server by the hash key of the destination IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[dest_ip];
 *       if (n is dead) OR
 *          (n is overloaded) OR (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a hash table (256 buckets by default) that maps
 * the hash index derived from the packet destination IP address to the
 * current server array. If the dh scheduler is used in a cache cluster, it
 * is good to combine it with the cache_bypass feature. When the statically
 * assigned server is dead or overloaded, the load balancer can bypass the
 * cache server and send requests to the original server directly.
 *
 */
  38. #include <linux/ip.h>
  39. #include <linux/module.h>
  40. #include <linux/kernel.h>
  41. #include <linux/skbuff.h>
  42. #include <net/ip_vs.h>
  43. /*
  44. * IPVS DH bucket
  45. */
  46. struct ip_vs_dh_bucket {
  47. struct ip_vs_dest *dest; /* real server (cache) */
  48. };
  49. /*
  50. * for IPVS DH entry hash table
  51. */
  52. #ifndef CONFIG_IP_VS_DH_TAB_BITS
  53. #define CONFIG_IP_VS_DH_TAB_BITS 8
  54. #endif
  55. #define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS
  56. #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
  57. #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
  58. /*
  59. * Returns hash value for IPVS DH entry
  60. */
  61. static inline unsigned ip_vs_dh_hashkey(__be32 addr)
  62. {
  63. return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK;
  64. }
  65. /*
  66. * Get ip_vs_dest associated with supplied parameters.
  67. */
  68. static inline struct ip_vs_dest *
  69. ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr)
  70. {
  71. return (tbl[ip_vs_dh_hashkey(addr)]).dest;
  72. }
  73. /*
  74. * Assign all the hash buckets of the specified table with the service.
  75. */
  76. static int
  77. ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
  78. {
  79. int i;
  80. struct ip_vs_dh_bucket *b;
  81. struct list_head *p;
  82. struct ip_vs_dest *dest;
  83. b = tbl;
  84. p = &svc->destinations;
  85. for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
  86. if (list_empty(p)) {
  87. b->dest = NULL;
  88. } else {
  89. if (p == &svc->destinations)
  90. p = p->next;
  91. dest = list_entry(p, struct ip_vs_dest, n_list);
  92. atomic_inc(&dest->refcnt);
  93. b->dest = dest;
  94. p = p->next;
  95. }
  96. b++;
  97. }
  98. return 0;
  99. }
  100. /*
  101. * Flush all the hash buckets of the specified table.
  102. */
  103. static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
  104. {
  105. int i;
  106. struct ip_vs_dh_bucket *b;
  107. b = tbl;
  108. for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
  109. if (b->dest) {
  110. atomic_dec(&b->dest->refcnt);
  111. b->dest = NULL;
  112. }
  113. b++;
  114. }
  115. }
  116. static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
  117. {
  118. struct ip_vs_dh_bucket *tbl;
  119. /* allocate the DH table for this service */
  120. tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
  121. GFP_ATOMIC);
  122. if (tbl == NULL) {
  123. IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n");
  124. return -ENOMEM;
  125. }
  126. svc->sched_data = tbl;
  127. IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
  128. "current service\n",
  129. sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
  130. /* assign the hash buckets with the updated service */
  131. ip_vs_dh_assign(tbl, svc);
  132. return 0;
  133. }
  134. static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
  135. {
  136. struct ip_vs_dh_bucket *tbl = svc->sched_data;
  137. /* got to clean up hash buckets here */
  138. ip_vs_dh_flush(tbl);
  139. /* release the table itself */
  140. kfree(svc->sched_data);
  141. IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
  142. sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
  143. return 0;
  144. }
  145. static int ip_vs_dh_update_svc(struct ip_vs_service *svc)
  146. {
  147. struct ip_vs_dh_bucket *tbl = svc->sched_data;
  148. /* got to clean up hash buckets here */
  149. ip_vs_dh_flush(tbl);
  150. /* assign the hash buckets with the updated service */
  151. ip_vs_dh_assign(tbl, svc);
  152. return 0;
  153. }
  154. /*
  155. * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
  156. * consider that the server is overloaded here.
  157. */
  158. static inline int is_overloaded(struct ip_vs_dest *dest)
  159. {
  160. return dest->flags & IP_VS_DEST_F_OVERLOAD;
  161. }
  162. /*
  163. * Destination hashing scheduling
  164. */
  165. static struct ip_vs_dest *
  166. ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
  167. {
  168. struct ip_vs_dest *dest;
  169. struct ip_vs_dh_bucket *tbl;
  170. struct iphdr *iph = skb->nh.iph;
  171. IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
  172. tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
  173. dest = ip_vs_dh_get(tbl, iph->daddr);
  174. if (!dest
  175. || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
  176. || atomic_read(&dest->weight) <= 0
  177. || is_overloaded(dest)) {
  178. return NULL;
  179. }
  180. IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u "
  181. "--> server %u.%u.%u.%u:%d\n",
  182. NIPQUAD(iph->daddr),
  183. NIPQUAD(dest->addr),
  184. ntohs(dest->port));
  185. return dest;
  186. }
  187. /*
  188. * IPVS DH Scheduler structure
  189. */
  190. static struct ip_vs_scheduler ip_vs_dh_scheduler =
  191. {
  192. .name = "dh",
  193. .refcnt = ATOMIC_INIT(0),
  194. .module = THIS_MODULE,
  195. .init_service = ip_vs_dh_init_svc,
  196. .done_service = ip_vs_dh_done_svc,
  197. .update_service = ip_vs_dh_update_svc,
  198. .schedule = ip_vs_dh_schedule,
  199. };
  200. static int __init ip_vs_dh_init(void)
  201. {
  202. INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
  203. return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
  204. }
  205. static void __exit ip_vs_dh_cleanup(void)
  206. {
  207. unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
  208. }
  209. module_init(ip_vs_dh_init);
  210. module_exit(ip_vs_dh_cleanup);
  211. MODULE_LICENSE("GPL");