ip_vs_sh.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. /*
  2. * IPVS: Source Hashing scheduling module
  3. *
  4. * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
  5. *
  6. * Authors: Wensong Zhang <wensong@gnuchina.org>
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License
  10. * as published by the Free Software Foundation; either version
  11. * 2 of the License, or (at your option) any later version.
  12. *
  13. * Changes:
  14. *
  15. */
  16. /*
  17. * The sh algorithm is to select server by the hash key of source IP
  18. * address. The pseudo code is as follows:
  19. *
  20. * n <- servernode[src_ip];
  21. * if (n is dead) OR
  22. * (n is overloaded) or (n.weight <= 0) then
  23. * return NULL;
  24. *
  25. * return n;
  26. *
  27. * Note that servernode is a 256-bucket hash table that maps the hash
  28. * index derived from the packet's source IP address to the current server
  29. * array. If the sh scheduler is used in a cache cluster, it is good to
  30. * combine it with the cache_bypass feature. When the statically assigned
  31. * server is dead or overloaded, the load balancer can bypass the cache
  32. * server and send requests to the original server directly.
  33. *
  34. */
  35. #include <linux/ip.h>
  36. #include <linux/module.h>
  37. #include <linux/kernel.h>
  38. #include <linux/skbuff.h>
  39. #include <net/ip_vs.h>
  40. /*
  41. * IPVS SH bucket
  42. */
  43. struct ip_vs_sh_bucket {
  44. struct ip_vs_dest *dest; /* real server (cache) */
  45. };
  46. /*
  47. * for IPVS SH entry hash table
  48. */
  49. #ifndef CONFIG_IP_VS_SH_TAB_BITS
  50. #define CONFIG_IP_VS_SH_TAB_BITS 8
  51. #endif
  52. #define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS
  53. #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
  54. #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
  55. /*
  56. * Returns hash value for IPVS SH entry
  57. */
  58. static inline unsigned ip_vs_sh_hashkey(__u32 addr)
  59. {
  60. return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK;
  61. }
  62. /*
  63. * Get ip_vs_dest associated with supplied parameters.
  64. */
  65. static inline struct ip_vs_dest *
  66. ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr)
  67. {
  68. return (tbl[ip_vs_sh_hashkey(addr)]).dest;
  69. }
  70. /*
  71. * Assign all the hash buckets of the specified table with the service.
  72. */
  73. static int
  74. ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
  75. {
  76. int i;
  77. struct ip_vs_sh_bucket *b;
  78. struct list_head *p;
  79. struct ip_vs_dest *dest;
  80. b = tbl;
  81. p = &svc->destinations;
  82. for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
  83. if (list_empty(p)) {
  84. b->dest = NULL;
  85. } else {
  86. if (p == &svc->destinations)
  87. p = p->next;
  88. dest = list_entry(p, struct ip_vs_dest, n_list);
  89. atomic_inc(&dest->refcnt);
  90. b->dest = dest;
  91. p = p->next;
  92. }
  93. b++;
  94. }
  95. return 0;
  96. }
  97. /*
  98. * Flush all the hash buckets of the specified table.
  99. */
  100. static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
  101. {
  102. int i;
  103. struct ip_vs_sh_bucket *b;
  104. b = tbl;
  105. for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
  106. if (b->dest) {
  107. atomic_dec(&b->dest->refcnt);
  108. b->dest = NULL;
  109. }
  110. b++;
  111. }
  112. }
  113. static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
  114. {
  115. struct ip_vs_sh_bucket *tbl;
  116. /* allocate the SH table for this service */
  117. tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
  118. GFP_ATOMIC);
  119. if (tbl == NULL) {
  120. IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
  121. return -ENOMEM;
  122. }
  123. svc->sched_data = tbl;
  124. IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
  125. "current service\n",
  126. sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
  127. /* assign the hash buckets with the updated service */
  128. ip_vs_sh_assign(tbl, svc);
  129. return 0;
  130. }
  131. static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
  132. {
  133. struct ip_vs_sh_bucket *tbl = svc->sched_data;
  134. /* got to clean up hash buckets here */
  135. ip_vs_sh_flush(tbl);
  136. /* release the table itself */
  137. kfree(svc->sched_data);
  138. IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
  139. sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
  140. return 0;
  141. }
  142. static int ip_vs_sh_update_svc(struct ip_vs_service *svc)
  143. {
  144. struct ip_vs_sh_bucket *tbl = svc->sched_data;
  145. /* got to clean up hash buckets here */
  146. ip_vs_sh_flush(tbl);
  147. /* assign the hash buckets with the updated service */
  148. ip_vs_sh_assign(tbl, svc);
  149. return 0;
  150. }
  151. /*
  152. * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
  153. * consider that the server is overloaded here.
  154. */
  155. static inline int is_overloaded(struct ip_vs_dest *dest)
  156. {
  157. return dest->flags & IP_VS_DEST_F_OVERLOAD;
  158. }
  159. /*
  160. * Source Hashing scheduling
  161. */
  162. static struct ip_vs_dest *
  163. ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
  164. {
  165. struct ip_vs_dest *dest;
  166. struct ip_vs_sh_bucket *tbl;
  167. struct iphdr *iph = skb->nh.iph;
  168. IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
  169. tbl = (struct ip_vs_sh_bucket *)svc->sched_data;
  170. dest = ip_vs_sh_get(tbl, iph->saddr);
  171. if (!dest
  172. || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
  173. || atomic_read(&dest->weight) <= 0
  174. || is_overloaded(dest)) {
  175. return NULL;
  176. }
  177. IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u "
  178. "--> server %u.%u.%u.%u:%d\n",
  179. NIPQUAD(iph->saddr),
  180. NIPQUAD(dest->addr),
  181. ntohs(dest->port));
  182. return dest;
  183. }
  184. /*
  185. * IPVS SH Scheduler structure
  186. */
  187. static struct ip_vs_scheduler ip_vs_sh_scheduler =
  188. {
  189. .name = "sh",
  190. .refcnt = ATOMIC_INIT(0),
  191. .module = THIS_MODULE,
  192. .init_service = ip_vs_sh_init_svc,
  193. .done_service = ip_vs_sh_done_svc,
  194. .update_service = ip_vs_sh_update_svc,
  195. .schedule = ip_vs_sh_schedule,
  196. };
  197. static int __init ip_vs_sh_init(void)
  198. {
  199. INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
  200. return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
  201. }
  202. static void __exit ip_vs_sh_cleanup(void)
  203. {
  204. unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
  205. }
  206. module_init(ip_vs_sh_init);
  207. module_exit(ip_vs_sh_cleanup);
  208. MODULE_LICENSE("GPL");