ip_vs_proto_udp.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
  17. #include <linux/kernel.h>
  18. #include <linux/netfilter_ipv4.h>
  19. #include <net/ip_vs.h>
  20. static struct ip_vs_conn *
  21. udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  22. const struct iphdr *iph, unsigned int proto_off, int inverse)
  23. {
  24. struct ip_vs_conn *cp;
  25. __u16 _ports[2], *pptr;
  26. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  27. if (pptr == NULL)
  28. return NULL;
  29. if (likely(!inverse)) {
  30. cp = ip_vs_conn_in_get(iph->protocol,
  31. iph->saddr, pptr[0],
  32. iph->daddr, pptr[1]);
  33. } else {
  34. cp = ip_vs_conn_in_get(iph->protocol,
  35. iph->daddr, pptr[1],
  36. iph->saddr, pptr[0]);
  37. }
  38. return cp;
  39. }
  40. static struct ip_vs_conn *
  41. udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  42. const struct iphdr *iph, unsigned int proto_off, int inverse)
  43. {
  44. struct ip_vs_conn *cp;
  45. __u16 _ports[2], *pptr;
  46. pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  47. sizeof(_ports), _ports);
  48. if (pptr == NULL)
  49. return NULL;
  50. if (likely(!inverse)) {
  51. cp = ip_vs_conn_out_get(iph->protocol,
  52. iph->saddr, pptr[0],
  53. iph->daddr, pptr[1]);
  54. } else {
  55. cp = ip_vs_conn_out_get(iph->protocol,
  56. iph->daddr, pptr[1],
  57. iph->saddr, pptr[0]);
  58. }
  59. return cp;
  60. }
  61. static int
  62. udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
  63. int *verdict, struct ip_vs_conn **cpp)
  64. {
  65. struct ip_vs_service *svc;
  66. struct udphdr _udph, *uh;
  67. uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  68. sizeof(_udph), &_udph);
  69. if (uh == NULL) {
  70. *verdict = NF_DROP;
  71. return 0;
  72. }
  73. if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
  74. skb->nh.iph->daddr, uh->dest))) {
  75. if (ip_vs_todrop()) {
  76. /*
  77. * It seems that we are very loaded.
  78. * We have to drop this packet :(
  79. */
  80. ip_vs_service_put(svc);
  81. *verdict = NF_DROP;
  82. return 0;
  83. }
  84. /*
  85. * Let the virtual server select a real server for the
  86. * incoming connection, and create a connection entry.
  87. */
  88. *cpp = ip_vs_schedule(svc, skb);
  89. if (!*cpp) {
  90. *verdict = ip_vs_leave(svc, skb, pp);
  91. return 0;
  92. }
  93. ip_vs_service_put(svc);
  94. }
  95. return 1;
  96. }
  97. static inline void
  98. udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
  99. u16 oldport, u16 newport)
  100. {
  101. uhdr->check =
  102. ip_vs_check_diff(~oldip, newip,
  103. ip_vs_check_diff(oldport ^ 0xFFFF,
  104. newport, uhdr->check));
  105. if (!uhdr->check)
  106. uhdr->check = 0xFFFF;
  107. }
  108. static int
  109. udp_snat_handler(struct sk_buff **pskb,
  110. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  111. {
  112. struct udphdr *udph;
  113. unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
  114. /* csum_check requires unshared skb */
  115. if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
  116. return 0;
  117. if (unlikely(cp->app != NULL)) {
  118. /* Some checks before mangling */
  119. if (pp->csum_check && !pp->csum_check(*pskb, pp))
  120. return 0;
  121. /*
  122. * Call application helper if needed
  123. */
  124. if (!ip_vs_app_pkt_out(cp, pskb))
  125. return 0;
  126. }
  127. udph = (void *)(*pskb)->nh.iph + udphoff;
  128. udph->source = cp->vport;
  129. /*
  130. * Adjust UDP checksums
  131. */
  132. if (!cp->app && (udph->check != 0)) {
  133. /* Only port and addr are changed, do fast csum update */
  134. udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
  135. cp->dport, cp->vport);
  136. if ((*pskb)->ip_summed == CHECKSUM_HW)
  137. (*pskb)->ip_summed = CHECKSUM_NONE;
  138. } else {
  139. /* full checksum calculation */
  140. udph->check = 0;
  141. (*pskb)->csum = skb_checksum(*pskb, udphoff,
  142. (*pskb)->len - udphoff, 0);
  143. udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
  144. (*pskb)->len - udphoff,
  145. cp->protocol,
  146. (*pskb)->csum);
  147. if (udph->check == 0)
  148. udph->check = 0xFFFF;
  149. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  150. pp->name, udph->check,
  151. (char*)&(udph->check) - (char*)udph);
  152. }
  153. return 1;
  154. }
  155. static int
  156. udp_dnat_handler(struct sk_buff **pskb,
  157. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  158. {
  159. struct udphdr *udph;
  160. unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
  161. /* csum_check requires unshared skb */
  162. if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
  163. return 0;
  164. if (unlikely(cp->app != NULL)) {
  165. /* Some checks before mangling */
  166. if (pp->csum_check && !pp->csum_check(*pskb, pp))
  167. return 0;
  168. /*
  169. * Attempt ip_vs_app call.
  170. * It will fix ip_vs_conn
  171. */
  172. if (!ip_vs_app_pkt_in(cp, pskb))
  173. return 0;
  174. }
  175. udph = (void *)(*pskb)->nh.iph + udphoff;
  176. udph->dest = cp->dport;
  177. /*
  178. * Adjust UDP checksums
  179. */
  180. if (!cp->app && (udph->check != 0)) {
  181. /* Only port and addr are changed, do fast csum update */
  182. udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
  183. cp->vport, cp->dport);
  184. if ((*pskb)->ip_summed == CHECKSUM_HW)
  185. (*pskb)->ip_summed = CHECKSUM_NONE;
  186. } else {
  187. /* full checksum calculation */
  188. udph->check = 0;
  189. (*pskb)->csum = skb_checksum(*pskb, udphoff,
  190. (*pskb)->len - udphoff, 0);
  191. udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
  192. (*pskb)->len - udphoff,
  193. cp->protocol,
  194. (*pskb)->csum);
  195. if (udph->check == 0)
  196. udph->check = 0xFFFF;
  197. (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
  198. }
  199. return 1;
  200. }
  201. static int
  202. udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
  203. {
  204. struct udphdr _udph, *uh;
  205. unsigned int udphoff = skb->nh.iph->ihl*4;
  206. uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
  207. if (uh == NULL)
  208. return 0;
  209. if (uh->check != 0) {
  210. switch (skb->ip_summed) {
  211. case CHECKSUM_NONE:
  212. skb->csum = skb_checksum(skb, udphoff,
  213. skb->len - udphoff, 0);
  214. case CHECKSUM_HW:
  215. if (csum_tcpudp_magic(skb->nh.iph->saddr,
  216. skb->nh.iph->daddr,
  217. skb->len - udphoff,
  218. skb->nh.iph->protocol,
  219. skb->csum)) {
  220. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  221. "Failed checksum for");
  222. return 0;
  223. }
  224. break;
  225. default:
  226. /* CHECKSUM_UNNECESSARY */
  227. break;
  228. }
  229. }
  230. return 1;
  231. }
  232. /*
  233. * Note: the caller guarantees that only one of register_app,
  234. * unregister_app or app_conn_bind is called each time.
  235. */
  236. #define UDP_APP_TAB_BITS 4
  237. #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
  238. #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
  239. static struct list_head udp_apps[UDP_APP_TAB_SIZE];
  240. static DEFINE_SPINLOCK(udp_app_lock);
  241. static inline __u16 udp_app_hashkey(__u16 port)
  242. {
  243. return ((port >> UDP_APP_TAB_BITS) ^ port) & UDP_APP_TAB_MASK;
  244. }
  245. static int udp_register_app(struct ip_vs_app *inc)
  246. {
  247. struct ip_vs_app *i;
  248. __u16 hash, port = inc->port;
  249. int ret = 0;
  250. hash = udp_app_hashkey(port);
  251. spin_lock_bh(&udp_app_lock);
  252. list_for_each_entry(i, &udp_apps[hash], p_list) {
  253. if (i->port == port) {
  254. ret = -EEXIST;
  255. goto out;
  256. }
  257. }
  258. list_add(&inc->p_list, &udp_apps[hash]);
  259. atomic_inc(&ip_vs_protocol_udp.appcnt);
  260. out:
  261. spin_unlock_bh(&udp_app_lock);
  262. return ret;
  263. }
  264. static void
  265. udp_unregister_app(struct ip_vs_app *inc)
  266. {
  267. spin_lock_bh(&udp_app_lock);
  268. atomic_dec(&ip_vs_protocol_udp.appcnt);
  269. list_del(&inc->p_list);
  270. spin_unlock_bh(&udp_app_lock);
  271. }
  272. static int udp_app_conn_bind(struct ip_vs_conn *cp)
  273. {
  274. int hash;
  275. struct ip_vs_app *inc;
  276. int result = 0;
  277. /* Default binding: bind app only for NAT */
  278. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  279. return 0;
  280. /* Lookup application incarnations and bind the right one */
  281. hash = udp_app_hashkey(cp->vport);
  282. spin_lock(&udp_app_lock);
  283. list_for_each_entry(inc, &udp_apps[hash], p_list) {
  284. if (inc->port == cp->vport) {
  285. if (unlikely(!ip_vs_app_inc_get(inc)))
  286. break;
  287. spin_unlock(&udp_app_lock);
  288. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  289. "%u.%u.%u.%u:%u to app %s on port %u\n",
  290. __FUNCTION__,
  291. NIPQUAD(cp->caddr), ntohs(cp->cport),
  292. NIPQUAD(cp->vaddr), ntohs(cp->vport),
  293. inc->name, ntohs(inc->port));
  294. cp->app = inc;
  295. if (inc->init_conn)
  296. result = inc->init_conn(inc, cp);
  297. goto out;
  298. }
  299. }
  300. spin_unlock(&udp_app_lock);
  301. out:
  302. return result;
  303. }
  304. static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
  305. [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
  306. [IP_VS_UDP_S_LAST] = 2*HZ,
  307. };
  308. static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
  309. [IP_VS_UDP_S_NORMAL] = "UDP",
  310. [IP_VS_UDP_S_LAST] = "BUG!",
  311. };
  312. static int
  313. udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
  314. {
  315. return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
  316. udp_state_name_table, sname, to);
  317. }
  318. static const char * udp_state_name(int state)
  319. {
  320. if (state >= IP_VS_UDP_S_LAST)
  321. return "ERR!";
  322. return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
  323. }
  324. static int
  325. udp_state_transition(struct ip_vs_conn *cp, int direction,
  326. const struct sk_buff *skb,
  327. struct ip_vs_protocol *pp)
  328. {
  329. cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
  330. return 1;
  331. }
  332. static void udp_init(struct ip_vs_protocol *pp)
  333. {
  334. IP_VS_INIT_HASH_TABLE(udp_apps);
  335. pp->timeout_table = udp_timeouts;
  336. }
  337. static void udp_exit(struct ip_vs_protocol *pp)
  338. {
  339. }
  340. struct ip_vs_protocol ip_vs_protocol_udp = {
  341. .name = "UDP",
  342. .protocol = IPPROTO_UDP,
  343. .dont_defrag = 0,
  344. .init = udp_init,
  345. .exit = udp_exit,
  346. .conn_schedule = udp_conn_schedule,
  347. .conn_in_get = udp_conn_in_get,
  348. .conn_out_get = udp_conn_out_get,
  349. .snat_handler = udp_snat_handler,
  350. .dnat_handler = udp_dnat_handler,
  351. .csum_check = udp_csum_check,
  352. .state_transition = udp_state_transition,
  353. .state_name = udp_state_name,
  354. .register_app = udp_register_app,
  355. .unregister_app = udp_unregister_app,
  356. .app_conn_bind = udp_app_conn_bind,
  357. .debug_packet = ip_vs_tcpudp_debug_packet,
  358. .timeout_change = NULL,
  359. .set_state_timeout = udp_set_state_timeout,
  360. };