ip_vs_proto_udp.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. /*
  2. * ip_vs_proto_udp.c: UDP load balancing support for IPVS
  3. *
  4. * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
  5. *
  6. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  7. * Julian Anastasov <ja@ssi.bg>
  8. *
  9. * This program is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU General Public License
  11. * as published by the Free Software Foundation; either version
  12. * 2 of the License, or (at your option) any later version.
  13. *
  14. * Changes:
  15. *
  16. */
  17. #include <linux/in.h>
  18. #include <linux/ip.h>
  19. #include <linux/kernel.h>
  20. #include <linux/netfilter.h>
  21. #include <linux/netfilter_ipv4.h>
  22. #include <linux/udp.h>
  23. #include <net/ip_vs.h>
  24. #include <net/ip.h>
  25. static struct ip_vs_conn *
  26. udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  27. const struct iphdr *iph, unsigned int proto_off, int inverse)
  28. {
  29. struct ip_vs_conn *cp;
  30. __be16 _ports[2], *pptr;
  31. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  32. if (pptr == NULL)
  33. return NULL;
  34. if (likely(!inverse)) {
  35. cp = ip_vs_conn_in_get(iph->protocol,
  36. iph->saddr, pptr[0],
  37. iph->daddr, pptr[1]);
  38. } else {
  39. cp = ip_vs_conn_in_get(iph->protocol,
  40. iph->daddr, pptr[1],
  41. iph->saddr, pptr[0]);
  42. }
  43. return cp;
  44. }
  45. static struct ip_vs_conn *
  46. udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  47. const struct iphdr *iph, unsigned int proto_off, int inverse)
  48. {
  49. struct ip_vs_conn *cp;
  50. __be16 _ports[2], *pptr;
  51. pptr = skb_header_pointer(skb, ip_hdrlen(skb),
  52. sizeof(_ports), _ports);
  53. if (pptr == NULL)
  54. return NULL;
  55. if (likely(!inverse)) {
  56. cp = ip_vs_conn_out_get(iph->protocol,
  57. iph->saddr, pptr[0],
  58. iph->daddr, pptr[1]);
  59. } else {
  60. cp = ip_vs_conn_out_get(iph->protocol,
  61. iph->daddr, pptr[1],
  62. iph->saddr, pptr[0]);
  63. }
  64. return cp;
  65. }
  66. static int
  67. udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
  68. int *verdict, struct ip_vs_conn **cpp)
  69. {
  70. struct ip_vs_service *svc;
  71. struct udphdr _udph, *uh;
  72. uh = skb_header_pointer(skb, ip_hdrlen(skb),
  73. sizeof(_udph), &_udph);
  74. if (uh == NULL) {
  75. *verdict = NF_DROP;
  76. return 0;
  77. }
  78. if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
  79. ip_hdr(skb)->daddr, uh->dest))) {
  80. if (ip_vs_todrop()) {
  81. /*
  82. * It seems that we are very loaded.
  83. * We have to drop this packet :(
  84. */
  85. ip_vs_service_put(svc);
  86. *verdict = NF_DROP;
  87. return 0;
  88. }
  89. /*
  90. * Let the virtual server select a real server for the
  91. * incoming connection, and create a connection entry.
  92. */
  93. *cpp = ip_vs_schedule(svc, skb);
  94. if (!*cpp) {
  95. *verdict = ip_vs_leave(svc, skb, pp);
  96. return 0;
  97. }
  98. ip_vs_service_put(svc);
  99. }
  100. return 1;
  101. }
  102. static inline void
  103. udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
  104. __be16 oldport, __be16 newport)
  105. {
  106. uhdr->check =
  107. csum_fold(ip_vs_check_diff4(oldip, newip,
  108. ip_vs_check_diff2(oldport, newport,
  109. ~csum_unfold(uhdr->check))));
  110. if (!uhdr->check)
  111. uhdr->check = CSUM_MANGLED_0;
  112. }
  113. static int
  114. udp_snat_handler(struct sk_buff **pskb,
  115. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  116. {
  117. struct udphdr *udph;
  118. const unsigned int udphoff = ip_hdrlen(*pskb);
  119. /* csum_check requires unshared skb */
  120. if (!skb_make_writable(*pskb, udphoff+sizeof(*udph)))
  121. return 0;
  122. if (unlikely(cp->app != NULL)) {
  123. /* Some checks before mangling */
  124. if (pp->csum_check && !pp->csum_check(*pskb, pp))
  125. return 0;
  126. /*
  127. * Call application helper if needed
  128. */
  129. if (!ip_vs_app_pkt_out(cp, pskb))
  130. return 0;
  131. }
  132. udph = (void *)ip_hdr(*pskb) + udphoff;
  133. udph->source = cp->vport;
  134. /*
  135. * Adjust UDP checksums
  136. */
  137. if (!cp->app && (udph->check != 0)) {
  138. /* Only port and addr are changed, do fast csum update */
  139. udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
  140. cp->dport, cp->vport);
  141. if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  142. (*pskb)->ip_summed = CHECKSUM_NONE;
  143. } else {
  144. /* full checksum calculation */
  145. udph->check = 0;
  146. (*pskb)->csum = skb_checksum(*pskb, udphoff,
  147. (*pskb)->len - udphoff, 0);
  148. udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
  149. (*pskb)->len - udphoff,
  150. cp->protocol,
  151. (*pskb)->csum);
  152. if (udph->check == 0)
  153. udph->check = CSUM_MANGLED_0;
  154. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  155. pp->name, udph->check,
  156. (char*)&(udph->check) - (char*)udph);
  157. }
  158. return 1;
  159. }
  160. static int
  161. udp_dnat_handler(struct sk_buff **pskb,
  162. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  163. {
  164. struct udphdr *udph;
  165. unsigned int udphoff = ip_hdrlen(*pskb);
  166. /* csum_check requires unshared skb */
  167. if (!skb_make_writable(*pskb, udphoff+sizeof(*udph)))
  168. return 0;
  169. if (unlikely(cp->app != NULL)) {
  170. /* Some checks before mangling */
  171. if (pp->csum_check && !pp->csum_check(*pskb, pp))
  172. return 0;
  173. /*
  174. * Attempt ip_vs_app call.
  175. * It will fix ip_vs_conn
  176. */
  177. if (!ip_vs_app_pkt_in(cp, pskb))
  178. return 0;
  179. }
  180. udph = (void *)ip_hdr(*pskb) + udphoff;
  181. udph->dest = cp->dport;
  182. /*
  183. * Adjust UDP checksums
  184. */
  185. if (!cp->app && (udph->check != 0)) {
  186. /* Only port and addr are changed, do fast csum update */
  187. udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
  188. cp->vport, cp->dport);
  189. if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  190. (*pskb)->ip_summed = CHECKSUM_NONE;
  191. } else {
  192. /* full checksum calculation */
  193. udph->check = 0;
  194. (*pskb)->csum = skb_checksum(*pskb, udphoff,
  195. (*pskb)->len - udphoff, 0);
  196. udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
  197. (*pskb)->len - udphoff,
  198. cp->protocol,
  199. (*pskb)->csum);
  200. if (udph->check == 0)
  201. udph->check = CSUM_MANGLED_0;
  202. (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
  203. }
  204. return 1;
  205. }
  206. static int
  207. udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
  208. {
  209. struct udphdr _udph, *uh;
  210. const unsigned int udphoff = ip_hdrlen(skb);
  211. uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
  212. if (uh == NULL)
  213. return 0;
  214. if (uh->check != 0) {
  215. switch (skb->ip_summed) {
  216. case CHECKSUM_NONE:
  217. skb->csum = skb_checksum(skb, udphoff,
  218. skb->len - udphoff, 0);
  219. case CHECKSUM_COMPLETE:
  220. if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
  221. ip_hdr(skb)->daddr,
  222. skb->len - udphoff,
  223. ip_hdr(skb)->protocol,
  224. skb->csum)) {
  225. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  226. "Failed checksum for");
  227. return 0;
  228. }
  229. break;
  230. default:
  231. /* No need to checksum. */
  232. break;
  233. }
  234. }
  235. return 1;
  236. }
  237. /*
  238. * Note: the caller guarantees that only one of register_app,
  239. * unregister_app or app_conn_bind is called each time.
  240. */
  241. #define UDP_APP_TAB_BITS 4
  242. #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
  243. #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
  244. static struct list_head udp_apps[UDP_APP_TAB_SIZE];
  245. static DEFINE_SPINLOCK(udp_app_lock);
  246. static inline __u16 udp_app_hashkey(__be16 port)
  247. {
  248. return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
  249. & UDP_APP_TAB_MASK;
  250. }
  251. static int udp_register_app(struct ip_vs_app *inc)
  252. {
  253. struct ip_vs_app *i;
  254. __u16 hash;
  255. __be16 port = inc->port;
  256. int ret = 0;
  257. hash = udp_app_hashkey(port);
  258. spin_lock_bh(&udp_app_lock);
  259. list_for_each_entry(i, &udp_apps[hash], p_list) {
  260. if (i->port == port) {
  261. ret = -EEXIST;
  262. goto out;
  263. }
  264. }
  265. list_add(&inc->p_list, &udp_apps[hash]);
  266. atomic_inc(&ip_vs_protocol_udp.appcnt);
  267. out:
  268. spin_unlock_bh(&udp_app_lock);
  269. return ret;
  270. }
  271. static void
  272. udp_unregister_app(struct ip_vs_app *inc)
  273. {
  274. spin_lock_bh(&udp_app_lock);
  275. atomic_dec(&ip_vs_protocol_udp.appcnt);
  276. list_del(&inc->p_list);
  277. spin_unlock_bh(&udp_app_lock);
  278. }
  279. static int udp_app_conn_bind(struct ip_vs_conn *cp)
  280. {
  281. int hash;
  282. struct ip_vs_app *inc;
  283. int result = 0;
  284. /* Default binding: bind app only for NAT */
  285. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  286. return 0;
  287. /* Lookup application incarnations and bind the right one */
  288. hash = udp_app_hashkey(cp->vport);
  289. spin_lock(&udp_app_lock);
  290. list_for_each_entry(inc, &udp_apps[hash], p_list) {
  291. if (inc->port == cp->vport) {
  292. if (unlikely(!ip_vs_app_inc_get(inc)))
  293. break;
  294. spin_unlock(&udp_app_lock);
  295. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  296. "%u.%u.%u.%u:%u to app %s on port %u\n",
  297. __FUNCTION__,
  298. NIPQUAD(cp->caddr), ntohs(cp->cport),
  299. NIPQUAD(cp->vaddr), ntohs(cp->vport),
  300. inc->name, ntohs(inc->port));
  301. cp->app = inc;
  302. if (inc->init_conn)
  303. result = inc->init_conn(inc, cp);
  304. goto out;
  305. }
  306. }
  307. spin_unlock(&udp_app_lock);
  308. out:
  309. return result;
  310. }
  311. static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
  312. [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
  313. [IP_VS_UDP_S_LAST] = 2*HZ,
  314. };
  315. static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
  316. [IP_VS_UDP_S_NORMAL] = "UDP",
  317. [IP_VS_UDP_S_LAST] = "BUG!",
  318. };
  319. static int
  320. udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
  321. {
  322. return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
  323. udp_state_name_table, sname, to);
  324. }
  325. static const char * udp_state_name(int state)
  326. {
  327. if (state >= IP_VS_UDP_S_LAST)
  328. return "ERR!";
  329. return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
  330. }
  331. static int
  332. udp_state_transition(struct ip_vs_conn *cp, int direction,
  333. const struct sk_buff *skb,
  334. struct ip_vs_protocol *pp)
  335. {
  336. cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
  337. return 1;
  338. }
  339. static void udp_init(struct ip_vs_protocol *pp)
  340. {
  341. IP_VS_INIT_HASH_TABLE(udp_apps);
  342. pp->timeout_table = udp_timeouts;
  343. }
  /*
   * Protocol exit hook: intentionally empty, nothing is torn down here.
   */
  static void udp_exit(struct ip_vs_protocol *pp)
  {
      /* nothing to do */
  }
  347. struct ip_vs_protocol ip_vs_protocol_udp = {
  348. .name = "UDP",
  349. .protocol = IPPROTO_UDP,
  350. .dont_defrag = 0,
  351. .init = udp_init,
  352. .exit = udp_exit,
  353. .conn_schedule = udp_conn_schedule,
  354. .conn_in_get = udp_conn_in_get,
  355. .conn_out_get = udp_conn_out_get,
  356. .snat_handler = udp_snat_handler,
  357. .dnat_handler = udp_dnat_handler,
  358. .csum_check = udp_csum_check,
  359. .state_transition = udp_state_transition,
  360. .state_name = udp_state_name,
  361. .register_app = udp_register_app,
  362. .unregister_app = udp_unregister_app,
  363. .app_conn_bind = udp_app_conn_bind,
  364. .debug_packet = ip_vs_tcpudp_debug_packet,
  365. .timeout_change = NULL,
  366. .set_state_timeout = udp_set_state_timeout,
  367. };