ip_vs_proto_udp.c
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Authors:	Wensong Zhang <wensong@linuxvirtualserver.org>
 *		Julian Anastasov <ja@ssi.bg>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/in.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>

#include <net/ip_vs.h>
#include <net/ip.h>
#include <net/ip6_checksum.h>
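
/*
 * Look up the connection entry for a packet in the original
 * (client -> virtual server) direction.  Addresses come from the
 * pre-parsed IP header and the ports are read from the UDP header at
 * proto_off; with "inverse" set the lookup is done with the tuple
 * reversed.
 */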
static struct ip_vs_conn *
udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct ip_vs_iphdr *iph, unsigned int proto_off,
		int inverse)
{
	struct ip_vs_conn *cp;
	__be16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		cp = ip_vs_conn_in_get(af, iph->protocol,
				       &iph->saddr, pptr[0],
				       &iph->daddr, pptr[1]);
	} else {
		cp = ip_vs_conn_in_get(af, iph->protocol,
				       &iph->daddr, pptr[1],
				       &iph->saddr, pptr[0]);
	}

	return cp;
}
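
/*
 * As above, but look up the entry for a packet in the reply
 * (real server -> client) direction via ip_vs_conn_out_get().
 */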
static struct ip_vs_conn *
udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
		 int inverse)
{
	struct ip_vs_conn *cp;
	__be16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		cp = ip_vs_conn_out_get(af, iph->protocol,
					&iph->saddr, pptr[0],
					&iph->daddr, pptr[1]);
	} else {
		cp = ip_vs_conn_out_get(af, iph->protocol,
					&iph->daddr, pptr[1],
					&iph->saddr, pptr[0]);
	}

	return cp;
}
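
/*
 * Schedule a new UDP "connection": find the virtual service matching
 * the destination address/port, let it pick a real server and create
 * the connection entry.  Returns 0 with *verdict set when the packet
 * must not be passed on, 1 to continue processing.
 */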
static int
udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct udphdr _udph, *uh;
	struct ip_vs_iphdr iph;

	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

	uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
	if (uh == NULL) {
		*verdict = NF_DROP;
		return 0;
	}

	svc = ip_vs_service_get(af, skb->mark, iph.protocol,
				&iph.daddr, uh->dest);
	if (svc) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}
	return 1;
}
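
/*
 * Incrementally update the UDP checksum for an address and port
 * rewrite, mapping a zero result to CSUM_MANGLED_0 so it is not
 * mistaken for "no checksum".
 */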
static inline void
udp_fast_csum_update(int af, struct udphdr *uhdr,
		     const union nf_inet_addr *oldip,
		     const union nf_inet_addr *newip,
		     __be16 oldport, __be16 newport)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		uhdr->check =
			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
					 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(uhdr->check))));
	else
#endif
		uhdr->check =
			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
					 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(uhdr->check))));
	if (!uhdr->check)
		uhdr->check = CSUM_MANGLED_0;
}
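
/*
 * Incrementally update the pseudo-header checksum for an address and
 * length change; used when the device will finish the checksum
 * (CHECKSUM_PARTIAL).
 */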
static inline void
udp_partial_csum_update(int af, struct udphdr *uhdr,
			const union nf_inet_addr *oldip,
			const union nf_inet_addr *newip,
			__be16 oldlen, __be16 newlen)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		uhdr->check =
			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
					 ip_vs_check_diff2(oldlen, newlen,
						~csum_unfold(uhdr->check))));
	else
#endif
		uhdr->check =
			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
					 ip_vs_check_diff2(oldlen, newlen,
						~csum_unfold(uhdr->check))));
}
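
/*
 * Source NAT for reply packets leaving the director: rewrite the source
 * port to the virtual port and fix the UDP checksum, using the cheapest
 * method the skb allows (partial, incremental, or full recalculation).
 */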
static int
udp_snat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	unsigned int udphoff;
	int oldlen;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		udphoff = sizeof(struct ipv6hdr);
	else
#endif
		udphoff = ip_hdrlen(skb);
	oldlen = skb->len - udphoff;

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
			return 0;

		/*
		 * Call application helper if needed
		 */
		if (!ip_vs_app_pkt_out(cp, skb))
			return 0;
	}

	udph = (void *)skb_network_header(skb) + udphoff;
	udph->source = cp->vport;

	/*
	 * Adjust UDP checksums
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* lengths are __be16, so convert with htons() */
		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
					htons(oldlen),
					htons(skb->len - udphoff));
	} else if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
				     cp->dport, cp->vport);
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
#ifdef CONFIG_IP_VS_IPV6
		if (cp->af == AF_INET6)
			udph->check = csum_ipv6_magic(&cp->vaddr.in6,
						      &cp->caddr.in6,
						      skb->len - udphoff,
						      cp->protocol, skb->csum);
		else
#endif
			udph->check = csum_tcpudp_magic(cp->vaddr.ip,
							cp->caddr.ip,
							skb->len - udphoff,
							cp->protocol,
							skb->csum);
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, udph->check,
			  (char*)&(udph->check) - (char*)udph);
	}
	return 1;
}
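
/*
 * Destination NAT for packets forwarded to the real server: rewrite the
 * destination port to the real server's port and adjust the UDP
 * checksum accordingly.
 */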
static int
udp_dnat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct udphdr *udph;
	unsigned int udphoff;
	int oldlen;

#ifdef CONFIG_IP_VS_IPV6
	if (cp->af == AF_INET6)
		udphoff = sizeof(struct ipv6hdr);
	else
#endif
		udphoff = ip_hdrlen(skb);
	oldlen = skb->len - udphoff;

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
			return 0;

		/*
		 * Attempt ip_vs_app call.
		 * It will fix ip_vs_conn
		 */
		if (!ip_vs_app_pkt_in(cp, skb))
			return 0;
	}

	udph = (void *)skb_network_header(skb) + udphoff;
	udph->dest = cp->dport;

	/*
	 * Adjust UDP checksums
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* destination changes vaddr -> daddr; lengths are __be16 */
		udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
					htons(oldlen),
					htons(skb->len - udphoff));
	} else if (!cp->app && (udph->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
				     cp->vport, cp->dport);
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		udph->check = 0;
		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
#ifdef CONFIG_IP_VS_IPV6
		if (cp->af == AF_INET6)
			udph->check = csum_ipv6_magic(&cp->caddr.in6,
						      &cp->daddr.in6,
						      skb->len - udphoff,
						      cp->protocol, skb->csum);
		else
#endif
			udph->check = csum_tcpudp_magic(cp->caddr.ip,
							cp->daddr.ip,
							skb->len - udphoff,
							cp->protocol,
							skb->csum);
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}
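
/*
 * Verify the UDP checksum of an incoming packet.  A zero checksum is
 * accepted (checksum not used); otherwise the checksum is validated
 * unless the stack has already done so.
 */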
static int
udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	struct udphdr _udph, *uh;
	unsigned int udphoff;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		udphoff = sizeof(struct ipv6hdr);
	else
#endif
		udphoff = ip_hdrlen(skb);

	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
	if (uh == NULL)
		return 0;

	if (uh->check != 0) {
		switch (skb->ip_summed) {
		case CHECKSUM_NONE:
			skb->csum = skb_checksum(skb, udphoff,
						 skb->len - udphoff, 0);
			/* fall through */
		case CHECKSUM_COMPLETE:
#ifdef CONFIG_IP_VS_IPV6
			if (af == AF_INET6) {
				if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						    &ipv6_hdr(skb)->daddr,
						    skb->len - udphoff,
						    ipv6_hdr(skb)->nexthdr,
						    skb->csum)) {
					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
							 "Failed checksum for");
					return 0;
				}
			} else
#endif
				if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
						      ip_hdr(skb)->daddr,
						      skb->len - udphoff,
						      ip_hdr(skb)->protocol,
						      skb->csum)) {
					IP_VS_DBG_RL_PKT(0, pp, skb, 0,
							 "Failed checksum for");
					return 0;
				}
			break;
		default:
			/* No need to checksum. */
			break;
		}
	}
	return 1;
}

/*
 * Note: the caller guarantees that only one of register_app,
 * unregister_app or app_conn_bind is called each time.
 */

#define	UDP_APP_TAB_BITS	4
#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)

static struct list_head udp_apps[UDP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(udp_app_lock);

static inline __u16 udp_app_hashkey(__be16 port)
{
	return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
		& UDP_APP_TAB_MASK;
}
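
/*
 * Register an application helper incarnation for its UDP port,
 * refusing duplicates on the same port.
 */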
static int udp_register_app(struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash;
	__be16 port = inc->port;
	int ret = 0;

	hash = udp_app_hashkey(port);

	spin_lock_bh(&udp_app_lock);
	list_for_each_entry(i, &udp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &udp_apps[hash]);
	atomic_inc(&ip_vs_protocol_udp.appcnt);

  out:
	spin_unlock_bh(&udp_app_lock);
	return ret;
}

static void
udp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&udp_app_lock);
	atomic_dec(&ip_vs_protocol_udp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&udp_app_lock);
}
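
/*
 * Bind a new NAT connection to the application helper registered on its
 * virtual port, if any, and let the helper initialise the connection.
 */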
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = udp_app_hashkey(cp->vport);

	spin_lock(&udp_app_lock);
	list_for_each_entry(inc, &udp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			spin_unlock(&udp_app_lock);

			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
				      "%s:%u to app %s on port %u\n",
				      __func__,
				      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
				      ntohs(cp->cport),
				      IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
				      ntohs(cp->vport),
				      inc->name, ntohs(inc->port));

			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&udp_app_lock);

  out:
	return result;
}
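
/*
 * Per-state timeouts and state names; UDP only has the NORMAL state.
 */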
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]	=	5*60*HZ,
	[IP_VS_UDP_S_LAST]	=	2*HZ,
};

static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL]	=	"UDP",
	[IP_VS_UDP_S_LAST]	=	"BUG!",
};

static int
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
				       udp_state_name_table, sname, to);
}

static const char * udp_state_name(int state)
{
	if (state >= IP_VS_UDP_S_LAST)
		return "ERR!";
	return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
}

static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
	return 1;
}

static void udp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(udp_apps);
	pp->timeout_table = udp_timeouts;
}

static void udp_exit(struct ip_vs_protocol *pp)
{
}
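
/*
 * Protocol descriptor hooked into the IPVS core.
 */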
struct ip_vs_protocol ip_vs_protocol_udp = {
	.name =			"UDP",
	.protocol =		IPPROTO_UDP,
	.num_states =		IP_VS_UDP_S_LAST,
	.dont_defrag =		0,
	.init =			udp_init,
	.exit =			udp_exit,
	.conn_schedule =	udp_conn_schedule,
	.conn_in_get =		udp_conn_in_get,
	.conn_out_get =		udp_conn_out_get,
	.snat_handler =		udp_snat_handler,
	.dnat_handler =		udp_dnat_handler,
	.csum_check =		udp_csum_check,
	.state_transition =	udp_state_transition,
	.state_name =		udp_state_name,
	.register_app =		udp_register_app,
	.unregister_app =	udp_unregister_app,
	.app_conn_bind =	udp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	NULL,
	.set_state_timeout =	udp_set_state_timeout,
};