ip_vs_proto_udp.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. /*
  2. * ip_vs_proto_udp.c: UDP load balancing support for IPVS
  3. *
  4. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  5. * Julian Anastasov <ja@ssi.bg>
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. *
  12. * Changes:
  13. *
  14. */
  15. #include <linux/in.h>
  16. #include <linux/ip.h>
  17. #include <linux/kernel.h>
  18. #include <linux/netfilter.h>
  19. #include <linux/netfilter_ipv4.h>
  20. #include <linux/udp.h>
  21. #include <net/ip_vs.h>
  22. #include <net/ip.h>
  23. static struct ip_vs_conn *
  24. udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
  25. const struct ip_vs_iphdr *iph, unsigned int proto_off,
  26. int inverse)
  27. {
  28. struct ip_vs_conn *cp;
  29. __be16 _ports[2], *pptr;
  30. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  31. if (pptr == NULL)
  32. return NULL;
  33. if (likely(!inverse)) {
  34. cp = ip_vs_conn_in_get(iph->protocol,
  35. iph->saddr.ip, pptr[0],
  36. iph->daddr.ip, pptr[1]);
  37. } else {
  38. cp = ip_vs_conn_in_get(iph->protocol,
  39. iph->daddr.ip, pptr[1],
  40. iph->saddr.ip, pptr[0]);
  41. }
  42. return cp;
  43. }
  44. static struct ip_vs_conn *
  45. udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
  46. const struct ip_vs_iphdr *iph, unsigned int proto_off,
  47. int inverse)
  48. {
  49. struct ip_vs_conn *cp;
  50. __be16 _ports[2], *pptr;
  51. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  52. if (pptr == NULL)
  53. return NULL;
  54. if (likely(!inverse)) {
  55. cp = ip_vs_conn_out_get(iph->protocol,
  56. iph->saddr.ip, pptr[0],
  57. iph->daddr.ip, pptr[1]);
  58. } else {
  59. cp = ip_vs_conn_out_get(iph->protocol,
  60. iph->daddr.ip, pptr[1],
  61. iph->saddr.ip, pptr[0]);
  62. }
  63. return cp;
  64. }
  65. static int
  66. udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
  67. int *verdict, struct ip_vs_conn **cpp)
  68. {
  69. struct ip_vs_service *svc;
  70. struct udphdr _udph, *uh;
  71. struct ip_vs_iphdr iph;
  72. ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
  73. uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
  74. if (uh == NULL) {
  75. *verdict = NF_DROP;
  76. return 0;
  77. }
  78. svc = ip_vs_service_get(af, skb->mark, iph.protocol,
  79. &iph.daddr, uh->dest);
  80. if (svc) {
  81. if (ip_vs_todrop()) {
  82. /*
  83. * It seems that we are very loaded.
  84. * We have to drop this packet :(
  85. */
  86. ip_vs_service_put(svc);
  87. *verdict = NF_DROP;
  88. return 0;
  89. }
  90. /*
  91. * Let the virtual server select a real server for the
  92. * incoming connection, and create a connection entry.
  93. */
  94. *cpp = ip_vs_schedule(svc, skb);
  95. if (!*cpp) {
  96. *verdict = ip_vs_leave(svc, skb, pp);
  97. return 0;
  98. }
  99. ip_vs_service_put(svc);
  100. }
  101. return 1;
  102. }
  103. static inline void
  104. udp_fast_csum_update(int af, struct udphdr *uhdr,
  105. const union nf_inet_addr *oldip,
  106. const union nf_inet_addr *newip,
  107. __be16 oldport, __be16 newport)
  108. {
  109. #ifdef CONFIG_IP_VS_IPV6
  110. if (af == AF_INET6)
  111. uhdr->check =
  112. csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
  113. ip_vs_check_diff2(oldport, newport,
  114. ~csum_unfold(uhdr->check))));
  115. else
  116. #endif
  117. uhdr->check =
  118. csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
  119. ip_vs_check_diff2(oldport, newport,
  120. ~csum_unfold(uhdr->check))));
  121. if (!uhdr->check)
  122. uhdr->check = CSUM_MANGLED_0;
  123. }
  124. static int
  125. udp_snat_handler(struct sk_buff *skb,
  126. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  127. {
  128. struct udphdr *udph;
  129. unsigned int udphoff;
  130. #ifdef CONFIG_IP_VS_IPV6
  131. if (cp->af == AF_INET6)
  132. udphoff = sizeof(struct ipv6hdr);
  133. else
  134. #endif
  135. udphoff = ip_hdrlen(skb);
  136. /* csum_check requires unshared skb */
  137. if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  138. return 0;
  139. if (unlikely(cp->app != NULL)) {
  140. /* Some checks before mangling */
  141. if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
  142. return 0;
  143. /*
  144. * Call application helper if needed
  145. */
  146. if (!ip_vs_app_pkt_out(cp, skb))
  147. return 0;
  148. }
  149. udph = (void *)skb_network_header(skb) + udphoff;
  150. udph->source = cp->vport;
  151. /*
  152. * Adjust UDP checksums
  153. */
  154. if (!cp->app && (udph->check != 0)) {
  155. /* Only port and addr are changed, do fast csum update */
  156. udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
  157. cp->dport, cp->vport);
  158. if (skb->ip_summed == CHECKSUM_COMPLETE)
  159. skb->ip_summed = CHECKSUM_NONE;
  160. } else {
  161. /* full checksum calculation */
  162. udph->check = 0;
  163. skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  164. #ifdef CONFIG_IP_VS_IPV6
  165. if (cp->af == AF_INET6)
  166. udph->check = csum_ipv6_magic(&cp->vaddr.in6,
  167. &cp->caddr.in6,
  168. skb->len - udphoff,
  169. cp->protocol, skb->csum);
  170. else
  171. #endif
  172. udph->check = csum_tcpudp_magic(cp->vaddr.ip,
  173. cp->caddr.ip,
  174. skb->len - udphoff,
  175. cp->protocol,
  176. skb->csum);
  177. if (udph->check == 0)
  178. udph->check = CSUM_MANGLED_0;
  179. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  180. pp->name, udph->check,
  181. (char*)&(udph->check) - (char*)udph);
  182. }
  183. return 1;
  184. }
  185. static int
  186. udp_dnat_handler(struct sk_buff *skb,
  187. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  188. {
  189. struct udphdr *udph;
  190. unsigned int udphoff;
  191. #ifdef CONFIG_IP_VS_IPV6
  192. if (cp->af == AF_INET6)
  193. udphoff = sizeof(struct ipv6hdr);
  194. else
  195. #endif
  196. udphoff = ip_hdrlen(skb);
  197. /* csum_check requires unshared skb */
  198. if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  199. return 0;
  200. if (unlikely(cp->app != NULL)) {
  201. /* Some checks before mangling */
  202. if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
  203. return 0;
  204. /*
  205. * Attempt ip_vs_app call.
  206. * It will fix ip_vs_conn
  207. */
  208. if (!ip_vs_app_pkt_in(cp, skb))
  209. return 0;
  210. }
  211. udph = (void *)skb_network_header(skb) + udphoff;
  212. udph->dest = cp->dport;
  213. /*
  214. * Adjust UDP checksums
  215. */
  216. if (!cp->app && (udph->check != 0)) {
  217. /* Only port and addr are changed, do fast csum update */
  218. udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
  219. cp->vport, cp->dport);
  220. if (skb->ip_summed == CHECKSUM_COMPLETE)
  221. skb->ip_summed = CHECKSUM_NONE;
  222. } else {
  223. /* full checksum calculation */
  224. udph->check = 0;
  225. skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  226. #ifdef CONFIG_IP_VS_IPV6
  227. if (cp->af == AF_INET6)
  228. udph->check = csum_ipv6_magic(&cp->caddr.in6,
  229. &cp->daddr.in6,
  230. skb->len - udphoff,
  231. cp->protocol, skb->csum);
  232. else
  233. #endif
  234. udph->check = csum_tcpudp_magic(cp->caddr.ip,
  235. cp->daddr.ip,
  236. skb->len - udphoff,
  237. cp->protocol,
  238. skb->csum);
  239. if (udph->check == 0)
  240. udph->check = CSUM_MANGLED_0;
  241. skb->ip_summed = CHECKSUM_UNNECESSARY;
  242. }
  243. return 1;
  244. }
  245. static int
  246. udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
  247. {
  248. struct udphdr _udph, *uh;
  249. unsigned int udphoff;
  250. #ifdef CONFIG_IP_VS_IPV6
  251. if (af == AF_INET6)
  252. udphoff = sizeof(struct ipv6hdr);
  253. else
  254. #endif
  255. udphoff = ip_hdrlen(skb);
  256. uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
  257. if (uh == NULL)
  258. return 0;
  259. if (uh->check != 0) {
  260. switch (skb->ip_summed) {
  261. case CHECKSUM_NONE:
  262. skb->csum = skb_checksum(skb, udphoff,
  263. skb->len - udphoff, 0);
  264. case CHECKSUM_COMPLETE:
  265. #ifdef CONFIG_IP_VS_IPV6
  266. if (af == AF_INET6) {
  267. if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
  268. &ipv6_hdr(skb)->daddr,
  269. skb->len - udphoff,
  270. ipv6_hdr(skb)->nexthdr,
  271. skb->csum)) {
  272. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  273. "Failed checksum for");
  274. return 0;
  275. }
  276. } else
  277. #endif
  278. if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
  279. ip_hdr(skb)->daddr,
  280. skb->len - udphoff,
  281. ip_hdr(skb)->protocol,
  282. skb->csum)) {
  283. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  284. "Failed checksum for");
  285. return 0;
  286. }
  287. break;
  288. default:
  289. /* No need to checksum. */
  290. break;
  291. }
  292. }
  293. return 1;
  294. }
/*
 *	Hash table of application helpers registered for UDP, keyed by
 *	port.
 *
 *	Note: the caller guarantees that only one of register_app,
 *	unregister_app or app_conn_bind is called each time.
 */

/* log2 of the number of hash buckets */
#define UDP_APP_TAB_BITS 4
/* number of hash buckets */
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
/* mask reducing a hash value to a valid bucket index */
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)

/* bucket heads, indexed by udp_app_hashkey(port) */
static struct list_head udp_apps[UDP_APP_TAB_SIZE];
/* taken around every walk or modification of udp_apps in this file */
static DEFINE_SPINLOCK(udp_app_lock);
  304. static inline __u16 udp_app_hashkey(__be16 port)
  305. {
  306. return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
  307. & UDP_APP_TAB_MASK;
  308. }
  309. static int udp_register_app(struct ip_vs_app *inc)
  310. {
  311. struct ip_vs_app *i;
  312. __u16 hash;
  313. __be16 port = inc->port;
  314. int ret = 0;
  315. hash = udp_app_hashkey(port);
  316. spin_lock_bh(&udp_app_lock);
  317. list_for_each_entry(i, &udp_apps[hash], p_list) {
  318. if (i->port == port) {
  319. ret = -EEXIST;
  320. goto out;
  321. }
  322. }
  323. list_add(&inc->p_list, &udp_apps[hash]);
  324. atomic_inc(&ip_vs_protocol_udp.appcnt);
  325. out:
  326. spin_unlock_bh(&udp_app_lock);
  327. return ret;
  328. }
  329. static void
  330. udp_unregister_app(struct ip_vs_app *inc)
  331. {
  332. spin_lock_bh(&udp_app_lock);
  333. atomic_dec(&ip_vs_protocol_udp.appcnt);
  334. list_del(&inc->p_list);
  335. spin_unlock_bh(&udp_app_lock);
  336. }
  337. static int udp_app_conn_bind(struct ip_vs_conn *cp)
  338. {
  339. int hash;
  340. struct ip_vs_app *inc;
  341. int result = 0;
  342. /* Default binding: bind app only for NAT */
  343. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  344. return 0;
  345. /* Lookup application incarnations and bind the right one */
  346. hash = udp_app_hashkey(cp->vport);
  347. spin_lock(&udp_app_lock);
  348. list_for_each_entry(inc, &udp_apps[hash], p_list) {
  349. if (inc->port == cp->vport) {
  350. if (unlikely(!ip_vs_app_inc_get(inc)))
  351. break;
  352. spin_unlock(&udp_app_lock);
  353. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  354. "%u.%u.%u.%u:%u to app %s on port %u\n",
  355. __func__,
  356. NIPQUAD(cp->caddr.ip), ntohs(cp->cport),
  357. NIPQUAD(cp->vaddr.ip), ntohs(cp->vport),
  358. inc->name, ntohs(inc->port));
  359. cp->app = inc;
  360. if (inc->init_conn)
  361. result = inc->init_conn(inc, cp);
  362. goto out;
  363. }
  364. }
  365. spin_unlock(&udp_app_lock);
  366. out:
  367. return result;
  368. }
/*
 * Default per-state timeouts for UDP connections, indexed by
 * IP_VS_UDP_S_* state.  Values are multiples of HZ; udp_init() installs
 * this array as the protocol's timeout_table.
 */
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
	[IP_VS_UDP_S_LAST] = 2*HZ,
};
/*
 * Printable names of the UDP connection states, indexed by
 * IP_VS_UDP_S_* state.  Used by udp_state_name() and passed to
 * ip_vs_set_state_timeout() by udp_set_state_timeout().
 */
static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
	[IP_VS_UDP_S_NORMAL] = "UDP",
	[IP_VS_UDP_S_LAST] = "BUG!",
};
  377. static int
  378. udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
  379. {
  380. return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
  381. udp_state_name_table, sname, to);
  382. }
  383. static const char * udp_state_name(int state)
  384. {
  385. if (state >= IP_VS_UDP_S_LAST)
  386. return "ERR!";
  387. return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
  388. }
  389. static int
  390. udp_state_transition(struct ip_vs_conn *cp, int direction,
  391. const struct sk_buff *skb,
  392. struct ip_vs_protocol *pp)
  393. {
  394. cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
  395. return 1;
  396. }
  397. static void udp_init(struct ip_vs_protocol *pp)
  398. {
  399. IP_VS_INIT_HASH_TABLE(udp_apps);
  400. pp->timeout_table = udp_timeouts;
  401. }
/*
 * Protocol exit hook.  Intentionally empty: udp_init() only sets up
 * statically allocated data, so there is nothing to release.
 */
static void udp_exit(struct ip_vs_protocol *pp)
{
}
/*
 * UDP protocol descriptor for IPVS.  Wires the handlers defined in this
 * file into the generic ip_vs_protocol interface.
 */
struct ip_vs_protocol ip_vs_protocol_udp = {
	.name = "UDP",
	.protocol = IPPROTO_UDP,
	.num_states = IP_VS_UDP_S_LAST,
	.dont_defrag = 0,
	/* module-level setup / teardown */
	.init = udp_init,
	.exit = udp_exit,
	/* connection scheduling and lookup */
	.conn_schedule = udp_conn_schedule,
	.conn_in_get = udp_conn_in_get,
	.conn_out_get = udp_conn_out_get,
	/* NAT packet mangling and checksum verification */
	.snat_handler = udp_snat_handler,
	.dnat_handler = udp_dnat_handler,
	.csum_check = udp_csum_check,
	/* connection state handling */
	.state_transition = udp_state_transition,
	.state_name = udp_state_name,
	/* application helper management */
	.register_app = udp_register_app,
	.unregister_app = udp_unregister_app,
	.app_conn_bind = udp_app_conn_bind,
	.debug_packet = ip_vs_tcpudp_debug_packet,
	/* no timeout_change callback is provided for UDP */
	.timeout_change = NULL,
	.set_state_timeout = udp_set_state_timeout,
};