ip_vs_proto_udp.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /*
  2. * ip_vs_proto_udp.c: UDP load balancing support for IPVS
  3. *
  4. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  5. * Julian Anastasov <ja@ssi.bg>
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. *
  12. * Changes:
  13. *
  14. */
  15. #include <linux/in.h>
  16. #include <linux/ip.h>
  17. #include <linux/kernel.h>
  18. #include <linux/netfilter.h>
  19. #include <linux/netfilter_ipv4.h>
  20. #include <linux/udp.h>
  21. #include <net/ip_vs.h>
  22. #include <net/ip.h>
  23. static struct ip_vs_conn *
  24. udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  25. const struct iphdr *iph, unsigned int proto_off, int inverse)
  26. {
  27. struct ip_vs_conn *cp;
  28. __be16 _ports[2], *pptr;
  29. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  30. if (pptr == NULL)
  31. return NULL;
  32. if (likely(!inverse)) {
  33. cp = ip_vs_conn_in_get(iph->protocol,
  34. iph->saddr, pptr[0],
  35. iph->daddr, pptr[1]);
  36. } else {
  37. cp = ip_vs_conn_in_get(iph->protocol,
  38. iph->daddr, pptr[1],
  39. iph->saddr, pptr[0]);
  40. }
  41. return cp;
  42. }
  43. static struct ip_vs_conn *
  44. udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  45. const struct iphdr *iph, unsigned int proto_off, int inverse)
  46. {
  47. struct ip_vs_conn *cp;
  48. __be16 _ports[2], *pptr;
  49. pptr = skb_header_pointer(skb, ip_hdrlen(skb),
  50. sizeof(_ports), _ports);
  51. if (pptr == NULL)
  52. return NULL;
  53. if (likely(!inverse)) {
  54. cp = ip_vs_conn_out_get(iph->protocol,
  55. iph->saddr, pptr[0],
  56. iph->daddr, pptr[1]);
  57. } else {
  58. cp = ip_vs_conn_out_get(iph->protocol,
  59. iph->daddr, pptr[1],
  60. iph->saddr, pptr[0]);
  61. }
  62. return cp;
  63. }
  64. static int
  65. udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
  66. int *verdict, struct ip_vs_conn **cpp)
  67. {
  68. struct ip_vs_service *svc;
  69. struct udphdr _udph, *uh;
  70. uh = skb_header_pointer(skb, ip_hdrlen(skb),
  71. sizeof(_udph), &_udph);
  72. if (uh == NULL) {
  73. *verdict = NF_DROP;
  74. return 0;
  75. }
  76. if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
  77. ip_hdr(skb)->daddr, uh->dest))) {
  78. if (ip_vs_todrop()) {
  79. /*
  80. * It seems that we are very loaded.
  81. * We have to drop this packet :(
  82. */
  83. ip_vs_service_put(svc);
  84. *verdict = NF_DROP;
  85. return 0;
  86. }
  87. /*
  88. * Let the virtual server select a real server for the
  89. * incoming connection, and create a connection entry.
  90. */
  91. *cpp = ip_vs_schedule(svc, skb);
  92. if (!*cpp) {
  93. *verdict = ip_vs_leave(svc, skb, pp);
  94. return 0;
  95. }
  96. ip_vs_service_put(svc);
  97. }
  98. return 1;
  99. }
  100. static inline void
  101. udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
  102. __be16 oldport, __be16 newport)
  103. {
  104. uhdr->check =
  105. csum_fold(ip_vs_check_diff4(oldip, newip,
  106. ip_vs_check_diff2(oldport, newport,
  107. ~csum_unfold(uhdr->check))));
  108. if (!uhdr->check)
  109. uhdr->check = CSUM_MANGLED_0;
  110. }
  111. static int
  112. udp_snat_handler(struct sk_buff *skb,
  113. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  114. {
  115. struct udphdr *udph;
  116. const unsigned int udphoff = ip_hdrlen(skb);
  117. /* csum_check requires unshared skb */
  118. if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  119. return 0;
  120. if (unlikely(cp->app != NULL)) {
  121. /* Some checks before mangling */
  122. if (pp->csum_check && !pp->csum_check(skb, pp))
  123. return 0;
  124. /*
  125. * Call application helper if needed
  126. */
  127. if (!ip_vs_app_pkt_out(cp, skb))
  128. return 0;
  129. }
  130. udph = (void *)ip_hdr(skb) + udphoff;
  131. udph->source = cp->vport;
  132. /*
  133. * Adjust UDP checksums
  134. */
  135. if (!cp->app && (udph->check != 0)) {
  136. /* Only port and addr are changed, do fast csum update */
  137. udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
  138. cp->dport, cp->vport);
  139. if (skb->ip_summed == CHECKSUM_COMPLETE)
  140. skb->ip_summed = CHECKSUM_NONE;
  141. } else {
  142. /* full checksum calculation */
  143. udph->check = 0;
  144. skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  145. udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
  146. skb->len - udphoff,
  147. cp->protocol, skb->csum);
  148. if (udph->check == 0)
  149. udph->check = CSUM_MANGLED_0;
  150. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  151. pp->name, udph->check,
  152. (char*)&(udph->check) - (char*)udph);
  153. }
  154. return 1;
  155. }
  156. static int
  157. udp_dnat_handler(struct sk_buff *skb,
  158. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  159. {
  160. struct udphdr *udph;
  161. unsigned int udphoff = ip_hdrlen(skb);
  162. /* csum_check requires unshared skb */
  163. if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  164. return 0;
  165. if (unlikely(cp->app != NULL)) {
  166. /* Some checks before mangling */
  167. if (pp->csum_check && !pp->csum_check(skb, pp))
  168. return 0;
  169. /*
  170. * Attempt ip_vs_app call.
  171. * It will fix ip_vs_conn
  172. */
  173. if (!ip_vs_app_pkt_in(cp, skb))
  174. return 0;
  175. }
  176. udph = (void *)ip_hdr(skb) + udphoff;
  177. udph->dest = cp->dport;
  178. /*
  179. * Adjust UDP checksums
  180. */
  181. if (!cp->app && (udph->check != 0)) {
  182. /* Only port and addr are changed, do fast csum update */
  183. udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
  184. cp->vport, cp->dport);
  185. if (skb->ip_summed == CHECKSUM_COMPLETE)
  186. skb->ip_summed = CHECKSUM_NONE;
  187. } else {
  188. /* full checksum calculation */
  189. udph->check = 0;
  190. skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  191. udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
  192. skb->len - udphoff,
  193. cp->protocol, skb->csum);
  194. if (udph->check == 0)
  195. udph->check = CSUM_MANGLED_0;
  196. skb->ip_summed = CHECKSUM_UNNECESSARY;
  197. }
  198. return 1;
  199. }
  200. static int
  201. udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
  202. {
  203. struct udphdr _udph, *uh;
  204. const unsigned int udphoff = ip_hdrlen(skb);
  205. uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
  206. if (uh == NULL)
  207. return 0;
  208. if (uh->check != 0) {
  209. switch (skb->ip_summed) {
  210. case CHECKSUM_NONE:
  211. skb->csum = skb_checksum(skb, udphoff,
  212. skb->len - udphoff, 0);
  213. case CHECKSUM_COMPLETE:
  214. if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
  215. ip_hdr(skb)->daddr,
  216. skb->len - udphoff,
  217. ip_hdr(skb)->protocol,
  218. skb->csum)) {
  219. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  220. "Failed checksum for");
  221. return 0;
  222. }
  223. break;
  224. default:
  225. /* No need to checksum. */
  226. break;
  227. }
  228. }
  229. return 1;
  230. }
  231. /*
  232. * Note: the caller guarantees that only one of register_app,
  233. * unregister_app or app_conn_bind is called each time.
  234. */
  235. #define UDP_APP_TAB_BITS 4
  236. #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
  237. #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
  238. static struct list_head udp_apps[UDP_APP_TAB_SIZE];
  239. static DEFINE_SPINLOCK(udp_app_lock);
  240. static inline __u16 udp_app_hashkey(__be16 port)
  241. {
  242. return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
  243. & UDP_APP_TAB_MASK;
  244. }
  245. static int udp_register_app(struct ip_vs_app *inc)
  246. {
  247. struct ip_vs_app *i;
  248. __u16 hash;
  249. __be16 port = inc->port;
  250. int ret = 0;
  251. hash = udp_app_hashkey(port);
  252. spin_lock_bh(&udp_app_lock);
  253. list_for_each_entry(i, &udp_apps[hash], p_list) {
  254. if (i->port == port) {
  255. ret = -EEXIST;
  256. goto out;
  257. }
  258. }
  259. list_add(&inc->p_list, &udp_apps[hash]);
  260. atomic_inc(&ip_vs_protocol_udp.appcnt);
  261. out:
  262. spin_unlock_bh(&udp_app_lock);
  263. return ret;
  264. }
  265. static void
  266. udp_unregister_app(struct ip_vs_app *inc)
  267. {
  268. spin_lock_bh(&udp_app_lock);
  269. atomic_dec(&ip_vs_protocol_udp.appcnt);
  270. list_del(&inc->p_list);
  271. spin_unlock_bh(&udp_app_lock);
  272. }
  273. static int udp_app_conn_bind(struct ip_vs_conn *cp)
  274. {
  275. int hash;
  276. struct ip_vs_app *inc;
  277. int result = 0;
  278. /* Default binding: bind app only for NAT */
  279. if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
  280. return 0;
  281. /* Lookup application incarnations and bind the right one */
  282. hash = udp_app_hashkey(cp->vport);
  283. spin_lock(&udp_app_lock);
  284. list_for_each_entry(inc, &udp_apps[hash], p_list) {
  285. if (inc->port == cp->vport) {
  286. if (unlikely(!ip_vs_app_inc_get(inc)))
  287. break;
  288. spin_unlock(&udp_app_lock);
  289. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  290. "%u.%u.%u.%u:%u to app %s on port %u\n",
  291. __func__,
  292. NIPQUAD(cp->caddr), ntohs(cp->cport),
  293. NIPQUAD(cp->vaddr), ntohs(cp->vport),
  294. inc->name, ntohs(inc->port));
  295. cp->app = inc;
  296. if (inc->init_conn)
  297. result = inc->init_conn(inc, cp);
  298. goto out;
  299. }
  300. }
  301. spin_unlock(&udp_app_lock);
  302. out:
  303. return result;
  304. }
  305. static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
  306. [IP_VS_UDP_S_NORMAL] = 5*60*HZ,
  307. [IP_VS_UDP_S_LAST] = 2*HZ,
  308. };
  309. static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
  310. [IP_VS_UDP_S_NORMAL] = "UDP",
  311. [IP_VS_UDP_S_LAST] = "BUG!",
  312. };
  313. static int
  314. udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
  315. {
  316. return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
  317. udp_state_name_table, sname, to);
  318. }
  319. static const char * udp_state_name(int state)
  320. {
  321. if (state >= IP_VS_UDP_S_LAST)
  322. return "ERR!";
  323. return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
  324. }
  325. static int
  326. udp_state_transition(struct ip_vs_conn *cp, int direction,
  327. const struct sk_buff *skb,
  328. struct ip_vs_protocol *pp)
  329. {
  330. cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
  331. return 1;
  332. }
  333. static void udp_init(struct ip_vs_protocol *pp)
  334. {
  335. IP_VS_INIT_HASH_TABLE(udp_apps);
  336. pp->timeout_table = udp_timeouts;
  337. }
  /* Protocol exit hook: intentionally does nothing. */
  static void udp_exit(struct ip_vs_protocol *pp)
  {
  	/* nothing to release */
  }
  341. struct ip_vs_protocol ip_vs_protocol_udp = {
  342. .name = "UDP",
  343. .protocol = IPPROTO_UDP,
  344. .num_states = IP_VS_UDP_S_LAST,
  345. .dont_defrag = 0,
  346. .init = udp_init,
  347. .exit = udp_exit,
  348. .conn_schedule = udp_conn_schedule,
  349. .conn_in_get = udp_conn_in_get,
  350. .conn_out_get = udp_conn_out_get,
  351. .snat_handler = udp_snat_handler,
  352. .dnat_handler = udp_dnat_handler,
  353. .csum_check = udp_csum_check,
  354. .state_transition = udp_state_transition,
  355. .state_name = udp_state_name,
  356. .register_app = udp_register_app,
  357. .unregister_app = udp_unregister_app,
  358. .app_conn_bind = udp_app_conn_bind,
  359. .debug_packet = ip_vs_tcpudp_debug_packet,
  360. .timeout_change = NULL,
  361. .set_state_timeout = udp_set_state_timeout,
  362. };