/*
 * ip_vs_proto_tcp.c:   TCP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>
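
/*
 * Connection lookup helpers: pull the TCP port pair out of the packet and
 * look the connection up in the IPVS table, in the original direction
 * (client -> virtual service) or in reverse when "inverse" is set.
 */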
static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                const struct iphdr *iph, unsigned int proto_off, int inverse)
{
        __be16 _ports[2], *pptr;

        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
        if (pptr == NULL)
                return NULL;

        if (likely(!inverse)) {
                return ip_vs_conn_in_get(iph->protocol,
                                         iph->saddr, pptr[0],
                                         iph->daddr, pptr[1]);
        } else {
                return ip_vs_conn_in_get(iph->protocol,
                                         iph->daddr, pptr[1],
                                         iph->saddr, pptr[0]);
        }
}

static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
                 const struct iphdr *iph, unsigned int proto_off, int inverse)
{
        __be16 _ports[2], *pptr;

        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
        if (pptr == NULL)
                return NULL;

        if (likely(!inverse)) {
                return ip_vs_conn_out_get(iph->protocol,
                                          iph->saddr, pptr[0],
                                          iph->daddr, pptr[1]);
        } else {
                return ip_vs_conn_out_get(iph->protocol,
                                          iph->daddr, pptr[1],
                                          iph->saddr, pptr[0]);
        }
}

static int
tcp_conn_schedule(struct sk_buff *skb,
                  struct ip_vs_protocol *pp,
                  int *verdict, struct ip_vs_conn **cpp)
{
        struct ip_vs_service *svc;
        struct tcphdr _tcph, *th;

        th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
        if (th == NULL) {
                *verdict = NF_DROP;
                return 0;
        }

        if (th->syn &&
            (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
                                     ip_hdr(skb)->daddr, th->dest))) {
                if (ip_vs_todrop()) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
                         */
                        ip_vs_service_put(svc);
                        *verdict = NF_DROP;
                        return 0;
                }

                /*
                 * Let the virtual server select a real server for the
                 * incoming connection, and create a connection entry.
                 */
                *cpp = ip_vs_schedule(svc, skb);
                if (!*cpp) {
                        *verdict = ip_vs_leave(svc, skb, pp);
                        return 0;
                }
                ip_vs_service_put(svc);
        }
        return 1;
}
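
/*
 * Incrementally adjust the TCP checksum for a changed address/port pair,
 * so a full recomputation over the payload is not needed.
 */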
static inline void
tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
                     __be16 oldport, __be16 newport)
{
        tcph->check =
                csum_fold(ip_vs_check_diff4(oldip, newip,
                                 ip_vs_check_diff2(oldport, newport,
                                                ~csum_unfold(tcph->check))));
}

static int
tcp_snat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
        struct tcphdr *tcph;
        const unsigned int tcphoff = ip_hdrlen(skb);

        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
                return 0;

        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
                if (pp->csum_check && !pp->csum_check(skb, pp))
                        return 0;

                /* Call application helper if needed */
                if (!ip_vs_app_pkt_out(cp, skb))
                        return 0;
        }

        tcph = (void *)ip_hdr(skb) + tcphoff;
        tcph->source = cp->vport;

        /* Adjust TCP checksums */
        if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
                                     cp->dport, cp->vport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
        } else {
                /* full checksum calculation */
                tcph->check = 0;
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
                tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
                                                skb->len - tcphoff,
                                                cp->protocol, skb->csum);
                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
                          pp->name, tcph->check,
                          (char*)&(tcph->check) - (char*)tcph);
        }
        return 1;
}

static int
tcp_dnat_handler(struct sk_buff *skb,
                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
        struct tcphdr *tcph;
        const unsigned int tcphoff = ip_hdrlen(skb);

        /* csum_check requires unshared skb */
        if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
                return 0;

        if (unlikely(cp->app != NULL)) {
                /* Some checks before mangling */
                if (pp->csum_check && !pp->csum_check(skb, pp))
                        return 0;

                /*
                 * Attempt ip_vs_app call.
                 * It will fix ip_vs_conn and iph ack_seq stuff
                 */
                if (!ip_vs_app_pkt_in(cp, skb))
                        return 0;
        }

        tcph = (void *)ip_hdr(skb) + tcphoff;
        tcph->dest = cp->dport;

        /*
         * Adjust TCP checksums
         */
        if (!cp->app) {
                /* Only port and addr are changed, do fast csum update */
                tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
                                     cp->vport, cp->dport);
                if (skb->ip_summed == CHECKSUM_COMPLETE)
                        skb->ip_summed = CHECKSUM_NONE;
        } else {
                /* full checksum calculation */
                tcph->check = 0;
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
                tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
                                                skb->len - tcphoff,
                                                cp->protocol, skb->csum);
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
        return 1;
}

static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
        const unsigned int tcphoff = ip_hdrlen(skb);

        switch (skb->ip_summed) {
        case CHECKSUM_NONE:
                skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
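                /* fall through: verify the checksum just computed */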
        case CHECKSUM_COMPLETE:
                if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
                                      skb->len - tcphoff,
                                      ip_hdr(skb)->protocol, skb->csum)) {
                        IP_VS_DBG_RL_PKT(0, pp, skb, 0,
                                         "Failed checksum for");
                        return 0;
                }
                break;
        default:
                /* No need to checksum. */
                break;
        }

        return 1;
}
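
/*
 * TCP state machine: the offsets below select the per-direction block of
 * the transition tables (four rows per direction: SYN, FIN, ACK, RST).
 */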
#define TCP_DIR_INPUT           0
#define TCP_DIR_OUTPUT          4
#define TCP_DIR_INPUT_ONLY      8

static const int tcp_state_off[IP_VS_DIR_LAST] = {
        [IP_VS_DIR_INPUT]       =       TCP_DIR_INPUT,
        [IP_VS_DIR_OUTPUT]      =       TCP_DIR_OUTPUT,
        [IP_VS_DIR_INPUT_ONLY]  =       TCP_DIR_INPUT_ONLY,
};

/*
 * Timeout table[state]
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
        [IP_VS_TCP_S_SYN_RECV]          =       1*60*HZ,
        [IP_VS_TCP_S_FIN_WAIT]          =       2*60*HZ,
        [IP_VS_TCP_S_TIME_WAIT]         =       2*60*HZ,
        [IP_VS_TCP_S_CLOSE]             =       10*HZ,
        [IP_VS_TCP_S_CLOSE_WAIT]        =       60*HZ,
        [IP_VS_TCP_S_LAST_ACK]          =       30*HZ,
        [IP_VS_TCP_S_LISTEN]            =       2*60*HZ,
        [IP_VS_TCP_S_SYNACK]            =       120*HZ,
        [IP_VS_TCP_S_LAST]              =       2*HZ,
};

static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       "NONE",
        [IP_VS_TCP_S_ESTABLISHED]       =       "ESTABLISHED",
        [IP_VS_TCP_S_SYN_SENT]          =       "SYN_SENT",
        [IP_VS_TCP_S_SYN_RECV]          =       "SYN_RECV",
        [IP_VS_TCP_S_FIN_WAIT]          =       "FIN_WAIT",
        [IP_VS_TCP_S_TIME_WAIT]         =       "TIME_WAIT",
        [IP_VS_TCP_S_CLOSE]             =       "CLOSE",
        [IP_VS_TCP_S_CLOSE_WAIT]        =       "CLOSE_WAIT",
        [IP_VS_TCP_S_LAST_ACK]          =       "LAST_ACK",
        [IP_VS_TCP_S_LISTEN]            =       "LISTEN",
        [IP_VS_TCP_S_SYNACK]            =       "SYNACK",
        [IP_VS_TCP_S_LAST]              =       "BUG!",
};

#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

struct tcp_states_t {
        int next_state[IP_VS_TCP_S_LAST];
};

static const char * tcp_state_name(int state)
{
        if (state >= IP_VS_TCP_S_LAST)
                return "ERR!";
        return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}

static struct tcp_states_t tcp_states [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
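
/*
 * Alternative transition table used while the secure_tcp defense strategy
 * is active; it is selected in tcp_timeout_change() below.
 */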
static struct tcp_states_t tcp_states_dos [] = {
/*      INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*      OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*      INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};

static struct tcp_states_t *tcp_state_table = tcp_states;

static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
{
        int on = (flags & 1);           /* secure_tcp */

        /*
        ** FIXME: change secure_tcp to independent sysctl var
        ** or make it per-service or per-app because it is valid
        ** for most if not for all of the applications. Something
        ** like "capabilities" (flags) for each object.
        */
        tcp_state_table = (on? tcp_states_dos : tcp_states);
}

static int
tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
        return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
                                       tcp_state_name_table, sname, to);
}
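
/*
 * Map the TCP flags of a segment to a row of the transition tables:
 * RST is checked first and maps to row 3; SYN, FIN and ACK map to rows
 * 0, 1 and 2; -1 means no flag of interest was set.
 */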
static inline int tcp_state_idx(struct tcphdr *th)
{
        if (th->rst)
                return 3;
        if (th->syn)
                return 0;
        if (th->fin)
                return 1;
        if (th->ack)
                return 2;
        return -1;
}
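
/*
 * Look up the next connection state in the active transition table, keep
 * the destination's active/inactive connection counters in sync with the
 * ESTABLISHED boundary, and (re)arm the per-state timeout.
 */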
static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
              int direction, struct tcphdr *th)
{
        int state_idx;
        int new_state = IP_VS_TCP_S_CLOSE;
        int state_off = tcp_state_off[direction];

        /*
         * Update state offset to INPUT_ONLY if necessary
         * or delete NO_OUTPUT flag if output packet detected
         */
        if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
                if (state_off == TCP_DIR_OUTPUT)
                        cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
                else
                        state_off = TCP_DIR_INPUT_ONLY;
        }

        if ((state_idx = tcp_state_idx(th)) < 0) {
                IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
                goto tcp_state_out;
        }

        new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
        if (new_state != cp->state) {
                struct ip_vs_dest *dest = cp->dest;

                IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
                          "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
                          pp->name,
                          (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
                          th->syn? 'S' : '.',
                          th->fin? 'F' : '.',
                          th->ack? 'A' : '.',
                          th->rst? 'R' : '.',
                          NIPQUAD(cp->daddr), ntohs(cp->dport),
                          NIPQUAD(cp->caddr), ntohs(cp->cport),
                          tcp_state_name(cp->state),
                          tcp_state_name(new_state),
                          atomic_read(&cp->refcnt));
                if (dest) {
                        if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
                            (new_state != IP_VS_TCP_S_ESTABLISHED)) {
                                atomic_dec(&dest->activeconns);
                                atomic_inc(&dest->inactconns);
                                cp->flags |= IP_VS_CONN_F_INACTIVE;
                        } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
                                   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
                                atomic_inc(&dest->activeconns);
                                atomic_dec(&dest->inactconns);
                                cp->flags &= ~IP_VS_CONN_F_INACTIVE;
                        }
                }
        }

        cp->timeout = pp->timeout_table[cp->state = new_state];
}

/*
 * Handle state transitions
 */
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
                     struct ip_vs_protocol *pp)
{
        struct tcphdr _tcph, *th;

        th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
        if (th == NULL)
                return 0;

        spin_lock(&cp->lock);
        set_tcp_state(pp, cp, direction, th);
        spin_unlock(&cp->lock);

        return 1;
}

/*
 * Hash table for TCP application incarnations
 */
#define TCP_APP_TAB_BITS        4
#define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)

static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);

static inline __u16 tcp_app_hashkey(__be16 port)
{
        return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
                & TCP_APP_TAB_MASK;
}

static int tcp_register_app(struct ip_vs_app *inc)
{
        struct ip_vs_app *i;
        __u16 hash;
        __be16 port = inc->port;
        int ret = 0;

        hash = tcp_app_hashkey(port);

        spin_lock_bh(&tcp_app_lock);
        list_for_each_entry(i, &tcp_apps[hash], p_list) {
                if (i->port == port) {
                        ret = -EEXIST;
                        goto out;
                }
        }
        list_add(&inc->p_list, &tcp_apps[hash]);
        atomic_inc(&ip_vs_protocol_tcp.appcnt);

  out:
        spin_unlock_bh(&tcp_app_lock);
        return ret;
}

static void
tcp_unregister_app(struct ip_vs_app *inc)
{
        spin_lock_bh(&tcp_app_lock);
        atomic_dec(&ip_vs_protocol_tcp.appcnt);
        list_del(&inc->p_list);
        spin_unlock_bh(&tcp_app_lock);
}
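
/*
 * Bind a NAT connection to the application helper (such as the FTP
 * helper) registered for its virtual port, if any, and run the helper's
 * init_conn hook.
 */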
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
        int hash;
        struct ip_vs_app *inc;
        int result = 0;

        /* Default binding: bind app only for NAT */
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
                return 0;

        /* Lookup application incarnations and bind the right one */
        hash = tcp_app_hashkey(cp->vport);

        spin_lock(&tcp_app_lock);
        list_for_each_entry(inc, &tcp_apps[hash], p_list) {
                if (inc->port == cp->vport) {
                        if (unlikely(!ip_vs_app_inc_get(inc)))
                                break;
                        spin_unlock(&tcp_app_lock);

                        IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
                                  "%u.%u.%u.%u:%u to app %s on port %u\n",
                                  __func__,
                                  NIPQUAD(cp->caddr), ntohs(cp->cport),
                                  NIPQUAD(cp->vaddr), ntohs(cp->vport),
                                  inc->name, ntohs(inc->port));
                        cp->app = inc;
                        if (inc->init_conn)
                                result = inc->init_conn(inc, cp);
                        goto out;
                }
        }
        spin_unlock(&tcp_app_lock);

  out:
        return result;
}

/*
 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 */
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
        spin_lock(&cp->lock);
        cp->state = IP_VS_TCP_S_LISTEN;
        cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
        spin_unlock(&cp->lock);
}

static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
{
        IP_VS_INIT_HASH_TABLE(tcp_apps);
        pp->timeout_table = tcp_timeouts;
}

static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
}
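
/*
 * TCP protocol descriptor registered with the IPVS core; it wires the
 * handlers above into packet processing for IPPROTO_TCP.
 */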
struct ip_vs_protocol ip_vs_protocol_tcp = {
        .name =                 "TCP",
        .protocol =             IPPROTO_TCP,
        .dont_defrag =          0,
        .appcnt =               ATOMIC_INIT(0),
        .init =                 ip_vs_tcp_init,
        .exit =                 ip_vs_tcp_exit,
        .register_app =         tcp_register_app,
        .unregister_app =       tcp_unregister_app,
        .conn_schedule =        tcp_conn_schedule,
        .conn_in_get =          tcp_conn_in_get,
        .conn_out_get =         tcp_conn_out_get,
        .snat_handler =         tcp_snat_handler,
        .dnat_handler =         tcp_dnat_handler,
        .csum_check =           tcp_csum_check,
        .state_name =           tcp_state_name,
        .state_transition =     tcp_state_transition,
        .app_conn_bind =        tcp_app_conn_bind,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       tcp_timeout_change,
        .set_state_timeout =    tcp_set_state_timeout,
};