ip_vs_proto_tcp.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /*
  2. * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
  3. *
  4. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  5. * Julian Anastasov <ja@ssi.bg>
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. *
  12. * Changes:
  13. *
  14. */
  15. #include <linux/kernel.h>
  16. #include <linux/ip.h>
  17. #include <linux/tcp.h> /* for tcphdr */
  18. #include <net/ip.h>
  19. #include <net/tcp.h> /* for csum_tcpudp_magic */
  20. #include <linux/netfilter.h>
  21. #include <linux/netfilter_ipv4.h>
  22. #include <net/ip_vs.h>
  23. static struct ip_vs_conn *
  24. tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  25. const struct iphdr *iph, unsigned int proto_off, int inverse)
  26. {
  27. __be16 _ports[2], *pptr;
  28. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  29. if (pptr == NULL)
  30. return NULL;
  31. if (likely(!inverse)) {
  32. return ip_vs_conn_in_get(iph->protocol,
  33. iph->saddr, pptr[0],
  34. iph->daddr, pptr[1]);
  35. } else {
  36. return ip_vs_conn_in_get(iph->protocol,
  37. iph->daddr, pptr[1],
  38. iph->saddr, pptr[0]);
  39. }
  40. }
  41. static struct ip_vs_conn *
  42. tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  43. const struct iphdr *iph, unsigned int proto_off, int inverse)
  44. {
  45. __be16 _ports[2], *pptr;
  46. pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  47. if (pptr == NULL)
  48. return NULL;
  49. if (likely(!inverse)) {
  50. return ip_vs_conn_out_get(iph->protocol,
  51. iph->saddr, pptr[0],
  52. iph->daddr, pptr[1]);
  53. } else {
  54. return ip_vs_conn_out_get(iph->protocol,
  55. iph->daddr, pptr[1],
  56. iph->saddr, pptr[0]);
  57. }
  58. }
/*
 * Decide whether this packet should start a new IPVS connection.
 * Returns 0 with *verdict set when IPVS consumed the decision
 * (drop, or a scheduling result in *cpp); returns 1 to let normal
 * processing continue.
 */
static int
tcp_conn_schedule(struct sk_buff *skb,
		  struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* Truncated TCP header: cannot schedule, drop it. */
		*verdict = NF_DROP;
		return 0;
	}

	/* Only a SYN addressed to a configured virtual service can
	 * create a new connection; anything else falls through. */
	if (th->syn &&
	    (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
				     ip_hdr(skb)->daddr, th->dest))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			/* NOTE(review): no ip_vs_service_put() here —
			 * presumably ip_vs_leave() drops the svc
			 * reference itself; confirm against its impl. */
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}
	return 1;
}
/*
 * Incrementally patch the TCP checksum after exactly one address and
 * one port were rewritten (RFC 1624-style update, no full recompute).
 */
static inline void
tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
		     __be16 oldport, __be16 newport)
{
	tcph->check =
		csum_fold(ip_vs_check_diff4(oldip, newip,
				 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(tcph->check))));
}
/*
 * Source-NAT an outgoing (real server -> client) TCP packet: rewrite
 * the source port to the virtual port and fix up the TCP checksum.
 * Returns 1 on success, 0 when the packet must be dropped.
 */
static int
tcp_snat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	const unsigned int tcphoff = ip_hdrlen(skb);

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(skb, pp))
			return 0;

		/* Call application helper if needed */
		if (!ip_vs_app_pkt_out(cp, skb))
			return 0;
	}

	tcph = (void *)ip_hdr(skb) + tcphoff;
	tcph->source = cp->vport;

	/* Adjust TCP checksums */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
				     cp->dport, cp->vport);
		/* hardware-provided full csum is stale after mangling */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation: the app helper may have
		 * rewritten payload, so incremental update cannot be used */
		tcph->check = 0;
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
						skb->len - tcphoff,
						cp->protocol, skb->csum);
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, tcph->check,
			  (char*)&(tcph->check) - (char*)tcph);
	}
	return 1;
}
/*
 * Destination-NAT an incoming (client -> virtual server) TCP packet:
 * rewrite the destination port to the real server's port and fix up
 * the TCP checksum. Returns 1 on success, 0 when the packet must be
 * dropped.
 */
static int
tcp_dnat_handler(struct sk_buff *skb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	const unsigned int tcphoff = ip_hdrlen(skb);

	/* csum_check requires unshared skb */
	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(skb, pp))
			return 0;

		/*
		 * Attempt ip_vs_app call.
		 * It will fix ip_vs_conn and iph ack_seq stuff
		 */
		if (!ip_vs_app_pkt_in(cp, skb))
			return 0;
	}

	tcph = (void *)ip_hdr(skb) + tcphoff;
	tcph->dest = cp->dport;

	/*
	 * Adjust TCP checksums
	 */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
				     cp->vport, cp->dport);
		/* hardware-provided full csum is stale after mangling */
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation: the app helper may have
		 * rewritten payload, so incremental update cannot be used */
		tcph->check = 0;
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
						skb->len - tcphoff,
						cp->protocol, skb->csum);
		/* checksum was just computed by us: mark it verified */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}
/*
 * Verify the TCP checksum of an skb.
 * Returns 1 when the checksum is valid (or already verified by
 * hardware), 0 on checksum failure.
 */
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	const unsigned int tcphoff = ip_hdrlen(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_NONE:
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
		/* fall through: verify the checksum we just computed */
	case CHECKSUM_COMPLETE:
		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
				      skb->len - tcphoff,
				      ip_hdr(skb)->protocol, skb->csum)) {
			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
					 "Failed checksum for");
			return 0;
		}
		break;
	default:
		/* No need to checksum. */
		break;
	}

	return 1;
}
/* Row-group offsets into the transition tables below: each direction
 * owns four consecutive rows (syn/fin/ack/rst). */
#define TCP_DIR_INPUT		0
#define TCP_DIR_OUTPUT		4
#define TCP_DIR_INPUT_ONLY	8

/* Map an IP_VS_DIR_* packet direction to its row-group offset. */
static const int tcp_state_off[IP_VS_DIR_LAST] = {
	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
};
/*
 * Timeout table[state]: expiry (in jiffies) for a connection sitting
 * in each TCP state; installed as pp->timeout_table by ip_vs_tcp_init
 * and tunable per state via tcp_set_state_timeout().
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	2*HZ,
	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
	[IP_VS_TCP_S_LAST]		=	2*HZ,
};
/* Human-readable names for the TCP states, indexed by IP_VS_TCP_S_*;
 * consumed by tcp_state_name() and tcp_set_state_timeout(). */
static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	"NONE",
	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
	[IP_VS_TCP_S_LAST]		=	"BUG!",	/* sentinel, never a real state */
};
/* Short state aliases to keep the transition tables below readable. */
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

/* One row of a transition table: next state, indexed by current state. */
struct tcp_states_t {
	int next_state[IP_VS_TCP_S_LAST];
};
  261. static const char * tcp_state_name(int state)
  262. {
  263. if (state >= IP_VS_TCP_S_LAST)
  264. return "ERR!";
  265. return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
  266. }
/*
 * Normal TCP state transition table: 3 direction groups x 4 flag rows
 * (syn/fin/ack/rst). Indexed as tcp_state_table[offset + flag_idx]
 * .next_state[current_state] by set_tcp_state().
 */
static struct tcp_states_t tcp_states [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
/*
 * Hardened ("secure_tcp") transition table used under DoS load; same
 * layout as tcp_states but keeps half-open connections in the cheaper
 * SYNACK state instead of committing resources early. Selected by
 * tcp_timeout_change().
 */
static struct tcp_states_t tcp_states_dos [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
/* Currently active transition table; swapped below when secure_tcp
 * mode toggles. */
static struct tcp_states_t *tcp_state_table = tcp_states;

/* Switch between the normal and DoS-hardened transition tables
 * depending on bit 0 (secure_tcp) of flags. */
static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
{
	int on = (flags & 1);		/* secure_tcp */

	/*
	** FIXME: change secure_tcp to independent sysctl var
	** or make it per-service or per-app because it is valid
	** for most if not for all of the applications. Something
	** like "capabilities" (flags) for each object.
	*/
	tcp_state_table = (on? tcp_states_dos : tcp_states);
}
/*
 * Set the timeout of the TCP state named sname to "to"
 * (presumably seconds, converted by ip_vs_set_state_timeout —
 * confirm against its implementation). Returns its status code.
 */
static int
tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
				       tcp_state_name_table, sname, to);
}
  325. static inline int tcp_state_idx(struct tcphdr *th)
  326. {
  327. if (th->rst)
  328. return 3;
  329. if (th->syn)
  330. return 0;
  331. if (th->fin)
  332. return 1;
  333. if (th->ack)
  334. return 2;
  335. return -1;
  336. }
/*
 * Advance the connection's TCP state machine for one packet and
 * refresh its timeout. Also keeps the destination's active/inactive
 * connection counters in sync with whether the connection is
 * ESTABLISHED. Caller must hold cp->lock (see tcp_state_transition).
 */
static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
	      int direction, struct tcphdr *th)
{
	int state_idx;
	int new_state = IP_VS_TCP_S_CLOSE;	/* default if flags unrecognized */
	int state_off = tcp_state_off[direction];

	/*
	 * Update state offset to INPUT_ONLY if necessary
	 * or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (state_off == TCP_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			state_off = TCP_DIR_INPUT_ONLY;
	}

	if ((state_idx = tcp_state_idx(th)) < 0) {
		/* none of syn/fin/ack/rst set: fall through with CLOSE */
		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
		goto tcp_state_out;
	}

	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
			  pp->name,
			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
			  th->syn? 'S' : '.',
			  th->fin? 'F' : '.',
			  th->ack? 'A' : '.',
			  th->rst? 'R' : '.',
			  NIPQUAD(cp->daddr), ntohs(cp->dport),
			  NIPQUAD(cp->caddr), ntohs(cp->cport),
			  tcp_state_name(cp->state),
			  tcp_state_name(new_state),
			  atomic_read(&cp->refcnt));
		if (dest) {
			/* Migrate the connection between the dest's
			 * active and inactive counters as it enters or
			 * leaves ESTABLISHED. */
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	/* Commit the new state and arm the matching timeout. */
	cp->timeout = pp->timeout_table[cp->state = new_state];
}
/*
 * Handle state transitions: pull the TCP header out of the skb and
 * run set_tcp_state() under the connection lock.
 * Returns 1 on success, 0 if the header could not be read.
 */
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
	if (th == NULL)
		return 0;

	spin_lock(&cp->lock);
	set_tcp_state(pp, cp, direction, th);
	spin_unlock(&cp->lock);

	return 1;
}
/*
 * Hash table for TCP application incarnations
 */
#define TCP_APP_TAB_BITS	4
#define TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)

static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);	/* protects tcp_apps buckets */

/* Fold a 16-bit port into a TCP_APP_TAB_BITS-wide bucket index. */
static inline __u16 tcp_app_hashkey(__be16 port)
{
	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
		& TCP_APP_TAB_MASK;
}
/*
 * Register a TCP application incarnation in the port-keyed hash
 * table. Returns 0 on success, -EEXIST if an app is already bound
 * to the same port.
 */
static int tcp_register_app(struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash;
	__be16 port = inc->port;
	int ret = 0;

	hash = tcp_app_hashkey(port);

	spin_lock_bh(&tcp_app_lock);
	list_for_each_entry(i, &tcp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &tcp_apps[hash]);
	atomic_inc(&ip_vs_protocol_tcp.appcnt);

  out:
	spin_unlock_bh(&tcp_app_lock);
	return ret;
}
/* Remove a previously registered TCP application incarnation and
 * drop the protocol-wide app count. */
static void
tcp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&tcp_app_lock);
	atomic_dec(&ip_vs_protocol_tcp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&tcp_app_lock);
}
/*
 * Bind an application helper to a NAT connection whose virtual port
 * matches a registered app. Returns 0 when no app matched (or for
 * non-NAT forwarding), otherwise the app's init_conn result.
 */
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = tcp_app_hashkey(cp->vport);

	spin_lock(&tcp_app_lock);
	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			/* we hold our own reference on inc now, so the
			 * lock can be dropped before using it */
			spin_unlock(&tcp_app_lock);

			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
				  "%u.%u.%u.%u:%u to app %s on port %u\n",
				  __func__,
				  NIPQUAD(cp->caddr), ntohs(cp->cport),
				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
				  inc->name, ntohs(inc->port));
			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&tcp_app_lock);

  out:
	return result;
}
/*
 * Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 * Forces the connection into LISTEN state with the LISTEN timeout,
 * under the connection lock.
 */
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
	spin_lock(&cp->lock);
	cp->state = IP_VS_TCP_S_LISTEN;
	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
	spin_unlock(&cp->lock);
}
/* Protocol init: set up the app hash buckets and install the default
 * per-state timeout table. */
static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(tcp_apps);
	pp->timeout_table = tcp_timeouts;
}
/* Protocol teardown: nothing to release for TCP. */
static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
}
/* IPVS protocol descriptor for TCP: wires all the handlers above
 * into the generic ip_vs_protocol framework. */
struct ip_vs_protocol ip_vs_protocol_tcp = {
	.name =			"TCP",
	.protocol =		IPPROTO_TCP,
	.num_states =		IP_VS_TCP_S_LAST,
	.dont_defrag =		0,
	.appcnt =		ATOMIC_INIT(0),
	.init =			ip_vs_tcp_init,
	.exit =			ip_vs_tcp_exit,
	.register_app =		tcp_register_app,
	.unregister_app =	tcp_unregister_app,
	.conn_schedule =	tcp_conn_schedule,
	.conn_in_get =		tcp_conn_in_get,
	.conn_out_get =		tcp_conn_out_get,
	.snat_handler =		tcp_snat_handler,
	.dnat_handler =		tcp_dnat_handler,
	.csum_check =		tcp_csum_check,
	.state_name =		tcp_state_name,
	.state_transition =	tcp_state_transition,
	.app_conn_bind =	tcp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	tcp_timeout_change,
	.set_state_timeout =	tcp_set_state_timeout,
};