/*
 * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>			/* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>			/* for csum_tcpudp_magic */
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>
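
/*
 *	Connection lookup helpers: pull the TCP port pair out of the skb
 *	(via skb_header_pointer, without linearizing it) and look up the
 *	IPVS connection table, either in the packet's own direction or
 *	with source and destination swapped when the caller asks for the
 *	inverse lookup.
 */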
static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	__u16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		return ip_vs_conn_in_get(iph->protocol,
					 iph->saddr, pptr[0],
					 iph->daddr, pptr[1]);
	} else {
		return ip_vs_conn_in_get(iph->protocol,
					 iph->daddr, pptr[1],
					 iph->saddr, pptr[0]);
	}
}

static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	__u16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		return ip_vs_conn_out_get(iph->protocol,
					  iph->saddr, pptr[0],
					  iph->daddr, pptr[1]);
	} else {
		return ip_vs_conn_out_get(iph->protocol,
					  iph->daddr, pptr[1],
					  iph->saddr, pptr[0]);
	}
}

static int
tcp_conn_schedule(struct sk_buff *skb,
		  struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		*verdict = NF_DROP;
		return 0;
	}

	if (th->syn &&
	    (svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
				     skb->nh.iph->daddr, th->dest))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}

	return 1;
}
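
/*
 *	Incremental checksum update: fold the difference between the old
 *	and new address and the old and new port into the existing TCP
 *	checksum (one's-complement arithmetic via ip_vs_check_diff), so
 *	the segment does not have to be re-summed from scratch.
 */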
static inline void
tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip,
		     u16 oldport, u16 newport)
{
	tcph->check =
		ip_vs_check_diff(~oldip, newip,
				 ip_vs_check_diff(oldport ^ 0xFFFF,
						  newport, tcph->check));
}
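
/*
 *	SNAT handler for replies leaving the real server: rewrite the TCP
 *	source port to the virtual port and fix the checksum.  When an
 *	application helper is bound to the connection the payload may have
 *	been mangled, so the checksum is recomputed in full; otherwise the
 *	fast incremental update above is sufficient.
 */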
static int
tcp_snat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/* Call application helper if needed */
		if (!ip_vs_app_pkt_out(cp, pskb))
			return 0;
	}

	tcph = (void *)(*pskb)->nh.iph + tcphoff;
	tcph->source = cp->vport;

	/* Adjust TCP checksums */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
				     cp->dport, cp->vport);
		if ((*pskb)->ip_summed == CHECKSUM_HW)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
					     (*pskb)->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
						(*pskb)->len - tcphoff,
						cp->protocol,
						(*pskb)->csum);
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, tcph->check,
			  (char*)&(tcph->check) - (char*)tcph);
	}
	return 1;
}
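
/*
 *	DNAT handler for packets heading to the real server: rewrite the
 *	TCP destination port to the real server's port, give the bound
 *	application helper a chance to fix the payload and ack_seq first,
 *	and update the checksum accordingly.
 */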
static int
tcp_dnat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 *	Attempt ip_vs_app call.
		 *	It will fix ip_vs_conn and iph ack_seq stuff
		 */
		if (!ip_vs_app_pkt_in(cp, pskb))
			return 0;
	}

	tcph = (void *)(*pskb)->nh.iph + tcphoff;
	tcph->dest = cp->dport;

	/*
	 *	Adjust TCP checksums
	 */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
				     cp->vport, cp->dport);
		if ((*pskb)->ip_summed == CHECKSUM_HW)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
					     (*pskb)->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
						(*pskb)->len - tcphoff,
						cp->protocol,
						(*pskb)->csum);
		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}
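
/*
 *	Verify the TCP checksum of an incoming segment.  The CHECKSUM_NONE
 *	case intentionally falls through to CHECKSUM_HW so that the
 *	software-computed sum is verified by the same csum_tcpudp_magic()
 *	call.
 */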
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	unsigned int tcphoff = skb->nh.iph->ihl*4;

	switch (skb->ip_summed) {
	case CHECKSUM_NONE:
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
	case CHECKSUM_HW:
		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
				      skb->len - tcphoff,
				      skb->nh.iph->protocol, skb->csum)) {
			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
					 "Failed checksum for");
			return 0;
		}
		break;
	default:
		/* CHECKSUM_UNNECESSARY */
		break;
	}

	return 1;
}
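
/*
 *	TCP state machine.  tcp_state_off selects one of three blocks in
 *	the transition tables below (INPUT, OUTPUT, INPUT-ONLY); within a
 *	block the row is chosen by the packet's flag (syn/fin/ack/rst, see
 *	tcp_state_idx) and the column by the connection's current state.
 */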
#define TCP_DIR_INPUT		0
#define TCP_DIR_OUTPUT		4
#define TCP_DIR_INPUT_ONLY	8

static const int tcp_state_off[IP_VS_DIR_LAST] = {
	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
};

/*
 *	Timeout table[state]
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	2*HZ,
	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
	[IP_VS_TCP_S_LAST]		=	2*HZ,
};

static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	"NONE",
	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
	[IP_VS_TCP_S_LAST]		=	"BUG!",
};

#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

struct tcp_states_t {
	int next_state[IP_VS_TCP_S_LAST];
};

static const char * tcp_state_name(int state)
{
	if (state >= IP_VS_TCP_S_LAST)
		return "ERR!";
	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}

static struct tcp_states_t tcp_states [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
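
/*
 *	Alternative transition table used when the secure_tcp defense
 *	strategy is enabled (see tcp_timeout_change below); it is more
 *	conservative about promoting half-open connections to ESTABLISHED.
 */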
static struct tcp_states_t tcp_states_dos [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};

static struct tcp_states_t *tcp_state_table = tcp_states;


static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
{
	int on = (flags & 1);		/* secure_tcp */

	/*
	** FIXME: change secure_tcp to independent sysctl var
	** or make it per-service or per-app because it is valid
	** for most if not for all of the applications. Something
	** like "capabilities" (flags) for each object.
	*/
	tcp_state_table = (on? tcp_states_dos : tcp_states);
}

static int
tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
				       tcp_state_name_table, sname, to);
}

static inline int tcp_state_idx(struct tcphdr *th)
{
	if (th->rst)
		return 3;
	if (th->syn)
		return 0;
	if (th->fin)
		return 1;
	if (th->ack)
		return 2;
	return -1;
}
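
/*
 *	Apply one state transition: pick the table block for the packet's
 *	direction, index it by flag and current state, and keep the
 *	destination's active/inactive connection counters in sync when the
 *	connection enters or leaves ESTABLISHED.
 */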
static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
	      int direction, struct tcphdr *th)
{
	int state_idx;
	int new_state = IP_VS_TCP_S_CLOSE;
	int state_off = tcp_state_off[direction];

	/*
	 *    Update state offset to INPUT_ONLY if necessary
	 *    or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (state_off == TCP_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			state_off = TCP_DIR_INPUT_ONLY;
	}

	if ((state_idx = tcp_state_idx(th)) < 0) {
		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
		goto tcp_state_out;
	}

	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
			  pp->name,
			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
			  th->syn? 'S' : '.',
			  th->fin? 'F' : '.',
			  th->ack? 'A' : '.',
			  th->rst? 'R' : '.',
			  NIPQUAD(cp->daddr), ntohs(cp->dport),
			  NIPQUAD(cp->caddr), ntohs(cp->cport),
			  tcp_state_name(cp->state),
			  tcp_state_name(new_state),
			  atomic_read(&cp->refcnt));
		if (dest) {
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	cp->timeout = pp->timeout_table[cp->state = new_state];
}


/*
 *	Handle state transitions
 */
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL)
		return 0;

	spin_lock(&cp->lock);
	set_tcp_state(pp, cp, direction, th);
	spin_unlock(&cp->lock);

	return 1;
}


/*
 *	Hash table for TCP application incarnations
 */
#define	TCP_APP_TAB_BITS	4
#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)

static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);

static inline __u16 tcp_app_hashkey(__u16 port)
{
	return ((port >> TCP_APP_TAB_BITS) ^ port) & TCP_APP_TAB_MASK;
}
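
/*
 *	Register/unregister an application helper incarnation (for example
 *	the FTP helper) on its TCP port; tcp_app_conn_bind later attaches
 *	the matching helper to new NAT connections by virtual port.
 */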
static int tcp_register_app(struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash, port = inc->port;
	int ret = 0;

	hash = tcp_app_hashkey(port);

	spin_lock_bh(&tcp_app_lock);
	list_for_each_entry(i, &tcp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &tcp_apps[hash]);
	atomic_inc(&ip_vs_protocol_tcp.appcnt);

  out:
	spin_unlock_bh(&tcp_app_lock);
	return ret;
}


static void
tcp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&tcp_app_lock);
	atomic_dec(&ip_vs_protocol_tcp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&tcp_app_lock);
}


static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = tcp_app_hashkey(cp->vport);

	spin_lock(&tcp_app_lock);
	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			spin_unlock(&tcp_app_lock);

			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
				  "%u.%u.%u.%u:%u to app %s on port %u\n",
				  __FUNCTION__,
				  NIPQUAD(cp->caddr), ntohs(cp->cport),
				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
				  inc->name, ntohs(inc->port));
			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&tcp_app_lock);

  out:
	return result;
}


/*
 *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 */
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
	spin_lock(&cp->lock);
	cp->state = IP_VS_TCP_S_LISTEN;
	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
	spin_unlock(&cp->lock);
}


static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(tcp_apps);
	pp->timeout_table = tcp_timeouts;
}


static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
}


struct ip_vs_protocol ip_vs_protocol_tcp = {
	.name =			"TCP",
	.protocol =		IPPROTO_TCP,
	.dont_defrag =		0,
	.appcnt =		ATOMIC_INIT(0),
	.init =			ip_vs_tcp_init,
	.exit =			ip_vs_tcp_exit,
	.register_app =		tcp_register_app,
	.unregister_app =	tcp_unregister_app,
	.conn_schedule =	tcp_conn_schedule,
	.conn_in_get =		tcp_conn_in_get,
	.conn_out_get =		tcp_conn_out_get,
	.snat_handler =		tcp_snat_handler,
	.dnat_handler =		tcp_dnat_handler,
	.csum_check =		tcp_csum_check,
	.state_name =		tcp_state_name,
	.state_transition =	tcp_state_transition,
	.app_conn_bind =	tcp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	tcp_timeout_change,
	.set_state_timeout =	tcp_set_state_timeout,
};