ip_vs_proto_tcp.c

/*
 * ip_vs_proto_tcp.c:	TCP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>
static struct ip_vs_conn *
tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	__be16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		return ip_vs_conn_in_get(iph->protocol,
					 iph->saddr, pptr[0],
					 iph->daddr, pptr[1]);
	} else {
		return ip_vs_conn_in_get(iph->protocol,
					 iph->daddr, pptr[1],
					 iph->saddr, pptr[0]);
	}
}
static struct ip_vs_conn *
tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
		 const struct iphdr *iph, unsigned int proto_off, int inverse)
{
	__be16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (likely(!inverse)) {
		return ip_vs_conn_out_get(iph->protocol,
					  iph->saddr, pptr[0],
					  iph->daddr, pptr[1]);
	} else {
		return ip_vs_conn_out_get(iph->protocol,
					  iph->daddr, pptr[1],
					  iph->saddr, pptr[0]);
	}
}
static int
tcp_conn_schedule(struct sk_buff *skb,
		  struct ip_vs_protocol *pp,
		  int *verdict, struct ip_vs_conn **cpp)
{
	struct ip_vs_service *svc;
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
	if (th == NULL) {
		*verdict = NF_DROP;
		return 0;
	}

	if (th->syn &&
	    (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
				     ip_hdr(skb)->daddr, th->dest))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			*verdict = NF_DROP;
			return 0;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		*cpp = ip_vs_schedule(svc, skb);
		if (!*cpp) {
			*verdict = ip_vs_leave(svc, skb, pp);
			return 0;
		}
		ip_vs_service_put(svc);
	}
	return 1;
}
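
/*
 * Scheduling note: only SYN packets addressed to a configured virtual
 * service are scheduled here.  A return value of 0 tells the caller to use
 * *verdict as the netfilter verdict; a return value of 1 lets packet
 * processing continue, with *cpp pointing to the newly created connection
 * entry when ip_vs_schedule() succeeded.
 */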
static inline void
tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
		     __be16 oldport, __be16 newport)
{
	tcph->check =
		csum_fold(ip_vs_check_diff4(oldip, newip,
				 ip_vs_check_diff2(oldport, newport,
						~csum_unfold(tcph->check))));
}
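
/*
 * The fast path above is an RFC 1624 style incremental update: the old
 * address and port are subtracted from, and the new ones added to, the
 * existing checksum, so the segment never has to be re-summed in full.
 * A minimal sketch of the same idea for a lone port rewrite, kept out of
 * the build and using a hypothetical helper name (it assumes
 * ip_vs_check_diff2() sums { ~oldport, newport } into the running csum,
 * as in net/ip_vs.h):
 */
#if 0
static inline void example_update_port_csum(struct tcphdr *tcph,
					    __be16 oldport, __be16 newport)
{
	/* unfold, un-complement, fold the port difference back in */
	tcph->check = csum_fold(ip_vs_check_diff2(oldport, newport,
						  ~csum_unfold(tcph->check)));
}
#endif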
static int
tcp_snat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	const unsigned int tcphoff = ip_hdrlen(*pskb);

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/* Call application helper if needed */
		if (!ip_vs_app_pkt_out(cp, pskb))
			return 0;
	}

	tcph = (void *)ip_hdr(*pskb) + tcphoff;
	tcph->source = cp->vport;

	/* Adjust TCP checksums */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
				     cp->dport, cp->vport);
		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
					     (*pskb)->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
						(*pskb)->len - tcphoff,
						cp->protocol,
						(*pskb)->csum);
		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
			  pp->name, tcph->check,
			  (char*)&(tcph->check) - (char*)tcph);
	}
	return 1;
}
static int
tcp_dnat_handler(struct sk_buff **pskb,
		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
	struct tcphdr *tcph;
	const unsigned int tcphoff = ip_hdrlen(*pskb);

	/* csum_check requires unshared skb */
	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
		return 0;

	if (unlikely(cp->app != NULL)) {
		/* Some checks before mangling */
		if (pp->csum_check && !pp->csum_check(*pskb, pp))
			return 0;

		/*
		 * Attempt ip_vs_app call.
		 * It will fix ip_vs_conn and iph ack_seq stuff
		 */
		if (!ip_vs_app_pkt_in(cp, pskb))
			return 0;
	}

	tcph = (void *)ip_hdr(*pskb) + tcphoff;
	tcph->dest = cp->dport;

	/*
	 * Adjust TCP checksums
	 */
	if (!cp->app) {
		/* Only port and addr are changed, do fast csum update */
		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
				     cp->vport, cp->dport);
		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
			(*pskb)->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		tcph->check = 0;
		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
					     (*pskb)->len - tcphoff, 0);
		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
						(*pskb)->len - tcphoff,
						cp->protocol,
						(*pskb)->csum);
		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return 1;
}
static int
tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
{
	const unsigned int tcphoff = ip_hdrlen(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_NONE:
		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
		/* fall through: verify the checksum we just computed */
	case CHECKSUM_COMPLETE:
		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
				      skb->len - tcphoff,
				      ip_hdr(skb)->protocol, skb->csum)) {
			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
					 "Failed checksum for");
			return 0;
		}
		break;
	default:
		/* No need to checksum. */
		break;
	}

	return 1;
}
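
/*
 * skb->ip_summed note: for CHECKSUM_NONE the segment checksum is computed
 * in software first and then verified by the same CHECKSUM_COMPLETE code;
 * csum_tcpudp_magic() returns 0 when the sum over the pseudo-header plus
 * segment is valid.  The default arm covers values such as
 * CHECKSUM_UNNECESSARY, where the checksum was already verified elsewhere.
 */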
#define TCP_DIR_INPUT		0
#define TCP_DIR_OUTPUT		4
#define TCP_DIR_INPUT_ONLY	8

static const int tcp_state_off[IP_VS_DIR_LAST] = {
	[IP_VS_DIR_INPUT]		=	TCP_DIR_INPUT,
	[IP_VS_DIR_OUTPUT]		=	TCP_DIR_OUTPUT,
	[IP_VS_DIR_INPUT_ONLY]		=	TCP_DIR_INPUT_ONLY,
};

/*
 *	Timeout table[state]
 */
static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	2*HZ,
	[IP_VS_TCP_S_ESTABLISHED]	=	15*60*HZ,
	[IP_VS_TCP_S_SYN_SENT]		=	2*60*HZ,
	[IP_VS_TCP_S_SYN_RECV]		=	1*60*HZ,
	[IP_VS_TCP_S_FIN_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_TIME_WAIT]		=	2*60*HZ,
	[IP_VS_TCP_S_CLOSE]		=	10*HZ,
	[IP_VS_TCP_S_CLOSE_WAIT]	=	60*HZ,
	[IP_VS_TCP_S_LAST_ACK]		=	30*HZ,
	[IP_VS_TCP_S_LISTEN]		=	2*60*HZ,
	[IP_VS_TCP_S_SYNACK]		=	120*HZ,
	[IP_VS_TCP_S_LAST]		=	2*HZ,
};
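
/*
 * All timeouts are in jiffies: 15*60*HZ keeps an ESTABLISHED entry alive
 * for 15 minutes of inactivity, 2*60*HZ gives a half-open SYN_SENT entry
 * two minutes.  The table is installed as pp->timeout_table in
 * ip_vs_tcp_init() below and indexed by the connection state in
 * set_tcp_state().
 */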
static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
	[IP_VS_TCP_S_NONE]		=	"NONE",
	[IP_VS_TCP_S_ESTABLISHED]	=	"ESTABLISHED",
	[IP_VS_TCP_S_SYN_SENT]		=	"SYN_SENT",
	[IP_VS_TCP_S_SYN_RECV]		=	"SYN_RECV",
	[IP_VS_TCP_S_FIN_WAIT]		=	"FIN_WAIT",
	[IP_VS_TCP_S_TIME_WAIT]		=	"TIME_WAIT",
	[IP_VS_TCP_S_CLOSE]		=	"CLOSE",
	[IP_VS_TCP_S_CLOSE_WAIT]	=	"CLOSE_WAIT",
	[IP_VS_TCP_S_LAST_ACK]		=	"LAST_ACK",
	[IP_VS_TCP_S_LISTEN]		=	"LISTEN",
	[IP_VS_TCP_S_SYNACK]		=	"SYNACK",
	[IP_VS_TCP_S_LAST]		=	"BUG!",
};
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
#define sSR IP_VS_TCP_S_SYN_RECV
#define sFW IP_VS_TCP_S_FIN_WAIT
#define sTW IP_VS_TCP_S_TIME_WAIT
#define sCL IP_VS_TCP_S_CLOSE
#define sCW IP_VS_TCP_S_CLOSE_WAIT
#define sLA IP_VS_TCP_S_LAST_ACK
#define sLI IP_VS_TCP_S_LISTEN
#define sSA IP_VS_TCP_S_SYNACK

struct tcp_states_t {
	int next_state[IP_VS_TCP_S_LAST];
};

static const char * tcp_state_name(int state)
{
	if (state >= IP_VS_TCP_S_LAST)
		return "ERR!";
	return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}
static struct tcp_states_t tcp_states [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI, sSR }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};

static struct tcp_states_t tcp_states_dos [] = {
/*	INPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},
/*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }},
/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},

/*	OUTPUT */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSS, sES, sSS, sSA, sSS, sSS, sSS, sSS, sSS, sLI, sSA }},
/*fin*/ {{sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI, sTW }},
/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES, sES }},
/*rst*/ {{sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL, sCL }},

/*	INPUT-ONLY */
/*        sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA	*/
/*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }},
/*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }},
/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }},
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
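
/*
 * Worked example of the lookup in set_tcp_state() below: an incoming SYN
 * on a fresh entry uses row TCP_DIR_INPUT + 0 (syn) and column sNO, giving
 * sSR (SYN_RECV); the client's subsequent ACK uses row TCP_DIR_INPUT + 2
 * (ack) and column sSR, giving sES (ESTABLISHED).  The _dos variant,
 * switched in by tcp_timeout_change() when the secure_tcp flag is set, is
 * stricter about reaching ESTABLISHED and uses the extra sSA (SYNACK)
 * state for half-open connections.
 */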
static struct tcp_states_t *tcp_state_table = tcp_states;


static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
{
	int on = (flags & 1);		/* secure_tcp */

	/*
	** FIXME: change secure_tcp to independent sysctl var
	** or make it per-service or per-app because it is valid
	** for most if not for all of the applications. Something
	** like "capabilities" (flags) for each object.
	*/
	tcp_state_table = (on? tcp_states_dos : tcp_states);
}

static int
tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
	return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
				       tcp_state_name_table, sname, to);
}
static inline int tcp_state_idx(struct tcphdr *th)
{
	if (th->rst)
		return 3;
	if (th->syn)
		return 0;
	if (th->fin)
		return 1;
	if (th->ack)
		return 2;
	return -1;
}
static inline void
set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
	      int direction, struct tcphdr *th)
{
	int state_idx;
	int new_state = IP_VS_TCP_S_CLOSE;
	int state_off = tcp_state_off[direction];

	/*
	 * Update state offset to INPUT_ONLY if necessary
	 * or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (state_off == TCP_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			state_off = TCP_DIR_INPUT_ONLY;
	}

	if ((state_idx = tcp_state_idx(th)) < 0) {
		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
		goto tcp_state_out;
	}

	new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->"
			  "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n",
			  pp->name,
			  (state_off==TCP_DIR_OUTPUT)?"output ":"input ",
			  th->syn? 'S' : '.',
			  th->fin? 'F' : '.',
			  th->ack? 'A' : '.',
			  th->rst? 'R' : '.',
			  NIPQUAD(cp->daddr), ntohs(cp->dport),
			  NIPQUAD(cp->caddr), ntohs(cp->cport),
			  tcp_state_name(cp->state),
			  tcp_state_name(new_state),
			  atomic_read(&cp->refcnt));
		if (dest) {
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    (new_state != IP_VS_TCP_S_ESTABLISHED)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   (new_state == IP_VS_TCP_S_ESTABLISHED)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	cp->timeout = pp->timeout_table[cp->state = new_state];
}
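
/*
 * The activeconns/inactconns bookkeeping above is what the connection-
 * counting schedulers (e.g. least-connection) consume: only ESTABLISHED
 * connections are counted as active, everything else is moved to the
 * inactive counter, which those schedulers weight much less heavily.
 */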
/*
 *	Handle state transitions
 */
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
		     const struct sk_buff *skb,
		     struct ip_vs_protocol *pp)
{
	struct tcphdr _tcph, *th;

	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
	if (th == NULL)
		return 0;

	spin_lock(&cp->lock);
	set_tcp_state(pp, cp, direction, th);
	spin_unlock(&cp->lock);

	return 1;
}
/*
 *	Hash table for TCP application incarnations
 */
#define	TCP_APP_TAB_BITS	4
#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)

static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(tcp_app_lock);

static inline __u16 tcp_app_hashkey(__be16 port)
{
	return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
		& TCP_APP_TAB_MASK;
}
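
/*
 * The key folds the upper bits of the raw 16-bit port value into its low
 * TCP_APP_TAB_BITS bits.  For example, for a raw value of 0x0015 (port 21
 * on a big-endian host; the __force cast reuses the network-order bit
 * pattern as stored):
 *
 *	((0x0015 >> 4) ^ 0x0015) & 0x0f = (0x0001 ^ 0x0015) & 0x0f = 4
 *
 * so that application incarnation would land in tcp_apps[4].
 */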
static int tcp_register_app(struct ip_vs_app *inc)
{
	struct ip_vs_app *i;
	__u16 hash;
	__be16 port = inc->port;
	int ret = 0;

	hash = tcp_app_hashkey(port);

	spin_lock_bh(&tcp_app_lock);
	list_for_each_entry(i, &tcp_apps[hash], p_list) {
		if (i->port == port) {
			ret = -EEXIST;
			goto out;
		}
	}
	list_add(&inc->p_list, &tcp_apps[hash]);
	atomic_inc(&ip_vs_protocol_tcp.appcnt);

  out:
	spin_unlock_bh(&tcp_app_lock);
	return ret;
}
static void
tcp_unregister_app(struct ip_vs_app *inc)
{
	spin_lock_bh(&tcp_app_lock);
	atomic_dec(&ip_vs_protocol_tcp.appcnt);
	list_del(&inc->p_list);
	spin_unlock_bh(&tcp_app_lock);
}

static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
	int hash;
	struct ip_vs_app *inc;
	int result = 0;

	/* Default binding: bind app only for NAT */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
		return 0;

	/* Lookup application incarnations and bind the right one */
	hash = tcp_app_hashkey(cp->vport);

	spin_lock(&tcp_app_lock);
	list_for_each_entry(inc, &tcp_apps[hash], p_list) {
		if (inc->port == cp->vport) {
			if (unlikely(!ip_vs_app_inc_get(inc)))
				break;
			spin_unlock(&tcp_app_lock);

			IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
				  "%u.%u.%u.%u:%u to app %s on port %u\n",
				  __FUNCTION__,
				  NIPQUAD(cp->caddr), ntohs(cp->cport),
				  NIPQUAD(cp->vaddr), ntohs(cp->vport),
				  inc->name, ntohs(inc->port));
			cp->app = inc;
			if (inc->init_conn)
				result = inc->init_conn(inc, cp);
			goto out;
		}
	}
	spin_unlock(&tcp_app_lock);

  out:
	return result;
}
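
/*
 * Application helpers (e.g. the ip_vs_ftp module) are only bound in NAT
 * (masquerading) mode: that is the only forwarding method in which the
 * director sees, and may rewrite, the payload of traffic in both
 * directions.  With direct routing or tunneling, replies bypass the
 * director, so payload fixups are impossible and the binding is skipped.
 */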
/*
 *	Set LISTEN timeout. (ip_vs_conn_put will setup timer)
 */
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
	spin_lock(&cp->lock);
	cp->state = IP_VS_TCP_S_LISTEN;
	cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
	spin_unlock(&cp->lock);
}

static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
{
	IP_VS_INIT_HASH_TABLE(tcp_apps);
	pp->timeout_table = tcp_timeouts;
}

static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
{
}
struct ip_vs_protocol ip_vs_protocol_tcp = {
	.name =			"TCP",
	.protocol =		IPPROTO_TCP,
	.dont_defrag =		0,
	.appcnt =		ATOMIC_INIT(0),
	.init =			ip_vs_tcp_init,
	.exit =			ip_vs_tcp_exit,
	.register_app =		tcp_register_app,
	.unregister_app =	tcp_unregister_app,
	.conn_schedule =	tcp_conn_schedule,
	.conn_in_get =		tcp_conn_in_get,
	.conn_out_get =		tcp_conn_out_get,
	.snat_handler =		tcp_snat_handler,
	.dnat_handler =		tcp_dnat_handler,
	.csum_check =		tcp_csum_check,
	.state_name =		tcp_state_name,
	.state_transition =	tcp_state_transition,
	.app_conn_bind =	tcp_app_conn_bind,
	.debug_packet =		ip_vs_tcpudp_debug_packet,
	.timeout_change =	tcp_timeout_change,
	.set_state_timeout =	tcp_set_state_timeout,
};
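
/*
 * This descriptor is picked up by the protocol core: ip_vs_protocol_init()
 * in ip_vs_proto.c registers &ip_vs_protocol_tcp (alongside the UDP, AH
 * and ESP descriptors, depending on the kernel configuration).  The .init
 * hook then installs the timeout table, and the handlers above are invoked
 * through the pp->... callbacks from ip_vs_core.c.
 */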