@@ -35,12 +35,6 @@
 #define SYNC_INIT 1
 #endif
 
-/* New-style handling of TIME_WAIT sockets. */
-
-static void inet_twdr_hangman(unsigned long data);
-static void inet_twdr_twkill_work(void *data);
-static void inet_twdr_twcal_tick(unsigned long data);
-
 int sysctl_tcp_syncookies = SYNC_INIT;
 int sysctl_tcp_abort_on_overflow;
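The forward declarations removed above do not simply disappear: the death-row helpers are being moved out of TCP into generic code. A hedged sketch of where they are expected to surface, assuming the include/net/inet_timewait_sock.h layout of this kernel generation:

	extern void inet_twdr_hangman(unsigned long data);
	extern void inet_twdr_twkill_work(void *data);
	extern void inet_twdr_twcal_tick(unsigned long data);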
@@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = {
 
 EXPORT_SYMBOL_GPL(tcp_death_row);
 
-static void inet_twsk_schedule(struct inet_timewait_sock *tw,
-			       struct inet_timewait_death_row *twdr,
-			       const int timeo);
-
 static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 {
 	if (seq == s_win)
@@ -173,9 +163,11 @@ kill_with_rst:
 		if (tw->tw_family == AF_INET &&
 		    tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
 		    tcp_v4_tw_remember_stamp(tw))
-			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout);
+			inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
+					   TCP_TIMEWAIT_LEN);
 		else
-			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
+			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+					   TCP_TIMEWAIT_LEN);
 		return TCP_TW_ACK;
 	}
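Every inet_twsk_schedule() call site in this hunk and the ones below gains a fourth argument. A sketch of the assumed new prototype, inferred from the call sites (treat the parameter name as an assumption): the per-protocol TIME_WAIT length is passed in explicitly, so the generic scheduler no longer hardcodes TCP_TIMEWAIT_LEN.

	void inet_twsk_schedule(struct inet_timewait_sock *tw,
				struct inet_timewait_death_row *twdr,
				const int timeo, const int timewait_len);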
@@ -213,7 +205,8 @@ kill:
 				return TCP_TW_SUCCESS;
 			}
 		}
-		inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
+		inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+				   TCP_TIMEWAIT_LEN);
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -263,7 +256,8 @@ kill:
 		 * Do not reschedule in the last case.
 		 */
 		if (paws_reject || th->ack)
-			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN);
+			inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
+					   TCP_TIMEWAIT_LEN);
 
 		/* Send ACK. Note, we do not put the bucket,
 		 * it will be released by caller.
@@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 			timeo = TCP_TIMEWAIT_LEN;
 		}
 
-		inet_twsk_schedule(tw, &tcp_death_row, timeo);
+		inet_twsk_schedule(tw, &tcp_death_row, timeo,
+				   TCP_TIMEWAIT_LEN);
 		inet_twsk_put(tw);
 	} else {
 		/* Sorry, if we're out of memory, just CLOSE this
@@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 	tcp_done(sk);
 }
 
-/* Returns non-zero if quota exceeded. */
-static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
-				    const int slot)
-{
-	struct inet_timewait_sock *tw;
-	struct hlist_node *node;
-	unsigned int killed;
-	int ret;
-
-	/* NOTE: compare this to the previous version, where the lock
-	 * was released after detaching the chain. It was racy,
-	 * because tw buckets are scheduled in a non-serialized context
-	 * in 2.3 (with netfilter), and with softnet it is common, because
-	 * soft irqs are not sequenced.
-	 */
-	killed = 0;
-	ret = 0;
-rescan:
-	inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
-		__inet_twsk_del_dead_node(tw);
-		spin_unlock(&twdr->death_lock);
-		__inet_twsk_kill(tw, twdr->hashinfo);
-		inet_twsk_put(tw);
-		killed++;
-		spin_lock(&twdr->death_lock);
-		if (killed > INET_TWDR_TWKILL_QUOTA) {
-			ret = 1;
-			break;
-		}
-
-		/* While we dropped twdr->death_lock, another cpu may have
-		 * killed off the next TW bucket in the list, therefore
-		 * do a fresh re-read of the hlist head node with the
-		 * lock reacquired. We still use the hlist traversal
-		 * macro in order to get the prefetches.
-		 */
-		goto rescan;
-	}
-
-	twdr->tw_count -= killed;
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
-	return ret;
-}
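One subtlety in the function above: the quota test runs after the increment, so a single pass can reap INET_TWDR_TWKILL_QUOTA + 1 buckets before it returns 1 and the caller defers to the work queue. A minimal user-space sketch of that counting behavior, assuming the quota value of 100 used by this kernel generation:

	#include <assert.h>

	enum { INET_TWDR_TWKILL_QUOTA = 100 };	/* assumed value */

	int main(void)
	{
		int killed = 0, deferred = 0;

		while (!deferred) {
			killed++;			/* one bucket destroyed */
			if (killed > INET_TWDR_TWKILL_QUOTA)
				deferred = 1;		/* maps to ret = 1 above */
		}
		assert(killed == INET_TWDR_TWKILL_QUOTA + 1);
		return 0;
	}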
-
-static void inet_twdr_hangman(unsigned long data)
-{
-	struct inet_timewait_death_row *twdr;
-	unsigned int need_timer;
-
-	twdr = (struct inet_timewait_death_row *)data;
-	spin_lock(&twdr->death_lock);
-
-	if (twdr->tw_count == 0)
-		goto out;
-
-	need_timer = 0;
-	if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
-		twdr->thread_slots |= (1 << twdr->slot);
-		mb();
-		schedule_work(&twdr->twkill_work);
-		need_timer = 1;
-	} else {
-		/* We purged the entire slot, anything left? */
-		if (twdr->tw_count)
-			need_timer = 1;
-	}
-	twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
-	if (need_timer)
-		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
-out:
-	spin_unlock(&twdr->death_lock);
-}
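For context, the hangman timer and the overflow work item are wired up where tcp_death_row is defined earlier in this file (its opening line appears as hunk context above). A hedged sketch of that initializer; the exact field list and values are assumptions based on the 2.6.14-era layout:

	struct inet_timewait_death_row tcp_death_row = {
		.death_lock	= SPIN_LOCK_UNLOCKED,
		.period		= TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
		/* hangman runs from a kernel timer... */
		.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
						    (unsigned long)&tcp_death_row),
		/* ...and the overflow path from a work item. */
		.twkill_work	= __WORK_INITIALIZER(tcp_death_row.twkill_work,
						     inet_twdr_twkill_work,
						     &tcp_death_row),
		/* ... remaining fields elided ... */
	};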
-
-extern void twkill_slots_invalid(void);
-
-static void inet_twdr_twkill_work(void *data)
-{
-	struct inet_timewait_death_row *twdr = data;
-	int i;
-
-	if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
-		twkill_slots_invalid();
-
-	while (twdr->thread_slots) {
-		spin_lock_bh(&twdr->death_lock);
-		for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
-			if (!(twdr->thread_slots & (1 << i)))
-				continue;
-
-			while (inet_twdr_do_twkill_work(twdr, i) != 0) {
-				if (need_resched()) {
-					spin_unlock_bh(&twdr->death_lock);
-					schedule();
-					spin_lock_bh(&twdr->death_lock);
-				}
-			}
-
-			twdr->thread_slots &= ~(1 << i);
-		}
-		spin_unlock_bh(&twdr->death_lock);
-	}
-}
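The bare extern twkill_slots_invalid() above is a link-time assertion: the condition is a compile-time constant, so in the good case the branch and the call are optimized away, and in the bad case the unresolved symbol breaks the link. A sketch of the same check using the kernel's compile-time assert (assuming BUILD_BUG_ON from <linux/kernel.h> is available in this tree):

	/* Fails the build if thread_slots cannot hold one bit per slot. */
	BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8));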
-
-/* These are always called from BH context. See callers in
- * tcp_input.c to verify this.
- */
-
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw,
-			  struct inet_timewait_death_row *twdr)
-{
-	spin_lock(&twdr->death_lock);
-	if (inet_twsk_del_dead_node(tw)) {
-		inet_twsk_put(tw);
-		if (--twdr->tw_count == 0)
-			del_timer(&twdr->tw_timer);
-	}
-	spin_unlock(&twdr->death_lock);
-	__inet_twsk_kill(tw, twdr->hashinfo);
-}
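Callers pair the deschedule with dropping their own reference. A sketch of the call-site shape, using a hypothetical helper modeled on the early-kill path in tcp_v4_rcv():

	static void early_kill(struct inet_timewait_sock *tw)
	{
		/* Unlink from the death row and unhash, then release
		 * the reference the caller still holds.
		 */
		inet_twsk_deschedule(tw, &tcp_death_row);
		inet_twsk_put(tw);
	}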
-
-static void inet_twsk_schedule(struct inet_timewait_sock *tw,
-			       struct inet_timewait_death_row *twdr,
-			       const int timeo)
-{
-	struct hlist_head *list;
-	int slot;
-
-	/* timeout := RTO * 3.5
-	 *
-	 * 3.5 = 1+2+0.5 to wait for two retransmits.
-	 *
-	 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
-	 * our ACK acking that FIN can be lost. If N subsequent retransmitted
-	 * FINs (or previous segments) are lost (probability of such event
-	 * is p^(N+1), where p is probability to lose single packet and
-	 * time to detect the loss is about RTO*(2^N - 1) with exponential
-	 * backoff). Normal timewait length is calculated so that we
-	 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
-	 * [ BTW Linux, following BSD, violates this requirement, waiting
-	 * only for 60sec; we should wait at least for 240 secs.
-	 * Well, 240 consumes too much of resources 8)
-	 * ]
-	 * This interval is not reduced to catch old duplicates and
-	 * responses to our wandering segments living for two MSLs.
-	 * However, if we use PAWS to detect
-	 * old duplicates, we can reduce the interval to bounds required
-	 * by RTO, rather than MSL. So, if peer understands PAWS, we
-	 * kill tw bucket after 3.5*RTO (it is important that this number
-	 * is greater than TS tick!) and detect old duplicates with help
-	 * of PAWS.
-	 */
-	slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
-
-	spin_lock(&twdr->death_lock);
-
-	/* Unlink it, if it was scheduled */
-	if (inet_twsk_del_dead_node(tw))
-		twdr->tw_count--;
-	else
-		atomic_inc(&tw->tw_refcnt);
-
-	if (slot >= INET_TWDR_RECYCLE_SLOTS) {
-		/* Schedule to slow timer */
-		if (timeo >= TCP_TIMEWAIT_LEN) {
-			slot = INET_TWDR_TWKILL_SLOTS - 1;
-		} else {
-			slot = (timeo + twdr->period - 1) / twdr->period;
-			if (slot >= INET_TWDR_TWKILL_SLOTS)
-				slot = INET_TWDR_TWKILL_SLOTS - 1;
-		}
-		tw->tw_ttd = jiffies + timeo;
-		slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
-		list = &twdr->cells[slot];
-	} else {
-		tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
-
-		if (twdr->twcal_hand < 0) {
-			twdr->twcal_hand = 0;
-			twdr->twcal_jiffie = jiffies;
-			twdr->twcal_timer.expires = twdr->twcal_jiffie +
-					(slot << INET_TWDR_RECYCLE_TICK);
-			add_timer(&twdr->twcal_timer);
-		} else {
-			if (time_after(twdr->twcal_timer.expires,
-				       jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
-				mod_timer(&twdr->twcal_timer,
-					  jiffies + (slot << INET_TWDR_RECYCLE_TICK));
-			slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
-		}
-		list = &twdr->twcal_row[slot];
-	}
-
-	hlist_add_head(&tw->tw_death_node, list);
-
-	if (twdr->tw_count++ == 0)
-		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
-	spin_unlock(&twdr->death_lock);
-}
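A worked example of the slot computation at the top of inet_twsk_schedule() above, under assumed constants (HZ = 1000, which in this era's headers would make INET_TWDR_RECYCLE_TICK = 7, i.e. 128-jiffy recycle slots, and INET_TWDR_RECYCLE_SLOTS = 32):

	#include <assert.h>

	#define INET_TWDR_RECYCLE_TICK	7	/* assumed for HZ = 1000 */
	#define INET_TWDR_RECYCLE_SLOTS	32

	int main(void)
	{
		int timeo = 700;	/* 3.5 * RTO, RTO = 200 ms, in jiffies */
		int slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1)
				>> INET_TWDR_RECYCLE_TICK;

		assert(slot == 6);			/* ceil(700 / 128) */
		assert(slot < INET_TWDR_RECYCLE_SLOTS);	/* fast twcal wheel */

		timeo = 60 * 1000;			/* TCP_TIMEWAIT_LEN: 60 s */
		slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1)
				>> INET_TWDR_RECYCLE_TICK;
		assert(slot >= INET_TWDR_RECYCLE_SLOTS);	/* slow hangman wheel */
		return 0;
	}

A recycled (PAWS-shortened) timeout therefore lands on the fine-grained twcal wheel, while a full 60 s TIME_WAIT overflows the recycle range and falls through to the coarse per-period hangman wheel.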
-
-void inet_twdr_twcal_tick(unsigned long data)
-{
-	struct inet_timewait_death_row *twdr;
-	int n, slot;
-	unsigned long j;
-	unsigned long now = jiffies;
-	int killed = 0;
-	int adv = 0;
-
-	twdr = (struct inet_timewait_death_row *)data;
-
-	spin_lock(&twdr->death_lock);
-	if (twdr->twcal_hand < 0)
-		goto out;
-
-	slot = twdr->twcal_hand;
-	j = twdr->twcal_jiffie;
-
-	for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
-		if (time_before_eq(j, now)) {
-			struct hlist_node *node, *safe;
-			struct inet_timewait_sock *tw;
-
-			inet_twsk_for_each_inmate_safe(tw, node, safe,
-						       &twdr->twcal_row[slot]) {
-				__inet_twsk_del_dead_node(tw);
-				__inet_twsk_kill(tw, twdr->hashinfo);
-				inet_twsk_put(tw);
-				killed++;
-			}
-		} else {
-			if (!adv) {
-				adv = 1;
-				twdr->twcal_jiffie = j;
-				twdr->twcal_hand = slot;
-			}
-
-			if (!hlist_empty(&twdr->twcal_row[slot])) {
-				mod_timer(&twdr->twcal_timer, j);
-				goto out;
-			}
-		}
-		j += 1 << INET_TWDR_RECYCLE_TICK;
-		slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
-	}
-	twdr->twcal_hand = -1;
-
-out:
-	if ((twdr->tw_count -= killed) == 0)
-		del_timer(&twdr->tw_timer);
-	NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
-	spin_unlock(&twdr->death_lock);
-}
-
 /* This is not only more efficient than what we used to do, it eliminates
  * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
  *
@@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req);
 EXPORT_SYMBOL(tcp_child_process);
 EXPORT_SYMBOL(tcp_create_openreq_child);
 EXPORT_SYMBOL(tcp_timewait_state_process);
-EXPORT_SYMBOL(inet_twsk_deschedule);
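Since TCP code in this file still calls inet_twsk_deschedule() (see the kill: path above), the export removed here presumably moves with the function to its new home rather than vanishing; going by where this machinery lives in later trees, that would be net/ipv4/inet_timewait_sock.c, with the counterpart line:

	EXPORT_SYMBOL(inet_twsk_deschedule);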