
Merge branch 'master' of git://1984.lsi.us.es/net-next

David S. Miller 13 years ago
commit 9bb862beb6

+ 9 - 0
Documentation/ABI/removed/ip_queue

@@ -0,0 +1,9 @@
+What:		ip_queue
+Date:		finally removed in kernel v3.5.0
+Contact:	Pablo Neira Ayuso <pablo@netfilter.org>
+Description:
+	ip_queue has been replaced by nfnetlink_queue, which provides
+	a more advanced queueing mechanism to user-space. The ip_queue
+	module had already been announced as obsolete years ago.
+
+Users:
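For context, the replacement path for former ip_queue users is nfnetlink_queue, normally driven through libnetfilter_queue. The sketch below is illustrative only and not part of this commit; it assumes libnetfilter_queue is installed and that an iptables rule such as `-j NFQUEUE --queue-num 0` steers traffic to queue 0.

/* Sketch of an nfnetlink_queue consumer (assumes libnetfilter_queue). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>		/* ntohl */
#include <linux/netfilter.h>	/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	/* Accept everything; a real filter would inspect the payload here. */
	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	char buf[4096] __attribute__((aligned));
	int fd, rv;

	if (!h)
		exit(1);
	nfq_bind_pf(h, AF_INET);
	qh = nfq_create_queue(h, 0, &cb, NULL);		/* queue number 0 */
	if (!qh)
		exit(1);
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);	/* like IPQ_COPY_PACKET */

	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);

	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}

The kernel side needs nfnetlink_queue (CONFIG_NETFILTER_NETLINK_QUEUE) enabled.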

+ 11 - 2
Documentation/networking/ip-sysctl.txt

@@ -1301,13 +1301,22 @@ bridge-nf-call-ip6tables - BOOLEAN
 bridge-nf-filter-vlan-tagged - BOOLEAN
 	1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
 	0 : disable this.
-	Default: 1
+	Default: 0
 
 bridge-nf-filter-pppoe-tagged - BOOLEAN
 	1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
 	0 : disable this.
-	Default: 1
+	Default: 0
 
+bridge-nf-pass-vlan-input-dev - BOOLEAN
+	1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
+	interface on the bridge and set the netfilter input device to the vlan.
+	This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT
+	target work with vlan-on-top-of-bridge interfaces.  When no matching
+	vlan interface is found, or this switch is off, the input device is
+	set to the bridge interface.
+	0: disable bridge netfilter vlan interface lookup.
+	Default: 0
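For illustration only (not part of this patch): the knob can be toggled at runtime through procfs. The path below is assumed to sit next to the other bridge-nf-* sysctls under /proc/sys/net/bridge, i.e. the same effect as `sysctl -w net.bridge.bridge-nf-pass-vlan-input-dev=1`.

/* Hypothetical helper: enable bridge-nf-pass-vlan-input-dev from userspace.
 * The path is assumed from the existing bridge-nf-* sysctls. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *p = "/proc/sys/net/bridge/bridge-nf-pass-vlan-input-dev";
	int fd = open(p, O_WRONLY);

	if (fd < 0) {
		perror(p);
		return 1;
	}
	if (write(fd, "1\n", 2) != 2)
		perror("write");
	close(fd);
	return 0;
}

With the switch set to 1, rules such as "iptables -i br0.1" from the description above match on the vlan device rather than on br0.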
 
 
 proc/sys/net/sctp/* Variables:
 

+ 5 - 0
include/linux/ip_vs.h

@@ -89,6 +89,7 @@
 #define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
 #define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */
 
+/* Initial bits allowed in backup server */
 #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
 				  IP_VS_CONN_F_NOOUTPUT | \
 				  IP_VS_CONN_F_INACTIVE | \
@@ -97,6 +98,10 @@
 				  IP_VS_CONN_F_TEMPLATE \
 				 )
 
+/* Bits allowed to update in backup server */
+#define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \
+				      IP_VS_CONN_F_SEQ_MASK)
+
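To spell out the terse comment above: on a backup server, only the bits in IP_VS_CONN_F_BACKUP_UPD_MASK may be refreshed when a sync update arrives for a connection that already exists; everything else, notably the forwarding-method bits, keeps its locally bound value. A hypothetical helper showing the intended bit arithmetic (not the actual ip_vs_sync.c code):

/* Sketch only: fold the flags carried in a sync message into an existing
 * backup connection, letting through just the updatable bits. */
static inline __u32 ip_vs_backup_merge_flags(__u32 old_flags, __u32 msg_flags)
{
	return (old_flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK) |
	       (msg_flags &  IP_VS_CONN_F_BACKUP_UPD_MASK);
}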
 /* Flags that are not sent to backup server start from bit 16 */
 #define IP_VS_CONN_F_NFCT	(1 << 16)	/* use netfilter conntrack */
 

+ 4 - 0
include/linux/netfilter/nf_conntrack_common.h

@@ -83,6 +83,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+	/* Conntrack got a helper explicitly attached via CT target. */
+	IPS_HELPER_BIT = 13,
+	IPS_HELPER = (1 << IPS_HELPER_BIT),
 };
 
 /* Connection tracking event types */

+ 0 - 1
include/linux/netfilter_ipv4/Kbuild

@@ -1,4 +1,3 @@
-header-y += ip_queue.h
 header-y += ip_tables.h
 header-y += ipt_CLUSTERIP.h
 header-y += ipt_ECN.h

+ 0 - 72
include/linux/netfilter_ipv4/ip_queue.h

@@ -1,72 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000 James Morris, this code is GPL.
- */
-#ifndef _IP_QUEUE_H
-#define _IP_QUEUE_H
-
-#ifdef __KERNEL__
-#ifdef DEBUG_IPQ
-#define QDEBUG(x...) printk(KERN_DEBUG ## x)
-#else
-#define QDEBUG(x...)
-#endif  /* DEBUG_IPQ */
-#else
-#include <net/if.h>
-#endif	/* ! __KERNEL__ */
-
-/* Messages sent from kernel */
-typedef struct ipq_packet_msg {
-	unsigned long packet_id;	/* ID of queued packet */
-	unsigned long mark;		/* Netfilter mark value */
-	long timestamp_sec;		/* Packet arrival time (seconds) */
-	long timestamp_usec;		/* Packet arrvial time (+useconds) */
-	unsigned int hook;		/* Netfilter hook we rode in on */
-	char indev_name[IFNAMSIZ];	/* Name of incoming interface */
-	char outdev_name[IFNAMSIZ];	/* Name of outgoing interface */
-	__be16 hw_protocol;		/* Hardware protocol (network order) */
-	unsigned short hw_type;		/* Hardware type */
-	unsigned char hw_addrlen;	/* Hardware address length */
-	unsigned char hw_addr[8];	/* Hardware address */
-	size_t data_len;		/* Length of packet data */
-	unsigned char payload[0];	/* Optional packet data */
-} ipq_packet_msg_t;
-
-/* Messages sent from userspace */
-typedef struct ipq_mode_msg {
-	unsigned char value;		/* Requested mode */
-	size_t range;			/* Optional range of packet requested */
-} ipq_mode_msg_t;
-
-typedef struct ipq_verdict_msg {
-	unsigned int value;		/* Verdict to hand to netfilter */
-	unsigned long id;		/* Packet ID for this verdict */
-	size_t data_len;		/* Length of replacement data */
-	unsigned char payload[0];	/* Optional replacement packet */
-} ipq_verdict_msg_t;
-
-typedef struct ipq_peer_msg {
-	union {
-		ipq_verdict_msg_t verdict;
-		ipq_mode_msg_t mode;
-	} msg;
-} ipq_peer_msg_t;
-
-/* Packet delivery modes */
-enum {
-	IPQ_COPY_NONE,		/* Initial mode, packets are dropped */
-	IPQ_COPY_META,		/* Copy metadata */
-	IPQ_COPY_PACKET		/* Copy metadata + packet (range) */
-};
-#define IPQ_COPY_MAX IPQ_COPY_PACKET
-
-/* Types of messages */
-#define IPQM_BASE	0x10	/* standard netlink messages below this */
-#define IPQM_MODE	(IPQM_BASE + 1)		/* Mode request from peer */
-#define IPQM_VERDICT	(IPQM_BASE + 2)		/* Verdict from peer */ 
-#define IPQM_PACKET	(IPQM_BASE + 3)		/* Packet from kernel */
-#define IPQM_MAX	(IPQM_BASE + 4)
-
-#endif /*_IP_QUEUE_H*/

+ 1 - 1
include/linux/netlink.h

@@ -7,7 +7,7 @@
 #define NETLINK_ROUTE		0	/* Routing/device hook				*/
 #define NETLINK_UNUSED		1	/* Unused number				*/
 #define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols 	*/
-#define NETLINK_FIREWALL	3	/* Firewalling hook				*/
+#define NETLINK_FIREWALL	3	/* Unused number, formerly ip_queue		*/
 #define NETLINK_SOCK_DIAG	4	/* socket monitoring				*/
 #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
 #define NETLINK_XFRM		6	/* ipsec */

+ 79 - 8
include/net/ip_vs.h

@@ -504,6 +504,7 @@ struct ip_vs_conn {
 						 * state transition triggerd
 						 * synchronization
 						 */
+	unsigned long		sync_endtime;	/* jiffies + sent_retries */
 
 	/* Control members */
 	struct ip_vs_conn       *control;       /* Master control connection */
@@ -783,6 +784,16 @@ struct ip_vs_app {
 	void (*timeout_change)(struct ip_vs_app *app, int flags);
 };
 
+struct ipvs_master_sync_state {
+	struct list_head	sync_queue;
+	struct ip_vs_sync_buff	*sync_buff;
+	int			sync_queue_len;
+	unsigned int		sync_queue_delay;
+	struct task_struct	*master_thread;
+	struct delayed_work	master_wakeup_work;
+	struct netns_ipvs	*ipvs;
+};
+
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
@@ -869,10 +880,15 @@
 #endif
 	int			sysctl_snat_reroute;
 	int			sysctl_sync_ver;
+	int			sysctl_sync_ports;
+	int			sysctl_sync_qlen_max;
+	int			sysctl_sync_sock_size;
 	int			sysctl_cache_bypass;
 	int			sysctl_expire_nodest_conn;
 	int			sysctl_expire_quiescent_template;
 	int			sysctl_sync_threshold[2];
+	unsigned int		sysctl_sync_refresh_period;
+	int			sysctl_sync_retries;
 	int			sysctl_nat_icmp_send;
 
 	/* ip_vs_lblc */
@@ -888,13 +904,11 @@
 	spinlock_t		est_lock;
 	struct timer_list	est_timer;	/* Estimation timer */
 	/* ip_vs_sync */
-	struct list_head	sync_queue;
 	spinlock_t		sync_lock;
-	struct ip_vs_sync_buff  *sync_buff;
+	struct ipvs_master_sync_state *ms;
 	spinlock_t		sync_buff_lock;
-	struct sockaddr_in	sync_mcast_addr;
-	struct task_struct	*master_thread;
-	struct task_struct	*backup_thread;
+	struct task_struct	**backup_threads;
+	int			threads_mask;
 	int			send_mesg_maxlen;
 	int			recv_mesg_maxlen;
 	volatile int		sync_state;
@@ -911,6 +925,14 @@
 #define DEFAULT_SYNC_THRESHOLD	3
 #define DEFAULT_SYNC_PERIOD	50
 #define DEFAULT_SYNC_VER	1
+#define DEFAULT_SYNC_REFRESH_PERIOD	(0U * HZ)
+#define DEFAULT_SYNC_RETRIES		0
+#define IPVS_SYNC_WAKEUP_RATE	8
+#define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
+#define IPVS_SYNC_SEND_DELAY	(HZ / 50)
+#define IPVS_SYNC_CHECK_PERIOD	HZ
+#define IPVS_SYNC_FLUSH_TIME	(HZ * 2)
+#define IPVS_SYNC_PORTS_MAX	(1 << 6)
 
 #ifdef CONFIG_SYSCTL
 
@@ -921,7 +943,17 @@ static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
 
 static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
 {
-	return ipvs->sysctl_sync_threshold[1];
+	return ACCESS_ONCE(ipvs->sysctl_sync_threshold[1]);
+}
+
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+	return ACCESS_ONCE(ipvs->sysctl_sync_refresh_period);
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_retries;
 }
 
 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
@@ -929,6 +961,21 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 	return ipvs->sysctl_sync_ver;
 }
 
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+	return ACCESS_ONCE(ipvs->sysctl_sync_ports);
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_qlen_max;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_sock_size;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -941,11 +988,36 @@ static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
 	return DEFAULT_SYNC_PERIOD;
 }
 
+static inline unsigned int sysctl_sync_refresh_period(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_REFRESH_PERIOD;
+}
+
+static inline int sysctl_sync_retries(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_RETRIES & 3;
+}
+
 static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 {
 	return DEFAULT_SYNC_VER;
 }
 
+static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
+{
+	return 1;
+}
+
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return IPVS_SYNC_QLEN_MAX;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return 0;
+}
+
 #endif
 
 /*
@@ -1185,7 +1257,6 @@ extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg);
 extern struct ip_vs_stats ip_vs_stats;
 extern int sysctl_ip_vs_sync_ver;
 
-extern void ip_vs_sync_switch_mode(struct net *net, int mode);
 extern struct ip_vs_service *
 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 		  const union nf_inet_addr *vaddr, __be16 vport);
@@ -1219,7 +1290,7 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
 			     __u8 syncid);
 extern int stop_sync_thread(struct net *net, int state);
-extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 
 
 /*
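One note on the sysctl accessors added in this header: the sync tunables are read from the packet path without a lock, which is why several of them go through ACCESS_ONCE() — each caller gets a single, consistent snapshot even while a sysctl write is in flight. Roughly how such an accessor is meant to be used (hypothetical helper, not code from this commit):

/* Sketch: decide whether this packet should generate a sync message.
 * Each tunable is sampled once per decision, so a concurrent sysctl
 * write cannot yield two different values inside one check. */
static inline int ip_vs_sync_due(struct netns_ipvs *ipvs, int pkts)
{
	int period = sysctl_sync_period(ipvs);	/* ACCESS_ONCE() inside */

	return period && (pkts % period == sysctl_sync_threshold(ipvs));
}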

+ 2 - 8
include/net/netfilter/nf_conntrack.h

@@ -321,14 +321,8 @@ extern unsigned int nf_conntrack_max;
 extern unsigned int nf_conntrack_hash_rnd;
 void init_nf_conntrack_hash_rnd(void);
 
-#define NF_CT_STAT_INC(net, count)	\
-	__this_cpu_inc((net)->ct.stat->count)
-#define NF_CT_STAT_INC_ATOMIC(net, count)		\
-do {							\
-	local_bh_disable();				\
-	__this_cpu_inc((net)->ct.stat->count);		\
-	local_bh_enable();				\
-} while (0)
+#define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
+#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
         MODULE_ALIAS("nfct-helper-" helper)
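The simplified NF_CT_STAT_INC_ATOMIC() relies on this_cpu_inc() being safe against preemption and interrupts on its own, so the explicit local_bh_disable()/local_bh_enable() bracket is no longer needed. Call sites are unchanged; for illustration (assumed callers, not lines added by this commit):

/* Both forms now compile down to a single per-cpu increment. */
NF_CT_STAT_INC(net, insert);		/* caller already runs with BH disabled */
NF_CT_STAT_INC_ATOMIC(net, found);	/* caller may be preemptible */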

+ 2 - 2
include/net/netfilter/nf_conntrack_helper.h

@@ -60,8 +60,8 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 	return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
 }
 
-extern int nf_conntrack_helper_init(void);
-extern void nf_conntrack_helper_fini(void);
+extern int nf_conntrack_helper_init(struct net *net);
+extern void nf_conntrack_helper_fini(struct net *net);
 
 extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
 				       unsigned int protoff,

+ 3 - 0
include/net/netns/conntrack.h

@@ -26,11 +26,14 @@ struct netns_ct {
 	int			sysctl_tstamp;
 	int			sysctl_checksum;
 	unsigned int		sysctl_log_invalid; /* Log invalid packets */
+	int			sysctl_auto_assign_helper;
+	bool			auto_assign_helper_warned;
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 	struct ctl_table_header	*acct_sysctl_header;
 	struct ctl_table_header	*tstamp_sysctl_header;
 	struct ctl_table_header	*event_sysctl_header;
+	struct ctl_table_header	*helper_sysctl_header;
 #endif
 	char			*slabname;
 };

+ 24 - 2
net/bridge/br_netfilter.c

@@ -54,12 +54,14 @@ static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
 static int brnf_filter_vlan_tagged __read_mostly = 0;
 static int brnf_filter_pppoe_tagged __read_mostly = 0;
+static int brnf_pass_vlan_indev __read_mostly = 0;
 #else
 #define brnf_call_iptables 1
 #define brnf_call_ip6tables 1
 #define brnf_call_arptables 1
 #define brnf_filter_vlan_tagged 0
 #define brnf_filter_pppoe_tagged 0
+#define brnf_pass_vlan_indev 0
 #endif
 
 #define IS_IP(skb) \
@@ -503,6 +505,19 @@ bridged_dnat:
 	return 0;
 }
 
+static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct net_device *vlan, *br;
+
+	br = bridge_parent(dev);
+	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
+		return br;
+
+	vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK);
+
+	return vlan ? vlan : br;
+}
+
 /* Some common code for IPv4/IPv6 */
 static struct net_device *setup_pre_routing(struct sk_buff *skb)
 {
@@ -515,7 +530,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 
 	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
 	nf_bridge->physindev = skb->dev;
-	skb->dev = bridge_parent(skb->dev);
+	skb->dev = brnf_get_logical_dev(skb, skb->dev);
 	if (skb->protocol == htons(ETH_P_8021Q))
 		nf_bridge->mask |= BRNF_8021Q;
 	else if (skb->protocol == htons(ETH_P_PPP_SES))
@@ -774,7 +789,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	else
 		skb->protocol = htons(ETH_P_IPV6);
 
-	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
+	NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, in), parent,
 		br_nf_forward_finish);
 
 	return NF_STOLEN;
@@ -1002,6 +1017,13 @@ static ctl_table brnf_table[] = {
 		.mode		= 0644,
 		.proc_handler	= brnf_sysctl_call_tables,
 	},
+	{
+		.procname	= "bridge-nf-pass-vlan-input-dev",
+		.data		= &brnf_pass_vlan_indev,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= brnf_sysctl_call_tables,
+	},
 	{ }
 };
 #endif

+ 2 - 0
net/core/sock.c

@@ -259,7 +259,9 @@ static struct lock_class_key af_callback_keys[AF_MAX];
 
 /* Run time adjustable parameters. */
 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
+EXPORT_SYMBOL(sysctl_wmem_max);
 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
+EXPORT_SYMBOL(sysctl_rmem_max);
 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 

+ 0 - 3
net/ipv4/netfilter/Makefile

@@ -66,6 +66,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
 
 # just filtering instance of ARP tables for now
 obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
-
-obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-

+ 0 - 639
net/ipv4/netfilter/ip_queue.c

@@ -1,639 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
- * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/security.h>
-#include <linux/net.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <net/netfilter/nf_queue.h>
-#include <net/ip.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip_queue"
-#define NET_IPQ_QMAX 2088
-#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
-
-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
-
-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_SPINLOCK(queue_lock);
-static int peer_pid __read_mostly;
-static unsigned int copy_range __read_mostly;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
-static DEFINE_MUTEX(ipqnl_mutex);
-
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
-       list_add_tail(&entry->list, &queue_list);
-       queue_total++;
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status = 0;
-
-	switch(mode) {
-	case IPQ_COPY_NONE:
-	case IPQ_COPY_META:
-		copy_mode = mode;
-		copy_range = 0;
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (range > 0xFFFF)
-			range = 0xFFFF;
-		copy_range = range;
-		copy_mode = mode;
-		break;
-
-	default:
-		status = -EINVAL;
-
-	}
-	return status;
-}
-
-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
-
-static inline void
-__ipq_reset(void)
-{
-	peer_pid = 0;
-	net_disable_timestamp();
-	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NULL, 0);
-}
-
-static struct nf_queue_entry *
-ipq_find_dequeue_entry(unsigned long id)
-{
-	struct nf_queue_entry *entry = NULL, *i;
-
-	spin_lock_bh(&queue_lock);
-
-	list_for_each_entry(i, &queue_list, list) {
-		if ((unsigned long)i == id) {
-			entry = i;
-			break;
-		}
-	}
-
-	if (entry) {
-		list_del(&entry->list);
-		queue_total--;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	return entry;
-}
-
-static void
-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct nf_queue_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, &queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data)) {
-			list_del(&entry->list);
-			queue_total--;
-			nf_reinject(entry, NF_DROP);
-		}
-	}
-}
-
-static void
-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	spin_lock_bh(&queue_lock);
-	__ipq_flush(cmpfn, data);
-	spin_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
-{
-	sk_buff_data_t old_tail;
-	size_t size = 0;
-	size_t data_len = 0;
-	struct sk_buff *skb;
-	struct ipq_packet_msg *pmsg;
-	struct nlmsghdr *nlh;
-	struct timeval tv;
-
-	switch (ACCESS_ONCE(copy_mode)) {
-	case IPQ_COPY_META:
-	case IPQ_COPY_NONE:
-		size = NLMSG_SPACE(sizeof(*pmsg));
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-		    (*errp = skb_checksum_help(entry->skb)))
-			return NULL;
-
-		data_len = ACCESS_ONCE(copy_range);
-		if (data_len == 0 || data_len > entry->skb->len)
-			data_len = entry->skb->len;
-
-		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
-		break;
-
-	default:
-		*errp = -EINVAL;
-		return NULL;
-	}
-
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		goto nlmsg_failure;
-
-	old_tail = skb->tail;
-	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
-	pmsg = NLMSG_DATA(nlh);
-	memset(pmsg, 0, sizeof(*pmsg));
-
-	pmsg->packet_id       = (unsigned long )entry;
-	pmsg->data_len        = data_len;
-	tv = ktime_to_timeval(entry->skb->tstamp);
-	pmsg->timestamp_sec   = tv.tv_sec;
-	pmsg->timestamp_usec  = tv.tv_usec;
-	pmsg->mark            = entry->skb->mark;
-	pmsg->hook            = entry->hook;
-	pmsg->hw_protocol     = entry->skb->protocol;
-
-	if (entry->indev)
-		strcpy(pmsg->indev_name, entry->indev->name);
-	else
-		pmsg->indev_name[0] = '\0';
-
-	if (entry->outdev)
-		strcpy(pmsg->outdev_name, entry->outdev->name);
-	else
-		pmsg->outdev_name[0] = '\0';
-
-	if (entry->indev && entry->skb->dev &&
-	    entry->skb->mac_header != entry->skb->network_header) {
-		pmsg->hw_type = entry->skb->dev->type;
-		pmsg->hw_addrlen = dev_parse_header(entry->skb,
-						    pmsg->hw_addr);
-	}
-
-	if (data_len)
-		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
-			BUG();
-
-	nlh->nlmsg_len = skb->tail - old_tail;
-	return skb;
-
-nlmsg_failure:
-	kfree_skb(skb);
-	*errp = -EINVAL;
-	printk(KERN_ERR "ip_queue: error creating packet message\n");
-	return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
-{
-	int status = -EINVAL;
-	struct sk_buff *nskb;
-
-	if (copy_mode == IPQ_COPY_NONE)
-		return -EAGAIN;
-
-	nskb = ipq_build_packet_message(entry, &status);
-	if (nskb == NULL)
-		return status;
-
-	spin_lock_bh(&queue_lock);
-
-	if (!peer_pid)
-		goto err_out_free_nskb;
-
-	if (queue_total >= queue_maxlen) {
-		queue_dropped++;
-		status = -ENOSPC;
-		if (net_ratelimit())
-			  printk (KERN_WARNING "ip_queue: full at %d entries, "
-				  "dropping packets(s). Dropped: %d\n", queue_total,
-				  queue_dropped);
-		goto err_out_free_nskb;
-	}
-
-	/* netlink_unicast will either free the nskb or attach it to a socket */
-	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
-		queue_user_dropped++;
-		goto err_out_unlock;
-	}
-
-	__ipq_enqueue_entry(entry);
-
-	spin_unlock_bh(&queue_lock);
-	return status;
-
-err_out_free_nskb:
-	kfree_skb(nskb);
-
-err_out_unlock:
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
-{
-	int diff;
-	struct iphdr *user_iph = (struct iphdr *)v->payload;
-	struct sk_buff *nskb;
-
-	if (v->data_len < sizeof(*user_iph))
-		return 0;
-	diff = v->data_len - e->skb->len;
-	if (diff < 0) {
-		if (pskb_trim(e->skb, v->data_len))
-			return -ENOMEM;
-	} else if (diff > 0) {
-		if (v->data_len > 0xFFFF)
-			return -EINVAL;
-		if (diff > skb_tailroom(e->skb)) {
-			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
-					       diff, GFP_ATOMIC);
-			if (!nskb) {
-				printk(KERN_WARNING "ip_queue: error "
-				      "in mangle, dropping packet\n");
-				return -ENOMEM;
-			}
-			kfree_skb(e->skb);
-			e->skb = nskb;
-		}
-		skb_put(e->skb, diff);
-	}
-	if (!skb_make_writable(e->skb, v->data_len))
-		return -ENOMEM;
-	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
-	e->skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
-	struct nf_queue_entry *entry;
-
-	if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
-		return -EINVAL;
-
-	entry = ipq_find_dequeue_entry(vmsg->id);
-	if (entry == NULL)
-		return -ENOENT;
-	else {
-		int verdict = vmsg->value;
-
-		if (vmsg->data_len && vmsg->data_len == len)
-			if (ipq_mangle_ipv4(vmsg, entry) < 0)
-				verdict = NF_DROP;
-
-		nf_reinject(entry, verdict);
-		return 0;
-	}
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status;
-
-	spin_lock_bh(&queue_lock);
-	status = __ipq_set_mode(mode, range);
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
-		 unsigned char type, unsigned int len)
-{
-	int status = 0;
-
-	if (len < sizeof(*pmsg))
-		return -EINVAL;
-
-	switch (type) {
-	case IPQM_MODE:
-		status = ipq_set_mode(pmsg->msg.mode.value,
-				      pmsg->msg.mode.range);
-		break;
-
-	case IPQM_VERDICT:
-		status = ipq_set_verdict(&pmsg->msg.verdict,
-					 len - sizeof(*pmsg));
-		break;
-	default:
-		status = -EINVAL;
-	}
-	return status;
-}
-
-static int
-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
-{
-	if (entry->indev)
-		if (entry->indev->ifindex == ifindex)
-			return 1;
-	if (entry->outdev)
-		if (entry->outdev->ifindex == ifindex)
-			return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (entry->skb->nf_bridge) {
-		if (entry->skb->nf_bridge->physindev &&
-		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
-			return 1;
-		if (entry->skb->nf_bridge->physoutdev &&
-		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
-			return 1;
-	}
-#endif
-	return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
-	ipq_flush(dev_cmp, ifindex);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-__ipq_rcv_skb(struct sk_buff *skb)
-{
-	int status, type, pid, flags;
-	unsigned int nlmsglen, skblen;
-	struct nlmsghdr *nlh;
-	bool enable_timestamp = false;
-
-	skblen = skb->len;
-	if (skblen < sizeof(*nlh))
-		return;
-
-	nlh = nlmsg_hdr(skb);
-	nlmsglen = nlh->nlmsg_len;
-	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
-		return;
-
-	pid = nlh->nlmsg_pid;
-	flags = nlh->nlmsg_flags;
-
-	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (flags & MSG_TRUNC)
-		RCV_SKB_FAIL(-ECOMM);
-
-	type = nlh->nlmsg_type;
-	if (type < NLMSG_NOOP || type >= IPQM_MAX)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (type <= IPQM_BASE)
-		return;
-
-	if (!capable(CAP_NET_ADMIN))
-		RCV_SKB_FAIL(-EPERM);
-
-	spin_lock_bh(&queue_lock);
-
-	if (peer_pid) {
-		if (peer_pid != pid) {
-			spin_unlock_bh(&queue_lock);
-			RCV_SKB_FAIL(-EBUSY);
-		}
-	} else {
-		enable_timestamp = true;
-		peer_pid = pid;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	if (enable_timestamp)
-		net_enable_timestamp();
-	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
-				  nlmsglen - NLMSG_LENGTH(0));
-	if (status < 0)
-		RCV_SKB_FAIL(status);
-
-	if (flags & NLM_F_ACK)
-		netlink_ack(skb, nlh, 0);
-}
-
-static void
-ipq_rcv_skb(struct sk_buff *skb)
-{
-	mutex_lock(&ipqnl_mutex);
-	__ipq_rcv_skb(skb);
-	mutex_unlock(&ipqnl_mutex);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
-		  unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	/* Drop any packets associated with the downed device */
-	if (event == NETDEV_DOWN)
-		ipq_dev_drop(dev->ifindex);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
-	.notifier_call	= ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
-		 unsigned long event, void *ptr)
-{
-	struct netlink_notify *n = ptr;
-
-	if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
-		spin_lock_bh(&queue_lock);
-		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
-			__ipq_reset();
-		spin_unlock_bh(&queue_lock);
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
-	.notifier_call	= ipq_rcv_nl_event,
-};
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
-	{
-		.procname	= NET_IPQ_QMAX_NAME,
-		.data		= &queue_maxlen,
-		.maxlen		= sizeof(queue_maxlen),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{ }
-};
-#endif
-
-#ifdef CONFIG_PROC_FS
-static int ip_queue_show(struct seq_file *m, void *v)
-{
-	spin_lock_bh(&queue_lock);
-
-	seq_printf(m,
-		      "Peer PID          : %d\n"
-		      "Copy mode         : %hu\n"
-		      "Copy range        : %u\n"
-		      "Queue length      : %u\n"
-		      "Queue max. length : %u\n"
-		      "Queue dropped     : %u\n"
-		      "Netlink dropped   : %u\n",
-		      peer_pid,
-		      copy_mode,
-		      copy_range,
-		      queue_total,
-		      queue_maxlen,
-		      queue_dropped,
-		      queue_user_dropped);
-
-	spin_unlock_bh(&queue_lock);
-	return 0;
-}
-
-static int ip_queue_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ip_queue_show, NULL);
-}
-
-static const struct file_operations ip_queue_proc_fops = {
-	.open		= ip_queue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
-#endif
-
-static const struct nf_queue_handler nfqh = {
-	.name	= "ip_queue",
-	.outfn	= &ipq_enqueue_packet,
-};
-
-static int __init ip_queue_init(void)
-{
-	int status = -ENOMEM;
-	struct proc_dir_entry *proc __maybe_unused;
-
-	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
-				      ipq_rcv_skb, NULL, THIS_MODULE);
-	if (ipqnl == NULL) {
-		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
-		goto cleanup_netlink_notifier;
-	}
-
-#ifdef CONFIG_PROC_FS
-	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
-			   &ip_queue_proc_fops);
-	if (!proc) {
-		printk(KERN_ERR "ip_queue: failed to create proc entry\n");
-		goto cleanup_ipqnl;
-	}
-#endif
-	register_netdevice_notifier(&ipq_dev_notifier);
-#ifdef CONFIG_SYSCTL
-	ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);
-#endif
-	status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
-	if (status < 0) {
-		printk(KERN_ERR "ip_queue: failed to register queue handler\n");
-		goto cleanup_sysctl;
-	}
-	return status;
-
-cleanup_sysctl:
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-cleanup_ipqnl: __maybe_unused
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-cleanup_netlink_notifier:
-	netlink_unregister_notifier(&ipq_nl_notifier);
-	return status;
-}
-
-static void __exit ip_queue_fini(void)
-{
-	nf_unregister_queue_handlers(&nfqh);
-
-	ipq_flush(NULL, 0);
-
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-	netlink_unregister_notifier(&ipq_nl_notifier);
-}
-
-MODULE_DESCRIPTION("IPv4 packet queue handler");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
-
-module_init(ip_queue_init);
-module_exit(ip_queue_fini);

+ 0 - 22
net/ipv6/netfilter/Kconfig

@@ -25,28 +25,6 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP6_NF_QUEUE
-	tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"
-	depends on INET && IPV6 && NETFILTER
-	depends on NETFILTER_ADVANCED
-	---help---
-
-	  This option adds a queue handler to the kernel for IPv6
-	  packets which enables users to receive the filtered packets
-	  with QUEUE target using libipq.
-
-	  This option enables the old IPv6-only "ip6_queue" implementation
-	  which has been obsoleted by the new "nfnetlink_queue" code (see
-	  CONFIG_NETFILTER_NETLINK_QUEUE).
-
-	  (C) Fernando Anton 2001
-	  IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
-	  Universidad Carlos III de Madrid
-	  Universidad Politecnica de Alcala de Henares
-	  email: <fanton@it.uc3m.es>.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6

+ 0 - 1
net/ipv6/netfilter/Makefile

@@ -6,7 +6,6 @@
 obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
 obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
-obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 

+ 0 - 641
net/ipv6/netfilter/ip6_queue.c

@@ -1,641 +0,0 @@
-/*
- * This is a module which is used for queueing IPv6 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2001 Fernando Anton, this code is GPL.
- *     IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
- *     Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
- *     Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
- *     email: fanton@it.uc3m.es
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ipv6.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/netfilter/nf_queue.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip6_queue"
-#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
-
-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
-
-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_SPINLOCK(queue_lock);
-static int peer_pid __read_mostly;
-static unsigned int copy_range __read_mostly;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
-static DEFINE_MUTEX(ipqnl_mutex);
-
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
-       list_add_tail(&entry->list, &queue_list);
-       queue_total++;
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status = 0;
-
-	switch(mode) {
-	case IPQ_COPY_NONE:
-	case IPQ_COPY_META:
-		copy_mode = mode;
-		copy_range = 0;
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (range > 0xFFFF)
-			range = 0xFFFF;
-		copy_range = range;
-		copy_mode = mode;
-		break;
-
-	default:
-		status = -EINVAL;
-
-	}
-	return status;
-}
-
-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
-
-static inline void
-__ipq_reset(void)
-{
-	peer_pid = 0;
-	net_disable_timestamp();
-	__ipq_set_mode(IPQ_COPY_NONE, 0);
-	__ipq_flush(NULL, 0);
-}
-
-static struct nf_queue_entry *
-ipq_find_dequeue_entry(unsigned long id)
-{
-	struct nf_queue_entry *entry = NULL, *i;
-
-	spin_lock_bh(&queue_lock);
-
-	list_for_each_entry(i, &queue_list, list) {
-		if ((unsigned long)i == id) {
-			entry = i;
-			break;
-		}
-	}
-
-	if (entry) {
-		list_del(&entry->list);
-		queue_total--;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	return entry;
-}
-
-static void
-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	struct nf_queue_entry *entry, *next;
-
-	list_for_each_entry_safe(entry, next, &queue_list, list) {
-		if (!cmpfn || cmpfn(entry, data)) {
-			list_del(&entry->list);
-			queue_total--;
-			nf_reinject(entry, NF_DROP);
-		}
-	}
-}
-
-static void
-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
-	spin_lock_bh(&queue_lock);
-	__ipq_flush(cmpfn, data);
-	spin_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
-{
-	sk_buff_data_t old_tail;
-	size_t size = 0;
-	size_t data_len = 0;
-	struct sk_buff *skb;
-	struct ipq_packet_msg *pmsg;
-	struct nlmsghdr *nlh;
-	struct timeval tv;
-
-	switch (ACCESS_ONCE(copy_mode)) {
-	case IPQ_COPY_META:
-	case IPQ_COPY_NONE:
-		size = NLMSG_SPACE(sizeof(*pmsg));
-		break;
-
-	case IPQ_COPY_PACKET:
-		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-		    (*errp = skb_checksum_help(entry->skb)))
-			return NULL;
-
-		data_len = ACCESS_ONCE(copy_range);
-		if (data_len == 0 || data_len > entry->skb->len)
-			data_len = entry->skb->len;
-
-		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
-		break;
-
-	default:
-		*errp = -EINVAL;
-		return NULL;
-	}
-
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		goto nlmsg_failure;
-
-	old_tail = skb->tail;
-	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
-	pmsg = NLMSG_DATA(nlh);
-	memset(pmsg, 0, sizeof(*pmsg));
-
-	pmsg->packet_id       = (unsigned long )entry;
-	pmsg->data_len        = data_len;
-	tv = ktime_to_timeval(entry->skb->tstamp);
-	pmsg->timestamp_sec   = tv.tv_sec;
-	pmsg->timestamp_usec  = tv.tv_usec;
-	pmsg->mark            = entry->skb->mark;
-	pmsg->hook            = entry->hook;
-	pmsg->hw_protocol     = entry->skb->protocol;
-
-	if (entry->indev)
-		strcpy(pmsg->indev_name, entry->indev->name);
-	else
-		pmsg->indev_name[0] = '\0';
-
-	if (entry->outdev)
-		strcpy(pmsg->outdev_name, entry->outdev->name);
-	else
-		pmsg->outdev_name[0] = '\0';
-
-	if (entry->indev && entry->skb->dev &&
-	    entry->skb->mac_header != entry->skb->network_header) {
-		pmsg->hw_type = entry->skb->dev->type;
-		pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
-	}
-
-	if (data_len)
-		if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
-			BUG();
-
-	nlh->nlmsg_len = skb->tail - old_tail;
-	return skb;
-
-nlmsg_failure:
-	kfree_skb(skb);
-	*errp = -EINVAL;
-	printk(KERN_ERR "ip6_queue: error creating packet message\n");
-	return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
-{
-	int status = -EINVAL;
-	struct sk_buff *nskb;
-
-	if (copy_mode == IPQ_COPY_NONE)
-		return -EAGAIN;
-
-	nskb = ipq_build_packet_message(entry, &status);
-	if (nskb == NULL)
-		return status;
-
-	spin_lock_bh(&queue_lock);
-
-	if (!peer_pid)
-		goto err_out_free_nskb;
-
-	if (queue_total >= queue_maxlen) {
-		queue_dropped++;
-		status = -ENOSPC;
-		if (net_ratelimit())
-			printk (KERN_WARNING "ip6_queue: fill at %d entries, "
-				"dropping packet(s).  Dropped: %d\n", queue_total,
-				queue_dropped);
-		goto err_out_free_nskb;
-	}
-
-	/* netlink_unicast will either free the nskb or attach it to a socket */
-	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
-	if (status < 0) {
-		queue_user_dropped++;
-		goto err_out_unlock;
-	}
-
-	__ipq_enqueue_entry(entry);
-
-	spin_unlock_bh(&queue_lock);
-	return status;
-
-err_out_free_nskb:
-	kfree_skb(nskb);
-
-err_out_unlock:
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
-{
-	int diff;
-	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
-	struct sk_buff *nskb;
-
-	if (v->data_len < sizeof(*user_iph))
-		return 0;
-	diff = v->data_len - e->skb->len;
-	if (diff < 0) {
-		if (pskb_trim(e->skb, v->data_len))
-			return -ENOMEM;
-	} else if (diff > 0) {
-		if (v->data_len > 0xFFFF)
-			return -EINVAL;
-		if (diff > skb_tailroom(e->skb)) {
-			nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
-					       diff, GFP_ATOMIC);
-			if (!nskb) {
-				printk(KERN_WARNING "ip6_queue: OOM "
-				      "in mangle, dropping packet\n");
-				return -ENOMEM;
-			}
-			kfree_skb(e->skb);
-			e->skb = nskb;
-		}
-		skb_put(e->skb, diff);
-	}
-	if (!skb_make_writable(e->skb, v->data_len))
-		return -ENOMEM;
-	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
-	e->skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
-	struct nf_queue_entry *entry;
-
-	if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
-		return -EINVAL;
-
-	entry = ipq_find_dequeue_entry(vmsg->id);
-	if (entry == NULL)
-		return -ENOENT;
-	else {
-		int verdict = vmsg->value;
-
-		if (vmsg->data_len && vmsg->data_len == len)
-			if (ipq_mangle_ipv6(vmsg, entry) < 0)
-				verdict = NF_DROP;
-
-		nf_reinject(entry, verdict);
-		return 0;
-	}
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
-	int status;
-
-	spin_lock_bh(&queue_lock);
-	status = __ipq_set_mode(mode, range);
-	spin_unlock_bh(&queue_lock);
-	return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
-		 unsigned char type, unsigned int len)
-{
-	int status = 0;
-
-	if (len < sizeof(*pmsg))
-		return -EINVAL;
-
-	switch (type) {
-	case IPQM_MODE:
-		status = ipq_set_mode(pmsg->msg.mode.value,
-				      pmsg->msg.mode.range);
-		break;
-
-	case IPQM_VERDICT:
-		status = ipq_set_verdict(&pmsg->msg.verdict,
-					 len - sizeof(*pmsg));
-		break;
-	default:
-		status = -EINVAL;
-	}
-	return status;
-}
-
-static int
-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
-{
-	if (entry->indev)
-		if (entry->indev->ifindex == ifindex)
-			return 1;
-
-	if (entry->outdev)
-		if (entry->outdev->ifindex == ifindex)
-			return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
-	if (entry->skb->nf_bridge) {
-		if (entry->skb->nf_bridge->physindev &&
-		    entry->skb->nf_bridge->physindev->ifindex == ifindex)
-			return 1;
-		if (entry->skb->nf_bridge->physoutdev &&
-		    entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
-			return 1;
-	}
-#endif
-	return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
-	ipq_flush(dev_cmp, ifindex);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-__ipq_rcv_skb(struct sk_buff *skb)
-{
-	int status, type, pid, flags;
-	unsigned int nlmsglen, skblen;
-	struct nlmsghdr *nlh;
-	bool enable_timestamp = false;
-
-	skblen = skb->len;
-	if (skblen < sizeof(*nlh))
-		return;
-
-	nlh = nlmsg_hdr(skb);
-	nlmsglen = nlh->nlmsg_len;
-	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
-		return;
-
-	pid = nlh->nlmsg_pid;
-	flags = nlh->nlmsg_flags;
-
-	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (flags & MSG_TRUNC)
-		RCV_SKB_FAIL(-ECOMM);
-
-	type = nlh->nlmsg_type;
-	if (type < NLMSG_NOOP || type >= IPQM_MAX)
-		RCV_SKB_FAIL(-EINVAL);
-
-	if (type <= IPQM_BASE)
-		return;
-
-	if (!capable(CAP_NET_ADMIN))
-		RCV_SKB_FAIL(-EPERM);
-
-	spin_lock_bh(&queue_lock);
-
-	if (peer_pid) {
-		if (peer_pid != pid) {
-			spin_unlock_bh(&queue_lock);
-			RCV_SKB_FAIL(-EBUSY);
-		}
-	} else {
-		enable_timestamp = true;
-		peer_pid = pid;
-	}
-
-	spin_unlock_bh(&queue_lock);
-	if (enable_timestamp)
-		net_enable_timestamp();
-
-	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
-				  nlmsglen - NLMSG_LENGTH(0));
-	if (status < 0)
-		RCV_SKB_FAIL(status);
-
-	if (flags & NLM_F_ACK)
-		netlink_ack(skb, nlh, 0);
-}
-
-static void
-ipq_rcv_skb(struct sk_buff *skb)
-{
-	mutex_lock(&ipqnl_mutex);
-	__ipq_rcv_skb(skb);
-	mutex_unlock(&ipqnl_mutex);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
-		  unsigned long event, void *ptr)
-{
-	struct net_device *dev = ptr;
-
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	/* Drop any packets associated with the downed device */
-	if (event == NETDEV_DOWN)
-		ipq_dev_drop(dev->ifindex);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
-	.notifier_call	= ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
-		 unsigned long event, void *ptr)
-{
-	struct netlink_notify *n = ptr;
-
-	if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
-		spin_lock_bh(&queue_lock);
-		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
-			__ipq_reset();
-		spin_unlock_bh(&queue_lock);
-	}
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
-	.notifier_call	= ipq_rcv_nl_event,
-};
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
-	{
-		.procname	= NET_IPQ_QMAX_NAME,
-		.data		= &queue_maxlen,
-		.maxlen		= sizeof(queue_maxlen),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{ }
-};
-#endif
-
-#ifdef CONFIG_PROC_FS
-static int ip6_queue_show(struct seq_file *m, void *v)
-{
-	spin_lock_bh(&queue_lock);
-
-	seq_printf(m,
-		      "Peer PID          : %d\n"
-		      "Copy mode         : %hu\n"
-		      "Copy range        : %u\n"
-		      "Queue length      : %u\n"
-		      "Queue max. length : %u\n"
-		      "Queue dropped     : %u\n"
-		      "Netfilter dropped : %u\n",
-		      peer_pid,
-		      copy_mode,
-		      copy_range,
-		      queue_total,
-		      queue_maxlen,
-		      queue_dropped,
-		      queue_user_dropped);
-
-	spin_unlock_bh(&queue_lock);
-	return 0;
-}
-
-static int ip6_queue_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ip6_queue_show, NULL);
-}
-
-static const struct file_operations ip6_queue_proc_fops = {
-	.open		= ip6_queue_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.owner		= THIS_MODULE,
-};
-#endif
-
-static const struct nf_queue_handler nfqh = {
-	.name	= "ip6_queue",
-	.outfn	= &ipq_enqueue_packet,
-};
-
-static int __init ip6_queue_init(void)
-{
-	int status = -ENOMEM;
-	struct proc_dir_entry *proc __maybe_unused;
-
-	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
-			              ipq_rcv_skb, NULL, THIS_MODULE);
-	if (ipqnl == NULL) {
-		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
-		goto cleanup_netlink_notifier;
-	}
-
-#ifdef CONFIG_PROC_FS
-	proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
-			   &ip6_queue_proc_fops);
-	if (!proc) {
-		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
-		goto cleanup_ipqnl;
-	}
-#endif
-	register_netdevice_notifier(&ipq_dev_notifier);
-#ifdef CONFIG_SYSCTL
-	ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table);
-#endif
-	status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
-	if (status < 0) {
-		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
-		goto cleanup_sysctl;
-	}
-	return status;
-
-cleanup_sysctl:
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
-cleanup_ipqnl: __maybe_unused
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-cleanup_netlink_notifier:
-	netlink_unregister_notifier(&ipq_nl_notifier);
-	return status;
-}
-
-static void __exit ip6_queue_fini(void)
-{
-	nf_unregister_queue_handlers(&nfqh);
-
-	ipq_flush(NULL, 0);
-
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(ipq_sysctl_header);
-#endif
-	unregister_netdevice_notifier(&ipq_dev_notifier);
-	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
-	netlink_kernel_release(ipqnl);
-	mutex_lock(&ipqnl_mutex);
-	mutex_unlock(&ipqnl_mutex);
-
-	netlink_unregister_notifier(&ipq_nl_notifier);
-}
-
-MODULE_DESCRIPTION("IPv6 packet queue handler");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
-
-module_init(ip6_queue_init);
-module_exit(ip6_queue_fini);

+ 52 - 18
net/netfilter/ipvs/ip_vs_conn.c

@@ -548,6 +548,7 @@ static inline void
 ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 {
 	unsigned int conn_flags;
+	__u32 flags;
 
 	/* if dest is NULL, then return directly */
 	if (!dest)
@@ -559,17 +560,19 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 	conn_flags = atomic_read(&dest->conn_flags);
 	if (cp->protocol != IPPROTO_UDP)
 		conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
+	flags = cp->flags;
 	/* Bind with the destination and its corresponding transmitter */
-	if (cp->flags & IP_VS_CONN_F_SYNC) {
+	if (flags & IP_VS_CONN_F_SYNC) {
 		/* if the connection is not template and is created
 		 * by sync, preserve the activity flag.
 		 */
-		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
 			conn_flags &= ~IP_VS_CONN_F_INACTIVE;
 		/* connections inherit forwarding method from dest */
-		cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
+		flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
 	}
-	cp->flags |= conn_flags;
+	flags |= conn_flags;
+	cp->flags = flags;
 	cp->dest = dest;
 
 	IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
@@ -584,12 +587,12 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		      atomic_read(&dest->refcnt));
 
 	/* Update the connection counters */
-	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
-		/* It is a normal connection, so increase the inactive
-		   connection counter because it is in TCP SYNRECV
-		   state (inactive) or other protocol inacive state */
-		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
-		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+	if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+		/* It is a normal connection, so modify the counters
+		 * according to the flags, later the protocol can
+		 * update them on state change
+		 */
+		if (!(flags & IP_VS_CONN_F_INACTIVE))
 			atomic_inc(&dest->activeconns);
 		else
 			atomic_inc(&dest->inactconns);
@@ -613,14 +616,40 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 {
 	struct ip_vs_dest *dest;
 
-	if ((cp) && (!cp->dest)) {
-		dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
-				       cp->dport, &cp->vaddr, cp->vport,
-				       cp->protocol, cp->fwmark, cp->flags);
+	dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+			       cp->dport, &cp->vaddr, cp->vport,
+			       cp->protocol, cp->fwmark, cp->flags);
+	if (dest) {
+		struct ip_vs_proto_data *pd;
+
+		spin_lock(&cp->lock);
+		if (cp->dest) {
+			spin_unlock(&cp->lock);
+			return dest;
+		}
+
+		/* Applications work depending on the forwarding method
+		 * but better to reassign them always when binding dest */
+		if (cp->app)
+			ip_vs_unbind_app(cp);
+
 		ip_vs_bind_dest(cp, dest);
-		return dest;
-	} else
-		return NULL;
+		spin_unlock(&cp->lock);
+
+		/* Update its packet transmitter */
+		cp->packet_xmit = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+		if (cp->af == AF_INET6)
+			ip_vs_bind_xmit_v6(cp);
+		else
+#endif
+			ip_vs_bind_xmit(cp);
+
+		pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+		if (pd && atomic_read(&pd->appcnt))
+			ip_vs_bind_app(cp, pd->pp);
+	}
+	return dest;
 }
 }
 
 
 
 
@@ -743,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 static void ip_vs_conn_expire(unsigned long data)
 {
 	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
-	struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+	struct net *net = ip_vs_conn_net(cp);
+	struct netns_ipvs *ipvs = net_ipvs(net);
 
 	cp->timeout = 60*HZ;
 
@@ -808,6 +838,9 @@ static void ip_vs_conn_expire(unsigned long data)
 		  atomic_read(&cp->refcnt)-1,
 		  atomic_read(&cp->n_control));
 
+	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+		ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+
 	ip_vs_conn_put(cp);
 	ip_vs_conn_put(cp);
 }
 }
 
 
@@ -881,6 +914,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	/* Set its state and timeout */
 	cp->state = 0;
 	cp->timeout = 3*HZ;
+	cp->sync_endtime = jiffies & ~3UL;

 	/* Bind its packet transmitter */
 #ifdef CONFIG_IP_VS_IPV6
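The new cp->sync_endtime field is initialised with its two lowest bits cleared; presumably the sync code (whose diff is suppressed below) reuses those bits as a small per-connection counter alongside the jiffies value. Packing a 0..3 counter into an otherwise 4-aligned timestamp looks roughly like this (illustrative helpers, not taken from the patch):

/* Pack a small counter (0..3) into the low bits of a jiffies-style value. */
static inline unsigned long pack_time(unsigned long when, unsigned int count)
{
	return (when & ~3UL) | (count & 3UL);
}

/* Recover both parts again. */
static inline unsigned long unpack_time(unsigned long v, unsigned int *count)
{
	*count = v & 3UL;
	return v & ~3UL;
}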

+ 2 - 28
net/netfilter/ipvs/ip_vs_core.c

@@ -1613,34 +1613,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	else
 		pkts = atomic_add_return(1, &cp->in_pkts);

-	if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
-	    cp->protocol == IPPROTO_SCTP) {
-		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-			(pkts % sysctl_sync_period(ipvs)
-			 == sysctl_sync_threshold(ipvs))) ||
-				(cp->old_state != cp->state &&
-				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
-				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
-				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-			ip_vs_sync_conn(net, cp);
-			goto out;
-		}
-	}
-
-	/* Keep this block last: TCP and others with pp->num_states <= 1 */
-	else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
-	    (((cp->protocol != IPPROTO_TCP ||
-	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % sysctl_sync_period(ipvs)
-	       == sysctl_sync_threshold(ipvs))) ||
-	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
-	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
-	       (cp->state == IP_VS_TCP_S_CLOSE) ||
-	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
-	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-		ip_vs_sync_conn(net, cp);
-out:
-	cp->old_state = cp->state;
+	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+		ip_vs_sync_conn(net, cp, pkts);
 
 
 	ip_vs_conn_put(cp);
 	return ret;
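The open-coded SCTP/TCP sync conditions above are gone: ip_vs_sync_conn() now receives the packet counter and makes the rate decision itself (its diff is suppressed further down). The periodic part of the old test reduces to a check like the following sketch, with sync_threshold/sync_period standing in for the sysctl values:

/* Mirrors the old "pkts % sync_period == sync_threshold" test that used to
 * gate ip_vs_sync_conn() calls from ip_vs_in(); illustrative only. */
static inline int sync_due(unsigned long pkts, int sync_threshold,
			   int sync_period)
{
	return sync_period > 0 &&
	       (int)(pkts % sync_period) == sync_threshold;
}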

+ 66 - 4
net/netfilter/ipvs/ip_vs_ctl.c

@@ -1599,6 +1599,10 @@ static int ip_vs_zero_all(struct net *net)
 }

 #ifdef CONFIG_SYSCTL
+
+static int zero;
+static int three = 3;
+
 static int
 proc_do_defense_mode(ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1632,7 +1636,8 @@ proc_do_sync_threshold(ctl_table *table, int write,
 	memcpy(val, valp, sizeof(val));

 	rc = proc_dointvec(table, write, buffer, lenp, ppos);
-	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
+	if (write && (valp[0] < 0 || valp[1] < 0 ||
+	    (valp[0] >= valp[1] && valp[1]))) {
 		/* Restore the correct value */
 		memcpy(valp, val, sizeof(val));
 	}
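The only behavioural change here is that a sync period of 0 is now accepted: a write is rejected when either value is negative or when a non-zero period is not strictly greater than the threshold. Restated as a standalone predicate (illustrative, not part of the patch):

/* Valid iff both values are non-negative and, when a period is given,
 * the threshold lies below it; a period of 0 is allowed. */
static int sync_threshold_valid(int threshold, int period)
{
	if (threshold < 0 || period < 0)
		return 0;
	return period == 0 || threshold < period;
}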
@@ -1652,9 +1657,24 @@ proc_do_sync_mode(ctl_table *table, int write,
 		if ((*valp < 0) || (*valp > 1)) {
 			/* Restore the correct value */
 			*valp = val;
-		} else {
-			struct net *net = current->nsproxy->net_ns;
-			ip_vs_sync_switch_mode(net, val);
+		}
+	}
+	return rc;
+}
+
+static int
+proc_do_sync_ports(ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int *valp = table->data;
+	int val = *valp;
+	int rc;
+
+	rc = proc_dointvec(table, write, buffer, lenp, ppos);
+	if (write && (*valp != val)) {
+		if (*valp < 1 || !is_power_of_2(*valp)) {
+			/* Restore the correct value */
+			*valp = val;
 		}
 	}
 	return rc;
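proc_do_sync_ports() only accepts a positive power of two and silently restores the previous value otherwise. The is_power_of_2() test it relies on boils down to the usual single-bit check:

/* A value is a power of two iff it is non-zero and has exactly one bit set;
 * equivalent to the kernel's is_power_of_2() used above. */
static inline int power_of_two(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}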
@@ -1717,6 +1737,24 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_do_sync_mode,
 	},
+	{
+		.procname	= "sync_ports",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_do_sync_ports,
+	},
+	{
+		.procname	= "sync_qlen_max",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sync_sock_size",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "cache_bypass",
 		.maxlen		= sizeof(int),
@@ -1742,6 +1780,20 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_do_sync_threshold,
 	},
+	{
+		.procname	= "sync_refresh_period",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "sync_retries",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &three,
+	},
 	{
 		.procname	= "nat_icmp_send",
 		.maxlen		= sizeof(int),
@@ -3655,6 +3707,12 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
 	ipvs->sysctl_sync_ver = 1;
 	tbl[idx++].data = &ipvs->sysctl_sync_ver;
+	ipvs->sysctl_sync_ports = 1;
+	tbl[idx++].data = &ipvs->sysctl_sync_ports;
+	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+	ipvs->sysctl_sync_sock_size = 0;
+	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
@@ -3662,6 +3720,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
 	tbl[idx].data = &ipvs->sysctl_sync_threshold;
 	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
+	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
+	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
+	tbl[idx++].data = &ipvs->sysctl_sync_retries;
 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
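sync_retries is bounded twice: the boot-time default is clamped into 0..3 with clamp_t(), and runtime writes are limited to the same window by proc_dointvec_minmax through the extra1/extra2 bounds (&zero and &three) in the table above. clamp_t() itself is just a typed min/max combination, roughly:

/* Equivalent of clamp_t(int, val, lo, hi), written out for clarity. */
static inline int clamp_int(int val, int lo, int hi)
{
	if (val < lo)
		return lo;
	if (val > hi)
		return hi;
	return val;
}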
 
 
 
 

+ 1 - 1
net/netfilter/ipvs/ip_vs_dh.c

@@ -149,7 +149,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
 
 
 	/* allocate the DH table for this service */
 	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,
-		      GFP_ATOMIC);
+		      GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
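This hunk and the matching ip_vs_lblc, ip_vs_lblcr, ip_vs_proto, ip_vs_sh and ip_vs_wrr hunks below all switch service-setup allocations from GFP_ATOMIC to GFP_KERNEL, which is appropriate when the caller runs in process context and may sleep; GFP_ATOMIC is only needed where blocking is not allowed and is more likely to fail under memory pressure. The difference in a nutshell (illustrative helper, not from the patch):

#include <linux/types.h>
#include <linux/slab.h>

/* Pick the allocation mode by context: sleeping allowed -> GFP_KERNEL,
 * atomic/softirq context -> GFP_ATOMIC. */
static void *alloc_sched_table(size_t size, bool may_sleep)
{
	return kmalloc(size, may_sleep ? GFP_KERNEL : GFP_ATOMIC);
}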
 
 

+ 1 - 1
net/netfilter/ipvs/ip_vs_ftp.c

@@ -485,7 +485,7 @@ static struct pernet_operations ip_vs_ftp_ops = {
 	.exit = __ip_vs_ftp_exit,
 };

-int __init ip_vs_ftp_init(void)
+static int __init ip_vs_ftp_init(void)
 {
 	int rv;


+ 1 - 1
net/netfilter/ipvs/ip_vs_lblc.c

@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblc_table for this service
 	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;


+ 1 - 1
net/netfilter/ipvs/ip_vs_lblcr.c

@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblcr_table for this service
 	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;


+ 3 - 3
net/netfilter/ipvs/ip_vs_proto.c

@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
 	struct ip_vs_proto_data *pd =
-			kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);

 	if (!pd)
 		return -ENOMEM;
@@ -156,7 +156,7 @@ EXPORT_SYMBOL(ip_vs_proto_get);
 /*
  *	get ip_vs_protocol object data by netns and proto
  */
-struct ip_vs_proto_data *
+static struct ip_vs_proto_data *
 __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
 {
 	struct ip_vs_proto_data *pd;
@@ -199,7 +199,7 @@ void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
 int *
 ip_vs_create_timeout_table(int *table, int size)
 {
-	return kmemdup(table, size, GFP_ATOMIC);
+	return kmemdup(table, size, GFP_KERNEL);
 }
 
 
 
 

+ 1 - 1
net/netfilter/ipvs/ip_vs_sh.c

@@ -162,7 +162,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
 
 
 	/* allocate the SH table for this service */
 	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
-		      GFP_ATOMIC);
+		      GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
 
 

File diff suppressed because it is too large
+ 455 - 202
net/netfilter/ipvs/ip_vs_sync.c


+ 1 - 1
net/netfilter/ipvs/ip_vs_wrr.c

@@ -84,7 +84,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the mark variable for WRR scheduling
 	 */
-	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
+	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);
 	if (mark == NULL)
 		return -ENOMEM;
 
 

+ 7 - 8
net/netfilter/nf_conntrack_core.c

@@ -1336,7 +1336,6 @@ static void nf_conntrack_cleanup_init_net(void)
 	while (untrack_refs() > 0)
 		schedule();

-	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
@@ -1354,6 +1353,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	}

 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
+	nf_conntrack_helper_fini(net);
 	nf_conntrack_timeout_fini(net);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_tstamp_fini(net);
@@ -1504,10 +1504,6 @@ static int nf_conntrack_init_init_net(void)
 	if (ret < 0)
 		goto err_proto;

-	ret = nf_conntrack_helper_init();
-	if (ret < 0)
-		goto err_helper;
-
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	ret = nf_ct_extend_register(&nf_ct_zone_extend);
 	if (ret < 0)
@@ -1525,10 +1521,8 @@ static int nf_conntrack_init_init_net(void)
 
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 err_extend:
-	nf_conntrack_helper_fini();
-#endif
-err_helper:
 	nf_conntrack_proto_fini();
+#endif
 err_proto:
 	return ret;
 }
@@ -1589,9 +1583,14 @@ static int nf_conntrack_init_net(struct net *net)
 	ret = nf_conntrack_timeout_init(net);
 	if (ret < 0)
 		goto err_timeout;
+	ret = nf_conntrack_helper_init(net);
+	if (ret < 0)
+		goto err_helper;

 	return 0;

+err_helper:
+	nf_conntrack_timeout_fini(net);
 err_timeout:
 	nf_conntrack_ecache_fini(net);
 err_ecache:

+ 4 - 6
net/netfilter/nf_conntrack_ecache.c

@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 int nf_conntrack_register_notifier(struct net *net,
 				   struct nf_ct_event_notifier *new)
 {
-	int ret = 0;
+	int ret;
 	struct nf_ct_event_notifier *notify;

 	mutex_lock(&nf_ct_ecache_mutex);
@@ -95,8 +95,7 @@ int nf_conntrack_register_notifier(struct net *net,
 		goto out_unlock;
 	}
 	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
-	mutex_unlock(&nf_ct_ecache_mutex);
-	return ret;
+	ret = 0;

 out_unlock:
 	mutex_unlock(&nf_ct_ecache_mutex);
@@ -121,7 +120,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 int nf_ct_expect_register_notifier(struct net *net,
 				   struct nf_exp_event_notifier *new)
 {
-	int ret = 0;
+	int ret;
 	struct nf_exp_event_notifier *notify;

 	mutex_lock(&nf_ct_ecache_mutex);
@@ -132,8 +131,7 @@ int nf_ct_expect_register_notifier(struct net *net,
 		goto out_unlock;
 	}
 	rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
-	mutex_unlock(&nf_ct_ecache_mutex);
-	return ret;
+	ret = 0;

 out_unlock:
 	mutex_unlock(&nf_ct_ecache_mutex);

+ 110 - 12
net/netfilter/nf_conntrack_helper.c

@@ -34,6 +34,67 @@ static struct hlist_head *nf_ct_helper_hash __read_mostly;
 static unsigned int nf_ct_helper_hsize __read_mostly;
 static unsigned int nf_ct_helper_count __read_mostly;

+static bool nf_ct_auto_assign_helper __read_mostly = true;
+module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
+MODULE_PARM_DESC(nf_conntrack_helper,
+		 "Enable automatic conntrack helper assignment (default 1)");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table helper_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_helper",
+		.data		= &init_net.ct.sysctl_auto_assign_helper,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+
+static int nf_conntrack_helper_init_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
+			GFP_KERNEL);
+	if (!table)
+		goto out;
+
+	table[0].data = &net->ct.sysctl_auto_assign_helper;
+
+	net->ct.helper_sysctl_header =
+		register_net_sysctl(net, "net/netfilter", table);
+
+	if (!net->ct.helper_sysctl_header) {
+		pr_err("nf_conntrack_helper: can't register to sysctl.\n");
+		goto out_register;
+	}
+	return 0;
+
+out_register:
+	kfree(table);
+out:
+	return -ENOMEM;
+}
+
+static void nf_conntrack_helper_fini_sysctl(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->ct.helper_sysctl_header->ctl_table_arg;
+	unregister_net_sysctl_table(net->ct.helper_sysctl_header);
+	kfree(table);
+}
+#else
+static int nf_conntrack_helper_init_sysctl(struct net *net)
+{
+	return 0;
+}
+
+static void nf_conntrack_helper_fini_sysctl(struct net *net)
+{
+}
+#endif /* CONFIG_SYSCTL */
 
 
 /* Stupid hash, but collision free for the default registrations of the
  * helpers currently in the kernel. */
@@ -118,17 +179,38 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 {
 	struct nf_conntrack_helper *helper = NULL;
 	struct nf_conn_help *help;
+	struct net *net = nf_ct_net(ct);
 	int ret = 0;

+	/* We already got a helper explicitly attached. The function
+	 * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
+	 * the helper up again. Since now the user is in full control of
+	 * making consistent helper configurations, skip this automatic
+	 * re-lookup, otherwise we'll lose the helper.
+	 */
+	if (test_bit(IPS_HELPER_BIT, &ct->status))
+		return 0;
+
 	if (tmpl != NULL) {
 		help = nfct_help(tmpl);
-		if (help != NULL)
+		if (help != NULL) {
 			helper = help->helper;
+			set_bit(IPS_HELPER_BIT, &ct->status);
+		}
 	}

 	help = nfct_help(ct);
-	if (helper == NULL)
+	if (net->ct.sysctl_auto_assign_helper && helper == NULL) {
 		helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+		if (unlikely(!net->ct.auto_assign_helper_warned && helper)) {
+			pr_info("nf_conntrack: automatic helper "
+				"assignment is deprecated and it will "
+				"be removed soon. Use the iptables CT target "
+				"to attach helpers instead.\n");
+			net->ct.auto_assign_helper_warned = true;
+		}
+	}
+
 	if (helper == NULL) {
 		if (help)
 			RCU_INIT_POINTER(help->helper, NULL);
@@ -315,28 +397,44 @@ static struct nf_ct_ext_type helper_extend __read_mostly = {
 	.id	= NF_CT_EXT_HELPER,
 };

-int nf_conntrack_helper_init(void)
+int nf_conntrack_helper_init(struct net *net)
 {
 	int err;

-	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
-	if (!nf_ct_helper_hash)
-		return -ENOMEM;
+	net->ct.auto_assign_helper_warned = false;
+	net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
+
+	if (net_eq(net, &init_net)) {
+		nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
+		nf_ct_helper_hash =
+			nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
+		if (!nf_ct_helper_hash)
+			return -ENOMEM;
 
 
-	err = nf_ct_extend_register(&helper_extend);
+		err = nf_ct_extend_register(&helper_extend);
+		if (err < 0)
+			goto err1;
+	}
+
+	err = nf_conntrack_helper_init_sysctl(net);
 	if (err < 0)
-		goto err1;
+		goto out_sysctl;

 	return 0;

+out_sysctl:
+	if (net_eq(net, &init_net))
+		nf_ct_extend_unregister(&helper_extend);
 err1:
 	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 	return err;
 }

-void nf_conntrack_helper_fini(void)
+void nf_conntrack_helper_fini(struct net *net)
 {
-	nf_ct_extend_unregister(&helper_extend);
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
+	nf_conntrack_helper_fini_sysctl(net);
+	if (net_eq(net, &init_net)) {
+		nf_ct_extend_unregister(&helper_extend);
+		nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
+	}
 }
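nf_conntrack_helper_init() is now per-netns: the helper hash table and conntrack extension are only set up for init_net, while every namespace gets its own sysctl_auto_assign_helper seeded from the nf_conntrack_helper module parameter, so the boot-time default can be overridden independently per namespace. Roughly (illustrative names, not the conntrack ones):

/* Boot-time default, as set by the module parameter. */
static unsigned int auto_assign_default = 1;

struct ns_ct_cfg {
	unsigned int sysctl_auto_assign_helper;	/* per-namespace copy */
};

/* Each new namespace starts from the module default and can then be
 * switched independently through its own sysctl. */
static void ns_ct_cfg_init(struct ns_ct_cfg *cfg)
{
	cfg->sysctl_auto_assign_helper = auto_assign_default;
}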

+ 9 - 1
net/netfilter/nf_conntrack_netlink.c

@@ -2080,7 +2080,15 @@ static int
 ctnetlink_change_expect(struct nf_conntrack_expect *x,
 			const struct nlattr * const cda[])
 {
-	return -EOPNOTSUPP;
+	if (cda[CTA_EXPECT_TIMEOUT]) {
+		if (!del_timer(&x->timeout))
+			return -ETIME;
+
+		x->timeout.expires = jiffies +
+			ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
+		add_timer(&x->timeout);
+	}
+	return 0;
 }
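ctnetlink_change_expect() now honours CTA_EXPECT_TIMEOUT: the attribute carries seconds in network byte order, the expectation timer is extended accordingly, and -ETIME is returned when del_timer() finds the timer no longer pending (the expectation is already on its way out). The conversion itself is just (illustrative helper, not part of the patch):

#include <linux/jiffies.h>
#include <net/netlink.h>

/* Seconds (big-endian, as carried by CTA_EXPECT_TIMEOUT) to an absolute
 * jiffies deadline. */
static inline unsigned long expect_deadline(const struct nlattr *attr)
{
	return jiffies + (unsigned long)ntohl(nla_get_be32(attr)) * HZ;
}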
 
 
 static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {

+ 0 - 13
security/selinux/nlmsgtab.c

@@ -14,7 +14,6 @@
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/if.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
 #include <linux/inet_diag.h>
 #include <linux/xfrm.h>
 #include <linux/audit.h>
@@ -70,12 +69,6 @@ static struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_SETDCB,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
 };

-static struct nlmsg_perm nlmsg_firewall_perms[] =
-{
-	{ IPQM_MODE,		NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
-	{ IPQM_VERDICT,		NETLINK_FIREWALL_SOCKET__NLMSG_WRITE },
-};
-
 static struct nlmsg_perm nlmsg_tcpdiag_perms[] =
 {
 	{ TCPDIAG_GETSOCK,	NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
@@ -145,12 +138,6 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 				 sizeof(nlmsg_route_perms));
 		break;

-	case SECCLASS_NETLINK_FIREWALL_SOCKET:
-	case SECCLASS_NETLINK_IP6FW_SOCKET:
-		err = nlmsg_perm(nlmsg_type, perm, nlmsg_firewall_perms,
-				 sizeof(nlmsg_firewall_perms));
-		break;
-
 	case SECCLASS_NETLINK_TCPDIAG_SOCKET:
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms,
 				 sizeof(nlmsg_tcpdiag_perms));

Some files were not shown because too many files changed in this diff