
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6

Conflicts:
	Documentation/feature-removal-schedule.txt
David S. Miller
commit 31111c26d9

+ 9 - 0
Documentation/feature-removal-schedule.txt

@@ -637,3 +637,12 @@ Why:	The original implementation of memsw feature enabled by
 Who:	Michal Hocko <mhocko@suse.cz>
 
 ----------------------------
+
+What:	ipt_addrtype match include file
+When:	2012
+Why:	superseded by xt_addrtype
+Who:	Florian Westphal <fw@strlen.de>
+Files:	include/linux/netfilter_ipv4/ipt_addrtype.h
+>>>>>>> 2f5dc63123905a89d4260ab8ee08d19ec104db04
+
+----------------------------

+ 1 - 0
include/linux/netfilter/Kbuild

@@ -29,6 +29,7 @@ header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
 header-y += xt_TEE.h
 header-y += xt_TPROXY.h
+header-y += xt_addrtype.h
 header-y += xt_cluster.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h

+ 44 - 0
include/linux/netfilter/xt_addrtype.h

@@ -0,0 +1,44 @@
+#ifndef _XT_ADDRTYPE_H
+#define _XT_ADDRTYPE_H
+
+#include <linux/types.h>
+
+enum {
+	XT_ADDRTYPE_INVERT_SOURCE	= 0x0001,
+	XT_ADDRTYPE_INVERT_DEST		= 0x0002,
+	XT_ADDRTYPE_LIMIT_IFACE_IN	= 0x0004,
+	XT_ADDRTYPE_LIMIT_IFACE_OUT	= 0x0008,
+};
+
+
+/* rtn_type enum values from rtnetlink.h, but shifted */
+enum {
+	XT_ADDRTYPE_UNSPEC = 1 << 0,
+	XT_ADDRTYPE_UNICAST = 1 << 1,	/* 1 << RTN_UNICAST */
+	XT_ADDRTYPE_LOCAL  = 1 << 2,	/* 1 << RTN_LOCAL, etc */
+	XT_ADDRTYPE_BROADCAST = 1 << 3,
+	XT_ADDRTYPE_ANYCAST = 1 << 4,
+	XT_ADDRTYPE_MULTICAST = 1 << 5,
+	XT_ADDRTYPE_BLACKHOLE = 1 << 6,
+	XT_ADDRTYPE_UNREACHABLE = 1 << 7,
+	XT_ADDRTYPE_PROHIBIT = 1 << 8,
+	XT_ADDRTYPE_THROW = 1 << 9,
+	XT_ADDRTYPE_NAT = 1 << 10,
+	XT_ADDRTYPE_XRESOLVE = 1 << 11,
+};
+
+struct xt_addrtype_info_v1 {
+	__u16	source;		/* source-type mask */
+	__u16	dest;		/* dest-type mask */
+	__u32	flags;
+};
+
+/* revision 0 */
+struct xt_addrtype_info {
+	__u16	source;		/* source-type mask */
+	__u16	dest;		/* dest-type mask */
+	__u32	invert_source;
+	__u32	invert_dest;
+};
+
+#endif

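The new header is the whole userspace ABI of the match: a rule is two route-type bit masks plus flags. As a hedged sketch of how a revision-1 rule fits together (the struct layout comes from the header above; the bit test mirrors match_type() in the old ipt_addrtype.c removed further down, and the rule itself is hypothetical):

	#include <linux/netfilter/xt_addrtype.h>

	/* Hypothetical rule: match packets whose destination the routing
	 * table classifies as LOCAL or BROADCAST, judged against the
	 * incoming interface. */
	static const struct xt_addrtype_info_v1 rule = {
		.source	= 0,	/* 0 == don't check the source address */
		.dest	= XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_BROADCAST,
		.flags	= XT_ADDRTYPE_LIMIT_IFACE_IN,
	};

	/* The match reduces to one bit test: is the bit for the route
	 * type of this address set in the configured mask? */
	static bool match_type(struct net *net, const struct net_device *dev,
			       __be32 addr, u16 mask)
	{
		return mask & (1 << inet_dev_addr_type(net, dev, addr));
	}
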
+ 179 - 19
include/net/ip_vs.h

@@ -374,24 +374,9 @@ struct ip_vs_stats {
 	struct ip_vs_estimator	est;		/* estimator */
 	struct ip_vs_cpu_stats	*cpustats;	/* per cpu counters */
 	spinlock_t		lock;		/* spin lock */
+	struct ip_vs_stats_user	ustats0;	/* reset values */
 };
 
-/*
- * Helper Macros for per cpu
- * ipvs->tot_stats->ustats.count
- */
-#define IPVS_STAT_INC(ipvs, count)	\
-	__this_cpu_inc((ipvs)->ustats->count)
-
-#define IPVS_STAT_ADD(ipvs, count, value) \
-	do {\
-		write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
-				     raw_smp_processor_id())); \
-		__this_cpu_add((ipvs)->ustats->count, value); \
-		write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
-				   raw_smp_processor_id())); \
-	} while (0)
-
 struct dst_entry;
 struct iphdr;
 struct ip_vs_conn;
@@ -803,6 +788,171 @@ struct ip_vs_app {
 	void (*timeout_change)(struct ip_vs_app *app, int flags);
 };
 
+/* IPVS in network namespace */
+struct netns_ipvs {
+	int			gen;		/* Generation */
+	/*
+	 *	Hash table: for real service lookups
+	 */
+	#define IP_VS_RTAB_BITS 4
+	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+	struct list_head	rs_table[IP_VS_RTAB_SIZE];
+	/* ip_vs_app */
+	struct list_head	app_list;
+	struct mutex		app_mutex;
+	struct lock_class_key	app_key;	/* mutex debugging */
+
+	/* ip_vs_proto */
+	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
+	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+	/* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+	#define	TCP_APP_TAB_BITS	4
+	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
+	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
+	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
+	spinlock_t		tcp_app_lock;
+#endif
+	/* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+	#define	UDP_APP_TAB_BITS	4
+	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
+	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
+	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
+	spinlock_t		udp_app_lock;
+#endif
+	/* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	#define SCTP_APP_TAB_BITS	4
+	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
+	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
+	/* Hash table for SCTP application incarnations	 */
+	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
+	spinlock_t		sctp_app_lock;
+#endif
+	/* ip_vs_conn */
+	atomic_t		conn_count;      /*  connection counter */
+
+	/* ip_vs_ctl */
+	struct ip_vs_stats		tot_stats;  /* Statistics & est. */
+
+	int			num_services;    /* no of virtual services */
+
+	rwlock_t		rs_lock;         /* real services table */
+	/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+	struct lock_class_key	ctl_key;	/* ctl_mutex debugging */
+	/* Trash for destinations */
+	struct list_head	dest_trash;
+	/* Service counters */
+	atomic_t		ftpsvc_counter;
+	atomic_t		nullsvc_counter;
+
+#ifdef CONFIG_SYSCTL
+	/* 1/rate drop and drop-entry variables */
+	struct delayed_work	defense_work;   /* Work handler */
+	int			drop_rate;
+	int			drop_counter;
+	atomic_t		dropentry;
+	/* locks in ctl.c */
+	spinlock_t		dropentry_lock;  /* drop entry handling */
+	spinlock_t		droppacket_lock; /* drop packet handling */
+	spinlock_t		securetcp_lock;  /* state and timeout tables */
+
+	/* sys-ctl struct */
+	struct ctl_table_header	*sysctl_hdr;
+	struct ctl_table	*sysctl_tbl;
+#endif
+
+	/* sysctl variables */
+	int			sysctl_amemthresh;
+	int			sysctl_am_droprate;
+	int			sysctl_drop_entry;
+	int			sysctl_drop_packet;
+	int			sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+	int			sysctl_conntrack;
+#endif
+	int			sysctl_snat_reroute;
+	int			sysctl_sync_ver;
+	int			sysctl_cache_bypass;
+	int			sysctl_expire_nodest_conn;
+	int			sysctl_expire_quiescent_template;
+	int			sysctl_sync_threshold[2];
+	int			sysctl_nat_icmp_send;
+
+	/* ip_vs_lblc */
+	int			sysctl_lblc_expiration;
+	struct ctl_table_header	*lblc_ctl_header;
+	struct ctl_table	*lblc_ctl_table;
+	/* ip_vs_lblcr */
+	int			sysctl_lblcr_expiration;
+	struct ctl_table_header	*lblcr_ctl_header;
+	struct ctl_table	*lblcr_ctl_table;
+	/* ip_vs_est */
+	struct list_head	est_list;	/* estimator list */
+	spinlock_t		est_lock;
+	struct timer_list	est_timer;	/* Estimation timer */
+	/* ip_vs_sync */
+	struct list_head	sync_queue;
+	spinlock_t		sync_lock;
+	struct ip_vs_sync_buff  *sync_buff;
+	spinlock_t		sync_buff_lock;
+	struct sockaddr_in	sync_mcast_addr;
+	struct task_struct	*master_thread;
+	struct task_struct	*backup_thread;
+	int			send_mesg_maxlen;
+	int			recv_mesg_maxlen;
+	volatile int		sync_state;
+	volatile int		master_syncid;
+	volatile int		backup_syncid;
+	/* multicast interface name */
+	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+	/* net name space ptr */
+	struct net		*net;            /* Needed by timer routines */
+};
+
+#define DEFAULT_SYNC_THRESHOLD	3
+#define DEFAULT_SYNC_PERIOD	50
+#define DEFAULT_SYNC_VER	1
+
+#ifdef CONFIG_SYSCTL
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_threshold[0];
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_threshold[1];
+}
+
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_ver;
+}
+
+#else
+
+static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_THRESHOLD;
+}
+
+static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_PERIOD;
+}
+
+static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
+{
+	return DEFAULT_SYNC_VER;
+}
+
+#endif
 
 /*
  *      IPVS core functions
@@ -1071,9 +1221,11 @@ extern void ip_vs_sync_cleanup(void);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
+				 struct ip_vs_stats *stats);
 
 /*
  *	Various IPVS packet transmitters (from ip_vs_xmit.c)
@@ -1106,6 +1258,7 @@ extern int ip_vs_icmp_xmit_v6
  int offset);
 #endif
 
+#ifdef CONFIG_SYSCTL
 /*
  *	This is a simple mechanism to ignore packets when
  *	we are loaded. Just set ip_vs_drop_rate to 'n' and
@@ -1121,6 +1274,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 	ipvs->drop_counter = ipvs->drop_rate;
 	return 1;
 }
+#else
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
+#endif
 
 /*
  *      ip_vs_fwd_tag returns the forwarding tag of the connection
@@ -1190,7 +1346,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
 {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
 	if (!ct || !nf_ct_is_untracked(ct)) {
 		nf_reset(skb);
@@ -1208,7 +1364,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
  */
 static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
+#ifdef CONFIG_SYSCTL
 	return ipvs->sysctl_conntrack;
+#else
+	return 0;
+#endif
 }
 
 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,

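The sysctl_sync_threshold[2] array above packs two tunables: index 0 is the threshold and index 1 the period, read through the sysctl_sync_threshold()/sysctl_sync_period() accessors so that !CONFIG_SYSCTL builds collapse to the DEFAULT_* constants. The core's sync decision (visible in the ip_vs_core.c and ip_vs_sync.c hunks below) is a modulo test; with the defaults, threshold 3 and period 50, a sketch of it fires on packets 3, 53, 103, and so on:

	/* Sketch of the decision in ip_vs_in(): sync this connection's
	 * state once per period, offset by the threshold. */
	pkts = atomic_add_return(1, &cp->in_pkts);
	if (pkts % sysctl_sync_period(ipvs) == sysctl_sync_threshold(ipvs))
		ip_vs_sync_conn(net, cp);	/* pkts == 3, 53, 103, ... */
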
+ 1 - 1
include/net/net_namespace.h

@@ -20,7 +20,6 @@
 #include <net/netns/conntrack.h>
 #endif
 #include <net/netns/xfrm.h>
-#include <net/netns/ip_vs.h>
 
 struct proc_dir_entry;
 struct net_device;
@@ -28,6 +27,7 @@ struct sock;
 struct ctl_table_header;
 struct net_generic;
 struct sock;
+struct netns_ipvs;
 
 
 #define NETDEV_HASHBITS    8

+ 0 - 143
include/net/netns/ip_vs.h

@@ -1,143 +0,0 @@
-/*
- *  IP Virtual Server
- *  Data structure for network namspace
- *
- */
-
-#ifndef IP_VS_H_
-#define IP_VS_H_
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/list_nulls.h>
-#include <linux/ip_vs.h>
-#include <asm/atomic.h>
-#include <linux/in.h>
-
-struct ip_vs_stats;
-struct ip_vs_sync_buff;
-struct ctl_table_header;
-
-struct netns_ipvs {
-	int			gen;		/* Generation */
-	/*
-	 *	Hash table: for real service lookups
-	 */
-	#define IP_VS_RTAB_BITS 4
-	#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-	#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-	struct list_head	rs_table[IP_VS_RTAB_SIZE];
-	/* ip_vs_app */
-	struct list_head	app_list;
-	struct mutex		app_mutex;
-	struct lock_class_key	app_key;	/* mutex debuging */
-
-	/* ip_vs_proto */
-	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
-	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
-	/* ip_vs_proto_tcp */
-#ifdef CONFIG_IP_VS_PROTO_TCP
-	#define	TCP_APP_TAB_BITS	4
-	#define	TCP_APP_TAB_SIZE	(1 << TCP_APP_TAB_BITS)
-	#define	TCP_APP_TAB_MASK	(TCP_APP_TAB_SIZE - 1)
-	struct list_head	tcp_apps[TCP_APP_TAB_SIZE];
-	spinlock_t		tcp_app_lock;
-#endif
-	/* ip_vs_proto_udp */
-#ifdef CONFIG_IP_VS_PROTO_UDP
-	#define	UDP_APP_TAB_BITS	4
-	#define	UDP_APP_TAB_SIZE	(1 << UDP_APP_TAB_BITS)
-	#define	UDP_APP_TAB_MASK	(UDP_APP_TAB_SIZE - 1)
-	struct list_head	udp_apps[UDP_APP_TAB_SIZE];
-	spinlock_t		udp_app_lock;
-#endif
-	/* ip_vs_proto_sctp */
-#ifdef CONFIG_IP_VS_PROTO_SCTP
-	#define SCTP_APP_TAB_BITS	4
-	#define SCTP_APP_TAB_SIZE	(1 << SCTP_APP_TAB_BITS)
-	#define SCTP_APP_TAB_MASK	(SCTP_APP_TAB_SIZE - 1)
-	/* Hash table for SCTP application incarnations	 */
-	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
-	spinlock_t		sctp_app_lock;
-#endif
-	/* ip_vs_conn */
-	atomic_t		conn_count;      /*  connection counter */
-
-	/* ip_vs_ctl */
-	struct ip_vs_stats		*tot_stats;  /* Statistics & est. */
-	struct ip_vs_cpu_stats __percpu *cpustats;   /* Stats per cpu */
-	seqcount_t			*ustats_seq; /* u64 read retry */
-
-	int			num_services;    /* no of virtual services */
-	/* 1/rate drop and drop-entry variables */
-	struct delayed_work	defense_work;   /* Work handler */
-	int			drop_rate;
-	int			drop_counter;
-	atomic_t		dropentry;
-	/* locks in ctl.c */
-	spinlock_t		dropentry_lock;  /* drop entry handling */
-	spinlock_t		droppacket_lock; /* drop packet handling */
-	spinlock_t		securetcp_lock;  /* state and timeout tables */
-	rwlock_t		rs_lock;         /* real services table */
-	/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
-	struct lock_class_key	ctl_key;	/* ctl_mutex debuging */
-	/* Trash for destinations */
-	struct list_head	dest_trash;
-	/* Service counters */
-	atomic_t		ftpsvc_counter;
-	atomic_t		nullsvc_counter;
-
-	/* sys-ctl struct */
-	struct ctl_table_header	*sysctl_hdr;
-	struct ctl_table	*sysctl_tbl;
-	/* sysctl variables */
-	int			sysctl_amemthresh;
-	int			sysctl_am_droprate;
-	int			sysctl_drop_entry;
-	int			sysctl_drop_packet;
-	int			sysctl_secure_tcp;
-#ifdef CONFIG_IP_VS_NFCT
-	int			sysctl_conntrack;
-#endif
-	int			sysctl_snat_reroute;
-	int			sysctl_sync_ver;
-	int			sysctl_cache_bypass;
-	int			sysctl_expire_nodest_conn;
-	int			sysctl_expire_quiescent_template;
-	int			sysctl_sync_threshold[2];
-	int			sysctl_nat_icmp_send;
-
-	/* ip_vs_lblc */
-	int			sysctl_lblc_expiration;
-	struct ctl_table_header	*lblc_ctl_header;
-	struct ctl_table	*lblc_ctl_table;
-	/* ip_vs_lblcr */
-	int			sysctl_lblcr_expiration;
-	struct ctl_table_header	*lblcr_ctl_header;
-	struct ctl_table	*lblcr_ctl_table;
-	/* ip_vs_est */
-	struct list_head	est_list;	/* estimator list */
-	spinlock_t		est_lock;
-	struct timer_list	est_timer;	/* Estimation timer */
-	/* ip_vs_sync */
-	struct list_head	sync_queue;
-	spinlock_t		sync_lock;
-	struct ip_vs_sync_buff  *sync_buff;
-	spinlock_t		sync_buff_lock;
-	struct sockaddr_in	sync_mcast_addr;
-	struct task_struct	*master_thread;
-	struct task_struct	*backup_thread;
-	int			send_mesg_maxlen;
-	int			recv_mesg_maxlen;
-	volatile int		sync_state;
-	volatile int		master_syncid;
-	volatile int		backup_syncid;
-	/* multicast interface name */
-	char			master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-	/* net name space ptr */
-	struct net		*net;            /* Needed by timer routines */
-};
-
-#endif /* IP_VS_H_ */

+ 0 - 10
net/ipv4/netfilter/Kconfig

@@ -64,16 +64,6 @@ config IP_NF_IPTABLES
 if IP_NF_IPTABLES
 
 # The matches.
-config IP_NF_MATCH_ADDRTYPE
-	tristate '"addrtype" address type match support'
-	depends on NETFILTER_ADVANCED
-	help
-	  This option allows you to match what routing thinks of an address,
-	  eg. UNICAST, LOCAL, BROADCAST, ...
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
-
 config IP_NF_MATCH_AH
 	tristate '"ah" match support'
 	depends on NETFILTER_ADVANCED

+ 0 - 1
net/ipv4/netfilter/Makefile

@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
 # matches
-obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 

+ 3 - 0
net/ipv4/netfilter/arp_tables.c

@@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user,
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 			ret = -EFAULT;
 			break;
 		}
+		rev.name[sizeof(rev.name)-1] = 0;
 
 		try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
 							 rev.revision, 1, &ret),

+ 3 - 0
net/ipv4/netfilter/ip_tables.c

@@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EFAULT;
 			break;
 		}
+		rev.name[sizeof(rev.name)-1] = 0;
 
 		if (cmd == IPT_SO_GET_REVISION_TARGET)
 			target = 1;

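The recurring one-liner `tmp.name[sizeof(tmp.name)-1] = 0;` in arp_tables, ip_tables and (below) ip6_tables is the standard hardening for fixed-size strings copied from userspace: nothing guarantees the buffer arrived NUL-terminated, and the later lookups by table or revision name would otherwise read past the array. A generic, self-contained sketch of the pattern — the struct and the consumer are hypothetical stand-ins for the xt table lookups:

	struct user_request {
		char name[32];			/* copied raw from userspace */
	};

	static int handle_request(const void __user *user, unsigned int len)
	{
		struct user_request tmp;

		if (len < sizeof(tmp))
			return -EINVAL;
		if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
			return -EFAULT;

		/* Never trust userspace to NUL-terminate: force it. */
		tmp.name[sizeof(tmp.name) - 1] = '\0';

		return do_lookup_by_name(tmp.name);	/* hypothetical */
	}
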
+ 0 - 134
net/ipv4/netfilter/ipt_addrtype.c

@@ -1,134 +0,0 @@
-/*
- *  iptables module to match inet_addr_type() of an ip.
- *
- *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
- *  (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_addrtype.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: address type match for IPv4");
-
-static inline bool match_type(struct net *net, const struct net_device *dev,
-			      __be32 addr, u_int16_t mask)
-{
-	return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
-}
-
-static bool
-addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-	const struct ipt_addrtype_info *info = par->matchinfo;
-	const struct iphdr *iph = ip_hdr(skb);
-	bool ret = true;
-
-	if (info->source)
-		ret &= match_type(net, NULL, iph->saddr, info->source) ^
-		       info->invert_source;
-	if (info->dest)
-		ret &= match_type(net, NULL, iph->daddr, info->dest) ^
-		       info->invert_dest;
-
-	return ret;
-}
-
-static bool
-addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct net_device *dev = NULL;
-	bool ret = true;
-
-	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
-		dev = par->in;
-	else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
-		dev = par->out;
-
-	if (info->source)
-		ret &= match_type(net, dev, iph->saddr, info->source) ^
-		       (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
-	if (ret && info->dest)
-		ret &= match_type(net, dev, iph->daddr, info->dest) ^
-		       !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
-	return ret;
-}
-
-static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
-{
-	struct ipt_addrtype_info_v1 *info = par->matchinfo;
-
-	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		pr_info("both incoming and outgoing "
-			"interface limitation cannot be selected\n");
-		return -EINVAL;
-	}
-
-	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
-	    (1 << NF_INET_LOCAL_IN)) &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		pr_info("output interface limitation "
-			"not valid in PREROUTING and INPUT\n");
-		return -EINVAL;
-	}
-
-	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
-	    (1 << NF_INET_LOCAL_OUT)) &&
-	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
-		pr_info("input interface limitation "
-			"not valid in POSTROUTING and OUTPUT\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static struct xt_match addrtype_mt_reg[] __read_mostly = {
-	{
-		.name		= "addrtype",
-		.family		= NFPROTO_IPV4,
-		.match		= addrtype_mt_v0,
-		.matchsize	= sizeof(struct ipt_addrtype_info),
-		.me		= THIS_MODULE
-	},
-	{
-		.name		= "addrtype",
-		.family		= NFPROTO_IPV4,
-		.revision	= 1,
-		.match		= addrtype_mt_v1,
-		.checkentry	= addrtype_mt_checkentry_v1,
-		.matchsize	= sizeof(struct ipt_addrtype_info_v1),
-		.me		= THIS_MODULE
-	}
-};
-
-static int __init addrtype_mt_init(void)
-{
-	return xt_register_matches(addrtype_mt_reg,
-				   ARRAY_SIZE(addrtype_mt_reg));
-}
-
-static void __exit addrtype_mt_exit(void)
-{
-	xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
-}
-
-module_init(addrtype_mt_init);
-module_exit(addrtype_mt_exit);

+ 3 - 0
net/ipv6/netfilter/ip6_tables.c

@@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
 	if (!newinfo)
@@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 			ret = -EFAULT;
 			break;
 		}
+		rev.name[sizeof(rev.name)-1] = 0;
 
 		if (cmd == IP6T_SO_GET_REVISION_TARGET)
 			target = 1;

+ 11 - 0
net/netfilter/Kconfig

@@ -649,6 +649,17 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 
 comment "Xtables matches"
 
+config NETFILTER_XT_MATCH_ADDRTYPE
+	tristate '"addrtype" address type match support'
+	depends on NETFILTER_ADVANCED
+	depends on (IPV6 || IPV6=n)
+	---help---
+	  This option allows you to match what routing thinks of an address,
+	  eg. UNICAST, LOCAL, BROADCAST, ...
+
+	  If you want to compile it as a module, say M here and read
+	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK

+ 1 - 0
net/netfilter/Makefile

@@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
+obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o

+ 1 - 1
net/netfilter/ipset/ip_set_core.c

@@ -612,7 +612,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	      const struct nlmsghdr *nlh,
 	      const struct nlattr * const attr[])
 {
-	struct ip_set *set, *clash;
+	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
 	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
 	const char *name, *typename;

+ 11 - 2
net/netfilter/ipvs/ip_vs_conn.c

@@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 	atomic_dec(&dest->refcnt);
 }
 
+static int expire_quiescent_template(struct netns_ipvs *ipvs,
+				     struct ip_vs_dest *dest)
+{
+#ifdef CONFIG_SYSCTL
+	return ipvs->sysctl_expire_quiescent_template &&
+		(atomic_read(&dest->weight) == 0);
+#else
+	return 0;
+#endif
+}
 
 /*
  *	Checking if the destination of a connection template is available.
@@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 	 */
 	if ((dest == NULL) ||
 	    !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-	    (ipvs->sysctl_expire_quiescent_template &&
-	     (atomic_read(&dest->weight) == 0))) {
+	    expire_quiescent_template(ipvs, dest)) {
 		IP_VS_DBG_BUF(9, "check_template: dest not available for "
 			      "protocol %s s:%s:%d v:%s:%d "
 			      "-> d:%s:%d\n",

+ 64 - 40
net/netfilter/ipvs/ip_vs_core.c

@@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(ipvs->cpustats);
+		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.inbytes += skb->len;
@@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(ipvs->cpustats);
+		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.outbytes += skb->len;
@@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 	s = this_cpu_ptr(svc->stats.cpustats);
 	s->ustats.conns++;
 
-	s = this_cpu_ptr(ipvs->cpustats);
+	s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 	s->ustats.conns++;
 }
 
@@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		struct ip_vs_proto_data *pd)
 {
-	struct net *net;
-	struct netns_ipvs *ipvs;
 	__be16 _ports[2], *pptr;
 	struct ip_vs_iphdr iph;
+#ifdef CONFIG_SYSCTL
+	struct net *net;
+	struct netns_ipvs *ipvs;
 	int unicast;
+#endif
 
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
@@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_service_put(svc);
 		return NF_DROP;
 	}
+
+#ifdef CONFIG_SYSCTL
 	net = skb_net(skb);
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		ip_vs_conn_put(cp);
 		return ret;
 	}
+#endif
 
 	/*
 	 * When the virtual ftp service is presented, packets destined
@@ -599,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	return NF_DROP;
 }
 
+#ifdef CONFIG_SYSCTL
+
+static int sysctl_snat_reroute(struct sk_buff *skb)
+{
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+	return ipvs->sysctl_snat_reroute;
+}
+
+static int sysctl_nat_icmp_send(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	return ipvs->sysctl_nat_icmp_send;
+}
+
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_expire_nodest_conn;
+}
+
+#else
+
+static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
+static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
+
+#endif
+
 __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 {
 	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -631,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
 }
 #endif
 
+static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
+			return 1;
+	} else
+#endif
+		if ((sysctl_snat_reroute(skb) ||
+		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
+		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			return 1;
+
+	return 0;
+}
+
 /*
  * Packet has been made sufficiently writable in caller
  * - inout: 1=in->out, 0=out->in
@@ -737,7 +785,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 				struct ip_vs_protocol *pp,
 				unsigned int offset, unsigned int ihl)
 {
-	struct netns_ipvs *ipvs;
 	unsigned int verdict = NF_DROP;
 
 	if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -759,8 +806,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 	if (!skb_make_writable(skb, offset))
 		goto out;
 
-	ipvs = net_ipvs(skb_net(skb));
-
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
 		ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -768,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 #endif
 		ip_vs_nat_icmp(skb, pp, cp, 1);
 
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
-			goto out;
-	} else
-#endif
-		if ((ipvs->sysctl_snat_reroute ||
-		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto out;
+	if (ip_vs_route_me_harder(af, skb))
+		goto out;
 
 	/* do the statistics and put it back */
 	ip_vs_out_stats(cp, skb);
@@ -985,7 +1022,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 		struct ip_vs_conn *cp, int ihl)
 {
 	struct ip_vs_protocol *pp = pd->pp;
-	struct netns_ipvs *ipvs;
 
 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
@@ -1021,18 +1057,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
-	ipvs = net_ipvs(skb_net(skb));
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
-			goto drop;
-	} else
-#endif
-		if ((ipvs->sysctl_snat_reroute ||
-		     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
-		    ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto drop;
+	if (ip_vs_route_me_harder(af, skb))
+		goto drop;
 
 	IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
@@ -1066,7 +1092,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
-	struct netns_ipvs *ipvs;
 
 	EnterFunction(11);
 
@@ -1141,11 +1166,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 	 * Check if the packet belongs to an existing entry
 	 */
 	cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
-	ipvs = net_ipvs(net);
 
 	if (likely(cp))
 		return handle_response(af, skb, pd, cp, iph.len);
-	if (ipvs->sysctl_nat_icmp_send &&
+	if (sysctl_nat_icmp_send(net) &&
 	    (pp->protocol == IPPROTO_TCP ||
 	     pp->protocol == IPPROTO_UDP ||
 	     pp->protocol == IPPROTO_SCTP)) {
@@ -1570,7 +1594,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		/* the destination server is not available */
 
-		if (ipvs->sysctl_expire_nodest_conn) {
+		if (sysctl_expire_nodest_conn(ipvs)) {
 			/* try to expire the connection immediately */
 			ip_vs_conn_expire_now(cp);
 		}
@@ -1600,15 +1624,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	 */
 
 	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
-		pkts = ipvs->sysctl_sync_threshold[0];
+		pkts = sysctl_sync_threshold(ipvs);
 	else
 		pkts = atomic_add_return(1, &cp->in_pkts);
 
 	if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    cp->protocol == IPPROTO_SCTP) {
 		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-			(pkts % ipvs->sysctl_sync_threshold[1]
-			 == ipvs->sysctl_sync_threshold[0])) ||
+			(pkts % sysctl_sync_period(ipvs)
+			 == sysctl_sync_threshold(ipvs))) ||
 				(cp->old_state != cp->state &&
 				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
 				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1622,8 +1646,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
 	       cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-	      (pkts % ipvs->sysctl_sync_threshold[1]
-	       == ipvs->sysctl_sync_threshold[0])) ||
+	      (pkts % sysctl_sync_period(ipvs)
+	       == sysctl_sync_threshold(ipvs))) ||
 	     ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
 	      ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_CLOSE) ||

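The stats updates in this file switch from the removed IPVS_STAT_* macros to per-cpu counters guarded by u64_stats_sync: the writer brackets its 64-bit additions with u64_stats_update_begin()/end(), and any reader loops on fetch_begin/fetch_retry until it gets a torn-free snapshot. A condensed sketch of both halves, following the writer hunks above and the reader in ip_vs_ctl.c below:

	/* Writer, in the packet path (local CPU only): */
	struct ip_vs_cpu_stats *s = this_cpu_ptr(ipvs->tot_stats.cpustats);
	s->ustats.inpkts++;
	u64_stats_update_begin(&s->syncp);
	s->ustats.inbytes += skb->len;
	u64_stats_update_end(&s->syncp);

	/* Reader, when reporting (any CPU): retry until consistent. */
	unsigned int start;
	__u64 inbytes;
	do {
		start = u64_stats_fetch_begin_bh(&s->syncp);
		inbytes = s->ustats.inbytes;
	} while (u64_stats_fetch_retry_bh(&s->syncp, start));
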
+ 152 - 104
net/netfilter/ipvs/ip_vs_ctl.c

@@ -86,6 +86,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
 	return 0;
 }
 #endif
+
+#ifdef CONFIG_SYSCTL
 /*
  *	update_defense_level is called from keventd and from sysctl,
  *	so it needs to protect itself from softirqs
@@ -227,6 +229,7 @@ static void defense_work_handler(struct work_struct *work)
 		ip_vs_random_dropentry(ipvs->net);
 	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
+#endif
 
 int
 ip_vs_use_count_inc(void)
@@ -409,9 +412,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
 	/*
 	 *	Check the table hashed by fwmark first
 	 */
-	svc = __ip_vs_svc_fwm_find(net, af, fwmark);
-	if (fwmark && svc)
-		goto out;
+	if (fwmark) {
+		svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+		if (svc)
+			goto out;
+	}
 
 	/*
 	 *	Check the table hashed by <protocol,addr,port>
@@ -707,13 +712,39 @@ static void ip_vs_trash_cleanup(struct net *net)
 	}
 }
 
+static void
+ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+{
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+
+	spin_lock_bh(&src->lock);
+
+	IP_VS_SHOW_STATS_COUNTER(conns);
+	IP_VS_SHOW_STATS_COUNTER(inpkts);
+	IP_VS_SHOW_STATS_COUNTER(outpkts);
+	IP_VS_SHOW_STATS_COUNTER(inbytes);
+	IP_VS_SHOW_STATS_COUNTER(outbytes);
+
+	ip_vs_read_estimator(dst, src);
+
+	spin_unlock_bh(&src->lock);
+}
 
 static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
 	spin_lock_bh(&stats->lock);
 
-	memset(&stats->ustats, 0, sizeof(stats->ustats));
+	/* get current counters as zero point, rates are zeroed */
+
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+
+	IP_VS_ZERO_STATS_COUNTER(conns);
+	IP_VS_ZERO_STATS_COUNTER(inpkts);
+	IP_VS_ZERO_STATS_COUNTER(outpkts);
+	IP_VS_ZERO_STATS_COUNTER(inbytes);
+	IP_VS_ZERO_STATS_COUNTER(outbytes);
+
 	ip_vs_zero_estimator(stats);
 
 	spin_unlock_bh(&stats->lock);
@@ -772,7 +803,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	spin_unlock_bh(&dest->dst_lock);
 
 	if (add)
-		ip_vs_new_estimator(svc->net, &dest->stats);
+		ip_vs_start_estimator(svc->net, &dest->stats);
 
 	write_lock_bh(&__ip_vs_svc_lock);
 
@@ -978,7 +1009,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	ip_vs_kill_estimator(net, &dest->stats);
+	ip_vs_stop_estimator(net, &dest->stats);
 
 	/*
 	 *  Remove it from the d-linked list with the real services.
@@ -1171,7 +1202,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	else if (svc->port == 0)
 		atomic_inc(&ipvs->nullsvc_counter);
 
-	ip_vs_new_estimator(net, &svc->stats);
+	ip_vs_start_estimator(net, &svc->stats);
 
 	/* Count only IPv4 services for old get/setsockopt interface */
 	if (svc->af == AF_INET)
@@ -1323,7 +1354,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
 	if (svc->af == AF_INET)
 		ipvs->num_services--;
 
-	ip_vs_kill_estimator(svc->net, &svc->stats);
+	ip_vs_stop_estimator(svc->net, &svc->stats);
 
 	/* Unbind scheduler */
 	old_sched = svc->scheduler;
@@ -1477,11 +1508,11 @@ static int ip_vs_zero_all(struct net *net)
 		}
 	}
 
-	ip_vs_zero_stats(net_ipvs(net)->tot_stats);
+	ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
 	return 0;
 }
 
-
+#ifdef CONFIG_SYSCTL
 static int
 proc_do_defense_mode(ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1503,7 +1534,6 @@ proc_do_defense_mode(ctl_table *table, int write,
 	return rc;
 }
 
-
 static int
 proc_do_sync_threshold(ctl_table *table, int write,
 		       void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -1737,6 +1767,7 @@ const struct ctl_path net_vs_ctl_path[] = {
 	{ }
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
+#endif
 
 #ifdef CONFIG_PROC_FS
 
@@ -1959,7 +1990,7 @@ static const struct file_operations ip_vs_info_fops = {
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_single_net(seq);
-	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+	struct ip_vs_stats_user show;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
@@ -1967,22 +1998,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq,
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-	spin_lock_bh(&tot_stats->lock);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
-		   tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
-		   (unsigned long long) tot_stats->ustats.inbytes,
-		   (unsigned long long) tot_stats->ustats.outbytes);
+	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
+	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
+		   show.inpkts, show.outpkts,
+		   (unsigned long long) show.inbytes,
+		   (unsigned long long) show.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-			tot_stats->ustats.cps,
-			tot_stats->ustats.inpps,
-			tot_stats->ustats.outpps,
-			tot_stats->ustats.inbps,
-			tot_stats->ustats.outbps);
-	spin_unlock_bh(&tot_stats->lock);
+	seq_printf(seq, "%8X %8X %8X %16X %16X\n",
+			show.cps, show.inpps, show.outpps,
+			show.inbps, show.outbps);
 
 	return 0;
 }
@@ -2003,7 +2030,9 @@ static const struct file_operations ip_vs_stats_fops = {
 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_single_net(seq);
-	struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
+	struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
+	struct ip_vs_stats_user rates;
 	int i;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2013,30 +2042,43 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 		   "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
 
 	for_each_possible_cpu(i) {
-		struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
+		unsigned int start;
+		__u64 inbytes, outbytes;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&u->syncp);
+			inbytes = u->ustats.inbytes;
+			outbytes = u->ustats.outbytes;
+		} while (u64_stats_fetch_retry_bh(&u->syncp, start));
+
 		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
-			    i, u->ustats.conns, u->ustats.inpkts,
-			    u->ustats.outpkts, (__u64)u->ustats.inbytes,
-			    (__u64)u->ustats.outbytes);
+			   i, u->ustats.conns, u->ustats.inpkts,
+			   u->ustats.outpkts, (__u64)inbytes,
+			   (__u64)outbytes);
 	}
 
 	spin_lock_bh(&tot_stats->lock);
+
 	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
 		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
 		   tot_stats->ustats.outpkts,
 		   (unsigned long long) tot_stats->ustats.inbytes,
 		   (unsigned long long) tot_stats->ustats.outbytes);
 
+	ip_vs_read_estimator(&rates, tot_stats);
+
+	spin_unlock_bh(&tot_stats->lock);
+
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
 		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
 	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
-			tot_stats->ustats.cps,
-			tot_stats->ustats.inpps,
-			tot_stats->ustats.outpps,
-			tot_stats->ustats.inbps,
-			tot_stats->ustats.outbps);
-	spin_unlock_bh(&tot_stats->lock);
+			rates.cps,
+			rates.inpps,
+			rates.outpps,
+			rates.inbps,
+			rates.outbps);
 
 	return 0;
 }
@@ -2283,14 +2325,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 }
 
 
-static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
-{
-	spin_lock_bh(&src->lock);
-	memcpy(dst, &src->ustats, sizeof(*dst));
-	spin_unlock_bh(&src->lock);
-}
-
 static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
@@ -2677,31 +2711,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
 				 struct ip_vs_stats *stats)
 {
+	struct ip_vs_stats_user ustats;
 	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
 	if (!nl_stats)
 		return -EMSGSIZE;
 
-	spin_lock_bh(&stats->lock);
-
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
-	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
-	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
+	ip_vs_copy_stats(&ustats, stats);
 
-	spin_unlock_bh(&stats->lock);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
 
 	nla_nest_end(skb, nl_stats);
 
 	return 0;
 
 nla_put_failure:
-	spin_unlock_bh(&stats->lock);
 	nla_nest_cancel(skb, nl_stats);
 	return -EMSGSIZE;
 }
@@ -3480,7 +3512,8 @@ static void ip_vs_genl_unregister(void)
 /*
 * per netns init/exit func.
  */
-int __net_init __ip_vs_control_init(struct net *net)
+#ifdef CONFIG_SYSCTL
+int __net_init __ip_vs_control_init_sysctl(struct net *net)
 {
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3490,38 +3523,11 @@ int __net_init __ip_vs_control_init(struct net *net)
 	spin_lock_init(&ipvs->dropentry_lock);
 	spin_lock_init(&ipvs->droppacket_lock);
 	spin_lock_init(&ipvs->securetcp_lock);
-	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
-
-	/* Initialize rs_table */
-	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
-	INIT_LIST_HEAD(&ipvs->dest_trash);
-	atomic_set(&ipvs->ftpsvc_counter, 0);
-	atomic_set(&ipvs->nullsvc_counter, 0);
-
-	/* procfs stats */
-	ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
-	if (ipvs->tot_stats == NULL) {
-		pr_err("%s(): no memory.\n", __func__);
-		return -ENOMEM;
-	}
-	ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
-	if (!ipvs->cpustats) {
-		pr_err("%s() alloc_percpu failed\n", __func__);
-		goto err_alloc;
-	}
-	spin_lock_init(&ipvs->tot_stats->lock);
-
-	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
-	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
-	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
-			     &ip_vs_stats_percpu_fops);
 
 	if (!net_eq(net, &init_net)) {
 		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
 		if (tbl == NULL)
-			goto err_dup;
+			return -ENOMEM;
 	} else
 		tbl = vs_vars;
 	/* Initialize sysctl defaults */
@@ -3543,52 +3549,94 @@ int __net_init __ip_vs_control_init(struct net *net)
 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
-	ipvs->sysctl_sync_threshold[0] = 3;
-	ipvs->sysctl_sync_threshold[1] = 50;
+	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
+	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
 	tbl[idx].data = &ipvs->sysctl_sync_threshold;
 	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
 
 
-#ifdef CONFIG_SYSCTL
 	ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
 						     tbl);
 	if (ipvs->sysctl_hdr == NULL) {
 		if (!net_eq(net, &init_net))
 			kfree(tbl);
-		goto err_dup;
+		return -ENOMEM;
 	}
-#endif
-	ip_vs_new_estimator(net, ipvs->tot_stats);
+	ip_vs_start_estimator(net, &ipvs->tot_stats);
 	ipvs->sysctl_tbl = tbl;
 	/* Schedule defense work */
 	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
 	schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
-	return 0;
 
-err_dup:
-	free_percpu(ipvs->cpustats);
-err_alloc:
-	kfree(ipvs->tot_stats);
-	return -ENOMEM;
+	return 0;
 }
 
-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	ip_vs_trash_cleanup(net);
-	ip_vs_kill_estimator(net, ipvs->tot_stats);
 	cancel_delayed_work_sync(&ipvs->defense_work);
 	cancel_work_sync(&ipvs->defense_work.work);
-#ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(ipvs->sysctl_hdr);
+}
+
+#else
+
+int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
+void __net_exit __ip_vs_control_cleanup_sysctl(struct net *net) { }
+
 #endif
+
+int __net_init __ip_vs_control_init(struct net *net)
+{
+	int idx;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+	/* Initialize rs_table */
+	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+	INIT_LIST_HEAD(&ipvs->dest_trash);
+	atomic_set(&ipvs->ftpsvc_counter, 0);
+	atomic_set(&ipvs->nullsvc_counter, 0);
+
+	/* procfs stats */
+	ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+	if (!ipvs->tot_stats.cpustats) {
+		pr_err("%s(): alloc_percpu failed\n", __func__);
+		return -ENOMEM;
+	}
+	spin_lock_init(&ipvs->tot_stats.lock);
+
+	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+			     &ip_vs_stats_percpu_fops);
+
+	if (__ip_vs_control_init_sysctl(net))
+		goto err;
+
+	return 0;
+
+err:
+	free_percpu(ipvs->tot_stats.cpustats);
+	return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+	struct netns_ipvs *ipvs = net_ipvs(net);
+
+	ip_vs_trash_cleanup(net);
+	ip_vs_stop_estimator(net, &ipvs->tot_stats);
+	__ip_vs_control_cleanup_sysctl(net);
 	proc_net_remove(net, "ip_vs_stats_percpu");
 	proc_net_remove(net, "ip_vs_stats");
 	proc_net_remove(net, "ip_vs");
-	free_percpu(ipvs->cpustats);
-	kfree(ipvs->tot_stats);
+	free_percpu(ipvs->tot_stats.cpustats);
 }
 
 static struct pernet_operations ipvs_control_ops = {

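The ustats0 baseline added to struct ip_vs_stats is what makes the reworked ip_vs_zero_stats() safe: the live counters are rebuilt from per-cpu sums by the estimator, so memset()ing them (the old code) would fight that machinery. Instead, zeroing records the current totals and reporting subtracts them; the IP_VS_ZERO_STATS_COUNTER/IP_VS_SHOW_STATS_COUNTER macros expand to roughly:

	/* "zero": remember the current totals as the new baseline */
	stats->ustats0.inpkts = stats->ustats.inpkts;

	/* "show": report only what accumulated since the baseline */
	dst->inpkts = stats->ustats.inpkts - stats->ustats0.inpkts;
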
+ 28 - 35
net/netfilter/ipvs/ip_vs_est.c

@@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
 			sum->inpkts += s->ustats.inpkts;
 			sum->outpkts += s->ustats.outpkts;
 			do {
-				start = u64_stats_fetch_begin_bh(&s->syncp);
+				start = u64_stats_fetch_begin(&s->syncp);
 				inbytes = s->ustats.inbytes;
 				outbytes = s->ustats.outbytes;
-			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+			} while (u64_stats_fetch_retry(&s->syncp, start));
 			sum->inbytes += inbytes;
 			sum->outbytes += outbytes;
 		} else {
@@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
 			sum->inpkts = s->ustats.inpkts;
 			sum->outpkts = s->ustats.outpkts;
 			do {
-				start = u64_stats_fetch_begin_bh(&s->syncp);
+				start = u64_stats_fetch_begin(&s->syncp);
 				sum->inbytes = s->ustats.inbytes;
 				sum->outbytes = s->ustats.outbytes;
-			} while (u64_stats_fetch_retry_bh(&s->syncp, start));
+			} while (u64_stats_fetch_retry(&s->syncp, start));
 		}
 	}
 }
@@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg)
 	struct netns_ipvs *ipvs;
 
 	ipvs = net_ipvs(net);
-	ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
 	spin_lock(&ipvs->est_lock);
 	list_for_each_entry(e, &ipvs->est_list, list) {
 		s = container_of(e, struct ip_vs_stats, est);
 
-		ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
 		spin_lock(&s->lock);
+		ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
 		n_conns = s->ustats.conns;
 		n_inpkts = s->ustats.inpkts;
 		n_outpkts = s->ustats.outpkts;
@@ -118,61 +117,41 @@ static void estimation_timer(unsigned long arg)
 		rate = (n_conns - e->last_conns) << 9;
 		e->last_conns = n_conns;
 		e->cps += ((long)rate - (long)e->cps) >> 2;
-		s->ustats.cps = (e->cps + 0x1FF) >> 10;
 
 		rate = (n_inpkts - e->last_inpkts) << 9;
 		e->last_inpkts = n_inpkts;
 		e->inpps += ((long)rate - (long)e->inpps) >> 2;
-		s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
 
 		rate = (n_outpkts - e->last_outpkts) << 9;
 		e->last_outpkts = n_outpkts;
 		e->outpps += ((long)rate - (long)e->outpps) >> 2;
-		s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
 
 		rate = (n_inbytes - e->last_inbytes) << 4;
 		e->last_inbytes = n_inbytes;
 		e->inbps += ((long)rate - (long)e->inbps) >> 2;
-		s->ustats.inbps = (e->inbps + 0xF) >> 5;
 
 		rate = (n_outbytes - e->last_outbytes) << 4;
 		e->last_outbytes = n_outbytes;
 		e->outbps += ((long)rate - (long)e->outbps) >> 2;
-		s->ustats.outbps = (e->outbps + 0xF) >> 5;
 		spin_unlock(&s->lock);
 	}
 	spin_unlock(&ipvs->est_lock);
 	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
 
 	INIT_LIST_HEAD(&est->list);
 
-	est->last_conns = stats->ustats.conns;
-	est->cps = stats->ustats.cps<<10;
-
-	est->last_inpkts = stats->ustats.inpkts;
-	est->inpps = stats->ustats.inpps<<10;
-
-	est->last_outpkts = stats->ustats.outpkts;
-	est->outpps = stats->ustats.outpps<<10;
-
-	est->last_inbytes = stats->ustats.inbytes;
-	est->inbps = stats->ustats.inbps<<5;
-
-	est->last_outbytes = stats->ustats.outbytes;
-	est->outbps = stats->ustats.outbps<<5;
-
 	spin_lock_bh(&ipvs->est_lock);
 	list_add(&est->list, &ipvs->est_list);
 	spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_estimator *est = &stats->est;
@@ -185,13 +164,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *est = &stats->est;
-
-	/* set counters zero, caller must hold the stats->lock lock */
-	est->last_inbytes = 0;
-	est->last_outbytes = 0;
-	est->last_conns = 0;
-	est->last_inpkts = 0;
-	est->last_outpkts = 0;
+	struct ip_vs_stats_user *u = &stats->ustats;
+
+	/* reset counters, caller must hold the stats->lock lock */
+	est->last_inbytes = u->inbytes;
+	est->last_outbytes = u->outbytes;
+	est->last_conns = u->conns;
+	est->last_inpkts = u->inpkts;
+	est->last_outpkts = u->outpkts;
 	est->cps = 0;
 	est->inpps = 0;
 	est->outpps = 0;
@@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->outbps = 0;
 }
 
+/* Get decoded rates */
+void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
+			  struct ip_vs_stats *stats)
+{
+	struct ip_vs_estimator *e = &stats->est;
+
+	dst->cps = (e->cps + 0x1FF) >> 10;
+	dst->inpps = (e->inpps + 0x1FF) >> 10;
+	dst->outpps = (e->outpps + 0x1FF) >> 10;
+	dst->inbps = (e->inbps + 0xF) >> 5;
+	dst->outbps = (e->outbps + 0xF) >> 5;
+}
+
 static int __net_init __ip_vs_estimator_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

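The estimator works in fixed point: packet and connection rates carry a 2^10 scale factor, byte rates 2^5, and every 2-second tick folds the latest measurement into an exponential moving average with weight 1/4 (the `>> 2`). The new ip_vs_read_estimator() is the matching decode step, which used to be smeared across estimation_timer(). A worked sketch for the connection rate (numbers illustrative):

	/* Tick: delta over 2s, << 9 == conns/sec scaled by 2^10. */
	rate = (n_conns - e->last_conns) << 9;
	e->last_conns = n_conns;

	/* EWMA with alpha = 1/4:  cps += (rate - cps) / 4 */
	e->cps += ((long)rate - (long)e->cps) >> 2;

	/* Decode: round up and strip the 2^10 scale.  A steady
	 * e->cps of 10240 reads back as 10 conns/s. */
	dst->cps = (e->cps + 0x1FF) >> 10;
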
+ 23 - 8
net/netfilter/ipvs/ip_vs_lblc.c

@@ -63,6 +63,8 @@
 #define CHECK_EXPIRE_INTERVAL   (60*HZ)
 #define ENTRY_TIMEOUT           (6*60*HZ)
 
+#define DEFAULT_EXPIRATION	(24*60*60*HZ)
+
 /*
  *    It is for full expiration check.
  *    When there is no partial expiration check (garbage collection)
@@ -112,7 +114,7 @@ struct ip_vs_lblc_table {
 /*
  *      IPVS LBLC sysctl table
  */
-
+#ifdef CONFIG_SYSCTL
 static ctl_table vs_vars_table[] = {
 	{
 		.procname	= "lblc_expiration",
@@ -123,6 +125,7 @@ static ctl_table vs_vars_table[] = {
 	},
 	{ }
 };
+#endif
 
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
@@ -238,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
 	}
 }
 
+static int sysctl_lblc_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	return ipvs->sysctl_lblc_expiration;
+#else
+	return DEFAULT_EXPIRATION;
+#endif
+}
 
 static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 {
@@ -245,7 +257,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 	struct ip_vs_lblc_entry *en, *nxt;
 	unsigned long now = jiffies;
 	int i, j;
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -254,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now,
 					en->lastuse +
-					ipvs->sysctl_lblc_expiration))
+					sysctl_lblc_expiration(svc)))
 				continue;
 
 			ip_vs_lblc_free(en);
@@ -538,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
 /*
  *  per netns init.
  */
+#ifdef CONFIG_SYSCTL
 static int __net_init __ip_vs_lblc_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
@@ -550,10 +562,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
 			return -ENOMEM;
 	} else
 		ipvs->lblc_ctl_table = vs_vars_table;
-	ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+	ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
 	ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
 
-#ifdef CONFIG_SYSCTL
 	ipvs->lblc_ctl_header =
 		register_net_sysctl_table(net, net_vs_ctl_path,
 					  ipvs->lblc_ctl_table);
@@ -562,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
 			kfree(ipvs->lblc_ctl_table);
 		return -ENOMEM;
 	}
-#endif
 
 	return 0;
 }
@@ -571,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-#ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(ipvs->lblc_ctl_header);
-#endif
 
 	if (!net_eq(net, &init_net))
 		kfree(ipvs->lblc_ctl_table);
 }
 
+#else
+
+static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
+
+#endif
+
 static struct pernet_operations ip_vs_lblc_ops = {
 	.init = __ip_vs_lblc_init,
 	.exit = __ip_vs_lblc_exit,

+ 25 - 10
net/netfilter/ipvs/ip_vs_lblcr.c

@@ -63,6 +63,8 @@
 #define CHECK_EXPIRE_INTERVAL   (60*HZ)
 #define ENTRY_TIMEOUT           (6*60*HZ)
 
+#define DEFAULT_EXPIRATION	(24*60*60*HZ)
+
 /*
  *    It is for full expiration check.
  *    When there is no partial expiration check (garbage collection)
@@ -283,6 +285,7 @@ struct ip_vs_lblcr_table {
 };
 
 
+#ifdef CONFIG_SYSCTL
 /*
  *      IPVS LBLCR sysctl table
  */
@@ -297,6 +300,7 @@ static ctl_table vs_vars_table[] = {
 	},
 	{ }
 };
+#endif
 
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
@@ -410,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
 	}
 }
 
+static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
+{
+#ifdef CONFIG_SYSCTL
+	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	return ipvs->sysctl_lblcr_expiration;
+#else
+	return DEFAULT_EXPIRATION;
+#endif
+}
 
 static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 {
@@ -417,15 +430,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
 	unsigned long now = jiffies;
 	int i, j;
 	struct ip_vs_lblcr_entry *en, *nxt;
-	struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
 		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-			if (time_after(en->lastuse
-					+ ipvs->sysctl_lblcr_expiration, now))
+			if (time_after(en->lastuse +
+				       sysctl_lblcr_expiration(svc), now))
 				continue;
 
 			ip_vs_lblcr_free(en);
@@ -650,7 +662,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	read_lock(&svc->sched_lock);
 	en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
 	if (en) {
-		struct netns_ipvs *ipvs = net_ipvs(svc->net);
 		/* We only hold a read lock, but this is atomic */
 		en->lastuse = jiffies;
 
@@ -662,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		/* More than one destination + enough time passed by, cleanup */
 		if (atomic_read(&en->set.size) > 1 &&
 				time_after(jiffies, en->set.lastmod +
-				ipvs->sysctl_lblcr_expiration)) {
+				sysctl_lblcr_expiration(svc))) {
 			struct ip_vs_dest *m;
 
 			write_lock(&en->set.lock);
@@ -734,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
 /*
  *  per netns init.
  */
+#ifdef CONFIG_SYSCTL
 static int __net_init __ip_vs_lblcr_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
@@ -746,10 +758,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
 			return -ENOMEM;
 	} else
 		ipvs->lblcr_ctl_table = vs_vars_table;
-	ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
 	ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
 
-#ifdef CONFIG_SYSCTL
 	ipvs->lblcr_ctl_header =
 		register_net_sysctl_table(net, net_vs_ctl_path,
 					  ipvs->lblcr_ctl_table);
@@ -758,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
 			kfree(ipvs->lblcr_ctl_table);
 		return -ENOMEM;
 	}
-#endif
 
 	return 0;
 }
@@ -767,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-#ifdef CONFIG_SYSCTL
 	unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
-#endif
 
 	if (!net_eq(net, &init_net))
 		kfree(ipvs->lblcr_ctl_table);
 }
 
+#else
+
+static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
+static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
+
+#endif
+
 static struct pernet_operations ip_vs_lblcr_ops = {
 	.init = __ip_vs_lblcr_init,
 	.exit = __ip_vs_lblcr_exit,

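ip_vs_lblcr.c gets the same treatment, plus the sysctl_lblcr_expiration() accessor: callers such as ip_vs_lblcr_full_check() no longer dereference netns_ipvs directly, and the CONFIG_SYSCTL=n build falls back to DEFAULT_EXPIRATION. A compilable userspace sketch of that accessor-with-fallback shape (HAVE_SYSCTL and the struct are illustrative stand-ins):

#include <stdio.h>

#define HZ 100				/* stand-in for the kernel tick rate */
#define DEFAULT_EXPIRATION (24*60*60*HZ)
#define HAVE_SYSCTL 1			/* stand-in for CONFIG_SYSCTL */

struct netns_ipvs_sketch {
	int sysctl_lblcr_expiration;
};

static int sysctl_lblcr_expiration(const struct netns_ipvs_sketch *ipvs)
{
#if HAVE_SYSCTL
	return ipvs->sysctl_lblcr_expiration;	/* per-netns tunable */
#else
	(void)ipvs;
	return DEFAULT_EXPIRATION;		/* compile-time default */
#endif
}

int main(void)
{
	struct netns_ipvs_sketch ns = {
		.sysctl_lblcr_expiration = DEFAULT_EXPIRATION,
	};

	printf("expiration: %d jiffies\n", sysctl_lblcr_expiration(&ns));
	return 0;
}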
+ 4 - 5
net/netfilter/ipvs/ip_vs_pe_sip.c

@@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
 		return -EINVAL;
 
-	p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
-	if (!p->pe_data)
-		return -ENOMEM;
-
 	/* N.B: pe_data is only set on success,
 	 * this allows fallback to the default persistence logic on failure
 	 */
-	memcpy(p->pe_data, dptr + matchoff, matchlen);
+	p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
+	if (!p->pe_data)
+		return -ENOMEM;
+
 	p->pe_data_len = matchlen;
 
 	return 0;

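The ip_vs_pe_sip.c hunk folds kmalloc()+memcpy() into kmemdup() and, by allocating only after get_callid() has succeeded, keeps pe_data unset on every failure path, as the retained comment requires. A userspace sketch of the equivalent duplicate-or-fail helper (memdup_sketch is hypothetical; the kernel call is kmemdup(ptr, len, GFP_ATOMIC)):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* allocate and copy in one step; NULL on allocation failure */
static void *memdup_sketch(const void *src, size_t len)
{
	void *p = malloc(len);

	if (p)
		memcpy(p, src, len);
	return p;
}

int main(void)
{
	const char callid[] = "a84b4c76e66710";	/* illustrative SIP Call-ID */
	char *copy = memdup_sketch(callid, sizeof(callid));

	if (!copy)
		return 1;	/* maps to the -ENOMEM path above */
	printf("duplicated Call-ID: %s\n", copy);
	free(copy);
	return 0;
}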
+ 5 - 6
net/netfilter/ipvs/ip_vs_sync.c

@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
 
 	if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
 		return;
-	if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+	if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
 		return;
 
 	spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
 	unsigned int len, pe_name_len, pad;
 
 	/* Handle old version of the protocol */
-	if (ipvs->sysctl_sync_ver == 0) {
+	if (sysctl_sync_ver(ipvs) == 0) {
 		ip_vs_sync_conn_v0(net, cp);
 		return;
 	}
@@ -650,7 +650,7 @@ control:
 	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
 		int pkts = atomic_add_return(1, &cp->in_pkts);
 
-		if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+		if (pkts % sysctl_sync_period(ipvs) != 1)
 			return;
 	}
 	goto sloop;
@@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
 			return 1;
 		}
 
-		p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+		p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
 		if (!p->pe_data) {
 			if (p->pe->module)
 				module_put(p->pe->module);
 			return -ENOMEM;
 		}
-		memcpy(p->pe_data, pe_data, pe_data_len);
 		p->pe_data_len = pe_data_len;
 	}
 	return 0;
@@ -795,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 
 	if (opt)
 		memcpy(&cp->in_seq, opt, sizeof(*opt));
-	atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+	atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
 	cp->state = state;
 	cp->old_state = cp->state;
 	/*

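ip_vs_sync.c switches raw reads of ipvs->sysctl_sync_ver and the two-slot sysctl_sync_threshold array over to sysctl_sync_ver()/sysctl_sync_threshold()/sysctl_sync_period() accessors, matching the convention above. A userspace sketch of the slot mapping the replacements imply (slot 0 = threshold, slot 1 = period; constants illustrative, struct a stand-in):

#include <stdio.h>

struct ipvs_sketch {
	int sysctl_sync_threshold[2];
};

static int sysctl_sync_threshold(const struct ipvs_sketch *ipvs)
{
	return ipvs->sysctl_sync_threshold[0];
}

static int sysctl_sync_period(const struct ipvs_sketch *ipvs)
{
	return ipvs->sysctl_sync_threshold[1];
}

int main(void)
{
	struct ipvs_sketch ns = { .sysctl_sync_threshold = { 3, 50 } };
	int pkts;

	printf("threshold=%d period=%d\n",
	       sysctl_sync_threshold(&ns), sysctl_sync_period(&ns));
	/* mirrors the template branch above: sync when pkts % period == 1 */
	for (pkts = 1; pkts <= 101; pkts++)
		if (pkts % sysctl_sync_period(&ns) == 1)
			printf("template sync at packet %d\n", pkts);
	return 0;
}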
+ 1 - 0
net/netfilter/nf_conntrack_core.c

@@ -1301,6 +1301,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
+	nf_conntrack_tstamp_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);

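The one-liner in nf_conntrack_core.c adds the missing nf_conntrack_tstamp_fini() so per-netns teardown releases the timestamp extension alongside ecache and acct; teardown conventionally mirrors init in reverse order. A trivial sketch of that pairing (function names are stand-ins):

#include <stdio.h>

static void ecache_fini(void) { printf("ecache_fini\n"); }
static void tstamp_fini(void) { printf("tstamp_fini\n"); }
static void acct_fini(void)   { printf("acct_fini\n"); }

int main(void)
{
	/* reverse of the assumed init order acct -> tstamp -> ecache */
	ecache_fini();
	tstamp_fini();	/* the call this hunk adds */
	acct_fini();
	return 0;
}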
+ 17 - 9
net/netfilter/x_tables.c

@@ -183,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
 /*
  * These are weird, but module loading must not be done with mutex
  * held (since they will register), and we have to have a single
- * function to use try_then_request_module().
+ * function to use.
  */
 
 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
 struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 {
 	struct xt_match *m;
-	int err = 0;
+	int err = -ENOENT;
 
 	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
 		return ERR_PTR(-EINTR);
@@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
 {
 	struct xt_match *match;
 
-	match = try_then_request_module(xt_find_match(nfproto, name, revision),
-					"%st_%s", xt_prefix[nfproto], name);
-	return (match != NULL) ? match : ERR_PTR(-ENOENT);
+	match = xt_find_match(nfproto, name, revision);
+	if (IS_ERR(match)) {
+		request_module("%st_%s", xt_prefix[nfproto], name);
+		match = xt_find_match(nfproto, name, revision);
+	}
+
+	return match;
 }
 EXPORT_SYMBOL_GPL(xt_request_find_match);
 
@@ -231,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match);
 struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *t;
-	int err = 0;
+	int err = -ENOENT;
 
 	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
 		return ERR_PTR(-EINTR);
@@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 {
 	struct xt_target *target;
 
-	target = try_then_request_module(xt_find_target(af, name, revision),
-					 "%st_%s", xt_prefix[af], name);
-	return (target != NULL) ? target : ERR_PTR(-ENOENT);
+	target = xt_find_target(af, name, revision);
+	if (IS_ERR(target)) {
+		request_module("%st_%s", xt_prefix[af], name);
+		target = xt_find_target(af, name, revision);
+	}
+
+	return target;
 }
 EXPORT_SYMBOL_GPL(xt_request_find_target);
 

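In x_tables.c, the try_then_request_module() macro is replaced by an explicit lookup, request_module() on failure, and one retry; initializing err to -ENOENT in xt_find_match()/xt_find_target() lets the ERR_PTR propagate unchanged when both attempts miss. A userspace sketch of that lookup-autoload-retry flow (the registry and request_module_sketch() are fabricated stand-ins for real module loading):

#include <stdio.h>
#include <string.h>

struct match { const char *name; };

static struct match registry[4] = { { "tcp" }, { NULL } };
static int modules_loaded;

static struct match *find_match(const char *name)
{
	int i;

	for (i = 0; registry[i].name; i++)
		if (strcmp(registry[i].name, name) == 0)
			return &registry[i];
	return NULL;	/* stands in for ERR_PTR(-ENOENT) */
}

static void request_module_sketch(const char *name)
{
	/* pretend loading the module registers its match */
	if (strcmp(name, "addrtype") == 0 && !modules_loaded) {
		registry[1].name = "addrtype";
		modules_loaded = 1;
	}
}

static struct match *request_find_match(const char *name)
{
	struct match *m = find_match(name);

	if (!m) {				/* first lookup failed ...  */
		request_module_sketch(name);	/* ... try to autoload ... */
		m = find_match(name);		/* ... and retry once       */
	}
	return m;
}

int main(void)
{
	printf("before load: %s\n", find_match("addrtype") ? "found" : "missing");
	printf("after autoload: %s\n",
	       request_find_match("addrtype") ? "found" : "missing");
	return 0;
}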
+ 229 - 0
net/netfilter/xt_addrtype.c

@@ -0,0 +1,229 @@
+/*
+ *  iptables module to match inet_addr_type() of an ip.
+ *
+ *  Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
+ *  (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/ip6_fib.h>
+#endif
+
+#include <linux/netfilter/xt_addrtype.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: address type match");
+MODULE_ALIAS("ipt_addrtype");
+MODULE_ALIAS("ip6t_addrtype");
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
+{
+	u32 ret;
+
+	if (!rt)
+		return XT_ADDRTYPE_UNREACHABLE;
+
+	if (rt->rt6i_flags & RTF_REJECT)
+		ret = XT_ADDRTYPE_UNREACHABLE;
+	else
+		ret = 0;
+
+	if (rt->rt6i_flags & RTF_LOCAL)
+		ret |= XT_ADDRTYPE_LOCAL;
+	if (rt->rt6i_flags & RTF_ANYCAST)
+		ret |= XT_ADDRTYPE_ANYCAST;
+	return ret;
+}
+
+static bool match_type6(struct net *net, const struct net_device *dev,
+				const struct in6_addr *addr, u16 mask)
+{
+	int addr_type = ipv6_addr_type(addr);
+
+	if ((mask & XT_ADDRTYPE_MULTICAST) &&
+	    !(addr_type & IPV6_ADDR_MULTICAST))
+		return false;
+	if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
+		return false;
+	if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
+		return false;
+
+	if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
+	     XT_ADDRTYPE_UNREACHABLE) & mask) {
+		struct rt6_info *rt;
+		u32 type;
+		int ifindex = dev ? dev->ifindex : 0;
+
+		rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
+
+		type = xt_addrtype_rt6_to_type(rt);
+
+		dst_release(&rt->dst);
+		return !!(mask & type);
+	}
+	return true;
+}
+
+static bool
+addrtype_mt6(struct net *net, const struct net_device *dev,
+	const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	bool ret = true;
+
+	if (info->source)
+		ret &= match_type6(net, dev, &iph->saddr, info->source) ^
+		       (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
+	if (ret && info->dest)
+		ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
+		       !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
+	return ret;
+}
+#endif
+
+static inline bool match_type(struct net *net, const struct net_device *dev,
+			      __be32 addr, u_int16_t mask)
+{
+	return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
+}
+
+static bool
+addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct net *net = dev_net(par->in ? par->in : par->out);
+	const struct xt_addrtype_info *info = par->matchinfo;
+	const struct iphdr *iph = ip_hdr(skb);
+	bool ret = true;
+
+	if (info->source)
+		ret &= match_type(net, NULL, iph->saddr, info->source) ^
+		       info->invert_source;
+	if (info->dest)
+		ret &= match_type(net, NULL, iph->daddr, info->dest) ^
+		       info->invert_dest;
+
+	return ret;
+}
+
+static bool
+addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	struct net *net = dev_net(par->in ? par->in : par->out);
+	const struct xt_addrtype_info_v1 *info = par->matchinfo;
+	const struct iphdr *iph;
+	const struct net_device *dev = NULL;
+	bool ret = true;
+
+	if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
+		dev = par->in;
+	else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
+		dev = par->out;
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	if (par->family == NFPROTO_IPV6)
+		return addrtype_mt6(net, dev, skb, info);
+#endif
+	iph = ip_hdr(skb);
+	if (info->source)
+		ret &= match_type(net, dev, iph->saddr, info->source) ^
+		       (info->flags & XT_ADDRTYPE_INVERT_SOURCE);
+	if (ret && info->dest)
+		ret &= match_type(net, dev, iph->daddr, info->dest) ^
+		       !!(info->flags & XT_ADDRTYPE_INVERT_DEST);
+	return ret;
+}
+
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_addrtype_info_v1 *info = par->matchinfo;
+
+	if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
+	    info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		pr_info("both incoming and outgoing "
+			"interface limitation cannot be selected\n");
+		return -EINVAL;
+	}
+
+	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
+	    (1 << NF_INET_LOCAL_IN)) &&
+	    info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
+		pr_info("output interface limitation "
+			"not valid in PREROUTING and INPUT\n");
+		return -EINVAL;
+	}
+
+	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
+	    (1 << NF_INET_LOCAL_OUT)) &&
+	    info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
+		pr_info("input interface limitation "
+			"not valid in POSTROUTING and OUTPUT\n");
+		return -EINVAL;
+	}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	if (par->family == NFPROTO_IPV6) {
+		if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
+			pr_err("ipv6 BLACKHOLE matching not supported\n");
+			return -EINVAL;
+		}
+		if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
+			pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
+			return -EINVAL;
+		}
+		if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
+			pr_err("ipv6 does not support BROADCAST matching\n");
+			return -EINVAL;
+		}
+	}
+#endif
+	return 0;
+}
+
+static struct xt_match addrtype_mt_reg[] __read_mostly = {
+	{
+		.name		= "addrtype",
+		.family		= NFPROTO_IPV4,
+		.match		= addrtype_mt_v0,
+		.matchsize	= sizeof(struct xt_addrtype_info),
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "addrtype",
+		.family		= NFPROTO_UNSPEC,
+		.revision	= 1,
+		.match		= addrtype_mt_v1,
+		.checkentry	= addrtype_mt_checkentry_v1,
+		.matchsize	= sizeof(struct xt_addrtype_info_v1),
+		.me		= THIS_MODULE
+	}
+};
+
+static int __init addrtype_mt_init(void)
+{
+	return xt_register_matches(addrtype_mt_reg,
+				   ARRAY_SIZE(addrtype_mt_reg));
+}
+
+static void __exit addrtype_mt_exit(void)
+{
+	xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
+}
+
+module_init(addrtype_mt_init);
+module_exit(addrtype_mt_exit);

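The heart of the new xt_addrtype.c is match_type(): the rule carries a bitmask of acceptable address types, and the route type of the packet address is tested with a single shift-and-AND, mask & (1 << inet_dev_addr_type(...)). A userspace sketch of that test (enum values abbreviated; RTN_UNICAST = 1 and RTN_LOCAL = 2 as in rtnetlink.h):

#include <stdio.h>

enum {
	ADDRTYPE_UNICAST   = 1 << 1,	/* 1 << RTN_UNICAST */
	ADDRTYPE_LOCAL     = 1 << 2,	/* 1 << RTN_LOCAL   */
	ADDRTYPE_BROADCAST = 1 << 3,	/* 1 << RTN_BROADCAST */
};

/* mirrors match_type(): nonzero iff the route type's bit is in the mask */
static int match_type_sketch(unsigned short mask, int rtn_type)
{
	return !!(mask & (1 << rtn_type));
}

int main(void)
{
	unsigned short mask = ADDRTYPE_LOCAL | ADDRTYPE_BROADCAST;

	printf("RTN_LOCAL (2) matches: %d\n", match_type_sketch(mask, 2));
	printf("RTN_UNICAST (1) matches: %d\n", match_type_sketch(mask, 1));
	return 0;
}

The source/dest inversion flags in addrtype_mt_v1() then XOR this boolean, which is why the mask-valued flag tests above need the !! normalization.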
+ 30 - 29
net/netfilter/xt_connlimit.c

@@ -33,17 +33,17 @@
 
 /* we will save the tuples of all connections we care about */
 struct xt_connlimit_conn {
-	struct list_head list;
-	struct nf_conntrack_tuple tuple;
+	struct hlist_node		node;
+	struct nf_conntrack_tuple	tuple;
+	union nf_inet_addr		addr;
 };
 
 struct xt_connlimit_data {
-	struct list_head iphash[256];
-	spinlock_t lock;
+	struct hlist_head	iphash[256];
+	spinlock_t		lock;
 };
 
 static u_int32_t connlimit_rnd __read_mostly;
-static bool connlimit_rnd_inited __read_mostly;
 
 static inline unsigned int connlimit_iphash(__be32 addr)
 {
@@ -101,9 +101,9 @@ static int count_them(struct net *net,
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct xt_connlimit_conn *conn;
-	struct xt_connlimit_conn *tmp;
+	struct hlist_node *pos, *n;
 	struct nf_conn *found_ct;
-	struct list_head *hash;
+	struct hlist_head *hash;
 	bool addit = true;
 	int matches = 0;
 
@@ -115,7 +115,7 @@ static int count_them(struct net *net,
 	rcu_read_lock();
 
 	/* check the saved connections */
-	list_for_each_entry_safe(conn, tmp, hash, list) {
+	hlist_for_each_entry_safe(conn, pos, n, hash, node) {
 		found    = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
 						 &conn->tuple);
 		found_ct = NULL;
@@ -135,7 +135,7 @@ static int count_them(struct net *net,
 
 		if (found == NULL) {
 			/* this one is gone */
-			list_del(&conn->list);
+			hlist_del(&conn->node);
 			kfree(conn);
 			continue;
 		}
@@ -146,12 +146,12 @@ static int count_them(struct net *net,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			list_del(&conn->list);
+			hlist_del(&conn->node);
 			kfree(conn);
 			continue;
 		}
 
-		if (same_source_net(addr, mask, &conn->tuple.src.u3, family))
+		if (same_source_net(addr, mask, &conn->addr, family))
 			/* same source network -> be counted! */
 			++matches;
 		nf_ct_put(found_ct);
@@ -161,11 +161,12 @@ static int count_them(struct net *net,
 
 	if (addit) {
 		/* save the new connection in our list */
-		conn = kzalloc(sizeof(*conn), GFP_ATOMIC);
+		conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
 		if (conn == NULL)
 			return -ENOMEM;
 		conn->tuple = *tuple;
-		list_add(&conn->list, hash);
+		conn->addr = *addr;
+		hlist_add_head(&conn->node, hash);
 		++matches;
 	}
 
@@ -185,15 +186,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	int connections;
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct != NULL) {
-		if (info->flags & XT_CONNLIMIT_DADDR)
-			tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-		else
-			tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-				    par->family, &tuple)) {
+	if (ct != NULL)
+		tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				    par->family, &tuple))
 		goto hotdrop;
-	}
 
 	if (par->family == NFPROTO_IPV6) {
 		const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -228,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 	unsigned int i;
 	int ret;
 
-	if (unlikely(!connlimit_rnd_inited)) {
-		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
-		connlimit_rnd_inited = true;
+	if (unlikely(!connlimit_rnd)) {
+		u_int32_t rand;
+
+		do {
+			get_random_bytes(&rand, sizeof(rand));
+		} while (!rand);
+		cmpxchg(&connlimit_rnd, 0, rand);
 	}
 	ret = nf_ct_l3proto_try_module_get(par->family);
 	if (ret < 0) {
@@ -248,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 
 	spin_lock_init(&info->data->lock);
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
-		INIT_LIST_HEAD(&info->data->iphash[i]);
+		INIT_HLIST_HEAD(&info->data->iphash[i]);
 
 	return 0;
 }
@@ -257,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_connlimit_info *info = par->matchinfo;
 	struct xt_connlimit_conn *conn;
-	struct xt_connlimit_conn *tmp;
-	struct list_head *hash = info->data->iphash;
+	struct hlist_node *pos, *n;
+	struct hlist_head *hash = info->data->iphash;
 	unsigned int i;
 
 	nf_ct_l3proto_module_put(par->family);
 
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
-		list_for_each_entry_safe(conn, tmp, &hash[i], list) {
-			list_del(&conn->list);
+		hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
+			hlist_del(&conn->node);
 			kfree(conn);
 		}
 	}
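Besides the list-to-hlist conversion, xt_connlimit.c drops the separate connlimit_rnd_inited flag: zero now means "unseeded", get_random_bytes() loops until it yields a nonzero value, and cmpxchg() publishes it so racing connlimit_mt_check() calls settle on a single seed. A userspace sketch of that once-only seeding (GCC's __sync_val_compare_and_swap() and random() stand in for the kernel's cmpxchg() and get_random_bytes()):

#include <stdio.h>
#include <stdlib.h>

static unsigned int connlimit_rnd_sketch;	/* 0 == not yet seeded */

static void seed_once(void)
{
	if (!connlimit_rnd_sketch) {
		unsigned int rand;

		do {
			rand = (unsigned int)random();
		} while (!rand);	/* keep 0 reserved as "unseeded" */
		/* only the first caller's value sticks */
		__sync_val_compare_and_swap(&connlimit_rnd_sketch, 0, rand);
	}
}

int main(void)
{
	seed_once();
	seed_once();	/* second call sees a nonzero seed; no-op */
	printf("seed: %u\n", connlimit_rnd_sketch);
	return 0;
}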