@@ -54,6 +54,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/memcontrol.h>
+#include <linux/res_counter.h>

#include <linux/filter.h>
#include <linux/rculist_nulls.h>
@@ -168,6 +169,7 @@ struct sock_common {
/* public: */
};

+struct cg_proto;
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with inet_timewait_sock
@@ -228,6 +230,7 @@ struct sock_common {
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_classid: this socket's cgroup classid
+ * @sk_cgrp: this socket's cgroup-specific proto data
* @sk_write_pending: a write to stream socket waits to start
* @sk_state_change: callback to indicate change in the state of the sock
* @sk_data_ready: callback to indicate there is data to be processed
@@ -342,6 +345,7 @@ struct sock {
#endif
__u32 sk_mark;
u32 sk_classid;
+ struct cg_proto *sk_cgrp;
void (*sk_state_change)(struct sock *sk);
void (*sk_data_ready)(struct sock *sk, int bytes);
void (*sk_write_space)(struct sock *sk);
@@ -838,6 +842,37 @@ struct proto {
#ifdef SOCK_REFCNT_DEBUG
atomic_t socks;
#endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ /*
+ * cgroup-specific init/deinit functions, called once for all
+ * protocols that implement them, from the cgroup's populate function.
+ * init_cgroup has to set up any files the protocol wants to
+ * appear in the kmem cgroup filesystem.
+ */
+ int (*init_cgroup)(struct cgroup *cgrp,
+ struct cgroup_subsys *ss);
+ void (*destroy_cgroup)(struct cgroup *cgrp,
+ struct cgroup_subsys *ss);
+ struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
+#endif
+};
+
+struct cg_proto {
+ void (*enter_memory_pressure)(struct sock *sk);
+ struct res_counter *memory_allocated; /* Current allocated memory. */
+ struct percpu_counter *sockets_allocated; /* Current number of sockets. */
+ int *memory_pressure;
+ long *sysctl_mem;
+ /*
+ * The memcg field is used to find which memcg this cg_proto belongs to
+ * directly. Each memcg struct can hold more than one cg_proto, so
+ * container_of won't really cut it.
+ *
+ * The elegant solution would be to have an inverse function to
+ * proto_cgroup in struct proto, but that would mean polluting the
+ * structure for everybody, instead of just for memcg users.
+ */
+ struct mem_cgroup *memcg;
};

extern int proto_register(struct proto *prot, int alloc_slab);
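For context, a protocol that opts into this accounting is expected to embed a cg_proto (together with the counters it points at) in some per-memcg state and hand it back through proto_cgroup(). A minimal sketch of what that wiring could look like; the tcp_memcontrol layout and the mem_cgroup_tcp_state() helper are illustrative assumptions, not part of this patch:

/* Hypothetical per-memcg TCP state backing proto_cgroup(). */
struct tcp_memcontrol {
	struct cg_proto cg_proto;	/* handed back via proto_cgroup() */
	struct res_counter tcp_memory_allocated;
	struct percpu_counter tcp_sockets_allocated;
	long tcp_prot_mem[3];		/* cg_proto.sysctl_mem points here */
	int tcp_memory_pressure;	/* cg_proto.memory_pressure points here */
};

static struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
{
	/* mem_cgroup_tcp_state() is assumed: it returns the per-memcg
	 * TCP state, or NULL for the root cgroup, which keeps using
	 * the global counters in struct proto. */
	struct tcp_memcontrol *tcp = mem_cgroup_tcp_state(memcg);

	return tcp ? &tcp->cg_proto : NULL;
}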
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
}

-static inline void sk_refcnt_debug_release(const struct sock *sk)
+inline void sk_refcnt_debug_release(const struct sock *sk)
{
if (atomic_read(&sk->sk_refcnt) != 1)
printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+extern struct jump_label_key memcg_socket_limit_enabled;
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+ struct cg_proto *cg_proto)
+{
+ return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
+}
+#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+ struct cg_proto *cg_proto)
+{
+ return NULL;
+}
+#endif
+
+
static inline bool sk_has_memory_pressure(const struct sock *sk)
{
return sk->sk_prot->memory_pressure != NULL;
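Two points are worth spelling out about this block. First, mem_cgroup_sockets_enabled is a jump label, so every memcg branch below compiles to a patched-in no-op until the first cgroup actually sets a socket limit; unlimited configurations pay essentially nothing. Second, the hierarchy walks below terminate only if parent_cg_proto() eventually returns NULL, which assumes parent_mem_cgroup() yields NULL for the root memcg and proto_cgroup() maps that to NULL. A sketch of the resulting walk pattern, illustrative only:

/* Illustrative walk from a socket's cg_proto up to the hierarchy
 * root. Assumes proto_cgroup(NULL) == NULL so the loop stops once
 * the root memcg is passed. */
static inline void cg_proto_walk_example(struct sock *sk)
{
	struct cg_proto *cg_proto = sk->sk_cgrp;
	struct proto *prot = sk->sk_prot;

	for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
		; /* charge, uncharge or inspect cg_proto here */
}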
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
return false;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+ return !!*sk->sk_cgrp->memory_pressure;
+
return !!*sk->sk_prot->memory_pressure;
}

@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
{
int *memory_pressure = sk->sk_prot->memory_pressure;

- if (memory_pressure && *memory_pressure)
+ if (!memory_pressure)
+ return;
+
+ if (*memory_pressure)
*memory_pressure = 0;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+ struct cg_proto *cg_proto = sk->sk_cgrp;
+ struct proto *prot = sk->sk_prot;
+
+ for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+ if (*cg_proto->memory_pressure)
+ *cg_proto->memory_pressure = 0;
+ }
+
}

static inline void sk_enter_memory_pressure(struct sock *sk)
{
- if (sk->sk_prot->enter_memory_pressure)
- sk->sk_prot->enter_memory_pressure(sk);
+ if (!sk->sk_prot->enter_memory_pressure)
+ return;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+ struct cg_proto *cg_proto = sk->sk_cgrp;
+ struct proto *prot = sk->sk_prot;
+
+ for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+ cg_proto->enter_memory_pressure(sk);
+ }
+
+ sk->sk_prot->enter_memory_pressure(sk);
}

static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
long *prot = sk->sk_prot->sysctl_mem;
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+ prot = sk->sk_cgrp->sysctl_mem;
return prot[index];
}

+static inline void memcg_memory_allocated_add(struct cg_proto *prot,
+ unsigned long amt,
+ int *parent_status)
+{
+ struct res_counter *fail;
+ int ret;
+
+ ret = res_counter_charge(prot->memory_allocated,
+ amt << PAGE_SHIFT, &fail);
+
+ if (ret < 0)
+ *parent_status = OVER_LIMIT;
+}
+
+static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
+ unsigned long amt)
+{
+ res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+}
+
+static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
+{
+ u64 ret;
+ ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
+ return ret >> PAGE_SHIFT;
+}
+
static inline long
sk_memory_allocated(const struct sock *sk)
{
struct proto *prot = sk->sk_prot;
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+ return memcg_memory_allocated_read(sk->sk_cgrp);
+
return atomic_long_read(prot->memory_allocated);
}

static inline long
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
{
struct proto *prot = sk->sk_prot;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+ memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
+ /* update the root cgroup regardless */
+ atomic_long_add_return(amt, prot->memory_allocated);
+ return memcg_memory_allocated_read(sk->sk_cgrp);
+ }
+
return atomic_long_add_return(amt, prot->memory_allocated);
}

static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
{
struct proto *prot = sk->sk_prot;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+ parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+ memcg_memory_allocated_sub(sk->sk_cgrp, amt);
+
atomic_long_sub(amt, prot->memory_allocated);
}

static inline void sk_sockets_allocated_dec(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+ struct cg_proto *cg_proto = sk->sk_cgrp;
+
+ for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+ percpu_counter_dec(cg_proto->sockets_allocated);
+ }
+
percpu_counter_dec(prot->sockets_allocated);
}

static inline void sk_sockets_allocated_inc(struct sock *sk)
{
struct proto *prot = sk->sk_prot;
+
+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+ struct cg_proto *cg_proto = sk->sk_cgrp;
+
+ for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+ percpu_counter_inc(cg_proto->sockets_allocated);
+ }
+
percpu_counter_inc(prot->sockets_allocated);
}

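The parent_status handshake between sk_memory_allocated_add() and sk_memory_allocated_sub() deserves a note: when res_counter_charge() fails it has already backed out its partial charges along the hierarchy, so the matching uncharge must be skipped or the counters would go negative. A simplified, hypothetical caller to show the intended pairing; the real consumer would be __sk_mem_schedule()-style code, and UNDER_LIMIT/OVER_LIMIT are assumed to be the constants accompanying res_counter:

/* Hypothetical caller showing how parent_status threads from the
 * charge to the matching uncharge. */
static int example_mem_schedule(struct sock *sk, int amt)
{
	int parent_status = UNDER_LIMIT;
	long allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	if (parent_status == OVER_LIMIT ||
	    allocated > sk_prot_mem_limits(sk, 2)) {
		sk_enter_memory_pressure(sk);
		/* OVER_LIMIT tells _sub() that the memcg side was already
		 * uncharged by the failed res_counter_charge(). */
		sk_memory_allocated_sub(sk, amt, parent_status);
		return 0;
	}
	return 1;
}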
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk)
{
struct proto *prot = sk->sk_prot;

+ if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+ return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated);
+
return percpu_counter_sum_positive(prot->sockets_allocated);
}