@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free.
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+static void *__kmalloc_reserve(size_t size, gfp_t flags, int node,
+			       unsigned long ip, bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation; when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
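
The helper added above is a two-step fallback: first try the allocation with __GFP_NOMEMALLOC so it cannot dip into the emergency reserves, and only when that fails in a context that gfp_pfmemalloc_allowed() deems entitled, retry with the caller's original flags and record that the reserves were tapped. A minimal user-space sketch of the same control flow, with malloc() standing in for both kmalloc_node_track_caller() calls (alloc_with_fallback() is illustrative, not kernel API):

	#include <stdbool.h>
	#include <stdlib.h>

	/* Hypothetical model of __kmalloc_reserve(): try cheap, then reserves. */
	void *alloc_with_fallback(size_t size, bool reserves_allowed,
				  bool *used_reserves)
	{
		bool used = false;
		void *obj = malloc(size);	/* first try: reserves forbidden */

		if (!obj && reserves_allowed) {
			used = true;		/* second try: dip into reserves */
			obj = malloc(size);
		}
		if (used_reserves)
			*used_reserves = used;
		return obj;
	}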
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 * __alloc_skb - allocate a network buffer
 * @size: size to allocate
 * @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- *	and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *	instead of head cache and allocate a cloned (child) skb.
+ *	If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *	allocations in case the data is required for writeback
 * @node: numa node to allocate memory on
 *
 * Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 * %GFP_ATOMIC.
 */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory: skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
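
__alloc_skb() latches the allocator's answer in skb->pfmemalloc for the buffer's lifetime and mirrors it into the fclone child, so a fast clone cannot forget that its data came from the reserves. A rough model of that latching, reusing alloc_with_fallback() from the sketch above (struct buf and buf_alloc() are illustrative, not kernel types):

	struct buf {
		bool pfmemalloc;	/* data was carved out of the reserves */
		void *data;
	};

	struct buf *buf_alloc(size_t size, bool reserves_allowed)
	{
		bool pfmemalloc;
		struct buf *b = malloc(sizeof(*b));

		if (!b)
			return NULL;
		b->data = alloc_with_fallback(size, reserves_allowed, &pfmemalloc);
		if (!b->data) {
			free(b);
			return NULL;
		}
		b->pfmemalloc = pfmemalloc;	/* latched until the buffer dies */
		return b;
	}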
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ end:
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
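
The refactor above is the usual split of an exported function into a static worker plus a thin wrapper: __netdev_alloc_frag() gains a gfp_mask parameter for in-file callers, while the exported netdev_alloc_frag() keeps the historical GFP_ATOMIC | __GFP_COLD behavior so no external caller has to change. The same pattern in miniature (names and the flag value are illustrative stand-ins):

	#include <stdlib.h>

	#define DEFAULT_FLAGS 0x1u	/* stand-in for GFP_ATOMIC | __GFP_COLD */

	/* The static worker carries the full parameter set... */
	static void *frag_alloc(unsigned int fragsz, unsigned int gfp_flags)
	{
		(void)gfp_flags;	/* a real allocator would honor these */
		return malloc(fragsz);
	}

	/* ...while the public entry point bakes in the old defaults. */
	void *frag_alloc_default(unsigned int fragsz)
	{
		return frag_alloc(fragsz, DEFAULT_FLAGS);
	}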
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		     SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data = netdev_alloc_frag(fragsz);
+		void *data;
+
+		if (sk_memalloc_socks())
+			gfp_mask |= __GFP_MEMALLOC;
+
+		data = __netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 			put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
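
Both branches above apply the same guard before allocating: the gfp mask is widened with __GFP_MEMALLOC only while sk_memalloc_socks() reports that at least one SOCK_MEMALLOC socket exists, since a reserve-backed packet is useless if no such socket can consume it. The guard in isolation, as a sketch (the bit value and names are illustrative, not kernel constants):

	#include <stdbool.h>

	#define MEMALLOC_BIT 0x2u	/* illustrative stand-in for __GFP_MEMALLOC */

	/* Widen an allocation mask for RX only when a consumer exists. */
	unsigned int widen_for_rx(unsigned int gfp_flags, bool memalloc_socks)
	{
		if (memalloc_socks)
			gfp_flags |= MEMALLOC_BIT;
		return gfp_flags;
	}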
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property = old->ipvs_property;
 #endif
+	new->pfmemalloc = old->pfmemalloc;
 	new->protocol = old->protocol;
 	new->mark = old->mark;
 	new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
 * skb_copy - create private copy of an sk_buff
 * @skb: buffer to copy
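
skb_alloc_rx_flag() encodes the propagation rule used by every copy path below: a copy of a pfmemalloc buffer must itself be allowed into the reserves, otherwise copying would fail under exactly the memory pressure the reserves exist to survive. Continuing the illustrative buf model from the earlier sketch:

	/* A copy inherits the source's entitlement, much as skb_copy() does
	 * via skb_alloc_rx_flag(); buf and buf_alloc() are sketch types. */
	struct buf *buf_copy(const struct buf *old, size_t size)
	{
		return buf_alloc(size, old->pfmemalloc);
	}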
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-		       gfp_mask);
+	if (skb_pfmemalloc(skb))
+		gfp_mask |= __GFP_MEMALLOC;
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(ksize(data));
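
Note that pskb_expand_head() passes NULL as the pfmemalloc out-parameter: the skb already carries its pfmemalloc bit from the original head allocation, so only the widened gfp_mask matters here. In terms of the earlier alloc_with_fallback() sketch, both calling styles are valid:

	/* Illustrative only: tracked vs. untracked calls into the fallback. */
	void expand_example(void)
	{
		bool used;
		void *fresh = alloc_with_fallback(256, true, &used);	/* record status */
		void *grown = alloc_with_fallback(512, true, NULL);	/* status not needed */

		free(fresh);
		free(grown);
	}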
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 * Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;