
Commit d6e0b7f

Vladimir Davydov authored and torvalds committed Feb 13, 2015
slub: make dead caches discard free slabs immediately
To speed up further allocations, SLUB may store empty slabs in per-cpu/node partial lists instead of freeing them immediately. This prevents per-memcg cache destruction, because kmem caches created for a memory cgroup are only destroyed after the last page charged to the cgroup is freed.

To fix this issue, this patch resurrects the approach first proposed in [1]. It forbids SLUB from caching empty slabs after the memory cgroup that the cache belongs to has been destroyed. This is achieved by setting the kmem_cache's cpu_partial and min_partial constants to 0 and tuning put_cpu_partial() so that it drops frozen empty slabs immediately if cpu_partial == 0.

The runtime overhead is minimal. Of all the hot functions, we only touch the relatively cold put_cpu_partial(): we make it call unfreeze_partials() after freezing a slab that belongs to an offline memory cgroup. Since slab freezing exists to avoid moving slabs from/to a partial list on free/alloc, and there can't be allocations from dead caches, it shouldn't cause any overhead. We do have to disable preemption in put_cpu_partial() to achieve that, though.

The original patch was well received and even merged into the mm tree. However, I decided to withdraw it due to changes happening in the memcg core at that time. I had an idea of introducing per-memcg shrinkers for kmem caches, but now, as memcg has finally settled down, I do not see it as an option, because a SLUB shrinker would be too costly to call since SLUB does not keep free slabs on a separate list. Besides, we currently do not even call per-memcg shrinkers for offline memcgs. Overall, it would introduce much more complexity to both SLUB and memcg than this small patch.

As for SLAB, there is no such problem, because it shrinks its per-cpu/node caches periodically. Thanks to list_lru reparenting, we no longer keep entries for offline cgroups in per-memcg arrays (such as memcg_cache_params->memcg_caches), so we do not have to worry if a per-memcg cache is shrunk a bit later than it could be.

[1] http://thread.gmane.org/gmane.linux.kernel.mm/118649/focus=118650

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent ce3712d commit d6e0b7f
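The mechanism described in the message boils down to one decision on the free path: if a cache has been deactivated (cpu_partial == 0), a frozen empty slab is drained right away instead of being parked on the per-CPU partial list. The user-space sketch below models only that decision; it is not kernel code, and every toy_* type and function is invented for illustration.

#include <stdio.h>

/* Toy stand-ins for the kernel structures; all names are illustrative only. */
struct toy_slab {
	int inuse;              /* objects still allocated from this slab */
	struct toy_slab *next;  /* link in the per-CPU partial list */
};

struct toy_cache {
	unsigned int cpu_partial;  /* set to 0 once the owning memcg is dead */
	struct toy_slab *partial;  /* per-CPU partial list (one CPU modeled) */
};

/* Model of unfreeze_partials(): empty the per-CPU list, releasing slabs
 * that no longer hold any objects. */
static void toy_unfreeze_partials(struct toy_cache *c)
{
	while (c->partial) {
		struct toy_slab *s = c->partial;

		c->partial = s->next;
		if (s->inuse == 0)
			printf("empty slab %p freed immediately\n", (void *)s);
		/* a non-empty slab would go back to the node partial list */
	}
}

/* Model of put_cpu_partial(): stash the frozen slab, but if caching has
 * been disabled (cpu_partial == 0), drain the list right away. */
static void toy_put_cpu_partial(struct toy_cache *c, struct toy_slab *s)
{
	s->next = c->partial;
	c->partial = s;
	if (c->cpu_partial == 0)
		toy_unfreeze_partials(c);
}

int main(void)
{
	struct toy_cache dead_cache = { .cpu_partial = 0, .partial = NULL };
	struct toy_slab empty_slab = { .inuse = 0, .next = NULL };

	/* A free against a dead cache: the empty slab is not cached. */
	toy_put_cpu_partial(&dead_cache, &empty_slab);
	return 0;
}

In the real patch the drain is unfreeze_partials(), called with interrupts off and preemption disabled, as the mm/slub.c hunks further down show.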

5 files changed, +43 −11 lines changed


mm/slab.c

Lines changed: 2 additions & 2 deletions

@@ -2382,7 +2382,7 @@ static int drain_freelist(struct kmem_cache *cache,
 	return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 {
 	int ret = 0;
 	int node;
@@ -2404,7 +2404,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
 	int i;
 	struct kmem_cache_node *n;
-	int rc = __kmem_cache_shrink(cachep);
+	int rc = __kmem_cache_shrink(cachep, false);
 
 	if (rc)
 		return rc;

mm/slab.h

Lines changed: 1 addition & 1 deletion

@@ -138,7 +138,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *);
+int __kmem_cache_shrink(struct kmem_cache *, bool);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;

mm/slab_common.c

Lines changed: 13 additions & 2 deletions

@@ -549,20 +549,31 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 {
 	int idx;
 	struct memcg_cache_array *arr;
-	struct kmem_cache *s;
+	struct kmem_cache *s, *c;
 
 	idx = memcg_cache_id(memcg);
 
+	get_online_cpus();
+	get_online_mems();
+
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		if (!is_root_cache(s))
 			continue;
 
 		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
 						lockdep_is_held(&slab_mutex));
+		c = arr->entries[idx];
+		if (!c)
+			continue;
+
+		__kmem_cache_shrink(c, true);
 		arr->entries[idx] = NULL;
 	}
 	mutex_unlock(&slab_mutex);
+
+	put_online_mems();
+	put_online_cpus();
 }
 
 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
@@ -649,7 +660,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 
 	get_online_cpus();
 	get_online_mems();
-	ret = __kmem_cache_shrink(cachep);
+	ret = __kmem_cache_shrink(cachep, false);
 	put_online_mems();
 	put_online_cpus();
 	return ret;

mm/slob.c

Lines changed: 1 addition & 1 deletion

@@ -618,7 +618,7 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
 	return 0;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d)
+int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
 {
 	return 0;
 }

mm/slub.c

Lines changed: 26 additions & 5 deletions

@@ -2007,6 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 	int pages;
 	int pobjects;
 
+	preempt_disable();
 	do {
 		pages = 0;
 		pobjects = 0;
@@ -2040,6 +2041,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
 								!= oldpage);
+	if (unlikely(!s->cpu_partial)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+		local_irq_restore(flags);
+	}
+	preempt_enable();
 #endif
 }
 
@@ -3369,7 +3378,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s)
+int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 {
 	int node;
 	int i;
@@ -3381,11 +3390,23 @@ int __kmem_cache_shrink(struct kmem_cache *s)
 	unsigned long flags;
 	int ret = 0;
 
+	if (deactivate) {
+		/*
+		 * Disable empty slabs caching. Used to avoid pinning offline
+		 * memory cgroups by kmem pages that can be freed.
+		 */
+		s->cpu_partial = 0;
+		s->min_partial = 0;
+
+		/*
+		 * s->cpu_partial is checked locklessly (see put_cpu_partial),
+		 * so we have to make sure the change is visible.
+		 */
+		kick_all_cpus_sync();
+	}
+
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
-		if (!n->nr_partial)
-			continue;
-
 		INIT_LIST_HEAD(&discard);
 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
 			INIT_LIST_HEAD(promote + i);
@@ -3440,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list)
-		__kmem_cache_shrink(s);
+		__kmem_cache_shrink(s, false);
 	mutex_unlock(&slab_mutex);
 
 	return 0;
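A detail worth noting in the deactivate branch above is the ordering: the thresholds are zeroed and made visible to all CPUs via kick_all_cpus_sync() before the final flush, because put_cpu_partial() reads s->cpu_partial without any lock. The rough user-space analogue below uses C11 atomics in place of the kernel primitives; it is a sketch of the publish-then-drain idea only, and the toy_* names are invented for illustration.

#include <stdatomic.h>
#include <stdio.h>

/* Toy model of a cache whose caching thresholds are read locklessly. */
struct toy_cache {
	atomic_uint cpu_partial;  /* read on the free path without a lock */
	atomic_uint min_partial;
};

/* Free-path check: may an empty slab be kept around for reuse? */
static int toy_may_cache_empty_slab(struct toy_cache *c)
{
	return atomic_load_explicit(&c->cpu_partial, memory_order_acquire) != 0;
}

/* Model of __kmem_cache_shrink(c, true): disable caching, publish the
 * change, then drain whatever was cached before the change landed. */
static void toy_deactivate(struct toy_cache *c)
{
	atomic_store_explicit(&c->cpu_partial, 0, memory_order_release);
	atomic_store_explicit(&c->min_partial, 0, memory_order_release);
	/*
	 * The kernel issues kick_all_cpus_sync() here so every CPU observes
	 * cpu_partial == 0 before the flush; the release/acquire pair in
	 * this toy model merely stands in for that barrier.
	 */
	printf("draining; empty slabs may still be cached: %d\n",
	       toy_may_cache_empty_slab(c));
}

int main(void)
{
	struct toy_cache c;

	atomic_init(&c.cpu_partial, 4);
	atomic_init(&c.min_partial, 5);
	toy_deactivate(&c);
	return 0;
}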
