/*
 * Copyright (C) 2014 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef Chunk_h
#define Chunk_h

#include "Object.h"
#include "Sizes.h"
#include "SmallLine.h"
#include "SmallPage.h"
#include "VMAllocate.h"
#include <array>

namespace bmalloc {
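
// A Chunk is a chunkSize-aligned slab of memory, carved out of the large
// allocator, that holds small pages and small lines. The Chunk object itself
// is the chunk's inline metadata, so its address is also the chunk's base
// address.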
class Chunk : public ListNode<Chunk> {
public:
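    // Maps an address within a chunk back to the Chunk that contains it.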
    static Chunk* get(void*);
    static size_t metadataSize(size_t pageSize);

    Chunk(size_t pageSize);
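
    // Each chunk is refcounted so the heap can notice when a chunk becomes
    // empty and return its memory to the large allocator.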
    void ref() { ++m_refCount; }
    void deref() { BASSERT(m_refCount); --m_refCount; }
    unsigned refCount() { return m_refCount; }

    size_t offset(void*);

    char* address(size_t offset);
    SmallPage* page(size_t offset);
    SmallLine* line(size_t offset);

    char* bytes() { return reinterpret_cast<char*>(this); }
    SmallLine* lines() { return &m_lines[0]; }
    SmallPage* pages() { return &m_pages[0]; }
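
    // Free pages in this chunk. Keeping the free list per chunk makes a fully
    // free chunk easy to detect and recycle.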
    List<SmallPage>& freePages() { return m_freePages; }

private:
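    // Use { } initialization because chunk memory is recycled through the
    // large allocator and no longer arrives zero-initialized from the OS.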
    size_t m_refCount { };
    List<SmallPage> m_freePages { };

    std::array<SmallLine, chunkSize / smallLineSize> m_lines { };
    std::array<SmallPage, chunkSize / smallPageSize> m_pages { };
};

struct ChunkHash {
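    // The best hash function is a unique and monotonically increasing integer,
    // and that's exactly what we typically get from the high bits of a Chunk,
    // since the OS allocates chunks at unique and increasing addresses.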
    static unsigned hash(Chunk* key)
    {
        return static_cast<unsigned>(
            reinterpret_cast<uintptr_t>(key) / chunkSize);
    }
};

inline size_t Chunk::metadataSize(size_t pageSize)
{
    // We align to at least the page size so we can service aligned allocations
    // at equal and smaller powers of two, and also so we can vmDeallocatePhysicalPages().
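    // In effect: sizeof(Chunk), which covers the inline m_lines and m_pages
    // metadata arrays, rounded up to a pageSize boundary.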
    return roundUpToMultipleOfNonPowerOfTwo(pageSize, sizeof(Chunk));
}
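
// Iterates the pages in a chunk: visits each page-sized span after the
// chunk's inline metadata and passes its SmallPage metadata to the function.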
template<typename Function> void forEachPage(Chunk* chunk, size_t pageSize, Function function)
{
    Object begin(chunk, Chunk::metadataSize(pageSize));
    Object end(chunk, chunkSize);

    for (auto it = begin; it + pageSize <= end; it = it + pageSize)
        function(it.page());
}
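
// A minimal usage sketch of forEachPage (hypothetical caller, not part of this
// header), assuming a freshly constructed chunk whose pages should be threaded
// onto its free list:
//
//     forEachPage(chunk, pageSize, [&](SmallPage* page) {
//         page->setHasPhysicalPages(true);
//         chunk->freePages().pushFront(page);
//     });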

inline Chunk::Chunk(size_t pageSize)
{
|
    size_t smallPageCount = pageSize / smallPageSize;
    forEachPage(this, pageSize, [&](SmallPage* page) {
        // A page at this page size spans one or more SmallPage metadata
        // slots; record each slot's distance ("slide") back to the head slot
        // so Chunk::page() can recover the head from any interior offset.
        for (size_t i = 0; i < smallPageCount; ++i)
            page[i].setSlide(i);
    });
}
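
// Sketch of the caching idea described in the log above: the heap keeps one
// free chunk per page class so that a free(malloc(X)) pattern doesn't bounce
// a chunk out to the large allocator and back. This stand-in is illustrative
// only; the real logic lives in Heap.cpp, and kPageClassCountSketch is a
// made-up constant standing in for the page class count from Sizes.h.
struct ChunkCacheSketch {
    static constexpr size_t kPageClassCountSketch = 16;

    std::array<Chunk*, kPageClassCountSketch> freeChunks { };

    Chunk* take(size_t pageClass)
    {
        Chunk* chunk = freeChunks[pageClass];
        freeChunks[pageClass] = nullptr;
        return chunk;
    }

    void put(size_t pageClass, Chunk* chunk)
    {
        if (!freeChunks[pageClass]) {
            freeChunks[pageClass] = chunk; // Cache the chunk for reuse...
            return;
        }
        // ...otherwise the caller would return 'chunk' to the large allocator.
    }
};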

inline Chunk* Chunk::get(void* address)
{
    return static_cast<Chunk*>(mask(address, chunkMask));
}
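
// Illustrative check (hypothetical helper): chunks are chunkSize-aligned, so
// masking any interior pointer with chunkMask recovers the owning Chunk, and
// the pointer always lands within that chunk's bytes.
inline void validateChunkPointer(void* p)
{
    Chunk* chunk = Chunk::get(p);
    BASSERT(static_cast<char*>(p) >= chunk->bytes());
    BASSERT(static_cast<char*>(p) < chunk->bytes() + chunkSize);
}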

inline size_t Chunk::offset(void* address)
{
    BASSERT(address >= this);
    BASSERT(address < bytes() + chunkSize);
    return static_cast<char*>(address) - bytes();
}

inline char* Chunk::address(size_t offset)
{
    return bytes() + offset;
}

inline SmallPage* Chunk::page(size_t offset)
{
    size_t pageNumber = offset / smallPageSize;
    SmallPage* page = &m_pages[pageNumber];
    return page - page->slide();
}
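
// Sketch of the slide scheme (hypothetical check): the metadata slots of a
// multi-slot page all point back to the head slot, so page() lands on the
// head -- whose slide is zero -- for any interior offset.
inline void validatePageHead(Chunk* chunk, size_t offset)
{
    SmallPage* head = chunk->page(offset);
    BASSERT(!head->slide());
}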

inline SmallLine* Chunk::line(size_t offset)
{
    size_t lineNumber = offset / smallLineSize;
    return &m_lines[lineNumber];
}

inline char* SmallLine::begin()
{
    Chunk* chunk = Chunk::get(this);
    size_t lineNumber = this - chunk->lines();
    size_t offset = lineNumber * smallLineSize;
    return &reinterpret_cast<char*>(chunk)[offset];
}

inline char* SmallLine::end()
{
    return begin() + smallLineSize;
}

inline SmallLine* SmallPage::begin()
{
    BASSERT(!m_slide);
    Chunk* chunk = Chunk::get(this);
    size_t pageNumber = this - chunk->pages();
    size_t lineNumber = pageNumber * smallPageLineCount;
    return &chunk->lines()[lineNumber];
}
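
// Sketch (hypothetical check): a head page's first line begins at the page's
// base address, so decomposing that address yields the same page again.
inline void validatePageBase(SmallPage* page)
{
    BASSERT(!page->slide());
    char* base = page->begin()->begin();
    BASSERT(Object(base).page() == page);
}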

inline Object::Object(void* object)
    : m_chunk(Chunk::get(object))
    , m_offset(m_chunk->offset(object))
{
}

inline Object::Object(Chunk* chunk, void* object)
    : m_chunk(chunk)
    , m_offset(m_chunk->offset(object))
{
    BASSERT(chunk == Chunk::get(object));
}

inline char* Object::address()
{
    return m_chunk->address(m_offset);
}

inline SmallLine* Object::line()
{
    return m_chunk->line(m_offset);
}

inline SmallPage* Object::page()
{
    return m_chunk->page(m_offset);
}
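
// Round-trip sketch (hypothetical check): an Object is just (chunk, offset),
// so decomposing a pointer and re-materializing it is the identity, and the
// address always falls within its line.
inline void validateObjectRoundTrip(void* p)
{
    Object object(p);
    BASSERT(object.address() == static_cast<char*>(p));
    BASSERT(object.address() >= object.line()->begin());
    BASSERT(object.address() < object.line()->end());
}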

} // namespace bmalloc

#endif // Chunk_h