Merge pull request #87814 from bruvzg/memalign
[Core] Improve `CowData` and `Memory` metadata alignment.
This commit is contained in:
commit
f8f2c8c85a
3 changed files with 99 additions and 52 deletions
|
@ -72,23 +72,23 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
|
||||||
bool prepad = p_pad_align;
|
bool prepad = p_pad_align;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void *mem = malloc(p_bytes + (prepad ? PAD_ALIGN : 0));
|
void *mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0));
|
||||||
|
|
||||||
ERR_FAIL_NULL_V(mem, nullptr);
|
ERR_FAIL_NULL_V(mem, nullptr);
|
||||||
|
|
||||||
alloc_count.increment();
|
alloc_count.increment();
|
||||||
|
|
||||||
if (prepad) {
|
if (prepad) {
|
||||||
uint64_t *s = (uint64_t *)mem;
|
|
||||||
*s = p_bytes;
|
|
||||||
|
|
||||||
uint8_t *s8 = (uint8_t *)mem;
|
uint8_t *s8 = (uint8_t *)mem;
|
||||||
|
|
||||||
|
uint64_t *s = (uint64_t *)(s8 + SIZE_OFFSET);
|
||||||
|
*s = p_bytes;
|
||||||
|
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
uint64_t new_mem_usage = mem_usage.add(p_bytes);
|
uint64_t new_mem_usage = mem_usage.add(p_bytes);
|
||||||
max_usage.exchange_if_greater(new_mem_usage);
|
max_usage.exchange_if_greater(new_mem_usage);
|
||||||
#endif
|
#endif
|
||||||
return s8 + PAD_ALIGN;
|
return s8 + DATA_OFFSET;
|
||||||
} else {
|
} else {
|
||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
@ -108,8 +108,8 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (prepad) {
|
if (prepad) {
|
||||||
mem -= PAD_ALIGN;
|
mem -= DATA_OFFSET;
|
||||||
uint64_t *s = (uint64_t *)mem;
|
uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
|
||||||
|
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
if (p_bytes > *s) {
|
if (p_bytes > *s) {
|
||||||
|
@ -126,14 +126,14 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
|
||||||
} else {
|
} else {
|
||||||
*s = p_bytes;
|
*s = p_bytes;
|
||||||
|
|
||||||
mem = (uint8_t *)realloc(mem, p_bytes + PAD_ALIGN);
|
mem = (uint8_t *)realloc(mem, p_bytes + DATA_OFFSET);
|
||||||
ERR_FAIL_NULL_V(mem, nullptr);
|
ERR_FAIL_NULL_V(mem, nullptr);
|
||||||
|
|
||||||
s = (uint64_t *)mem;
|
s = (uint64_t *)(mem + SIZE_OFFSET);
|
||||||
|
|
||||||
*s = p_bytes;
|
*s = p_bytes;
|
||||||
|
|
||||||
return mem + PAD_ALIGN;
|
return mem + DATA_OFFSET;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
mem = (uint8_t *)realloc(mem, p_bytes);
|
mem = (uint8_t *)realloc(mem, p_bytes);
|
||||||
|
@ -158,10 +158,10 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
|
||||||
alloc_count.decrement();
|
alloc_count.decrement();
|
||||||
|
|
||||||
if (prepad) {
|
if (prepad) {
|
||||||
mem -= PAD_ALIGN;
|
mem -= DATA_OFFSET;
|
||||||
|
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
uint64_t *s = (uint64_t *)mem;
|
uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
|
||||||
mem_usage.sub(*s);
|
mem_usage.sub(*s);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -38,10 +38,6 @@
|
||||||
#include <new>
|
#include <new>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#ifndef PAD_ALIGN
|
|
||||||
#define PAD_ALIGN 16 //must always be greater than this at much
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class Memory {
|
class Memory {
|
||||||
#ifdef DEBUG_ENABLED
|
#ifdef DEBUG_ENABLED
|
||||||
static SafeNumeric<uint64_t> mem_usage;
|
static SafeNumeric<uint64_t> mem_usage;
|
||||||
|
@ -51,6 +47,17 @@ class Memory {
|
||||||
static SafeNumeric<uint64_t> alloc_count;
|
static SafeNumeric<uint64_t> alloc_count;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// Alignment: ↓ max_align_t ↓ uint64_t ↓ max_align_t
|
||||||
|
// ┌─────────────────┬──┬────────────────┬──┬───────────...
|
||||||
|
// │ uint64_t │░░│ uint64_t │░░│ T[]
|
||||||
|
// │ alloc size │░░│ element count │░░│ data
|
||||||
|
// └─────────────────┴──┴────────────────┴──┴───────────...
|
||||||
|
// Offset: ↑ SIZE_OFFSET ↑ ELEMENT_OFFSET ↑ DATA_OFFSET
|
||||||
|
|
||||||
|
// Byte offset, from the start of a padded allocation, of the uint64_t in which
// alloc_static()/realloc_static() store the requested byte count.
static constexpr size_t SIZE_OFFSET = 0;
|
||||||
|
// Byte offset of the uint64_t element-count field: the end of the size field
// (SIZE_OFFSET + sizeof(uint64_t)), rounded up to the next multiple of
// alignof(uint64_t) when it is not already a multiple.
static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
|
||||||
|
// Byte offset of the user data: the end of the element-count field, rounded up
// to the next multiple of alignof(max_align_t). This is the number of header
// bytes added to a padded allocation and the distance between the raw
// allocation and the pointer handed back to the caller.
static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));
|
||||||
|
|
||||||
static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
|
static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
|
||||||
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
|
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
|
||||||
static void free_static(void *p_ptr, bool p_pad_align = false);
|
static void free_static(void *p_ptr, bool p_pad_align = false);
|
||||||
|
@ -133,6 +140,10 @@ void memdelete_allocator(T *p_class) {
|
||||||
|
|
||||||
#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)
|
#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)
|
||||||
|
|
||||||
|
// Given a pointer to the data area of a padded allocation, returns a pointer to
// the uint64_t element-count field in the header that precedes it.
// p_ptr must be the data pointer itself (i.e. raw allocation + DATA_OFFSET);
// the function performs no validation of its argument.
_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) {
|
||||||
|
// Step back to the start of the raw allocation, then forward to the count field.
return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T *memnew_arr_template(size_t p_elements) {
|
T *memnew_arr_template(size_t p_elements) {
|
||||||
if (p_elements == 0) {
|
if (p_elements == 0) {
|
||||||
|
@ -142,10 +153,12 @@ T *memnew_arr_template(size_t p_elements) {
|
||||||
same strategy used by std::vector, and the Vector class, so it should be safe.*/
|
same strategy used by std::vector, and the Vector class, so it should be safe.*/
|
||||||
|
|
||||||
size_t len = sizeof(T) * p_elements;
|
size_t len = sizeof(T) * p_elements;
|
||||||
uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
|
uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
|
||||||
T *failptr = nullptr; //get rid of a warning
|
T *failptr = nullptr; //get rid of a warning
|
||||||
ERR_FAIL_NULL_V(mem, failptr);
|
ERR_FAIL_NULL_V(mem, failptr);
|
||||||
*(mem - 1) = p_elements;
|
|
||||||
|
uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
|
||||||
|
*(_elem_count_ptr) = p_elements;
|
||||||
|
|
||||||
if constexpr (!std::is_trivially_constructible_v<T>) {
|
if constexpr (!std::is_trivially_constructible_v<T>) {
|
||||||
T *elems = (T *)mem;
|
T *elems = (T *)mem;
|
||||||
|
@ -166,16 +179,18 @@ T *memnew_arr_template(size_t p_elements) {
|
||||||
|
|
||||||
// Returns the element count recorded in the header that precedes the array
// p_class (the value memnew_arr_template() writes when it allocates an array).
// NOTE(review): this listing interleaves two diff columns, so each statement
// appears twice: first the earlier form, then the revised form.
template <typename T>
|
template <typename T>
|
||||||
size_t memarr_len(const T *p_class) {
|
size_t memarr_len(const T *p_class) {
|
||||||
// Earlier form: treat the data pointer as uint64_t* so the count is the word just before it.
uint64_t *ptr = (uint64_t *)p_class;
|
// Revised form: use a byte pointer so header fields can be located by byte offset.
uint8_t *ptr = (uint8_t *)p_class;
|
||||||
// Earlier form: read the uint64_t immediately preceding the data.
return *(ptr - 1);
|
// Revised form: locate the count through the shared helper instead of a fixed "- 1".
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
|
||||||
|
return *(_elem_count_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void memdelete_arr(T *p_class) {
|
void memdelete_arr(T *p_class) {
|
||||||
uint64_t *ptr = (uint64_t *)p_class;
|
uint8_t *ptr = (uint8_t *)p_class;
|
||||||
|
|
||||||
if constexpr (!std::is_trivially_destructible_v<T>) {
|
if constexpr (!std::is_trivially_destructible_v<T>) {
|
||||||
uint64_t elem_count = *(ptr - 1);
|
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
|
||||||
|
uint64_t elem_count = *(_elem_count_ptr);
|
||||||
|
|
||||||
for (uint64_t i = 0; i < elem_count; i++) {
|
for (uint64_t i = 0; i < elem_count; i++) {
|
||||||
p_class[i].~T();
|
p_class[i].~T();
|
||||||
|
|
|
@ -46,7 +46,7 @@ class CharString;
|
||||||
template <class T, class V>
|
template <class T, class V>
|
||||||
class VMap;
|
class VMap;
|
||||||
|
|
||||||
SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
|
// Compile-time check that std::atomic<uint64_t> is trivially destructible.
// NOTE(review): presumably required because the reference count is
// placement-constructed inside raw allocations and its destructor is not
// visibly invoked before the memory is freed/reallocated - confirm.
static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);
|
||||||
|
|
||||||
// Silence a false positive warning (see GH-52119).
|
// Silence a false positive warning (see GH-52119).
|
||||||
#if defined(__GNUC__) && !defined(__clang__)
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
@ -89,18 +89,39 @@ private:
|
||||||
return ++x;
|
return ++x;
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
|
// Alignment: ↓ max_align_t ↓ USize ↓ max_align_t
|
||||||
|
// ┌────────────────────┬──┬─────────────┬──┬───────────...
|
||||||
|
// │ SafeNumeric<USize> │░░│ USize │░░│ T[]
|
||||||
|
// │ ref. count │░░│ data size │░░│ data
|
||||||
|
// └────────────────────┴──┴─────────────┴──┴───────────...
|
||||||
|
// Offset: ↑ REF_COUNT_OFFSET ↑ SIZE_OFFSET ↑ DATA_OFFSET
|
||||||
|
|
||||||
|
// Byte offset, from the start of the allocation, of the SafeNumeric<USize>
// reference count (placement-constructed there by _copy_on_write() and resize()).
static constexpr size_t REF_COUNT_OFFSET = 0;
|
||||||
|
// Byte offset of the USize size field: the end of the reference count,
// rounded up to the next multiple of alignof(USize) when it is not already one.
static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize)));
|
||||||
|
// Byte offset of the T[] data: the end of the size field, rounded up to the
// next multiple of alignof(max_align_t). This many header bytes are added to
// every allocation request, and _ptr points this far past the allocation start.
static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t)));
|
||||||
|
|
||||||
mutable T *_ptr = nullptr;
|
mutable T *_ptr = nullptr;
|
||||||
|
|
||||||
// internal helpers
|
// internal helpers
|
||||||
|
|
||||||
|
// Returns the address of the reference-count field inside an allocation.
// p_ptr is the start of the raw allocation (not the data pointer).
static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
|
||||||
|
return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the address of the size field inside an allocation.
// p_ptr is the start of the raw allocation (not the data pointer).
static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
|
||||||
|
return (USize *)(p_ptr + SIZE_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the address of the first data element inside an allocation.
// p_ptr is the start of the raw allocation; the result is what gets stored in _ptr.
static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
|
||||||
|
return (T *)(p_ptr + DATA_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
// Returns a pointer to the reference count belonging to the buffer _ptr points
// into, or nullptr when _ptr is null (no buffer allocated).
// NOTE(review): this listing interleaves two diff columns, so each statement
// appears twice: first the earlier form, then the revised form.
_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
|
_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
|
||||||
if (!_ptr) {
|
if (!_ptr) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Earlier form: assumes the header is exactly two SafeNumeric<USize>-sized slots before the data.
return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
|
// Revised form: step back DATA_OFFSET bytes to the allocation start, then forward to the refcount field.
return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET);
|
||||||
}
|
}
|
||||||
|
|
||||||
_FORCE_INLINE_ USize *_get_size() const {
|
_FORCE_INLINE_ USize *_get_size() const {
|
||||||
|
@ -108,7 +129,7 @@ private:
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return reinterpret_cast<USize *>(_ptr) - 1;
|
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
|
||||||
}
|
}
|
||||||
|
|
||||||
_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
|
_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
|
||||||
|
@ -244,7 +265,7 @@ void CowData<T>::_unref(void *p_data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// free mem
|
// free mem
|
||||||
Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
|
Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
|
@ -260,26 +281,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
|
||||||
/* in use by more than me */
|
/* in use by more than me */
|
||||||
USize current_size = *_get_size();
|
USize current_size = *_get_size();
|
||||||
|
|
||||||
USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
|
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
|
||||||
mem_new += 2;
|
ERR_FAIL_NULL_V(mem_new, 0);
|
||||||
|
|
||||||
new (mem_new - 2) SafeNumeric<USize>(1); //refcount
|
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
|
||||||
*(mem_new - 1) = current_size; //size
|
USize *_size_ptr = _get_size_ptr(mem_new);
|
||||||
|
T *_data_ptr = _get_data_ptr(mem_new);
|
||||||
|
|
||||||
T *_data = (T *)(mem_new);
|
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
|
||||||
|
*(_size_ptr) = current_size; //size
|
||||||
|
|
||||||
// initialize new elements
|
// initialize new elements
|
||||||
if constexpr (std::is_trivially_copyable_v<T>) {
|
if constexpr (std::is_trivially_copyable_v<T>) {
|
||||||
memcpy(mem_new, _ptr, current_size * sizeof(T));
|
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
for (USize i = 0; i < current_size; i++) {
|
for (USize i = 0; i < current_size; i++) {
|
||||||
memnew_placement(&_data[i], T(_ptr[i]));
|
memnew_placement(&_data_ptr[i], T(_ptr[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_unref(_ptr);
|
_unref(_ptr);
|
||||||
_ptr = _data;
|
_ptr = _data_ptr;
|
||||||
|
|
||||||
rc = 1;
|
rc = 1;
|
||||||
}
|
}
|
||||||
|
@ -315,21 +337,28 @@ Error CowData<T>::resize(Size p_size) {
|
||||||
if (alloc_size != current_alloc_size) {
|
if (alloc_size != current_alloc_size) {
|
||||||
if (current_size == 0) {
|
if (current_size == 0) {
|
||||||
// alloc from scratch
|
// alloc from scratch
|
||||||
USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
|
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
|
||||||
ptr += 2;
|
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
|
||||||
ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
|
|
||||||
*(ptr - 1) = 0; //size, currently none
|
|
||||||
new (ptr - 2) SafeNumeric<USize>(1); //refcount
|
|
||||||
|
|
||||||
_ptr = (T *)ptr;
|
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
|
||||||
|
USize *_size_ptr = _get_size_ptr(mem_new);
|
||||||
|
T *_data_ptr = _get_data_ptr(mem_new);
|
||||||
|
|
||||||
|
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
|
||||||
|
*(_size_ptr) = 0; //size, currently none
|
||||||
|
|
||||||
|
_ptr = _data_ptr;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
|
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
|
||||||
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
|
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
|
||||||
_ptrnew += 2;
|
|
||||||
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
|
|
||||||
|
|
||||||
_ptr = (T *)(_ptrnew);
|
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
|
||||||
|
T *_data_ptr = _get_data_ptr(mem_new);
|
||||||
|
|
||||||
|
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
|
||||||
|
|
||||||
|
_ptr = _data_ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,12 +384,15 @@ Error CowData<T>::resize(Size p_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (alloc_size != current_alloc_size) {
|
if (alloc_size != current_alloc_size) {
|
||||||
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
|
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
|
||||||
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
|
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
|
||||||
_ptrnew += 2;
|
|
||||||
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
|
|
||||||
|
|
||||||
_ptr = (T *)(_ptrnew);
|
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
|
||||||
|
T *_data_ptr = _get_data_ptr(mem_new);
|
||||||
|
|
||||||
|
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
|
||||||
|
|
||||||
|
_ptr = _data_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
*_get_size() = p_size;
|
*_get_size() = p_size;
|
||||||
|
|
Loading…
Reference in a new issue