123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630 |
- /**
- * @file llmemory.cpp
- * @brief Very special memory allocation/deallocation stuff here
- *
- * $LicenseInfo:firstyear=2002&license=viewergpl$
- *
- * Copyright (c) 2002-2009, Linden Research, Inc.
- *
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
- #include "linden_common.h"
- #include "llmemory.h"
- #include "llsys.h"
- #if LL_JEMALLOC
- # include "jemalloc/jemalloc.h"
- #elif LL_MIMALLOC && LL_WINDOWS
- # include "mimalloc/mimalloc-new-delete.h"
- #endif
- #if LL_DARWIN
- # include <sys/types.h>
- # include <sys/sysctl.h>
- # include <mach/task.h>
- # include <mach/mach.h>
- # include <mach/mach_init.h>
- # include <mach/mach_host.h>
- # include <unistd.h>
- #elif LL_LINUX
- # include <malloc.h>
- # include <sys/resource.h>
- # include <unistd.h>
- #elif LL_WINDOWS
- # include <malloc.h>
- # include <psapi.h>
- #endif
- void* gReservedFreeSpace = NULL;
- //static
- U32 LLMemory::sMaxPhysicalMemInKB = 0;
- U32 LLMemory::sMaxVirtualMemInKB = 0;
- U32 LLMemory::sAvailPhysicalMemInKB = U32_MAX;
- U32 LLMemory::sAvailVirtualMemInKB = U32_MAX;
- U32 LLMemory::sAllocatedMemInKB = 0;
- U32 LLMemory::sAllocatedPageSizeInKB = 0;
- bool LLMemory::sFailedAllocation = false;
- bool LLMemory::sFailedAllocationOnce = false;
- #if LL_LINUX && !LL_JEMALLOC
- // Stats: number of successful malloc timming.
- static U32 sTrimmed = 0;
- #endif
#if LL_DEBUG
// Reports a failed alignment check through the llerrs log stream. Only
// compiled when LL_DEBUG is set.
void ll_assert_aligned_error()
{
	llerrs << "Alignment check failed !" << llendl;
}
#endif	// LL_DEBUG
- //static
- void LLMemory::initClass()
- {
- #if LL_JEMALLOC
- unsigned int oval, nval = 0;
- size_t osz = sizeof(oval);
- size_t nsz = sizeof(nval);
- mallctl("thread.arena", &oval, &osz, &nval, nsz);
- #endif
- if (!gReservedFreeSpace)
- {
- // Reserve some space that we will free() on crash to try and avoid out
- // of memory conditions while dumping the stack trace log... 256Kb
- // should be plenty enough.
- gReservedFreeSpace = malloc(262144);
- }
- }
- //static
- void LLMemory::cleanupClass()
- {
- if (gReservedFreeSpace)
- {
- free(gReservedFreeSpace);
- gReservedFreeSpace = NULL;
- }
- }
- //static
- void LLMemory::allocationFailed(size_t size)
- {
- sFailedAllocation = sFailedAllocationOnce = true;
- if (size > 0)
- {
- llwarns << "Memory allocation failure for size: " << size << llendl;
- }
- }
- //static
- void LLMemory::updateMemoryInfo(bool trim_heap)
- {
- // jemalloc v5.0+ does properly release memory to the system, and v5.2+
- // does redirect new and delete calls to its own allocators, so there is
- // no need for trimming with the system malloc() here. HB
- #if !LL_JEMALLOC
- if (trim_heap)
- {
- // Trim the heap down from freed memory so that we can compute the
- // actual available virtual space. HB
- // *TODO: implement heap trimming for macOS, if at all possible (if
- // not, try and use jemalloc with macOS ?)...
- # if LL_LINUX
- constexpr size_t keep = 100 * 1024 * 1024; // Trim all but 100Mb
- sTrimmed += malloc_trim(keep);
- # elif LL_WINDOWS
- _heapmin();
- # endif
- }
- #endif
- getMaxMemoryKB(sMaxPhysicalMemInKB, sMaxVirtualMemInKB);
- getAvailableMemoryKB(sAvailPhysicalMemInKB, sAvailVirtualMemInKB);
- #if LL_WINDOWS
- HANDLE self = GetCurrentProcess();
- PROCESS_MEMORY_COUNTERS counters;
- if (!GetProcessMemoryInfo(self, &counters, sizeof(counters)))
- {
- llwarns << "GetProcessMemoryInfo failed" << llendl;
- sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB;
- sAllocatedMemInKB = 0;
- }
- else
- {
- sAllocatedPageSizeInKB = (U32)(counters.PagefileUsage / 1024);
- sAllocatedMemInKB = (U32)(counters.WorkingSetSize / 1024);
- }
- #elif LL_DARWIN
- task_vm_info info;
- mach_msg_type_number_t info_count = TASK_VM_INFO_COUNT;
- // MACH_TASK_BASIC_INFO reports the same resident_size, but does not tell
- // us the reusable bytes or phys_footprint.
- if (task_info(mach_task_self(), TASK_VM_INFO,
- reinterpret_cast<task_info_t>(&info),
- &info_count) == KERN_SUCCESS)
- {
- sAllocatedPageSizeInKB = U32(info.resident_size) / 1024;
- sAllocatedMemInKB = U32(info.resident_size - info.reusable) / 1024;
- }
- else
- {
- sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB;
- sAllocatedMemInKB = (U32)(LLMemory::getCurrentRSS() / 1024);
- }
- #else
- sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB;
- sAllocatedMemInKB = (U32)(LLMemory::getCurrentRSS() / 1024);
- #endif
- }
- #if LL_JEMALLOC
// Size, in bytes, of the buffer reserved for the jemalloc statistics string:
// the string itself is capped to one character less than this.
#define JEMALLOC_STATS_STRING_SIZE 262144

// Write callback for malloc_stats_print(): 'data' must point to a std::string
// to which the NUL-terminated text 's' is appended. The text is truncated so
// that the string never grows beyond JEMALLOC_STATS_STRING_SIZE - 1
// characters. NULL 'data' or 's' are ignored.
static void jemalloc_write_cb(void* data, const char* s)
{
	if (!data || !s)
	{
		return;
	}

	std::string* buff = static_cast<std::string*>(data);
	constexpr size_t max_len = JEMALLOC_STATS_STRING_SIZE - 1;
	const size_t buff_len = buff->length();
	if (buff_len >= max_len)
	{
		// Buffer already full (or over-sized): bail out now, which also
		// prevents an unsigned underflow in the room computation below.
		return;
	}

	const size_t room = max_len - buff_len;
	size_t line_len = strlen(s);
	if (line_len > room)
	{
		line_len = room;	// Truncate to fit
	}
	if (line_len > 0)
	{
		buff->append(s, line_len);
	}
}
- #endif
- //static
- void LLMemory::logMemoryInfo()
- {
- updateMemoryInfo();
- #if LL_JEMALLOC
- unsigned int arenas, arena;
- size_t sz = sizeof(arenas);
- mallctl("opt.narenas", &arenas, &sz, NULL, 0);
- U32 opt_arenas = arenas;
- mallctl("arenas.narenas", &arenas, &sz, NULL, 0);
- mallctl("thread.arena", &arena, &sz, NULL, 0);
- llinfos << "jemalloc initialized with " << opt_arenas
- << " arenas, using now " << arenas
- << " arenas, main thread using arena " << arena << "." << llendl;
- bool stats_enabled = false;
- sz = sizeof(stats_enabled);
- mallctl("config.stats", &stats_enabled, &sz, NULL, 0);
- if (stats_enabled)
- {
- std::string malloc_stats_str;
- // IMPORTANT: we cannot reserve memory during jemalloc_write_cb() call
- // by malloc_stats_print(), so we reserve a fixed string buffer.
- malloc_stats_str.reserve(JEMALLOC_STATS_STRING_SIZE);
- malloc_stats_print(jemalloc_write_cb, &malloc_stats_str, NULL);
- llinfos << "jemalloc stats:\n" << malloc_stats_str << llendl;
- }
- #endif
- llinfos << "System memory information: Max physical memory: "
- << sMaxPhysicalMemInKB << "KB - Allocated physical memory: "
- << sAllocatedMemInKB << "KB - Available physical memory: "
- << sAvailPhysicalMemInKB << "KB - Allocated virtual memory: "
- << sAllocatedPageSizeInKB << "KB"
- #if LL_LINUX && !LL_JEMALLOC
- << " - Number of actual glibc malloc() heap trimming occurrences: "
- << sTrimmed
- #endif
- << llendl;
- }
- #if LL_WINDOWS
- U64 LLMemory::getCurrentRSS()
- {
- HANDLE self = GetCurrentProcess();
- PROCESS_MEMORY_COUNTERS counters;
- if (!GetProcessMemoryInfo(self, &counters, sizeof(counters)))
- {
- llwarns_once << "GetProcessMemoryInfo() failed !" << llendl;
- return 0;
- }
- return counters.WorkingSetSize;
- }
- #elif LL_DARWIN
- U64 LLMemory::getCurrentRSS()
- {
- U64 resident_size = 0;
- mach_task_basic_info_data_t basic_info;
- mach_msg_type_number_t basic_info_count = MACH_TASK_BASIC_INFO_COUNT;
- if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
- (task_info_t)&basic_info, &basic_info_count) == KERN_SUCCESS)
- {
- resident_size = basic_info.resident_size;
- }
- else
- {
- llwarns_once << "task_info() failed !" << llendl;
- }
- return resident_size;
- }
- #elif LL_LINUX
- U64 LLMemory::getCurrentRSS()
- {
- struct rusage usage;
- if (getrusage(RUSAGE_SELF, &usage))
- {
- llwarns_once << "getrusage() failed !" << llendl;
- return 0;
- }
- return usage.ru_maxrss * 1024;
- }
- #else
- U64 LLMemory::getCurrentRSS()
- {
- return 0;
- }
- #endif
- ///////////////////////////////////////////////////////////////////////////////
- // The following methods used to be part of LLMemoryInfo (in llsys.h/cpp),
- // which I removed and merged here, since they were not even related to the
- // global system info, but rather to the memory consumption of the viewer
- // itself... HB
- //static
- U32 LLMemory::getPhysicalMemoryKB()
- {
- #if LL_WINDOWS
- MEMORYSTATUSEX state;
- state.dwLength = sizeof(state);
- GlobalMemoryStatusEx(&state);
- U32 amount = state.ullTotalPhys >> 10;
- // *HACK: for some reason, the reported amount of memory is always wrong.
- // The original adjustment assumes it is always off by one meg, however
- // errors of as much as 2520 KB have been observed in the value returned
- // from the GetMemoryStatusEx function. Here we keep the original
- // adjustment from llfoaterabout.cpp until this can be fixed somehow. HB
- amount += 1024;
- return amount;
- #elif LL_DARWIN
- // This might work on Linux as well. Someone check...
- uint64_t phys = 0;
- int mib[2] = { CTL_HW, HW_MEMSIZE };
- size_t len = sizeof(phys);
- sysctl(mib, 2, &phys, &len, NULL, 0);
- return (U32)(phys >> 10);
- #elif LL_LINUX
- U64 phys = (U64)(sysconf(_SC_PAGESIZE)) * (U64)(sysconf(_SC_PHYS_PAGES));
- return (U32)(phys >> 10);
- #else
- return 0;
- #endif
- }
- //static
- void LLMemory::getMaxMemoryKB(U32& max_physical_mem_kb,
- U32& max_virtual_mem_kb)
- {
- static U32 saved_max_physical_mem_kb = 0;
- static U32 saved_max_virtual_mem_kb = 0;
- if (saved_max_virtual_mem_kb)
- {
- max_physical_mem_kb = saved_max_physical_mem_kb;
- max_virtual_mem_kb = saved_max_virtual_mem_kb;
- return;
- }
- U32 addressable = U32_MAX; // No limit...
- #if LL_WINDOWS
- MEMORYSTATUSEX state;
- state.dwLength = sizeof(state);
- GlobalMemoryStatusEx(&state);
- max_physical_mem_kb = (U32)(state.ullAvailPhys / 1024);
- U32 total_virtual_memory = (U32)(state.ullTotalVirtual / 1024);
- max_virtual_mem_kb = total_virtual_memory;
- #elif LL_LINUX || LL_DARWIN
- max_physical_mem_kb = getPhysicalMemoryKB();
- max_virtual_mem_kb = addressable;
- #else
- max_physical_mem_kb = max_virtual_mem_kb = addressable;
- #endif
- max_virtual_mem_kb = llmin(max_virtual_mem_kb, addressable);
- #if LL_WINDOWS
- LL_DEBUGS("Memory") << "Total physical memory: "
- << max_physical_mem_kb / 1024
- << "Mb - Total available virtual memory: "
- << total_virtual_memory / 1024
- << "Mb - Retained max virtual memory: "
- << max_virtual_mem_kb / 1024 << "Mb" << LL_ENDL;
- #else
- LL_DEBUGS("Memory") << "Total physical memory: "
- << max_physical_mem_kb / 1024
- << "Mb - Retained max virtual memory: "
- << max_virtual_mem_kb / 1024 << "Mb" << LL_ENDL;
- #endif
- saved_max_physical_mem_kb = max_physical_mem_kb;
- saved_max_virtual_mem_kb = max_virtual_mem_kb;
- }
#if LL_LINUX || LL_DARWIN

# if LL_LINUX
// Helper for get_process_virtual_size_kb(): searches for 'key' (e.g.
// "VmRSS:") in the NUL-terminated 'status' text and returns the unsigned
// number following it, or 0 when the key is absent or its value cannot be
// parsed.
static U32 parse_proc_status_kb(const char* status, const char* key)
{
	U32 value = 0;
	const char* entryp = strstr(status, key);
	// The sscanf() result is checked so that a failed conversion can never
	// let a stale or indeterminate value be accounted for.
	if (entryp && sscanf(entryp, "%*s %u", &value) == 1)
	{
		return value;
	}
	return 0;
}
# endif

// Returns the size, in KB, accounted as used by our process for the available
// virtual memory computation, or 0 on failure.
// Under Linux, this is the sum of the VmRSS, VmStk, VmExe, VmLib and VmPTE
// entries of /proc/self/status. Under macOS, this is the virtual_size as
// reported by task_info() for TASK_BASIC_INFO.
static U32 get_process_virtual_size_kb()
{
# if LL_LINUX
	LLFILE* status_filep = LLFile::open("/proc/self/status", "rb");
	if (!status_filep)
	{
		return 0;
	}

	constexpr size_t STATUS_SIZE = 8192;
	char buff[STATUS_SIZE];
	// Read at most STATUS_SIZE - 1 bytes, to keep room for the NUL.
	size_t nbytes = fread(buff, 1, STATUS_SIZE - 1, status_filep);
	buff[nbytes] = '\0';
	LLFile::close(status_filep);

	// All these guys return numbers in KB
	static const char* const KEYS[] =
	{
		"VmRSS:", "VmStk:", "VmExe:", "VmLib:", "VmPTE:"
	};
	U32 virtual_size = 0;
	for (const char* key : KEYS)
	{
		virtual_size += parse_proc_status_kb(buff, key);
	}
	return virtual_size;
# elif LL_DARWIN
	struct task_basic_info t_info;
	mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
	if (KERN_SUCCESS != task_info(mach_task_self(), TASK_BASIC_INFO,
								  (task_info_t)&t_info, &t_info_count))
	{
		return 0;
	}
	return (U32)(t_info.virtual_size / 1024);
# endif
}

#endif	// LL_LINUX || LL_DARWIN
- //static
- void LLMemory::getAvailableMemoryKB(U32& avail_physical_mem_kb,
- U32& avail_virtual_mem_kb)
- {
- U32 max_physical_mem_kb, max_virtual_mem_kb;
- getMaxMemoryKB(max_physical_mem_kb, max_virtual_mem_kb);
- #if LL_DARWIN
- // Total installed and available physical memory are properties of the
- // host, not just our process.
- vm_statistics64_data_t vmstat;
- mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
- mach_port_t host = mach_host_self();
- vm_size_t page_size;
- host_page_size(host, &page_size);
- kern_return_t result =
- host_statistics64(host, HOST_VM_INFO64,
- reinterpret_cast<host_info_t>(&vmstat), &count);
- if (result == KERN_SUCCESS)
- {
- avail_physical_mem_kb = vmstat.free_count * page_size / 1024;
- }
- else
- #endif
- {
- avail_physical_mem_kb = max_physical_mem_kb;
- }
- #if LL_WINDOWS
- MEMORYSTATUSEX state;
- state.dwLength = sizeof(state);
- GlobalMemoryStatusEx(&state);
- avail_virtual_mem_kb = (U32)(state.ullAvailVirtual / 1024);
- LL_DEBUGS("Memory") << "Memory check: reported available virtual space: "
- << avail_virtual_mem_kb / 1024 << "Mb" << LL_ENDL;
- #else
- U32 virtual_size_kb = get_process_virtual_size_kb();
- if (virtual_size_kb < max_virtual_mem_kb)
- {
- avail_virtual_mem_kb = max_virtual_mem_kb - virtual_size_kb;
- }
- else
- {
- avail_virtual_mem_kb = 0;
- }
- LL_DEBUGS("Memory") << "Memory check: Retained available virtual space: "
- << avail_virtual_mem_kb / 1024 << "Mb" << LL_ENDL;
- #endif
- }
- //static
- std::string LLMemory::getInfo()
- {
- std::ostringstream s;
- #if LL_WINDOWS
- MEMORYSTATUSEX state;
- state.dwLength = sizeof(state);
- GlobalMemoryStatusEx(&state);
- s << "Percent Memory use: " << (U32)state.dwMemoryLoad << '%' << std::endl;
- s << "Total Physical KB: " << (U32)(state.ullTotalPhys / 1024) << std::endl;
- s << "Avail Physical KB: " << (U32)(state.ullAvailPhys / 1024) << std::endl;
- s << "Total page KB: " << (U32)(state.ullTotalPageFile / 1024) << std::endl;
- s << "Avail page KB: " << (U32)(state.ullAvailPageFile / 1024) << std::endl;
- s << "Total Virtual KB: " << (U32)(state.ullTotalVirtual / 1024) << std::endl;
- s << "Avail Virtual KB: " << (U32)(state.ullAvailVirtual / 1024) << std::endl;
- #elif LL_DARWIN
- uint64_t phys = 0;
- size_t len = sizeof(phys);
- if (sysctlbyname("hw.memsize", &phys, &len, NULL, 0) == 0)
- {
- s << "Total Physical KB: " << phys / 1024 << std::endl;
- }
- else
- {
- s << "Unable to collect memory information";
- }
- #else
- // *NOTE: This works on Linux. What will it do on other systems ?
- static const char MEMINFO_FILE[] = "/proc/meminfo";
- LLFILE* meminfo = LLFile::open(MEMINFO_FILE, "rb");
- if (meminfo)
- {
- char line[MAX_STRING];
- memset(line, 0, MAX_STRING);
- while (fgets(line, MAX_STRING, meminfo))
- {
- line[strlen(line) - 1] = ' ';
- s << line;
- }
- LLFile::close(meminfo);
- }
- else
- {
- s << "Unable to collect memory information";
- }
- #endif
- return s.str();
- }
- // End of former LLMemoryInfo methods
- ///////////////////////////////////////////////////////////////////////////////
// Rounds 'address' up to the next 64 bytes boundary; an address which is
// already 64 bytes-aligned is returned unchanged.
template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
{
	constexpr uintptr_t ALIGN_MASK = 0x3F;
	const uintptr_t raw_address = reinterpret_cast<uintptr_t>(address);
	return reinterpret_cast<T*>((raw_address + ALIGN_MASK) & ~ALIGN_MASK);
}
- // Used to be force-inlined in llmemory.h, but does not really deserve it. HB
- void ll_memcpy_nonaliased_aligned_16(char* __restrict dst,
- const char* __restrict src, size_t bytes)
- {
- llassert(src != NULL && dst != NULL);
- llassert(bytes > 0 && bytes % sizeof(F32) == 0 && bytes % 16 == 0);
- llassert(src < dst ? src + bytes <= dst : dst + bytes <= src);
- ll_assert_aligned(src, 16);
- ll_assert_aligned(dst, 16);
- char* end = dst + bytes;
- if (bytes > 64)
- {
- // Find start of 64 bytes-aligned area within block
- void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst);
- // At least 64 bytes before the end of the destination, switch to 16
- // bytes copies
- void* end_64 = end - 64;
- // Prefetch the head of the 64 bytes area now
- _mm_prefetch((char*)begin_64, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA);
- _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA);
- // Copy 16 bytes chunks until we're 64 bytes aligned
- while (dst < begin_64)
- {
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- dst += 16;
- src += 16;
- }
- // Copy 64 bytes chunks up to your tail
- //
- // Might be good to shmoo the 512b prefetch offset (characterize
- // performance for various values)
- while (dst < end_64)
- {
- _mm_prefetch((char*)src + 512, _MM_HINT_NTA);
- _mm_prefetch((char*)dst + 512, _MM_HINT_NTA);
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16)));
- _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32)));
- _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48)));
- dst += 64;
- src += 64;
- }
- }
- // Copy remainder 16 bytes tail chunks (or ALL 16 bytes chunks for
- // sub-64 bytes copies)
- while (dst < end)
- {
- _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src));
- dst += 16;
- src += 16;
- }
- }
|