/** * @file llmemory.cpp * @brief Very special memory allocation/deallocation stuff here * * $LicenseInfo:firstyear=2002&license=viewergpl$ * * Copyright (c) 2002-2009, Linden Research, Inc. * * Second Life Viewer Source Code * The source code in this file ("Source Code") is provided by Linden Lab * to you under the terms of the GNU General Public License, version 2.0 * ("GPL"), unless you have obtained a separate licensing agreement * ("Other License"), formally executed by you and Linden Lab. Terms of * the GPL can be found in doc/GPL-license.txt in this distribution, or * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 * * There are special exceptions to the terms and conditions of the GPL as * it is applied to this Source Code. View the full text of the exception * in the file doc/FLOSS-exception.txt in this software distribution, or * online at * http://secondlifegrid.net/programs/open_source/licensing/flossexception * * By copying, modifying or distributing this software, you acknowledge * that you have read and understood your obligations described above, * and agree to abide by those obligations. * * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, * COMPLETENESS OR PERFORMANCE. * $/LicenseInfo$ */ #include "linden_common.h" #include "llmemory.h" #include "llsys.h" #if LL_JEMALLOC # include "jemalloc/jemalloc.h" #elif LL_MIMALLOC && LL_WINDOWS # include "mimalloc/mimalloc-new-delete.h" #endif #if LL_DARWIN # include # include # include # include # include # include # include #elif LL_LINUX # include # include # include #elif LL_WINDOWS # include # include #endif void* gReservedFreeSpace = NULL; //static U32 LLMemory::sMaxPhysicalMemInKB = 0; U32 LLMemory::sMaxVirtualMemInKB = 0; U32 LLMemory::sAvailPhysicalMemInKB = U32_MAX; U32 LLMemory::sAvailVirtualMemInKB = U32_MAX; U32 LLMemory::sAllocatedMemInKB = 0; U32 LLMemory::sAllocatedPageSizeInKB = 0; bool LLMemory::sFailedAllocation = false; bool LLMemory::sFailedAllocationOnce = false; #if LL_LINUX && !LL_JEMALLOC // Stats: number of successful malloc timming. static U32 sTrimmed = 0; #endif #if LL_DEBUG void ll_assert_aligned_error() { llerrs << "Alignment check failed !" << llendl; } #endif //static void LLMemory::initClass() { #if LL_JEMALLOC unsigned int oval, nval = 0; size_t osz = sizeof(oval); size_t nsz = sizeof(nval); mallctl("thread.arena", &oval, &osz, &nval, nsz); #endif if (!gReservedFreeSpace) { // Reserve some space that we will free() on crash to try and avoid out // of memory conditions while dumping the stack trace log... 256Kb // should be plenty enough. gReservedFreeSpace = malloc(262144); } } //static void LLMemory::cleanupClass() { if (gReservedFreeSpace) { free(gReservedFreeSpace); gReservedFreeSpace = NULL; } } //static void LLMemory::allocationFailed(size_t size) { sFailedAllocation = sFailedAllocationOnce = true; if (size > 0) { llwarns << "Memory allocation failure for size: " << size << llendl; } } //static void LLMemory::updateMemoryInfo(bool trim_heap) { // jemalloc v5.0+ does properly release memory to the system, and v5.2+ // does redirect new and delete calls to its own allocators, so there is // no need for trimming with the system malloc() here. HB #if !LL_JEMALLOC if (trim_heap) { // Trim the heap down from freed memory so that we can compute the // actual available virtual space. HB // *TODO: implement heap trimming for macOS, if at all possible (if // not, try and use jemalloc with macOS ?)... # if LL_LINUX constexpr size_t keep = 100 * 1024 * 1024; // Trim all but 100Mb sTrimmed += malloc_trim(keep); # elif LL_WINDOWS _heapmin(); # endif } #endif getMaxMemoryKB(sMaxPhysicalMemInKB, sMaxVirtualMemInKB); getAvailableMemoryKB(sAvailPhysicalMemInKB, sAvailVirtualMemInKB); #if LL_WINDOWS HANDLE self = GetCurrentProcess(); PROCESS_MEMORY_COUNTERS counters; if (!GetProcessMemoryInfo(self, &counters, sizeof(counters))) { llwarns << "GetProcessMemoryInfo failed" << llendl; sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB; sAllocatedMemInKB = 0; } else { sAllocatedPageSizeInKB = (U32)(counters.PagefileUsage / 1024); sAllocatedMemInKB = (U32)(counters.WorkingSetSize / 1024); } #elif LL_DARWIN task_vm_info info; mach_msg_type_number_t info_count = TASK_VM_INFO_COUNT; // MACH_TASK_BASIC_INFO reports the same resident_size, but does not tell // us the reusable bytes or phys_footprint. if (task_info(mach_task_self(), TASK_VM_INFO, reinterpret_cast(&info), &info_count) == KERN_SUCCESS) { sAllocatedPageSizeInKB = U32(info.resident_size) / 1024; sAllocatedMemInKB = U32(info.resident_size - info.reusable) / 1024; } else { sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB; sAllocatedMemInKB = (U32)(LLMemory::getCurrentRSS() / 1024); } #else sAllocatedPageSizeInKB = sMaxVirtualMemInKB - sAvailVirtualMemInKB; sAllocatedMemInKB = (U32)(LLMemory::getCurrentRSS() / 1024); #endif } #if LL_JEMALLOC #define JEMALLOC_STATS_STRING_SIZE 262144 static void jemalloc_write_cb(void* data, const char* s) { if (data && s) { std::string* buff = (std::string*)data; size_t buff_len = buff->length(); size_t line_len = strlen(s); if (buff_len + line_len >= JEMALLOC_STATS_STRING_SIZE) { line_len = JEMALLOC_STATS_STRING_SIZE - buff_len - 1; } if (line_len > 0) { buff->append(s, line_len); } } } #endif //static void LLMemory::logMemoryInfo() { updateMemoryInfo(); #if LL_JEMALLOC unsigned int arenas, arena; size_t sz = sizeof(arenas); mallctl("opt.narenas", &arenas, &sz, NULL, 0); U32 opt_arenas = arenas; mallctl("arenas.narenas", &arenas, &sz, NULL, 0); mallctl("thread.arena", &arena, &sz, NULL, 0); llinfos << "jemalloc initialized with " << opt_arenas << " arenas, using now " << arenas << " arenas, main thread using arena " << arena << "." << llendl; bool stats_enabled = false; sz = sizeof(stats_enabled); mallctl("config.stats", &stats_enabled, &sz, NULL, 0); if (stats_enabled) { std::string malloc_stats_str; // IMPORTANT: we cannot reserve memory during jemalloc_write_cb() call // by malloc_stats_print(), so we reserve a fixed string buffer. malloc_stats_str.reserve(JEMALLOC_STATS_STRING_SIZE); malloc_stats_print(jemalloc_write_cb, &malloc_stats_str, NULL); llinfos << "jemalloc stats:\n" << malloc_stats_str << llendl; } #endif llinfos << "System memory information: Max physical memory: " << sMaxPhysicalMemInKB << "KB - Allocated physical memory: " << sAllocatedMemInKB << "KB - Available physical memory: " << sAvailPhysicalMemInKB << "KB - Allocated virtual memory: " << sAllocatedPageSizeInKB << "KB" #if LL_LINUX && !LL_JEMALLOC << " - Number of actual glibc malloc() heap trimming occurrences: " << sTrimmed #endif << llendl; } #if LL_WINDOWS U64 LLMemory::getCurrentRSS() { HANDLE self = GetCurrentProcess(); PROCESS_MEMORY_COUNTERS counters; if (!GetProcessMemoryInfo(self, &counters, sizeof(counters))) { llwarns_once << "GetProcessMemoryInfo() failed !" << llendl; return 0; } return counters.WorkingSetSize; } #elif LL_DARWIN U64 LLMemory::getCurrentRSS() { U64 resident_size = 0; mach_task_basic_info_data_t basic_info; mach_msg_type_number_t basic_info_count = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&basic_info, &basic_info_count) == KERN_SUCCESS) { resident_size = basic_info.resident_size; } else { llwarns_once << "task_info() failed !" << llendl; } return resident_size; } #elif LL_LINUX U64 LLMemory::getCurrentRSS() { struct rusage usage; if (getrusage(RUSAGE_SELF, &usage)) { llwarns_once << "getrusage() failed !" << llendl; return 0; } return usage.ru_maxrss * 1024; } #else U64 LLMemory::getCurrentRSS() { return 0; } #endif /////////////////////////////////////////////////////////////////////////////// // The following methods used to be part of LLMemoryInfo (in llsys.h/cpp), // which I removed to merge here, since they did not even relate with global // system info, but instead with the memory consumption of the viewer itself... // HB //static U32 LLMemory::getPhysicalMemoryKB() { #if LL_WINDOWS MEMORYSTATUSEX state; state.dwLength = sizeof(state); GlobalMemoryStatusEx(&state); U32 amount = state.ullTotalPhys >> 10; // *HACK: for some reason, the reported amount of memory is always wrong. // The original adjustment assumes it is always off by one meg, however // errors of as much as 2520 KB have been observed in the value returned // from the GetMemoryStatusEx function. Here we keep the original // adjustment from llfoaterabout.cpp until this can be fixed somehow. HB amount += 1024; return amount; #elif LL_DARWIN // This might work on Linux as well. Someone check... uint64_t phys = 0; int mib[2] = { CTL_HW, HW_MEMSIZE }; size_t len = sizeof(phys); sysctl(mib, 2, &phys, &len, NULL, 0); return (U32)(phys >> 10); #elif LL_LINUX U64 phys = (U64)(sysconf(_SC_PAGESIZE)) * (U64)(sysconf(_SC_PHYS_PAGES)); return (U32)(phys >> 10); #else return 0; #endif } //static void LLMemory::getMaxMemoryKB(U32& max_physical_mem_kb, U32& max_virtual_mem_kb) { static U32 saved_max_physical_mem_kb = 0; static U32 saved_max_virtual_mem_kb = 0; if (saved_max_virtual_mem_kb) { max_physical_mem_kb = saved_max_physical_mem_kb; max_virtual_mem_kb = saved_max_virtual_mem_kb; return; } U32 addressable = U32_MAX; // No limit... #if LL_WINDOWS MEMORYSTATUSEX state; state.dwLength = sizeof(state); GlobalMemoryStatusEx(&state); max_physical_mem_kb = (U32)(state.ullAvailPhys / 1024); U32 total_virtual_memory = (U32)(state.ullTotalVirtual / 1024); max_virtual_mem_kb = total_virtual_memory; #elif LL_LINUX || LL_DARWIN max_physical_mem_kb = getPhysicalMemoryKB(); max_virtual_mem_kb = addressable; #else max_physical_mem_kb = max_virtual_mem_kb = addressable; #endif max_virtual_mem_kb = llmin(max_virtual_mem_kb, addressable); #if LL_WINDOWS LL_DEBUGS("Memory") << "Total physical memory: " << max_physical_mem_kb / 1024 << "Mb - Total available virtual memory: " << total_virtual_memory / 1024 << "Mb - Retained max virtual memory: " << max_virtual_mem_kb / 1024 << "Mb" << LL_ENDL; #else LL_DEBUGS("Memory") << "Total physical memory: " << max_physical_mem_kb / 1024 << "Mb - Retained max virtual memory: " << max_virtual_mem_kb / 1024 << "Mb" << LL_ENDL; #endif saved_max_physical_mem_kb = max_physical_mem_kb; saved_max_virtual_mem_kb = max_virtual_mem_kb; } #if LL_LINUX || LL_DARWIN static U32 get_process_virtual_size_kb() { U32 virtual_size = 0; # if LL_LINUX LLFILE* status_filep = LLFile::open("/proc/self/status", "rb"); if (status_filep) { constexpr S32 STATUS_SIZE = 8192; char buff[STATUS_SIZE]; size_t nbytes = fread(buff, 1, STATUS_SIZE - 1, status_filep); buff[nbytes] = '\0'; // All these guys return numbers in KB U32 temp = 0; char* memp = strstr(buff, "VmRSS:"); if (memp) { sscanf(memp, "%*s %u", &temp); virtual_size = temp; } memp = strstr(buff, "VmStk:"); if (memp) { sscanf(memp, "%*s %u", &temp); virtual_size += temp; } memp = strstr(buff, "VmExe:"); if (memp) { sscanf(memp, "%*s %u", &temp); virtual_size += temp; } memp = strstr(buff, "VmLib:"); if (memp) { sscanf(memp, "%*s %u", &temp); virtual_size += temp; } memp = strstr(buff, "VmPTE:"); if (memp) { sscanf(memp, "%*s %u", &temp); virtual_size += temp; } LLFile::close(status_filep); } # elif LL_DARWIN struct task_basic_info t_info; mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT; if (KERN_SUCCESS != task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count)) { return 0; } virtual_size = t_info.virtual_size / 1024; # endif return virtual_size; } #endif // LL_LINUX || LL_DARWIN //static void LLMemory::getAvailableMemoryKB(U32& avail_physical_mem_kb, U32& avail_virtual_mem_kb) { U32 max_physical_mem_kb, max_virtual_mem_kb; getMaxMemoryKB(max_physical_mem_kb, max_virtual_mem_kb); #if LL_DARWIN // Total installed and available physical memory are properties of the // host, not just our process. vm_statistics64_data_t vmstat; mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; mach_port_t host = mach_host_self(); vm_size_t page_size; host_page_size(host, &page_size); kern_return_t result = host_statistics64(host, HOST_VM_INFO64, reinterpret_cast(&vmstat), &count); if (result == KERN_SUCCESS) { avail_physical_mem_kb = vmstat.free_count * page_size / 1024; } else #endif { avail_physical_mem_kb = max_physical_mem_kb; } #if LL_WINDOWS MEMORYSTATUSEX state; state.dwLength = sizeof(state); GlobalMemoryStatusEx(&state); avail_virtual_mem_kb = (U32)(state.ullAvailVirtual / 1024); LL_DEBUGS("Memory") << "Memory check: reported available virtual space: " << avail_virtual_mem_kb / 1024 << "Mb" << LL_ENDL; #else U32 virtual_size_kb = get_process_virtual_size_kb(); if (virtual_size_kb < max_virtual_mem_kb) { avail_virtual_mem_kb = max_virtual_mem_kb - virtual_size_kb; } else { avail_virtual_mem_kb = 0; } LL_DEBUGS("Memory") << "Memory check: Retained available virtual space: " << avail_virtual_mem_kb / 1024 << "Mb" << LL_ENDL; #endif } //static std::string LLMemory::getInfo() { std::ostringstream s; #if LL_WINDOWS MEMORYSTATUSEX state; state.dwLength = sizeof(state); GlobalMemoryStatusEx(&state); s << "Percent Memory use: " << (U32)state.dwMemoryLoad << '%' << std::endl; s << "Total Physical KB: " << (U32)(state.ullTotalPhys / 1024) << std::endl; s << "Avail Physical KB: " << (U32)(state.ullAvailPhys / 1024) << std::endl; s << "Total page KB: " << (U32)(state.ullTotalPageFile / 1024) << std::endl; s << "Avail page KB: " << (U32)(state.ullAvailPageFile / 1024) << std::endl; s << "Total Virtual KB: " << (U32)(state.ullTotalVirtual / 1024) << std::endl; s << "Avail Virtual KB: " << (U32)(state.ullAvailVirtual / 1024) << std::endl; #elif LL_DARWIN uint64_t phys = 0; size_t len = sizeof(phys); if (sysctlbyname("hw.memsize", &phys, &len, NULL, 0) == 0) { s << "Total Physical KB: " << phys / 1024 << std::endl; } else { s << "Unable to collect memory information"; } #else // *NOTE: This works on Linux. What will it do on other systems ? static const char MEMINFO_FILE[] = "/proc/meminfo"; LLFILE* meminfo = LLFile::open(MEMINFO_FILE, "rb"); if (meminfo) { char line[MAX_STRING]; memset(line, 0, MAX_STRING); while (fgets(line, MAX_STRING, meminfo)) { line[strlen(line) - 1] = ' '; s << line; } LLFile::close(meminfo); } else { s << "Unable to collect memory information"; } #endif return s.str(); } // End of former LLMemoryInfo methods /////////////////////////////////////////////////////////////////////////////// template T* LL_NEXT_ALIGNED_ADDRESS_64(T* address) { return reinterpret_cast( (reinterpret_cast(address) + 0x3F) & ~0x3F); } // Used to be force-inlined in llmemory.h, but does not really deserve it. HB void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes) { llassert(src != NULL && dst != NULL); llassert(bytes > 0 && bytes % sizeof(F32) == 0 && bytes % 16 == 0); llassert(src < dst ? src + bytes <= dst : dst + bytes <= src); ll_assert_aligned(src, 16); ll_assert_aligned(dst, 16); char* end = dst + bytes; if (bytes > 64) { // Find start of 64 bytes-aligned area within block void* begin_64 = LL_NEXT_ALIGNED_ADDRESS_64(dst); // At least 64 bytes before the end of the destination, switch to 16 // bytes copies void* end_64 = end - 64; // Prefetch the head of the 64 bytes area now _mm_prefetch((char*)begin_64, _MM_HINT_NTA); _mm_prefetch((char*)begin_64 + 64, _MM_HINT_NTA); _mm_prefetch((char*)begin_64 + 128, _MM_HINT_NTA); _mm_prefetch((char*)begin_64 + 192, _MM_HINT_NTA); // Copy 16 bytes chunks until we're 64 bytes aligned while (dst < begin_64) { _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); dst += 16; src += 16; } // Copy 64 bytes chunks up to your tail // // Might be good to shmoo the 512b prefetch offset (characterize // performance for various values) while (dst < end_64) { _mm_prefetch((char*)src + 512, _MM_HINT_NTA); _mm_prefetch((char*)dst + 512, _MM_HINT_NTA); _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); _mm_store_ps((F32*)(dst + 16), _mm_load_ps((F32*)(src + 16))); _mm_store_ps((F32*)(dst + 32), _mm_load_ps((F32*)(src + 32))); _mm_store_ps((F32*)(dst + 48), _mm_load_ps((F32*)(src + 48))); dst += 64; src += 64; } } // Copy remainder 16 bytes tail chunks (or ALL 16 bytes chunks for // sub-64 bytes copies) while (dst < end) { _mm_store_ps((F32*)dst, _mm_load_ps((F32*)src)); dst += 16; src += 16; } }