From d28979939306eff96aa96c590a39740c550d13f3 Mon Sep 17 00:00:00 2001 From: sys64738 Date: Mon, 19 Jul 2021 03:17:03 +0200 Subject: [PATCH] dynamic memory allocation so the SUMP mode can use all available memory w/o limiting other modes --- CMakeLists.txt | 3 ++ bsp/rp2040/linkdefs.h | 9 ++++ bsp/rp2040/m_sump/sump_hw.h | 2 +- src/alloc.c | 86 +++++++++++++++++++++++++++++++++++++ src/alloc.h | 23 ++++++++++ src/m_sump/cdc_sump.c | 45 +++++++++++-------- src/modeset.c | 3 ++ src/no_malloc.c | 45 +++++++++++++++++++ 8 files changed, 197 insertions(+), 19 deletions(-) create mode 100644 bsp/rp2040/linkdefs.h create mode 100644 src/alloc.c create mode 100644 src/alloc.h create mode 100644 src/no_malloc.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c909e28..4ad2d0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,6 +55,7 @@ else() endif() target_sources(${PROJECT} PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/src/no_malloc.c ${CMAKE_CURRENT_SOURCE_DIR}/libco/libco.S ${CMAKE_CURRENT_SOURCE_DIR}/CMSIS_5/CMSIS/DAP/Firmware/Source/DAP.c ${CMAKE_CURRENT_SOURCE_DIR}/CMSIS_5/CMSIS/DAP/Firmware/Source/JTAG_DP.c @@ -63,6 +64,7 @@ target_sources(${PROJECT} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/CMSIS_5/CMSIS/DAP/Firmware/Source/SW_DP.c ${CMAKE_CURRENT_SOURCE_DIR}/bsp/${FAMILY}/unique.c ${CMAKE_CURRENT_SOURCE_DIR}/src/main.c + ${CMAKE_CURRENT_SOURCE_DIR}/src/alloc.c ${CMAKE_CURRENT_SOURCE_DIR}/src/modeset.c ${CMAKE_CURRENT_SOURCE_DIR}/src/thread.c ${CMAKE_CURRENT_SOURCE_DIR}/src/tusb_plt.S @@ -101,6 +103,7 @@ add_custom_target(fix_db ALL WORKING_DIRECTORY ${OUTPUT_DIR} COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/scripts/fix_clang_db.py") if(FAMILY STREQUAL "rp2040") + # NOTE: do NOT enable pico_runtime here, as it pulls in malloc! 
target_link_libraries(${PROJECT} pico_stdlib pico_unique_id hardware_spi hardware_i2c hardware_adc hardware_pio hardware_dma hardware_pwm pico_fix_rp2040_usb_device_enumeration diff --git a/bsp/rp2040/linkdefs.h b/bsp/rp2040/linkdefs.h new file mode 100644 index 0000000..3564b5e --- /dev/null +++ b/bsp/rp2040/linkdefs.h @@ -0,0 +1,9 @@ + +#ifndef BSP_LINKDEFS_H_ +#define BSP_LINKDEFS_H_ + +#define BSP_HEAP_START_SYM __end__ +#define BSP_HEAP_END_SYM __StackLimit /* __HeapLimit? */ + +#endif + diff --git a/bsp/rp2040/m_sump/sump_hw.h b/bsp/rp2040/m_sump/sump_hw.h index 3904d3c..d683dd8 100644 --- a/bsp/rp2040/m_sump/sump_hw.h +++ b/bsp/rp2040/m_sump/sump_hw.h @@ -10,7 +10,7 @@ #define SAMPLING_BYTES ((SAMPLING_BITS+7)/8) #if PICO_NO_FLASH -#define SUMP_MEMORY_SIZE 102400 // 100kB +#define SUMP_MEMORY_SIZE 151552/*102400*/ // 150kB #else #define SUMP_MEMORY_SIZE 204800 // 200kB #endif diff --git a/src/alloc.c b/src/alloc.c new file mode 100644 index 0000000..88ded37 --- /dev/null +++ b/src/alloc.c @@ -0,0 +1,86 @@ +// vim: set et: + +#include "alloc.h" + +#include +#include +#include + +#include "linkdefs.h" + +extern size_t BSP_HEAP_START_SYM; +extern size_t BSP_HEAP_END_SYM; + +static size_t alloc_pos = 0; // bytes handed out so far, counted from the heap start +// bytes still free between the current allocation position and the heap end +size_t m_mem_available(void) { + return (size_t)&BSP_HEAP_END_SYM - (size_t)&BSP_HEAP_START_SYM - alloc_pos; +} +void m_alloc_clear(void) { alloc_pos = 0; } // drops every allocation at once + +#ifndef likely +#define likely(x) __builtin_expect(x, 1) +#endif +// rounds startpos up to the next multiple of align; an align of 0 or 1 leaves it untouched +static size_t get_aligned(size_t startpos, size_t align) { + if (align <= 1) return startpos; // nothing to align to (also keeps align == 0 away from the bit tricks) + if (likely(!(align & (align - 1)))) { // if align is a power of two + // use much faster bitops + if (startpos & (align - 1)) { startpos += align - (startpos & (align - 1)); } + } else if (startpos % align) { + startpos += align - (startpos % align); + } + return startpos; +} +// hands out size bytes aligned to align from the linear heap; returns NULL when they do not fit +void* m_alloc(size_t size, size_t align) { + size_t startpos = (size_t)&BSP_HEAP_START_SYM + alloc_pos; + startpos = get_aligned(startpos, align); + // compare by subtraction so that a huge size cannot wrap the address around + if (startpos > (size_t)&BSP_HEAP_END_SYM || size > (size_t)&BSP_HEAP_END_SYM - startpos)
{ + // out of memory + return NULL; + } + // alloc_pos is an offset from the heap start, so turn the end address back into one + alloc_pos = startpos + size - (size_t)&BSP_HEAP_START_SYM; + + return (void*)startpos; +} +// like m_alloc(), but the returned block is zero-filled +void* m_alloc0(size_t size, size_t align) { + void* ret = m_alloc(size, align); + + if (!ret) return NULL; + + memset(ret, 0, size); + + return ret; +} +// claims everything that is left, rounded down to a multiple of sizemult; the byte count goes to *size +void* m_alloc_all_remaining(size_t sizemult, size_t align, size_t* size) { + if (!size) return NULL; + *size = 0xEEEEEEEEul; // poison value, stays in place if the allocation fails + + size_t startpos = (size_t)&BSP_HEAP_START_SYM + alloc_pos; + startpos = get_aligned(startpos, align); + // measure from the aligned start, so the alignment padding is not counted as usable space + size_t available = startpos > (size_t)&BSP_HEAP_END_SYM ? 0 : (size_t)&BSP_HEAP_END_SYM - startpos; + + // out of memory + if (available < sizemult) return NULL; + + // align alloc'ed size down + if (likely(!(sizemult & (sizemult - 1)))) { // if sizemult is a power of two + if (available & (sizemult - 1)) { + available -= available & (sizemult - 1); + } + } else if (available % sizemult) { + available -= available % sizemult; + } + + *size = available; + alloc_pos = startpos + available - (size_t)&BSP_HEAP_START_SYM; + + return (void*)startpos; +} + diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..e0a19a6 --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,23 @@ + +#ifndef ALLOC_H_ +#define ALLOC_H_ + +#include + +// this allocator is intended for large blocks of memory that live +// for the entire time a mode is active.
therefore, this is a very simple +// linear allocator + +size_t m_mem_available(void); + +__attribute__((__alloc_size__(1), __alloc_align__(2))) +void* m_alloc(size_t size, size_t align); +__attribute__((__alloc_size__(1), __alloc_align__(2))) +void* m_alloc0(size_t size, size_t align); +__attribute__((__alloc_align__(2))) +void* m_alloc_all_remaining(size_t sizemult, size_t align, size_t *size); + +void m_alloc_clear(void); + +#endif + diff --git a/src/m_sump/cdc_sump.c b/src/m_sump/cdc_sump.c index e4c5e29..5d03de2 100644 --- a/src/m_sump/cdc_sump.c +++ b/src/m_sump/cdc_sump.c @@ -29,6 +29,7 @@ #include #include +#include "alloc.h" #include "info.h" #include "m_sump/bsp-feature.h" #include "m_sump/sump.h" @@ -46,7 +47,7 @@ #if (SUMP_MEMORY_SIZE % SUMP_MAX_CHUNK_SIZE) != 0 #error "Invalid maximal chunk size!" #endif - +// TODO #if (SUMP_MEMORY_SIZE / SUMP_MAX_CHUNK_SIZE) < SUMP_DMA_CHANNELS #error "DMA buffer and DMA channels out of sync!" #endif @@ -97,9 +98,13 @@ static struct _sump { // uint32_t dma_curr_idx; // current DMA channel (index) uint32_t dma_pos; uint32_t next_count; - uint8_t buffer[SUMP_MEMORY_SIZE]; + //uint8_t buffer[SUMP_MEMORY_SIZE]; } sump; +// not in the main sump struct, as the latter gets cleared every so often +size_t sump_memory_size; +uint8_t* sump_buffer; + /* utility functions ======================================================= */ /*static void picoprobe_debug_hexa(uint8_t *buf, uint32_t len) { @@ -261,7 +266,7 @@ static int sump_capture_next(uint32_t pos) { } // waiting for the trigger samples - uint8_t* ptr = sump_analyze_trigger(sump.buffer + pos, sump.chunk_size); + uint8_t* ptr = sump_analyze_trigger(sump_buffer + pos, sump.chunk_size); if (ptr == NULL) { // call this routine again right after next chunk return sump.chunk_size; @@ -271,8 +276,8 @@ static int sump_capture_next(uint32_t pos) { // calculate read start uint32_t tmp = (sump.read_count - sump.delay_count) * sump.width; - pos = ptr - sump.buffer; - sump.read_start = 
(pos - tmp) % SUMP_MEMORY_SIZE; + pos = ptr - sump_buffer; + sump.read_start = (pos - tmp) % sump_memory_size;//SUMP_MEMORY_SIZE; // calculate the samples after trigger uint32_t delay_bytes = sump.delay_count * sump.width; @@ -285,7 +290,7 @@ static int sump_capture_next(uint32_t pos) { } uint8_t* sump_capture_get_next_dest(uint32_t numch) { - return sump.buffer + (sump.dma_pos + numch * sump.chunk_size) % SUMP_MEMORY_SIZE; + return sump_buffer + (sump.dma_pos + numch * sump.chunk_size) % sump_memory_size;//SUMP_MEMORY_SIZE; } void sump_capture_callback_cancel(void) { @@ -304,7 +309,7 @@ void sump_capture_callback(uint32_t ch, uint32_t numch) { // sump_irq_debug("%s(): next=0x%x\n", __func__, sump.next_count); sump.dma_pos += sump.chunk_size; - sump.dma_pos %= SUMP_MEMORY_SIZE; + sump.dma_pos %= sump_memory_size;//SUMP_MEMORY_SIZE; if (sump.state == SUMP_STATE_SAMPLING && sump.next_count >= sump.chunk_size && sump.next_count < numch * sump.chunk_size) { @@ -325,7 +330,7 @@ static void sump_xfer_start(uint8_t state) { sump.delay_count, sump.divider); uint32_t count = sump.read_count; - if (count > SUMP_MEMORY_SIZE) count = SUMP_MEMORY_SIZE; + if (count > sump_memory_size) count = sump_memory_size; sump.dma_count = count; if (sump.read_count <= sump.delay_count) @@ -336,13 +341,13 @@ static void sump_xfer_start(uint8_t state) { sump.read_start = 0; picoprobe_debug("%s(): buffer = 0x%08x, dma_count=0x%08x next_count=0x%08x\n", __func__, - sump.buffer, sump.dma_count, sump.next_count); + sump_buffer, sump.dma_count, sump.next_count); // limit chunk size for slow sampling sump_set_chunk_size(); /*sump.timestamp_start =*/sump_hw_capture_start( - sump.width, sump.flags, sump.chunk_size, sump.buffer); + sump.width, sump.flags, sump.chunk_size, sump_buffer); sump.state = state; } @@ -359,7 +364,7 @@ static void sump_do_meta(void) { sump_hw_get_cpu_name(cpu); ptr = sump_add_metas(ptr, SUMP_META_CPU_VERSION, cpu); ptr = sump_add_meta4(ptr, SUMP_META_SAMPLE_RATE, 
sump_hw_get_sysclk() / SAMPLING_DIVIDER); - ptr = sump_add_meta4(ptr, SUMP_META_SAMPLE_RAM, SUMP_MEMORY_SIZE); + ptr = sump_add_meta4(ptr, SUMP_META_SAMPLE_RAM, sump_memory_size); ptr = sump_add_meta1(ptr, SUMP_META_PROBES_B, SAMPLING_BITS); ptr = sump_add_meta1(ptr, SUMP_META_PROTOCOL_B, 2); *ptr++ = SUMP_META_END; @@ -636,13 +641,13 @@ static uint32_t sump_tx8(uint8_t* buf, uint32_t len) { uint32_t i; uint32_t count = sump.read_count; // picoprobe_debug("%s: count=%u, start=%u\n", __func__, count); - uint8_t* ptr = sump.buffer + (sump.read_start + count) % SUMP_MEMORY_SIZE; + uint8_t* ptr = sump_buffer + (sump.read_start + count) % sump_memory_size;//SUMP_MEMORY_SIZE; if (sump.flags & SUMP_FLAG1_ENABLE_RLE) { uint8_t b, rle_last = 0x80, rle_count = 0; for (i = 0; i + 1 < len && count > 0; count--) { - if (ptr == sump.buffer) ptr = sump.buffer + SUMP_MEMORY_SIZE; + if (ptr == sump_buffer) ptr = sump_buffer + sump_memory_size;//SUMP_MEMORY_SIZE; b = *(--ptr) & 0x7f; @@ -668,7 +673,7 @@ static uint32_t sump_tx8(uint8_t* buf, uint32_t len) { } } else { for (i = 0; i < len && count > 0; i++, count--) { - if (ptr == sump.buffer) ptr = sump.buffer + SUMP_MEMORY_SIZE; + if (ptr == sump_buffer) ptr = sump_buffer + sump_memory_size;//SUMP_MEMORY_SIZE; *buf++ = *(--ptr); } @@ -684,13 +689,13 @@ static uint32_t sump_tx16(uint8_t* buf, uint32_t len) { uint32_t i; uint32_t count = sump.read_count; // picoprobe_debug("%s: count=%u, start=%u\n", __func__, count, sump.read_count); - volatile uint8_t* ptr = sump.buffer + (sump.read_start + count * 2) % SUMP_MEMORY_SIZE; + volatile uint8_t* ptr = sump_buffer + (sump.read_start + count * 2) % sump_memory_size;//SUMP_MEMORY_SIZE; if (sump.flags & SUMP_FLAG1_ENABLE_RLE) { uint16_t rle_last = 0x8000, rle_count = 0; for (i = 0; i + 3 < len && count > 0; count--) { - if (ptr == sump.buffer) ptr = sump.buffer + SUMP_MEMORY_SIZE; + if (ptr == sump_buffer) ptr = sump_buffer + sump_memory_size;//SUMP_MEMORY_SIZE; ptr -= 2; @@ -718,7 +723,7 
@@ static uint32_t sump_tx16(uint8_t* buf, uint32_t len) { } } else { for (i = 0; i + 1 < len && count > 0; i += 2, count--) { - if (ptr == sump.buffer) ptr = sump.buffer + SUMP_MEMORY_SIZE; + if (ptr == sump_buffer) ptr = sump_buffer + sump_memory_size;//SUMP_MEMORY_SIZE; ptr -= 2; *((uint16_t*)buf) = *((uint16_t*)ptr); @@ -762,6 +767,7 @@ static uint32_t sump_fill_tx(uint8_t* buf, uint32_t len) { static void sump_init_connect(void) { memset(&sump, 0, sizeof(sump)); + memset(sump_buffer, 0, sump_memory_size); sump.width = 1; sump.divider = 1000; // a safe value sump.read_count = 256; @@ -769,16 +775,19 @@ static void sump_init_connect(void) { } void cdc_sump_init(void) { + sump_buffer = m_alloc_all_remaining(SUMP_MAX_CHUNK_SIZE, 4, &sump_memory_size); + sump_hw_init(); sump_init_connect(); - picoprobe_debug("%s(): memory buffer %u bytes\n", __func__, SUMP_MEMORY_SIZE); + picoprobe_debug("%s(): memory buffer %u bytes\n", __func__, sump_memory_size); } void cdc_sump_deinit(void) { sump_hw_deinit(); memset(&sump, 0, sizeof(sump)); + memset(sump_buffer, 0, sump_memory_size); } #define MAX_UART_PKT 64 diff --git a/src/modeset.c b/src/modeset.c index 934311c..404cfde 100644 --- a/src/modeset.c +++ b/src/modeset.c @@ -2,6 +2,7 @@ #include +#include "alloc.h" #include "mode.h" extern struct mode m_01_default, m_04_sump; @@ -82,6 +83,8 @@ void modes_switch(uint8_t newmode) { } if (mode_current) mode_current->leave(); + // release everything the previous mode got from m_alloc*() (only resets the allocator, the memory itself is not wiped) + m_alloc_clear(); // to force a reconfig from the device, we basically have to kill the USB // physical connection for a while diff --git a/src/no_malloc.c b/src/no_malloc.c new file mode 100644 index 0000000..d9eb819 --- /dev/null +++ b/src/no_malloc.c @@ -0,0 +1,45 @@ +// vim: set et: +// society has passed beyond the need for malloc: every libc allocator entry point below is a stub that refers to a symbol which (judging by its name) is never defined, presumably so that any code still pulling one of them in fails at link time + +#include + +extern void* this_symbol_should_not_exist_DO_NOT_USE_MALLOC_KTHX; + +void* malloc(size_t size) { + (void)size; + return this_symbol_should_not_exist_DO_NOT_USE_MALLOC_KTHX; +} +void*
calloc(size_t nmemb, size_t size) { + (void)nmemb; + return malloc(size); +} +void* realloc(void* ptr, size_t size) { + (void)ptr; + return malloc(size); +} +void* reallocarray(void* ptr, size_t nmemb, size_t size) { + (void)ptr; (void)nmemb; + return malloc(size); +} +void free(void* ptr) { + (void)ptr; + (void)*(volatile size_t*)this_symbol_should_not_exist_DO_NOT_USE_MALLOC_KTHX; +} + +// newlib stuff: the __real_* names (presumably referenced when the allocator is wrapped via the linker's --wrap option, TODO confirm) simply forward to the stubs above +extern void* __real_malloc(size_t size); +extern void* __real_calloc(size_t nmemb, size_t size); +extern void __real_free(void* ptr); + +void* __real_malloc(size_t size) { return malloc(size); } +void* __real_calloc(size_t nmemb, size_t size) { return calloc(nmemb, size); } +void __real_free(void* ptr) { free(ptr); } + +/*extern void* __wrap_malloc(size_t size); +extern void* __wrap_calloc(size_t nmemb, size_t size); +extern void __wrap_free(void* ptr); + +void* __wrap_malloc(size_t size) { return malloc(size); } +void* __wrap_calloc(size_t nmemb, size_t size) { return calloc(nmemb, size); } +void __wrap_free(void* ptr) { free(ptr); }*/ +