diff --git a/CMakeLists.txt b/CMakeLists.txt
index bab43939a686d7586dd75b0a22557adbb5dde95f..4e2cbe886e67297742e49c921d4871a2a805e785 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -285,7 +285,8 @@ add_executable(test-usart-f7 src/tests/drivers/usart/test-usart.cpp)
 sbs_target(test-usart-f7 stm32f767zi_nucleo)
 
 add_executable(test-dma-mem-to-mem src/tests/drivers/test-dma-mem-to-mem.cpp)
-sbs_target(test-dma-mem-to-mem stm32f407vg_stm32f4discovery)
+# sbs_target(test-dma-mem-to-mem stm32f407vg_stm32f4discovery)
+sbs_target(test-dma-mem-to-mem stm32f767zi_orion_engine)
 
 add_executable(test-i2c-driver-f4 src/tests/drivers/i2c/test-i2c-driver.cpp)
 sbs_target(test-i2c-driver-f4 stm32f429zi_stm32f4discovery)
diff --git a/src/tests/drivers/test-dma-mem-to-mem.cpp b/src/tests/drivers/test-dma-mem-to-mem.cpp
index 6eef0b9f22b5d4b13861974cb5865c551bf081df..30bf8c03b87bb163ffdbcc5cc9d6673d86fc2db5 100644
--- a/src/tests/drivers/test-dma-mem-to-mem.cpp
+++ b/src/tests/drivers/test-dma-mem-to-mem.cpp
@@ -29,51 +29,53 @@ using namespace Boardcore;
 
 void printBuffer(uint8_t* buffer, size_t size);
 
+void dmaCpy(uint8_t* srcBuf, uint8_t* dstBuf, const uint16_t nBytes);
+
 int main()
 {
-    DMAStreamGuard stream = DMADriver::instance().acquireStreamForPeripheral(
-        DMADefs::Peripherals::PE_MEM_ONLY);
-
-    if (!stream.isValid())
-    {
-        printf("Error, cannot allocate dma stream\n");
-        return 0;
-    }
+    // SCB_DisableDCache();
 
     /**
      * In this test we want to copy a buffer1 into buffer2 with the DMA.
      */
 
-    uint8_t buffer1[8] = {1, 2, 3, 4, 5, 6, 7, 8};
-    uint8_t buffer2[8] = {0};
+    const int SIZE = 4;
+    // uint8_t buffer1[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+    // uint8_t buffer2[8] = {0};
+    __attribute__((aligned(32))) uint8_t buffer1[SIZE];
+    for(int i = 0; i < SIZE; ++i)
+        buffer1[i] = i+1;
+    __attribute__((aligned(32))) uint8_t buffer2[SIZE] = {0};
+    __attribute__((aligned(32))) uint8_t buffer3[SIZE] = {0};
 
     printf("Before:\n");
     printf("Buffer 1:\n");
     printBuffer(buffer1, sizeof(buffer1));
     printf("Buffer 2:\n");
     printBuffer(buffer2, sizeof(buffer2));
+    printf("Buffer 3:\n");
+    printBuffer(buffer3, sizeof(buffer3));
 
-    DMATransaction trn{
-        .direction         = DMATransaction::Direction::MEM_TO_MEM,
-        .srcSize           = DMATransaction::DataSize::BITS_8,
-        .dstSize           = DMATransaction::DataSize::BITS_8,
-        .srcAddress        = buffer1,
-        .dstAddress        = buffer2,
-        .numberOfDataItems = sizeof(buffer1),
-        .srcIncrement      = true,
-        .dstIncrement      = true,
-        .enableTransferCompleteInterrupt = true,
-    };
-    stream->setup(trn);
-    stream->enable();
+    dmaCpy(buffer1, buffer2, SIZE);
 
-    stream->waitForTransferComplete();
+    buffer2[0] = 9;
+    // Cleaning the D-cache is not necessary here because the cache is
+    // declared write-through (original author's note; TODO confirm).
+    // SCB_CleanDCache_by_Addr((uint32_t*)buffer2, SIZE);
+
+    dmaCpy(buffer2, buffer3, SIZE);
+
+    #ifdef STM32F767xx
+    SCB_InvalidateDCache_by_Addr((uint32_t*)buffer3, SIZE); // if correctly aligned, this alone is enough
+    #endif
 
-    printf("After:\n");
+    printf("--- After:\n");
     printf("Buffer 1:\n");
     printBuffer(buffer1, sizeof(buffer1));
     printf("Buffer 2:\n");
     printBuffer(buffer2, sizeof(buffer2));
+    printf("Buffer 3:\n");
+    printBuffer(buffer3, sizeof(buffer3));
 
     return 0;
 }
@@ -85,3 +87,49 @@ void printBuffer(uint8_t* buffer, size_t size)
 
     printf("%x\n", buffer[size - 1]);
 }
+
+void dmaCpy(uint8_t* srcBuf, uint8_t* dstBuf, const uint16_t nBytes)
+{
+    DMAStreamGuard stream = DMADriver::instance().acquireStreamForPeripheral(
+        DMADefs::Peripherals::PE_MEM_ONLY);
+
+    if (!stream.isValid())
+    {
+        printf("Error, cannot allocate dma stream\n");
+        return;
+    }
+
+    DMATransaction trn{
+        .direction         = DMATransaction::Direction::MEM_TO_MEM,
+        .srcSize           = DMATransaction::DataSize::BITS_8,
+        .dstSize           = DMATransaction::DataSize::BITS_8,
+        .srcAddress        = srcBuf,
+        .dstAddress        = dstBuf,
+        .numberOfDataItems = nBytes,
+        .srcIncrement      = true,
+        .dstIncrement      = true,
+        .enableTransferCompleteInterrupt = true,
+    };
+    stream->setup(trn);
+    stream->enable();
+
+    stream->waitForTransferComplete();
+
+    /**
+     * Notes on the (commented-out) D-cache invalidation experiments below.
+     * With these two calls it works, although it is not clear to me why:
+     * - the first call handles ALMOST the whole array: all but the last 4-8 bytes
+     * - the second call covers the last ones, although it is suspicious: it is told
+     * to cover the last 4 bytes, but it also covers the last 8 if needed (???)
+     * */
+    // for uint8_t
+    // SCB_InvalidateDCache_by_Addr((uint32_t*)dstBuf, nBytes); // only fixes the first 4 bytes
+    // SCB_InvalidateDCache_by_Addr((uint32_t*)dstBuf + (nBytes/4) - 1, 4); // only fixes the first 4 bytes
+
+    // for uint32_t -- NOTE(review): buffer2 and SIZE are locals of main(), not visible here
+    // SCB_InvalidateDCache_by_Addr((uint32_t*)buffer2, sizeof(buffer2)/*+sizeof(uint32_t)*/); // only fixes the first 4 bytes
+    // SCB_InvalidateDCache_by_Addr((uint32_t*)buffer2 + (SIZE) - 1, 4); // only fixes the first 4 bytes
+
+
+    // SCB_CleanDCache_by_Addr((uint32_t*)buffer2, 8); // does nothing
+}
\ No newline at end of file