diff --git a/main.cpp b/main.cpp
index 8764b9fabb7ead786e8a9eae3d8bd03873dc6ec4..69a7ca9ac8dbf04cfd984b4a27ca9ca397bf3783 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,11 +1,134 @@
 
 #include <cstdio>
+#include <cmath>
+#include <cassert>
+#include <pthread.h>
+#include <utility>
 #include "miosix.h"
 
 using namespace std;
 using namespace miosix;
 
+volatile float f1=3.0f; //Volatile to prevent compiler optimization
+volatile float f2=2.0f; //from moving the computation out of the loop
+
+static float approxSqrt1() //Approximates sqrt(f1) with the Newton (Babylonian) iteration
+{
+    float result=f1; //Initial guess is the operand itself
+    for(int j=0;j<10;j++) //10 bursts of 100000 iterations, 1000000 in total
+    {
+        for(int i=0;i<1000000/10;i++) result=(result+f1/result)/2.0f;
+        delayMs(2); //To test code that first uses fp, then stops and restarts
+    }
+    return result;
+}
+
+static float approxSqrt2() //Approximates sqrt(f2), same iteration as approxSqrt1 but without delays
+{
+    float result=f2; //Initial guess is the operand itself
+    for(int i=0;i<1000000;i++) result=(result+f2/result)/2.0f;
+    return result;
+}
+
+void *thread(void*) //Thread body: computes approxSqrt1() forever, printing each result tagged "b"
+{
+    for(;;) //Never returns
+    {
+        volatile float value=approxSqrt1();
+        //assert(fabsf(value-sqrt(3.0f))<0.000001f);
+        printf("b: %12.10f\n",value);
+    }
+}
+
 int main()
 {
-    //iprintf("Hello world, write your application here\n");
+    pthread_t t;
+    pthread_create(&t,0,thread,0); //Runs approxSqrt1() concurrently with the loop below
+    for(;;) //Never returns
+    {
+        volatile float value=approxSqrt2();
+        //assert(fabsf(value-sqrt(2.0f))<0.000001f);
+        printf("a: %12.10f\n",value); //Tag "a" tells this output from the thread's "b"
+    }
 }
+
+//#define sarcazzo
+//
+//int exchange_and_add(volatile int* __mem, int __val)
+//  {
+//    int __result;
+//
+//    #ifdef sarcazzo
+//    int __ok;
+//    do {
+//      asm volatile("ldrex %0, [%1]"     : "=r"(__result) : "r"(__mem)             : "memory");
+//      int __tmp = __result + __val;
+//      asm volatile("strex %0, %1, [%2]" : "=r"(__ok)     : "r"(__tmp), "r"(__mem) : "memory");
+//    } while(__ok);
+//    #else
+//    __result = *__mem;
+//    *__mem += __val;
+//    #endif
+//
+//    return __result;
+//  }
+//
+//int atomic_add(volatile int* __mem, int __val)
+//{
+//	int result;
+//    #ifdef sarcazzo
+//    int __ok;
+//    do {
+//      int __tmp;
+//      asm volatile("ldrex %0, [%1]"     : "=r"(__tmp) : "r"(__mem)             : "memory");
+//      __tmp += __val;
+//      asm volatile("strex %0, %1, [%2]" : "=r"(__ok)  : "r"(__tmp), "r"(__mem) : "memory");
+//	  result++;
+//    } while(__ok);
+//    #else //sarcazzo
+//    int __tmp=*__mem;
+//	__tmp += __val;
+//	*__mem=__tmp;
+//	result=1;
+//    #endif //sarcazzo
+//	return result;
+//}
+//
+//int mazz;
+//int k;
+//int w;
+//
+//void *thread(void*)
+//{
+//	mazz=0;
+//	for(int i=0;i<1000;i++)
+//	{
+//		mazz=max(mazz,atomic_add(&k, 1));
+//		mazz=max(mazz,atomic_add(&k,-1));
+//		exchange_and_add(&w, 1);
+//		exchange_and_add(&w,-1);
+//	}
+//	return 0;
+//}
+//
+//int main()
+//{
+//	getchar();
+//	for(;;)
+//	{
+//		k=0;
+//		w=0;
+//		pthread_t t;
+//		pthread_create(&t,0,thread,0);
+//		int maz=0;
+//		for(int i=0;i<1000;i++)
+//		{
+//			maz=max(maz,atomic_add(&k, 1));
+//			maz=max(maz,atomic_add(&k,-1));
+//			exchange_and_add(&w, 1);
+//			exchange_and_add(&w,-1);
+//		}
+//		pthread_join(t,0);
+//		iprintf("Main: k=%d, w=%d, max1=%d max2=%d\n",k,w,maz,mazz);
+//	}
+//}
diff --git a/miosix/arch/cortexM3_stm32/common/interfaces-impl/disk.cpp b/miosix/arch/cortexM3_stm32/common/interfaces-impl/disk.cpp
index 3cd10841ddcb42c3dadb4b3ac6f0aa0370b1c710..a7cacaf80130e29876de43cd2ef1ec5a06be5ab2 100644
--- a/miosix/arch/cortexM3_stm32/common/interfaces-impl/disk.cpp
+++ b/miosix/arch/cortexM3_stm32/common/interfaces-impl/disk.cpp
@@ -721,7 +721,7 @@ private:
 
     ///\internal Clock enabled, bus width 4bit, clock powersave enabled.
     static const unsigned int CLKCR_FLAGS=SDIO_CLKCR_CLKEN |
-        SDIO_CLKCR_WIDBUS_0 | SDIO_CLKCR_PWRSAV;
+        SDIO_CLKCR_WIDBUS_0 | SDIO_CLKCR_PWRSAV | SDIO_CLKCR_HWFC_EN;
 
     ///\internal Maximum number of calls to IRQreduceClockSpeed() allowed
     static const unsigned char MAX_ALLOWED_REDUCTIONS=5;
diff --git a/miosix/arch/cortexM3_stm32/common/interfaces-impl/portability.cpp b/miosix/arch/cortexM3_stm32/common/interfaces-impl/portability.cpp
index 78081e63f9ae808238f60ac8607e5853cddaff66..f00bbafb0b8d6559383dce24a5da4ab9ae5ddb84 100644
--- a/miosix/arch/cortexM3_stm32/common/interfaces-impl/portability.cpp
+++ b/miosix/arch/cortexM3_stm32/common/interfaces-impl/portability.cpp
@@ -174,7 +174,7 @@ void IRQportableStartKernel()
     SCB->SHCSR |= SCB_SHCSR_USGFAULTENA | SCB_SHCSR_BUSFAULTENA
             | SCB_SHCSR_MEMFAULTENA;
     //Enable traps for unaligned memory access and division by zero
-    SCB->CCR |= SCB_CCR_DIV_0_TRP | SCB_CCR_UNALIGN_TRP;
+    SCB->CCR |= SCB_CCR_DIV_0_TRP;// | SCB_CCR_UNALIGN_TRP;
     NVIC_SetPriorityGrouping(7);//This should disable interrupt nesting
     NVIC_SetPriority(SVCall_IRQn,3);//High priority for SVC (Max=0, min=15)
     NVIC_SetPriority(SysTick_IRQn,3);//High priority for SysTick (Max=0, min=15)
diff --git a/miosix/arch/cortexM3_stm32f2/common/interfaces-impl/portability.cpp b/miosix/arch/cortexM3_stm32f2/common/interfaces-impl/portability.cpp
index c82fa7f5b11a057816feec6da4c981a71926944b..3d995f902870e94e4493ce1dbc97ef384db797e4 100644
--- a/miosix/arch/cortexM3_stm32f2/common/interfaces-impl/portability.cpp
+++ b/miosix/arch/cortexM3_stm32f2/common/interfaces-impl/portability.cpp
@@ -174,7 +174,7 @@ void IRQportableStartKernel()
     SCB->SHCSR |= SCB_SHCSR_USGFAULTENA_Msk | SCB_SHCSR_BUSFAULTENA_Msk
             | SCB_SHCSR_MEMFAULTENA_Msk;
     //Enable traps for unaligned memory access and division by zero
-    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk | SCB_CCR_UNALIGN_TRP_Msk;
+    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk;
     NVIC_SetPriorityGrouping(7);//This should disable interrupt nesting
     NVIC_SetPriority(SVCall_IRQn,3);//High priority for SVC (Max=0, min=15)
     NVIC_SetPriority(SysTick_IRQn,3);//High priority for SysTick (Max=0, min=15)
diff --git a/miosix/arch/cortexM3_stm32l1/common/interfaces-impl/portability.cpp b/miosix/arch/cortexM3_stm32l1/common/interfaces-impl/portability.cpp
index ac8c64f7be6a21c94a7d81c69b5d851805ad997f..14ad5d8baa19a5e0de2ab5e5fef1772358f95b9c 100644
--- a/miosix/arch/cortexM3_stm32l1/common/interfaces-impl/portability.cpp
+++ b/miosix/arch/cortexM3_stm32l1/common/interfaces-impl/portability.cpp
@@ -174,7 +174,7 @@ void IRQportableStartKernel()
     SCB->SHCSR |= SCB_SHCSR_USGFAULTENA_Msk | SCB_SHCSR_BUSFAULTENA_Msk
             | SCB_SHCSR_MEMFAULTENA_Msk;
     //Enable traps for unaligned memory access and division by zero
-    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk | SCB_CCR_UNALIGN_TRP_Msk;
+    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk;
     NVIC_SetPriorityGrouping(7);//This should disable interrupt nesting
     NVIC_SetPriority(SVC_IRQn,3);//High priority for SVC (Max=0, min=15)
     NVIC_SetPriority(SysTick_IRQn,3);//High priority for SysTick (Max=0, min=15)
diff --git a/miosix/arch/cortexM4_stm32f4/common/CMSIS/core_cm4.h b/miosix/arch/cortexM4_stm32f4/common/CMSIS/core_cm4.h
index b58f146c5c63f180e9a59e7d6b12fc60056fab49..c7718bfa331cf081bab8eef8b42f13cb0d45edd1 100644
--- a/miosix/arch/cortexM4_stm32f4/common/CMSIS/core_cm4.h
+++ b/miosix/arch/cortexM4_stm32f4/common/CMSIS/core_cm4.h
@@ -132,7 +132,7 @@
       #define __FPU_USED       0
     #endif
   #else
-    #define __FPU_USED         0
+    #define __FPU_USED         1 /* by TFT: enable FPU */
   #endif
 
 #elif defined ( __TASKING__ )
diff --git a/miosix/arch/cortexM4_stm32f4/common/CMSIS/system_stm32f4xx.c b/miosix/arch/cortexM4_stm32f4/common/CMSIS/system_stm32f4xx.c
index 2edd11cc936bc72a7c0e6ead01011c74efbf5e02..0764e832f9892fcda86b8e7223d8611b7956a9c1 100644
--- a/miosix/arch/cortexM4_stm32f4/common/CMSIS/system_stm32f4xx.c
+++ b/miosix/arch/cortexM4_stm32f4/common/CMSIS/system_stm32f4xx.c
@@ -219,6 +219,8 @@ void SystemInit(void)
   /* FPU settings ------------------------------------------------------------*/
   #if (__FPU_PRESENT == 1) && (__FPU_USED == 1)
     SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2));  /* set CP10 and CP11 Full Access */
+  #else
+  #error "FPU disabled!" //By TFT: added a check to be really sure the FPU is on
   #endif
 
   /* Reset the RCC clock configuration to the default reset state ------------*/
diff --git a/miosix/arch/cortexM4_stm32f4/common/arch_settings.h b/miosix/arch/cortexM4_stm32f4/common/arch_settings.h
index 686e61a3c0dc043153b0d91426dc0b1fa8fcab54..8d74c1e436f7c2403c3822c5030e09ae4d99fd88 100644
--- a/miosix/arch/cortexM4_stm32f4/common/arch_settings.h
+++ b/miosix/arch/cortexM4_stm32f4/common/arch_settings.h
@@ -35,17 +35,22 @@ namespace miosix {
  * \{
  */
 
-/// \internal Size of vector to store registers during ctx switch (9*4=36Bytes)
-/// Only sp and r4-r11 are saved here, since r0-r3,r12,lr,pc,xPSR and
-/// old sp are saved by hardware on the process stack on Cortex M3 CPUs.
-const unsigned char CTXSAVE_SIZE=9;
+/// \internal Size of vector to store registers during ctx switch
+/// ((9+16+1)*4=104Bytes). Only sp, r4-r11 and s16-s31 are saved here, since
+/// r0-r3,r12,lr,pc,xPSR, old sp and s0-s15,fpscr are saved by hardware on the
+/// process stack on Cortex M4 CPUs. The +1 is to save the exception lr, that
+/// is, EXC_RETURN, as it is necessary to know if the thread has used fp regs
+const unsigned char CTXSAVE_SIZE=9+16+1;
 
 /// \internal some architectures save part of the context on their stack.
 /// This constant is used to increase the stack size by the size of context
 /// save frame. If zero, this architecture does not save anything on stack
 /// during context save. Size is in bytes, not words.
+///  8 registers=r0-r3,r12,lr,pc,xPSR
+/// 17 registers=s0-s15,fpscr
 /// MUST be divisible by 4.
-const unsigned int CTXSAVE_ON_STACK=32;
+// FIXME: +1 word because of alignment (the note says cortex m3; confirm for this Cortex M4 port)
+const unsigned int CTXSAVE_ON_STACK=(8+17+1)*4;
 
 /**
  * \}
diff --git a/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability.cpp b/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability.cpp
index c82fa7f5b11a057816feec6da4c981a71926944b..d82d741d782064a048c576b5257f830cd2138401 100644
--- a/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability.cpp
+++ b/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability.cpp
@@ -166,6 +166,8 @@ void initCtxsave(unsigned int *ctxsave, void *(*pc)(void *), unsigned int *sp,
 
     ctxsave[0]=reinterpret_cast<unsigned long>(stackPtr);             //--> psp
     //leaving the content of r4-r11 uninitialized
+    //leaving the content of s16-s31 uninitialized
+    ctxsave[25]=0xfffffffd; //EXC_RETURN=thread mode, use psp, no floating ops
 }
 
 void IRQportableStartKernel()
@@ -174,7 +176,7 @@ void IRQportableStartKernel()
     SCB->SHCSR |= SCB_SHCSR_USGFAULTENA_Msk | SCB_SHCSR_BUSFAULTENA_Msk
             | SCB_SHCSR_MEMFAULTENA_Msk;
     //Enable traps for unaligned memory access and division by zero
-    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk | SCB_CCR_UNALIGN_TRP_Msk;
+    SCB->CCR |= SCB_CCR_DIV_0_TRP_Msk;
     NVIC_SetPriorityGrouping(7);//This should disable interrupt nesting
     NVIC_SetPriority(SVCall_IRQn,3);//High priority for SVC (Max=0, min=15)
     NVIC_SetPriority(SysTick_IRQn,3);//High priority for SysTick (Max=0, min=15)
diff --git a/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability_impl.h b/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability_impl.h
index 5fcc019b48c22748b3466d29368a89a918708327..69b042116cb7dbb035ad1eec801dd9636d746e2d 100644
--- a/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability_impl.h
+++ b/miosix/arch/cortexM4_stm32f4/common/interfaces-impl/portability_impl.h
@@ -42,6 +42,9 @@
  * this is a pointer to a location where to store the thread's registers during
  * context switch. It requires C linkage to be used inside asm statement.
  * Registers are saved in the following order:
+ * *ctxsave+100 --> exception lr (EXC_RETURN), *ctxsave+96 --> s31
+ * ...
+ * *ctxsave+36 --> s16
  * *ctxsave+32 --> r11
  * *ctxsave+28 --> r10
  * *ctxsave+24 --> r9
@@ -63,14 +66,15 @@ extern volatile unsigned int *ctxsave;
  * Must be the first line of an IRQ where a context switch can happen.
  * The IRQ must be "naked" to prevent the compiler from generating context save.
  */
-#define saveContext()                                                        \
-{                                                                             \
-    asm volatile("stmdb sp!, {lr}        \n\t" /*save lr on MAIN stack*/      \
-                 "mrs   r1,  psp         \n\t" /*get PROCESS stack pointer*/  \
-                 "ldr   r0,  =ctxsave    \n\t" /*get current context*/        \
-                 "ldr   r0,  [r0]        \n\t"                                \
-                 "stmia r0,  {r1,r4-r11} \n\t" /*save PROCESS sp + r4-r11*/   \
-                 );                                                           \
+#define saveContext()                                                         \
+{                                                                              \
+    asm volatile("mrs     r1,  psp         \n\t" /*get PROCESS stack pointer*/ \
+                 "ldr     r0,  =ctxsave    \n\t" /*get current context*/       \
+                 "ldr     r0, [r0]         \n\t"                               \
+                 "stmia   r0!, {r1,r4-r11} \n\t" /*save PROCESS sp + r4-r11*/  \
+                 "vstmia.32 r0!, {s16-s31} \n\t" /*save s16-s31*/              \
+                 "str     lr, [r0]         \n\t" /*save lr (EXC_RETURN)*/      \
+                 );                                                            \
 }
 
 /**
@@ -79,14 +83,15 @@ extern volatile unsigned int *ctxsave;
  * of an IRQ where a context switch can happen. The IRQ must be "naked" to
  * prevent the compiler from generating context restore.
  */
-#define restoreContext()                                                     \
-{                                                                             \
-    asm volatile("ldr   r0,  =ctxsave    \n\t" /*get current context*/        \
-                 "ldr   r0,  [r0]        \n\t"                                \
-                 "ldmia r0,  {r1,r4-r11} \n\t" /*restore r4-r11 + r1=psp*/    \
-                 "msr   psp, r1          \n\t" /*restore PROCESS sp*/         \
-                 "ldmia sp!, {pc}        \n\t" /*return*/                     \
-                 );                                                           \
+#define restoreContext()                                                      \
+{                                                                              \
+    asm volatile("ldr     r0,  =ctxsave    \n\t" /*get current context*/       \
+                 "ldr     r0,  [r0]        \n\t"                               \
+                 "ldmia   r0!, {r1,r4-r11} \n\t" /*restore r4-r11 + r1=psp*/   \
+                 "vldmia.32 r0!, {s16-s31} \n\t" /*restore s16-s31*/           \
+                 "msr     psp, r1          \n\t" /*restore PROCESS sp*/        \
+                 "ldmia   r0, {pc}         \n\t" /*return via saved lr*/       \
+                 );                                                            \
 }
 
 /**
diff --git a/miosix/config/Makefile.inc b/miosix/config/Makefile.inc
index becb27845867af5ecceb5869511f1985280db46f..70ebc0283d6be97abfda8513ad354cad683016b3 100644
--- a/miosix/config/Makefile.inc
+++ b/miosix/config/Makefile.inc
@@ -13,12 +13,12 @@
 ## architecture
 ##
 #OPT_BOARD := lpc2138_miosix_board
-OPT_BOARD := stm32f103ze_stm3210e-eval
+#OPT_BOARD := stm32f103ze_stm3210e-eval
 #OPT_BOARD := stm32f103ve_mp3v2
 #OPT_BOARD := stm32f100rb_stm32vldiscovery
 #OPT_BOARD := stm32f103ve_strive_mini
 #OPT_BOARD := stm32f103ze_redbull_v2
-#OPT_BOARD := stm32f407vg_stm32f4discovery
+OPT_BOARD := stm32f407vg_stm32f4discovery
 #OPT_BOARD := stm32f207ig_stm3220g-eval
 #OPT_BOARD := stm32f207zg_ethboard_v2
 #OPT_BOARD := stm32f207ze_als_camboard
@@ -31,8 +31,8 @@ OPT_BOARD := stm32f103ze_stm3210e-eval
 ## -O2 is recomended otherwise, as it provides a good balance between code
 ## size and speed
 ##
-#OPT_OPTIMIZATION := -O0
-OPT_OPTIMIZATION := -O2
+OPT_OPTIMIZATION := -O0
+#OPT_OPTIMIZATION := -O2
 #OPT_OPTIMIZATION := -O3
 #OPT_OPTIMIZATION := -Os
 
@@ -259,8 +259,8 @@ endif
 ## Then, initialize C/C++ flags with -D_MIOSIX so that application code can
 ## know if the OS is MIOSIX
 ##
-CFLAGS_BASE   := -D_MIOSIX=\"$(OPT_BOARD)\"
-CXXFLAGS_BASE := -D_MIOSIX=\"$(OPT_BOARD)\"
+CFLAGS_BASE   := -D_MIOSIX_BOARDNAME=\"$(OPT_BOARD)\"
+CXXFLAGS_BASE := -D_MIOSIX_BOARDNAME=\"$(OPT_BOARD)\"
 
 ##
 ## Now two big switch-like constructs nested. The first lists all possible
@@ -538,7 +538,7 @@ else ifeq ($(ARCH),cortexM3_stm32)
     endif
 
     ## Select compiler
-    PREFIX=arm-miosix-eabi-
+    PREFIX := arm-miosix-eabi-
 
     ## From compiler prefix form the name of the compiler and other tools
     CC  := $(PREFIX)gcc
@@ -553,14 +553,12 @@ else ifeq ($(ARCH),cortexM3_stm32)
     ## Select appropriate compiler flags for both ASM/C/C++/linker
     AFLAGS_BASE   := -mcpu=cortex-m3 -mthumb
     CFLAGS_BASE   += -D_ARCH_CORTEXM3_STM32 $(CLOCK_FREQ) $(XRAM)            \
-                     -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c 
+                     -mcpu=cortex-m3 -mthumb $(OPT_OPTIMIZATION)             \
+                     -ffunction-sections -Wall -g -c 
     CXXFLAGS_BASE += -D_ARCH_CORTEXM3_STM32 $(CLOCK_FREQ) $(XRAM)            \
                      $(OPT_EXCEPT) -mcpu=cortex-m3 -mthumb                   \
-                     -mfix-cortex-m3-ldrd $(OPT_OPTIMIZATION)                \
-                     -ffunction-sections -Wall -g -c
-    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     -Wl,--gc-sections,-Map,main.map                         \
+                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
+    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -Wl,--gc-sections,-Map,main.map \
                      -Wl,-T./miosix/$(LINKER_SCRIPT) $(OPT_EXCEPT)           \
                      $(OPT_OPTIMIZATION) -nostdlib
 
@@ -614,7 +612,7 @@ else ifeq ($(ARCH),cortexM4_stm32f4)
         PROGRAM_CMDLINE := qstlink2 -cqewV ./main.bin
 
     ##-------------------------------------------------------------------------
-    ## BOARD: stm32f4discovery
+    ## BOARD: stm32f4bitsboard
     ##
     else ifeq ($(OPT_BOARD),stm32f407vg_bitsboard)
 
@@ -653,7 +651,7 @@ else ifeq ($(ARCH),cortexM4_stm32f4)
     endif
 
     ## Select compiler
-    PREFIX=arm-miosix-eabi-
+    PREFIX := arm-miosix-eabi-
 
     ## From compiler prefix form the name of the compiler and other tools
     CC  := $(PREFIX)gcc
@@ -666,18 +664,17 @@ else ifeq ($(ARCH),cortexM4_stm32f4)
     SZ  := $(PREFIX)size
 
     ## Select appropriate compiler flags for both ASM/C/C++/linker
-    #FIXME: Replace -mcpu=cortex-m3 with -mcpu=cortex-m4 after upgrading GCC
-    AFLAGS_BASE   := -mcpu=cortex-m3 -mthumb
-    CFLAGS_BASE   += -D_ARCH_CORTEXM4_STM32F4 $(CLOCK_FREQ) $(SRAM_BOOT)     \
-                     -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
+    AFLAGS_BASE   := -mcpu=cortex-m4 -mthumb -mfloat-abi=hard -mfpu=fpv4-sp-d16
+    CFLAGS_BASE   += -D_ARCH_CORTEXM4_STM32F4 $(CLOCK_FREQ) $(SRAM_BOOT)        \
+                     -mcpu=cortex-m4 -mthumb -mfloat-abi=hard -mfpu=fpv4-sp-d16 \
                      $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
-    CXXFLAGS_BASE += -D_ARCH_CORTEXM4_STM32F4 $(CLOCK_FREQ) $(SRAM_BOOT)     \
-                     $(OPT_EXCEPT) -mcpu=cortex-m3 -mthumb                   \
-                     -mfix-cortex-m3-ldrd $(OPT_OPTIMIZATION)                \
-                     -ffunction-sections -Wall -g -c
-    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     -Wl,--gc-sections,-Map,main.map                         \
-                     -Wl,-T./miosix/$(LINKER_SCRIPT) $(OPT_EXCEPT)           \
+    CXXFLAGS_BASE += -D_ARCH_CORTEXM4_STM32F4 $(CLOCK_FREQ) $(SRAM_BOOT)        \
+                     -mcpu=cortex-m4 -mthumb -mfloat-abi=hard -mfpu=fpv4-sp-d16 \
+                     $(OPT_EXCEPT) $(OPT_OPTIMIZATION) -ffunction-sections      \
+                     -Wall -g -c
+    LFLAGS_BASE   := -mcpu=cortex-m4 -mthumb -mfloat-abi=hard -mfpu=fpv4-sp-d16 \
+                     -Wl,--gc-sections,-Map,main.map                            \
+                     -Wl,-T./miosix/$(LINKER_SCRIPT) $(OPT_EXCEPT)              \
                      $(OPT_OPTIMIZATION) -nostdlib
 
     ## Select architecture specific files
@@ -840,14 +837,12 @@ else ifeq ($(ARCH),cortexM3_stm32f2)
     ## Select appropriate compiler flags for both ASM/C/C++/linker
     AFLAGS_BASE   := -mcpu=cortex-m3 -mthumb
     CFLAGS_BASE   += -D_ARCH_CORTEXM3_STM32F2 $(CLOCK_FREQ) $(XRAM)          \
-                     -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
+                     -mcpu=cortex-m3 -mthumb $(OPT_OPTIMIZATION)             \
+                     -ffunction-sections -Wall -g -c
     CXXFLAGS_BASE += -D_ARCH_CORTEXM3_STM32F2 $(CLOCK_FREQ) $(XRAM)          \
                      $(OPT_EXCEPT) -mcpu=cortex-m3 -mthumb                   \
-                     -mfix-cortex-m3-ldrd $(OPT_OPTIMIZATION)                \
-                     -ffunction-sections -Wall -g -c
-    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     -Wl,--gc-sections,-Map,main.map                         \
+                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
+    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -Wl,--gc-sections,-Map,main.map \
                      -Wl,-T./miosix/$(LINKER_SCRIPT) $(OPT_EXCEPT)           \
                      $(OPT_OPTIMIZATION) -nostdlib
 
@@ -923,14 +918,12 @@ else ifeq ($(ARCH),cortexM3_stm32l1)
     ## Select appropriate compiler flags for both ASM/C/C++/linker
     AFLAGS_BASE   := -mcpu=cortex-m3 -mthumb
     CFLAGS_BASE   += -D_ARCH_CORTEXM3_STM32L1 $(CLOCK_FREQ) $(XRAM)          \
-                     -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
+                     -mcpu=cortex-m3 -mthumb $(OPT_OPTIMIZATION)             \
+                     -ffunction-sections -Wall -g -c
     CXXFLAGS_BASE += -D_ARCH_CORTEXM3_STM32L1 $(CLOCK_FREQ) $(XRAM)          \
                      $(OPT_EXCEPT) -mcpu=cortex-m3 -mthumb                   \
-                     -mfix-cortex-m3-ldrd $(OPT_OPTIMIZATION)                \
-                     -ffunction-sections -Wall -g -c
-    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -mfix-cortex-m3-ldrd            \
-                     -Wl,--gc-sections,-Map,main.map                         \
+                     $(OPT_OPTIMIZATION) -ffunction-sections -Wall -g -c
+    LFLAGS_BASE   := -mcpu=cortex-m3 -mthumb -Wl,--gc-sections,-Map,main.map \
                      -Wl,-T./miosix/$(LINKER_SCRIPT) $(OPT_EXCEPT)           \
                      $(OPT_OPTIMIZATION) -nostdlib
 
diff --git a/miosix/config/miosix_settings.h b/miosix/config/miosix_settings.h
index 879e4424ba23ff24ee989c90f4a4da29d74a0f90..82760f0e21325adaec8722dd9cb1c954c512d052 100644
--- a/miosix/config/miosix_settings.h
+++ b/miosix/config/miosix_settings.h
@@ -69,7 +69,7 @@ namespace miosix {
 /// \def WITH_FILESYSTEM
 /// Allows to enable/disable filesystem support.
 /// By default it is defined (filesystem support is enabled)
-#define WITH_FILESYSTEM
+//#define WITH_FILESYSTEM
     
 /// \def SYNC_AFTER_WRITE
 /// Increases filesystem write robustness. After each write operation the
@@ -119,7 +119,7 @@ const unsigned char MAX_OPEN_FILES=8;
  * mode, so to use debugging it is necessary to disble sleep in the idle thread.
  * By default it is not defined (idle thread calls sleep).
  */
-//#define JTAG_DISABLE_SLEEP
+#define JTAG_DISABLE_SLEEP
 
 /// Minimum stack size (MUST be divisible by 4)
 const unsigned int STACK_MIN=256;
diff --git a/miosix/kernel/kernel.cpp b/miosix/kernel/kernel.cpp
index e4fd73b9c8bc12d9b0f76ec5be5d805069e5a9a4..49f3c26555b9a7dd8a872d113e86e4514847d2fc 100644
--- a/miosix/kernel/kernel.cpp
+++ b/miosix/kernel/kernel.cpp
@@ -181,7 +181,7 @@ void startKernel()
     //Fill watermark and stack
     memset(base, WATERMARK_FILL, WATERMARK_LEN);
     base+=WATERMARK_LEN/sizeof(unsigned int);
-    memset(base, STACK_FILL, STACK_IDLE);
+    memset(base, STACK_FILL, STACK_IDLE+CTXSAVE_ON_STACK);
 
     //On some architectures some registers are saved on the stack, therefore
     //initCtxsave *must* be called after filling the stack.
@@ -320,7 +320,10 @@ Thread *Thread::create(void *(*startfunc)(void *), unsigned int stacksize,
     //Fill watermark and stack
     memset(base, WATERMARK_FILL, WATERMARK_LEN);
     base+=WATERMARK_LEN/sizeof(unsigned int);
-    memset(base, STACK_FILL, stacksize);
+    //Note: cortex-M4 has two layouts for ctxsave-on-stack, depending on
+    //whether fp regs are used, and they differ in size, so fill the entire
+    //stack or memory profiling may fail
+    memset(base, STACK_FILL, stacksize+CTXSAVE_ON_STACK);
 
     //On some architectures some registers are saved on the stack, therefore
     //initCtxsave *must* be called after filling the stack.
diff --git a/miosix/kernel/syscalls.cpp b/miosix/kernel/syscalls.cpp
index 550da2fe4baa57c3999611ce9d3a4175ba5d8f1a..07b6e2490812c165249d621486f6e0e1f50ba8e5 100644
--- a/miosix/kernel/syscalls.cpp
+++ b/miosix/kernel/syscalls.cpp
@@ -91,18 +91,24 @@ void operator delete[](void *p) throw()
 
 /**
  * \internal
- * The default version of this function provided with libstdc++ requires
+ * The default versions of these functions provided with libstdc++ require
  * exception support. This means that a program using pure virtual functions
  * incurs in the code size penalty of exception support even when compiling
- * without exceptions. By replacing the default implementation with this one the
- * problem is fixed.
+ * without exceptions. By replacing the default implementations with these ones
+ * the problem is fixed.
  */
-extern "C" void __cxa_pure_virtual(void)
+extern "C" void __cxxabiv1::__cxa_pure_virtual(void)
 {
     miosix::errorLog("\r\n***Pure virtual method called\r\n");
     _exit(1);
 }
 
+extern "C" void __cxxabiv1::__cxa_deleted_virtual(void) //Replaces the libstdc++ default, like __cxa_pure_virtual
+{
+    miosix::errorLog("\r\n***Deleted virtual method called\r\n");
+    _exit(1); //Log the error first, then call _exit with status 1
+}
+
 /*
  * If not using exceptions, ovverride these functions with
  * an implementation that does not throw, to minimze code size
@@ -405,6 +411,11 @@ void *_sbrk_r(struct _reent *ptr, ptrdiff_t incr)
     return reinterpret_cast<void*>(prev_heap_end);
 }
 
+void *sbrk(ptrdiff_t incr) //Forwards to _sbrk_r, passing _impure_ptr as the reent pointer
+{
+    return _sbrk_r(_impure_ptr,incr);
+}
+
 /**
  * \internal
  * __malloc_lock, called by malloc to ensure no context switch happens during
@@ -433,7 +444,7 @@ void __malloc_unlock()
 
 /**
  * \internal
- * _open_r, Open a file
+ * _open_r, open a file
  */
 int _open_r(struct _reent *ptr, const char *name, int flags, int mode)
 {
@@ -446,9 +457,14 @@ int _open_r(struct _reent *ptr, const char *name, int flags, int mode)
     #endif //WITH_FILESYSTEM
 }
 
+int open(const char *name, int flags, ...) //Forwards to _open_r, passing _impure_ptr as the reent pointer
+{
+    return _open_r(_impure_ptr,name,flags,0); //TODO: retrieve file mode; the variadic argument is ignored and 0 is passed
+}
+
 /**
  * \internal
- * _close_r, Close a file
+ * _close_r, close a file
  */
 int _close_r(struct _reent *ptr, int fd)
 {
@@ -461,11 +477,16 @@ int _close_r(struct _reent *ptr, int fd)
     #endif //WITH_FILESYSTEM
 }
 
+int close(int fd) //Forwards to _close_r, passing _impure_ptr as the reent pointer
+{
+    return _close_r(_impure_ptr,fd);
+}
+
 /**
  * \internal
- * _write (for C++ library)
+ * _write_r, write to a file
  */
-int _write(int fd, const void *buf, size_t cnt)
+int _write_r(struct _reent *ptr, int fd, const void *buf, size_t cnt)
 { 
     switch(fd)
     {
@@ -506,20 +527,21 @@ int _write(int fd, const void *buf, size_t cnt)
     }
 }
 
-/**
- * \internal
- * _write_r, write data to files, or the standard output/ standard error
- */
-int _write_r(struct _reent *ptr, int fd, const void *buf, size_t cnt)
+int _write(int fd, const void *buf, size_t cnt) //Forwards to _write_r, passing _impure_ptr as the reent pointer
+{
+    return _write_r(_impure_ptr,fd,buf,cnt);
+}
+
+int write(int fd, const void *buf, size_t cnt) //Same forwarding as _write, under the unprefixed name
 {
-    return _write(fd,buf,cnt);
+    return _write_r(_impure_ptr,fd,buf,cnt);
 }
 
 /**
  * \internal
- * _read (for C++ library)
+ * _read_r, read from a file
  */
-int _read(int fd, void *buf, size_t cnt)
+int _read_r(struct _reent *ptr, int fd, void *buf, size_t cnt)
 {
     switch(fd)
     {
@@ -579,20 +601,21 @@ int _read(int fd, void *buf, size_t cnt)
     }
 }
 
-/**
- * \internal
- * _read_r, read data from files or the standard input
- */
-int _read_r(struct _reent *ptr, int fd, void *buf, size_t cnt)
+int _read(int fd, void *buf, size_t cnt) //Forwards to _read_r, passing _impure_ptr as the reent pointer
+{
+    return _read_r(_impure_ptr,fd,buf,cnt);
+}
+
+int read(int fd, void *buf, size_t cnt) //Same forwarding as _read, under the unprefixed name
 {
-    return _read(fd,buf,cnt);
+    return _read_r(_impure_ptr,fd,buf,cnt);
 }
 
 /**
  * \internal
- * _lseek (for C++ library)
+ * _lseek_r, move file pointer
  */
-off_t _lseek(int fd, off_t pos, int whence)
+off_t _lseek_r(struct _reent *ptr, int fd, off_t pos, int whence)
 {
     #ifdef WITH_FILESYSTEM
     return miosix::Filesystem::instance().lseekFile(fd,pos,whence);
@@ -602,20 +625,21 @@ off_t _lseek(int fd, off_t pos, int whence)
     #endif //WITH_FILESYSTEM
 }
 
-/**
- * \internal
- * _lseek_r, seek trough a file
- */
-off_t _lseek_r(struct _reent *ptr, int fd, off_t pos, int whence)
+off_t _lseek(int fd, off_t pos, int whence) //Forwards to _lseek_r, passing _impure_ptr as the reent pointer
 {
-    return _lseek(fd,pos,whence);
+    return _lseek_r(_impure_ptr,fd,pos,whence);
+}
+
+off_t lseek(int fd, off_t pos, int whence) //Same forwarding as _lseek, under the unprefixed name
+{
+    return _lseek_r(_impure_ptr,fd,pos,whence);
 }
 
 /**
  * \internal
- * _fstat (for C++ library)
+ * _fstat_r, return file info
  */
-int _fstat(int fd, struct stat *pstat)
+int _fstat_r(struct _reent *ptr, int fd, struct stat *pstat)
 {
     if(fd<0) return -1;
     if(fd<3)
@@ -633,13 +657,14 @@ int _fstat(int fd, struct stat *pstat)
     }
 }
 
-/**
- * \internal
- * _fstat_r, collect data about a file
- */
-int _fstat_r(struct _reent *ptr, int fd, struct stat *pstat)
+int _fstat(int fd, struct stat *pstat) //Forwards to _fstat_r, passing _impure_ptr as the reent pointer
 {
-    return _fstat(fd,pstat);
+    return _fstat_r(_impure_ptr,fd,pstat);
+}
+
+int fstat(int fd, struct stat *pstat) //Same forwarding as _fstat, under the unprefixed name
+{
+    return _fstat_r(_impure_ptr,fd,pstat);
 }
 
 /**
@@ -655,6 +680,16 @@ int _stat_r(struct _reent *ptr, const char *file, struct stat *pstat)
     #endif //WITH_FILESYSTEM
 }
 
+int _stat(const char *file, struct stat *pstat) //Forwards to _stat_r, passing _impure_ptr as the reent pointer
+{
+    return _stat_r(_impure_ptr,file,pstat);
+}
+
+int stat(const char *file, struct stat *pstat) //Same forwarding as _stat, under the unprefixed name
+{
+    return _stat_r(_impure_ptr,file,pstat);
+}
+
 /**
  * \internal
  * isatty, returns 1 if fd is associated with a terminal
@@ -797,6 +832,15 @@ int _wait_r(struct _reent *ptr, int *status)
     return -1;
 }
 
+/**
+ * \internal
+ * raise, provided since abort calls raise and then exit. Signals aren't currently supported, so it always returns -1.
+ */
+int raise(int sig)
+{
+    return -1;
+}
+
 #ifdef __cplusplus
 }
 #endif
@@ -807,9 +851,6 @@ int _wait_r(struct _reent *ptr, int *status)
 // Check that newlib has been configured correctly
 // ===============================================
 
-#ifndef _WANT_REENT_SMALL
-#warning "_WANT_REENT_SMALL not defined"
-#endif //_WANT_REENT_SMALL
 
 #ifndef _REENT_SMALL
 #error "_REENT_SMALL not defined"
diff --git a/miosix/util/version.cpp b/miosix/util/version.cpp
index 76dd6ab0e9baca9864e3a6d0af8b078efe7c1222..25a5d7302726ea4a30d4422028519bda30acbb92 100644
--- a/miosix/util/version.cpp
+++ b/miosix/util/version.cpp
@@ -37,7 +37,7 @@ namespace miosix {
 #define AU
 #endif
 
-const char AU ver[]="Miosix v1.61 (" _MIOSIX ", " __DATE__ " " __TIME__ CV ")";
+const char AU ver[]="Miosix v1.61 (" _MIOSIX_BOARDNAME ", " __DATE__ " " __TIME__ CV ")";
 
 const char *getMiosixVersion()
 {
diff --git a/miosix_np_2/nbproject/private/private.xml b/miosix_np_2/nbproject/private/private.xml
index b7fe5780c3e4b636cb719a6eacbeee52f4664c28..191d3a5c503f776d825b71c0eb46ddff1b5aa69a 100644
--- a/miosix_np_2/nbproject/private/private.xml
+++ b/miosix_np_2/nbproject/private/private.xml
@@ -5,7 +5,7 @@
     </code-assistance-data>
     <data xmlns="http://www.netbeans.org/ns/make-project-private/1">
         <activeConfTypeElem>0</activeConfTypeElem>
-        <activeConfIndexElem>10</activeConfIndexElem>
+        <activeConfIndexElem>7</activeConfIndexElem>
     </data>
     <editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/1"/>
 </project-private>