From 4af18987700162ba8e09f5df001e9319324f1778 Mon Sep 17 00:00:00 2001 From: Terraneo Federico <fede.tft@miosix.org> Date: Sun, 26 Mar 2023 11:40:32 +0200 Subject: [PATCH] Patch optimization --- miosix/kernel/kernel.cpp | 3 +- miosix/kernel/kernel.h | 2 - miosix/kernel/pthread.cpp | 69 +++++++++---------- miosix/kernel/pthread_private.h | 7 +- .../stdlib_integration/libc_integration.cpp | 53 ++------------ miosix/stdlib_integration/libc_integration.h | 44 +++++++++++- 6 files changed, 86 insertions(+), 92 deletions(-) diff --git a/miosix/kernel/kernel.cpp b/miosix/kernel/kernel.cpp index 57838a69..cb888ee1 100755 --- a/miosix/kernel/kernel.cpp +++ b/miosix/kernel/kernel.cpp @@ -384,12 +384,11 @@ bool Thread::testTerminate() void Thread::sleep(unsigned int ms) { - nanoSleep(mul32x32to64(ms,1000000)); + nanoSleepUntil(getTime()+mul32x32to64(ms,1000000)); } void Thread::nanoSleep(long long ns) { - if(ns<=0) return; nanoSleepUntil(getTime()+ns); } diff --git a/miosix/kernel/kernel.h b/miosix/kernel/kernel.h index f525b4c4..c7eb36af 100755 --- a/miosix/kernel/kernel.h +++ b/miosix/kernel/kernel.h @@ -420,8 +420,6 @@ struct SleepData; class MemoryProfiling; class Mutex; class ConditionVariable; -void IRQaddToSleepingList(SleepData *x); -void IRQremoveFromSleepingList(SleepData *x); #ifdef WITH_PROCESSES class ProcessBase; #endif //WITH_PROCESSES diff --git a/miosix/kernel/pthread.cpp b/miosix/kernel/pthread.cpp index f0dc18c2..e7360f01 100644 --- a/miosix/kernel/pthread.cpp +++ b/miosix/kernel/pthread.cpp @@ -38,15 +38,19 @@ #include "kernel.h" #include "error.h" #include "pthread_private.h" -#include "timeconversion.h" +#include "stdlib_integration/libc_integration.h" using namespace miosix; +namespace miosix { +void IRQaddToSleepingList(SleepData *x); +void IRQremoveFromSleepingList(SleepData *x); +} //namespace miosix + // // Newlib's pthread.h has been patched since Miosix 1.68 to contain a definition // for pthread_mutex_t and pthread_cond_t that allows a fast implementation -// of mutexes and condition variables. This *requires* to use gcc 4.5.2 with -// Miosix specific patches. +// of mutexes and condition variables. This *requires* to use an up-to-date gcc. // //These functions needs to be callable from C @@ -289,35 +293,30 @@ int pthread_mutex_unlock(pthread_mutex_t *mutex) // Condition variable API // +static_assert(sizeof(IntrusiveList<CondData>)==sizeof(pthread_cond_t),"Invalid pthread_cond_t size"); + int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) { //attr is currently not considered - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); - - // Using placement mechanism to instantiate an IntrusiveList inside the cond variable - new (cond) IntrusiveList<CondData>; + new (cond) IntrusiveList<CondData>; //Placement new as cond is a C type return 0; } int pthread_cond_destroy(pthread_cond_t *cond) { - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); - auto *condList = reinterpret_cast<IntrusiveList<CondData>*>(cond); - + auto *condList=reinterpret_cast<IntrusiveList<CondData>*>(cond); if(!condList->empty()) return EBUSY; - // Placement destroy - condList->~IntrusiveList<CondData>(); + condList->~IntrusiveList<CondData>(); //Call destructor manually return 0; } int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); auto *condList=reinterpret_cast<IntrusiveList<CondData>*>(cond); FastInterruptDisableLock dLock; Thread *p=Thread::IRQgetCurrentThread(); - CondData listItem; //Element of a linked list on stack + CondData listItem; listItem.thread=p; condList->push_back(&listItem); //Putting this thread last on the list (lifo policy) p->flags.IRQsetCondWait(true); @@ -331,21 +330,24 @@ int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) return 0; } -int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) +int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) { - if (abstime->tv_nsec<=0) - return EINVAL; - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); auto *condList=reinterpret_cast<IntrusiveList<CondData>*>(cond); - + + long long timeout=timespec2ll(abstime); + //Disallow absolute sleeps with negative or too low values, as the ns2tick() + //algorithm in TimeConversion can't handle negative values and may undeflow + //even with very low values due to a negative adjustOffsetNs. As an unlikely + //side effect, very shor sleeps done very early at boot will be extended. + timeout=std::max(timeout,100000LL); FastInterruptDisableLock dLock; Thread *p=Thread::IRQgetCurrentThread(); - CondData listItem; //Element of a linked list on stack + CondData listItem; listItem.thread=p; condList->push_back(&listItem); //Putting this thread last on the list (lifo policy) - SleepData sleepData; //Element to put in the sleepingList + SleepData sleepData; sleepData.p=p; - sleepData.wakeupTime=mul32x32to64(abstime->tv_sec, 1000000000) + abstime->tv_nsec; + sleepData.wakeupTime=timeout; IRQaddToSleepingList(&sleepData); //Putting this thread on the sleeping list too p->flags.IRQsetCondWait(true); @@ -354,8 +356,9 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const s FastInterruptEnableLock eLock(dLock); Thread::yield(); //Here the wait becomes effective } - //Ensure that the thread is removed from both list, as it can be woken by either - //a signal/broadcast (that removes it from condList) or by IRQwakeThreads (that removes it from sleeping list). + //Ensure that the thread is removed from both list, as it can be woken by + //either a signal/broadcast (that removes it from condList) or by + //IRQwakeThreads (that removes it from sleeping list). bool removed=condList->removeFast(&listItem); IRQremoveFromSleepingList(&sleepData); @@ -368,7 +371,6 @@ int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const s int pthread_cond_signal(pthread_cond_t *cond) { - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); auto *condList=reinterpret_cast<IntrusiveList<CondData>*>(cond); #ifdef SCHED_TYPE_EDF @@ -379,13 +381,12 @@ int pthread_cond_signal(pthread_cond_t *cond) if(condList->empty()) return 0; Thread *t=condList->front()->thread; - t->flags.IRQsetCondWait(false); condList->pop_front(); - //Need to reset the sleep flag to ensure wakeup of threads in timedwait - t->flags.IRQsetSleep(false); + t->flags.IRQsetCondWait(false); + t->flags.IRQsetSleep(false); //Needed due to timedwait #ifdef SCHED_TYPE_EDF - if(t->IRQgetPriority() >Thread::IRQgetCurrentThread()->IRQgetPriority()) + if(t->IRQgetPriority()>Thread::IRQgetCurrentThread()->IRQgetPriority()) hppw=true; #endif //SCHED_TYPE_EDF } @@ -398,7 +399,6 @@ int pthread_cond_signal(pthread_cond_t *cond) int pthread_cond_broadcast(pthread_cond_t *cond) { - static_assert(sizeof(IntrusiveList<CondData>)==sizeof(*cond), "Invalid pthread_cond_t size"); auto *condList=reinterpret_cast<IntrusiveList<CondData>*>(cond); #ifdef SCHED_TYPE_EDF @@ -409,14 +409,13 @@ int pthread_cond_broadcast(pthread_cond_t *cond) while(!condList->empty()) { Thread *t=condList->front()->thread; - t->flags.IRQsetCondWait(false); condList->pop_front(); - //Need to reset the sleep flag to ensure wakeup of threads in timedwait - t->flags.IRQsetSleep(false); + t->flags.IRQsetCondWait(false); + t->flags.IRQsetSleep(false); //Needed due to timedwait #ifdef SCHED_TYPE_EDF - if(t->IRQgetPriority() > - Thread::IRQgetCurrentThread()->IRQgetPriority()) hppw=true; + if(t->IRQgetPriority()>Thread::IRQgetCurrentThread()->IRQgetPriority()) + hppw=true; #endif //SCHED_TYPE_EDF } } diff --git a/miosix/kernel/pthread_private.h b/miosix/kernel/pthread_private.h index b45019d6..a690d230 100644 --- a/miosix/kernel/pthread_private.h +++ b/miosix/kernel/pthread_private.h @@ -219,13 +219,12 @@ static inline unsigned int IRQdoMutexUnlockAllDepthLevels(pthread_mutex_t *mutex /** * \internal * \struct CondData - * This struct is used to make a list of threads that are waiting on a condition variable. - * It is used by the kernel, and should not be used by end users. + * This struct is used to make a list of threads that are waiting on a condition + * variable. It is used by the kernel, and should not be used by end users. */ struct CondData : public IntrusiveListItem { - ///\internal Thread that is waiting - Thread *thread; + Thread *thread; ///<\internal Thread that is waiting }; } //namespace miosix diff --git a/miosix/stdlib_integration/libc_integration.cpp b/miosix/stdlib_integration/libc_integration.cpp index 769fa304..444ff091 100644 --- a/miosix/stdlib_integration/libc_integration.cpp +++ b/miosix/stdlib_integration/libc_integration.cpp @@ -1,6 +1,5 @@ /*************************************************************************** - * Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 * - * by Terraneo Federico * + * Copyright (C) 2008-2023 by Terraneo Federico * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -34,10 +33,9 @@ #include <unistd.h> #include <dirent.h> #include <reent.h> -#include <sys/time.h> -#include <sys/times.h> #include <sys/stat.h> #include <sys/fcntl.h> +#include <sys/times.h> //// Settings #include "config/miosix_settings.h" //// Filesystem @@ -899,52 +897,11 @@ int getdents(unsigned int fd, struct dirent *dirp, unsigned int count) * - timer_create -> ? */ -static constexpr int nsPerSec = 1000000000; - -/** - * Convert from timespec to the Miosix representation of time - * \param tp input timespec, must not be nullptr and be a valid pointer - * \return Miosix nanoseconds - */ -inline long long timespec2ll(const struct timespec *tp) -{ - //NOTE: the cast is required to prevent overflow with older versions - //of the Miosix compiler where tv_sec is int and not long long - return static_cast<long long>(tp->tv_sec) * nsPerSec + tp->tv_nsec; -} - -/** - * Convert from the Miosix representation of time to a timespec - * \param ns input Miosix nanoseconds - * \param tp output timespec, must not be nullptr and be a valid pointer - */ -inline void ll2timespec(long long ns, struct timespec *tp) -{ - #ifdef __ARM_EABI__ - // Despite there being a single intrinsic, __aeabi_ldivmod, that computes - // both the result of the / and % operator, GCC 9.2.0 isn't smart enough and - // calls the intrinsic twice. This asm implementation takes ~188 cycles - // instead of ~316 by calling it once. Sadly, I had to use asm as the - // calling conventions of the intrinsic appear to be nonstandard. - // NOTE: actually a and b, by being 64 bit numbers, occupy register pairs - register long long a asm("r0") = ns; - register long long b asm("r2") = nsPerSec; - // NOTE: clobbering lr to mark function not leaf due to the bl - asm volatile("bl __aeabi_ldivmod" : "+r"(a), "+r"(b) :: "lr"); - tp->tv_sec = a; - tp->tv_nsec = static_cast<long>(b); - #else //__ARM_EABI__ - #warning Warning POSIX time API not optimized for this platform - tp->tv_sec = ns / nsPerSec; - tp->tv_nsec = static_cast<long>(ns % nsPerSec); - #endif //__ARM_EABI__ -} - int clock_gettime(clockid_t clock_id, struct timespec *tp) { if(tp==nullptr) return -1; //TODO: support CLOCK_REALTIME - ll2timespec(miosix::getTime(),tp); + miosix::ll2timespec(miosix::getTime(),tp); return 0; } @@ -960,7 +917,7 @@ int clock_getres(clockid_t clock_id, struct timespec *res) //TODO: support CLOCK_REALTIME //Integer division with round-to-nearest for better accuracy - int resolution=2*nsPerSec/miosix::internal::osTimerGetFrequency(); + int resolution=2*miosix::nsPerSec/miosix::internal::osTimerGetFrequency(); resolution=(resolution & 1) ? resolution/2+1 : resolution/2; res->tv_sec=0; @@ -973,7 +930,7 @@ int clock_nanosleep(clockid_t clock_id, int flags, { if(req==nullptr) return -1; //TODO: support CLOCK_REALTIME - long long timeNs=timespec2ll(req); + long long timeNs=miosix::timespec2ll(req); if(flags!=TIMER_ABSTIME) timeNs+=miosix::getTime(); miosix::Thread::nanoSleepUntil(timeNs); return 0; diff --git a/miosix/stdlib_integration/libc_integration.h b/miosix/stdlib_integration/libc_integration.h index abff8245..16df08f8 100644 --- a/miosix/stdlib_integration/libc_integration.h +++ b/miosix/stdlib_integration/libc_integration.h @@ -1,5 +1,5 @@ /*************************************************************************** - * Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 by Terraneo Federico * + * Copyright (C) 2008-2023 by Terraneo Federico * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -31,6 +31,7 @@ #include <reent.h> #include <cstdlib> #include <cstring> +#include <sys/time.h> #ifndef COMPILING_MIOSIX #error "This is header is private, it can't be used outside Miosix itself." @@ -56,6 +57,47 @@ unsigned int getMaxHeap(); */ void setCReentrancyCallback(struct _reent *(*callback)()); +static constexpr int nsPerSec = 1000000000; + +/** + * Convert from timespec to the Miosix representation of time + * \param tp input timespec, must not be nullptr and be a valid pointer + * \return Miosix nanoseconds + */ +inline long long timespec2ll(const struct timespec *tp) +{ + //NOTE: the cast is required to prevent overflow with older versions + //of the Miosix compiler where tv_sec is int and not long long + return static_cast<long long>(tp->tv_sec) * nsPerSec + tp->tv_nsec; +} + +/** + * Convert from the Miosix representation of time to a timespec + * \param ns input Miosix nanoseconds + * \param tp output timespec, must not be nullptr and be a valid pointer + */ +inline void ll2timespec(long long ns, struct timespec *tp) +{ + #ifdef __ARM_EABI__ + // Despite there being a single intrinsic, __aeabi_ldivmod, that computes + // both the result of the / and % operator, GCC 9.2.0 isn't smart enough and + // calls the intrinsic twice. This asm implementation takes ~188 cycles + // instead of ~316 by calling it once. Sadly, I had to use asm as the + // calling conventions of the intrinsic appear to be nonstandard. + // NOTE: actually a and b, by being 64 bit numbers, occupy register pairs + register long long a asm("r0") = ns; + register long long b asm("r2") = nsPerSec; + // NOTE: clobbering lr to mark function not leaf due to the bl + asm volatile("bl __aeabi_ldivmod" : "+r"(a), "+r"(b) :: "lr"); + tp->tv_sec = a; + tp->tv_nsec = static_cast<long>(b); + #else //__ARM_EABI__ + #warning Warning POSIX time API not optimized for this platform + tp->tv_sec = ns / nsPerSec; + tp->tv_nsec = static_cast<long>(ns % nsPerSec); + #endif //__ARM_EABI__ +} + } //namespace miosix #endif //LIBC_INTEGRATION_H -- GitLab