删除stm32代码

2026-05-07 00:04:41 +08:00 · 2021-09-12 18:06:07 +08:00
parent 7b96b852af
commit fa91c71ce1
85 changed files with 0 additions and 15332 deletions
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/LICENSE.txt
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/LICENSE.txt
@@ -1,201 +0,0 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS
   APPENDIX: How to apply the Apache License to your work.
      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.
   Copyright {yyyy} {name of copyright owner}
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/Compiler/EventRecorderConf.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/Compiler/EventRecorderConf.h
@@ -1,44 +0,0 @@
 /*------------------------------------------------------------------------------
 * MDK - Component ::Event Recorder
 * Copyright (c) 2016 ARM Germany GmbH. All rights reserved.
 *------------------------------------------------------------------------------
 * Name:    EventRecorderConf.h
 * Purpose: Event Recorder Configuration
 * Rev.:    V1.0.0
 *----------------------------------------------------------------------------*/
 //-------- <<< Use Configuration Wizard in Context Menu >>> --------------------
 // <h>Event Recorder
 //   <o>Number of Records
 //     <8=>8 <16=>16 <32=>32 <64=>64 <128=>128 <256=>256 <512=>512 <1024=>1024
 //     <2048=>2048 <4096=>4096 <8192=>8192 <16384=>16384 <32768=>32768
 //     <65536=>65536 <131072=>131072 <262144=>262144 <524288=>524288
 //     <1048576=>1048576
 //   <i>Configure size of Event Record Buffer (each record is 16 bytes)
 //   <i>Must be 2^n (min=8, max=1048576)
 #define EVENT_RECORD_COUNT      64U
 //   <o>Time Stamp Source
 //      <0=> DWT Cycle Counter  <1=> SysTick
 //      <3=> User Timer (Normal Reset)  <4=> User Timer (Power-On Reset)
 //   <i>Selects source for 32-bit time stamp
 #define EVENT_TIMESTAMP_SOURCE  1
 //   <h>SysTick Configuration
 //   <i>Configure values when Time Stamp Source is set to SysTick
 //     <o>SysTick Input Clock Frequency [Hz] <1-1000000000>
 //     <i>Defines SysTick input clock (typically identical to the processor clock)
 #define SYSTICK_CLOCK           100000000U
 //     <o>SysTick Interrupt Period [us] <1-1000000000>
 //     <i>Defines time period of the SysTick timer interrupt
 #define SYSTICK_PERIOD_US       1000U
 //   </h>
 // </h>
 //------------- <<< end of configuration section >>> ---------------------------
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM0/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM0/RTE_Components.h
@@ -1,24 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_cifar10' 
 * Target:  'ARMCM0' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM0.h"
 #define RTE_Compiler_EventRecorder
          #define RTE_Compiler_EventRecorder_DAP
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_EVR      /* Compiler I/O: STDOUT EVR */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM3/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM3/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_cifar10' 
 * Target:  'ARMCM3' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM3.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM4_FP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM4_FP/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_cifar10' 
 * Target:  'ARMCM4_FP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM4_FP.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM7_SP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/RTE/_ARMCM7_SP/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_cifar10' 
 * Target:  'ARMCM7_SP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM7_SP.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10.cpp
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10.cpp
@@ -1,196 +0,0 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
 *
 *
 * Project:       CMSIS NN Library
 * Title:         arm_nnexamples_cifar10.cpp
 *
 * Description:   Convolutional Neural Network Example
 *
 * Target Processor: Cortex-M4/Cortex-M7
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   - Neither the name of Arm LIMITED nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------- */
 /**
 * @ingroup groupExamples
 */
 /**
 * @defgroup CNNExample Convolutional Neural Network Example
 *
 * \par Description:
 * \par
 * Demonstrates a convolutional neural network (CNN) example with the use of convolution,
 * ReLU activation, pooling and fully-connected functions.
 *
 * \par Model definition:
 * \par
 * The CNN used in this example is based on CIFAR-10 example from Caffe [1]. 
 * The neural network consists
 * of 3 convolution layers interspersed by ReLU activation and max pooling layers, followed by a 
 * fully-connected layer at the end. The input to the network is a 32x32 pixel color image, which will 
 * be classified into one of the 10 output classes. 
 * This example model implementation needs 32.3 KB to store weights, 40 KB for activations and 
 * 3.1 KB for storing the \c im2col data.
 *
 * \image html CIFAR10_CNN.gif "Neural Network model definition"
 *
 * \par Variables Description:
 * \par
 * \li \c conv1_wt, \c conv2_wt, \c conv3_wt are convolution layer weight matrices
 * \li \c conv1_bias, \c conv2_bias, \c conv3_bias are convolution layer bias arrays
 * \li \c ip1_wt, ip1_bias point to fully-connected layer weights and biases
 * \li \c image_data points to the input image data
 * \li \c output_data points to the classification output
 * \li \c col_buffer is a buffer to store the \c im2col output
 * \li \c scratch_buffer is used to store the activation data (intermediate layer outputs)
 *
 * \par CMSIS NN Software Library Functions Used:
 * \par
 * - arm_convolve_HWC_q7_RGB()
 * - arm_convolve_HWC_q7_fast()
 * - arm_relu_q7()
 * - arm_maxpool_q7_HWC()
 * - arm_avepool_q7_HWC()
 * - arm_fully_connected_q7_opt()
 * - arm_fully_connected_q7()
 * - arm_softmax_q7()
 *
 * <b> Refer  </b>
 * \link arm_nnexamples_cifar10.cpp \endlink
 *
 * \par [1] https://github.com/BVLC/caffe
 */
 #include <stdint.h>
 #include <stdio.h>
 #include "arm_math.h"
 #include "arm_nnexamples_cifar10_parameter.h"
 #include "arm_nnexamples_cifar10_weights.h"
 #include "arm_nnfunctions.h"
 #include "arm_nnexamples_cifar10_inputs.h"
 #ifdef _RTE_
 #include "RTE_Components.h"
 #ifdef RTE_Compiler_EventRecorder
 #include "EventRecorder.h"
 #endif
 #endif
 // Network weights and biases, one pair per layer. Each array is sized from
 // the layer geometry macros and initialised from an initialiser macro
 // (CONV1_WT, CONV1_BIAS, ...).
 // NOTE(review): the initialiser macros are presumably provided by
 // arm_nnexamples_cifar10_weights.h, which is not visible here - confirm.
 static q7_t conv1_wt[CONV1_IM_CH * CONV1_KER_DIM * CONV1_KER_DIM * CONV1_OUT_CH] = CONV1_WT;
 static q7_t conv1_bias[CONV1_OUT_CH] = CONV1_BIAS;
 static q7_t conv2_wt[CONV2_IM_CH * CONV2_KER_DIM * CONV2_KER_DIM * CONV2_OUT_CH] = CONV2_WT;
 static q7_t conv2_bias[CONV2_OUT_CH] = CONV2_BIAS;
 static q7_t conv3_wt[CONV3_IM_CH * CONV3_KER_DIM * CONV3_KER_DIM * CONV3_OUT_CH] = CONV3_WT;
 static q7_t conv3_bias[CONV3_OUT_CH] = CONV3_BIAS;
 // Fully-connected layer weights and biases.
 static q7_t ip1_wt[IP1_DIM * IP1_OUT] = IP1_WT;
 static q7_t ip1_bias[IP1_OUT] = IP1_BIAS;
 /* image_data must hold the raw uint8 RGB image, interleaved per pixel: [RGB, RGB, RGB ... RGB] */
 uint8_t   image_data[CONV1_IM_CH * CONV1_IM_DIM * CONV1_IM_DIM] = IMG_DATA;
 // One value per output class; written by the fully-connected layer and then
 // overwritten in place by the softmax in main().
 q7_t      output_data[IP1_OUT];
 // Shared vector buffer, sized for the largest of: im2col buffer, average pool buffer,
 // fully connected buffer. 2 * 5 * 5 * 32 * 2 = 3200 elements.
 q7_t      col_buffer[2 * 5 * 5 * 32 * 2];
 // Activation storage for the intermediate layer outputs (32 * 32 * 10 * 4 = 40960
 // elements). main() splits it into two regions at offset 32 * 32 * 32.
 q7_t      scratch_buffer[32 * 32 * 10 * 4];
 /**
  * Example entry point: runs the CIFAR-10 network once over image_data and
  * prints the ten values left in output_data.
  *
  * The layers ping-pong between two regions of scratch_buffer:
  * img_buffer1 (start of the buffer) and img_buffer2 (offset 32 * 32 * 32).
  * Each layer reads one region and writes the other, so the order of the
  * calls below must not be changed.
  *
  * @return 0 after the scores have been printed.
  */
 int main()
 {
  #ifdef RTE_Compiler_EventRecorder
  EventRecorderInitialize (EventRecordAll, 1);  // initialize and start Event Recorder
  #endif
  printf("start execution\n");
  /* start the execution */
  // img_buffer1 covers the first 32 * 32 * 32 = 32768 elements of scratch_buffer;
  // img_buffer2 is the remaining 8192 elements (scratch_buffer holds 40960).
  q7_t     *img_buffer1 = scratch_buffer;
  q7_t     *img_buffer2 = img_buffer1 + 32 * 32 * 32;
  /* input pre-processing */
  // Per-channel mean and right-shift amounts, indexed R, G, B.
  int mean_data[3] = INPUT_MEAN_SHIFT;
  unsigned int scale_data[3] = INPUT_RIGHT_SHIFT;
  // For every pixel (three interleaved channel bytes): subtract the channel
  // mean, scale up by 2^7, add the rounding term 2^(scale-1), shift right by
  // the channel scale and saturate to 8 bits with __SSAT.
  // NOTE(review): the rounding term shifts by (scale_data[c] - 1), which assumes
  // every INPUT_RIGHT_SHIFT entry is >= 1; the macro is not visible here - confirm.
  for (int i=0;i<32*32*3; i+=3) {
    img_buffer2[i] =   (q7_t)__SSAT( ((((int)image_data[i]   - mean_data[0])<<7) + (0x1<<(scale_data[0]-1)))
                             >> scale_data[0], 8);
    img_buffer2[i+1] = (q7_t)__SSAT( ((((int)image_data[i+1] - mean_data[1])<<7) + (0x1<<(scale_data[1]-1)))
                             >> scale_data[1], 8);
    img_buffer2[i+2] = (q7_t)__SSAT( ((((int)image_data[i+2] - mean_data[2])<<7) + (0x1<<(scale_data[2]-1)))
                             >> scale_data[2], 8);
  }
  // conv1 img_buffer2 -> img_buffer1, followed by an in-place ReLU on img_buffer1
  arm_convolve_HWC_q7_RGB(img_buffer2, CONV1_IM_DIM, CONV1_IM_CH, conv1_wt, CONV1_OUT_CH, CONV1_KER_DIM, CONV1_PADDING,
                          CONV1_STRIDE, conv1_bias, CONV1_BIAS_LSHIFT, CONV1_OUT_RSHIFT, img_buffer1, CONV1_OUT_DIM,
                          (q15_t *) col_buffer, NULL);
  arm_relu_q7(img_buffer1, CONV1_OUT_DIM * CONV1_OUT_DIM * CONV1_OUT_CH);
  // pool1 img_buffer1 -> img_buffer2
  // NOTE(review): pool1 passes NULL for the buffer argument whereas pool2 and
  // pool3 pass col_buffer - confirm this difference is intentional.
  arm_maxpool_q7_HWC(img_buffer1, CONV1_OUT_DIM, CONV1_OUT_CH, POOL1_KER_DIM,
                     POOL1_PADDING, POOL1_STRIDE, POOL1_OUT_DIM, NULL, img_buffer2);
  // conv2 img_buffer2 -> img_buffer1, followed by an in-place ReLU on img_buffer1
  arm_convolve_HWC_q7_fast(img_buffer2, CONV2_IM_DIM, CONV2_IM_CH, conv2_wt, CONV2_OUT_CH, CONV2_KER_DIM,
                           CONV2_PADDING, CONV2_STRIDE, conv2_bias, CONV2_BIAS_LSHIFT, CONV2_OUT_RSHIFT, img_buffer1,
                           CONV2_OUT_DIM, (q15_t *) col_buffer, NULL);
  arm_relu_q7(img_buffer1, CONV2_OUT_DIM * CONV2_OUT_DIM * CONV2_OUT_CH);
  // pool2 img_buffer1 -> img_buffer2
  arm_maxpool_q7_HWC(img_buffer1, CONV2_OUT_DIM, CONV2_OUT_CH, POOL2_KER_DIM,
                     POOL2_PADDING, POOL2_STRIDE, POOL2_OUT_DIM, col_buffer, img_buffer2);
  // conv3 img_buffer2 -> img_buffer1, followed by an in-place ReLU on img_buffer1
  arm_convolve_HWC_q7_fast(img_buffer2, CONV3_IM_DIM, CONV3_IM_CH, conv3_wt, CONV3_OUT_CH, CONV3_KER_DIM,
                           CONV3_PADDING, CONV3_STRIDE, conv3_bias, CONV3_BIAS_LSHIFT, CONV3_OUT_RSHIFT, img_buffer1,
                           CONV3_OUT_DIM, (q15_t *) col_buffer, NULL);
  arm_relu_q7(img_buffer1, CONV3_OUT_DIM * CONV3_OUT_DIM * CONV3_OUT_CH);
  // pool3 img_buffer1 -> img_buffer2
  arm_maxpool_q7_HWC(img_buffer1, CONV3_OUT_DIM, CONV3_OUT_CH, POOL3_KER_DIM,
                     POOL3_PADDING, POOL3_STRIDE, POOL3_OUT_DIM, col_buffer, img_buffer2);
  // fully-connected layer img_buffer2 -> output_data; img_buffer1 is no longer
  // needed as an activation buffer and is passed as the last (q15_t *) argument.
  arm_fully_connected_q7_opt(img_buffer2, ip1_wt, IP1_DIM, IP1_OUT, IP1_BIAS_LSHIFT, IP1_OUT_RSHIFT, ip1_bias,
                             output_data, (q15_t *) img_buffer1);
  // Softmax is applied in place. The literal 10 here and in the loop below
  // equals IP1_OUT, the declared length of output_data.
  arm_softmax_q7(output_data, 10, output_data);
  for (int i = 0; i < 10; i++)
  {
      printf("%d: %d\n", i, output_data[i]);
  }
  return 0;
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_inputs.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_inputs.h
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_parameter.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_parameter.h
@@ -1,43 +0,0 @@
 /*
  * Layer geometry for the CIFAR-10 example network.
  *
  * Naming: <LAYER>_IM_DIM / _IM_CH are the input width/height and channel
  * count, _KER_DIM the square kernel size, _OUT_DIM / _OUT_CH the output
  * width/height and channel count. Each layer's input geometry equals the
  * previous layer's output geometry.
  */

 /* conv1: 32x32 input with 3 channels, 5x5 kernel, 32 output channels */
 #define CONV1_IM_DIM 32
 #define CONV1_IM_CH 3
 #define CONV1_KER_DIM 5
 #define CONV1_PADDING 2
 #define CONV1_STRIDE 1
 #define CONV1_OUT_CH 32
 #define CONV1_OUT_DIM 32

 /* pool1: 3x3 window, stride 2, 32x32 -> 16x16 */
 #define POOL1_KER_DIM 3
 #define POOL1_STRIDE 2
 #define POOL1_PADDING 0
 #define POOL1_OUT_DIM 16

 /* conv2: 16x16 input with 32 channels, 5x5 kernel, 16 output channels */
 #define CONV2_IM_DIM 16
 #define CONV2_IM_CH 32
 #define CONV2_KER_DIM 5
 #define CONV2_PADDING 2
 #define CONV2_STRIDE 1
 #define CONV2_OUT_CH 16
 #define CONV2_OUT_DIM 16

 /* pool2: 3x3 window, stride 2, 16x16 -> 8x8 */
 #define POOL2_KER_DIM 3
 #define POOL2_STRIDE 2
 #define POOL2_PADDING 0
 #define POOL2_OUT_DIM 8

 /* conv3: 8x8 input with 16 channels, 5x5 kernel, 32 output channels */
 #define CONV3_IM_DIM 8
 #define CONV3_IM_CH 16
 #define CONV3_KER_DIM 5
 #define CONV3_PADDING 2
 #define CONV3_STRIDE 1
 #define CONV3_OUT_CH 32
 #define CONV3_OUT_DIM 8

 /* pool3: 3x3 window, stride 2, 8x8 -> 4x4 */
 #define POOL3_KER_DIM 3
 #define POOL3_STRIDE 2
 #define POOL3_PADDING 0
 #define POOL3_OUT_DIM 4

 /* ip1 (fully connected): 4x4 input with 32 channels flattened to 512 values, 10 outputs */
 #define IP1_IM_DIM 4
 #define IP1_IM_CH 32
 /*
  * Flattened input length. Parenthesised so the product keeps its value when
  * the macro is embedded in a larger expression (e.g. x / IP1_DIM); the
  * previous bare 4*4*32 replacement list did not.
  */
 #define IP1_DIM (IP1_IM_DIM * IP1_IM_DIM * IP1_IM_CH)
 #define IP1_OUT 10
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_weights.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/arm_nnexamples_cifar10_weights.h
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/readme.txt
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/cifar10/readme.txt
@@ -1,4 +0,0 @@
 CMSIS NN Lib example arm_nnexamples_cifar10 for
  Cortex-M4 and Cortex-M7.
 The example is configured for uVision Simulator.
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/Compiler/EventRecorderConf.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/Compiler/EventRecorderConf.h
@@ -1,44 +0,0 @@
 /*------------------------------------------------------------------------------
 * MDK - Component ::Event Recorder
 * Copyright (c) 2016 ARM Germany GmbH. All rights reserved.
 *------------------------------------------------------------------------------
 * Name:    EventRecorderConf.h
 * Purpose: Event Recorder Configuration
 * Rev.:    V1.0.0
 *----------------------------------------------------------------------------*/
 //-------- <<< Use Configuration Wizard in Context Menu >>> --------------------
 // <h>Event Recorder
 //   <o>Number of Records
 //     <8=>8 <16=>16 <32=>32 <64=>64 <128=>128 <256=>256 <512=>512 <1024=>1024
 //     <2048=>2048 <4096=>4096 <8192=>8192 <16384=>16384 <32768=>32768
 //     <65536=>65536 <131072=>131072 <262144=>262144 <524288=>524288
 //     <1048576=>1048576
 //   <i>Configure size of Event Record Buffer (each record is 16 bytes)
 //   <i>Must be 2^n (min=8, max=1048576)
 #define EVENT_RECORD_COUNT      64U
 //   <o>Time Stamp Source
 //      <0=> DWT Cycle Counter  <1=> SysTick
 //      <3=> User Timer (Normal Reset)  <4=> User Timer (Power-On Reset)
 //   <i>Selects source for 32-bit time stamp
 #define EVENT_TIMESTAMP_SOURCE  1
 //   <h>SysTick Configuration
 //   <i>Configure values when Time Stamp Source is set to SysTick
 //     <o>SysTick Input Clock Frequency [Hz] <1-1000000000>
 //     <i>Defines SysTick input clock (typically identical to the processor clock)
 #define SYSTICK_CLOCK           100000000U
 //     <o>SysTick Interrupt Period [us] <1-1000000000>
 //     <i>Defines time period of the SysTick timer interrupt
 #define SYSTICK_PERIOD_US       1000U
 //   </h>
 // </h>
 //------------- <<< end of configuration section >>> ---------------------------
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM0/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM0/RTE_Components.h
@@ -1,24 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_gru' 
 * Target:  'ARMCM0' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM0.h"
 #define RTE_Compiler_EventRecorder
          #define RTE_Compiler_EventRecorder_DAP
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_EVR      /* Compiler I/O: STDOUT EVR */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM3/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM3/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_gru' 
 * Target:  'ARMCM3' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM3.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM4_FP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM4_FP/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_gru' 
 * Target:  'ARMCM4_FP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM4_FP.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM7_SP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/RTE/_ARMCM7_SP/RTE_Components.h
@@ -1,22 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_gru' 
 * Target:  'ARMCM7_SP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM7_SP.h"
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/arm_nnexamples_gru.cpp
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/arm_nnexamples_gru.cpp
@@ -1,221 +0,0 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
 *
 *
 * Project:       CMSIS NN Library
 * Title:         arm_nnexamples_gru.cpp
 *
 * Description:   Gated Recurrent Unit Example
 *
 * Target Processor: Cortex-M4/Cortex-M7
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   - Neither the name of Arm LIMITED nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------- */
 /**
 * @ingroup groupExamples
 */
 /**
 * @defgroup GRUExample Gated Recurrent Unit Example
 *
 * \par Description:
 * \par
 * Demonstrates a gated recurrent unit (GRU) example with the use of fully-connected,
 * Tanh/Sigmoid activation functions.
 *
 * \par Model definition:
 * \par
 * GRU is a type of recurrent neural network (RNN). It contains two sigmoid gates and one hidden
 * state. 
 * \par
 * The computation can be summarized as:
 * <pre>z[t] = sigmoid( W_z &sdot; {h[t-1],x[t]} )
 * r[t] = sigmoid( W_r &sdot; {h[t-1],x[t]} ) 
 * n[t] = tanh( W_n &sdot; {r[t] &times; h[t-1], x[t]} ) 
 * h[t] = (1 - z[t]) &times; h[t-1] + z[t] &times; n[t] </pre>
 * \image html GRU.gif "Gate Recurrent Unit Diagram"
 *
 * \par Variables Description:
 * \par
 * \li \c update_gate_weights, \c reset_gate_weights, \c hidden_state_weights are weights corresponding to update gate (W_z), reset gate (W_r), and hidden state (W_n).
 * \li \c update_gate_bias, \c reset_gate_bias, \c hidden_state_bias are layer bias arrays
 * \li \c test_input1, \c test_input2, \c test_history are the inputs and initial history
 *
 * \par
 * The buffer is allocated as:
 * \par
 * | reset | input | history | update | hidden_state |
 * \par
 * In this way, the concatenation is automatically done since (reset, input) and (input, history)
 * are physically concatenated in memory.
 * \par
 *  The ordering of the weight matrix should be adjusted accordingly.
 *
  *
 * 
 * \par CMSIS DSP Software Library Functions Used:
 * \par
 * - arm_fully_connected_mat_q7_vec_q15_opt()
 * - arm_nn_activations_direct_q15()
 * - arm_mult_q15()
 * - arm_offset_q15()
 * - arm_sub_q15()
 * - arm_copy_q15()
 *
 * <b> Refer  </b>
 * \link arm_nnexamples_gru.cpp \endlink
 *
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include "arm_nnexamples_gru_test_data.h"
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 #ifdef _RTE_
 #include "RTE_Components.h"
 #ifdef RTE_Compiler_EventRecorder
 #include "EventRecorder.h"
 #endif
 #endif
 /*
  * Dimensions of the example network. DIM_VEC (64) matches DIM_INPUT + DIM_HISTORY,
  * the length of the concatenated vector that gru_example() feeds to each
  * fully-connected layer.
  */
 #define DIM_HISTORY 32
 #define DIM_INPUT 32
 #define DIM_VEC 64
 /*
  * When USE_X4 is defined the *_X4 weight initialisers are used and gru_example()
  * calls arm_fully_connected_mat_q7_vec_q15_opt(); otherwise the *_X2 initialisers
  * and arm_fully_connected_mat_q7_vec_q15() are used.
  */
 #define USE_X4
 #ifndef USE_X4
 static q7_t update_gate_weights[DIM_VEC * DIM_HISTORY] = UPDATE_GATE_WEIGHT_X2;
 static q7_t reset_gate_weights[DIM_VEC * DIM_HISTORY] = RESET_GATE_WEIGHT_X2;
 static q7_t hidden_state_weights[DIM_VEC * DIM_HISTORY] = HIDDEN_STATE_WEIGHT_X2;
 #else
 static q7_t update_gate_weights[DIM_VEC * DIM_HISTORY] = UPDATE_GATE_WEIGHT_X4;
 static q7_t reset_gate_weights[DIM_VEC * DIM_HISTORY] = RESET_GATE_WEIGHT_X4;
 static q7_t hidden_state_weights[DIM_VEC * DIM_HISTORY] = HIDDEN_STATE_WEIGHT_X4;
 #endif
 /* One bias value per output element of each gate / hidden-state layer. */
 static q7_t update_gate_bias[DIM_HISTORY] = UPDATE_GATE_BIAS;
 static q7_t reset_gate_bias[DIM_HISTORY] = RESET_GATE_BIAS;
 static q7_t hidden_state_bias[DIM_HISTORY] = HIDDEN_STATE_BIAS;
 /* Two input vectors (one per GRU iteration run by main) and the initial history. */
 static q15_t test_input1[DIM_INPUT] = INPUT_DATA1;
 static q15_t test_input2[DIM_INPUT] = INPUT_DATA2;
 static q15_t test_history[DIM_HISTORY] = HISTORY_DATA;
 /*
  * Working buffer that gru_example() partitions into
  * | reset | input | history | update | hidden_state |
  * i.e. four regions of DIM_HISTORY elements plus one of DIM_INPUT elements.
  */
 q15_t     scratch_buffer[DIM_HISTORY * 4 + DIM_INPUT];
 /*
  * Fully-connected kernel used by gru_example(): the plain variant without
  * USE_X4, the _opt variant with it.
  */
 #ifndef USE_X4
 #define GRU_FULLY_CONNECTED arm_fully_connected_mat_q7_vec_q15
 #else
 #define GRU_FULLY_CONNECTED arm_fully_connected_mat_q7_vec_q15_opt
 #endif
 /**
  * @brief Run one GRU step in place on a shared scratch buffer.
  *
  * The buffer is treated as five consecutive regions
  *   | reset | input | history | update | hidden_state |
  * where the input region holds input_size elements and every other region
  * holds history_size elements. The caller fills the input and history
  * regions; on return the history region holds the new state.
  *
  * @param[in,out] scratch_input         start of the scratch buffer
  * @param[in]     input_size            number of elements in the input region
  * @param[in]     history_size          number of elements in each other region
  * @param[in]     weights_update        weights of the update gate layer
  * @param[in]     weights_reset         weights of the reset gate layer
  * @param[in]     weights_hidden_state  weights of the hidden-state layer
  * @param[in]     bias_update           bias of the update gate layer
  * @param[in]     bias_reset            bias of the reset gate layer
  * @param[in]     bias_hidden_state     bias of the hidden-state layer
  */
 void gru_example(q15_t * scratch_input, uint16_t input_size, uint16_t history_size,
                  q7_t * weights_update, q7_t * weights_reset, q7_t * weights_hidden_state,
                  q7_t * bias_update, q7_t * bias_reset, q7_t * bias_hidden_state)
 {
   // carve the scratch buffer into its five regions
   q15_t *const gate_r = scratch_input;
   q15_t *const x_t = gate_r + history_size;
   q15_t *const h_prev = x_t + input_size;
   q15_t *const gate_z = h_prev + history_size;
   q15_t *const h_cand = gate_z + history_size;
   // length of a concatenated (history-sized, input-sized) vector
   const uint16_t concat_len = input_size + history_size;
   // reset gate: fully-connected layer over the contiguous (input, history) regions,
   // output range is controlled by the bias_shift (0) and out_shift (15) arguments
   GRU_FULLY_CONNECTED(x_t, weights_reset, concat_len, history_size, 0, 15, bias_reset, gate_r, NULL);
   // sigmoid; its integer bit-width argument must stay consistent with out_shift
   arm_nn_activations_direct_q15(gate_r, history_size, 0, ARM_SIGMOID);
   // reset region now holds history * reset, directly in front of the input region
   arm_mult_q15(h_prev, gate_r, gate_r, history_size);
   // update gate: same layer shape, again reading (input, history)
   GRU_FULLY_CONNECTED(x_t, weights_update, concat_len, history_size, 0, 15, bias_update, gate_z, NULL);
   // sigmoid; its integer bit-width argument must stay consistent with out_shift
   arm_nn_activations_direct_q15(gate_z, history_size, 0, ARM_SIGMOID);
   // hidden state: fully-connected layer over the contiguous (reset, input) regions
   GRU_FULLY_CONNECTED(gate_r, weights_hidden_state, concat_len, history_size, 0, 15, bias_hidden_state, h_cand,
                       NULL);
   // tanh; its integer bit-width argument must stay consistent with out_shift
   arm_nn_activations_direct_q15(h_cand, history_size, 0, ARM_TANH);
   // hidden_state = update * hidden_state
   arm_mult_q15(gate_z, h_cand, h_cand, history_size);
   // turn z into (z - 1) so that the final addition becomes a subtraction
   arm_offset_q15(gate_z, 0x8000, gate_z, history_size);
   // update = history * (z - 1)
   arm_mult_q15(h_prev, gate_z, gate_z, history_size);
   // new history = z * n - (z - 1) * history
   arm_sub_q15(h_cand, gate_z, h_prev, history_size);
 }
 #undef GRU_FULLY_CONNECTED
 int main()
 {
  #ifdef RTE_Compiler_EventRecorder
  EventRecorderInitialize (EventRecordAll, 1);  // initialize and start Event Recorder
  #endif
  printf("Start GRU execution\n");
  int       input_size = DIM_INPUT;
  int       history_size = DIM_HISTORY;
  // copy over the input data 
  arm_copy_q15(test_input1, scratch_buffer + history_size, input_size);
  arm_copy_q15(test_history, scratch_buffer + history_size + input_size, history_size);
  gru_example(scratch_buffer, input_size, history_size,
              update_gate_weights, reset_gate_weights, hidden_state_weights,
              update_gate_bias, reset_gate_bias, hidden_state_bias);
  printf("Complete first iteration on GRU\n");
  arm_copy_q15(test_input2, scratch_buffer + history_size, input_size);
  gru_example(scratch_buffer, input_size, history_size,
              update_gate_weights, reset_gate_weights, hidden_state_weights,
              update_gate_bias, reset_gate_bias, hidden_state_bias);
  printf("Complete second iteration on GRU\n");
  return 0;
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/arm_nnexamples_gru_test_data.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/arm_nnexamples_gru_test_data.h
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/readme.txt
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Examples/ARM/arm_nn_examples/gru/readme.txt
@@ -1,4 +0,0 @@
 CMSIS NN Lib example arm_nnexample_gru0 for
  Cortex-M4 and Cortex-M7.
 The example is configured for uVision Simulator.
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nn_tables.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nn_tables.h
@@ -1,59 +0,0 @@
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_tables.h
 * Description:  Extern declaration for NN tables
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef _ARM_NN_TABLES_H
 #define _ARM_NN_TABLES_H
 #include "arm_math.h"
 /**
 * @brief Lookup tables for the sigmoid and tanh activation functions
 *
 * One 256-entry table per function and per data type (q7_t / q15_t).
 */
 extern const q15_t sigmoidTable_q15[256];
 extern const q7_t sigmoidTable_q7[256];
 extern const q7_t tanhTable_q7[256];
 extern const q15_t tanhTable_q15[256];
  /**
   * @brief 2-way tables for various activation functions
   *
   * 2-way table: L table for value smaller than 1/4,
   * H table for the remaining values.
   * We have this only for the q15_t version. It does not make
   * sense to have it for q7_t type
   *
   * NOTE(review): this header previously declared the sigmoid H/L pair twice.
   * The redundant pair was presumably meant to declare the tanh H/L tables;
   * confirm against the table definitions before adding such declarations.
   */
 extern const q15_t sigmoidHTable_q15[192];
 extern const q15_t sigmoidLTable_q15[128];
 #endif                          /* _ARM_NN_TABLES_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nnfunctions.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nnfunctions.h
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h
@@ -1,202 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nnsupportfunctions.h
 * Description:  Public header file of support functions for CMSIS NN Library
 *
 * $Date:        13. July 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 * -------------------------------------------------------------------- */
 #ifndef _ARM_NNSUPPORTFUNCTIONS_H_
 #define _ARM_NNSUPPORTFUNCTIONS_H_
 #include "arm_math.h"
 #include "arm_common_tables.h"
 //#include <cstring>
 #ifdef __cplusplus
 extern    "C"
 {
 #endif
 /**
 * @brief Union for SIMD access of Q31/Q15/Q7 types
 *
 * Overlays a single q31_t with an array of two q15_t and an array of four
 * q7_t, so the same storage can be read as one word, two half-words or
 * four bytes.
 */
 union arm_nnword
 {
    q31_t     word;
               /**< Q31 type: the storage viewed as one word */
    q15_t     half_words[2];
               /**< Q15 type: the storage viewed as two half-words */
    q7_t      bytes[4];
               /**< Q7 type: the storage viewed as four bytes */
 };
 /**
 * @brief Enum for specifying activation function types
 *
 */
 typedef enum
 {
    ARM_SIGMOID = 0,
                 /**< Sigmoid activation function */
    ARM_TANH = 1,
              /**< Tanh activation function */
 } arm_nn_activation_type;
 /**
 * @defgroup nndata_convert Neural Network Data Conversion Functions
 *
 * Perform data type conversion in-between neural network operations
 *
 */
 /**
 * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift
 * @param[in]       *pSrc points to the Q7 input vector
 * @param[out]      *pDst points to the Q15 output vector
 * @param[in]       blockSize length of the input vector
 * @return none.
 *
 */
 void      arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
 /**
 * @brief  Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
 * @param[in]       *pSrc points to the Q7 input vector
 * @param[out]      *pDst points to the Q15 output vector
 * @param[in]       blockSize length of the input vector
 * @return none.
 *
 * NOTE(review): the exact output ordering is not visible in this header;
 * presumably it matches read_and_pad_reordered() - confirm against the
 * implementation.
 */
 void      arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize);
 #if defined (ARM_MATH_DSP)
 /**
 * @brief Read one word of four Q7 values and expand it into two words of Q15 values
 *
 * One 32-bit word is loaded through __SIMD32(source), which is post-incremented.
 * The word is sign-extended with __SXTB16 once as loaded and once after an
 * 8-bit rotate, and the two results are merged with __PKHTB / __PKHBT. Which
 * merged word goes to out1 and which to out2 depends on ARM_MATH_BIG_ENDIAN.
 *
 * @param[in]   source  location of the packed Q7 word
 * @param[out]  out1    receives one expanded word
 * @param[out]  out2    receives the other expanded word
 * @return      source after the post-increment
 */
 __STATIC_FORCEINLINE void *read_and_pad(void *source, q31_t * out1, q31_t * out2)
 {
        const q31_t     packed = *__SIMD32(source)++;
        const q31_t     ext_rotated = __SXTB16(__ROR(packed, 8));
        const q31_t     ext_plain = __SXTB16(packed);
        const q31_t     merged_top = __PKHTB(ext_rotated, ext_plain, 16);
        const q31_t     merged_bottom = __PKHBT(ext_plain, ext_rotated, 16);
 #ifndef ARM_MATH_BIG_ENDIAN
        *out2 = merged_top;
        *out1 = merged_bottom;
 #else
        *out1 = merged_top;
        *out2 = merged_bottom;
 #endif
        return source;
 }
 /**
 * @brief Read one word of four Q7 values and expand it into two words of Q15 values, reordered
 *
 * One 32-bit word is loaded through __SIMD32(source), which is post-incremented.
 * Unlike read_and_pad(), the two __SXTB16 results (word as loaded, and word
 * rotated by 8 bits) are stored directly without a __PKHTB / __PKHBT merge.
 * Which result goes to out1 and which to out2 depends on ARM_MATH_BIG_ENDIAN.
 *
 * @param[in]   source  location of the packed Q7 word
 * @param[out]  out1    receives one expanded word
 * @param[out]  out2    receives the other expanded word
 * @return      source after the post-increment
 */
 __STATIC_FORCEINLINE void *read_and_pad_reordered(void *source, q31_t * out1, q31_t * out2)
 {
        const q31_t     packed = *__SIMD32(source)++;
        const q31_t     ext_rotated = __SXTB16(__ROR(packed, 8));
        const q31_t     ext_plain = __SXTB16(packed);
 #ifndef ARM_MATH_BIG_ENDIAN
        *out2 = ext_rotated;
        *out1 = ext_plain;
 #else
        *out1 = ext_rotated;
        *out2 = ext_plain;
 #endif
        return source;
 }
 #endif
 /**
 * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
 *
 * Basic Math Functions for Neural Network Computation
 *
 */
 /**
 * @brief           Q15 vector multiplication with variable output shifts
 * @param[in]       *pSrcA        pointer to the first input vector
 * @param[in]       *pSrcB        pointer to the second input vector
 * @param[out]      *pDst         pointer to the output vector
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in]       blockSize     number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
 */
 void arm_nn_mult_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  const uint16_t out_shift,
  uint32_t blockSize);
 /**
 * @brief           Q7 vector multiplication with variable output shifts
 * @param[in]       *pSrcA        pointer to the first input vector
 * @param[in]       *pSrcB        pointer to the second input vector
 * @param[out]      *pDst         pointer to the output vector
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in]       blockSize     number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
 */
 void arm_nn_mult_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  const uint16_t out_shift,
  uint32_t blockSize);
 /**
 * @brief Definition of the rounding offset added before a right shift by out_shift
 *
 * Without ARM_NN_TRUNCATE the macro yields 2^(out_shift - 1) for
 * out_shift >= 1 and 0 for out_shift == 0; with ARM_NN_TRUNCATE it is
 * always 0. The result has type int.
 *
 * The argument is fully parenthesised and evaluated exactly once, so
 * expressions such as NN_ROUND(a & b) expand as expected. Shifting left and
 * then right by one avoids the negative shift count that
 * (1 << (out_shift - 1)) would produce for out_shift == 0.
 * out_shift must be smaller than the bit width of unsigned int.
 */
 #ifndef ARM_NN_TRUNCATE
    #define NN_ROUND(out_shift) ( (int) ((0x1u << (out_shift)) >> 1) )
 #else
    #define NN_ROUND(out_shift) 0
 #endif
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM0/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM0/RTE_Components.h
@@ -1,20 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_cifar10' 
 * Target:  'ARMCM0' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM0.h"
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM3/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM3/RTE_Components.h
@@ -1,26 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_nn_test' 
 * Target:  'ARMCM3' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM3.h"
 #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */
          #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */
          #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM4_FP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM4_FP/RTE_Components.h
@@ -1,26 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_nn_test' 
 * Target:  'ARMCM4_FP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM4_FP.h"
 #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */
          #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */
          #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM7_SP/RTE_Components.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/RTE/_ARMCM7_SP/RTE_Components.h
@@ -1,26 +0,0 @@
 /*
 * Auto generated Run-Time-Environment Component Configuration File
 *      *** Do not modify ! ***
 *
 * Project: 'arm_nnexamples_nn_test' 
 * Target:  'ARMCM7_SP' 
 */
 #ifndef RTE_COMPONENTS_H
 #define RTE_COMPONENTS_H
 /*
 * Define the Device Header File: 
 */
 #define CMSIS_device_header "ARMCM7_SP.h"
 #define RTE_Compiler_IO_STDERR          /* Compiler I/O: STDERR */
          #define RTE_Compiler_IO_STDERR_ITM      /* Compiler I/O: STDERR ITM */
 #define RTE_Compiler_IO_STDOUT          /* Compiler I/O: STDOUT */
          #define RTE_Compiler_IO_STDOUT_ITM      /* Compiler I/O: STDOUT ITM */
 #define RTE_Compiler_IO_TTY             /* Compiler I/O: TTY */
          #define RTE_Compiler_IO_TTY_ITM         /* Compiler I/O: TTY ITM */
 #endif /* RTE_COMPONENTS_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q15_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q15_ref.c
@@ -1,71 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Reference q15 convolution with plain nested loops, for a square input
  * image, square kernel and square output image.
  *
  * Input pixels are indexed as (row * dim_im_in + col) * ch_im_in + channel,
  * weights as filter * (ch_im_in * dim_kernel * dim_kernel)
  * + (kernel_row * dim_kernel + kernel_col) * ch_im_in + channel, and outputs
  * as filter + (row * dim_im_out + col) * ch_im_out. Kernel taps that fall
  * outside the input image are skipped. The accumulator starts from the
  * shifted bias (plus a rounding offset unless ARM_NN_TRUNCATE is defined),
  * is right-shifted by out_shift and passed through __SSAT(..., 16).
  *
  * bufferA and bufferB are not referenced by this implementation.
  */
 void arm_convolve_HWC_q15_ref(const q15_t * Im_in,  // input image
                              const uint16_t dim_im_in, // input image dimension
                              const uint16_t ch_im_in,  // number of input image channels
                              const q15_t * wt, // kernel weights
                              const uint16_t ch_im_out, // number of filters, i.e., output image channels
                              const uint16_t dim_kernel,    // filter kernel size
                              const uint16_t padding,   // padding sizes
                              const uint16_t stride,    // stride
                              const q15_t * bias,   // bias
                              const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out,  // output image
                              const uint16_t dim_im_out,    // output image dimension
                              q15_t * bufferA,  //buffer space for input
                              q7_t * bufferB    //buffer space for output
    )
 {
    for (int filter = 0; filter < ch_im_out; filter++)
    {
        // offset of this filter's first weight
        const int filter_base = filter * ch_im_in * dim_kernel * dim_kernel;
        for (int out_row = 0; out_row < dim_im_out; out_row++)
        {
            for (int out_col = 0; out_col < dim_im_out; out_col++)
            {
                int       acc;
 #ifndef ARM_NN_TRUNCATE
                acc = (bias[filter] << bias_shift) + (0x1 << (out_shift - 1));
 #else
                acc = bias[filter] << bias_shift;
 #endif
                for (int k_row = 0; k_row < dim_kernel; k_row++)
                {
                    const int src_row = stride * out_row + k_row - padding;
                    if (src_row < 0 || src_row >= dim_im_in)
                    {
                        continue;   // whole kernel row lies in the padding
                    }
                    for (int k_col = 0; k_col < dim_kernel; k_col++)
                    {
                        const int src_col = stride * out_col + k_col - padding;
                        if (src_col < 0 || src_col >= dim_im_in)
                        {
                            continue;   // tap lies in the padding
                        }
                        const q15_t *pixel = Im_in + (src_row * dim_im_in + src_col) * ch_im_in;
                        const q15_t *taps = wt + filter_base + (k_row * dim_kernel + k_col) * ch_im_in;
                        for (int ch = 0; ch < ch_im_in; ch++)
                        {
                            acc += pixel[ch] * taps[ch];
                        }
                    }
                }
                Im_out[filter + (out_row * dim_im_out + out_col) * ch_im_out] = (q15_t) __SSAT((acc >> out_shift), 16);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q15_ref_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q15_ref_nonsquare.c
@@ -1,83 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Reference q15 convolution with plain nested loops, for non-square input
  * image, kernel, padding, stride and output image.
  *
  * Input pixels are indexed as (row * dim_im_in_x + col) * ch_im_in + channel,
  * weights as filter * (ch_im_in * dim_kernel_x * dim_kernel_y)
  * + (kernel_row * dim_kernel_x + kernel_col) * ch_im_in + channel, and
  * outputs as filter + (row * dim_im_out_x + col) * ch_im_out. Kernel taps
  * that fall outside the input image are skipped. The accumulator starts from
  * the shifted bias (plus a rounding offset unless ARM_NN_TRUNCATE is
  * defined), is right-shifted by out_shift and passed through __SSAT(..., 16).
  *
  * bufferA and bufferB are not referenced by this implementation.
  */
 void
 arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in,
                          const uint16_t dim_im_in_x,
                          const uint16_t dim_im_in_y,
                          const uint16_t ch_im_in,
                          const q15_t * wt,
                          const uint16_t ch_im_out,
                          const uint16_t dim_kernel_x,
                          const uint16_t dim_kernel_y,
                          const uint16_t padding_x,
                          const uint16_t padding_y,
                          const uint16_t stride_x,
                          const uint16_t stride_y,
                          const q15_t * bias,
                          const uint16_t bias_shift,
                          const uint16_t out_shift,
                          q15_t * Im_out,
                          const uint16_t dim_im_out_x,
                          const uint16_t dim_im_out_y,
                          q15_t * bufferA,
                          q7_t * bufferB)
 {
    int       i, j, k, l, m, n;
    int       conv_out;
    // Input coordinates must be plain int: they can be negative inside the
    // padding and can exceed 127 for larger images, so a signed char would
    // truncate them and make the bounds check below pass or fail wrongly.
    int       in_row, in_col;
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
 #ifndef ARM_NN_TRUNCATE
                conv_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1));
 #else
                conv_out = bias[i] << bias_shift;
 #endif
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        // taps that land in the padding contribute nothing
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
                                    wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x + n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q7_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q7_ref.c
@@ -1,72 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Reference q7 convolution with plain nested loops, for a square input
  * image, square kernel and square output image.
  *
  * Input pixels are indexed as (row * dim_im_in + col) * ch_im_in + channel,
  * weights as filter * (ch_im_in * dim_kernel * dim_kernel)
  * + (kernel_row * dim_kernel + kernel_col) * ch_im_in + channel, and outputs
  * as filter + (row * dim_im_out + col) * ch_im_out. Kernel taps that fall
  * outside the input image are skipped. The accumulator starts from the
  * shifted bias (plus a rounding offset unless ARM_NN_TRUNCATE is defined),
  * is right-shifted by out_shift and passed through __SSAT(..., 8).
  *
  * bufferA and bufferB are not referenced by this implementation.
  */
 void arm_convolve_HWC_q7_ref(const q7_t * Im_in,    // input image
                             const uint16_t dim_im_in,  // input image dimension
                             const uint16_t ch_im_in,   // number of input image channels
                             const q7_t * wt,   // kernel weights
                             const uint16_t ch_im_out,  // number of filters, i.e., output image channels
                             const uint16_t dim_kernel, // filter kernel size
                             const uint16_t padding,    // padding sizes
                             const uint16_t stride, // stride
                             const q7_t * bias, // bias
                             const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,    // output image
                             const uint16_t dim_im_out, // output image dimension
                             q15_t * bufferA,   //buffer space for input
                             q7_t * bufferB //buffer space for output
    )
 {
    for (int filter = 0; filter < ch_im_out; filter++)
    {
        // offset of this filter's first weight
        const int filter_base = filter * ch_im_in * dim_kernel * dim_kernel;
        for (int out_row = 0; out_row < dim_im_out; out_row++)
        {
            for (int out_col = 0; out_col < dim_im_out; out_col++)
            {
                int       acc;
 #ifndef ARM_NN_TRUNCATE
                acc = ((q31_t) (bias[filter]) << bias_shift) + (0x1 << (out_shift - 1));
 #else
                acc = bias[filter] << bias_shift;
 #endif
                for (int k_row = 0; k_row < dim_kernel; k_row++)
                {
                    const int src_row = stride * out_row + k_row - padding;
                    if (src_row < 0 || src_row >= dim_im_in)
                    {
                        continue;   // whole kernel row lies in the padding
                    }
                    for (int k_col = 0; k_col < dim_kernel; k_col++)
                    {
                        const int src_col = stride * out_col + k_col - padding;
                        if (src_col < 0 || src_col >= dim_im_in)
                        {
                            continue;   // tap lies in the padding
                        }
                        const q7_t *pixel = Im_in + (src_row * dim_im_in + src_col) * ch_im_in;
                        const q7_t *taps = wt + filter_base + (k_row * dim_kernel + k_col) * ch_im_in;
                        for (int ch = 0; ch < ch_im_in; ch++)
                        {
                            acc += pixel[ch] * taps[ch];
                        }
                    }
                }
                Im_out[filter + (out_row * dim_im_out + out_col) * ch_im_out] = (q7_t) __SSAT((acc >> out_shift), 8);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q7_ref_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_convolve_HWC_q7_ref_nonsquare.c
@@ -1,78 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q7 HWC convolution with independent x/y sizes.
  *
  * Im_in is indexed as [row][col][in_ch], wt as [out_ch][ker_y][ker_x][in_ch]
  * and Im_out as [row][col][out_ch]. Kernel taps that fall outside the input
  * image are skipped, so they add nothing to the accumulator. Each accumulator
  * starts at (bias << bias_shift), plus a rounding term unless
  * ARM_NN_TRUNCATE is defined; the result is shifted right by out_shift and
  * passed through __SSAT(..., 8) before being stored.
  *
  * bufferA and bufferB are not used by this implementation.
  */
 void arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in,  // input image
                                        const uint16_t dim_im_in_x,  // input image dimension x
                                        const uint16_t dim_im_in_y,  // input image dimension y
                                        const uint16_t ch_im_in, // number of input image channels
                                        const q7_t * wt, // kernel weights
                                        const uint16_t ch_im_out,    // number of filters, i.e., output image channels
                                        const uint16_t dim_kernel_x, // filter kernel size x
                                        const uint16_t dim_kernel_y, // filter kernel size y
                                        const uint16_t padding_x,    // padding sizes x
                                        const uint16_t padding_y,    // padding sizes y
                                        const uint16_t stride_x, // stride x
                                        const uint16_t stride_y, // stride y
                                        const q7_t * bias,   // bias
                                        const uint16_t bias_shift,   // amount of left-shift for bias
                                        const uint16_t out_shift,    // amount of right-shift for output
                                        q7_t * Im_out,   // output image
                                        const uint16_t dim_im_out_x, // output image dimension x
                                        const uint16_t dim_im_out_y, // output image dimension y
                                        q15_t * bufferA, // unused
                                        q7_t * bufferB   // unused
    )
 {
    int       i, j, k, l, m, n;
    int       conv_out;
    int       in_row, in_col;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) bufferA;
    (void) bufferB;
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
                conv_out = ((q31_t) (bias[i]) << bias_shift) + round_term;
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        /* Input coordinate hit by kernel tap (m, n). */
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
                                    wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in +
                                       l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_depthwise_separable_conv_HWC_q7_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_depthwise_separable_conv_HWC_q7_ref.c
@@ -1,70 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the square q7 HWC depthwise convolution.
  *
  * For every output pixel and channel c the kernel window is accumulated over
  * input channel c only: Im_in is indexed with channel stride ch_im_in, wt and
  * Im_out with channel stride ch_im_out. No check relates ch_im_in to
  * ch_im_out here; presumably callers pass equal values - confirm at the call
  * site. Taps outside the input image are skipped. Each accumulator starts at
  * (bias << bias_shift), plus a rounding term unless ARM_NN_TRUNCATE is
  * defined; the result is shifted right by out_shift and passed through
  * __SSAT(..., 8) before being stored.
  *
  * bufferA and bufferB are not used by this implementation.
  */
 void arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in,    // input image
                                              const uint16_t dim_im_in,  // input image dimension
                                              const uint16_t ch_im_in,   // number of input image channels
                                              const q7_t * wt,   // kernel weights
                                              const uint16_t ch_im_out,  // number of filters, i.e., output image channels
                                              const uint16_t dim_kernel, // filter kernel size
                                              const uint16_t padding,    // padding sizes
                                              const uint16_t stride, // stride
                                              const q7_t * bias, // bias
                                              const uint16_t bias_shift, // amount of left-shift for bias
                                              const uint16_t out_shift,  // amount of right-shift for output
                                              q7_t * Im_out, // output image
                                              const uint16_t dim_im_out, // output image dimension
                                              q15_t * bufferA,   // unused
                                              q7_t * bufferB // unused
    )
 {
    int       i_out_y, i_out_x, i_ch_out;
    int       i_ker_y, i_ker_x;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) bufferA;
    (void) bufferB;
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
            {
                /* One accumulator per output element. */
                int       conv_out = (bias[i_ch_out] << bias_shift) + round_term;
                for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++)
                {
                    for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++)
                    {
                        int       in_row = stride * i_out_y + i_ker_y - padding;
                        int       in_col = stride * i_out_x + i_ker_x - padding;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + i_ch_out] *
                                wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out];
                        }
                    }
                }
                Im_out[(i_out_y * dim_im_out + i_out_x) * ch_im_out + i_ch_out] =
                    (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_depthwise_separable_conv_HWC_q7_ref_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_depthwise_separable_conv_HWC_q7_ref_nonsquare.c
@@ -1,75 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q7 HWC depthwise convolution with independent
  * x/y sizes.
  *
  * For every output pixel and channel c the kernel window is accumulated over
  * input channel c only: Im_in is indexed with channel stride ch_im_in, wt and
  * Im_out with channel stride ch_im_out. No check relates ch_im_in to
  * ch_im_out here; presumably callers pass equal values - confirm at the call
  * site. Taps outside the input image are skipped. Each accumulator starts at
  * (bias << bias_shift), plus a rounding term unless ARM_NN_TRUNCATE is
  * defined; the result is shifted right by out_shift and passed through
  * __SSAT(..., 8) before being stored.
  *
  * bufferA and bufferB are not used by this implementation.
  */
 void arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in,  // input image
                                                        const uint16_t dim_im_in_x,  // input image dimension x
                                                        const uint16_t dim_im_in_y,  // input image dimension y
                                                        const uint16_t ch_im_in, // number of input image channels
                                                        const q7_t * wt, // kernel weights
                                                        const uint16_t ch_im_out,    // number of filters, i.e., output image channels
                                                        const uint16_t dim_kernel_x, // filter kernel size x
                                                        const uint16_t dim_kernel_y, // filter kernel size y
                                                        const uint16_t padding_x,    // padding sizes x
                                                        const uint16_t padding_y,    // padding sizes y
                                                        const uint16_t stride_x, // stride x
                                                        const uint16_t stride_y, // stride y
                                                        const q7_t * bias,   // bias
                                                        const uint16_t bias_shift,   // amount of left-shift for bias
                                                        const uint16_t out_shift,    // amount of right-shift for output
                                                        q7_t * Im_out,   // output image
                                                        const uint16_t dim_im_out_x, // output image dimension x
                                                        const uint16_t dim_im_out_y, // output image dimension y
                                                        q15_t * bufferA, // unused
                                                        q7_t * bufferB   // unused
    )
 {
    int       i_out_y, i_out_x, i_ch_out;
    int       i_ker_y, i_ker_x;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) bufferA;
    (void) bufferB;
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
            {
                /* One accumulator per output element. */
                int       conv_out = (bias[i_ch_out] << bias_shift) + round_term;
                for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++)
                {
                    for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++)
                    {
                        int       in_row = stride_y * i_out_y + i_ker_y - padding_y;
                        int       in_col = stride_x * i_out_x + i_ker_x - padding_x;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] *
                                wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out];
                        }
                    }
                }
                Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] =
                    (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_mat_q7_vec_q15_opt_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_mat_q7_vec_q15_opt_ref.c
@@ -1,120 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the fully-connected layer with a q15 input vector and
  * q7 weights stored in the interleaved "opt" order.
  *
  * Rows are handled four at a time. For each pair of vector elements
  * (a0, a1) eight weights are consumed from pM in this order:
  *   row0*a0, row1*a0, row0*a1, row1*a1, row2*a0, row3*a0, row2*a1, row3*a1.
  * When dim_vec is odd the last element consumes four weights, one per row.
  * The remaining (num_of_rows & 3) rows are read as dim_vec consecutive
  * weights each. Each accumulator starts at (bias << bias_shift), plus a
  * rounding term unless ARM_NN_TRUNCATE is defined; the result is shifted
  * right by out_shift and passed through __SSAT(..., 16) before being stored.
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,   // pointer to vector
                                                 const q7_t * pM,    // pointer to matrix
                                                 const uint16_t dim_vec, // length of the vector
                                                 const uint16_t num_of_rows, // number of matrix rows (one output per row)
                                                 const uint16_t bias_shift,  // amount of left-shift for bias
                                                 const uint16_t out_shift,   // amount of right-shift for output
                                                 const q7_t * bias, q15_t * pOut,    // output operand
                                                 q15_t * vec_buffer)
 {
    uint16_t  rowCnt = num_of_rows >> 2;
    const q7_t *pB = pM;
    const q15_t *pA;
    q15_t    *pO = pOut;
    const q7_t *pBias = bias;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    /* Blocks of four rows, weights in interleaved order. */
    while (rowCnt)
    {
        pA = pV;
        q31_t     sum = (*pBias++ << bias_shift) + round_term;
        q31_t     sum2 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum3 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum4 = (*pBias++ << bias_shift) + round_term;
        uint16_t  colCnt = dim_vec >> 1;
        while (colCnt)
        {
            q15_t     inA1 = *pA++;
            q15_t     inA2 = *pA++;
            /* The read order below defines the weight layout; do not reorder. */
            q7_t      inB1 = *pB++;
            q7_t      inB3 = *pB++;
            q7_t      inB2 = *pB++;
            q7_t      inB4 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;
            sum2 += inA1 * inB3 + inA2 * inB4;
            inB1 = *pB++;
            inB3 = *pB++;
            inB2 = *pB++;
            inB4 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;
            sum4 += inA1 * inB3 + inA2 * inB4;
            colCnt--;
        }
        /* Odd trailing vector element: one weight per row. */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inA = *pA++;
            q7_t      inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;
            colCnt--;
        }
        *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
        rowCnt--;
    }
    /* Leftover rows: weights are read consecutively, dim_vec per row. */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        pA = pV;
        int       ip_out = (*pBias++ << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            q15_t     inA = *pA++;
            q7_t      inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
        rowCnt--;
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_mat_q7_vec_q15_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_mat_q7_vec_q15_ref.c
@@ -1,43 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the fully-connected layer with a q15 input vector and
  * a q7 weight matrix read row by row (pM[i * dim_vec + j]).
  *
  * Each output starts at (bias[i] << bias_shift), plus a rounding term unless
  * ARM_NN_TRUNCATE is defined, accumulates the row/vector dot product, and is
  * shifted right by out_shift and passed through __SSAT(..., 16).
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,   // pointer to vector
                                             const q7_t * pM,    // pointer to matrix
                                             const uint16_t dim_vec, // length of the vector
                                             const uint16_t num_of_rows, // number of matrix rows (one output per row)
                                             const uint16_t bias_shift,  // amount of left-shift for bias
                                             const uint16_t out_shift,   // amount of right-shift for output
                                             const q7_t * bias, q15_t * pOut,    // output operand
                                             q15_t * vec_buffer)
 {
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    for (int i = 0; i < num_of_rows; i++)
    {
        int       ip_out = (bias[i] << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            ip_out += pV[j] * pM[i * dim_vec + j];
        }
        pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q15_opt_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q15_opt_ref.c
@@ -1,119 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q15 fully-connected layer with weights stored in
  * the interleaved "opt" order.
  *
  * Rows are handled four at a time. For each pair of vector elements
  * (a0, a1) eight weights are consumed from pM in this order:
  *   row0*a0, row0*a1, row1*a0, row1*a1, row2*a0, row2*a1, row3*a0, row3*a1.
  * When dim_vec is odd the last element consumes four weights, one per row.
  * The remaining (num_of_rows & 3) rows are read as dim_vec consecutive
  * weights each. Each accumulator starts at (bias << bias_shift), plus a
  * rounding term unless ARM_NN_TRUNCATE is defined; the result is shifted
  * right by out_shift and passed through __SSAT(..., 16) before being stored.
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_q15_opt_ref(const q15_t * pV,  // pointer to vector
                                      const q15_t * pM,  // pointer to matrix
                                      const uint16_t dim_vec,    // length of the vector
                                      const uint16_t num_of_rows,    // number of matrix rows (one output per row)
                                      const uint16_t bias_shift, // amount of left-shift for bias
                                      const uint16_t out_shift,  // amount of right-shift for output
                                      const q15_t * bias, q15_t * pOut,  // output operand
                                      q15_t * vec_buffer)
 {
    uint16_t  rowCnt = num_of_rows >> 2;
    const q15_t *pB = pM;
    const q15_t *pA;
    q15_t    *pO = pOut;
    const q15_t *pBias = bias;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    /* Blocks of four rows, weights in interleaved order. */
    while (rowCnt)
    {
        pA = pV;
        q31_t     sum = (*pBias++ << bias_shift) + round_term;
        q31_t     sum2 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum3 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum4 = (*pBias++ << bias_shift) + round_term;
        uint16_t  colCnt = dim_vec >> 1;
        while (colCnt)
        {
            q15_t     inA1 = *pA++;
            q15_t     inA2 = *pA++;
            /* The read order below defines the weight layout; do not reorder. */
            q15_t     inB1 = *pB++;
            q15_t     inB2 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum2 += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum4 += inA1 * inB1 + inA2 * inB2;
            colCnt--;
        }
        /* Odd trailing vector element: one weight per row. */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;
            colCnt--;
        }
        *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
        rowCnt--;
    }
    /* Leftover rows: weights are read consecutively, dim_vec per row. */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        pA = pV;
        int       ip_out = (*pBias++ << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
        rowCnt--;
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q15_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q15_ref.c
@@ -1,43 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q15 fully-connected layer with the weight matrix
  * read row by row (pM[i * dim_vec + j]).
  *
  * Each output starts at (bias[i] << bias_shift), plus a rounding term unless
  * ARM_NN_TRUNCATE is defined, accumulates the row/vector dot product, and is
  * shifted right by out_shift and passed through __SSAT(..., 16).
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_q15_ref(const q15_t * pV,  // pointer to vector
                                  const q15_t * pM,  // pointer to matrix
                                  const uint16_t dim_vec,    // length of the vector
                                  const uint16_t num_of_rows,    // number of matrix rows (one output per row)
                                  const uint16_t bias_shift, // amount of left-shift for bias
                                  const uint16_t out_shift,  // amount of right-shift for output
                                  const q15_t * bias, q15_t * pOut,  // output operand
                                  q15_t * vec_buffer)
 {
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    for (int i = 0; i < num_of_rows; i++)
    {
        int       ip_out = (bias[i] << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            ip_out += pV[j] * pM[i * dim_vec + j];
        }
        pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q7_opt_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q7_opt_ref.c
@@ -1,138 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q7 fully-connected layer with weights stored in
  * the interleaved "opt" order.
  *
  * Rows are handled four at a time. The main loop consumes four vector
  * elements and sixteen weights per iteration; the exact pairing of weights
  * with vector elements and rows is fixed by the read order in the loop body.
  * Each of the remaining (dim_vec & 3) vector elements consumes four weights,
  * one per row. The remaining (num_of_rows & 3) rows are read as dim_vec
  * consecutive weights each. Each accumulator starts at (bias << bias_shift),
  * plus a rounding term unless ARM_NN_TRUNCATE is defined; the result is
  * shifted right by out_shift and passed through __SSAT(..., 8) before being
  * stored.
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_q7_opt_ref(const q7_t * pV,    // pointer to vector
                                     const q7_t * pM,    // pointer to matrix
                                     const uint16_t dim_vec, // length of the vector
                                     const uint16_t num_of_rows, // number of matrix rows (one output per row)
                                     const uint16_t bias_shift,  // amount of left-shift for bias
                                     const uint16_t out_shift,   // amount of right-shift for output
                                     const q7_t * bias, q7_t * pOut, // output operand
                                     q15_t * vec_buffer)
 {
    uint16_t  rowCnt = num_of_rows >> 2;
    const q7_t *pB = pM;
    const q7_t *pA;
    q7_t     *pO = pOut;
    const q7_t *pBias = bias;
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    /* Blocks of four rows, weights in interleaved order. */
    while (rowCnt)
    {
        pA = pV;
        q31_t     sum = (*pBias++ << bias_shift) + round_term;
        q31_t     sum2 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum3 = (*pBias++ << bias_shift) + round_term;
        q31_t     sum4 = (*pBias++ << bias_shift) + round_term;
        uint16_t  colCnt = dim_vec >> 2;
        while (colCnt)
        {
            /* Vector elements 0..3 land in inA1, inA3, inA2, inA4 (in that
             * order); together with the weight read order below this defines
             * the expected layout, so do not reorder these statements. */
            q7_t      inA1 = *pA++;
            q7_t      inA3 = *pA++;
            q7_t      inA2 = *pA++;
            q7_t      inA4 = *pA++;
            q7_t      inB1 = *pB++;
            q7_t      inB3 = *pB++;
            q7_t      inB2 = *pB++;
            q7_t      inB4 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;
            sum2 += inA1 * inB3 + inA2 * inB4;
            inB1 = *pB++;
            inB3 = *pB++;
            inB2 = *pB++;
            inB4 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;
            sum4 += inA1 * inB3 + inA2 * inB4;
            inB1 = *pB++;
            inB3 = *pB++;
            inB2 = *pB++;
            inB4 = *pB++;
            sum += inA3 * inB1 + inA4 * inB2;
            sum2 += inA3 * inB3 + inA4 * inB4;
            inB1 = *pB++;
            inB3 = *pB++;
            inB2 = *pB++;
            inB4 = *pB++;
            sum3 += inA3 * inB1 + inA4 * inB2;
            sum4 += inA3 * inB3 + inA4 * inB4;
            colCnt--;
        }
        /* Trailing vector elements: one weight per row for each element. */
        colCnt = dim_vec & 0x3;
        while (colCnt)
        {
            q7_t      inA = *pA++;
            q7_t      inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;
            colCnt--;
        }
        *pO++ = (q7_t) __SSAT((sum >> out_shift), 8);
        *pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
        *pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
        *pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
        rowCnt--;
    }
    /* Leftover rows: weights are read consecutively, dim_vec per row. */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        pA = pV;
        int       ip_out = (*pBias++ << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            q7_t      inA = *pA++;
            q7_t      inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8);
        rowCnt--;
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q7_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_fully_connected_q7_ref.c
@@ -1,43 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
  * Plain-C reference for the q7 fully-connected layer with the weight matrix
  * read row by row (pM[i * dim_vec + j]).
  *
  * Each output starts at (bias[i] << bias_shift), plus a rounding term unless
  * ARM_NN_TRUNCATE is defined, accumulates the row/vector dot product, and is
  * shifted right by out_shift and passed through __SSAT(..., 8).
  *
  * vec_buffer is not used by this implementation.
  */
 void arm_fully_connected_q7_ref(const q7_t * pV,    // pointer to vector
                                 const q7_t * pM,    // pointer to matrix
                                 const uint16_t dim_vec, // length of the vector
                                 const uint16_t num_of_rows, // number of matrix rows (one output per row)
                                 const uint16_t bias_shift,  // amount of left-shift for bias
                                 const uint16_t out_shift,   // amount of right-shift for output
                                 const q7_t * bias, q7_t * pOut, // output operand
                                 q15_t * vec_buffer)
 {
 #ifndef ARM_NN_TRUNCATE
    /* Half an output LSB for round-to-nearest. Guarded so that out_shift == 0
     * yields 0 instead of shifting by a negative count (undefined behaviour). */
    const q31_t round_term = (out_shift > 0) ? ((q31_t) 1 << (out_shift - 1)) : 0;
 #else
    const q31_t round_term = 0;
 #endif
    (void) vec_buffer;
    for (int i = 0; i < num_of_rows; i++)
    {
        int       ip_out = (bias[i] << bias_shift) + round_term;
        for (int j = 0; j < dim_vec; j++)
        {
            ip_out += pV[j] * pM[i * dim_vec + j];
        }
        pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8);
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_nn_mult_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_nn_mult_ref.c
@@ -1,58 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /*
 * Reference q7 element-wise multiplication.
 *
 * For i in [0, blockSize):
 *   pDst[i] = ssat8((pSrcA[i] * pSrcB[i] + rounding) >> out_shift)
 * The rounding term 2^(out_shift - 1) is added unless ARM_NN_TRUNCATE is defined.
 */
 void      arm_nn_mult_q7_ref(q7_t * pSrcA,
                             q7_t * pSrcB,
                             q7_t * pDst,
                             const uint16_t out_shift,
                             uint32_t blockSize) {
    /* 32-bit counter: a 16-bit one wraps and never reaches blockSize > 65535 */
    uint32_t  i;
 #ifndef ARM_NN_TRUNCATE
    /* (1u << n) >> 1 gives 2^(n-1) for n >= 1 and 0 for n == 0, so no shift by -1 */
    const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1);
 #else
    const q31_t rounding = 0;
 #endif

    for (i = 0; i < blockSize; i++)
    {
        q31_t product = pSrcA[i] * pSrcB[i];

        pDst[i] = (q7_t)__SSAT((product + rounding) >> out_shift, 8);
    }
 }
 /*
 * Reference q15 element-wise multiplication.
 *
 * For i in [0, blockSize):
 *   pDst[i] = ssat16((pSrcA[i] * pSrcB[i] + rounding) >> out_shift)
 * The rounding term 2^(out_shift - 1) is added unless ARM_NN_TRUNCATE is defined.
 */
 void     arm_nn_mult_q15_ref(q15_t * pSrcA,
                             q15_t * pSrcB,
                             q15_t * pDst,
                             const uint16_t out_shift,
                             uint32_t blockSize) {
    /* 32-bit counter: a 16-bit one wraps and never reaches blockSize > 65535 */
    uint32_t  i;
 #ifndef ARM_NN_TRUNCATE
    /* (1u << n) >> 1 gives 2^(n-1) for n >= 1 and 0 for n == 0, so no shift by -1 */
    const q31_t rounding = (q31_t) ((0x1u << out_shift) >> 1);
 #else
    const q31_t rounding = 0;
 #endif

    for (i = 0; i < blockSize; i++)
    {
        q31_t product = pSrcA[i] * pSrcB[i];

        pDst[i] = (q15_t)__SSAT((product + rounding) >> out_shift, 16);
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_pool_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_pool_ref.c
@@ -1,96 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "ref_functions.h"
 /*
 * Reference q7 average pooling over a square image.
 *
 * Elements are addressed as channel + ch_im_in * (x + y * dim).
 * Each output element is the integer average (sum / count) of the input
 * samples of the same channel that fall inside the dim_kernel x dim_kernel
 * window; window positions outside the image are skipped and not counted.
 * A window with no in-image sample produces 0.
 * bufferA is accepted for signature compatibility but is not used here.
 */
 void arm_avepool_q7_HWC_ref(const q7_t * Im_in, // input image
                            const uint16_t dim_im_in,   // input image dimension
                            const uint16_t ch_im_in,    // number of input image channels
                            const uint16_t dim_kernel,  // window kernel size
                            const uint16_t padding, // padding sizes
                            const uint16_t stride,  // stride
                            const uint16_t dim_im_out,  // output image dimension
                            q7_t * bufferA, // a buffer for local storage
                            q7_t * Im_out)
 {
    (void) bufferA;

    /* int counters: 16-bit signed ones cannot reach uint16_t bounds above 32767 */
    for (int ch = 0; ch < ch_im_in; ch++)
    {
        for (int out_y = 0; out_y < dim_im_out; out_y++)
        {
            for (int out_x = 0; out_x < dim_im_out; out_x++)
            {
                /* top-left corner of the window; negative inside the padding */
                const int win_y0 = out_y * stride - padding;
                const int win_x0 = out_x * stride - padding;
                int       sum = 0;
                int       count = 0;

                for (int k_y = win_y0; k_y < win_y0 + dim_kernel; k_y++)
                {
                    if (k_y < 0 || k_y >= dim_im_in)
                    {
                        continue;
                    }
                    for (int k_x = win_x0; k_x < win_x0 + dim_kernel; k_x++)
                    {
                        if (k_x < 0 || k_x >= dim_im_in)
                        {
                            continue;
                        }
                        sum += Im_in[ch + ch_im_in * (k_x + k_y * dim_im_in)];
                        count++;
                    }
                }
                /* guard the division: the window may lie entirely in the padding */
                Im_out[ch + ch_im_in * (out_x + out_y * dim_im_out)] =
                    (count > 0) ? (q7_t) (sum / count) : (q7_t) 0;
            }
        }
    }
 }
 /*
 * Reference q7 max pooling over a square image.
 *
 * Elements are addressed as channel + ch_im_in * (x + y * dim).
 * Each output element is the largest input sample of the same channel inside
 * the dim_kernel x dim_kernel window; window positions outside the image are
 * skipped. A window with no in-image sample produces -128.
 * bufferA is accepted for signature compatibility but is not used here.
 */
 void arm_maxpool_q7_HWC_ref(const q7_t * Im_in, // input image
                            const uint16_t dim_im_in,   // input image dimension
                            const uint16_t ch_im_in,    // number of input image channels
                            const uint16_t dim_kernel,  // window kernel size
                            const uint16_t padding, // padding sizes
                            const uint16_t stride,  // stride
                            const uint16_t dim_im_out,  // output image dimension
                            q7_t * bufferA, // a buffer for local storage
                            q7_t * Im_out)
 {
    (void) bufferA;

    /* int counters: 16-bit signed ones cannot reach uint16_t bounds above 32767 */
    for (int ch = 0; ch < ch_im_in; ch++)
    {
        for (int out_y = 0; out_y < dim_im_out; out_y++)
        {
            for (int out_x = 0; out_x < dim_im_out; out_x++)
            {
                /* top-left corner of the window; negative inside the padding */
                const int win_y0 = out_y * stride - padding;
                const int win_x0 = out_x * stride - padding;
                /* start at -128 so the stored value always fits in 8 bits */
                int       max = -128;

                for (int k_y = win_y0; k_y < win_y0 + dim_kernel; k_y++)
                {
                    if (k_y < 0 || k_y >= dim_im_in)
                    {
                        continue;
                    }
                    for (int k_x = win_x0; k_x < win_x0 + dim_kernel; k_x++)
                    {
                        if (k_x < 0 || k_x >= dim_im_in)
                        {
                            continue;
                        }
                        const int sample = Im_in[ch + ch_im_in * (k_x + k_y * dim_im_in)];

                        if (sample > max)
                        {
                            max = sample;
                        }
                    }
                }
                Im_out[ch + ch_im_in * (out_x + out_y * dim_im_out)] = (q7_t) max;
            }
        }
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_relu_ref.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/arm_relu_ref.c
@@ -1,42 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /*
 * Reference q7 ReLU: replaces every negative element of data[0 .. size-1]
 * with 0, in place. Non-negative elements are left untouched.
 */
 void arm_relu_q7_ref(q7_t * data, uint16_t size)
 {
    q7_t     *cursor = data;
    uint16_t  remaining = size;

    while (remaining != 0)
    {
        if (*cursor < 0)
        {
            *cursor = 0;
        }
        cursor++;
        remaining--;
    }
 }
 /*
 * Reference q15 ReLU: replaces every negative element of data[0 .. size-1]
 * with 0, in place. Non-negative elements are left untouched.
 */
 void arm_relu_q15_ref(q15_t * data, uint16_t size)
 {
    q15_t    *cursor = data;
    uint16_t  remaining = size;

    while (remaining != 0)
    {
        if (*cursor < 0)
        {
            *cursor = 0;
        }
        cursor++;
        remaining--;
    }
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/fully_connected_testing_weights.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/fully_connected_testing_weights.h
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/ref_functions.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/Ref_Implementations/ref_functions.h
@@ -1,250 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef _REF_FUNCTIONS_H_
 #define _REF_FUNCTIONS_H_
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 //#include "arm_nnsupportfunctions.h"
 #include "fully_connected_testing_weights.h"
 #ifdef __cplusplus
 extern    "C"
 {
 #endif
 /*
 *
 * Convolution reference implementation
 *
 */
    /*
     * Declaration of the q7 reference convolution. A single dimension is
     * passed for the input image, the kernel and the output image.
     * The definition is not part of this header.
     */
    void      arm_convolve_HWC_q7_ref(const q7_t * Im_in,   // input image
                                      const uint16_t dim_im_in, // input image dimension
                                      const uint16_t ch_im_in,  // number of input image channels
                                      const q7_t * wt,  // kernel weights
                                      const uint16_t ch_im_out, // number of filters, i.e., output image channels
                                      const uint16_t dim_kernel,    // filter kernel size
                                      const uint16_t padding,   // padding sizes
                                      const uint16_t stride,    // stride
                                      const q7_t * bias,    // bias
                                      const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out,   // output image
                                      const uint16_t dim_im_out,    // output image dimension
                                      q15_t * bufferA,  // buffer space for input
                                      q7_t * bufferB    // buffer space for output
        );
    /*
     * Declaration of the q7 reference convolution with separate x and y
     * sizes for image, kernel, padding, stride and output.
     * The definition is not part of this header.
     */
    void      arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image
                                                const uint16_t dim_im_in_x, // input image dimension x
                                                const uint16_t dim_im_in_y, // input image dimension y
                                                const uint16_t ch_im_in,    // number of input image channels
                                                const q7_t * wt,    // kernel weights
                                                const uint16_t ch_im_out,   // number of filters, i.e., output image channels
                                                const uint16_t dim_kernel_x,    // filter kernel size x
                                                const uint16_t dim_kernel_y,    // filter kernel size y
                                                const uint16_t padding_x,   // padding sizes x
                                                const uint16_t padding_y,   // padding sizes y
                                                const uint16_t stride_x,    // stride x
                                                const uint16_t stride_y,    // stride y
                                                const q7_t * bias,  // bias
                                                const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // output image
                                                const uint16_t dim_im_out_x,    // output image dimension x
                                                const uint16_t dim_im_out_y,    // output image dimension y
                                                q15_t * bufferA,    // buffer space for input
                                                q7_t * bufferB  // buffer space for output
        );
    /*
     * Declaration of the q15 reference convolution. A single dimension is
     * passed for the input image, the kernel and the output image.
     * The definition is not part of this header.
     */
    void      arm_convolve_HWC_q15_ref(const q15_t * Im_in, // input image
                                       const uint16_t dim_im_in,    // input image dimension
                                       const uint16_t ch_im_in, // number of input image channels
                                       const q15_t * wt,    // kernel weights
                                       const uint16_t ch_im_out,    // number of filters, i.e., output image channels
                                       const uint16_t dim_kernel,   // filter kernel size
                                       const uint16_t padding,  // padding sizes
                                       const uint16_t stride,   // stride
                                       const q15_t * bias,  // bias
                                       const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out, // output image
                                       const uint16_t dim_im_out,   // output image dimension
                                       q15_t * bufferA, // buffer space for input
                                       q7_t * bufferB   // buffer space for output
        );
    /*
     * Declaration of the q15 reference convolution with separate x and y
     * sizes for image, kernel, padding, stride and output.
     * NOTE(review): parameter roles presumably match the commented q7
     * nonsquare declaration - confirm against the definition, which is not
     * part of this header.
     */
    void      arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in,
                                                      const uint16_t dim_im_in_x,
                                                      const uint16_t dim_im_in_y,
                                                      const uint16_t ch_im_in,
                                                      const q15_t * wt,
                                                      const uint16_t ch_im_out,
                                                      const uint16_t dim_kernel_x,
                                                      const uint16_t dim_kernel_y,
                                                      const uint16_t padding_x,
                                                      const uint16_t padding_y,
                                                      const uint16_t stride_x,
                                                      const uint16_t stride_y,
                                                      const q15_t * bias,
                                                      const uint16_t bias_shift,
                                                      const uint16_t out_shift,
                                                      q15_t * Im_out,
                                                      const uint16_t dim_im_out_x,
                                                      const uint16_t dim_im_out_y, 
                                                      q15_t * bufferA, 
                                                      q7_t * bufferB);
    /*
     * Declaration of the q7 reference depthwise-separable convolution. A
     * single dimension is passed for the input image, the kernel and the
     * output image. The definition is not part of this header.
     */
    void      arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in,   // input image
                                                      const uint16_t dim_im_in, // input image dimension
                                                      const uint16_t ch_im_in,  // number of input image channels
                                                      const q7_t * wt,  // kernel weights
                                                      const uint16_t ch_im_out, // number of filters, i.e., output image channels
                                                      const uint16_t dim_kernel,    // filter kernel size
                                                      const uint16_t padding,   // padding sizes
                                                      const uint16_t stride,    // stride
                                                      const q7_t * bias,    // bias
                                                      const uint16_t bias_shift,    // amount of left-shift for bias
                                                      const uint16_t out_shift, // amount of right-shift for output
                                                      q7_t * Im_out,    // output image
                                                      const uint16_t dim_im_out,    // output image dimension
                                                      q15_t * bufferA,  // buffer space for input
                                                      q7_t * bufferB    // buffer space for output
        );
    /*
     * Declaration of the q7 reference depthwise-separable convolution with
     * separate x and y sizes for image, kernel, padding, stride and output.
     * The definition is not part of this header.
     */
    void      arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image
                                                                const uint16_t dim_im_in_x, // input image dimension x
                                                                const uint16_t dim_im_in_y, // input image dimension y
                                                                const uint16_t ch_im_in,    // number of input image channels
                                                                const q7_t * wt,    // kernel weights
                                                                const uint16_t ch_im_out,   // number of filters, i.e., output image channels
                                                                const uint16_t dim_kernel_x,    // filter kernel size x
                                                                const uint16_t dim_kernel_y,    // filter kernel size y
                                                                const uint16_t padding_x,   // padding sizes x
                                                                const uint16_t padding_y,   // padding sizes y
                                                                const uint16_t stride_x,    // stride x
                                                                const uint16_t stride_y,    // stride y
                                                                const q7_t * bias,  // bias
                                                                const uint16_t bias_shift,  // amount of left-shift for bias
                                                                const uint16_t out_shift,   // amount of right-shift for output
                                                                q7_t * Im_out,  // output image
                                                                const uint16_t dim_im_out_x,    // output image dimension x
                                                                const uint16_t dim_im_out_y,    // output image dimension y
                                                                q15_t * bufferA,    // buffer space for input
                                                                q7_t * bufferB  // buffer space for output
        );
 /*
 *
 * Fully-connected reference implementation
 *
 */
    /*
     * Declaration of the q7 reference fully-connected layer (q7 vector,
     * q7 matrix, q7 bias, q7 output).
     * NOTE(review): the "numCol of A" comment on num_of_rows looks
     * inconsistent with the parameter name - confirm against the definition.
     */
    void      arm_fully_connected_q7_ref(const q7_t * pV,   // pointer to vector
                                         const q7_t * pM,   // pointer to matrix
                                         const uint16_t dim_vec,    // length of the vector
                                         const uint16_t num_of_rows,    // numCol of A
                                         const uint16_t bias_shift, // amount of left-shift for bias
                                         const uint16_t out_shift,  // amount of right-shift for output
                                         const q7_t * bias, q7_t * pOut,    // output operand
                                         q15_t * vec_buffer);
    /*
     * Declaration of the q15 reference fully-connected layer (q15 vector,
     * q15 matrix, q15 bias, q15 output). The definition is not part of this
     * header.
     */
    void      arm_fully_connected_q15_ref(const q15_t * pV, // pointer to vector
                                          const q15_t * pM, // pointer to matrix
                                          const uint16_t dim_vec,   // length of the vector
                                          const uint16_t num_of_rows,   // numCol of A
                                          const uint16_t bias_shift,    // amount of left-shift for bias
                                          const uint16_t out_shift, // amount of right-shift for output
                                          const q15_t * bias, q15_t * pOut, // output operand
                                          q15_t * vec_buffer);
    /*
     * Declaration of the mixed-precision reference fully-connected layer
     * (q15 vector, q7 matrix, q7 bias, q15 output). The definition is not
     * part of this header.
     */
    void      arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,  // pointer to vector
                                                     const q7_t * pM,   // pointer to matrix
                                                     const uint16_t dim_vec,    // length of the vector
                                                     const uint16_t num_of_rows,    // numCol of A
                                                     const uint16_t bias_shift, // amount of left-shift for bias
                                                     const uint16_t out_shift,  // amount of right-shift for output
                                                     const q7_t * bias, q15_t * pOut,   // output operand
                                                     q15_t * vec_buffer);
    /*
     * Declaration of the q7 "opt" reference fully-connected layer; same
     * parameter list as arm_fully_connected_q7_ref.
     * NOTE(review): presumably expects the matrix in the reordered layout
     * used by the optimized kernel - confirm against the definition, which
     * is not part of this header.
     */
    void      arm_fully_connected_q7_opt_ref(const q7_t * pV,   // pointer to vector
                                             const q7_t * pM,   // pointer to matrix
                                             const uint16_t dim_vec,    // length of the vector
                                             const uint16_t num_of_rows,    // numCol of A
                                             const uint16_t bias_shift, // amount of left-shift for bias
                                             const uint16_t out_shift,  // amount of right-shift for output
                                             const q7_t * bias, q7_t * pOut,    // output operand
                                             q15_t * vec_buffer);
    /*
     * Declaration of the q15 "opt" reference fully-connected layer; same
     * parameter list as arm_fully_connected_q15_ref.
     * NOTE(review): presumably expects the matrix in the reordered layout
     * used by the optimized kernel - confirm against the definition, which
     * is not part of this header.
     */
    void      arm_fully_connected_q15_opt_ref(const q15_t * pV, // pointer to vector
                                              const q15_t * pM, // pointer to matrix
                                              const uint16_t dim_vec,   // length of the vector
                                              const uint16_t num_of_rows,   // numCol of A
                                              const uint16_t bias_shift,    // amount of left-shift for bias
                                              const uint16_t out_shift, // amount of right-shift for output
                                              const q15_t * bias, q15_t * pOut, // output operand
                                              q15_t * vec_buffer);
    /*
     * Declaration of the mixed-precision "opt" reference fully-connected
     * layer; same parameter list as arm_fully_connected_mat_q7_vec_q15_ref.
     * NOTE(review): presumably expects the matrix in the reordered layout
     * used by the optimized kernel - confirm against the definition, which
     * is not part of this header.
     */
    void      arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,  // pointer to vector
                                                         const q7_t * pM,   // pointer to matrix
                                                         const uint16_t dim_vec,    // length of the vector
                                                         const uint16_t num_of_rows,    // numCol of A
                                                         const uint16_t bias_shift, // amount of left-shift for bias
                                                         const uint16_t out_shift,  // amount of right-shift for output
                                                         const q7_t * bias, q15_t * pOut,   // output operand
                                                         q15_t * vec_buffer);
 /*
 *
 * Pooling reference implementation
 *
 */
    /*
     * Declaration of the q7 reference average pooling: each output element
     * is the integer average of the in-image samples of one channel inside
     * the pooling window.
     */
    void      arm_avepool_q7_HWC_ref(const q7_t * Im_in,    // input image
                                     const uint16_t dim_im_in,  // input image dimension
                                     const uint16_t ch_im_in,   // number of input image channels
                                     const uint16_t dim_kernel, // window kernel size
                                     const uint16_t padding,    // padding sizes
                                     const uint16_t stride, // stride
                                     const uint16_t dim_im_out, // output image dimension
                                     q7_t * bufferA,    // a buffer for local storage
                                     q7_t * Im_out);
    /*
     * Declaration of the q7 reference max pooling: each output element is
     * the largest in-image sample of one channel inside the pooling window.
     */
    void      arm_maxpool_q7_HWC_ref(const q7_t * Im_in,    // input image
                                     const uint16_t dim_im_in,  // input image dimension
                                     const uint16_t ch_im_in,   // number of input image channels
                                     const uint16_t dim_kernel, // window kernel size
                                     const uint16_t padding,    // padding sizes
                                     const uint16_t stride, // stride
                                     const uint16_t dim_im_out, // output image dimension
                                     q7_t * bufferA,    // a buffer for local storage
                                     q7_t * Im_out);
 /*
 *
 * Other reference implementation
 *
 */
    /* In-place ReLU: negative elements of data[0 .. size-1] are set to 0. */
    void      arm_relu_q7_ref(q7_t * data, uint16_t size);
    void      arm_relu_q15_ref(q15_t * data, uint16_t size);
    /* Element-wise product of pSrcA and pSrcB, right-shifted by out_shift and saturated into pDst (blockSize elements). */
    void      arm_nn_mult_q7_ref(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);
    void      arm_nn_mult_q15_ref(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
@@ -1,801 +0,0 @@
 /* ----------------------------------------------------------------------
 * Copyright (C) 2010-2018 Arm Limited. All rights reserved.
 *
 *
 * Project:       CMSIS NN Library
 * Title:         arm_nnexamples_nn_test.cpp
 *
 * Description:   Example code for NN kernel testing.
 *
 * Target Processor: Cortex-M cores
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   - Neither the name of ARM LIMITED nor the names of its contributors
 *     may be used to endorse or promote products derived from this
 *     software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------- */
 #include "arm_nnexamples_nn_test.h"
 // Test-group switches: main() wraps each test group in #ifdef TEST_<GROUP>,
 // so commenting a define out removes that group from the build.
 // NOTE(review): TEST_IP, TEST_CONV and TEST_NONSQUARE are presumably checked
 // further down in main() - confirm.
 //#define TEST_SIGMOID
 //#define TEST_TANH
 #define TEST_POOL
 #define TEST_RELU
 #define TEST_IP
 #define TEST_CONV
 #define TEST_NONSQUARE
 #define TEST_NNMULT
 // Index into test_flags; main() uses it to initialise the array and then resets it to 0.
 int test_index = 0;
 // One flag per test slot (50 slots); main() initialises every entry to -1.
 q7_t test_flags[50];
 // NOTE(review): presumably set by the verify_results_* helpers - confirm.
 bool test_pass;
 int main()
 {
    printf("start tests\n");
    srand(1);
    // common pointers for testing data
    q7_t     *test1;
    q15_t    *test2;
    q7_t     *test3;
    q15_t    *test4;
    for (test_index = 0; test_index<50; test_index++) {
        test_flags[test_index] = -1;
    }
    test_index = 0;
 #ifdef TEST_NNMULT
 #define NNMULT_DIM 128
    test1 = new q7_t[NNMULT_DIM*2];
    test2 = new q15_t[NNMULT_DIM*2];
    test3 = new q7_t[NNMULT_DIM*2];
    test4 = new q15_t[NNMULT_DIM*2];
    q7_t * mult_out_q7 = test3;
    q7_t * mult_ref_q7 = test3 + NNMULT_DIM;
    q15_t * mult_out_q15 = test4;
    q15_t * mult_ref_q15 = test4 + NNMULT_DIM;
    for (int i=0;i<NNMULT_DIM*2;i++) {
        test1[i] = (rand() % 256 - 128);
        test2[i] = (rand() % 65536 - 32768);
    }
    // Test q7
    arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 5, NNMULT_DIM);
    arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 5, NNMULT_DIM);
    verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
    arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 9, NNMULT_DIM);
    arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 9, NNMULT_DIM);
    verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
    // Test q15
    arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 13, NNMULT_DIM);
    arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 13, NNMULT_DIM);
    verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
    arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 18, NNMULT_DIM);
    arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 18, NNMULT_DIM);
    verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
 #endif
 #ifdef TEST_SIGMOID
 #define SIGMOID_DIM 128
    /* This part tests the running of sigmoid functions */
    test1 = new q7_t[SIGMOID_DIM];
    test2 = new q15_t[SIGMOID_DIM];
    test3 = new q7_t[SIGMOID_DIM];
    test4 = new q15_t[SIGMOID_DIM];
    srand(1);
    for (int i = 0; i < SIGMOID_DIM; i++)
    {
        test1[i] = (rand() % 256 - 128);
        test2[i] = (rand() % 65536 - 32768);
        test3[i] = test1[i];
        test4[i] = test2[i];
    }
    arm_nn_activations_direct_q7(test3, SIGMOID_DIM, 3, ARM_SIGMOID);
    for (int i = 0; i < SIGMOID_DIM; i++)
    {
        printf("in: %d  out: %d\n", test1[i], test3[i]);
    }
    printf("start testing q15_t sigmoid\n\n");
    arm_nn_activations_direct_q15(test4, SIGMOID_DIM, 3, ARM_SIGMOID);
    for (int i = 0; i < SIGMOID_DIM; i++)
    {
        printf("in: %d  out: %d\n", test2[i], test4[i]);
    }
    delete[]test1;
    delete[]test2;
    delete[]test3;
    delete[]test4;
 #endif
 #ifdef TEST_TANH
 #define TANH_DIM 128
    /* This part tests the running of sigmoid functions */
    test1 = new q7_t[TANH_DIM];
    test2 = new q15_t[TANH_DIM];
    test3 = new q7_t[TANH_DIM];
    test4 = new q15_t[TANH_DIM];
    srand(1);
    for (int i = 0; i < TANH_DIM; i++)
    {
        test1[i] = (rand() % 256 - 128);
        test2[i] = (rand() % 65536 - 32768);
        test3[i] = test1[i];
        test4[i] = test2[i];
    }
    arm_nn_activations_direct_q7(test3, TANH_DIM, 3, ARM_TANH);
    printf("start testing q7_t tanh\n\n");
    for (int i = 0; i < TANH_DIM; i++)
    {
        printf("in: %d  out: %d\n", test1[i], test3[i]);
    }
    printf("start testing q15_t tanh\n\n");
    arm_nn_activations_direct_q15(test4, TANH_DIM, 3, ARM_TANH);
    for (int i = 0; i < TANH_DIM; i++)
    {
        printf("in: %d  out: %d\n", test2[i], test4[i]);
    }
    delete[]test1;
    delete[]test2;
    delete[]test3;
    delete[]test4;
 #endif
 #ifdef TEST_POOL
 #define POOL_IM_DIM 32
 #define POOL_IM_CH 8
    test1 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH * 2];
    test2 = new q15_t[POOL_IM_DIM * POOL_IM_CH];
    test3 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH];
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        test1[i] = (rand() % 256 - 128);
    }
    q7_t     *img_in = test1 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH;
    q7_t     *pool_out_ref = test3;
    q7_t     *pool_out_opt = test3 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH / 2;
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        test3[i] = 0;
    }
    // copy over the img input
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        img_in[i] = test1[i];
    }
    initialize_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
    printf("Start maxpool reference implementation\n");
    arm_maxpool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
    // copy over the img input
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        img_in[i] = test1[i];
    }
    printf("Start maxpool opt implementation\n");
    arm_maxpool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
    verify_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
    // copy over the img input
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        img_in[i] = test1[i];
    }
    // copy over the img input
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        img_in[i] = test1[i];
    }
    printf("Start avepool ref implementation\n");
    arm_avepool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
    // copy over the img input
    for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
    {
        img_in[i] = test1[i];
    }
    printf("Start avepool opt implementation\n");
    arm_avepool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
    // special check here
    bool      if_ave_pool_match = true;
    for (int i = 0; i < POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH; i++)
    {
        // we tolerate at most difference of 1 here because of rounding errors
        if (pool_out_ref[i] - pool_out_opt[i] >= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2)
        {
            printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]);
            if_ave_pool_match = false;
        }
    }
    if (if_ave_pool_match == true)
    {
        printf("Outputs match.\n");
    }
    delete[]test1;
    delete[]test2;
    delete[]test3;
 #endif
 #ifdef TEST_RELU
 #define RELU_DIM 127
    test1 = new q7_t[RELU_DIM];
    test2 = new q15_t[RELU_DIM];
    test3 = new q7_t[RELU_DIM];
    test4 = new q15_t[RELU_DIM];
    for (int i = 0; i < RELU_DIM; i++)
    {
        test1[i] = (rand() % 256 - 128);
        test2[i] = (rand() % 65536 - 32768);
        test3[i] = test1[i];
        test4[i] = test2[i];
    }
    q7_t     *relu_ref_data_q7 = test1;
    q7_t     *relu_opt_data_q7 = test3;
    q15_t    *relu_ref_data_q15 = test2;
    q15_t    *relu_opt_data_q15 = test4;
    printf("Start ref relu q7 implementation\n");
    arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM);
    printf("Start opt relu q7 implementation\n");
    arm_relu_q7(relu_opt_data_q7, RELU_DIM);
    verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM);
    printf("Start ref relu q15 implementation\n");
    arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM);
    printf("Start opt relu q15 implementation\n");
    arm_relu_q15(relu_opt_data_q15, RELU_DIM);
    verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM);
    delete[]test1;
    delete[]test2;
    delete[]test3;
    delete[]test4;
 #endif
 #ifdef TEST_IP
 #define IP_ROW_DIM 127
 #define IP_COL_DIM 127
    q7_t      ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
    q7_t      ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT;
    q7_t      ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT;
    q15_t     ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
    q15_t     ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15;
    test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM];
    test2 = new q15_t[IP_COL_DIM];
    test3 = new q7_t[IP_ROW_DIM * 3];
    test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2];
    for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
    {
        test1[i] = rand() % 256 - 100;
    }
    for (int i = 0; i < IP_ROW_DIM * 3; i++)
    {
        test3[i] = 0;
    }
    q7_t     *ip_bias_q7 = test1 + IP_COL_DIM;
    q7_t     *ip_out_q7_ref = test3;
    q7_t     *ip_out_q7_opt = test3 + IP_ROW_DIM;
    q7_t     *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM;
    q15_t    *ip_out_q15_ref = test4 + IP_COL_DIM;
    q15_t    *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM;
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
    initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
    printf("Start ref q7 implementation\n");
    arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2);
    printf("Start q7 implementation\n");
    arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2);
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
    printf("Start q7 ref opt implementation\n");
    arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
                                   ip_out_q7_opt_fast, test2);
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
    printf("Start q7 opt implementation\n");
    arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast,
                               test2);
    verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
    for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
    {
        test4[i] = (rand() % 65536 - 32768);
    }
    initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start ref q15 implementation\n");
    arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL);
    printf("Start q15 implementation\n");
    arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start ref opt q15 implementation\n");
    arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt,
                                    NULL);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start opt q15 implementation\n");
    arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start ref q7_q15 implementation\n");
    arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref,
                                           test2);
    printf("Start q7_q15 implementation\n");
    arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt,
                                       test2);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start ref opt q7_q15 implementation\n");
    arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
                                               ip_out_q15_opt, test2);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    printf("Start opt q7_q15 implementation\n");
    arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
                                           ip_out_q15_opt, test2);
    verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
    delete[]test1;
    delete[]test2;
    delete[]test3;
    delete[]test4;
 #endif
 #ifdef TEST_NONSQUARE
 /* Use RCONV to differential with square CONV */
 #define RCONV_IM_DIM_X 10
 #define RCONV_IM_DIM_Y 8
 #define RCONV_IM_CH 4
 #define RCONV_KER_DIM_X 5
 #define RCONV_KER_DIM_Y 3
 #define RCONV_STRIDE_X 1
 #define RCONV_STRIDE_Y 1
 #define RCONV_PADDING_X 2
 #define RCONV_PADDING_Y 1
 #define RCONV_OUT_CH 4
 #define RCONV_OUT_DIM_X 10
 #define RCONV_OUT_DIM_Y 8
    test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH];
    test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH];
    test3 =
        new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH];
    for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
    {
        test1[i] = rand() % 256 - 100;
    }
    for (int i = 0;
         i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++)
    {
        test3[i] = rand() % 256 - 100;
    }
    q7_t     *rconv_weight_q7 = test1;
    q7_t     *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
    q15_t    *rconv_buf = test2;
    q7_t     *rconv_im_in_q7 = test3;
    q7_t     *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
    q7_t     *rconv_im_out_opt_q7 =
        test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    printf("start conv q7 nonsquare ref implementation\n");
    arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    printf("start conv q7 nonsquare opt implementation\n");
    arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    printf("start conv q7 nonsquare ref implementation\n");
    arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    printf("start conv q7 nonsquare basic implementation\n");
    arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    printf("start 1x1 conv q7 nonsquare fast implementation\n");
    arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                       RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
                                       RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
                                       RCONV_OUT_DIM_Y, rconv_buf, NULL);
    printf("start 1x1 conv q7 nonsquare dedicated function implementation\n");
    arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
                                           RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
                                           RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
                                           RCONV_OUT_DIM_Y, rconv_buf, NULL);
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    printf("start depthwise separable conv q7 nonsquare ref implementation\n");
    arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
                                                      rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
                                                      RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
                                                      rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
                                                      RCONV_OUT_DIM_Y, rconv_buf, NULL);
    printf("start depthwise separable conv q7 nonsquare opt implementation\n");
    arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
                                                  rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
                                                  RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
                                                  rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
                                                  RCONV_OUT_DIM_Y, rconv_buf, NULL);
    verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    delete[]test1;
    delete[]test2;
    delete[]test3;
 	test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias
 	test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH   //buffer
 	         + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o
    for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
    {
        test2[i] = rand() % 256 - 100;
    }
    for (int i = 0;
         i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH
         + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
        i++)
    {
        test4[i] = rand() % 256 - 100;
    }
    q15_t     *rconv_weight_q15 = test2;
    q15_t     *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
    rconv_buf = test4;
    q15_t     *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH;
    q15_t     *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
    q15_t     *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
    initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    printf("start conv q15 nonsquare ref implementation\n");
    arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
                                      RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                      RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15,
                                      RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    printf("start conv q5 nonsquare opt implementation\n");
    arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
                                       RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
                                       RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15,
                                       RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
    verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
    delete [] test2;
    delete [] test4;
 #endif
 #ifdef TEST_CONV
 #define CONV_IM_DIM 16
 #define CONV_IM_CH 16
 #define CONV_KER_DIM 5
 #define CONV_OUT_CH 16
 #define CONV_OUT_DIM 16
    test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
    test2 =
        new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
                  2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
    test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
    test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
    for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
    {
        test1[i] = rand() % 256 - 100;
    }
    for (int i = 0;
         i <
         CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
         2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
    {
        test2[i] = (rand() % 65536 - 32768);
    }
    for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
    {
        test3[i] = rand() % 256 - 100;
    }
    for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
    {
        test4[i] = (rand() % 65536 - 32768);
    }
    q7_t     *conv_weight_q7 = test1;
    q7_t     *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
    q15_t    *conv_weight_q15 = test2;
    q15_t    *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
    q15_t    *conv_bias_q15 =
        test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
        2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
    q7_t     *conv_im_in_q7 = test3;
    q7_t     *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
    q7_t     *conv_im_out_opt_q7 =
        test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
    q15_t    *conv_im_in_q15 = test4;
    q15_t    *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
    q15_t    *conv_im_out_opt_q15 =
        test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
    initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q7 ref implementation\n");
    arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
                            CONV_OUT_DIM, conv_buf, NULL);
    printf("start q7 basic implementation\n");
    arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
                              CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q7 fast implementation\n");
    arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
                             CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    // testing with RGB
    printf("start q7 ref implementation for RGB\n");
    arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
                            CONV_OUT_DIM, conv_buf, NULL);
    printf("start q7 basic implementation for RGB\n");
    arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
                              CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q7 RGB implementation for RGB\n");
    arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
                            CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    // testing q15
    initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q15 ref implementation\n");
    arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
                             CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15,
                             CONV_OUT_DIM, conv_buf, NULL);
    printf("start q15 basic implementation\n");
    arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
                               CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
                               CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q15 fast implementation\n");
    arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
                              CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
                              CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    // depthwise separable conv
    initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    printf("start q7 depthwise_separable_conv ref implementation\n");
    arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
                                            CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
                                            CONV_OUT_DIM, conv_buf, NULL);
    printf("start q7 depthwise_separable_conv implementation\n");
    arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
                                        CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
                                        CONV_OUT_DIM, conv_buf, NULL);
    verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
    delete[]test1;
    delete[]test2;
    delete[]test3;
    delete[]test4;
 #endif
    test_pass = true;
    test_index = 0;
    while (test_flags[test_index] != -1) {
        if (test_flags[test_index]) {
             test_pass = false;
        }
        test_index ++;
    }
    if (test_pass) {
        printf("All tests passed\n");
    } else {
        printf("Test failed passed\n");
    }
    return 0;
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.h
@@ -1,78 +0,0 @@
 #ifndef _MAIN_H_
 #define _MAIN_H_
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 #include "ref_functions.h"
 extern int test_index;
 extern q7_t test_flags[50];
 /*
  * Pre-load a pair of q7 result buffers before a reference/optimized run.
  * ref is filled with 0 and opt with 37, so two buffers that are never
  * written afterwards still differ element by element.
  */
 void initialize_results_q7(q7_t * ref, q7_t * opt, int length)
 {
    const q7_t ref_fill = 0;
    const q7_t opt_fill = 37;
    /* ref first, then opt: the same order the fills have always run in */
    arm_fill_q7(ref_fill, ref, length);
    arm_fill_q7(opt_fill, opt, length);
 }
 /*
  * Pre-load a pair of q15 result buffers before a reference/optimized run.
  * ref is filled with 0 and opt with 0x5F5, so two buffers that are never
  * written afterwards still differ element by element.
  */
 void initialize_results_q15(q15_t * ref, q15_t * opt, int length)
 {
    const q15_t ref_fill = 0;
    const q15_t opt_fill = 0x5F5;
    /* ref first, then opt: the same order the fills have always run in */
    arm_fill_q15(ref_fill, ref, length);
    arm_fill_q15(opt_fill, opt, length);
 }
 /*
  * Compare two q7 buffers element by element.
  * Every differing position is reported on stdout; afterwards one entry is
  * appended to test_flags (0 = all equal, 1 = at least one mismatch) and
  * test_index is advanced by one.
  */
 void verify_results_q7(q7_t * ref, q7_t * opt, int length)
 {
    q7_t      outcome = 0;      /* value recorded in test_flags */
    int       pos;
    for (pos = 0; pos < length; pos++)
    {
        if (ref[pos] == opt[pos])
        {
            continue;
        }
        printf("Output mismatch at %d, expected %d, actual %d\r\n", pos, ref[pos], opt[pos]);
        outcome = 1;
    }
    if (outcome == 0)
    {
        printf("Outputs match.\r\n\r\n");
    }
    test_flags[test_index++] = outcome;
 }
 /*
  * Compare two q15 buffers element by element.
  * Every differing position is reported on stdout; afterwards one entry is
  * appended to test_flags (0 = all equal, 1 = at least one mismatch) and
  * test_index is advanced by one.
  */
 void verify_results_q15(q15_t * ref, q15_t * opt, int length)
 {
    int       mismatch_seen = 0;
    int       pos = 0;
    while (pos < length)
    {
        if (ref[pos] != opt[pos])
        {
            printf("Output mismatch at %d, expected %d, actual %d\r\n", pos, ref[pos], opt[pos]);
            mismatch_seen = 1;
        }
        pos++;
    }
    if (!mismatch_seen)
    {
        printf("Outputs match.\r\n\r\n");
    }
    test_flags[test_index++] = mismatch_seen ? 1 : 0;
 }
 #endif
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
@@ -1,4 +0,0 @@
 CMSIS NN_Lib example arm_nnexamples_nn_test for
  Cortex-M3, Cortex-M4 and Cortex-M7.
 The example is configured for the uVision Simulator.
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c
@@ -1,101 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_activations_q15.c
 * Description:  Q15 neural network activation function using direct table look-up
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_common_tables.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Acti
 * @{
 */
  /**
   * @brief Q15 neural network activation function using direct table look-up
   * @param[in,out]   data        pointer to the buffer; it is read and overwritten in place
   * @param[in]       size        number of elements
   * @param[in]       int_width   bit-width of the integer part, assumed to be no larger than 3
   * @param[in]       type        type of activation function; ARM_SIGMOID selects the sigmoid
   *                              table, ARM_TANH and any other value select the tanh table
   * @return none.
   *
   * @details
   *
   * This is the direct table look-up approach with linear interpolation
   * between two neighbouring table entries.
   *
   * Assume here the integer part of the fixed-point is <= 3.
   * More than 3 does not make much sense: it makes no difference compared
   * with saturation followed by any of these activation functions.
   *
   * The table index is the signed value (in >> shift_size) reinterpreted as
   * an unsigned byte, the same convention arm_nn_activations_direct_q7 uses.
   * Saturating the index with __USAT instead would clamp every negative
   * index to 0, so all negative inputs would be treated as zero.
   */
 void arm_nn_activations_direct_q15(q15_t * data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
 {
    uint16_t  i = size;
    q15_t    *pIn = data;
    q15_t    *pOut = data;
    /* number of fractional bits below the 8-bit table index */
    uint16_t  shift_size = 8 + 3 - int_width;
    uint32_t  bit_mask = 0x7FF >> int_width;
    /* weight that corresponds to a full step between two table entries */
    uint32_t  full_frac = bit_mask + 1;
    const q15_t *lookup_table;
    switch (type)
    {
    case ARM_SIGMOID:
        lookup_table = sigmoidTable_q15;
        break;
    case ARM_TANH:
    default:
        lookup_table = tanhTable_q15;
        break;
    }
    while (i)
    {
        q15_t     out;
        q15_t     in = *pIn++;
        /* low bits of the input: position between the two table entries */
        q15_t     frac = (uint32_t) in & bit_mask;
        /* signed table index; the uint8_t casts below wrap negatives into 128..255 */
        q15_t     index = in >> shift_size;
        q15_t     value = lookup_table[(uint8_t) index];
        if (index != 0x7f)
        {
            q15_t     value2 = lookup_table[(uint8_t) (index + 1)];
            /* doing the interpolation here for better accuracy */
            out = ((q31_t) (full_frac - frac) * value + (q31_t) value2 * frac) >> shift_size;
        }
        else
        {
            /* the largest positive index has no right-hand neighbour to interpolate with */
            out = value;
        }
        *pOut++ = out;
        i--;
    }
 }
 /**
 * @} end of Acti group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c
@@ -1,91 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_activations_q7.c
 * Description:  Q7 neural network activation function using direct table look-up
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_common_tables.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Acti
 * @{
 */
  /**
   * @brief Q7 neural network activation function using direct table look-up
   * @param[in,out]   data        pointer to the buffer; it is read and overwritten in place
   * @param[in]       size        number of elements
   * @param[in]       int_width   bit-width of the integer part, assumed to be no larger than 3
   * @param[in]       type        type of activation function; ARM_SIGMOID selects the sigmoid
   *                              table, ARM_TANH and any other value select the tanh table
   * @return none.
   *
   * @details
   *
   * Each element is shifted right by (3 - int_width) bits and the result,
   * reinterpreted as an unsigned byte, is used directly as the table index.
   * No interpolation is performed.
   *
   * Assume here the integer part of the fixed-point is <= 3.
   * More than 3 does not make much sense: it makes no difference compared
   * with saturation followed by any of these activation functions.
   */
 void arm_nn_activations_direct_q7(q7_t * data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
 {
    const uint16_t shift_size = 3 - int_width;
    /* anything that is not ARM_SIGMOID falls back to the tanh table */
    const q7_t *lookup_table = (type == ARM_SIGMOID) ? sigmoidTable_q7 : tanhTable_q7;
    uint16_t  idx;
    for (idx = 0; idx < size; idx++)
    {
        /* the uint8_t cast wraps negative indices into 128..255 */
        data[idx] = lookup_table[(uint8_t) (data[idx] >> shift_size)];
    }
 }
 /**
 * @} end of Acti group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c
@@ -1,106 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_relu_q15.c
 * Description:  Q15 version of ReLU
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Acti
 * @{
 */
  /**
   * @brief Q15 RELU function
   * @param[in,out]   data        pointer to the Q15 buffer; rectified in place
   * @param[in]       size        number of elements
   * @return none.
   *
   * @details
   *
   * Every negative element is replaced by zero. When ARM_MATH_DSP is defined
   * two elements are handled per 32-bit word with a mask built by QSUB16;
   * otherwise a plain element-wise loop is used.
   *
   */
 void arm_relu_q15(q15_t * data, uint16_t size)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    q15_t    *pSrc = data;
    q15_t    *pDst = data;
    uint16_t  pairCnt;
    for (pairCnt = size >> 1; pairCnt > 0; pairCnt--)
    {
        /* load two q15 values as one 32-bit word */
        q31_t     packed = *__SIMD32(pSrc)++;
        /* rotate the sign bit of each half-word down to that half-word's bit 0 */
        q31_t     signs = __ROR(packed & 0x80008000, 15);
        /* per half-word: 0 - 1 gives 0xFFFF (negative input), 0 - 0 gives 0x0000 */
        q31_t     negMask = __QSUB16(0x00000000, signs);
        *__SIMD32(pDst)++ = packed & (~negMask);
    }
    /* odd element count: the last value is rectified on its own */
    if ((size & 0x1) && (*pSrc < 0))
    {
        *pSrc = 0;
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    q15_t    *const pEnd = data + size;
    q15_t    *pCur;
    for (pCur = data; pCur != pEnd; pCur++)
    {
        if (*pCur < 0)
        {
            *pCur = 0;
        }
    }
 #endif                          /* ARM_MATH_DSP */
 }
 /**
 * @} end of Acti group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c
@@ -1,110 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_relu_q7.c
 * Description:  Q7 version of ReLU
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Acti
 * @{
 */
  /**
   * @brief Q7 RELU function
   * @param[in,out]   data        pointer to the Q7 buffer; rectified in place
   * @param[in]       size        number of elements
   * @return none.
   *
   * @details
   *
   * Every negative element is replaced by zero. When ARM_MATH_DSP is defined
   * four elements are handled per 32-bit word with a mask built by QSUB8 and
   * the up-to-three leftover elements are processed one by one; otherwise a
   * plain element-wise loop is used.
   *
   */
 void arm_relu_q7(q7_t * data, uint16_t size)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    q7_t     *pSrc = data;
    q7_t     *pDst = data;
    uint16_t  quadCnt;
    uint16_t  tailCnt;
    for (quadCnt = size >> 2; quadCnt > 0; quadCnt--)
    {
        /* load four q7 values as one 32-bit word */
        q31_t     packed = *__SIMD32(pSrc)++;
        /* rotate the sign bit of each byte down to that byte's bit 0 */
        q31_t     signs = __ROR(packed & 0x80808080, 7);
        /* per byte: 0 - 1 gives 0xFF (negative input), 0 - 0 gives 0x00 */
        q31_t     negMask = __QSUB8(0x00000000, signs);
        *__SIMD32(pDst)++ = packed & (~negMask);
    }
    /* leftover elements when size is not a multiple of 4 */
    for (tailCnt = size & 0x3; tailCnt > 0; tailCnt--)
    {
        if (*pSrc < 0)
        {
            *pSrc = 0;
        }
        pSrc++;
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    q7_t     *const pEnd = data + size;
    q7_t     *pCur;
    for (pCur = data; pCur != pEnd; pCur++)
    {
        if (*pCur < 0)
        {
            *pCur = 0;
        }
    }
 #endif                          /* ARM_MATH_DSP */
 }
 /**
 * @} end of Acti group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c
@@ -1,235 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_1x1_HWC_q7_fast_nonsquare.c
 * Description:  Fast Q7 version of 1x1 convolution (non-square shape)
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
 /**
 * @brief Fast Q7 version of 1x1 convolution (non-square shape)
 * @param[in]       Im_in        pointer to input tensor
 * @param[in]       dim_im_in_x  input tensor dimension x
 * @param[in]       dim_im_in_y  input tensor dimension y
 * @param[in]       ch_im_in     number of input tensor channels
 * @param[in]       wt           pointer to kernel weights
 * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x filter kernel size x (must be 1)
 * @param[in]       dim_kernel_y filter kernel size y (must be 1)
 * @param[in]       padding_x    padding size x (must be 0)
 * @param[in]       padding_y    padding size y (must be 0)
 * @param[in]       stride_x     convolution stride x (must be 1)
 * @param[in]       stride_y     convolution stride y (must be 1)
 * @param[in]       bias         pointer to bias
 * @param[in]       bias_shift   amount of left-shift for bias
 * @param[in]       out_shift    amount of right-shift for output
 * @param[in,out]   Im_out       pointer to output tensor
 * @param[in]       dim_im_out_x output tensor dimension x
 * @param[in]       dim_im_out_y output tensor dimension y
 * @param[in,out]   bufferA      pointer to buffer space for input
 * @param[in,out]   bufferB      pointer to buffer space for output (not referenced by this function)
 * @return     The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1
 * and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise
 * separable convolution.
 *
 * This function is the version with full list of optimization tricks, but with
 * some constraints; <code>ARM_MATH_SIZE_MISMATCH</code> is returned when any is violated:
 *   ch_im_in is multiple of 4
 *   ch_im_out is multiple of 2
 *   dim_kernel_x and dim_kernel_y are 1
 *   padding_x and padding_y are 0
 *   stride_x and stride_y are 1
 *
 * With ARM_MATH_DSP defined, bufferA collects two input pixels before each
 * matrix-multiply call, i.e. it must hold 2*ch_im_in*dim_kernel_x*dim_kernel_y
 * q15_t values.
 *
 * [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications
 * https://arxiv.org/abs/1704.04861
 */
 arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t * Im_in,
                                                  const uint16_t dim_im_in_x,
                                                  const uint16_t dim_im_in_y,
                                                  const uint16_t ch_im_in,
                                                  const q7_t * wt,
                                                  const uint16_t ch_im_out,
                                                  const uint16_t dim_kernel_x,
                                                  const uint16_t dim_kernel_y,
                                                  const uint16_t padding_x,
                                                  const uint16_t padding_y,
                                                  const uint16_t stride_x,
                                                  const uint16_t stride_y,
                                                  const q7_t * bias,
                                                  const uint16_t bias_shift,
                                                  const uint16_t out_shift,
                                                  q7_t * Im_out,
                                                  const uint16_t dim_im_out_x,
                                                  const uint16_t dim_im_out_y, 
                                                  q15_t * bufferA, 
                                                  q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x;
    int16_t   i_ch_out;
    /* -----------------------
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1
        || padding_x != 0 || padding_y != 0 || stride_x != 1 || stride_y != 1)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* This part implements the im2col function */
            arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_out_y * dim_im_in_x + i_out_x) * ch_im_in, pBuffer,
                                             ch_im_in);
            pBuffer += ch_im_in;
            /* once two pixels are buffered, hand both to the matrix-multiply kernel */
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* check if there is left-over for compute */
    if (pBuffer != bufferA)
    {
        /* a single pixel remains in bufferA; pA keeps advancing through wt across output channels */
        const q7_t *pA = wt;
        for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
        {
            /* accumulator starts from the shifted bias plus the rounding term */
            q31_t     sum = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
            q15_t    *pB = bufferA;
            /* basically each time it process 4 entries */
            uint16_t  colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                pA = (const q7_t *)read_and_pad_reordered((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* remainder of the column count modulo 4 */
            colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            *pOut = (q7_t) __SSAT((sum >> out_shift), 8);
            pOut++;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int       i, j, k, l, m, n;
    int       conv_out;
    int       in_row, in_col;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1
        || padding_x != 0 || padding_y != 0 || stride_x != 1 || stride_y != 1)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
                conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        // if-for implementation
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                /* kernel row m starts m * dim_kernel_x taps into the filter */
                                conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
                                    wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c
@@ -1,207 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q15_basic.c
 * Description:  Q15 version of convolution
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Basic Q15 convolution function
   * @param[in]       Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       wt          pointer to kernel weights
   * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       bias        pointer to bias
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in,out]   Im_out      pointer to output tensor
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input
   * @param[in,out]   bufferB     pointer to buffer space for output (not referenced by this function)
   * @return     The function returns <code>ARM_MATH_SUCCESS</code> 
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: ch_im_in*dim_kernel*dim_kernel
   *
   * bufferB size: 0
   *
   * This basic version is designed to work for any input tensor and weight
   * dimension. 
   *
   * Out-of-image taps contribute zero: the DSP path zero-fills them in the
   * im2col buffer, the reference path skips them.
   */
 arm_status
 arm_convolve_HWC_q15_basic(const q15_t * Im_in,
                           const uint16_t dim_im_in,
                           const uint16_t ch_im_in,
                           const q15_t * wt,
                           const uint16_t ch_im_out,
                           const uint16_t dim_kernel,
                           const uint16_t padding,
                           const uint16_t stride,
                           const q15_t * bias,
                           const uint16_t bias_shift,
                           const uint16_t out_shift,
                           q15_t * Im_out, 
                           const uint16_t dim_im_out, 
                           q15_t * bufferA, 
                           q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    q15_t    *pBuffer = bufferA;
    q15_t    *pOut = Im_out;
    q15_t    *im_buffer = bufferA;
    const q15_t *pA;
    int       i;
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* Filling 0 for out-of-bound paddings */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        /* copy all channels of the in-bounds input pixel */
                        memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* one output pixel is buffered: multiply it with every filter row */
            pA = wt;
            for (i = 0; i < ch_im_out; i++)
            {
                /* accumulator starts from the shifted bias plus the rounding term */
                q31_t     sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                q15_t    *pB = im_buffer;
                /* four q15 products per iteration (two SMLAD operations) */
                uint16_t  colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
                while (colCnt)
                {
                    q31_t     inA1 = *__SIMD32(pA)++;
                    q31_t     inB1 = *__SIMD32(pB)++;
                    q31_t     inA2 = *__SIMD32(pA)++;
                    q31_t     inB2 = *__SIMD32(pB)++;
                    sum = __SMLAD(inA1, inB1, sum);
                    sum = __SMLAD(inA2, inB2, sum);
                    colCnt--;
                }
                /* remainder of the column count modulo 4 */
                colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
                while (colCnt)
                {
                    q15_t     inA1 = *pA++;
                    q15_t     inB1 = *pB++;
                    sum += inA1 * inB1;
                    colCnt--;
                }
                *pOut = (q15_t) __SSAT((sum >> out_shift), 16);
                pOut++;
            }
            /* counter reset */
            pBuffer = im_buffer;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /* plain int: a narrower type would truncate coordinates of images larger than 127 pixels */
    int       in_row, in_col;
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out; j++)
        {
            for (k = 0; k < dim_im_out; k++)
            {
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel; m++)
                {
                    for (n = 0; n < dim_kernel; n++)
                    {
                        in_row = stride * j + m - padding;
                        in_col = stride * k + n - padding;
                        /* taps that fall into the padding area contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c
@@ -1,255 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q15_fast.c
 * Description:  Fast Q15 version of convolution
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Fast Q15 convolution function
   * @param[in]       Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       wt          pointer to kernel weights
   * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       bias        pointer to bias
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in,out]   Im_out      pointer to output tensor
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input 
   * @param[in,out]   bufferB     pointer to buffer space for output (not referenced by this function)
   * @return     The function returns either
   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
   *
   * bufferB size: 0
   *
   * <b>Input dimension constraints:</b>
   *
   * ch_im_in is multiple of 2 
   *
   * ch_im_out is multiple of 2
   *
   * dim_im_out is multiple of 2 when ARM_MATH_DSP is defined: output pixels
   * are computed in pairs that share the two-column bufferA, so an odd width
   * would write past the buffer and is rejected with ARM_MATH_SIZE_MISMATCH.
   *
   */
 arm_status
 arm_convolve_HWC_q15_fast(const q15_t * Im_in,
                          const uint16_t dim_im_in,
                          const uint16_t ch_im_in,
                          const q15_t * wt,
                          const uint16_t ch_im_out,
                          const uint16_t dim_kernel,
                          const uint16_t padding,
                          const uint16_t stride,
                          const q15_t * bias,
                          const uint16_t bias_shift,
                          const uint16_t out_shift,
                          q15_t * Im_out, 
                          const uint16_t dim_im_out, 
                          q15_t * bufferA, 
                          q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    q15_t    *pBuffer = bufferA;
    q15_t    *im_buffer = bufferA;
    q15_t    *pOut = Im_out;
    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0 || (dim_im_out & 0x1))
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* Filling 0 for out-of-bound paddings */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        /* copy all channels of the in-bounds input pixel */
                        memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* compute only after the second pixel of a pair has been buffered */
            if (i_out_x & 0x1)
            {
                int       i;
                /* initialize the matrix pointers for A */
                const q15_t *pA = wt;
                /* set up the second output pointers */
                q15_t    *pOut2 = pOut + ch_im_out;
                /* this loop over rows in A */
                for (i = 0; i < ch_im_out; i += 2)
                {
                    /* setup pointers for B */
                    q15_t    *pB = im_buffer;
                    const q15_t *pB2 = pB + ch_im_in * dim_kernel * dim_kernel;
                    /* align the second pointer for A */
                    const q15_t *pA2 = pA + ch_im_in * dim_kernel * dim_kernel;
                    /* init the sum with bias */
                    q31_t     sum =  ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
                    uint16_t  colCnt = ch_im_in * dim_kernel * dim_kernel >> 1;
                    /* accumulate over the vector */
                    while (colCnt)
                    {
                        q31_t     inA1 = *__SIMD32(pA)++;
                        q31_t     inB1 = *__SIMD32(pB)++;
                        q31_t     inA2 = *__SIMD32(pA2)++;
                        q31_t     inB2 = *__SIMD32(pB2)++;
                        sum = __SMLAD(inA1, inB1, sum);
                        sum2 = __SMLAD(inA1, inB2, sum2);
                        sum3 = __SMLAD(inA2, inB1, sum3);
                        sum4 = __SMLAD(inA2, inB2, sum4);
                        colCnt--;
                    }           /* while over colCnt */
                    colCnt = ch_im_in * dim_kernel * dim_kernel & 0x1;
                    while (colCnt)
                    {
                        q15_t     inA1 = *pA++;
                        q15_t     inB1 = *pB++;
                        q15_t     inA2 = *pA2++;
                        q15_t     inB2 = *pB2++;
                        sum += inA1 * inB1;
                        sum2 += inA1 * inB2;
                        sum3 += inA2 * inB1;
                        sum4 += inA2 * inB2;
                        colCnt--;
                    }           /* while over colCnt */
                    /* sum/sum3 belong to the first pixel, sum2/sum4 to the second */
                    *pOut++ = (q15_t) __SSAT(sum >> out_shift, 16);
                    *pOut++ = (q15_t) __SSAT(sum3 >> out_shift, 16);
                    *pOut2++ = (q15_t) __SSAT(sum2 >> out_shift, 16);
                    *pOut2++ = (q15_t) __SSAT(sum4 >> out_shift, 16);
                    /* skip the row computed with A2 */
                    pA += ch_im_in * dim_kernel * dim_kernel;
                }               /* for over ch_im_out */
                /* step over the second pixel's outputs, already written through pOut2 */
                pOut += ch_im_out;
                /* counter reset */
                pBuffer = im_buffer;
            }
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /* plain int: a narrower type would truncate coordinates of images larger than 127 pixels */
    int       in_row, in_col;
    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out; j++)
        {
            for (k = 0; k < dim_im_out; k++)
            {
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel; m++)
                {
                    for (n = 0; n < dim_kernel; n++)
                    {
                        in_row = stride * j + m - padding;
                        in_col = stride * k + n - padding;
                        /* taps that fall into the padding area contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c
@@ -1,265 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q15_fast_nonsquare.c
 * Description:  Fast Q15 version of convolution (non-square shape)
 *
 * $Date:        24. May 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Fast Q15 convolution function (non-square shape)
   * @param[in]       Im_in        pointer to input tensor
   * @param[in]       dim_im_in_x  input tensor dimension x
   * @param[in]       dim_im_in_y  input tensor dimension y
   * @param[in]       ch_im_in     number of input tensor channels
   * @param[in]       wt           pointer to kernel weights
   * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel_x filter kernel size x
   * @param[in]       dim_kernel_y filter kernel size y
   * @param[in]       padding_x    padding size x
   * @param[in]       padding_y    padding size y
   * @param[in]       stride_x     convolution stride x
   * @param[in]       stride_y     convolution stride y
   * @param[in]       bias         pointer to bias
   * @param[in]       bias_shift   amount of left-shift for bias
   * @param[in]       out_shift    amount of right-shift for output
   * @param[in,out]   Im_out       pointer to output tensor
   * @param[in]       dim_im_out_x output tensor dimension x
   * @param[in]       dim_im_out_y output tensor dimension y
   * @param[in,out]   bufferA      pointer to buffer space for input (im2col scratch)
   * @param[in,out]   bufferB      pointer to buffer space for output (not used by this function)
   * @return     The function returns either
   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y (q15_t elements)
   *
   * bufferB size: 0
   *
   * <b>Input dimension constraints:</b>
   *
   * ch_im_in is multiple of 2
   *
   * ch_im_out is multiple of 2
   *
   * dim_im_out_x is multiple of 2 when built with ARM_MATH_DSP: that
   * implementation always multiplies two im2col columns at once and only
   * does so on odd output x positions, so an odd row width would leave a
   * column behind and overrun bufferA on the following row. The reference
   * implementation has no such restriction.
   *
   */
 arm_status
 arm_convolve_HWC_q15_fast_nonsquare(const q15_t * Im_in,
                                     const uint16_t dim_im_in_x,
                                     const uint16_t dim_im_in_y,
                                     const uint16_t ch_im_in,
                                     const q15_t * wt,
                                     const uint16_t ch_im_out,
                                     const uint16_t dim_kernel_x,
                                     const uint16_t dim_kernel_y,
                                     const uint16_t padding_x,
                                     const uint16_t padding_y,
                                     const uint16_t stride_x,
                                     const uint16_t stride_y,
                                     const q15_t * bias,
                                     const uint16_t bias_shift,
                                     const uint16_t out_shift,
                                     q15_t * Im_out,
                                     const uint16_t dim_im_out_x,
                                     const uint16_t dim_im_out_y, 
                                     q15_t * bufferA, 
                                     q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /* pBuffer is the im2col write cursor; im_buffer marks the start of the scratch area */
    q15_t    *pBuffer = bufferA;
    q15_t    *im_buffer = bufferA;
    q15_t    *pOut = Im_out;
    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0 || dim_im_out_x % 2 != 0)
    {
        /*
         * check if the input dimension meets the constraints
         *
         * The odd-width check protects bufferA: the matrix multiply below is
         * only triggered on odd i_out_x, so with an odd dim_im_out_x the last
         * column of a row would stay in the buffer and a third column would
         * be written past its end on the next row.
         */
        return ARM_MATH_SIZE_MISMATCH;
    }
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* gather the receptive field of output pixel (i_out_y, i_out_x) as one column */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* position lies in the padding area: contribute zeros */
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        /* copy all channels of the input pixel */
                        /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); */
                        memcpy(pBuffer, (q15_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, sizeof(q15_t)*ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* two columns are buffered after every odd x position: multiply them together */
            if (i_out_x & 0x1)
            {
                int       i;
                /* initialize the matrix pointers for A */
                const q15_t *pA = wt;
                /* set up the second output pointers */
                q15_t    *pOut2 = pOut + ch_im_out;
                /* this loop over rows in A, two filters per iteration */
                for (i = 0; i < ch_im_out; i += 2)
                {
                    /* setup pointers for B: first and second buffered column */
                    q15_t    *pB = im_buffer;
                    const q15_t *pB2 = pB + ch_im_in * dim_kernel_y * dim_kernel_x;
                    /* align the second pointer for A (weights of filter i + 1) */
                    const q15_t *pA2 = pA + ch_im_in * dim_kernel_y * dim_kernel_x;
                    /* init the sum with bias: sum/sum2 use filter i, sum3/sum4 use filter i + 1 */
                    q31_t     sum =  ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
                    q31_t     sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
                    uint16_t  colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 1;
                    /* accumulate over the vector, two q15 values per 32-bit load */
                    while (colCnt)
                    {
                        q31_t     inA1 = *__SIMD32(pA)++;
                        q31_t     inB1 = *__SIMD32(pB)++;
                        q31_t     inA2 = *__SIMD32(pA2)++;
                        q31_t     inB2 = *__SIMD32(pB2)++;
                        sum = __SMLAD(inA1, inB1, sum);
                        sum2 = __SMLAD(inA1, inB2, sum2);
                        sum3 = __SMLAD(inA2, inB1, sum3);
                        sum4 = __SMLAD(inA2, inB2, sum4);
                        colCnt--;
                    }           /* while over colCnt */
                    /* remaining single element when the column length is odd */
                    colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x1;
                    while (colCnt)
                    {
                        q15_t     inA1 = *pA++;
                        q15_t     inB1 = *pB++;
                        q15_t     inA2 = *pA2++;
                        q15_t     inB2 = *pB2++;
                        sum += inA1 * inB1;
                        sum2 += inA1 * inB2;
                        sum3 += inA2 * inB1;
                        sum4 += inA2 * inB2;
                        colCnt--;
                    }           /* while over colCnt */
                    /* first pixel gets filters i and i + 1, second pixel likewise */
                    *pOut++ = (q15_t) __SSAT(sum >> out_shift, 16);
                    *pOut++ = (q15_t) __SSAT(sum3 >> out_shift, 16);
                    *pOut2++ = (q15_t) __SSAT(sum2 >> out_shift, 16);
                    *pOut2++ = (q15_t) __SSAT(sum4 >> out_shift, 16);
                    /* skip the row computed with A2 */
                    pA += ch_im_in * dim_kernel_y * dim_kernel_x;
                }               /* for over ch_im_out */
                /* skip the second pixel, which was written through pOut2 */
                pOut += ch_im_out;
                /* counter reset */
                pBuffer = im_buffer;
            }
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /*
     * Input coordinates can be negative (padding) and as large as the input
     * dimensions, so they need a full-width signed type; a signed char would
     * truncate any coordinate of 128 or more.
     */
    int       in_row, in_col;
    if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
                /* start from the shifted bias plus the rounding term */
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        /* positions outside the input are padding and contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c
@@ -1,279 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q7_RGB.c
 * Description:  Q7 version of convolution for RGB image
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Q7 convolution function for RGB image
   * @param[in]       Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       wt          pointer to kernel weights
   * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       bias        pointer to bias
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in,out]   Im_out      pointer to output tensor
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input (im2col scratch)
   * @param[in,out]   bufferB     pointer to buffer space for output (not used by this function)
   * @return     The function returns either
   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
   *
   * bufferB size: 0
   *
   * <b>Input dimension constraints:</b>
   *
   * ch_im_in equals 3
   *
   * This kernel is written exclusively for convolution with ch_im_in
   * equals 3. This applies on the first layer of CNNs which has input
   * image with RGB format.
   */
 arm_status
 arm_convolve_HWC_q7_RGB(const q7_t * Im_in,
                         const uint16_t dim_im_in,
                         const uint16_t ch_im_in,
                         const q7_t * wt,
                         const uint16_t ch_im_out,
                         const uint16_t dim_kernel,
                         const uint16_t padding,
                         const uint16_t stride,
                         const q7_t * bias,
                         const uint16_t bias_shift,
                         const uint16_t out_shift,
                         q7_t * Im_out, const uint16_t dim_im_out, q15_t * bufferA, q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /*
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    /* check if number of input channels is 3 */
    if (ch_im_in != 3)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */
                        *__SIMD32(pBuffer) = 0x0;
                        *(pBuffer + 2) = 0;
                        pBuffer += 3;
                    } else
                    {
                        /* 
                         * Equivalent to:
                         *  arm_q7_to_q15_no_shift( (q7_t*)Im_in+(i_ker_y*dim_im_in+i_ker_x)*3, pBuffer, 3);
                         */
                        const q7_t *pPixel = Im_in + (i_ker_y * dim_im_in + i_ker_x) * 3;
                        /*
                         * NOTE(review): this loads a 32-bit word from a 3-byte pixel, so
                         * for the last pixel of the image it presumably touches one byte
                         * beyond the pixel data (the extra byte is discarded below) -
                         * confirm the input buffer tolerates that read.
                         */
                        q31_t     buf = *__SIMD32(pPixel);
                        union arm_nnword top;
                        union arm_nnword bottom;
                        /* sign-extend bytes 0 and 2 into top, bytes 1 and 3 into bottom */
                        top.word = __SXTB16(buf);
                        bottom.word = __SXTB16(__ROR(buf, 8));
 #ifndef ARM_MATH_BIG_ENDIAN
                        /*
                         *  little-endian, | omit | 3rd  | 2nd  | 1st  |
                         *                MSB                         LSB
                         *   top | 3rd | 1st |; bottom | omit | 2nd |
                         *
                         *  version 1, need to swap 2nd and 3rd weight
                         * *__SIMD32(pBuffer) = top.word;
                         * *(pBuffer+2) = bottom.half_words[0];
                         *
                         *  version 2, no weight shuffling required
                         */
                        *pBuffer++ = top.half_words[0];
                        *__SIMD32(pBuffer) = __PKHBT(bottom.word, top.word, 0);
 #else
                        /*
                         *  big-endian,    | 1st  | 2nd  | 3rd  | omit | 
                         *                MSB                         LSB
                         *  top | 2nd | omit |; bottom | 1st | 3rd |
                         * 
                         *  version 1, need to swap 2nd and 3rd weight
                         * *__SIMD32(pBuffer) = bottom.word;
                         * *(pBuffer+2) = top.half_words[1];
                         * 
                         *  version 2, no weight shuffling required
                         */
                        *pBuffer++ = bottom.half_words[0];
                        *__SIMD32(pBuffer) = __PKHTB(top.word, bottom.word, 0);
 #endif
                        pBuffer += 2;
                    }
                }
            }
            /* run the matrix multiply once two full columns are buffered */
            if (pBuffer == bufferA + 2 * 3 * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15(wt, bufferA,
                                                  ch_im_out,
                                                  3 * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* left-over because odd number of output pixels */
    if (pBuffer != bufferA)
    {
        const q7_t *pA = wt;
        int       i;
        for (i = 0; i < ch_im_out; i++)
        {
            /* load the accumulator with the shifted bias plus the rounding term */
            q31_t     sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
            q15_t    *pB = bufferA;
            /* basically each time it process 4 entries */
            uint16_t  colCnt = 3 * dim_kernel * dim_kernel >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                pA = (q7_t *) read_and_pad((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* remaining 0-3 entries are handled one at a time */
            colCnt = 3 * dim_kernel * dim_kernel & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /*
     * Input coordinates can be negative (padding) and as large as dim_im_in,
     * so they need a full-width signed type; a signed char would truncate any
     * coordinate of 128 or more.
     */
    int       in_row, in_col;
    /* check if number of input channels is 3 */
    if (ch_im_in != 3)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out; j++)
        {
            for (k = 0; k < dim_im_out; k++)
            {
                /* start from the shifted bias plus the rounding term */
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel; m++)
                {
                    for (n = 0; n < dim_kernel; n++)
                    {
                        /* if-for implementation */
                        in_row = stride * j + m - padding;
                        in_col = stride * k + n - padding;
                        /* positions outside the input are padding and contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c
@@ -1,230 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q7_basic.c
 * Description:	 Q7 version of convolution
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Basic Q7 convolution function
   * @param[in]       Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       wt          pointer to kernel weights
   * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       bias        pointer to bias
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in,out]   Im_out      pointer to output tensor
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input (im2col scratch)
   * @param[in,out]   bufferB     pointer to buffer space for output (not used by this function)
   * @return     The function returns <code>ARM_MATH_SUCCESS</code> 
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
   *
   * bufferB size: 0
   *
   * This basic version is designed to work for any input tensor and weight
   * dimension. 
   */
 arm_status
 arm_convolve_HWC_q7_basic(const q7_t * Im_in,
                           const uint16_t dim_im_in,
                           const uint16_t ch_im_in,
                           const q7_t * wt,
                           const uint16_t ch_im_out,
                           const uint16_t dim_kernel,
                           const uint16_t padding,
                           const uint16_t stride,
                           const q7_t * bias,
                           const uint16_t bias_shift,
                           const uint16_t out_shift,
                           q7_t * Im_out, 
                           const uint16_t dim_im_out, 
                           q15_t * bufferA, 
                           q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /* 
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            /* gather the receptive field of output pixel (i_out_y, i_out_x) as one column */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* Filling 0 for out-of-bound paddings */
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        /* Copying the pixel data to column */
                        arm_q7_to_q15_no_shift((q7_t *)
                                               Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* Computation is filed for every 2 columns */
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15(wt, bufferA,
                                                  ch_im_out,
                                                  ch_im_in *
                                                  dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* left-over because odd number of output pixels */
    if (pBuffer != bufferA)
    {
        const q7_t *pA = wt;
        int       i;
        for (i = 0; i < ch_im_out; i++)
        {
            /* Load the accumulator with bias first */
            q31_t     sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
            /* Point to the beginning of the im2col buffer */
            q15_t    *pB = bufferA;
            /* Each time it process 4 entries */
            uint16_t  colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                pA = (q7_t *) read_and_pad((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* remaining 0-3 entries are handled one at a time */
            colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /*
     * Input coordinates can be negative (padding) and as large as dim_im_in,
     * so they need a full-width signed type; a signed char would truncate any
     * coordinate of 128 or more.
     */
    int       in_row, in_col;
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out; j++)
        {
            for (k = 0; k < dim_im_out; k++)
            {
                /* start from the shifted bias plus the rounding term */
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel; m++)
                {
                    for (n = 0; n < dim_kernel; n++)
                    {
                        /* if-for implementation */
                        in_row = stride * j + m - padding;
                        in_col = stride * k + n - padding;
                        /* positions outside the input are padding and contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c
@@ -1,228 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q7_basic_nonsquare.c
 * Description:  Q7 version of convolution (non-square shape)
 *
 * $Date:        13. July 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Basic Q7 convolution function (non-square shape)
   * @param[in]       Im_in        pointer to input tensor
   * @param[in]       dim_im_in_x  input tensor dimension x
   * @param[in]       dim_im_in_y  input tensor dimension y
   * @param[in]       ch_im_in     number of input tensor channels
   * @param[in]       wt           pointer to kernel weights
   * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel_x filter kernel size x
   * @param[in]       dim_kernel_y filter kernel size y
   * @param[in]       padding_x    padding size x
   * @param[in]       padding_y    padding size y
   * @param[in]       stride_x     convolution stride x
   * @param[in]       stride_y     convolution stride y
   * @param[in]       bias         pointer to bias
   * @param[in]       bias_shift   amount of left-shift for bias
   * @param[in]       out_shift    amount of right-shift for output
   * @param[in,out]   Im_out       pointer to output tensor
   * @param[in]       dim_im_out_x output tensor dimension x
   * @param[in]       dim_im_out_y output tensor dimension y
   * @param[in,out]   bufferA      pointer to buffer space for input
   * @param[in,out]   bufferB      pointer to buffer space for output
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel_x*dim_kernel_y q15_t elements
   * (only touched when ARM_MATH_DSP is defined)
   *
   * bufferB size: 0 (never accessed by this function)
   *
   * <b>Data layout:</b>
   *
   * Input and output pixels are addressed as (row * width + column) * channels + channel.
   * Weights are addressed as
   * filter * (ch_im_in * dim_kernel_y * dim_kernel_x) + (ker_row * dim_kernel_x + ker_col) * ch_im_in + channel.
   *
   * Each accumulator starts at (bias << bias_shift) + NN_ROUND(out_shift) and is
   * right-shifted by out_shift and saturated to 8 bits before being stored.
   */
 arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t * Im_in,
                                               const uint16_t dim_im_in_x,
                                               const uint16_t dim_im_in_y,
                                               const uint16_t ch_im_in,
                                               const q7_t * wt,
                                               const uint16_t ch_im_out,
                                               const uint16_t dim_kernel_x,
                                               const uint16_t dim_kernel_y,
                                               const uint16_t padding_x,
                                               const uint16_t padding_y,
                                               const uint16_t stride_x,
                                               const uint16_t stride_y,
                                               const q7_t * bias,
                                               const uint16_t bias_shift,
                                               const uint16_t out_shift,
                                               q7_t * Im_out,
                                               const uint16_t dim_im_out_x,
                                               const uint16_t dim_im_out_y,
                                               q15_t * bufferA,
                                               q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /* 
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    /* bufferB is part of the common signature but is not needed here */
    (void) bufferB;
    /* This part implements the im2col function */
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* Walk the receptive field of output pixel (i_out_y, i_out_x) in input coordinates */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* Filling 0 for out-of-bound paddings */
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        /* Copying the pixel data to column */
                        arm_q7_to_q15_no_shift((q7_t *)
                                               Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* Computation is fired once two columns (two output pixels) are buffered */
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_y * dim_kernel_x)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15(wt, bufferA,
                                                  ch_im_out,
                                                  ch_im_in *
                                                  dim_kernel_y * dim_kernel_x, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* left-over because odd number of output pixels */
    if (pBuffer != bufferA)
    {
        /* pA keeps advancing across filters: weights of consecutive filters are contiguous */
        const q7_t *pA = wt;
        int       i;
        for (i = 0; i < ch_im_out; i++)
        {
            /* Load the accumulator with bias first */
            q31_t     sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
            /* Point to the beginning of the im2col buffer */
            q15_t    *pB = bufferA;
            /* Each time it process 4 entries */
            uint16_t  colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                pA = (q7_t *) read_and_pad((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* Remaining 0..3 entries are accumulated one at a time */
            colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /*
     * Input coordinates must be wide enough for any uint16_t image dimension;
     * a signed char would wrap as soon as a coordinate exceeds 127.
     */
    int32_t   in_row, in_col;
    /* The scratch buffers are only needed by the DSP implementation */
    (void) bufferA;
    (void) bufferB;
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        /* if-for implementation: padded positions contribute nothing and are skipped */
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * 
                                         wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + 
                                         (m * dim_kernel_x + n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c
@@ -1,408 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q7_fast.c
 * Description:  Fast Q7 version of convolution
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
  /**
   * @brief Fast Q7 convolution function
   * @param[in]       Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       wt          pointer to kernel weights
   * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       bias        pointer to bias
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in,out]   Im_out      pointer to output tensor
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input 
   * @param[in,out]   bufferB     pointer to buffer space for output
   * @return     The function returns either
   * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
   *
   * bufferB size: 0
   *
   * <b>Input dimension constraints:</b>
   *
   * ch_im_in is multiple of 4    ( because of the SIMD32 read and swap )
   *
   * ch_im_out is multiple of 2    ( because 2x2 mat_mult kernel )
   *
   * The im2col converts the Q7 tensor input into Q15 column, which is stored in
   * bufferA. There is reordering happening during this im2col process with
   * arm_q7_to_q15_reordered_no_shift. For every four elements, the second and
   * third elements are swapped. 
   *
   * The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the
   * GEMM computation with the reordered columns.
   *
   * To speed-up the determination of the padding condition, we split the
   * computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}.
   * This reduces the total number of boundary condition checks and improves
   * the data copying performance.
   */
 arm_status
 arm_convolve_HWC_q7_fast(const q7_t * Im_in,
                         const uint16_t dim_im_in,
                         const uint16_t ch_im_in,
                         const q7_t * wt,
                         const uint16_t ch_im_out,
                         const uint16_t dim_kernel,
                         const uint16_t padding,
                         const uint16_t stride,
                         const q7_t * bias,
                         const uint16_t bias_shift,
                         const uint16_t out_shift,
                         q7_t * Im_out, 
                         const uint16_t dim_im_out, 
                         q15_t * bufferA, 
                         q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /*
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    /* bufferB is part of the common signature but is not needed here */
    (void) bufferB;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    /*
     *  Here we split the entire matrix into three regions depending on the padding situation
     *    Top: i_out_y from 0 to padding - 1
     * Middle: i_out_y from padding to dim_im_out-padding-1
     * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1
     */
    /* top part: both the row and the column bounds are checked for every kernel tap */
    for (i_out_y = 0; i_out_y < padding; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift
                            ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* fire the 2-column matrix multiplication once two output pixels are buffered */
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt,
                                                            bufferA,
                                                            ch_im_out,
                                                            ch_im_in
                                                            *
                                                            dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* middle part, here we also divide the x into left, mid and right */
    for (; i_out_y < dim_im_out - padding; i_out_y++)
    {
        /* left part: only the column bound is checked in this row range */
        for (i_out_x = 0; i_out_x < padding; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift
                            ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt,
                                                            bufferA,
                                                            ch_im_out,
                                                            ch_im_in
                                                            *
                                                            dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
        /* mid part: no bound checks, each kernel row is converted in one call */
        for (; i_out_x < dim_im_out - padding; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in
                                                 +
                                                 (i_ker_y *
                                                  dim_im_in +
                                                  i_out_x *
                                                  stride - padding) * ch_im_in, pBuffer, ch_im_in * dim_kernel);
                pBuffer += ch_im_in * dim_kernel;
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt,
                                                            bufferA,
                                                            ch_im_out,
                                                            ch_im_in
                                                            *
                                                            dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
        /* right part: only the column bound is checked in this row range */
        for (; i_out_x < dim_im_out; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift
                            ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt,
                                                            bufferA,
                                                            ch_im_out,
                                                            ch_im_in
                                                            *
                                                            dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* bottom part: both the row and the column bounds are checked again */
    for (; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* arm_fill_q15(0, pBuffer, ch_im_in); */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift
                            ((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt,
                                                            bufferA,
                                                            ch_im_out,
                                                            ch_im_in
                                                            *
                                                            dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* check if there is left-over for compute */
    if (pBuffer != bufferA)
    {
        /* pA keeps advancing across filters: weights of consecutive filters are contiguous */
        const q7_t *pA = wt;
        int       i;
        for (i = 0; i < ch_im_out; i++)
        {
            /* Load the accumulator with bias and the rounding term first */
            q31_t     sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
            q15_t    *pB = bufferA;
            /* each time it process 4 entries */
            uint16_t  colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                pA = (q7_t *) read_and_pad_reordered((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* Remaining 0..3 entries are accumulated one at a time */
            colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            *pOut = (q7_t) __SSAT((sum >> out_shift), 8);
            pOut++;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  i, j, k, l, m, n;
    int       conv_out;
    /*
     * Input coordinates must be wide enough for any uint16_t image dimension;
     * a signed char would wrap as soon as a coordinate exceeds 127.
     */
    int32_t   in_row, in_col;
    /* The scratch buffers are only needed by the DSP implementation */
    (void) bufferA;
    (void) bufferB;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out; j++)
        {
            for (k = 0; k < dim_im_out; k++)
            {
                /* Same accumulator initialisation as the DSP path, including the q31_t widening */
                conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel; m++)
                {
                    for (n = 0; n < dim_kernel; n++)
                    {
                        /* if-for implementation: padded positions contribute nothing and are skipped */
                        in_row = stride * j + m - padding;
                        in_col = stride * k + n - padding;
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out +=
                                    Im_in[(in_row * dim_im_in + in_col) * ch_im_in +
                                          l] * wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel +
                                                                                            n) * ch_im_in + l];
                            }
                        }
                    }
                }
                Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c
@@ -1,379 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_convolve_HWC_q7_fast_nonsquare.c
 * Description:  Fast Q7 version of convolution (non-square shape)
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
 /**
 * @brief Fast Q7 convolution function (non-square shape)
 * @param[in]       Im_in        pointer to input tensor
 * @param[in]       dim_im_in_x  input tensor dimension x
 * @param[in]       dim_im_in_y  input tensor dimension y
 * @param[in]       ch_im_in     number of input tensor channels
 * @param[in]       wt           pointer to kernel weights
 * @param[in]       ch_im_out    number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x filter kernel size x
 * @param[in]       dim_kernel_y filter kernel size y
 * @param[in]       padding_x    padding size x
 * @param[in]       padding_y    padding size y
 * @param[in]       stride_x     convolution stride x
 * @param[in]       stride_y     convolution stride y
 * @param[in]       bias         pointer to bias
 * @param[in]       bias_shift   amount of left-shift for bias
 * @param[in]       out_shift    amount of right-shift for output
 * @param[in,out]   Im_out       pointer to output tensor
 * @param[in]       dim_im_out_x output tensor dimension x
 * @param[in]       dim_im_out_y output tensor dimension y
 * @param[in,out]   bufferA      pointer to buffer space for input
 * @param[in,out]   bufferB      pointer to buffer space for output
 * @return     The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * This function is the version with full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is multiple of 4
 *   ch_im_out is multiple of 2
 *
 * Notes:
 *   - In the DSP path bufferA collects the im2col columns of two output
 *     pixels, i.e. it must hold 2 * ch_im_in * dim_kernel_x * dim_kernel_y
 *     q15_t entries. bufferB is not referenced by this function.
 *   - The border loops are clamped to the output size, so an output that is
 *     smaller than the padding no longer produces extra pixels that would be
 *     written past the end of Im_out.
 *   - NOTE(review): the DSP path reads wt through the "_reordered" helpers,
 *     so it presumably expects the weight layout those helpers define, while
 *     the reference path indexes wt linearly - confirm against the helpers.
 */
 arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t * Im_in,
                                              const uint16_t dim_im_in_x,
                                              const uint16_t dim_im_in_y,
                                              const uint16_t ch_im_in,
                                              const q7_t * wt,
                                              const uint16_t ch_im_out,
                                              const uint16_t dim_kernel_x,
                                              const uint16_t dim_kernel_y,
                                              const uint16_t padding_x,
                                              const uint16_t padding_y,
                                              const uint16_t stride_x,
                                              const uint16_t stride_y,
                                              const q7_t * bias,
                                              const uint16_t bias_shift,
                                              const uint16_t out_shift,
                                              q7_t * Im_out,
                                              const uint16_t dim_im_out_x,
                                              const uint16_t dim_im_out_y, 
                                              q15_t * bufferA, 
                                              q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x, i_ker_y, i_ker_x;
    /* -----------------------
     *  Here we use bufferA as q15_t internally as computation are done with q15_t level
     *  im2col are done to output in q15_t format from q7_t input
     */
    q15_t    *pBuffer = bufferA;
    q7_t     *pOut = Im_out;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    /*
     *  Here we split the entire matrix into three regions depending on the padding situation
     *    Top: i_out_y from 0 to padding - 1
     * Middle: i_out_y from padding to dim_im_out-padding-1
     * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1
     *
     * Every region gathers one im2col column per output pixel into bufferA.
     * As soon as two columns are available they are handed to the
     * two-column matrix-multiply kernel and the buffer is reused.
     */
    /* top part: rows whose kernel window may reach above the input.
     * The extra bound on dim_im_out_y keeps the loop inside the output
     * when the output has fewer rows than padding_y. */
    for (i_out_y = 0; i_out_y < padding_y && i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
                     i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* tap lies in the padding area: contribute zeros */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in,
                                                         pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* two columns collected: compute two output pixels at once */
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y,
                                                  bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* middle part, here we also divide the x into left, mid and right */
    for (; i_out_y < dim_im_out_y - padding_y; i_out_y++)
    {
        /* left part: only the x coordinate is range-checked here.
         * The extra bound on dim_im_out_x keeps the loop inside the output
         * when the output has fewer columns than padding_x. */
        for (i_out_x = 0; i_out_x < padding_x && i_out_x < dim_im_out_x; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
                     i_ker_x++)
                {
                    if (i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* tap lies in the padding area: contribute zeros */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in,
                                                         pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y,
                                                  bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
        /* mid part: no range checks, a whole kernel row
         * (ch_im_in * dim_kernel_x values) is converted in one call */
        for (; i_out_x < dim_im_out_x - padding_x; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in +
                                                 (i_ker_y * dim_im_in_x + i_out_x * stride_x - padding_x) * ch_im_in,
                                                 pBuffer, ch_im_in * dim_kernel_x);
                pBuffer += ch_im_in * dim_kernel_x;
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y,
                                                  bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
        /* right part: again only the x coordinate is range-checked */
        for (; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
                     i_ker_x++)
                {
                    if (i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* tap lies in the padding area: contribute zeros */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in,
                                                         pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y,
                                                  bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* bottom part: remaining rows, both coordinates are range-checked */
    for (; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* This part implements the im2col function */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
                     i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* tap lies in the padding area: contribute zeros */
                        memset(pBuffer, 0, sizeof(q15_t)*ch_im_in);
                    } else
                    {
                        arm_q7_to_q15_reordered_no_shift((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in,
                                                         pBuffer, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
            {
                pOut =
                    arm_nn_mat_mult_kernel_q7_q15_reordered(wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y,
                                                  bias_shift, out_shift, bias, pOut);
                /* counter reset */
                pBuffer = bufferA;
            }
        }
    }
    /* check if there is left-over for compute:
     * a single column is still pending in bufferA, so the last output pixel
     * is computed here one output channel at a time */
    if (pBuffer != bufferA)
    {
        const q7_t *pA = wt;
        int       i;
        for (i = 0; i < ch_im_out; i++)
        {
            /* accumulator starts from the shifted bias plus the rounding term */
            q31_t     sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
            q15_t    *pB = bufferA;
            /* basically each time it process 4 entries */
            uint16_t  colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2;
            while (colCnt)
            {
                q31_t     inA1, inA2;
                q31_t     inB1, inB2;
                /* fetch four weights as two packed 16-bit pairs; pA advances via the return value */
                pA = (const q7_t *)read_and_pad_reordered((void *)pA, &inA1, &inA2);
                inB1 = *__SIMD32(pB)++;
                sum = __SMLAD(inA1, inB1, sum);
                inB2 = *__SIMD32(pB)++;
                sum = __SMLAD(inA2, inB2, sum);
                colCnt--;
            }
            /* scalar tail for a column length that is not a multiple of 4 */
            colCnt = (ch_im_in * dim_kernel_y * dim_kernel_x) & 0x3;
            while (colCnt)
            {
                q7_t      inA1 = *pA++;
                q15_t     inB1 = *pB++;
                sum += inA1 * inB1;
                colCnt--;
            }
            /* scale back and saturate to 8 bits */
            *pOut = (q7_t) __SSAT((sum >> out_shift), 8);
            pOut++;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int       i, j, k, l, m, n;
    int       conv_out;
    int       in_row, in_col;
    if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
    {
        /* check if the input dimension meets the constraints */
        return ARM_MATH_SIZE_MISMATCH;
    }
    /* i: output channel, j/k: output row/column, m/n: kernel row/column, l: input channel */
    for (i = 0; i < ch_im_out; i++)
    {
        for (j = 0; j < dim_im_out_y; j++)
        {
            for (k = 0; k < dim_im_out_x; k++)
            {
                conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
                for (m = 0; m < dim_kernel_y; m++)
                {
                    for (n = 0; n < dim_kernel_x; n++)
                    {
                        /* if-for implementation */
                        in_row = stride_y * j + m - padding_y;
                        in_col = stride_x * k + n - padding_x;
                        /* taps that fall into the padding area are skipped */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            for (l = 0; l < ch_im_in; l++)
                            {
                                conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
                                    wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + l];      
                            }
                        }
                    }
                }
                /* scale back, saturate to 8 bits, store with the channel as fastest index */
                Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c
@@ -1,418 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_depthwise_separable_conv_HWC_q7.c
 * Description:  Q7 depthwise separable convolution function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
 /**
 * @brief Q7 depthwise separable convolution function
 * @param[in]       Im_in       pointer to input tensor
 * @param[in]       dim_im_in   input tensor dimension
 * @param[in]       ch_im_in    number of input tensor channels
 * @param[in]       wt          pointer to kernel weights
 * @param[in]       ch_im_out   number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel  filter kernel size
 * @param[in]       padding     padding sizes
 * @param[in]       stride      convolution stride
 * @param[in]       bias        pointer to bias
 * @param[in]       bias_shift  amount of left-shift for bias
 * @param[in]       out_shift   amount of right-shift for output
 * @param[in,out]   Im_out      pointer to output tensor
 * @param[in]       dim_im_out  output tensor dimension
 * @param[in,out]   bufferA     pointer to buffer space for input
 * @param[in,out]   bufferB     pointer to buffer space for output
 * @return     The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * @details
 *
 * <b>Buffer size:</b>
 *
 * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
 *
 * bufferB size: 0
 *
 * <b>Input dimension constraints:</b>
 *
 * ch_im_in equals ch_im_out
 *
 * Implementation:
 * There are 3 nested loop here:
 * Inner loop: calculate each output value with MAC instruction over an accumulator
 * Mid   loop: loop over different output channel
 * Outer loop: loop over different output (x, y)
 *
 * In the DSP path bufferA is used as a q7_t column buffer that holds the
 * dim_kernel * dim_kernel input pixels (ch_im_in bytes each) of the current
 * output position. The inline-assembly variant of the inner loop is skipped
 * when the kernel has fewer than two taps, because that loop always executes
 * its body at least once.
 */
 arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t * Im_in,
                                               const uint16_t dim_im_in,
                                               const uint16_t ch_im_in,
                                               const q7_t * wt,
                                               const uint16_t ch_im_out,
                                               const uint16_t dim_kernel,
                                               const uint16_t padding,
                                               const uint16_t stride,
                                               const q7_t * bias,
                                               const uint16_t bias_shift,
                                               const uint16_t out_shift,
                                               q7_t * Im_out, 
                                               const uint16_t dim_im_out, 
                                               q15_t * bufferA, 
                                               q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_out_y, i_out_x;
    int16_t   i_ker_y, i_ker_x;
    q7_t     *colBuffer = (q7_t *) bufferA;
    q7_t     *pBuffer = colBuffer;
    const q7_t *pBias = bias;
    q7_t     *pOut = Im_out;
    uint16_t  rowCnt;
    uint16_t  row_shift;
    /* do some checking here, basically ch_im_in == ch_im_out */
    if (ch_im_in != ch_im_out)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            /* we first do im2col here: copy every kernel tap (all channels) into colBuffer */
            for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
                    {
                        /* tap lies in the padding area: contribute zeros */
                        memset(pBuffer, 0, ch_im_in);
                    } else
                    {
                        memcpy(pBuffer, (q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* we will do the computation here for each channel,
             * four channels per iteration, row_shift is the first channel of the group */
            rowCnt = ch_im_out >> 2;
            row_shift = 0;
            pBias = bias;
            while (rowCnt)
            {
                /* one accumulator per channel, seeded with shifted bias plus rounding term */
                q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                /* the main loop consumes two kernel taps per iteration */
                uint16_t  colCnt = (dim_kernel * dim_kernel) >> 1;
                q7_t     *pB = colBuffer + row_shift;
                const q7_t *pA = wt + row_shift;
                row_shift += 4;
 #ifdef USE_INTRINSIC
 #ifndef ARM_MATH_BIG_ENDIAN
                while (colCnt)
                {
                    q31_t     inA1, inA2, inB1, inB2, opA, opB;
                    /* load 4 channel bytes of two consecutive taps (stride ch_im_in) */
                    inB1 = *__SIMD32(pB);
                    pB += ch_im_in;
                    opB = *__SIMD32(pB);
                    pB += ch_im_in;
                    /* regroup: inB1 = channels 0/1 of both taps, inB2 = channels 2/3 of both taps */
                    inB2 = __PKHTB(opB, inB1, 16);
                    inB1 = __PKHBT(inB1, opB, 16);
                    /* same loading and regrouping for the weights */
                    inA1 = *__SIMD32(pA);
                    pA += ch_im_in;
                    opB = *__SIMD32(pA);
                    pA += ch_im_in;
                    inA2 = __PKHTB(opB, inA1, 16);
                    inA1 = __PKHBT(inA1, opB, 16);
                    /* bytes 0 and 2 -> first channel of the group */
                    opA = __SXTB16(inA1);
                    opB = __SXTB16(inB1);
                    sum = __SMLAD(opA, opB, sum);
                    /* bytes 1 and 3 -> second channel */
                    opA = __SXTB16(__ROR(inA1, 8));
                    opB = __SXTB16(__ROR(inB1, 8));
                    sum2 = __SMLAD(opA, opB, sum2);
                    /* third channel */
                    opA = __SXTB16(inA2);
                    opB = __SXTB16(inB2);
                    sum3 = __SMLAD(opA, opB, sum3);
                    /* fourth channel */
                    opA = __SXTB16(__ROR(inA2, 8));
                    opB = __SXTB16(__ROR(inB2, 8));
                    sum4 = __SMLAD(opA, opB, sum4);
                    colCnt--;
                }
 #else
                /* big-endian variant: same scheme with the halfword packing
                 * and the accumulator assignment mirrored */
                while (colCnt)
                {
                    q31_t     inA1, inA2, inB1, inB2, opA, opB;
                    inB1 = *__SIMD32(pB);
                    pB += ch_im_in;
                    opB = *__SIMD32(pB);
                    pB += ch_im_in;
                    inB2 = __PKHBT(opB, inB1, 16);
                    inB1 = __PKHTB(inB1, opB, 16);
                    inA1 = *__SIMD32(pA);
                    pA += ch_im_in;
                    opB = *__SIMD32(pA);
                    pA += ch_im_in;
                    inA2 = __PKHBT(opB, inA1, 16);
                    inA1 = __PKHTB(inA1, opB, 16);
                    opA = __SXTB16(inA1);
                    opB = __SXTB16(inB1);
                    sum2 = __SMLAD(opA, opB, sum2);
                    opA = __SXTB16(__ROR(inA1, 8));
                    opB = __SXTB16(__ROR(inB1, 8));
                    sum = __SMLAD(opA, opB, sum);
                    opA = __SXTB16(inA2);
                    opB = __SXTB16(inB2);
                    sum4 = __SMLAD(opA, opB, sum4);
                    opA = __SXTB16(__ROR(inA2, 8));
                    opB = __SXTB16(__ROR(inB2, 8));
                    sum3 = __SMLAD(opA, opB, sum3);
                    colCnt--;
                }
 #endif                          /* ARM_MATH_BIG_ENDIAN */
 #else
                /*
                 * Hand-written version of the loop above. The loop tests its
                 * counter only after the body, so it must not be entered with
                 * a zero count (dim_kernel == 1). The counter is decremented
                 * inside the asm and is therefore passed as a 32-bit
                 * read-write operand; flags and memory are declared clobbered
                 * because the code sets the condition flags and reads the
                 * buffers filled by memset/memcpy above.
                 */
                if (colCnt)
                {
                    uint32_t  asmColCnt = colCnt;
 #ifndef ARM_MATH_BIG_ENDIAN
                    /*
                     *   r0    r1    r2    r3    r4   r5
                     *  inA1, inA2, inB1, inB2, opA, opB
                     */
                    asm volatile ("COL_LOOP_%=:\n"
                                  "ldr.w r2, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "pkhtb r3, r5, r2, ASR #16\n"
                                  "pkhbt r2, r2, r5, LSL #16\n"
                                  "ldr.w r0, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "pkhtb r1, r5, r0, ASR #16\n"
                                  "pkhbt r0, r0, r5, LSL #16\n"
                                  "sxtb16 r4, r0\n"
                                  "sxtb16 r5, r2\n"
                                  "smlad %[sum], r4, r5, %[sum]\n"
                                  "mov.w r4, r0, ror #8\n"
                                  "mov.w r5, r2, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum2], r4, r5, %[sum2]\n"
                                  "sxtb16 r4, r1\n"
                                  "sxtb16 r5, r3\n"
                                  "smlad %[sum3], r4, r5, %[sum3]\n"
                                  "mov.w r4, r1, ror #8\n"
                                  "mov.w r5, r3, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum4], r4, r5, %[sum4]\n"
                                  "subs %[colCnt], #1\n"
                                  "bne COL_LOOP_%=\n"
                                  :[sum] "+r"(sum),[sum2] "+r"(sum2),
                                   [sum3] "+r"(sum3),[sum4] "+r"(sum4),
                                   [pB] "+r"(pB),[pA] "+r"(pA),
                                   [colCnt] "+r"(asmColCnt)
                                  :[ch_im_in] "r"(ch_im_in)
                                  :"r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory");
 #else
                    /*
                     *  r0    r1    r2    r3    r4   r5
                     * inA1, inA2, inB1, inB2, opA, opB
                     */
                    asm volatile ("COL_LOOP_%=:\n"
                                  "ldr.w r2, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "pkhbt r3, r5, r2, LSL #16\n"
                                  "pkhtb r2, r2, r5, ASR #16\n"
                                  "ldr.w r0, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "pkhbt r1, r5, r0, LSL #16\n"
                                  "pkhtb r0, r0, r5, ASR #16\n"
                                  "sxtb16 r4, r0\n"
                                  "sxtb16 r5, r2\n"
                                  "smlad %[sum2], r4, r5, %[sum2]\n"
                                  "mov.w r4, r0, ror #8\n"
                                  "mov.w r5, r2, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum], r4, r5, %[sum]\n"
                                  "sxtb16 r4, r1\n"
                                  "sxtb16 r5, r3\n"
                                  "smlad %[sum4], r4, r5, %[sum4]\n"
                                  "mov.w r4, r1, ror #8\n"
                                  "mov.w r5, r3, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum3], r4, r5, %[sum3]\n"
                                  "subs %[colCnt], #1\n"
                                  "bne COL_LOOP_%=\n"
                                  :[sum] "+r"(sum),[sum2] "+r"(sum2),
                                   [sum3] "+r"(sum3),[sum4] "+r"(sum4),
                                   [pB] "+r"(pB),[pA] "+r"(pA),
                                   [colCnt] "+r"(asmColCnt)
                                  :[ch_im_in] "r"(ch_im_in)
                                  :"r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory");
 #endif                          /* ARM_MATH_BIG_ENDIAN */
                }
 #endif                          /* USE_INTRINSIC */
                /* odd number of taps: handle the last tap byte by byte */
                colCnt = (dim_kernel * dim_kernel) & 0x1;
                while (colCnt)
                {
                    union arm_nnword inA, inB;
                    inA.word = *__SIMD32(pA);
                    pA += ch_im_in;
                    inB.word = *__SIMD32(pB);
                    pB += ch_im_in;
                    sum += inA.bytes[0] * inB.bytes[0];
                    sum2 += inA.bytes[1] * inB.bytes[1];
                    sum3 += inA.bytes[2] * inB.bytes[2];
                    sum4 += inA.bytes[3] * inB.bytes[3];
                    colCnt--;
                }
                /* scale back, saturate to 8 bits and store the four channels */
                *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
                rowCnt--;
            }
            /* remaining channels (ch_im_out not a multiple of 4), one at a time */
            rowCnt = ch_im_out & 0x3;
            while (rowCnt)
            {
                q7_t     *pB = colBuffer + row_shift;
                const q7_t *pA = wt + row_shift;
                q31_t     sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                uint16_t  colCnt = (dim_kernel * dim_kernel);
                row_shift += 1;
                while (colCnt)
                {
                    q7_t      A1 = *pA;
                    q7_t      B1 = *pB;
                    pA += ch_im_in;
                    pB += ch_im_in;
                    sum += A1 * B1;
                    colCnt--;
                }
                *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
                rowCnt--;
            }
            /* clear counter and pointers */
            pBuffer = colBuffer;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int       i_out_y, i_out_x, i_ch_out, i_ker_x, i_ker_y;
    int       conv_out;
    /* do some checking here, basically ch_im_in == ch_im_out */
    if (ch_im_in != ch_im_out)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
        {
            for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
            {
                /* for each output: start from shifted bias plus rounding term */
                conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
                for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++)
                {
                    for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++)
                    {
                        int       in_row = stride * i_out_y + i_ker_y - padding;
                        int       in_col = stride * i_out_x + i_ker_x - padding;
                        /* taps that fall into the padding area are skipped */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
                        {
                            /* each output channel only uses the input channel with the same index */
                            conv_out +=
                                Im_in[(in_row *
                                       dim_im_in +
                                       in_col) *
                                      ch_im_in +
                                      i_ch_out] * wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out];
                        }
                    }
                }
                /* scale back and saturate to 8 bits */
                Im_out[(i_out_y * dim_im_out +
                        i_out_x) * ch_im_out + i_ch_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c
@@ -1,411 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_depthwise_separable_conv_HWC_q7_nonsquare.c
 * Description:  Q7 depthwise separable convolution function (non-square shape)
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup NNConv
 * @{
 */
 /**
 * @brief Q7 depthwise separable convolution function (non-square shape)
 * @param[in]       Im_in         pointer to input tensor
 * @param[in]       dim_im_in_x   input tensor dimension x
 * @param[in]       dim_im_in_y   input tensor dimension y
 * @param[in]       ch_im_in      number of input tensor channels
 * @param[in]       wt            pointer to kernel weights
 * @param[in]       ch_im_out     number of filters, i.e., output tensor channels
 * @param[in]       dim_kernel_x  filter kernel size x
 * @param[in]       dim_kernel_y  filter kernel size y
 * @param[in]       padding_x     padding sizes x
 * @param[in]       padding_y     padding sizes y
 * @param[in]       stride_x      convolution stride x
 * @param[in]       stride_y      convolution stride y
 * @param[in]       bias          pointer to bias
 * @param[in]       bias_shift    amount of left-shift for bias
 * @param[in]       out_shift     amount of right-shift for output
 * @param[in,out]   Im_out        pointer to output tensor
 * @param[in]       dim_im_out_x  output tensor dimension x
 * @param[in]       dim_im_out_y  output tensor dimension y
 * @param[in,out]   bufferA       pointer to buffer space for input
 * @param[in,out]   bufferB       pointer to buffer space for output
 * @return     The function returns either
 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
 *
 * This function is the version with full list of optimization tricks, but with
 * some constraints:
 *   ch_im_in is multiple of 2
 *   ch_im_out is multiple of 2
 *
 * ARM_MATH_SIZE_MISMATCH is returned, before anything is written, when
 * ch_im_in differs from ch_im_out.
 *
 * Data layout used by the code below:
 *   Im_in  is read  at [(row * dim_im_in_x  + col) * ch_im_in  + channel]
 *   Im_out is written at [(row * dim_im_out_x + col) * ch_im_out + channel]
 *   wt     is read  at [(ker_y * dim_kernel_x + ker_x) * ch_im_out + channel]
 *
 * <b>Buffer size:</b>
 *
 * With ARM_MATH_DSP defined, bufferA is used as a q7_t im2col scratch area and
 * receives dim_kernel_x * dim_kernel_y * ch_im_in bytes per output pixel.
 * bufferB is not referenced by this implementation. Without ARM_MATH_DSP
 * neither buffer is referenced.
 */
 arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t * Im_in,
                                                         const uint16_t dim_im_in_x,
                                                         const uint16_t dim_im_in_y,
                                                         const uint16_t ch_im_in,
                                                         const q7_t * wt,
                                                         const uint16_t ch_im_out,
                                                         const uint16_t dim_kernel_x,
                                                         const uint16_t dim_kernel_y,
                                                         const uint16_t padding_x,
                                                         const uint16_t padding_y,
                                                         const uint16_t stride_x,
                                                         const uint16_t stride_y,
                                                         const q7_t * bias,
                                                         const uint16_t bias_shift,
                                                         const uint16_t out_shift,
                                                         q7_t * Im_out,
                                                         const uint16_t dim_im_out_x,
                                                         const uint16_t dim_im_out_y,
                                                         q15_t * bufferA,
                                                         q7_t * bufferB)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
 /*
 * Implementation:
 * For every output position (x, y) the receptive field is first copied into
 * colBuffer (im2col, zero-filled where it falls into the padding). Then:
 * Inner loop: calculate each output value with MAC instruction over an accumulator
 * Mid   loop: loop over different output channel (4 at a time, then the rest)
 * Outer loop: loop over different output (x, y)
 *
 */
    int16_t   i_out_y, i_out_x;
    int16_t   i_ker_y, i_ker_x;
    q7_t     *colBuffer = (q7_t *) bufferA;
    q7_t     *pBuffer = colBuffer;
    const q7_t *pBias = bias;
    q7_t     *pOut = Im_out;
    uint16_t  rowCnt;
    uint16_t  row_shift;
    /* bufferB is part of the public signature but not needed here */
    (void) bufferB;
    /* do some checking here, basically ch_im_in == ch_im_out */
    if (ch_im_in != ch_im_out)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            /* we first do im2col here */
            for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
                 i_ker_y++)
            {
                for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
                     i_ker_x++)
                {
                    if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
                    {
                        /* position lies in the padding: contribute zeros for every channel */
                        memset(pBuffer, 0, ch_im_in);
                    } else
                    {
                        /* copy all channels of this input pixel */
                        memcpy(pBuffer, Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, ch_im_in);
                    }
                    pBuffer += ch_im_in;
                }
            }
            /* we will do the computation here for each channel */
            rowCnt = ch_im_out >> 2;
            row_shift = 0;
            pBias = bias;
            /* four channels per iteration */
            while (rowCnt)
            {
                q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                /* kernel positions are consumed two at a time; an odd one is handled after */
                uint16_t  colCnt = (dim_kernel_x * dim_kernel_y) >> 1;
                q7_t     *pB = colBuffer + row_shift;
                const q7_t *pA = wt + row_shift;
                row_shift += 4;
 #ifdef USE_INTRINSIC
 #ifndef ARM_MATH_BIG_ENDIAN
                while (colCnt)
                {
                    q31_t     inA1, inA2, inB1, inB2, opA, opB;
                    inB1 = *__SIMD32(pB);
                    pB += ch_im_in;
                    opB = *__SIMD32(pB);
                    pB += ch_im_in;
                    inB2 = __PKHTB(opB, inB1, 16);
                    inB1 = __PKHBT(inB1, opB, 16);
                    inA1 = *__SIMD32(pA);
                    pA += ch_im_in;
                    opB = *__SIMD32(pA);
                    pA += ch_im_in;
                    inA2 = __PKHTB(opB, inA1, 16);
                    inA1 = __PKHBT(inA1, opB, 16);
                    opA = __SXTB16(inA1);
                    opB = __SXTB16(inB1);
                    sum = __SMLAD(opA, opB, sum);
                    opA = __SXTB16(__ROR(inA1, 8));
                    opB = __SXTB16(__ROR(inB1, 8));
                    sum2 = __SMLAD(opA, opB, sum2);
                    opA = __SXTB16(inA2);
                    opB = __SXTB16(inB2);
                    sum3 = __SMLAD(opA, opB, sum3);
                    opA = __SXTB16(__ROR(inA2, 8));
                    opB = __SXTB16(__ROR(inB2, 8));
                    sum4 = __SMLAD(opA, opB, sum4);
                    colCnt--;
                }
 #else
                while (colCnt)
                {
                    q31_t     inA1, inA2, inB1, inB2, opA, opB;
                    inB1 = *__SIMD32(pB);
                    pB += ch_im_in;
                    opB = *__SIMD32(pB);
                    pB += ch_im_in;
                    inB2 = __PKHBT(opB, inB1, 16);
                    inB1 = __PKHTB(inB1, opB, 16);
                    inA1 = *__SIMD32(pA);
                    pA += ch_im_in;
                    opB = *__SIMD32(pA);
                    pA += ch_im_in;
                    inA2 = __PKHBT(opB, inA1, 16);
                    inA1 = __PKHTB(inA1, opB, 16);
                    opA = __SXTB16(inA1);
                    opB = __SXTB16(inB1);
                    sum2 = __SMLAD(opA, opB, sum2);
                    opA = __SXTB16(__ROR(inA1, 8));
                    opB = __SXTB16(__ROR(inB1, 8));
                    sum = __SMLAD(opA, opB, sum);
                    opA = __SXTB16(inA2);
                    opB = __SXTB16(inB2);
                    sum4 = __SMLAD(opA, opB, sum4);
                    opA = __SXTB16(__ROR(inA2, 8));
                    opB = __SXTB16(__ROR(inB2, 8));
                    sum3 = __SMLAD(opA, opB, sum3);
                    colCnt--;
                }
 #endif                          /* ARM_MATH_BIG_ENDIAN */
 #else
                /*
                 * The assembly loop tests its counter at the bottom, so it must not be
                 * entered with a zero count (single-element kernel): the counter would
                 * wrap and the loop would run far outside both buffers.
                 */
                if (colCnt)
                {
                    /* 32-bit copies: the asm decrements the counter and adds the stride as full registers */
                    uint32_t  loopCnt = colCnt;
                    uint32_t  chStep = ch_im_in;
 #ifndef ARM_MATH_BIG_ENDIAN
                    //  r0    r1    r2    r3    r4   r5
                    // inA1, inA2, inB1, inB2, opA, opB
                    asm volatile ("1:\n"
                                  "ldr.w r2, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "pkhtb r3, r5, r2, ASR #16\n"
                                  "pkhbt r2, r2, r5, LSL #16\n"
                                  "ldr.w r0, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "pkhtb r1, r5, r0, ASR #16\n"
                                  "pkhbt r0, r0, r5, LSL #16\n"
                                  "sxtb16 r4, r0\n"
                                  "sxtb16 r5, r2\n"
                                  "smlad %[sum], r4, r5, %[sum]\n"
                                  "mov.w r4, r0, ror #8\n"
                                  "mov.w r5, r2, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum2], r4, r5, %[sum2]\n"
                                  "sxtb16 r4, r1\n"
                                  "sxtb16 r5, r3\n"
                                  "smlad %[sum3], r4, r5, %[sum3]\n"
                                  "mov.w r4, r1, ror #8\n"
                                  "mov.w r5, r3, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum4], r4, r5, %[sum4]\n"
                                  "subs %[colCnt], #1\n"
                                  "bne 1b\n"
                                  :[sum] "+r"(sum),[sum2] "+r"(sum2),[sum3] "+r"(sum3),
                                  [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA),[colCnt] "+r"(loopCnt)
                                  :[ch_im_in] "r"(chStep)
                                  :"r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory");
 #else
                    //  r0    r1    r2    r3    r4   r5
                    // inA1, inA2, inB1, inB2, opA, opB
                    asm volatile ("1:\n"
                                  "ldr.w r2, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pB], #0]\n"
                                  "add.w %[pB], %[pB], %[ch_im_in]\n"
                                  "pkhbt r3, r5, r2, LSL #16\n"
                                  "pkhtb r2, r2, r5, ASR #16\n"
                                  "ldr.w r0, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "ldr.w r5, [%[pA], #0]\n"
                                  "add.w %[pA], %[pA], %[ch_im_in]\n"
                                  "pkhbt r1, r5, r0, LSL #16\n"
                                  "pkhtb r0, r0, r5, ASR #16\n"
                                  "sxtb16 r4, r0\n"
                                  "sxtb16 r5, r2\n"
                                  "smlad %[sum2], r4, r5, %[sum2]\n"
                                  "mov.w r4, r0, ror #8\n"
                                  "mov.w r5, r2, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum], r4, r5, %[sum]\n"
                                  "sxtb16 r4, r1\n"
                                  "sxtb16 r5, r3\n"
                                  "smlad %[sum4], r4, r5, %[sum4]\n"
                                  "mov.w r4, r1, ror #8\n"
                                  "mov.w r5, r3, ror #8\n"
                                  "sxtb16 r4, r4\n"
                                  "sxtb16 r5, r5\n"
                                  "smlad %[sum3], r4, r5, %[sum3]\n"
                                  "subs %[colCnt], #1\n"
                                  "bne 1b\n"
                                  :[sum] "+r"(sum),[sum2] "+r"(sum2),[sum3] "+r"(sum3),
                                  [sum4] "+r"(sum4),[pB] "+r"(pB),[pA] "+r"(pA),[colCnt] "+r"(loopCnt)
                                  :[ch_im_in] "r"(chStep)
                                  :"r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory");
 #endif                          /*ARM_MATH_BIG_ENDIAN */
                }
 #endif                          /* USE_INTRINSIC */
                /* odd number of kernel positions: one position is still pending */
                colCnt = (dim_kernel_x * dim_kernel_y) & 0x1;
                while (colCnt)
                {
                    union arm_nnword inA, inB;
                    inA.word = *__SIMD32(pA);
                    pA += ch_im_in;
                    inB.word = *__SIMD32(pB);
                    pB += ch_im_in;
                    sum += inA.bytes[0] * inB.bytes[0];
                    sum2 += inA.bytes[1] * inB.bytes[1];
                    sum3 += inA.bytes[2] * inB.bytes[2];
                    sum4 += inA.bytes[3] * inB.bytes[3];
                    colCnt--;
                }
                *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
                *pOut++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
                rowCnt--;
            }
            /* remaining channels (ch_im_out not a multiple of 4), one at a time */
            rowCnt = ch_im_out & 0x3;
            while (rowCnt)
            {
                q7_t     *pB = colBuffer + row_shift;
                const q7_t *pA = wt + row_shift;
                q31_t     sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
                uint16_t  colCnt = (dim_kernel_x * dim_kernel_y);
                row_shift += 1;
                while (colCnt)
                {
                    q7_t      A1 = *pA;
                    q7_t      B1 = *pB;
                    pA += ch_im_in;
                    pB += ch_im_in;
                    sum += A1 * B1;
                    colCnt--;
                }
                *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
                rowCnt--;
            }
            // clear counter and pointers
            pBuffer = colBuffer;
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int       i_out_y, i_out_x, i_ch_out;
    int       i_ker_y, i_ker_x;
    /* the scratch buffers are only needed by the DSP implementation */
    (void) bufferA;
    (void) bufferB;
    /* do some checking here, basically ch_im_in == ch_im_out */
    if (ch_im_in != ch_im_out)
    {
        return ARM_MATH_SIZE_MISMATCH;
    }
    for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
    {
        for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
        {
            for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
            {
                // for each output
                int       conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
                for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++)
                {
                    for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++)
                    {
                        int       in_row = stride_y * i_out_y + i_ker_y - padding_y;
                        int       in_col = stride_x * i_out_x + i_ker_x - padding_x;
                        /* positions that fall into the padding contribute nothing */
                        if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
                        {
                            conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] *
                                wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out];
                        }
                    }
                }
                Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] =
                    (q7_t) __SSAT((conv_out >> out_shift), 8);
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
    /* Return to application */
    return ARM_MATH_SUCCESS;
 }
 /**
 * @} end of NNConv group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c
@@ -1,187 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_mat_mult_kernel_q7_q15.c
 * Description:  Matrix-multiplication function for convolution
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
  /**
   * @brief Matrix-multiplication function for convolution
   * @param[in]       pA          pointer to operand A
   * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
   * @param[in]       ch_im_out   numRow of A
   * @param[in]       numCol_A    numCol of A
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        the bias
   * @param[in,out]   pOut        pointer to output
   * @return     The function returns the incremented output pointer
   *
   * @details
   *
   * This function does the matrix multiplication with weight matrix
   * and 2 columns from im2col.
   *
   * The two vectors in pInBuffer are numCol_A elements each and stored back
   * to back. Two output vectors of ch_im_out elements each are written, the
   * second one directly after the first, and the returned pointer is the
   * incoming pOut advanced by 2 * ch_im_out.
   *
   * Without ARM_MATH_DSP nothing is computed and NULL is returned.
   */
 q7_t     *arm_nn_mat_mult_kernel_q7_q15(const q7_t * pA,
                                        const q15_t * pInBuffer,
                                        const uint16_t ch_im_out,
                                        const uint16_t numCol_A,
                                        const uint16_t bias_shift,
                                        const uint16_t out_shift,
                                        const q7_t * bias,
                                        q7_t * pOut)
 {
 #if defined (ARM_MATH_DSP)
    const q7_t *pBiasCur = bias;
    /* results for the second input vector go one full output vector further on */
    q7_t     *pOutVec2 = pOut + ch_im_out;
    uint16_t  pairsLeft;
    /* process the rows of A two at a time */
    for (pairsLeft = ch_im_out >> 1; pairsLeft != 0; pairsLeft--)
    {
        /* both input vectors are re-read from the start for every row pair */
        const q15_t *pVec1 = pInBuffer;
        const q15_t *pVec2 = pInBuffer + numCol_A;
        /* second row of the pair sits one row length after the first */
        const q7_t *pRow2 = pA + numCol_A;
        /* bias plus rounding term, one value per row */
        const q31_t initRow1 = ((q31_t)(pBiasCur[0]) << bias_shift) + NN_ROUND(out_shift);
        const q31_t initRow2 = ((q31_t)(pBiasCur[1]) << bias_shift) + NN_ROUND(out_shift);
        q31_t     accR1V1 = initRow1;
        q31_t     accR1V2 = initRow1;
        q31_t     accR2V1 = initRow2;
        q31_t     accR2V2 = initRow2;
        uint16_t  blocksLeft;
        uint16_t  tailLeft;
        pBiasCur += 2;
        /* four columns per iteration using the dual-MAC instruction */
        for (blocksLeft = numCol_A >> 2; blocksLeft != 0; blocksLeft--)
        {
            q31_t     row1Lo, row1Hi, row2Lo, row2Hi;
            q31_t     vec1Pair = *__SIMD32(pVec1)++;
            q31_t     vec2Pair = *__SIMD32(pVec2)++;
            pA = (q7_t *) read_and_pad((void *)pA, &row1Lo, &row1Hi);
            pRow2 = (q7_t *) read_and_pad((void *)pRow2, &row2Lo, &row2Hi);
            accR1V1 = __SMLAD(row1Lo, vec1Pair, accR1V1);
            accR1V2 = __SMLAD(row1Lo, vec2Pair, accR1V2);
            accR2V1 = __SMLAD(row2Lo, vec1Pair, accR2V1);
            accR2V2 = __SMLAD(row2Lo, vec2Pair, accR2V2);
            vec1Pair = *__SIMD32(pVec1)++;
            vec2Pair = *__SIMD32(pVec2)++;
            accR1V1 = __SMLAD(row1Hi, vec1Pair, accR1V1);
            accR1V2 = __SMLAD(row1Hi, vec2Pair, accR1V2);
            accR2V1 = __SMLAD(row2Hi, vec1Pair, accR2V1);
            accR2V2 = __SMLAD(row2Hi, vec2Pair, accR2V2);
        }
        /* up to three remaining columns, one element at a time */
        for (tailLeft = numCol_A & 0x3; tailLeft != 0; tailLeft--)
        {
            q7_t      wRow1 = *pA++;
            q15_t     xVec1 = *pVec1++;
            q7_t      wRow2 = *pRow2++;
            q15_t     xVec2 = *pVec2++;
            accR1V1 += wRow1 * xVec1;
            accR1V2 += wRow1 * xVec2;
            accR2V1 += wRow2 * xVec1;
            accR2V2 += wRow2 * xVec2;
        }
        /* scale, saturate to 8 bits and store both rows for each vector */
        pOut[0] = (q7_t) __SSAT((accR1V1 >> out_shift), 8);
        pOut[1] = (q7_t) __SSAT((accR2V1 >> out_shift), 8);
        pOut += 2;
        pOutVec2[0] = (q7_t) __SSAT((accR1V2 >> out_shift), 8);
        pOutVec2[1] = (q7_t) __SSAT((accR2V2 >> out_shift), 8);
        pOutVec2 += 2;
        /* pA has walked across its own row; step over the row handled through pRow2 */
        pA += numCol_A;
    }
    /* odd row count: one row of A is still pending */
    if (ch_im_out & 0x1)
    {
        const q15_t *pVec1 = pInBuffer;
        const q15_t *pVec2 = pInBuffer + numCol_A;
        const q31_t initRow = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     accV1 = initRow;
        q31_t     accV2 = initRow;
        uint16_t  blocksLeft;
        uint16_t  tailLeft;
        for (blocksLeft = numCol_A >> 2; blocksLeft != 0; blocksLeft--)
        {
            q31_t     rowLo, rowHi;
            q31_t     vec1Pair = *__SIMD32(pVec1)++;
            q31_t     vec2Pair = *__SIMD32(pVec2)++;
            pA = (q7_t *) read_and_pad((void *)pA, &rowLo, &rowHi);
            accV1 = __SMLAD(rowLo, vec1Pair, accV1);
            accV2 = __SMLAD(rowLo, vec2Pair, accV2);
            vec1Pair = *__SIMD32(pVec1)++;
            vec2Pair = *__SIMD32(pVec2)++;
            accV1 = __SMLAD(rowHi, vec1Pair, accV1);
            accV2 = __SMLAD(rowHi, vec2Pair, accV2);
        }
        for (tailLeft = numCol_A & 0x3; tailLeft != 0; tailLeft--)
        {
            q7_t      wRow = *pA++;
            q15_t     xVec1 = *pVec1++;
            q15_t     xVec2 = *pVec2++;
            accV1 += wRow * xVec1;
            accV2 += wRow * xVec2;
        }
        *pOut++ = (q7_t) __SSAT((accV1 >> out_shift), 8);
        *pOutVec2++ = (q7_t) __SSAT((accV2 >> out_shift), 8);
    }
    /* pOut now sits at the start of the second vector's results; skip over them */
    return pOut + ch_im_out;
 #else
    /* To be completed */
    return NULL;
 #endif                          /* ARM_MATH_DSP */
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c
@@ -1,138 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_mat_mult_kernel_q7_q15_reordered.c
 * Description:  Matrix-multiplication function for convolution with reordered columns
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 * -------------------------------------------------------------------- */
 #include "arm_nnfunctions.h"
 #include "arm_math.h"
  /**
   * @brief Matrix-multiplication function for convolution with reordered columns
   * @param[in]       pA          pointer to operand A
   * @param[in]       pInBuffer   pointer to operand B, always consists of 2 vectors
   * @param[in]       ch_im_out   numRow of A
   * @param[in]       numCol_A    numCol of A
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        the bias
   * @param[in,out]   pOut        pointer to output
   * @return     The function returns the incremented output pointer
   *
   * @details
   *
   * This function assumes that data in pInBuffer are reordered
   *
   * The two vectors in pInBuffer are numCol_A elements each and stored back
   * to back. Two output vectors of ch_im_out elements each are written, the
   * second one directly after the first, and the returned pointer is the
   * incoming pOut advanced by 2 * ch_im_out.
   *
   * Rows of A are processed in pairs; when ch_im_out is odd the last row is
   * computed on its own, so no bias, weight or output element beyond
   * ch_im_out is touched.
   *
   * Without ARM_MATH_DSP nothing is computed and NULL is returned.
   */
 q7_t     *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t * pA,
                                                  const q15_t * pInBuffer,
                                                  const uint16_t ch_im_out,
                                                  const uint16_t numCol_A,
                                                  const uint16_t bias_shift,
                                                  const uint16_t out_shift,
                                                  const q7_t * bias,
                                                  q7_t * pOut)
 {
 #if defined (ARM_MATH_DSP)
    /* set up the second output pointers */
    q7_t     *pOut2 = pOut + ch_im_out;
    int       i;
    /* this loop over complete pairs of rows in A */
    for (i = 0; i + 1 < ch_im_out; i += 2)
    {
        /* setup pointers for B */
        const q15_t *pB = pInBuffer;
        const q15_t *pB2 = pB + numCol_A;
        /* align the second pointer for A */
        const q7_t *pA2 = pA + numCol_A;
        /* init the sum with bias */
        q31_t     sum =  ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = numCol_A >> 2;
        /* accumulate over the vector */
        while (colCnt)
        {
            q31_t     inA11, inA12, inA21, inA22;
            q31_t     inB1 = *__SIMD32(pB)++;
            q31_t     inB2 = *__SIMD32(pB2)++;
            pA = (q7_t *) read_and_pad_reordered((void *)pA, &inA11, &inA12);
            pA2 = (q7_t *) read_and_pad_reordered((void *)pA2, &inA21, &inA22);
            sum = __SMLAD(inA11, inB1, sum);
            sum2 = __SMLAD(inA11, inB2, sum2);
            sum3 = __SMLAD(inA21, inB1, sum3);
            sum4 = __SMLAD(inA21, inB2, sum4);
            inB1 = *__SIMD32(pB)++;
            inB2 = *__SIMD32(pB2)++;
            sum = __SMLAD(inA12, inB1, sum);
            sum2 = __SMLAD(inA12, inB2, sum2);
            sum3 = __SMLAD(inA22, inB1, sum3);
            sum4 = __SMLAD(inA22, inB2, sum4);
            colCnt--;
        }                       /* while over colCnt */
        colCnt = numCol_A & 0x3;
        while (colCnt)
        {
            q7_t      inA1 = *pA++;
            q15_t     inB1 = *pB++;
            q7_t      inA2 = *pA2++;
            q15_t     inB2 = *pB2++;
            sum += inA1 * inB1;
            sum2 += inA1 * inB2;
            sum3 += inA2 * inB1;
            sum4 += inA2 * inB2;
            colCnt--;
        }                       /* while over colCnt */
        *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
        *pOut++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
        *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
        *pOut2++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
        /* skip the row computed with A2 */
        pA += numCol_A;
    }                           /* for over ch_im_out */
    /* compute left-over row if any; i is the index of that row here */
    if (ch_im_out & 0x1)
    {
        /* setup pointers for B */
        const q15_t *pB = pInBuffer;
        const q15_t *pB2 = pB + numCol_A;
        /* load the bias */
        q31_t     sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = numCol_A >> 2;
        while (colCnt)
        {
            q31_t     inA11, inA12;
            q31_t     inB1 = *__SIMD32(pB)++;
            q31_t     inB2 = *__SIMD32(pB2)++;
            pA = (q7_t *) read_and_pad_reordered((void *)pA, &inA11, &inA12);
            sum = __SMLAD(inA11, inB1, sum);
            sum2 = __SMLAD(inA11, inB2, sum2);
            inB1 = *__SIMD32(pB)++;
            inB2 = *__SIMD32(pB2)++;
            sum = __SMLAD(inA12, inB1, sum);
            sum2 = __SMLAD(inA12, inB2, sum2);
            colCnt--;
        }
        colCnt = numCol_A & 0x3;
        while (colCnt)
        {
            q7_t      inA1 = *pA++;
            q15_t     inB1 = *pB++;
            q15_t     inB2 = *pB2++;
            sum += inA1 * inB1;
            sum2 += inA1 * inB2;
            colCnt--;
        }
        *pOut++ = (q7_t) __SSAT((sum >> out_shift), 8);
        *pOut2++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
    }
    /* pOut has advanced by ch_im_out; step over the second output vector */
    pOut += ch_im_out;
    /* return the new output pointer with offset */
    return pOut;
 #else
    /* To be completed */
    return NULL;
 #endif                          /* ARM_MATH_DSP */
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c
@@ -1,199 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_mat_q7_vec_q15.c
 * Description:  Mixed Q15-Q7 fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Mixed Q15-Q7 fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: 0
   *
   *  Q7_Q15 version of the fully connected layer
   *
   *  Weights are in q7_t and Activations are in q15_t
   *
   */
 arm_status
 arm_fully_connected_mat_q7_vec_q15(const q15_t * pV,
                                    const q7_t * pM,
                                    const uint16_t dim_vec,
                                    const uint16_t num_of_rows,
                                    const uint16_t bias_shift,
                                    const uint16_t out_shift,
                                    const q7_t * bias,
                                    q15_t * pOut,
                                    q15_t * vec_buffer)
 {
 #if defined (ARM_MATH_DSP)
    /* DSP build (Cortex-M4 / Cortex-M7): two weight rows share each pass over pV */
    const q7_t *pRow = pM;
    const q7_t *pRowNext;
    const q7_t *pBiasCur = bias;
    const q15_t *pVec = pV;
    q15_t    *pDst = pOut;
    uint16_t  rowsLeft;
    uint16_t  colsLeft;

    for (rowsLeft = num_of_rows >> 1; rowsLeft != 0; rowsLeft--)
    {
        /* each accumulator starts from the left-shifted bias plus the rounding term */
        q31_t     acc = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     accNext = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);

        pVec = pV;
        pRowNext = pRow + dim_vec;

        /* main part: four columns of both rows per iteration */
        for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
        {
            q31_t     vecPair, wA0, wA1, wB0, wB1;

            /* read_and_pad hands back the advanced weight pointer and two q31_t words */
            pRow = (q7_t *) read_and_pad((void *)pRow, &wA0, &wA1);
            pRowNext = (q7_t *) read_and_pad((void *)pRowNext, &wB0, &wB1);

            vecPair = *__SIMD32(pVec)++;
            acc = __SMLAD(vecPair, wA0, acc);
            accNext = __SMLAD(vecPair, wB0, accNext);

            vecPair = *__SIMD32(pVec)++;
            acc = __SMLAD(vecPair, wA1, acc);
            accNext = __SMLAD(vecPair, wB1, accNext);
        }

        /* remaining (dim_vec % 4) columns, one element at a time */
        for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
        {
            q15_t     vecVal = *pVec++;
            q7_t      wA = *pRow++;
            q7_t      wB = *pRowNext++;

            acc += vecVal * wA;
            accNext += vecVal * wB;
        }

        *pDst++ = (q15_t) (__SSAT((acc >> out_shift), 16));
        *pDst++ = (q15_t) (__SSAT((accNext >> out_shift), 16));

        /* pRow now points at the second row of the pair: step over it */
        pRow += dim_vec;
    }

    /* an odd row count leaves exactly one row to process on its own */
    if (num_of_rows & 0x1)
    {
        q31_t     acc = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);

        pVec = pV;

        for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
        {
            q31_t     vecLo, vecHi, wA0, wA1;

            pRow = (q7_t *) read_and_pad((void *)pRow, &wA0, &wA1);

            vecLo = *__SIMD32(pVec)++;
            acc = __SMLAD(vecLo, wA0, acc);

            vecHi = *__SIMD32(pVec)++;
            acc = __SMLAD(vecHi, wA1, acc);
        }

        /* remaining (dim_vec % 4) columns of the last row */
        for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
        {
            q15_t     vecVal = *pVec++;
            q7_t      wA = *pRow++;

            acc += vecVal * wA;
        }

        *pDst++ = (q15_t) (__SSAT((acc >> out_shift), 16));
    }
 #else
    /* Reference implementation for cores without the DSP extension (Cortex-M0 / Cortex-M3) */
    int       row, col;

    for (row = 0; row < num_of_rows; row++)
    {
        const q7_t *pWeights = pM + row * dim_vec;
        int       acc = ((q31_t)(bias[row]) << bias_shift) + NN_ROUND(out_shift);

        for (col = 0; col < dim_vec; col++)
        {
            acc += pV[col] * pWeights[col];
        }

        /* scale back by out_shift and saturate to 16 bits */
        pOut[row] = (q15_t) __SSAT((acc >> out_shift), 16);
    }
 #endif                          /* ARM_MATH_DSP */

    /* This function has no failure path */
    return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c
@@ -1,403 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_mat_q7_vec_q15_opt.c
 * Description:  Mixed Q15-Q7 opt fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Mixed Q15-Q7 opt fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: 0
   *
   *  Q7_Q15 version of the fully connected layer
   *
   *  Weights are in q7_t and Activations are in q15_t
   *
   *  Limitation: x4 version requires weight reordering to work
   *
   *  Here we use only one pointer to read 4 rows in the weight
   *  matrix. So if the original q7_t matrix looks like this:
   *
   *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
   *
   *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
   *
   *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
   *
   *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
   *
   *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
   *
   *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
   *
   *  We operate on multiples of 4 rows, so the first four rows become
   *
   *  | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 |
   *
   *  | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 |
   *
   *  | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 |
   *
   *  The column left over will be in-order.
   *  which is:
   *  | a17 | a27 | a37 | a47 |
   *
   *  For the left-over rows, we do 1x1 computation, so the data remains
   *  in its original order.
   *
   *  So the stored weight matrix looks like this:
   *
   *  | a11 | a21 | a12 | a22 | a31 | a41 |
   *
   *  | a32 | a42 | a13 | a23 | a14 | a24 |
   *
   *  | a33 | a43 | a34 | a44 | a15 | a25 |
   *
   *  | a16 | a26 | a35 | a45 | a36 | a46 |
   *
   *  | a17 | a27 | a37 | a47 | a51 | a52 |
   *
   *  | a53 | a54 | a55 | a56 | a57 | a61 |
   *
   *  | a62 | a63 | a64 | a65 | a66 | a67 |
   *
   */
 arm_status
 arm_fully_connected_mat_q7_vec_q15_opt(const q15_t * pV,
                                        const q7_t * pM,
                                        const uint16_t dim_vec,
                                        const uint16_t num_of_rows,
                                        const uint16_t bias_shift,
                                        const uint16_t out_shift, const q7_t * bias, q15_t * pOut, q15_t * vec_buffer)
 {
    /*
     * Computes pOut[r] = SSAT16(((bias[r] << bias_shift) + NN_ROUND(out_shift)
     *                           + sum_c pV[c] * W[r][c]) >> out_shift)
     * for r in [0, num_of_rows). Weights are read sequentially through a single
     * pointer: groups of four rows are consumed in the interleaved order handled
     * below, and the remaining (num_of_rows % 4) rows in plain row order.
     * vec_buffer is not referenced by this function.
     */
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    const q7_t *pB = pM;
    q15_t    *pO = pOut;
    const q7_t *pBias = bias;
    const q15_t *pA = pV;
    uint16_t  rowCnt = num_of_rows >> 2;
    while (rowCnt)
    {
        /* one accumulator per row of the current group of four */
        q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        /* two input columns (8 weight bytes) are consumed per iteration */
        uint16_t  colCnt = dim_vec >> 1;
        pA = pV;
 #ifdef USE_INTRINSIC
 #ifndef ARM_MATH_BIG_ENDIAN
        while (colCnt)
        {
            q31_t     inM11, inM12, inM13, inM14;
            q31_t     inV;
            inV = *__SIMD32(pA)++;
            inM11 = *__SIMD32(pB)++;
            /* bytes 1 and 3 of the word feed sum2, bytes 0 and 2 feed sum */
            inM12 = __SXTB16(__ROR(inM11, 8));
            inM11 = __SXTB16(inM11);
            sum = __SMLAD(inM11, inV, sum);
            sum2 = __SMLAD(inM12, inV, sum2);
            inM13 = *__SIMD32(pB)++;
            inM14 = __SXTB16(__ROR(inM13, 8));
            inM13 = __SXTB16(inM13);
            sum3 = __SMLAD(inM13, inV, sum3);
            sum4 = __SMLAD(inM14, inV, sum4);
            colCnt--;
        }
 #else
        while (colCnt)
        {
            q31_t     inM11, inM12, inM13, inM14;
            q31_t     inV;
            inV = *__SIMD32(pA)++;
            inM11 = *__SIMD32(pB)++;
            /* same extraction as above; the accumulator pairing is swapped for big endian */
            inM12 = __SXTB16(__ROR(inM11, 8));
            inM11 = __SXTB16(inM11);
            sum = __SMLAD(inM12, inV, sum);
            sum2 = __SMLAD(inM11, inV, sum2);
            inM13 = *__SIMD32(pB)++;
            inM14 = __SXTB16(__ROR(inM13, 8));
            inM13 = __SXTB16(inM13);
            sum3 = __SMLAD(inM14, inV, sum3);
            sum4 = __SMLAD(inM13, inV, sum4);
            colCnt--;
        }
 #endif                          /* ARM_MATH_BIG_ENDIAN */
 #else
        /*
         * register needed:
         * loop counter: colCnt
         * accumulators: sum, sum2, sum3, sum4
         * pointers: pB, pA
         * weight data: inM11, inM12, inM13, inM14
         * activation data: inV
         *
         * The assembly loop tests its counter only after the first pass
         * (do-while), so it must be skipped when there is nothing to do;
         * otherwise the counter wraps and the loop reads far out of bounds.
         * The counter is modified by "subs", so it is passed as a read-write
         * operand on a scratch copy, and the flags ("cc") and the memory read
         * through pA/pB ("memory") are declared to the compiler.
         */
        if (colCnt)
        {
            uint32_t  loopCnt = colCnt;
 #ifndef ARM_MATH_BIG_ENDIAN
            asm volatile ("COL_LOOP_%=:\n"
                          "ldr.w r4, [%[pA]], #4\n"
                          "ldr.w r1, [%[pB]], #8\n"
                          "mov.w r0, r1, ror #8\n"
                          "sxtb16 r0, r0\n"
                          "sxtb16 r1, r1\n"
                          "smlad %[sum], r4, r1, %[sum]\n"
                          "smlad %[sum2], r4, r0, %[sum2]\n"
                          "ldr.w r3, [%[pB], #-4]\n"
                          "mov.w r2, r3, ror #8\n"
                          "sxtb16 r2, r2\n"
                          "sxtb16 r3, r3\n"
                          "smlad %[sum3], r4, r3, %[sum3]\n"
                          "smlad %[sum4], r4, r2, %[sum4]\n"
                          "subs %[colCnt], #1\n"
                          "bne COL_LOOP_%=\n"
                          :[sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4),
                           [pB] "+r"(pB), [pA] "+r"(pA), [colCnt] "+r"(loopCnt)
                          :
                          :"r0", "r1", "r2", "r3", "r4", "cc", "memory");
 #else
            asm volatile ("COL_LOOP_%=:\n"
                          "ldr.w r4, [%[pA]], #4\n"
                          "ldr.w r1, [%[pB]], #8\n"
                          "mov.w r0, r1, ror #8\n"
                          "sxtb16 r0, r0\n"
                          "sxtb16 r1, r1\n"
                          "smlad %[sum], r4, r0, %[sum]\n"
                          "smlad %[sum2], r4, r1, %[sum2]\n"
                          "ldr.w r3, [%[pB], #-4]\n"
                          "mov.w r2, r3, ror #8\n"
                          "sxtb16 r2, r2\n"
                          "sxtb16 r3, r3\n"
                          "smlad %[sum3], r4, r2, %[sum3]\n"
                          "smlad %[sum4], r4, r3, %[sum4]\n"
                          "subs %[colCnt], #1\n"
                          "bne COL_LOOP_%=\n"
                          :[sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4),
                           [pB] "+r"(pB), [pA] "+r"(pA), [colCnt] "+r"(loopCnt)
                          :
                          :"r0", "r1", "r2", "r3", "r4", "cc", "memory");
 #endif                          /* ARM_MATH_BIG_ENDIAN */
        }
 #endif                          /* USE_INTRINSIC */
        /* odd dim_vec: the last column holds one weight per row, in row order */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inV = *pA++;
            q7_t      inM = *pB++;
            q7_t      inM2 = *pB++;
            q7_t      inM3 = *pB++;
            q7_t      inM4 = *pB++;
            sum += inV * inM;
            sum2 += inV * inM2;
            sum3 += inV * inM3;
            sum4 += inV * inM4;
            colCnt--;
        }                       /* while over colCnt */
        *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum3 >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum4 >> out_shift), 16));
        /* adjust the pointers and counters */
        rowCnt--;
    }
    /* left-over part of the rows: stored in plain row order, one row at a time */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        q31_t     sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = dim_vec >> 2;
        pA = pV;
        while (colCnt)
        {
            q31_t     inV1, inV2, inM11, inM12;
            pB = (q7_t *) read_and_pad((void *)pB, &inM11, &inM12);
            inV1 = *__SIMD32(pA)++;
            sum = __SMLAD(inV1, inM11, sum);
            inV2 = *__SIMD32(pA)++;
            sum = __SMLAD(inV2, inM12, sum);
            colCnt--;
        }
        /* left-over of the vector */
        colCnt = dim_vec & 0x3;
        while (colCnt)
        {
            q15_t     inV = *pA++;
            q7_t      inM = *pB++;
            sum += inV * inM;
            colCnt--;
        }
        *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
        rowCnt--;
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  rowCnt = num_of_rows >> 2;
    const q7_t *pB = pM;
    const q15_t *pA;
    q15_t    *pO = pOut;
    const q7_t *pBias = bias;
    while (rowCnt)
    {
        q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = dim_vec >> 1;
        pA = pV;
        while (colCnt)
        {
            q15_t     inA1 = *pA++;
            q15_t     inA2 = *pA++;
            /* read order 1,3,2,4 mirrors the interleaved weight storage */
            q7_t      inB1 = *pB++;
            q7_t      inB3 = *pB++;
            q7_t      inB2 = *pB++;
            q7_t      inB4 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;
            sum2 += inA1 * inB3 + inA2 * inB4;
            inB1 = *pB++;
            inB3 = *pB++;
            inB2 = *pB++;
            inB4 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;
            sum4 += inA1 * inB3 + inA2 * inB4;
            colCnt--;
        }
        /* odd dim_vec: the last column holds one weight per row, in row order */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inA = *pA++;
            q7_t      inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;
            colCnt--;
        }
        *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
        rowCnt--;
    }
    /* left-over rows are stored in plain row order */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        int       j;
        pA = pV;
        for (j = 0; j < dim_vec; j++)
        {
            q15_t     inA = *pA++;
            q7_t      inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
        rowCnt--;
    }
 #endif                          /* ARM_MATH_DSP */
    /* This function has no failure path */
    return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c
@@ -1,193 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_q15.c
 * Description:  Q15 basic fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Q15 basic fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: 0
   *
   */
 arm_status
 arm_fully_connected_q15(const q15_t * pV,
                         const q15_t * pM,
                         const uint16_t dim_vec,
                         const uint16_t num_of_rows,
                         const uint16_t bias_shift,
                         const uint16_t out_shift,
                         const q15_t * bias,
                         q15_t * pOut,
                         q15_t * vec_buffer)
 {
 #if defined (ARM_MATH_DSP)
    /* DSP build (Cortex-M4 / Cortex-M7): two weight rows share each pass over pV */
    const q15_t *pRow = pM;
    const q15_t *pRowNext;
    const q15_t *pVec;
    const q15_t *pBiasCur = bias;
    q15_t    *pDst = pOut;
    uint16_t  rowsLeft;
    uint16_t  colsLeft;

    /* every pass of this loop produces two output values */
    for (rowsLeft = num_of_rows >> 1; rowsLeft != 0; rowsLeft--)
    {
        /* each accumulator starts from the left-shifted bias plus the rounding term */
        q31_t     acc = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     accNext = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);

        pVec = pV;
        pRowNext = pRow + dim_vec;

        /* main part: four columns of both rows per iteration, as two 32-bit reads each */
        for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
        {
            q31_t     vecPair, wA, wB;

            vecPair = *__SIMD32(pVec)++;
            wA = *__SIMD32(pRow)++;
            acc = __SMLAD(vecPair, wA, acc);
            wB = *__SIMD32(pRowNext)++;
            accNext = __SMLAD(vecPair, wB, accNext);

            vecPair = *__SIMD32(pVec)++;
            wA = *__SIMD32(pRow)++;
            acc = __SMLAD(vecPair, wA, acc);
            wB = *__SIMD32(pRowNext)++;
            accNext = __SMLAD(vecPair, wB, accNext);
        }

        /* remaining (dim_vec % 4) columns, one element at a time */
        for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
        {
            q15_t     vecVal = *pVec++;
            q15_t     wA = *pRow++;
            q15_t     wB = *pRowNext++;

            acc += vecVal * wA;
            accNext += vecVal * wB;
        }

        *pDst++ = (q15_t) (__SSAT((acc >> out_shift), 16));
        *pDst++ = (q15_t) (__SSAT((accNext >> out_shift), 16));

        /* pRow now points at the second row of the pair: step over it */
        pRow = pRow + dim_vec;
    }

    /* an odd row count leaves exactly one row to process on its own */
    if (num_of_rows & 0x1)
    {
        q31_t     acc = ((q31_t)(*pBiasCur++) << bias_shift) + NN_ROUND(out_shift);

        pVec = pV;

        for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
        {
            q31_t     vecPair, wA;

            vecPair = *__SIMD32(pVec)++;
            wA = *__SIMD32(pRow)++;
            acc = __SMLAD(vecPair, wA, acc);

            vecPair = *__SIMD32(pVec)++;
            wA = *__SIMD32(pRow)++;
            acc = __SMLAD(vecPair, wA, acc);
        }

        /* remaining (dim_vec % 4) columns of the last row */
        for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
        {
            q15_t     vecVal = *pVec++;
            q15_t     wA = *pRow++;

            acc += vecVal * wA;
        }

        *pDst++ = (q15_t) (__SSAT((acc >> out_shift), 16));
    }
 #else
    /* Reference implementation for cores without the DSP extension (Cortex-M0 / Cortex-M3) */
    int       row, col;

    for (row = 0; row < num_of_rows; row++)
    {
        const q15_t *pWeights = pM + row * dim_vec;
        int       acc = ((q31_t)(bias[row]) << bias_shift) + NN_ROUND(out_shift);

        for (col = 0; col < dim_vec; col++)
        {
            acc += pV[col] * pWeights[col];
        }

        /* scale back by out_shift and saturate to 16 bits */
        pOut[row] = (q15_t) __SSAT((acc >> out_shift), 16);
    }
 #endif                          /* ARM_MATH_DSP */

    /* This function has no failure path */
    return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c
@@ -1,332 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_q15_opt.c
 * Description:  Q15 opt fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Q15 opt fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: 0
   *
   *  Here we use only one pointer to read 4 rows in the weight
   *  matrix. So if the original matrix looks like this:
   *
   *  | a11 | a12 | a13 |
   *
   *  | a21 | a22 | a23 |
   *
   *  | a31 | a32 | a33 |
   *
   *  | a41 | a42 | a43 |
   *
   *  | a51 | a52 | a53 |
   *
   *  | a61 | a62 | a63 |
   *
   *  We operate on multiples of 4 rows, so the first four rows become
   *
   *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
   *
   *  | a13 | a23 | a33 | a43 |
   *
   *  Remaining rows are kept the same original order.
   *
   *  So the stored weight matrix looks like this:
   *
   *
   *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
   *
   *  | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 |
   *
   *  | a62 | a63 |
   */
 arm_status
 arm_fully_connected_q15_opt(const q15_t * pV,
                             const q15_t * pM,
                             const uint16_t dim_vec,
                             const uint16_t num_of_rows,
                             const uint16_t bias_shift,
                             const uint16_t out_shift, 
                             const q15_t * bias, 
                             q15_t * pOut, 
                             q15_t * vec_buffer)
 {
    /*
     * Computes pOut[r] = SSAT16(((bias[r] << bias_shift) + NN_ROUND(out_shift)
     *                           + sum_c pV[c] * W[r][c]) >> out_shift)
     * for r in [0, num_of_rows). Weights are read sequentially through a single
     * pointer: groups of four rows are consumed two columns at a time, and the
     * remaining (num_of_rows % 4) rows in plain row order.
     * vec_buffer is not referenced by this function.
     */
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    const q15_t *pB = pM;
    q15_t    *pO = pOut;
    const q15_t *pBias = bias;
    const q15_t *pA = pV;
    uint16_t  rowCnt = num_of_rows >> 2;
    while (rowCnt)
    {
        /* one accumulator per row of the current group of four */
        q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        /* two input columns (four 32-bit weight words) are consumed per iteration */
        uint16_t  colCnt = dim_vec >> 1;
        pA = pV;
 #ifdef USE_INTRINSIC
        while (colCnt)
        {
            q31_t     inM11, inM12, inM13, inM14;
            q31_t     inV;
            inV = *__SIMD32(pA)++;
            inM11 = *__SIMD32(pB)++;
            sum = __SMLAD(inV, inM11, sum);
            inM12 = *__SIMD32(pB)++;
            sum2 = __SMLAD(inV, inM12, sum2);
            inM13 = *__SIMD32(pB)++;
            sum3 = __SMLAD(inV, inM13, sum3);
            inM14 = *__SIMD32(pB)++;
            sum4 = __SMLAD(inV, inM14, sum4);
            colCnt--;
        }
 #else
        /*
         * register needed:
         * loop counter: colCnt
         * accumulators: sum, sum2, sum3, sum4
         * pointers: pB, pA
         * weight data: inM11, inM12, inM13, inM14
         * activation data: inV
         *
         * The assembly loop tests its counter only after the first pass
         * (do-while), so it must be skipped when there is nothing to do;
         * otherwise the counter wraps and the loop reads far out of bounds.
         * The counter is modified by "subs", so it is passed as a read-write
         * operand on a scratch copy, and the flags ("cc") and the memory read
         * through pA/pB ("memory") are declared to the compiler.
         */
        if (colCnt)
        {
            uint32_t  loopCnt = colCnt;
            asm volatile ("COL_LOOP_%=:\n"
                          "ldr.w r4, [%[pA]], #4\n"
                          "ldr.w r0, [%[pB]], #16\n"
                          "smlad %[sum], r4, r0, %[sum]\n"
                          "ldr.w r1, [%[pB] , #-12]\n"
                          "smlad %[sum2], r4, r1, %[sum2]\n"
                          "ldr.w r2, [%[pB] , #-8]\n"
                          "smlad %[sum3], r4, r2, %[sum3]\n"
                          "ldr.w r3, [%[pB] , #-4]\n"
                          "smlad %[sum4], r4, r3, %[sum4]\n"
                          "subs %[colCnt], #1\n"
                          "bne COL_LOOP_%=\n"
                          :[sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3), [sum4] "+r"(sum4),
                           [pB] "+r"(pB), [pA] "+r"(pA), [colCnt] "+r"(loopCnt)
                          :
                          :"r0", "r1", "r2", "r3", "r4", "cc", "memory");
        }
 #endif                          /* USE_INTRINSIC */
        /* odd dim_vec: the last column holds one weight per row, in row order */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inV = *pA++;
            q15_t     inM = *pB++;
            q15_t     inM2 = *pB++;
            q15_t     inM3 = *pB++;
            q15_t     inM4 = *pB++;
            sum += inV * inM;
            sum2 += inV * inM2;
            sum3 += inV * inM3;
            sum4 += inV * inM4;
            colCnt--;
        }                       /* while over colCnt */
        *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum2 >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum3 >> out_shift), 16));
        *pO++ = (q15_t) (__SSAT((sum4 >> out_shift), 16));
        /* adjust the pointers and counters */
        rowCnt--;
    }
    /* left-over part of the rows: stored in plain row order, one row at a time */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        q31_t     sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = dim_vec >> 2;
        pA = pV;
        while (colCnt)
        {
            q31_t     inV1, inV2, inM1, inM2;
            inM1 = *__SIMD32(pB)++;
            inV1 = *__SIMD32(pA)++;
            sum = __SMLAD(inV1, inM1, sum);
            inM2 = *__SIMD32(pB)++;
            inV2 = *__SIMD32(pA)++;
            sum = __SMLAD(inV2, inM2, sum);
            colCnt--;
        }
        /* left-over of the vector */
        colCnt = dim_vec & 0x3;
        while (colCnt)
        {
            q15_t     inV = *pA++;
            q15_t     inM = *pB++;
            sum += inV * inM;
            colCnt--;
        }
        *pO++ = (q15_t) (__SSAT((sum >> out_shift), 16));
        rowCnt--;
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    uint16_t  rowCnt = num_of_rows >> 2;
    const q15_t *pB = pM;
    const q15_t *pA;
    q15_t    *pO = pOut;
    const q15_t *pBias = bias;
    while (rowCnt)
    {
        q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        uint16_t  colCnt = dim_vec >> 1;
        pA = pV;
        while (colCnt)
        {
            /* two activations, then two weights for each of the four rows in turn */
            q15_t     inA1 = *pA++;
            q15_t     inA2 = *pA++;
            q15_t     inB1 = *pB++;
            q15_t     inB2 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum2 += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;
            inB1 = *pB++;
            inB2 = *pB++;
            sum4 += inA1 * inB1 + inA2 * inB2;
            colCnt--;
        }
        /* odd dim_vec: the last column holds one weight per row, in row order */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;
            colCnt--;
        }
        *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
        rowCnt--;
    }
    /* left-over rows are stored in plain row order */
    rowCnt = num_of_rows & 0x3;
    while (rowCnt)
    {
        int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
        int       j;
        pA = pV;
        for (j = 0; j < dim_vec; j++)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
        rowCnt--;
    }
 #endif                          /* ARM_MATH_DSP */
    /* This function has no failure path */
    return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c
@@ -1,198 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_q7.c
 * Description:  Q7 basic fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Q7 basic fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: dim_vec
   *
   * vec_buffer is only written and read when ARM_MATH_DSP is defined;
   * the reference path does not touch it.
   *
   * The weight matrix is expected in its regular, non-interleaved,
   * row-major form (row i starts at pM[i * dim_vec]).
   *
   * For every row the accumulator starts at
   * (bias << bias_shift) + NN_ROUND(out_shift), the dot product of the
   * row with the input vector is added, and the result is shifted right
   * by out_shift and saturated to 8 bits.
   *
   */
 arm_status
 arm_fully_connected_q7(const q7_t * pV,
                        const q7_t * pM,
                        const uint16_t dim_vec,
                        const uint16_t num_of_rows,
                        const uint16_t bias_shift,
                        const uint16_t out_shift, const q7_t * bias, q7_t * pOut, q15_t * vec_buffer)
 {
 #if defined (ARM_MATH_DSP)
     /* DSP-extension path (Cortex-M4 / Cortex-M7): two weight rows per pass */
     const q7_t *wRow0 = pM;      /* walks the first row of the current pair */
     const q7_t *wRow1;           /* walks the second row of the current pair */
     const q7_t *biasPtr = bias;
     q7_t     *dst = pOut;
     q15_t    *vec;
     uint16_t  rowsLeft;
     uint16_t  colsLeft;
     /* widen the q7 input vector into the q15 scratch buffer once, up front */
     arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec);
     for (rowsLeft = num_of_rows >> 1; rowsLeft != 0; rowsLeft--)
     {
         q31_t     acc0 = ((q31_t)(*biasPtr++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     acc1 = ((q31_t)(*biasPtr++) << bias_shift) + NN_ROUND(out_shift);
         vec = vec_buffer;
         wRow1 = wRow0 + dim_vec;
         /* main part: four columns of both rows per iteration */
         for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
         {
             q31_t     vecPair, w0a, w0b, w1a, w1b;
             /* NOTE(review): read_and_pad_reordered is defined elsewhere; it is
              * presumed to consume four q7 weights and return them as two
              * packed q15 pairs matching the reordered vector - confirm. */
             wRow0 = (q7_t *) read_and_pad_reordered((void *)wRow0, &w0a, &w0b);
             wRow1 = (q7_t *) read_and_pad_reordered((void *)wRow1, &w1a, &w1b);
             vecPair = *__SIMD32(vec)++;
             acc0 = __SMLAD(vecPair, w0a, acc0);
             acc1 = __SMLAD(vecPair, w1a, acc1);
             vecPair = *__SIMD32(vec)++;
             acc0 = __SMLAD(vecPair, w0b, acc0);
             acc1 = __SMLAD(vecPair, w1b, acc1);
         }
         /* remaining 0..3 columns, one element at a time */
         for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
         {
             q7_t      vecVal = *vec++;
             q15_t     w0 = *wRow0++;
             q15_t     w1 = *wRow1++;
             acc0 += vecVal * w0;
             acc1 += vecVal * w1;
         }
         *dst++ = (q7_t) (__SSAT((acc0 >> out_shift), 8));
         *dst++ = (q7_t) (__SSAT((acc1 >> out_shift), 8));
         /* wRow0 now sits at the start of the second row; skip over it */
         wRow0 += dim_vec;
     }
     /* odd row count: process the one remaining row on its own */
     for (rowsLeft = num_of_rows & 0x1; rowsLeft != 0; rowsLeft--)
     {
         q31_t     acc = ((q31_t)(*biasPtr++) << bias_shift) + NN_ROUND(out_shift);
         vec = vec_buffer;
         for (colsLeft = dim_vec >> 2; colsLeft != 0; colsLeft--)
         {
             q31_t     vecPairA, vecPairB, wA, wB;
             wRow0 = (q7_t *) read_and_pad_reordered((void *)wRow0, &wA, &wB);
             vecPairA = *__SIMD32(vec)++;
             acc = __SMLAD(vecPairA, wA, acc);
             vecPairB = *__SIMD32(vec)++;
             acc = __SMLAD(vecPairB, wB, acc);
         }
         /* remaining 0..3 columns of the vector */
         for (colsLeft = dim_vec & 0x3; colsLeft != 0; colsLeft--)
         {
             q7_t      vecVal = *vec++;
             q15_t     w = *wRow0++;
             acc += vecVal * w;
         }
         *dst++ = (q7_t) (__SSAT((acc >> out_shift), 8));
     }
 #else
     /* Reference implementation for Cortex-M0 and Cortex-M3: plain row-by-row dot product */
     int       row;
     for (row = 0; row < num_of_rows; row++)
     {
         const q7_t *weights = &pM[row * dim_vec];
         int       acc = ((q31_t)(bias[row]) << bias_shift) + NN_ROUND(out_shift);
         int       col;
         for (col = 0; col < dim_vec; col++)
         {
             acc += pV[col] * weights[col];
         }
         pOut[row] = (q7_t) __SSAT((acc >> out_shift), 8);
     }
 #endif                          /* ARM_MATH_DSP */
     /* Always reports ARM_MATH_SUCCESS */
     return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c
@@ -1,484 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_fully_connected_q7_opt.c
 * Description:  Q7 opt fully-connected layer function
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup FC
 * @{
 */
  /**
   * @brief Q7 opt fully-connected layer function
   * @param[in]       pV          pointer to input vector
   * @param[in]       pM          pointer to matrix weights
   * @param[in]       dim_vec     length of the vector
   * @param[in]       num_of_rows number of rows in weight matrix
   * @param[in]       bias_shift  amount of left-shift for bias
   * @param[in]       out_shift   amount of right-shift for output
   * @param[in]       bias        pointer to bias
   * @param[in,out]   pOut        pointer to output vector
   * @param[in,out]   vec_buffer  pointer to buffer space for input
   * @return     The function returns <code>ARM_MATH_SUCCESS</code>
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * vec_buffer size: dim_vec
   *
   * vec_buffer is only used when ARM_MATH_DSP is defined; the reference
   * path reads pV directly.
   *
   * This opt function is designed to work with an interleaved weight
   * matrix. The vector input is assumed to be in q7_t format; the
   * arm_q7_to_q15_reordered_no_shift function is called to expand it into
   * q15_t format with a certain re-ordering, refer to that function's
   * comments for more details.
   *  Here we use only one pointer to read 4 rows in the weight
   *  matrix. So if the original q7_t matrix looks like this:
   *
   *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
   *
   *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
   *
   *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
   *
   *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
   *
   *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
   *
   *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
   *
   *
   *  We operate on multiple-of-4 rows, so the first four rows become
   *
   *  | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 |
   *
   *  | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 |
   *
   *  | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 |
   *
   *  So within the kernel, we first read the re-ordered vector in as:
   *
   *  | b1  | b3  | and | b2  | b4  |
   *
   *  the four q31_t weights will look like
   *
   *  | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 |
   *
   *  | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 |
   *
   *  The left-over columns are stored in order, which is:
   *
   *  | a17 | a27 | a37 | a47 |
   *
   *  For the left-over rows, we do 1x1 computation, so the data remains
   *  in its original order.
   *
   *  So the stored weight matrix looks like this:
   *
   *  | a11 | a21 | a13 | a23 | a31 | a41 |
   *
   *  | a33 | a43 | a12 | a22 | a14 | a24 |
   *
   *  | a32 | a42 | a34 | a44 | a15 | a25 |
   *
   *  | a35 | a45 | a16 | a26 | a36 | a46 |
   *
   *  | a17 | a27 | a37 | a47 | a51 | a52 |
   *
   *  | a53 | a54 | a55 | a56 | a57 | a61 |
   *
   *  | a62 | a63 | a64 | a65 | a66 | a67 |
   *
   *
   */
 arm_status
 arm_fully_connected_q7_opt(const q7_t * pV,
                            const q7_t * pM,
                            const uint16_t dim_vec,
                            const uint16_t num_of_rows,
                            const uint16_t bias_shift,
                            const uint16_t out_shift, 
                            const q7_t * bias, 
                            q7_t * pOut, 
                            q15_t * vec_buffer)
 {
 #if defined (ARM_MATH_DSP)
     /* Run the following code for Cortex-M4 and Cortex-M7 */
     const q7_t *pB = pM;
     q7_t     *pO = pOut;
     const q7_t *pBias = bias;
     q15_t    *pA;
     uint16_t  rowCnt = num_of_rows >> 2;
     /* expand the q7 input vector into the q15 scratch buffer */
     arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec);
     /* four interleaved rows per pass, all read through the single pointer pB */
     while (rowCnt)
     {
         q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         uint16_t  colCnt = dim_vec >> 2;
         pA = vec_buffer;
 #ifdef USE_INTRINSIC
 #ifndef ARM_MATH_BIG_ENDIAN
         while (colCnt)
         {
             q31_t     inM11, inM12, inM13, inM14;
             q31_t     inV;
             inV = *__SIMD32(pA)++;
             inM11 = *__SIMD32(pB)++;
             inM12 = __SXTB16(__ROR(inM11, 8));
             inM11 = __SXTB16(inM11);
             sum = __SMLAD(inM11, inV, sum);
             sum2 = __SMLAD(inM12, inV, sum2);
             inM13 = *__SIMD32(pB)++;
             inM14 = __SXTB16(__ROR(inM13, 8));
             inM13 = __SXTB16(inM13);
             sum3 = __SMLAD(inM13, inV, sum3);
             sum4 = __SMLAD(inM14, inV, sum4);
             inV = *__SIMD32(pA)++;
             inM11 = *__SIMD32(pB)++;
             inM12 = __SXTB16(__ROR(inM11, 8));
             inM11 = __SXTB16(inM11);
             sum = __SMLAD(inM11, inV, sum);
             sum2 = __SMLAD(inM12, inV, sum2);
             inM13 = *__SIMD32(pB)++;
             inM14 = __SXTB16(__ROR(inM13, 8));
             inM13 = __SXTB16(inM13);
             sum3 = __SMLAD(inM13, inV, sum3);
             sum4 = __SMLAD(inM14, inV, sum4);
             colCnt--;
         }
 #else
         /* big-endian: the rotated / unrotated byte pairs feed the opposite accumulators */
         while (colCnt)
         {
             q31_t     inM11, inM12, inM13, inM14;
             q31_t     inV;
             inV = *__SIMD32(pA)++;
             inM11 = *__SIMD32(pB)++;
             inM12 = __SXTB16(__ROR(inM11, 8));
             inM11 = __SXTB16(inM11);
             sum = __SMLAD(inM12, inV, sum);
             sum2 = __SMLAD(inM11, inV, sum2);
             inM13 = *__SIMD32(pB)++;
             inM14 = __SXTB16(__ROR(inM13, 8));
             inM13 = __SXTB16(inM13);
             sum3 = __SMLAD(inM14, inV, sum3);
             sum4 = __SMLAD(inM13, inV, sum4);
             inV = *__SIMD32(pA)++;
             inM11 = *__SIMD32(pB)++;
             inM12 = __SXTB16(__ROR(inM11, 8));
             inM11 = __SXTB16(inM11);
             sum = __SMLAD(inM12, inV, sum);
             sum2 = __SMLAD(inM11, inV, sum2);
             inM13 = *__SIMD32(pB)++;
             inM14 = __SXTB16(__ROR(inM13, 8));
             inM13 = __SXTB16(inM13);
             sum3 = __SMLAD(inM14, inV, sum3);
             sum4 = __SMLAD(inM13, inV, sum4);
             colCnt--;
         }
 #endif                          /* ARM_MATH_BIG_ENDIAN */
 #else
         /*
          * register needed:
          * loop counter: colCnt
          * accumulators: sum, sum2, sum3, sum4
          * pointers: pB, pA
          * weight data: inM11, inM12, inM13, inM14
          * activation data: inV
          */
         /*
          * The assembly loop tests its counter only at the bottom, so it must
          * not be entered with a zero count (dim_vec < 4): the body would run
          * once and the counter would wrap around.  The intrinsic variant above
          * skips the loop in that case; the guard keeps both variants in step.
          */
         if (colCnt != 0U)
         {
             /* 32-bit counter that the asm is allowed to modify ("+r" operand) */
             uint32_t  asmCnt = colCnt;
 #ifndef ARM_MATH_BIG_ENDIAN
             asm volatile ("COL_LOOP_%=:\n"
                           "ldr.w r4, [%[pA]], #8\n"
                           "ldr.w r1, [%[pB]], #16\n"
                           "mov.w r0, r1, ror #8\n"
                           "sxtb16 r0, r0\n"
                           "sxtb16 r1, r1\n"
                           "smlad %[sum], r4, r1, %[sum]\n"
                           "smlad %[sum2], r4, r0, %[sum2]\n"
                           "ldr.w r3, [%[pB], #-12]\n"
                           "mov.w r2, r3, ror #8\n"
                           "sxtb16 r2, r2\n"
                           "sxtb16 r3, r3\n"
                           "smlad %[sum3], r4, r3, %[sum3]\n"
                           "smlad %[sum4], r4, r2, %[sum4]\n"
                           "ldr.w r4, [%[pA], #-4]\n"
                           "ldr.w r1, [%[pB], #-8]\n"
                           "mov.w r0, r1, ror #8\n"
                           "sxtb16 r0, r0\n"
                           "sxtb16 r1, r1\n"
                           "smlad %[sum], r4, r1, %[sum]\n"
                           "smlad %[sum2], r4, r0, %[sum2]\n"
                           "ldr.w r3, [%[pB], #-4]\n"
                           "mov.w r2, r3, ror #8\n"
                           "sxtb16 r2, r2\n"
                           "sxtb16 r3, r3\n"
                           "smlad %[sum3], r4, r3, %[sum3]\n"
                           "smlad %[sum4], r4, r2, %[sum4]\n"
                           "subs %[colCnt], #1\n"
                           "bne COL_LOOP_%=\n"
                           :[sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3),
                            [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA),
                            [colCnt] "+r"(asmCnt)
                           :
                           /* subs changes the flags; the loads read memory not listed as operands */
                           :"r0", "r1", "r2", "r3", "r4", "cc", "memory");
 #else
             asm volatile ("COL_LOOP_%=:\n"
                           "ldr.w r4, [%[pA]], #8\n"
                           "ldr.w r1, [%[pB]], #16\n"
                           "mov.w r0, r1, ror #8\n"
                           "sxtb16 r0, r0\n"
                           "sxtb16 r1, r1\n"
                           "smlad %[sum], r4, r0, %[sum]\n"
                           "smlad %[sum2], r4, r1, %[sum2]\n"
                           "ldr.w r3, [%[pB], #-12]\n"
                           "mov.w r2, r3, ror #8\n"
                           "sxtb16 r2, r2\n"
                           "sxtb16 r3, r3\n"
                           "smlad %[sum3], r4, r2, %[sum3]\n"
                           "smlad %[sum4], r4, r3, %[sum4]\n"
                           "ldr.w r4, [%[pA], #-4]\n"
                           "ldr.w r1, [%[pB], #-8]\n"
                           "mov.w r0, r1, ror #8\n"
                           "sxtb16 r0, r0\n"
                           "sxtb16 r1, r1\n"
                           "smlad %[sum], r4, r0, %[sum]\n"
                           "smlad %[sum2], r4, r1, %[sum2]\n"
                           "ldr.w r3, [%[pB], #-4]\n"
                           "mov.w r2, r3, ror #8\n"
                           "sxtb16 r2, r2\n"
                           "sxtb16 r3, r3\n"
                           "smlad %[sum3], r4, r2, %[sum3]\n"
                           "smlad %[sum4], r4, r3, %[sum4]\n"
                           "subs %[colCnt], #1\n"
                           "bne COL_LOOP_%=\n"
                           :[sum] "+r"(sum), [sum2] "+r"(sum2), [sum3] "+r"(sum3),
                            [sum4] "+r"(sum4), [pB] "+r"(pB), [pA] "+r"(pA),
                            [colCnt] "+r"(asmCnt)
                           :
                           /* subs changes the flags; the loads read memory not listed as operands */
                           :"r0", "r1", "r2", "r3", "r4", "cc", "memory");
 #endif                          /* ARM_MATH_BIG_ENDIAN */
         }
 #endif                          /* USE_INTRINSIC */
         /* left-over columns: one vector element against one weight from each of the four rows */
         colCnt = dim_vec & 0x3;
         while (colCnt)
         {
             q15_t     inV = *pA++;
             q7_t      inM = *pB++;
             q7_t      inM2 = *pB++;
             q7_t      inM3 = *pB++;
             q7_t      inM4 = *pB++;
             sum += inV * inM;
             sum2 += inV * inM2;
             sum3 += inV * inM3;
             sum4 += inV * inM4;
             colCnt--;
         }                       /* while over colCnt */
         *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8));
         *pO++ = (q7_t) (__SSAT((sum2 >> out_shift), 8));
         *pO++ = (q7_t) (__SSAT((sum3 >> out_shift), 8));
         *pO++ = (q7_t) (__SSAT((sum4 >> out_shift), 8));
         /* adjust the pointers and counters */
         rowCnt--;
     }
     /* left-over part of the rows: one row at a time, weights in original order */
     rowCnt = num_of_rows & 0x3;
     while (rowCnt)
     {
         q31_t     sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         uint16_t  colCnt = dim_vec >> 2;
         pA = vec_buffer;
         while (colCnt)
         {
             q31_t     inV1, inV2, inM11, inM12;
             /* NOTE(review): read_and_pad_reordered is defined elsewhere; presumed to
              * widen four q7 weights into two packed q15 pairs - confirm. */
             pB = (q7_t *) read_and_pad_reordered((void *)pB, &inM11, &inM12);
             inV1 = *__SIMD32(pA)++;
             sum = __SMLAD(inV1, inM11, sum);
             inV2 = *__SIMD32(pA)++;
             sum = __SMLAD(inV2, inM12, sum);
             colCnt--;
         }
         /* left-over of the vector */
         colCnt = dim_vec & 0x3;
         while (colCnt)
         {
             q15_t     inV = *pA++;
             q7_t      inM = *pB++;
             sum += inV * inM;
             colCnt--;
         }
         *pO++ = (q7_t) (__SSAT((sum >> out_shift), 8));
         rowCnt--;
     }
 #else
     /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
     uint16_t  rowCnt = num_of_rows >> 2;
     const q7_t *pB = pM;
     const q7_t *pA;
     q7_t     *pO = pOut;
     const q7_t *pBias = bias;
     while (rowCnt)
     {
         q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         uint16_t  colCnt = dim_vec >> 2;
         pA = pV;
         while (colCnt)
         {
             /* the 2nd and 3rd reads are swapped on purpose: inA1/inA2 hold
              * vector elements 0 and 2, inA3/inA4 hold elements 1 and 3, which
              * matches the interleaved weight order described above */
             q7_t      inA1 = *pA++;
             q7_t      inA3 = *pA++;
             q7_t      inA2 = *pA++;
             q7_t      inA4 = *pA++;
             q7_t      inB1 = *pB++;
             q7_t      inB3 = *pB++;
             q7_t      inB2 = *pB++;
             q7_t      inB4 = *pB++;
             sum += inA1 * inB1 + inA2 * inB2;
             sum2 += inA1 * inB3 + inA2 * inB4;
             inB1 = *pB++;
             inB3 = *pB++;
             inB2 = *pB++;
             inB4 = *pB++;
             sum3 += inA1 * inB1 + inA2 * inB2;
             sum4 += inA1 * inB3 + inA2 * inB4;
             inB1 = *pB++;
             inB3 = *pB++;
             inB2 = *pB++;
             inB4 = *pB++;
             sum += inA3 * inB1 + inA4 * inB2;
             sum2 += inA3 * inB3 + inA4 * inB4;
             inB1 = *pB++;
             inB3 = *pB++;
             inB2 = *pB++;
             inB4 = *pB++;
             sum3 += inA3 * inB1 + inA4 * inB2;
             sum4 += inA3 * inB3 + inA4 * inB4;
             colCnt--;
         }
         /* left-over columns: weights for the four rows are stored back to back */
         colCnt = dim_vec & 0x3;
         while (colCnt)
         {
             q7_t      inA = *pA++;
             q7_t      inB = *pB++;
             sum += inA * inB;
             inB = *pB++;
             sum2 += inA * inB;
             inB = *pB++;
             sum3 += inA * inB;
             inB = *pB++;
             sum4 += inA * inB;
             colCnt--;
         }
         *pO++ = (q7_t) __SSAT((sum >> out_shift), 8);
         *pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
         *pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
         *pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
         rowCnt--;
     }
     /* left-over rows: plain in-order dot product */
     rowCnt = num_of_rows & 0x3;
     while (rowCnt)
     {
         int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
         int       j;
         pA = pV;
         for (j = 0; j < dim_vec; j++)
         {
             q7_t      inA = *pA++;
             q7_t      inB = *pB++;
             ip_out += inA * inB;
         }
         *pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8);
         rowCnt--;
     }
 #endif                          /* ARM_MATH_DSP */
     /* Return to ARM_MATH_SUCCESS */
     return (ARM_MATH_SUCCESS);
 }
 /**
 * @} end of FC group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c
@@ -1,147 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_mult_q15.c
 * Description:  Q15 vector multiplication with variable output shifts
 *
 * $Date:        13. July 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_nnfunctions.h"
 /**    
 * @ingroup groupSupport    
 */
 /**
 * @addtogroup NNBasicMath
 * @{
 */
 /**
  * @brief           Q15 vector multiplication with variable output shifts
  * @param[in]       *pSrcA        pointer to the first input vector
  * @param[in]       *pSrcB        pointer to the second input vector
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
  * @return none.
  *
  * Each output sample is (a * b + NN_ROUND(out_shift)) >> out_shift,
  * saturated to 16 bits.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
  * The function uses saturating arithmetic.
  * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
  */
 void arm_nn_mult_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   q15_t * pDst,
   const uint16_t out_shift,
   uint32_t blockSize)
 {
   uint32_t remaining;                            /* samples (or groups of 4) still to process */
 #if defined (ARM_MATH_DSP)
   /* DSP-extension build: four samples per iteration using 32-bit loads/stores */
   q31_t wordA0, wordA1, wordB0, wordB1;          /* two packed q15 samples each */
   q31_t prodHi0, prodLo0, prodHi1, prodLo1;      /* products of the upper / lower halves */
   q15_t resHi0, resLo0, resHi1, resLo1;          /* rounded, shifted, saturated results */
   for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
   {
     /* fetch all four samples of both inputs before anything is stored */
     wordA0 = *__SIMD32(pSrcA)++;
     wordB0 = *__SIMD32(pSrcB)++;
     wordA1 = *__SIMD32(pSrcA)++;
     wordB1 = *__SIMD32(pSrcB)++;
     /* multiply matching 16-bit halves */
     prodHi0 = (q31_t) ((q15_t) (wordA0 >> 16) * (q15_t) (wordB0 >> 16));
     prodLo0 = (q31_t) ((q15_t) wordA0 * (q15_t) wordB0);
     prodHi1 = (q31_t) ((q15_t) (wordA1 >> 16) * (q15_t) (wordB1 >> 16));
     prodLo1 = (q31_t) ((q15_t) wordA1 * (q15_t) wordB1);
     /* round, shift and saturate to 16 bits */
     resHi0 = (q15_t) __SSAT((prodHi0 + NN_ROUND(out_shift)) >> out_shift, 16);
     resLo0 = (q15_t) __SSAT((prodLo0 + NN_ROUND(out_shift)) >> out_shift, 16);
     resHi1 = (q15_t) __SSAT((prodHi1 + NN_ROUND(out_shift)) >> out_shift, 16);
     resLo1 = (q15_t) __SSAT((prodLo1 + NN_ROUND(out_shift)) >> out_shift, 16);
     /* repack into 32-bit words and store; the little- and big-endian
      * builds used the identical packing, so no endianness switch is needed */
     *__SIMD32(pDst)++ = __PKHBT(resLo0, resHi0, 16);
     *__SIMD32(pDst)++ = __PKHBT(resLo1, resHi1, 16);
   }
   /* 0..3 samples are left for the scalar loop below */
   remaining = blockSize % 0x4U;
 #else
   /* no DSP extension: every sample goes through the scalar loop */
   remaining = blockSize;
 #endif /* #if defined (ARM_MATH_DSP) */
   for (; remaining > 0U; remaining--)
   {
     /* C = A * B, rounded, shifted and saturated */
     const q31_t a = (q31_t) (*pSrcA++);
     const q15_t b = *pSrcB++;
     *pDst++ = (q15_t) __SSAT(((a * b + NN_ROUND(out_shift)) >> out_shift), 16);
   }
 }
 /**
 * @} end of NNBasicMath group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c
@@ -1,119 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nn_mult_q7.c
 * Description:  Q7 vector multiplication with variable output shifts
 *
 * $Date:        13. July 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_nnfunctions.h"
 /**    
 * @ingroup groupSupport    
 */
 /**
 * @addtogroup NNBasicMath
 * @{
 */
 /**
  * @brief           Q7 vector multiplication with variable output shifts
  * @param[in]       *pSrcA        pointer to the first input vector
  * @param[in]       *pSrcB        pointer to the second input vector
  * @param[out]      *pDst         pointer to the output vector
  * @param[in]       out_shift     amount of right-shift for output
  * @param[in]       blockSize     number of samples in each vector
  * @return none.
  *
  * Each output sample is (a * b + NN_ROUND(out_shift)) >> out_shift,
  * saturated to 8 bits.
  *
  * <b>Scaling and Overflow Behavior:</b>
  * \par
  * The function uses saturating arithmetic.
  * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
  */
 void arm_nn_mult_q7(
   q7_t * pSrcA,
   q7_t * pSrcB,
   q7_t * pDst,
   const uint16_t out_shift,
   uint32_t blockSize)
 {
   uint32_t remaining;                            /* samples (or groups of 4) still to process */
 #if defined (ARM_MATH_DSP)
   /* DSP-extension build: four samples per iteration, written with one 32-bit store */
   q7_t quad[4];                                  /* the four results of one iteration */
   uint32_t k;
   for (remaining = blockSize >> 2U; remaining > 0U; remaining--)
   {
     /* compute all four products (inputs are read in order) before storing */
     for (k = 0U; k < 4U; k++)
     {
       quad[k] = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
     }
     /* pack the four bytes into one word and store it */
     *__SIMD32(pDst)++ = __PACKq7(quad[0], quad[1], quad[2], quad[3]);
   }
   /* 0..3 samples are left for the scalar loop below */
   remaining = blockSize % 0x4U;
 #else
   /* no DSP extension: every sample goes through the scalar loop */
   remaining = blockSize;
 #endif /* #if defined (ARM_MATH_DSP) */
   for (; remaining > 0U; remaining--)
   {
     /* C = A * B, rounded, shifted and saturated */
     *pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8);
   }
 }
 /**
 * @} end of NNBasicMath group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c
@@ -1,297 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_nntables.c
 * Description:  Lookup tables for various activation functions
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_nnsupportfunctions.h"
 /**
 * @brief tables for various activation functions
 *
 * This file includes the definitions of common tables.
 * Most of them are used for activation functions 
 *
 * Assumption:
 * Unified table: input is 3.x format, i.e, range of [-8, 8)
 * sigmoid(8) = 0.9996646498695336
 * tanh(8) = 0.9999997749296758
 * The accuracy here should be good enough
 *
 * 2-stage HL table: 
 *
 * The entire input range is divided into two parts:
 *
 * Low range table: 0x000x xxxx or 0x111x xxxx 
 * table entry will be the binary number excluding the first
 * two digits, i.e., 0x0x xxxx or 0x1x xxxx
 * 
 *
 *
 * High range table 0x0010 0000 -- 0x0111 1111
 *                  0x1000 0000 -- 0x1101 1111
 * 
 * For positive numbers, table entry will be
 * 0x0010 0000 -- 0x0111 1111 minus 0x0010 0000
 * i.e., 0x0000 0000 - 0x0101 1111
 *
 * same thing for the negative numbers, table entry will be
 * 0x1000 0000 -- 0x1101 1111 minus 0x0010 0000
 * i.e., 0x0110 0000 - 0x1011 1111
 */
 /*
  * Unified sigmoid lookup table, 256 entries with q7 outputs.
  * Entries 0..127 rise from 0x40 (0.5 in q7, i.e. sigmoid(0)) to 0x7f;
  * entries 128..255 rise from 0x00 to 0x3e.
  * NOTE(review): this layout suggests the index is the input byte taken as
  * an unsigned value (non-negative inputs first, negative inputs second),
  * with the input in 3.x format, range [-8, 8) - confirm against the callers.
  */
 const q7_t sigmoidTable_q7[256] = {
    0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e,
    0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c,
    0x5e, 0x5f, 0x61, 0x62, 0x63, 0x65, 0x66, 0x67,
    0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
    0x71, 0x72, 0x72, 0x73, 0x74, 0x74, 0x75, 0x76,
    0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a,
    0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c,
    0x7c, 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e,
    0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
    0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04,
    0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06,
    0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09,
    0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21,
    0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, 0x2e,
    0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
 };
 /*
  * Unified sigmoid lookup table, 256 entries with q15 outputs.
  * Entries 0..127 rise from 0x4000 (0.5 in q15, i.e. sigmoid(0)) to 0x7ff4;
  * entries 128..255 rise from 0x000b to 0x3e00.
  * NOTE(review): same half-and-half layout as the q7 table, so the index is
  * presumably the top input bits taken as unsigned (non-negative inputs
  * first, negative inputs second) - confirm against the callers.
  */
 const q15_t sigmoidTable_q15[256] = {
    0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8,
    0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, 0x5a57, 0x5bfb,
    0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f,
    0x68a6, 0x69d2, 0x6af1, 0x6c05, 0x6d0d, 0x6e09, 0x6efb, 0x6fe2,
    0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7,
    0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f,
    0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03,
    0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d,
    0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81,
    0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17,
    0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72,
    0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa,
    0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc,
    0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0,
    0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed,
    0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4,
    0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
    0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c,
    0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e,
    0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c,
    0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d,
    0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce,
    0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152,
    0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a,
    0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388,
    0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8,
    0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a,
    0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70,
    0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e,
    0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, 0x1f5f, 0x20e0,
    0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76,
    0x3053, 0x3238, 0x3424, 0x3615, 0x380b, 0x3a04, 0x3c01, 0x3e00,
 };
 /*
  * Low-range sigmoid table of the 2-stage HL scheme, 128 entries with q15 outputs.
  * Entries 0..63 rise from 0x4000 (0.5 in q15) to 0x7051;
  * entries 64..127 rise from 0x0f42 to 0x3f00.
  * NOTE(review): the first half presumably serves small non-negative inputs
  * and the second half small negative inputs, as described for the
  * "Low range table" in the file header - confirm against the callers.
  */
 const q15_t sigmoidLTable_q15[128] = {
    0x4000, 0x4100, 0x4200, 0x42ff, 0x43ff, 0x44fd, 0x45fc, 0x46f9,
    0x47f5, 0x48f1, 0x49eb, 0x4ae5, 0x4bdc, 0x4cd3, 0x4dc8, 0x4ebb,
    0x4fad, 0x509c, 0x518a, 0x5276, 0x5360, 0x5447, 0x552c, 0x560f,
    0x56ef, 0x57cd, 0x58a8, 0x5981, 0x5a57, 0x5b2a, 0x5bfb, 0x5cc9,
    0x5d93, 0x5e5b, 0x5f20, 0x5fe2, 0x60a1, 0x615d, 0x6216, 0x62cc,
    0x637f, 0x642e, 0x64db, 0x6584, 0x662b, 0x66ce, 0x676f, 0x680c,
    0x68a6, 0x693d, 0x69d2, 0x6a63, 0x6af1, 0x6b7c, 0x6c05, 0x6c8a,
    0x6d0d, 0x6d8d, 0x6e09, 0x6e84, 0x6efb, 0x6f70, 0x6fe2, 0x7051,
    0x0f42, 0x0faf, 0x101e, 0x1090, 0x1105, 0x117c, 0x11f7, 0x1273,
    0x12f3, 0x1376, 0x13fb, 0x1484, 0x150f, 0x159d, 0x162e, 0x16c3,
    0x175a, 0x17f4, 0x1891, 0x1932, 0x19d5, 0x1a7c, 0x1b25, 0x1bd2,
    0x1c81, 0x1d34, 0x1dea, 0x1ea3, 0x1f5f, 0x201e, 0x20e0, 0x21a5,
    0x226d, 0x2337, 0x2405, 0x24d6, 0x25a9, 0x267f, 0x2758, 0x2833,
    0x2911, 0x29f1, 0x2ad4, 0x2bb9, 0x2ca0, 0x2d8a, 0x2e76, 0x2f64,
    0x3053, 0x3145, 0x3238, 0x332d, 0x3424, 0x351b, 0x3615, 0x370f,
    0x380b, 0x3907, 0x3a04, 0x3b03, 0x3c01, 0x3d01, 0x3e00, 0x3f00,
 };
 /*
  * High-range sigmoid table of the 2-stage HL scheme, 192 entries with q15 outputs.
  * Entries 0..95 rise from 0x70be to 0x7ff4;
  * entries 96..191 rise from 0x000b to 0x0e70.
  * The 96/96 split matches the index ranges given for the "High range table"
  * in the file header (positive inputs first, negative inputs second).
  */
 const q15_t sigmoidHTable_q15[192] = {
    0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7,
    0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f,
    0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03,
    0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d,
    0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81,
    0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17,
    0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72,
    0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa,
    0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc,
    0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0,
    0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed,
    0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4,
    0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011,
    0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c,
    0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e,
    0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c,
    0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d,
    0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce,
    0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152,
    0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a,
    0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388,
    0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8,
    0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a,
    0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70,
 };
 /*
  * Unified tanh lookup table, 256 entries with q7 outputs.
  * Entries 0..127 rise from 0x00 (tanh(0)) to 0x7f;
  * entries 128..255 rise from 0x80 to 0xf8.
  * NOTE(review): the initializers 0x80 and above are presumably meant as
  * two's-complement negative q7 values once stored in q7_t (whose definition
  * is not visible here), with the second half serving negative inputs -
  * confirm against the q7_t typedef and the callers.
  */
 const q7_t tanhTable_q7[256] = {
    0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35,
    0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e,
    0x61, 0x65, 0x68, 0x6a, 0x6d, 0x6f, 0x71, 0x72,
    0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b,
    0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e,
    0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81,
    0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82,
    0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84,
    0x85, 0x85, 0x86, 0x87, 0x88, 0x88, 0x8a, 0x8b,
    0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b,
    0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, 0xbf,
    0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8,
 };
 /*
  * Unified tanh lookup table, 256 entries with q15 outputs.
  * Entries 0..127 rise from 0x0000 (tanh(0)) to 0x7fff;
  * entries 128..255 rise from 0x8000 to 0xf803.
  * NOTE(review): the initializers 0x8000 and above are presumably meant as
  * two's-complement negative q15 values once stored in q15_t (whose
  * definition is not visible here), with the second half serving negative
  * inputs - confirm against the q15_t typedef and the callers.
  */
 const q15_t tanhTable_q15[256] = {
    0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae,
    0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, 0x5a1a, 0x5df6,
    0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254,
    0x73dc, 0x753a, 0x7672, 0x7788, 0x787f, 0x795b, 0x7a1e, 0x7acb,
    0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f,
    0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48,
    0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc,
    0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7,
    0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7,
    0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd,
    0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001,
    0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003,
    0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007,
    0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013,
    0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035,
    0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f,
    0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183,
    0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412,
    0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6,
    0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, 0x9869, 0x9b50,
    0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe,
    0xc4d9, 0xcb52, 0xd221, 0xd941, 0xe0a7, 0xe847, 0xf015, 0xf803,
 };
 /*
  * Low-range tanh table of the 2-stage HL scheme, 128 entries with q15 outputs.
  * Entries 0..63 rise from 0x0000 (tanh(0)) to 0x7b1b;
  * entries 64..127 rise from 0x849b to 0xfc00.
  * NOTE(review): the second half presumably holds two's-complement negative
  * q15 values for small negative inputs, as described for the
  * "Low range table" in the file header - confirm against the callers.
  */
 const q15_t tanhLTable_q15[128] = {
    0x0000, 0x0400, 0x07fd, 0x0bf7, 0x0feb, 0x13d7, 0x17b9, 0x1b90,
    0x1f59, 0x2314, 0x26bf, 0x2a58, 0x2ddf, 0x3151, 0x34ae, 0x37f6,
    0x3b27, 0x3e40, 0x4142, 0x442c, 0x46fd, 0x49b6, 0x4c56, 0x4edd,
    0x514d, 0x53a3, 0x55e2, 0x580a, 0x5a1a, 0x5c13, 0x5df6, 0x5fc4,
    0x617c, 0x6320, 0x64b0, 0x662d, 0x6797, 0x68f0, 0x6a37, 0x6b6e,
    0x6c95, 0x6dac, 0x6eb5, 0x6fb0, 0x709e, 0x717f, 0x7254, 0x731e,
    0x73dc, 0x7490, 0x753a, 0x75da, 0x7672, 0x7701, 0x7788, 0x7807,
    0x787f, 0x78f0, 0x795b, 0x79bf, 0x7a1e, 0x7a77, 0x7acb, 0x7b1b,
    0x849b, 0x84e5, 0x8535, 0x8589, 0x85e2, 0x8641, 0x86a5, 0x8710,
    0x8781, 0x87f9, 0x8878, 0x88ff, 0x898e, 0x8a26, 0x8ac6, 0x8b70,
    0x8c24, 0x8ce2, 0x8dac, 0x8e81, 0x8f62, 0x9050, 0x914b, 0x9254,
    0x936b, 0x9492, 0x95c9, 0x9710, 0x9869, 0x99d3, 0x9b50, 0x9ce0,
    0x9e84, 0xa03c, 0xa20a, 0xa3ed, 0xa5e6, 0xa7f6, 0xaa1e, 0xac5d,
    0xaeb3, 0xb123, 0xb3aa, 0xb64a, 0xb903, 0xbbd4, 0xbebe, 0xc1c0,
    0xc4d9, 0xc80a, 0xcb52, 0xceaf, 0xd221, 0xd5a8, 0xd941, 0xdcec,
    0xe0a7, 0xe470, 0xe847, 0xec29, 0xf015, 0xf409, 0xf803, 0xfc00,
 };
 /*
  * High-range tanh table of the 2-stage HL scheme, 192 entries with q15 outputs.
  * Entries 0..95 rise from 0x7b65 to 0x7fff;
  * entries 96..191 rise from 0x8000 to 0x8412.
  * The 96/96 split matches the index ranges given for the "High range table"
  * in the file header (positive inputs first, negative inputs second).
  * NOTE(review): initializers 0x8000 and above are presumably two's-complement
  * negative q15 values - confirm against the q15_t typedef.
  */
 const q15_t tanhHTable_q15[192] = {
    0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f,
    0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48,
    0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc,
    0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7,
    0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7,
    0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd,
    0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
    0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001,
    0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003,
    0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007,
    0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013,
    0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035,
    0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f,
    0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183,
    0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412,
 };
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c
@@ -1,134 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_q7_to_q15_no_shift.c
 * Description:  Converts the elements of the Q7 vector to Q15 vector without left-shift
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_nnsupportfunctions.h"
 /**    
 * @ingroup groupSupport    
 */
 /**    
 * @addtogroup nndata_convert    
 * @{    
 */
 /**    
 * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift 
 * @param[in]       *pSrc points to the Q7 input vector    
 * @param[out]      *pDst points to the Q15 output vector   
 * @param[in]       blockSize length of the input vector    
 * @return none.    
 *    
 * \par Description:    
 *    
 * The equation used for the conversion process is:    
 *   
 * <pre>    
 * 	pDst[n] = (q15_t) pSrc[n];   0 <= n < blockSize.    
 * </pre>    
 *   
 */
 void arm_q7_to_q15_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize)
 {
    const q7_t *src = pSrc;     /* read cursor over the Q7 input */
    uint32_t  remaining;        /* samples left for the scalar loop */
 #ifndef ARM_MATH_CM0_FAMILY
    uint32_t  quads;            /* groups of four samples left for the packed loop */
    /* Packed path: widen four Q7 samples per iteration, no scaling applied. */
    for (quads = blockSize >> 2u; quads > 0u; quads--)
    {
        /* Fetch four packed Q7 samples as one word and advance the cursor */
        const q31_t packed = *__SIMD32(src)++;
        /* Rotate by 8 so register bytes 1 and 3 get sign-extended to halfwords */
        const q31_t rotated_pair = __SXTB16(__ROR(packed, 8));
        /* Sign-extend register bytes 0 and 2 to halfwords */
        const q31_t direct_pair = __SXTB16(packed);
        q31_t     first, second;
        /* Re-interleave the halfwords so the output keeps the input order */
 #ifndef ARM_MATH_BIG_ENDIAN
        second = __PKHTB(rotated_pair, direct_pair, 16);
        first = __PKHBT(direct_pair, rotated_pair, 16);
 #else
        first = __PKHTB(rotated_pair, direct_pair, 16);
        second = __PKHBT(direct_pair, rotated_pair, 16);
 #endif
        *__SIMD32(pDst)++ = first;
        *__SIMD32(pDst)++ = second;
    }
    /* Whatever is not a multiple of four is left for the scalar loop */
    remaining = blockSize % 0x4u;
 #else
    /* Cortex-M0 family: every sample goes through the scalar loop */
    remaining = blockSize;
 #endif                          /* #ifndef ARM_MATH_CM0_FAMILY */
    for (; remaining > 0u; remaining--)
    {
        /* pDst[n] = (q15_t) pSrc[n], plain sign-extending conversion */
        *pDst++ = (q15_t) *src++;
    }
 }
 /**    
 * @} end of nndata_convert group   
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c
@@ -1,145 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_q7_to_q15_reordered_no_shift.c
 * Description:  Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_nnsupportfunctions.h"
 /**    
 * @ingroup groupSupport    
 */
 /**    
 * @addtogroup nndata_convert    
 * @{    
 */
 /**    
 * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift
 * @param[in]       *pSrc points to the Q7 input vector    
 * @param[out]      *pDst points to the Q15 output vector   
 * @param[in]       blockSize length of the input vector    
 * @return none.    
 *    
 * @details
 *
 * This function does the q7 to q15 expansion with re-ordering 
 *
 * <pre>
 *                          |   A1   |   A2   |   A3   |   A4   |
 *
 *                           0      7 8     15 16    23 24    31
 * </pre>
 *
 * is converted into:
 *
 * <pre>
 *  |       A1       |       A3       |   and  |       A2       |       A4       |
 *
 *   0             15 16            31          0             15 16            31
 * </pre>
 *
 *
 * This looks strange but is natural considering how sign-extension is done at
 * assembly level. 
 *
 * The expansion of the other operand will follow the same rule so that the end
 * results are the same.
 *
 * The tail (i.e., last (N % 4) elements) will still be in original order.
 *   
 */
 void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize)
 {
    const q7_t *src = pSrc;     /* read cursor over the Q7 input */
    uint32_t  remaining;        /* samples left for the scalar loop */
 #ifndef ARM_MATH_CM0_FAMILY
    uint32_t  quads;            /* groups of four samples left for the packed loop */
    /* Packed path: widen four Q7 samples per iteration, no scaling applied. */
    for (quads = blockSize >> 2u; quads > 0u; quads--)
    {
        /* Fetch four packed Q7 samples as one word and advance the cursor */
        const q31_t packed = *__SIMD32(src)++;
        /* Rotate by 8 so register bytes 1 and 3 get sign-extended to halfwords */
        const q31_t rotated_pair = __SXTB16(__ROR(packed, 8));
        /* Sign-extend register bytes 0 and 2 to halfwords */
        const q31_t direct_pair = __SXTB16(packed);
        /* Store the two pairs as they are: this is what yields the reordered layout */
 #ifndef ARM_MATH_BIG_ENDIAN
        *__SIMD32(pDst)++ = direct_pair;
        *__SIMD32(pDst)++ = rotated_pair;
 #else
        *__SIMD32(pDst)++ = rotated_pair;
        *__SIMD32(pDst)++ = direct_pair;
 #endif
    }
    /* Whatever is not a multiple of four is left for the scalar loop */
    remaining = blockSize % 0x4u;
 #else
    /* Cortex-M0 family: every sample goes through the scalar loop */
    remaining = blockSize;
 #endif                          /* #ifndef ARM_MATH_CM0_FAMILY */
    for (; remaining > 0u; remaining--)
    {
        /* Tail samples are converted one by one and stay in original order */
        *pDst++ = (q15_t) *src++;
    }
 }
 /**    
 * @} end of nndata_convert group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c
@@ -1,448 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_pool_q7_HWC.c
 * Description:  Pooling function implementations
 *
 * $Date:        17. January 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 #if defined (ARM_MATH_DSP)
 /**
 * @brief A few utility functions used by pooling functions
 *
 * 
 */
 /* Divide each of the first `length` q15 values in `buffer` by `scale` and store the quotient as q7 in `target`. */
 static void buffer_scale_back_q15_to_q7(q15_t * buffer, q7_t * target, uint16_t length, uint16_t scale)
 {
    const q15_t *sum = buffer;  /* read cursor over the q15 values */
    q7_t     *out = target;     /* write cursor over the q7 results */
    uint16_t  left = length;    /* elements still to convert */
    while (left > 0u)
    {
        *out++ = (q7_t) (*sum++ / scale);
        left--;
    }
 }
 /*
  * Element-wise maximum of two q7 vectors, stored back into base:
  *   base[i] = max(base[i], target[i])   for 0 <= i < length.
  *
  * Groups of four elements are read and written through 32-bit word accesses;
  * the remaining (length % 4) elements are handled one at a time so that
  * lengths that are not a multiple of four are fully processed.
  */
 static void compare_and_replace_if_larger_q7(q7_t * base,   // base data
                                              q7_t * target, // compare target
                                              const uint16_t length  // data length
    )
 {
    q7_t     *pIn = base;
    q7_t     *pCom = target;
    union arm_nnword in;
    union arm_nnword com;
    uint16_t  cnt = length >> 2;
    /* main part: four elements per iteration */
    while (cnt > 0u)
    {
        /* read base without advancing: the same word is written back below */
        in.word = *__SIMD32(pIn);
        com.word = *__SIMD32(pCom)++;
        // if version
        if (com.bytes[0] > in.bytes[0])
            in.bytes[0] = com.bytes[0];
        if (com.bytes[1] > in.bytes[1])
            in.bytes[1] = com.bytes[1];
        if (com.bytes[2] > in.bytes[2])
            in.bytes[2] = com.bytes[2];
        if (com.bytes[3] > in.bytes[3])
            in.bytes[3] = com.bytes[3];
        *__SIMD32(pIn)++ = in.word;
        cnt--;
    }
    /* tail: the last (length % 4) elements, compared one by one */
    cnt = length & 0x3;
    while (cnt > 0u)
    {
        if (*pCom > *pIn)
        {
            *pIn = *pCom;
        }
        pIn++;
        pCom++;
        cnt--;
    }
 }
 /* Add each of the `length` q7 values in `target` onto the matching q15 accumulator in `base`. */
 static void accumulate_q7_to_q15(q15_t * base, q7_t * target, const uint16_t length)
 {
    q15_t    *acc = base;       /* cursor over the q15 accumulators */
    q7_t     *src = target;     /* cursor over the q7 values to add */
    uint16_t  left;
    /* Packed part: four values per iteration */
    for (left = length >> 2; left > 0u; left--)
    {
        /* Fetch four packed q7 values as one word and advance the cursor */
        const q31_t packed = *__SIMD32(src)++;
        /* Rotate by 8 so register bytes 1 and 3 get sign-extended to halfwords */
        const q31_t rotated_pair = __SXTB16(__ROR(packed, 8));
        /* Sign-extend register bytes 0 and 2 to halfwords */
        const q31_t direct_pair = __SXTB16(packed);
        q31_t     first, second, current;
        /* Re-interleave the halfwords so they line up with the accumulator order */
 #ifndef ARM_MATH_BIG_ENDIAN
        second = __PKHTB(rotated_pair, direct_pair, 16);
        first = __PKHBT(direct_pair, rotated_pair, 16);
 #else
        first = __PKHTB(rotated_pair, direct_pair, 16);
        second = __PKHBT(direct_pair, rotated_pair, 16);
 #endif
        /* Add two halfword lanes at a time with __QADD16 and write them back */
        current = *__SIMD32(acc);
        *__SIMD32(acc)++ = __QADD16(first, current);
        current = *__SIMD32(acc);
        *__SIMD32(acc)++ = __QADD16(second, current);
    }
    /* Scalar tail: the last (length % 4) values, plain addition */
    for (left = length & 0x3; left > 0u; left--)
    {
        *acc++ += *src++;
    }
 }
 #endif                          // ARM_MATH_DSP
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Pooling
 * @{
 */
  /**
   * @brief Q7 max pooling function
   * @param[in, out]  Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input
   * @param[in,out]   Im_out      pointer to output tensor
   * @return none.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size:  0
   *
   * bufferA is not referenced by either implementation below.
   *
   * Tensors are addressed as data[ch + ch_im_in * (x + y * dim)], and the same
   * dim_im_in / dim_im_out value is used for both spatial axes.
   *
   * With ARM_MATH_DSP defined, the pooling function is implemented as split
   * x-pooling then y-pooling: the x-pooling result is written back into Im_in
   * and the y-pooling result is written to Im_out.
   *
   * This pooling function is input-destructive in that implementation. Input
   * data is undefined after calling this function. The reference
   * implementation (ARM_MATH_DSP not defined) only reads Im_in.
   *
   */
 void
 arm_maxpool_q7_HWC(q7_t * Im_in,
                   const uint16_t dim_im_in,
                   const uint16_t ch_im_in,
                   const uint16_t dim_kernel,
                   const uint16_t padding,
                   const uint16_t stride, const uint16_t dim_im_out, q7_t * bufferA, q7_t * Im_out)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    int16_t   i_x, i_y;
    /* first does the pooling along x axis */
    /* every input row is reduced in place: its pooled pixels end up in the
     * first dim_im_out pixel slots of that same row of Im_in */
    for (i_y = 0; i_y < dim_im_in; i_y++)
    {
        for (i_x = 0; i_x < dim_im_out; i_x++)
        {
            /* for each output pixel */
            q7_t     *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in;
            q7_t     *win_start;
            q7_t     *win_stop;
            /* window start, clamped when it would fall left of the image */
            /* NOTE(review): the clamp uses the target pixel (column i_x), not
             * column 0; the two coincide only for i_x == 0 - confirm this is
             * intended when padding > stride */
            if (i_x * stride - padding < 0)
            {
                win_start = target;
            } else
            {
                win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in;
            }
            /* window stop (exclusive), clamped to the end of the current row */
            if (i_x * stride - padding + dim_kernel >= dim_im_in)
            {
                win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in;
            } else
            {
                win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in;
            }
            /* first step is to copy over initial data */
            /* arm_copy_q7(win_start, target, ch_im_in); */
            /* memmove because target and win_start both point into Im_in and
             * can be the same pixel */
            memmove(target, win_start, ch_im_in);
            /* start the max operation from the second part */
            win_start += ch_im_in;
            for (; win_start < win_stop; win_start += ch_im_in)
            {
                compare_and_replace_if_larger_q7(target, win_start, ch_im_in);
            }
        }
    }
    /* then does the pooling along y axis */
    /* only the first dim_im_out pixels of each input row (the x-pooled data)
     * are read here; results go to Im_out */
    for (i_y = 0; i_y < dim_im_out; i_y++)
    {
        /* for each output row */
        q7_t     *target = Im_out + i_y * dim_im_out * ch_im_in;
        q7_t     *row_start;
        q7_t     *row_end;
        /* setting the starting row */
        /* clamped to the first image row when the window starts above it */
        if (i_y * stride - padding < 0)
        {
            row_start = Im_in;
        } else
        {
            row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in;
        }
        /* setting the stopping row */
        /* exclusive end, clamped to one past the last image row */
        if (i_y * stride - padding + dim_kernel >= dim_im_in)
        {
            row_end = Im_in + dim_im_in * dim_im_in * ch_im_in;
        } else
        {
            row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in;
        }
        /* copy over the first row */
        /* arm_copy_q7(row_start, target, dim_im_out * ch_im_in); */
        memmove(target, row_start, dim_im_out * ch_im_in);
        /* move over to next row */
        row_start += ch_im_in * dim_im_in;
        for (; row_start < row_end; row_start += dim_im_in * ch_im_in)
        {
            compare_and_replace_if_larger_q7(target, row_start, dim_im_out * ch_im_in);
        }
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int16_t   i_ch_in, i_x, i_y;
    int16_t   k_x, k_y;
    /* direct form: one output element at a time, scanning its whole window */
    for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++)
    {
        for (i_y = 0; i_y < dim_im_out; i_y++)
        {
            for (i_x = 0; i_x < dim_im_out; i_x++)
            {
                /* start below -128 so the first in-bounds sample always wins
                 * (assumes q7_t is an 8-bit signed type - TODO confirm) */
                int       max = -129;
                for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++)
                {
                    for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++)
                    {
                        /* window positions that fall in the padding are skipped */
                        if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in)
                        {
                            if (Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)] > max)
                            {
                                max = Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)];
                            }
                        }
                    }
                }
                /* NOTE(review): if no window position was in bounds, max is
                 * still -129 when it is converted to q7_t here */
                Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = max;
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
 }
  /**
   * @brief Q7 average pooling function
   * @param[in,out]   Im_in       pointer to input tensor
   * @param[in]       dim_im_in   input tensor dimension
   * @param[in]       ch_im_in    number of input tensor channels
   * @param[in]       dim_kernel  filter kernel size
   * @param[in]       padding     padding sizes
   * @param[in]       stride      convolution stride
   * @param[in]       dim_im_out  output tensor dimension
   * @param[in,out]   bufferA     pointer to buffer space for input
   * @param[in,out]   Im_out      pointer to output tensor
   * @return none.
   *
   * @details
   *
   * <b>Buffer size:</b>
   *
   * bufferA size:  2*dim_im_out*ch_im_in
   *
   * bufferA is only used by the ARM_MATH_DSP build, where it is accessed
   * as an array of q15_t accumulators.
   *
   * The pooling function is implemented as split x-pooling then
   * y-pooling.
   *
   * This pooling function is input-destructive. Input data is undefined
   * after calling this function. (The ARM_MATH_DSP build writes the
   * x-pooling results back into Im_in; the reference build only reads
   * Im_in.)
   *
   * In the reference build, an output element whose pooling window does
   * not overlap the input at all (window entirely inside the padding)
   * is written as 0 instead of dividing by a zero element count.
   *
   */
 void
 arm_avepool_q7_HWC(q7_t * Im_in,
                    const uint16_t dim_im_in,
                    const uint16_t ch_im_in,
                    const uint16_t dim_kernel,
                    const uint16_t padding,
                    const uint16_t stride, const uint16_t dim_im_out, q7_t * bufferA, q7_t * Im_out)
 {
 #if defined (ARM_MATH_DSP)
    /* Run the following code for Cortex-M4 and Cortex-M7 */
    q15_t    *buffer = (q15_t *) bufferA;
    int16_t   i_x, i_y;
    int16_t   count = 0;
    /* first does the pooling along x axis */
    for (i_y = 0; i_y < dim_im_in; i_y++)
    {
        for (i_x = 0; i_x < dim_im_out; i_x++)
        {
            /* for each output pixel: the result is stored in place, in row i_y of Im_in */
            q7_t     *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in;
            q7_t     *win_start;
            q7_t     *win_stop;
            /* clip the left edge of the window */
            if (i_x * stride - padding < 0)
            {
                win_start = target;
            } else
            {
                win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in;
            }
            /* clip the right edge of the window to the end of the row */
            if (i_x * stride - padding + dim_kernel >= dim_im_in)
            {
                win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in;
            } else
            {
                win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in;
            }
            /* first step is to copy over initial data */
            arm_q7_to_q15_no_shift(win_start, buffer, ch_im_in);
            count = 1;
            /* accumulate the remaining pixels of the window, counting them as we go */
            win_start += ch_im_in;
            for (; win_start < win_stop; win_start += ch_im_in)
            {
                accumulate_q7_to_q15(buffer, win_start, ch_im_in);
                count++;
            }
            /* scale the accumulated values back to q7 using the number of pixels visited */
            buffer_scale_back_q15_to_q7(buffer, target, ch_im_in, count);
        }
    }
    /* then does the pooling along y axis */
    for (i_y = 0; i_y < dim_im_out; i_y++)
    {
        /* for each output row */
        q7_t     *target = Im_out + i_y * dim_im_out * ch_im_in;
        q7_t     *row_start;
        q7_t     *row_end;
        /* setting the starting row */
        if (i_y * stride - padding < 0)
        {
            row_start = Im_in;
        } else
        {
            row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in;
        }
        /* setting the stopping row */
        if (i_y * stride - padding + dim_kernel >= dim_im_in)
        {
            row_end = Im_in + dim_im_in * dim_im_in * ch_im_in;
        } else
        {
            row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in;
        }
        /* copy over the first row */
        arm_q7_to_q15_no_shift(row_start, buffer, dim_im_out * ch_im_in);
        count = 1;
        /* move over to next row */
        row_start += ch_im_in * dim_im_in;
        for (; row_start < row_end; row_start += dim_im_in * ch_im_in)
        {
            accumulate_q7_to_q15(buffer, row_start, dim_im_out * ch_im_in);
            count++;
        }
        buffer_scale_back_q15_to_q7(buffer, target, dim_im_out * ch_im_in, count);
    }
 #else
    /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
    int16_t   i_ch_in, i_x, i_y;
    int16_t   k_x, k_y;
    for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++)
    {
        for (i_y = 0; i_y < dim_im_out; i_y++)
        {
            for (i_x = 0; i_x < dim_im_out; i_x++)
            {
                int       sum = 0;
                int       count = 0;
                for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++)
                {
                    for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++)
                    {
                        /* only positions inside the input contribute; padding is skipped */
                        if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in)
                        {
                            sum += Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)];
                            count++;
                        }
                    }
                }
                /* count is 0 when the whole window falls into the padding: avoid dividing by zero */
                Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = (count > 0) ? (q7_t) (sum / count) : 0;
            }
        }
    }
 #endif                          /* ARM_MATH_DSP */
 }
 /**
 * @} end of Pooling group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c
@@ -1,120 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_softmax_q15.c
 * Description:  Q15 softmax function
 *
 * $Date:        20. February 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Softmax
 * @{
 */
  /**
   * @brief Q15 softmax function
   * @param[in]       vec_in      pointer to input vector
   * @param[in]       dim_vec     input vector dimension
   * @param[out]      p_out       pointer to output vector
   * @return none.
   *
   * @details
   *
   *  Here, instead of typical e based softmax, we use
   *  2-based softmax, i.e.,:
   *
   *  y_i = 2^(x_i) / sum(2^x_j)
   *
   *  The relative output will be different here.
   *  But mathematically, the gradient will be the same
   *  with a log(2) scaling factor.
   *
   *  If dim_vec is 0 the function returns without touching p_out.
   *
   */
 void arm_softmax_q15(const q15_t * vec_in, const uint16_t dim_vec, q15_t * p_out)
 {
    /* unsigned 32-bit: up to 65535 terms of at most 1 << 16 each still fit */
    uint32_t  sum;
    /* same width as dim_vec so that every valid index can be reached */
    uint16_t  i;
    uint8_t   shift;
    q31_t     base;
    /* an empty vector would leave sum at 0 and divide by zero below */
    if (dim_vec == 0)
    {
        return;
    }
    /* We first search for the maximum; the start value is below any q15_t input */
    base = -1 * 0x100000;
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            base = vec_in[i];
        }
    }
    /* we ignore really small values
     * anyway, they will be 0 after shrinking
     * to q15_t
     */
    base = base - 16;
    sum = 0;
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            /* vec_in[i] - base lies in 1..16 here, so the shift is well defined */
            shift = (uint8_t)__USAT(vec_in[i] - base, 5);
            sum += (uint32_t)0x1 << shift;
        }
    }
    /* This is effectively (0x1 << 32) / sum */
    int64_t div_base = 0x100000000LL;
    int output_base = (int32_t)(div_base / sum);
    /* Final confidence will be output_base >> ( 17 - (vec_in[i] - base) )
     * so 32768 (0x1<<15) -> 100% confidence when sum = 0x1 << 16, output_base = 0x1 << 16
     * and vec_in[i]-base = 16
     */
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            /* Here minimum value of 17+base-vec_in[i] will be 1 */
            shift = (uint8_t)__USAT(17+base-vec_in[i], 5);
            p_out[i] = (q15_t) __SSAT((output_base >> shift), 16);
        } else
        {
            p_out[i] = 0;
        }
    }
 }
 /**
 * @} end of Softmax group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c
@@ -1,121 +0,0 @@
 /*
 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /* ----------------------------------------------------------------------
 * Project:      CMSIS NN Library
 * Title:        arm_softmax_q7.c
 * Description:  Q7 softmax function
 *
 * $Date:        20. February 2018
 * $Revision:    V.1.0.0
 *
 * Target Processor:  Cortex-M cores
 *
 * -------------------------------------------------------------------- */
 #include "arm_math.h"
 #include "arm_nnfunctions.h"
 /**
 *  @ingroup groupNN
 */
 /**
 * @addtogroup Softmax
 * @{
 */
  /**
   * @brief Q7 softmax function
   * @param[in]       vec_in      pointer to input vector
   * @param[in]       dim_vec     input vector dimension
   * @param[out]      p_out       pointer to output vector
   * @return none.
   *
   * @details
   *
   *  Here, instead of typical natural logarithm e based softmax, we use
   *  2-based softmax here, i.e.,:
   *
   *  y_i = 2^(x_i) / sum(2^x_j)
   *
   *  The relative output will be different here.
   *  But mathematically, the gradient will be the same
   *  with a log(2) scaling factor.
   *
   *  If dim_vec is 0 the function returns without touching p_out.
   *
   */
 void arm_softmax_q7(const q7_t * vec_in, const uint16_t dim_vec, q7_t * p_out)
 {
    q31_t     sum;
    /* same width as dim_vec so that every valid index can be reached */
    uint16_t  i;
    uint8_t   shift;
    q15_t     base;
    /* an empty vector would leave sum at 0 and divide by zero below */
    if (dim_vec == 0)
    {
        return;
    }
    /* the start value is below any q7_t input */
    base = -257;
    /* We first search for the maximum */
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            base = vec_in[i];
        }
    }
    /*
     * So the base is set to max-8, meaning
     * that we ignore really small values.
     * anyway, they will be 0 after shrinking to q7_t.
     */
    base = base - 8;
    sum = 0;
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            /* vec_in[i] - base lies in 1..8 here */
            shift = (uint8_t)__USAT(vec_in[i] - base, 5);
            sum += 0x1 << shift;
        }
    }
    /* This is effectively (0x1 << 20) / sum */
    int output_base = 0x100000 / sum;
    /*
     * Final confidence will be output_base >> ( 13 - (vec_in[i] - base) )
     * so 128 (0x1<<7) -> 100% confidence when sum = 0x1 << 8, output_base = 0x1 << 12
     * and vec_in[i]-base = 8
     */
    for (i = 0; i < dim_vec; i++)
    {
        if (vec_in[i] > base)
        {
            /* Here minimum value of 13+base-vec_in[i] will be 5 */
            shift = (uint8_t)__USAT(13+base-vec_in[i], 5);
            p_out[i] = (q7_t) __SSAT((output_base >> shift), 8);
        } else {
            p_out[i] = 0;
        }
    }
 }
 /**
 * @} end of Softmax group
 */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS/Template/cmsis_os.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS/Template/cmsis_os.h
@@ -1,698 +0,0 @@
 /* ----------------------------------------------------------------------
 * $Date:        5. February 2013
 * $Revision:    V1.02
 *
 * Project:      CMSIS-RTOS API
 * Title:        cmsis_os.h template header file
 *
 * Version 0.02
 *    Initial Proposal Phase
 * Version 0.03
 *    osKernelStart added, optional feature: main started as thread
 *    osSemaphores have standard behavior
 *    osTimerCreate does not start the timer, added osTimerStart
 *    osThreadPass is renamed to osThreadYield
 * Version 1.01
 *    Support for C++ interface
 *     - const attribute removed from the osXxxxDef_t typedef's
 *     - const attribute added to the osXxxxDef macros
 *    Added: osTimerDelete, osMutexDelete, osSemaphoreDelete
 *    Added: osKernelInitialize
 * Version 1.02
 *    Control functions for short timeouts in microsecond resolution:
 *    Added: osKernelSysTick, osKernelSysTickFrequency, osKernelSysTickMicroSec
 *    Removed: osSignalGet 
 *----------------------------------------------------------------------------
 *
 * Copyright (c) 2013-2017 ARM LIMITED
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *---------------------------------------------------------------------------*/
 #ifndef _CMSIS_OS_H
 #define _CMSIS_OS_H
 /// \note MUST REMAIN UNCHANGED: \b osCMSIS identifies the CMSIS-RTOS API version.
 /// With the layout main [31:16] .sub [15:0], 0x10002 encodes main version 1, sub version 2.
 #define osCMSIS           0x10002      ///< API version (main [31:16] .sub [15:0])
 /// \note CAN BE CHANGED: \b osCMSIS_KERNEL identifies the underlying RTOS kernel and version number.
 #define osCMSIS_KERNEL    0x10000	   ///< RTOS identification and version (main [31:16] .sub [15:0])
 /// \note MUST REMAIN UNCHANGED: \b osKernelSystemId shall be consistent in every CMSIS-RTOS.
 #define osKernelSystemId "KERNEL V1.00"   ///< RTOS identification string
 /// \note MUST REMAIN UNCHANGED: \b osFeature_xxx shall be consistent in every CMSIS-RTOS.
 /// The optional API sections of this header are guarded by these macros
 /// (for example \b osFeature_SysTick and \b osFeature_Wait are tested with \c \#if below).
 #define osFeature_MainThread   1       ///< main thread      1=main can be thread, 0=not available
 #define osFeature_Pool         1       ///< Memory Pools:    1=available, 0=not available
 #define osFeature_MailQ        1       ///< Mail Queues:     1=available, 0=not available
 #define osFeature_MessageQ     1       ///< Message Queues:  1=available, 0=not available
 #define osFeature_Signals      8       ///< maximum number of Signal Flags available per thread
 #define osFeature_Semaphore    30      ///< maximum count for \ref osSemaphoreCreate function
 #define osFeature_Wait         1       ///< osWait function: 1=available, 0=not available
 #define osFeature_SysTick      1       ///< osKernelSysTick functions: 1=available, 0=not available
 #include <stdint.h>
 #include <stddef.h>
 #ifdef  __cplusplus
 extern "C"
 {
 #endif
 // ==== Enumeration, structures, defines ====
 /// Priority used for thread control.
 /// \note MUST REMAIN UNCHANGED: \b osPriority shall be consistent in every CMSIS-RTOS.
 /// The seven regular levels are centred on \b osPriorityNormal (0) and run from -3 to +3.
 /// \b osPriorityError (0x84) lies outside that range and has the same numeric value as
 /// \b osErrorPriority in \ref osStatus.
 typedef enum  {
  osPriorityIdle          = -3,          ///< priority: idle (lowest)
  osPriorityLow           = -2,          ///< priority: low
  osPriorityBelowNormal   = -1,          ///< priority: below normal
  osPriorityNormal        =  0,          ///< priority: normal (default)
  osPriorityAboveNormal   = +1,          ///< priority: above normal
  osPriorityHigh          = +2,          ///< priority: high
  osPriorityRealtime      = +3,          ///< priority: realtime (highest)
  osPriorityError         =  0x84        ///< system cannot determine priority or thread has illegal priority
 } osPriority;
 /// Timeout value.
 /// \note MUST REMAIN UNCHANGED: \b osWaitForever shall be consistent in every CMSIS-RTOS.
 /// All bits of a 32-bit value are set; the \b millisec parameters in this header are uint32_t.
 #define osWaitForever     0xFFFFFFFF     ///< wait forever timeout value
 /// Status code values returned by CMSIS-RTOS functions.
 /// \note MUST REMAIN UNCHANGED: \b osStatus shall be consistent in every CMSIS-RTOS.
 /// Values below 0x80 (\b osOK and the \b osEventXxx codes) describe a completed call;
 /// the \b osErrorXxx codes all have bit 7 (0x80) set.
 /// \b osErrorTimeoutResource (0xC1) equals \b osErrorResource (0x81) combined with
 /// \b osEventTimeout (0x40).
 typedef enum  {
  osOK                    =     0,       ///< function completed; no error or event occurred.
  osEventSignal           =  0x08,       ///< function completed; signal event occurred.
  osEventMessage          =  0x10,       ///< function completed; message event occurred.
  osEventMail             =  0x20,       ///< function completed; mail event occurred.
  osEventTimeout          =  0x40,       ///< function completed; timeout occurred.
  osErrorParameter        =  0x80,       ///< parameter error: a mandatory parameter was missing or specified an incorrect object.
  osErrorResource         =  0x81,       ///< resource not available: a specified resource was not available.
  osErrorTimeoutResource  =  0xC1,       ///< resource not available within given time: a specified resource was not available within the timeout period.
  osErrorISR              =  0x82,       ///< not allowed in ISR context: the function cannot be called from interrupt service routines.
  osErrorISRRecursive     =  0x83,       ///< function called multiple times from ISR with same object.
  osErrorPriority         =  0x84,       ///< system cannot determine priority or thread has illegal priority.
  osErrorNoMemory         =  0x85,       ///< system is out of memory: it was impossible to allocate or reserve memory for the operation.
  osErrorValue            =  0x86,       ///< value of a parameter is out of range.
  osErrorOS               =  0xFF,       ///< unspecified RTOS error: run-time error but no other error message fits.
  os_status_reserved      =  0x7FFFFFFF  ///< prevent from enum down-size compiler optimization.
 } osStatus;
 /// Timer type value for the timer definition.
 /// \note MUST REMAIN UNCHANGED: \b os_timer_type shall be consistent in every CMSIS-RTOS.
 /// Passed as the \b type argument of \ref osTimerCreate.
 typedef enum  {
  osTimerOnce             =     0,       ///< one-shot timer
  osTimerPeriodic         =     1        ///< repeating timer
 } os_timer_type;
 /// Entry point of a thread.
 /// The function receives one read-only untyped argument and returns nothing.
 /// Stored in the \b pthread member of \ref osThreadDef_t.
 /// \note MUST REMAIN UNCHANGED: \b os_pthread shall be consistent in every CMSIS-RTOS.
 typedef void (*os_pthread) (void const *argument);
 /// Entry point of a timer call back function.
 /// Same signature as \ref os_pthread; stored in the \b ptimer member of \ref osTimerDef_t.
 /// \note MUST REMAIN UNCHANGED: \b os_ptimer shall be consistent in every CMSIS-RTOS.
 typedef void (*os_ptimer) (void const *argument);
 // >>> the following data type definitions may be adapted towards a specific RTOS
 // Each ID type below is a pointer to a struct tag that is only named, not defined, at this
 // point of the header, so callers handle the IDs as opaque pointers.
 /// Thread ID identifies the thread (pointer to a thread control block).
 /// \note CAN BE CHANGED: \b os_thread_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_thread_cb *osThreadId;
 /// Timer ID identifies the timer (pointer to a timer control block).
 /// \note CAN BE CHANGED: \b os_timer_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_timer_cb *osTimerId;
 /// Mutex ID identifies the mutex (pointer to a mutex control block).
 /// \note CAN BE CHANGED: \b os_mutex_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_mutex_cb *osMutexId;
 /// Semaphore ID identifies the semaphore (pointer to a semaphore control block).
 /// \note CAN BE CHANGED: \b os_semaphore_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_semaphore_cb *osSemaphoreId;
 /// Pool ID identifies the memory pool (pointer to a memory pool control block).
 /// \note CAN BE CHANGED: \b os_pool_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_pool_cb *osPoolId;
 /// Message ID identifies the message queue (pointer to a message queue control block).
 /// \note CAN BE CHANGED: \b os_messageQ_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_messageQ_cb *osMessageQId;
 /// Mail ID identifies the mail queue (pointer to a mail queue control block).
 /// \note CAN BE CHANGED: \b os_mailQ_cb is implementation specific in every CMSIS-RTOS.
 typedef struct os_mailQ_cb *osMailQId;
 /// Thread Definition structure contains startup information of a thread.
 /// \note CAN BE CHANGED: \b os_thread_def is implementation specific in every CMSIS-RTOS.
 /// The \ref osThreadDef macro initialises this struct positionally with
 /// (name, priority, instances, stacksz), so the member order below must match that initialiser.
 typedef struct os_thread_def  {
  os_pthread               pthread;    ///< start address of thread function
  osPriority             tpriority;    ///< initial thread priority
  uint32_t               instances;    ///< maximum number of instances of that thread function
  uint32_t               stacksize;    ///< stack size requirements in bytes; 0 is default stack size
 } osThreadDef_t;
 /// Timer Definition structure contains timer parameters.
 /// \note CAN BE CHANGED: \b os_timer_def is implementation specific in every CMSIS-RTOS.
 /// The \ref osTimerDef macro initialises the single member with the call back function.
 typedef struct os_timer_def  {
  os_ptimer                 ptimer;    ///< start address of a timer function
 } osTimerDef_t;
 /// Mutex Definition structure contains setup information for a mutex.
 /// In this template the struct carries only a placeholder member.
 /// \note CAN BE CHANGED: \b os_mutex_def is implementation specific in every CMSIS-RTOS.
 typedef struct os_mutex_def  {
  uint32_t                   dummy;    ///< dummy value.
 } osMutexDef_t;
 /// Semaphore Definition structure contains setup information for a semaphore.
 /// In this template the struct carries only a placeholder member.
 /// \note CAN BE CHANGED: \b os_semaphore_def is implementation specific in every CMSIS-RTOS.
 typedef struct os_semaphore_def  {
  uint32_t                   dummy;    ///< dummy value.
 } osSemaphoreDef_t;
 /// Definition structure for memory block allocation.
 /// \note CAN BE CHANGED: \b os_pool_def is implementation specific in every CMSIS-RTOS.
 /// NOTE(review): \b pool presumably has to provide room for \b pool_sz items of \b item_sz
 /// each; confirm against the macro that defines pool objects.
 typedef struct os_pool_def  {
  uint32_t                 pool_sz;    ///< number of items (elements) in the pool
  uint32_t                 item_sz;    ///< size of an item
  void                       *pool;    ///< pointer to memory for pool
 } osPoolDef_t;
 /// Definition structure for message queue.
 /// Has the same three-member layout (element count, item size, storage pointer) as
 /// \ref osPoolDef_t and \ref osMailQDef_t.
 /// \note CAN BE CHANGED: \b os_messageQ_def is implementation specific in every CMSIS-RTOS.
 typedef struct os_messageQ_def  {
  uint32_t                queue_sz;    ///< number of elements in the queue
  uint32_t                 item_sz;    ///< size of an item
  void                       *pool;    ///< memory array for messages
 } osMessageQDef_t;
 /// Definition structure for mail queue.
 /// Has the same three-member layout (element count, item size, storage pointer) as
 /// \ref osMessageQDef_t.
 /// \note CAN BE CHANGED: \b os_mailQ_def is implementation specific in every CMSIS-RTOS.
 typedef struct os_mailQ_def  {
  uint32_t                queue_sz;    ///< number of elements in the queue
  uint32_t                 item_sz;    ///< size of an item
  void                       *pool;    ///< memory array for mail
 } osMailQDef_t;
 /// Event structure contains detailed information about an event.
 /// Returned by value from \ref osWait and \ref osSignalWait.
 /// \note MUST REMAIN UNCHANGED: \b os_event shall be consistent in every CMSIS-RTOS.
 ///       However the struct may be extended at the end.
 /// NOTE(review): which member of \b value and \b def is meaningful presumably depends on
 /// \b status (e.g. osEventSignal -> value.signals); confirm against the RTOS implementation.
 typedef struct  {
  osStatus                 status;     ///< status code: event or error information
  union  {
    uint32_t                    v;     ///< message as 32-bit value
    void                       *p;     ///< message or mail as void pointer
    int32_t               signals;     ///< signal flags
  } value;                             ///< event value
  union  {
    osMailQId             mail_id;     ///< mail id obtained by \ref osMailCreate
    osMessageQId       message_id;     ///< message id obtained by \ref osMessageCreate
  } def;                               ///< event definition
 } osEvent;
 //  ==== Kernel Control Functions ====
 /// Initialize the RTOS Kernel for creating objects.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osKernelInitialize shall be consistent in every CMSIS-RTOS.
 osStatus osKernelInitialize (void);
 /// Start the RTOS Kernel.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osKernelStart shall be consistent in every CMSIS-RTOS.
 osStatus osKernelStart (void);
 /// Check if the RTOS kernel is already started.
 /// \note MUST REMAIN UNCHANGED: \b osKernelRunning shall be consistent in every CMSIS-RTOS.
 /// \return 0 RTOS is not started, 1 RTOS is started.
 int32_t osKernelRunning(void);
 #if (defined (osFeature_SysTick)  &&  (osFeature_SysTick != 0))     // System Timer available
 /// Get the RTOS kernel system timer counter 
 /// \note MUST REMAIN UNCHANGED: \b osKernelSysTick shall be consistent in every CMSIS-RTOS.
 /// \return RTOS kernel system timer as 32-bit value 
 uint32_t osKernelSysTick (void);
 /// The RTOS kernel system timer frequency in Hz
 /// \note Reflects the system timer setting and is typically defined in a configuration file.
 #define osKernelSysTickFrequency 100000000
 /// Convert a microseconds value to a RTOS kernel system timer value.
 /// The multiplication is carried out in 64 bits, so the intermediate product does not
 /// wrap for 32-bit microsecond values. The argument is parenthesized so that an
 /// expression such as \c a+b is converted as a whole.
 /// \param         microsec     time value in microseconds.
 /// \return time value normalized to the \ref osKernelSysTickFrequency
 #define osKernelSysTickMicroSec(microsec) (((uint64_t)(microsec) * (osKernelSysTickFrequency)) / 1000000)
 #endif    // System Timer available
 //  ==== Thread Management ====
 /// Create a Thread Definition with function, priority, and stack requirements.
 /// \param         name         name of the thread function.
 /// \param         priority     initial priority of the thread function.
 /// \param         instances    number of possible thread instances.
 /// \param         stacksz      stack size (in bytes) requirements for the thread function.
 /// \note CAN BE CHANGED: The parameters to \b osThreadDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// When \b osObjectsExternal is defined the macro expands to an \c extern declaration of
 /// \c os_thread_def_<name> only and the other arguments are unused; otherwise it defines
 /// that const object and initialises it positionally with the four arguments.
 /// The expansion carries no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osThreadDef(name, priority, instances, stacksz)  \
 extern const osThreadDef_t os_thread_def_##name
 #else                            // define the object
 #define osThreadDef(name, priority, instances, stacksz)  \
 const osThreadDef_t os_thread_def_##name = \
 { (name), (priority), (instances), (stacksz)  }
 #endif
 /// Access a Thread definition.
 /// Expands to the address of the \c os_thread_def_<name> object. The expansion is
 /// parenthesized so that it behaves as a single pointer expression when combined with
 /// postfix operators such as \c -> or \c [].
 /// \param         name          name of the thread definition object.
 /// \note CAN BE CHANGED: The parameter to \b osThread shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #define osThread(name)  \
 (&os_thread_def_##name)
 /// Create a thread and add it to Active Threads and set it to state READY.
 /// \param[in]     thread_def    thread definition referenced with \ref osThread.
 /// \param[in]     argument      pointer that is passed to the thread function as start argument.
 /// \return thread ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osThreadCreate shall be consistent in every CMSIS-RTOS.
 osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument);
 /// Return the thread ID of the current running thread.
 /// \return thread ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osThreadGetId shall be consistent in every CMSIS-RTOS.
 osThreadId osThreadGetId (void);
 /// Terminate execution of a thread and remove it from Active Threads.
 /// \param[in]     thread_id   thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osThreadTerminate shall be consistent in every CMSIS-RTOS.
 osStatus osThreadTerminate (osThreadId thread_id);
 /// Pass control to next thread that is in state \b READY.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osThreadYield shall be consistent in every CMSIS-RTOS.
 osStatus osThreadYield (void);
 /// Change priority of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     priority      new priority value for the thread function.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osThreadSetPriority shall be consistent in every CMSIS-RTOS.
 osStatus osThreadSetPriority (osThreadId thread_id, osPriority priority);
 /// Get current priority of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \return current priority value of the thread function.
 /// \note MUST REMAIN UNCHANGED: \b osThreadGetPriority shall be consistent in every CMSIS-RTOS.
 osPriority osThreadGetPriority (osThreadId thread_id);
 //  ==== Generic Wait Functions ====
 /// Wait for Timeout (Time Delay).
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue "time delay" value
 /// \return status code that indicates the execution status of the function.
 osStatus osDelay (uint32_t millisec);
 #if (defined (osFeature_Wait)  &&  (osFeature_Wait != 0))     // Generic Wait available
 /// Wait for Signal, Message, Mail, or Timeout.
 /// \param[in] millisec          \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return event that contains signal, message, or mail information or error code.
 /// \note MUST REMAIN UNCHANGED: \b osWait shall be consistent in every CMSIS-RTOS.
 osEvent osWait (uint32_t millisec);
 #endif  // Generic Wait available
 //  ==== Timer Management Functions ====
 /// Define a Timer object.
 /// \param         name          name of the timer object.
 /// \param         function      name of the timer call back function.
 /// \note CAN BE CHANGED: The parameter to \b osTimerDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// When \b osObjectsExternal is defined the macro expands to an \c extern declaration of
 /// \c os_timer_def_<name> only and \b function is unused; otherwise it defines that const
 /// object with \b function as its single initialiser.
 /// The expansion carries no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osTimerDef(name, function)  \
 extern const osTimerDef_t os_timer_def_##name
 #else                            // define the object
 #define osTimerDef(name, function)  \
 const osTimerDef_t os_timer_def_##name = \
 { (function) }
 #endif
 /// Access a Timer definition.
 /// Expands to the address of the \c os_timer_def_<name> object. The expansion is
 /// parenthesized so that it behaves as a single pointer expression when combined with
 /// postfix operators such as \c -> or \c [].
 /// \param         name          name of the timer object.
 /// \note CAN BE CHANGED: The parameter to \b osTimer shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #define osTimer(name) \
 (&os_timer_def_##name)
 /// Create a timer.
 /// \param[in]     timer_def     timer object referenced with \ref osTimer.
 /// \param[in]     type          osTimerOnce for one-shot or osTimerPeriodic for periodic behavior.
 /// \param[in]     argument      argument to the timer call back function.
 /// \return timer ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osTimerCreate shall be consistent in every CMSIS-RTOS.
 osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument);
 /// Start or restart a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue "time delay" value of the timer.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osTimerStart shall be consistent in every CMSIS-RTOS.
 osStatus osTimerStart (osTimerId timer_id, uint32_t millisec);
 /// Stop the timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osTimerStop shall be consistent in every CMSIS-RTOS.
 osStatus osTimerStop (osTimerId timer_id);
 /// Delete a timer that was created by \ref osTimerCreate.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osTimerDelete shall be consistent in every CMSIS-RTOS.
 osStatus osTimerDelete (osTimerId timer_id);
 //  ==== Signal Management ====
 /// Set the specified Signal Flags of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     signals       specifies the signal flags of the thread that should be set.
 /// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters.
 /// \note MUST REMAIN UNCHANGED: \b osSignalSet shall be consistent in every CMSIS-RTOS.
 int32_t osSignalSet (osThreadId thread_id, int32_t signals);
 /// Clear the specified Signal Flags of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     signals       specifies the signal flags of the thread that shall be cleared.
 /// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters or call from ISR.
 /// \note MUST REMAIN UNCHANGED: \b osSignalClear shall be consistent in every CMSIS-RTOS.
 int32_t osSignalClear (osThreadId thread_id, int32_t signals);
 /// Wait for one or more Signal Flags to become signaled for the current \b RUNNING thread.
 /// \param[in]     signals       wait until all specified signal flags set or 0 for any single signal flag.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event flag information or error code.
 /// \note MUST REMAIN UNCHANGED: \b osSignalWait shall be consistent in every CMSIS-RTOS.
 osEvent osSignalWait (int32_t signals, uint32_t millisec);
 //  ==== Mutex Management ====
 /// Declare or define the definition object of a Mutex.
 /// \param         name          name of the mutex object.
 /// \note CAN BE CHANGED: The parameter to \b osMutexDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #ifndef osObjectsExternal
 // Default mode: define the object, initialised with { 0 }.
 #define osMutexDef(name) \
 const osMutexDef_t os_mutex_def_##name = { 0 }
 #else
 // osObjectsExternal is defined: only an extern declaration is emitted.
 #define osMutexDef(name) \
 extern const osMutexDef_t os_mutex_def_##name
 #endif
 /// Access a Mutex definition.
 /// \param         name          name of the mutex object.
 /// \return address of the \c os_mutex_def_##name object declared or defined by \ref osMutexDef.
 /// \note CAN BE CHANGED: The parameter to \b osMutex shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note The expansion is parenthesised so it remains a single operand when it is
 ///       followed by a postfix operator such as \c -> or \c [].
 #define osMutex(name) \
 (&os_mutex_def_##name)
 /// Create and Initialize a Mutex object.
 /// \param[in]     mutex_def     mutex definition referenced with \ref osMutex.
 /// \return mutex ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osMutexCreate shall be consistent in every CMSIS-RTOS.
 osMutexId osMutexCreate (const osMutexDef_t *mutex_def);
 /// Wait until a Mutex becomes available.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMutexWait shall be consistent in every CMSIS-RTOS.
 osStatus osMutexWait (osMutexId mutex_id, uint32_t millisec);
 /// Release a Mutex that was obtained by \ref osMutexWait.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMutexRelease shall be consistent in every CMSIS-RTOS.
 osStatus osMutexRelease (osMutexId mutex_id);
 /// Delete a Mutex that was created by \ref osMutexCreate.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMutexDelete shall be consistent in every CMSIS-RTOS.
 osStatus osMutexDelete (osMutexId mutex_id);
 //  ==== Semaphore Management Functions ====
 #if (defined (osFeature_Semaphore)  &&  (osFeature_Semaphore != 0))     // Semaphore available
 /// Declare or define the definition object of a Semaphore.
 /// \param         name          name of the semaphore object.
 /// \note CAN BE CHANGED: The parameter to \b osSemaphoreDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #ifndef osObjectsExternal
 // Default mode: define the object, initialised with { 0 }.
 #define osSemaphoreDef(name) \
 const osSemaphoreDef_t os_semaphore_def_##name = { 0 }
 #else
 // osObjectsExternal is defined: only an extern declaration is emitted.
 #define osSemaphoreDef(name) \
 extern const osSemaphoreDef_t os_semaphore_def_##name
 #endif
 /// Access a Semaphore definition.
 /// \param         name          name of the semaphore object.
 /// \return address of the \c os_semaphore_def_##name object declared or defined by \ref osSemaphoreDef.
 /// \note CAN BE CHANGED: The parameter to \b osSemaphore shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note The expansion is parenthesised so it remains a single operand when it is
 ///       followed by a postfix operator such as \c -> or \c [].
 #define osSemaphore(name) \
 (&os_semaphore_def_##name)
 /// Create and Initialize a Semaphore object used for managing resources.
 /// \param[in]     semaphore_def semaphore definition referenced with \ref osSemaphore.
 /// \param[in]     count         number of available resources.
 /// \return semaphore ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osSemaphoreCreate shall be consistent in every CMSIS-RTOS.
 osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count);
 /// Wait until a Semaphore token becomes available.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return number of available tokens, or -1 in case of incorrect parameters.
 /// \note MUST REMAIN UNCHANGED: \b osSemaphoreWait shall be consistent in every CMSIS-RTOS.
 int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec);
 /// Release a Semaphore token.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osSemaphoreRelease shall be consistent in every CMSIS-RTOS.
 osStatus osSemaphoreRelease (osSemaphoreId semaphore_id);
 /// Delete a Semaphore that was created by \ref osSemaphoreCreate.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osSemaphoreDelete shall be consistent in every CMSIS-RTOS.
 osStatus osSemaphoreDelete (osSemaphoreId semaphore_id);
 #endif     // Semaphore available
 //  ==== Memory Pool Management Functions ====
 #if (defined (osFeature_Pool)  &&  (osFeature_Pool != 0))  // Memory Pool Management available
 /// \brief Declare or define the definition object of a Memory Pool.
 /// \param         name          name of the memory pool.
 /// \param         no            maximum number of blocks (objects) in the memory pool.
 /// \param         type          data type of a single block (object).
 /// \note CAN BE CHANGED: The parameter to \b osPoolDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #ifndef osObjectsExternal
 // Default mode: define the object with the block count, the size of one
 // block (sizeof(type)) and a NULL third member.
 #define osPoolDef(name, no, type) \
 const osPoolDef_t os_pool_def_##name = { (no), sizeof(type), NULL }
 #else
 // osObjectsExternal is defined: only an extern declaration is emitted;
 // `no` and `type` are not used in this form.
 #define osPoolDef(name, no, type) \
 extern const osPoolDef_t os_pool_def_##name
 #endif
 /// \brief Access a Memory Pool definition.
 /// \param         name          name of the memory pool.
 /// \return address of the \c os_pool_def_##name object declared or defined by \ref osPoolDef.
 /// \note CAN BE CHANGED: The parameter to \b osPool shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note The expansion is parenthesised so it remains a single operand when it is
 ///       followed by a postfix operator such as \c -> or \c [].
 #define osPool(name) \
 (&os_pool_def_##name)
 /// Create and Initialize a memory pool.
 /// \param[in]     pool_def      memory pool definition referenced with \ref osPool.
 /// \return memory pool ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osPoolCreate shall be consistent in every CMSIS-RTOS.
 osPoolId osPoolCreate (const osPoolDef_t *pool_def);
 /// Allocate a memory block from a memory pool.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \return address of the allocated memory block or NULL in case of no memory available.
 /// \note MUST REMAIN UNCHANGED: \b osPoolAlloc shall be consistent in every CMSIS-RTOS.
 void *osPoolAlloc (osPoolId pool_id);
 /// Allocate a memory block from a memory pool and set memory block to zero.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \return address of the allocated memory block or NULL in case of no memory available.
 /// \note MUST REMAIN UNCHANGED: \b osPoolCAlloc shall be consistent in every CMSIS-RTOS.
 void *osPoolCAlloc (osPoolId pool_id);
 /// Return an allocated memory block back to a specific memory pool.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \param[in]     block         address of the allocated memory block that is returned to the memory pool.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osPoolFree shall be consistent in every CMSIS-RTOS.
 osStatus osPoolFree (osPoolId pool_id, void *block);
 #endif   // Memory Pool Management available
 //  ==== Message Queue Management Functions ====
 #if (defined (osFeature_MessageQ)  &&  (osFeature_MessageQ != 0))     // Message Queues available
 /// \brief Declare or define a Message Queue definition object.
 /// \param         name          name of the queue.
 /// \param         queue_sz      maximum number of messages in the queue.
 /// \param         type          data type of a single message element (for debugger).
 /// \note CAN BE CHANGED: The parameter to \b osMessageQDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #ifndef osObjectsExternal
 // Default mode: define the object with the queue length and sizeof(type).
 #define osMessageQDef(name, queue_sz, type) \
 const osMessageQDef_t os_messageQ_def_##name = { (queue_sz), sizeof (type) }
 #else
 // osObjectsExternal is defined: only an extern declaration is emitted;
 // `queue_sz` and `type` are not used in this form.
 #define osMessageQDef(name, queue_sz, type) \
 extern const osMessageQDef_t os_messageQ_def_##name
 #endif
 /// \brief Access a Message Queue Definition.
 /// \param         name          name of the queue.
 /// \return address of the \c os_messageQ_def_##name object declared or defined by \ref osMessageQDef.
 /// \note CAN BE CHANGED: The parameter to \b osMessageQ shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note The expansion is parenthesised so it remains a single operand when it is
 ///       followed by a postfix operator such as \c -> or \c [].
 #define osMessageQ(name) \
 (&os_messageQ_def_##name)
 /// Create and Initialize a Message Queue.
 /// \param[in]     queue_def     queue definition referenced with \ref osMessageQ.
 /// \param[in]     thread_id     thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL.
 /// \return message queue ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osMessageCreate shall be consistent in every CMSIS-RTOS.
 osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id);
 /// Put a Message to a Queue.
 /// \param[in]     queue_id      message queue ID obtained with \ref osMessageCreate.
 /// \param[in]     info          message information.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMessagePut shall be consistent in every CMSIS-RTOS.
 osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec);
 /// Get a Message or Wait for a Message from a Queue.
 /// \param[in]     queue_id      message queue ID obtained with \ref osMessageCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event information that includes status code.
 /// \note MUST REMAIN UNCHANGED: \b osMessageGet shall be consistent in every CMSIS-RTOS.
 osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec);
 #endif     // Message Queues available
 //  ==== Mail Queue Management Functions ====
 #if (defined (osFeature_MailQ)  &&  (osFeature_MailQ != 0))     // Mail Queues available
 /// \brief Declare or define a Mail Queue definition object.
 /// \param         name          name of the queue.
 /// \param         queue_sz      maximum number of messages in the queue.
 /// \param         type          data type of a single message element.
 /// \note CAN BE CHANGED: The parameter to \b osMailQDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #ifndef osObjectsExternal
 // Default mode: define the object with the queue length and sizeof(type).
 #define osMailQDef(name, queue_sz, type) \
 const osMailQDef_t os_mailQ_def_##name = { (queue_sz), sizeof (type) }
 #else
 // osObjectsExternal is defined: only an extern declaration is emitted;
 // `queue_sz` and `type` are not used in this form.
 #define osMailQDef(name, queue_sz, type) \
 extern const osMailQDef_t os_mailQ_def_##name
 #endif
 /// \brief Access a Mail Queue Definition.
 /// \param         name          name of the queue.
 /// \return address of the \c os_mailQ_def_##name object declared or defined by \ref osMailQDef.
 /// \note CAN BE CHANGED: The parameter to \b osMailQ shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note The expansion is parenthesised so it remains a single operand when it is
 ///       followed by a postfix operator such as \c -> or \c [].
 #define osMailQ(name) \
 (&os_mailQ_def_##name)
 /// Create and Initialize mail queue.
 /// \param[in]     queue_def     reference to the mail queue definition obtained with \ref osMailQ
 /// \param[in]     thread_id     thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL.
 /// \return mail queue ID for reference by other functions or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osMailCreate shall be consistent in every CMSIS-RTOS.
 osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id);
 /// Allocate a memory block from a mail.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return pointer to memory block that can be filled with mail or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osMailAlloc shall be consistent in every CMSIS-RTOS.
 void *osMailAlloc (osMailQId queue_id, uint32_t millisec);
 /// Allocate a memory block from a mail and set memory block to zero.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return pointer to memory block that can be filled with mail or NULL in case of error.
 /// \note MUST REMAIN UNCHANGED: \b osMailCAlloc shall be consistent in every CMSIS-RTOS.
 void *osMailCAlloc (osMailQId queue_id, uint32_t millisec);
 /// Put a mail to a queue.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     mail          memory block previously allocated with \ref osMailAlloc or \ref osMailCAlloc.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMailPut shall be consistent in every CMSIS-RTOS.
 osStatus osMailPut (osMailQId queue_id, void *mail);
 /// Get a mail from a queue.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return event that contains mail information or error code.
 /// \note MUST REMAIN UNCHANGED: \b osMailGet shall be consistent in every CMSIS-RTOS.
 osEvent osMailGet (osMailQId queue_id, uint32_t millisec);
 /// Free a memory block from a mail.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     mail          pointer to the memory block that was obtained with \ref osMailGet.
 /// \return status code that indicates the execution status of the function.
 /// \note MUST REMAIN UNCHANGED: \b osMailFree shall be consistent in every CMSIS-RTOS.
 osStatus osMailFree (osMailQId queue_id, void *mail);
 #endif  // Mail Queues available
 #ifdef  __cplusplus
 }
 #endif
 #endif  // _CMSIS_OS_H
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h
@@ -1,756 +0,0 @@
 /*
 * Copyright (c) 2013-2018 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ----------------------------------------------------------------------
 *
 * $Date:        18. June 2018
 * $Revision:    V2.1.3
 *
 * Project:      CMSIS-RTOS2 API
 * Title:        cmsis_os2.h header file
 *
 * Version 2.1.3
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osThreadGetId
 * Version 2.1.2
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osKernelGetInfo, osKernelGetState
 * Version 2.1.1
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osKernelGetTickCount, osKernelGetTickFreq
 *    Changed Kernel Tick type to uint32_t:
 *    - updated: osKernelGetTickCount, osDelayUntil
 * Version 2.1.0
 *    Support for critical and uncritical sections (nesting safe):
 *    - updated: osKernelLock, osKernelUnlock
 *    - added: osKernelRestoreLock
 *    Updated Thread and Event Flags:
 *    - changed flags parameter and return type from int32_t to uint32_t
 * Version 2.0.0
 *    Initial Release
 *---------------------------------------------------------------------------*/
 #ifndef CMSIS_OS2_H_
 #define CMSIS_OS2_H_
 #if !defined(__NO_RETURN)
 // Pick the "function does not return" spelling for the compiler in use,
 // unless __NO_RETURN has already been defined before this point.
 #if defined(__CC_ARM)
 #define __NO_RETURN __declspec(noreturn)
 #elif (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)) || defined(__GNUC__)
 // Both of these toolchain checks select the same attribute syntax.
 #define __NO_RETURN __attribute__((__noreturn__))
 #elif defined(__ICCARM__)
 #define __NO_RETURN __noreturn
 #else
 // None of the compilers checked above: expand to nothing.
 #define __NO_RETURN
 #endif
 #endif
 #include <stdint.h>
 #include <stddef.h>
 #ifdef  __cplusplus
 extern "C"
 {
 #endif
 //  ==== Enumerations, structures, defines ====
 /// Version information: a pair of version numbers, one for the API and one for the kernel.
 /// Both use the decimal layout major.minor.rev = mmnnnrrrr.
 typedef struct {
   uint32_t api;      ///< API version (major.minor.rev: mmnnnrrrr dec).
   uint32_t kernel;   ///< Kernel version (major.minor.rev: mmnnnrrrr dec).
 } osVersion_t;
 /// Kernel state.
 /// Non-negative values are states; osKernelError (-1) reports an error.
 typedef enum {
  osKernelInactive        =  0,         ///< Inactive.
  osKernelReady           =  1,         ///< Ready.
  osKernelRunning         =  2,         ///< Running.
  osKernelLocked          =  3,         ///< Locked.
  osKernelSuspended       =  4,         ///< Suspended.
  osKernelError           = -1,         ///< Error.
  // Signed literal (no U suffix), matching the ...Reserved members of the other
  // enums in this header and the signed -1 enumerator above; the value is unchanged.
  osKernelReserved        = 0x7FFFFFFF  ///< Prevents enum down-size compiler optimization.
 } osKernelState_t;
 /// Thread state.
 /// Non-negative values are states; osThreadError (-1) reports an error.
 typedef enum {
   osThreadInactive   = 0,           ///< Inactive.
   osThreadReady      = 1,           ///< Ready.
   osThreadRunning    = 2,           ///< Running.
   osThreadBlocked    = 3,           ///< Blocked.
   osThreadTerminated = 4,           ///< Terminated.
   osThreadError      = -1,          ///< Error.
   osThreadReserved   = 0x7FFFFFFF   ///< Prevents enum down-size compiler optimization.
 } osThreadState_t;
 /// Priority values.
 /// Each named band (Low, BelowNormal, Normal, AboveNormal, High, Realtime) has a
 /// base value plus seven numbered steps written as offsets from that base.
 typedef enum {
   osPriorityNone         = 0,                          ///< No priority (not initialized).
   osPriorityIdle         = 1,                          ///< Reserved for Idle thread.

   osPriorityLow          = 8,                          ///< Priority: low
   osPriorityLow1         = osPriorityLow + 1,          ///< Priority: low + 1
   osPriorityLow2         = osPriorityLow + 2,          ///< Priority: low + 2
   osPriorityLow3         = osPriorityLow + 3,          ///< Priority: low + 3
   osPriorityLow4         = osPriorityLow + 4,          ///< Priority: low + 4
   osPriorityLow5         = osPriorityLow + 5,          ///< Priority: low + 5
   osPriorityLow6         = osPriorityLow + 6,          ///< Priority: low + 6
   osPriorityLow7         = osPriorityLow + 7,          ///< Priority: low + 7

   osPriorityBelowNormal  = 16,                         ///< Priority: below normal
   osPriorityBelowNormal1 = osPriorityBelowNormal + 1,  ///< Priority: below normal + 1
   osPriorityBelowNormal2 = osPriorityBelowNormal + 2,  ///< Priority: below normal + 2
   osPriorityBelowNormal3 = osPriorityBelowNormal + 3,  ///< Priority: below normal + 3
   osPriorityBelowNormal4 = osPriorityBelowNormal + 4,  ///< Priority: below normal + 4
   osPriorityBelowNormal5 = osPriorityBelowNormal + 5,  ///< Priority: below normal + 5
   osPriorityBelowNormal6 = osPriorityBelowNormal + 6,  ///< Priority: below normal + 6
   osPriorityBelowNormal7 = osPriorityBelowNormal + 7,  ///< Priority: below normal + 7

   osPriorityNormal       = 24,                         ///< Priority: normal
   osPriorityNormal1      = osPriorityNormal + 1,       ///< Priority: normal + 1
   osPriorityNormal2      = osPriorityNormal + 2,       ///< Priority: normal + 2
   osPriorityNormal3      = osPriorityNormal + 3,       ///< Priority: normal + 3
   osPriorityNormal4      = osPriorityNormal + 4,       ///< Priority: normal + 4
   osPriorityNormal5      = osPriorityNormal + 5,       ///< Priority: normal + 5
   osPriorityNormal6      = osPriorityNormal + 6,       ///< Priority: normal + 6
   osPriorityNormal7      = osPriorityNormal + 7,       ///< Priority: normal + 7

   osPriorityAboveNormal  = 32,                         ///< Priority: above normal
   osPriorityAboveNormal1 = osPriorityAboveNormal + 1,  ///< Priority: above normal + 1
   osPriorityAboveNormal2 = osPriorityAboveNormal + 2,  ///< Priority: above normal + 2
   osPriorityAboveNormal3 = osPriorityAboveNormal + 3,  ///< Priority: above normal + 3
   osPriorityAboveNormal4 = osPriorityAboveNormal + 4,  ///< Priority: above normal + 4
   osPriorityAboveNormal5 = osPriorityAboveNormal + 5,  ///< Priority: above normal + 5
   osPriorityAboveNormal6 = osPriorityAboveNormal + 6,  ///< Priority: above normal + 6
   osPriorityAboveNormal7 = osPriorityAboveNormal + 7,  ///< Priority: above normal + 7

   osPriorityHigh         = 40,                         ///< Priority: high
   osPriorityHigh1        = osPriorityHigh + 1,         ///< Priority: high + 1
   osPriorityHigh2        = osPriorityHigh + 2,         ///< Priority: high + 2
   osPriorityHigh3        = osPriorityHigh + 3,         ///< Priority: high + 3
   osPriorityHigh4        = osPriorityHigh + 4,         ///< Priority: high + 4
   osPriorityHigh5        = osPriorityHigh + 5,         ///< Priority: high + 5
   osPriorityHigh6        = osPriorityHigh + 6,         ///< Priority: high + 6
   osPriorityHigh7        = osPriorityHigh + 7,         ///< Priority: high + 7

   osPriorityRealtime     = 48,                         ///< Priority: realtime
   osPriorityRealtime1    = osPriorityRealtime + 1,     ///< Priority: realtime + 1
   osPriorityRealtime2    = osPriorityRealtime + 2,     ///< Priority: realtime + 2
   osPriorityRealtime3    = osPriorityRealtime + 3,     ///< Priority: realtime + 3
   osPriorityRealtime4    = osPriorityRealtime + 4,     ///< Priority: realtime + 4
   osPriorityRealtime5    = osPriorityRealtime + 5,     ///< Priority: realtime + 5
   osPriorityRealtime6    = osPriorityRealtime + 6,     ///< Priority: realtime + 6
   osPriorityRealtime7    = osPriorityRealtime + 7,     ///< Priority: realtime + 7

   osPriorityISR          = 56,                         ///< Reserved for ISR deferred thread.
   osPriorityError        = -1,                         ///< System cannot determine priority or illegal priority.
   osPriorityReserved     = 0x7FFFFFFF                  ///< Prevents enum down-size compiler optimization.
 } osPriority_t;
 /// Thread entry point: a function taking one untyped pointer argument and returning nothing.
 typedef void (*osThreadFunc_t)(void *argument);
 /// Timer callback: a function taking one untyped pointer argument and returning nothing.
 typedef void (*osTimerFunc_t)(void *argument);
 /// Timer type: selects one-shot or repeating behaviour.
 typedef enum {
   osTimerOnce     = 0,   ///< One-shot timer.
   osTimerPeriodic = 1    ///< Repeating timer.
 } osTimerType_t;
 // Timeout value (all bits set in an unsigned 32-bit constant).
 #define osWaitForever         0xFFFFFFFFU ///< Wait forever timeout value.
 // Flags options (\ref osThreadFlagsWait and \ref osEventFlagsWait).
 // osFlagsWaitAll and osFlagsNoClear occupy distinct bits; osFlagsWaitAny is zero,
 // i.e. the absence of osFlagsWaitAll.
 #define osFlagsWaitAny        0x00000000U ///< Wait for any flag (default).
 #define osFlagsWaitAll        0x00000001U ///< Wait for all flags.
 #define osFlagsNoClear        0x00000002U ///< Do not clear flags which have been specified to wait for.
 // Flags errors (returned by osThreadFlagsXxxx and osEventFlagsXxxx).
 // Each specific code below is the 32-bit two's-complement bit pattern of the
 // negative status value named in its comment, so every one of them has the
 // osFlagsError bit (0x80000000) set.
 #define osFlagsError          0x80000000U ///< Error indicator.
 #define osFlagsErrorUnknown   0xFFFFFFFFU ///< osError (-1).
 #define osFlagsErrorTimeout   0xFFFFFFFEU ///< osErrorTimeout (-2).
 #define osFlagsErrorResource  0xFFFFFFFDU ///< osErrorResource (-3).
 #define osFlagsErrorParameter 0xFFFFFFFCU ///< osErrorParameter (-4).
 #define osFlagsErrorISR       0xFFFFFFFAU ///< osErrorISR (-6).
 // Thread attributes (attr_bits in \ref osThreadAttr_t).
 #define osThreadDetached      0x00000000U ///< Thread created in detached mode (default)
 #define osThreadJoinable      0x00000001U ///< Thread created in joinable mode
 // Mutex attributes (attr_bits in \ref osMutexAttr_t).
 // Each attribute is a separate bit.
 #define osMutexRecursive      0x00000001U ///< Recursive mutex.
 #define osMutexPrioInherit    0x00000002U ///< Priority inherit protocol.
 #define osMutexRobust         0x00000008U ///< Robust mutex.
 /// Status code values returned by CMSIS-RTOS functions.
 /// osOK (0) is success; every error code is negative.
 typedef enum {
   osOK             =  0,           ///< Operation completed successfully.
   osError          = -1,           ///< Unspecified RTOS error: run-time error but no other error message fits.
   osErrorTimeout   = -2,           ///< Operation not completed within the timeout period.
   osErrorResource  = -3,           ///< Resource not available.
   osErrorParameter = -4,           ///< Parameter error.
   osErrorNoMemory  = -5,           ///< System is out of memory: it was impossible to allocate or reserve memory for the operation.
   osErrorISR       = -6,           ///< Not allowed in ISR context: the function cannot be called from interrupt service routines.
   osStatusReserved = 0x7FFFFFFF    ///< Prevents enum down-size compiler optimization.
 } osStatus_t;
 // Object identifiers. Every ID type below is a plain `void *`, so this header
 // does not expose the layout of the object an ID refers to.
 /// \details Thread ID identifies the thread.
 typedef void *osThreadId_t;
 /// \details Timer ID identifies the timer.
 typedef void *osTimerId_t;
 /// \details Event Flags ID identifies the event flags.
 typedef void *osEventFlagsId_t;
 /// \details Mutex ID identifies the mutex.
 typedef void *osMutexId_t;
 /// \details Semaphore ID identifies the semaphore.
 typedef void *osSemaphoreId_t;
 /// \details Memory Pool ID identifies the memory pool.
 typedef void *osMemoryPoolId_t;
 /// \details Message Queue ID identifies the message queue.
 typedef void *osMessageQueueId_t;
 #if !defined(TZ_MODULEID_T)
 // TZ_MODULEID_T acts as a guard: the typedef is skipped when the macro is
 // already defined before this point.
 #define TZ_MODULEID_T
 /// \details Data type that identifies secure software modules called by a process.
 typedef uint32_t TZ_ModuleId_t;
 #endif
 /// Attributes structure for thread.
 typedef struct {
   const char    *name;         ///< name of the thread
   uint32_t       attr_bits;    ///< attribute bits
   void          *cb_mem;       ///< memory for control block
   uint32_t       cb_size;      ///< size of provided memory for control block
   void          *stack_mem;    ///< memory for stack
   uint32_t       stack_size;   ///< size of stack
   osPriority_t   priority;     ///< initial thread priority (default: osPriorityNormal)
   TZ_ModuleId_t  tz_module;    ///< TrustZone module identifier
   uint32_t       reserved;     ///< reserved (must be 0)
 } osThreadAttr_t;
 /// Attributes structure for timer.
 typedef struct {
   const char *name;        ///< name of the timer
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
 } osTimerAttr_t;
 /// Attributes structure for event flags.
 typedef struct {
   const char *name;        ///< name of the event flags
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
 } osEventFlagsAttr_t;
 /// Attributes structure for mutex.
 typedef struct {
   const char *name;        ///< name of the mutex
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
 } osMutexAttr_t;
 /// Attributes structure for semaphore.
 typedef struct {
   const char *name;        ///< name of the semaphore
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
 } osSemaphoreAttr_t;
 /// Attributes structure for memory pool.
 typedef struct {
   const char *name;        ///< name of the memory pool
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
   void       *mp_mem;      ///< memory for data storage
   uint32_t    mp_size;     ///< size of provided memory for data storage
 } osMemoryPoolAttr_t;
 /// Attributes structure for message queue.
 typedef struct {
   const char *name;        ///< name of the message queue
   uint32_t    attr_bits;   ///< attribute bits
   void       *cb_mem;      ///< memory for control block
   uint32_t    cb_size;     ///< size of provided memory for control block
   void       *mq_mem;      ///< memory for data storage
   uint32_t    mq_size;     ///< size of provided memory for data storage
 } osMessageQueueAttr_t;
 //  ==== Kernel Management Functions ====
 /// Initialize the RTOS Kernel.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osKernelInitialize (void);
 /// Get RTOS Kernel Information.
 /// \param[out]    version       pointer to buffer for retrieving version information.
 /// \param[out]    id_buf        pointer to buffer for retrieving kernel identification string.
 /// \param[in]     id_size       size of buffer for kernel identification string.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osKernelGetInfo (osVersion_t *version, char *id_buf, uint32_t id_size);
 /// Get the current RTOS Kernel state.
 /// \return current RTOS Kernel state.
 osKernelState_t osKernelGetState (void);
 /// Start the RTOS Kernel scheduler.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osKernelStart (void);
 /// Lock the RTOS Kernel scheduler.
 /// \return previous lock state (1 - locked, 0 - not locked, error code if negative).
 int32_t osKernelLock (void);
 /// Unlock the RTOS Kernel scheduler.
 /// \return previous lock state (1 - locked, 0 - not locked, error code if negative).
 int32_t osKernelUnlock (void);
 /// Restore the RTOS Kernel scheduler lock state.
 /// \param[in]     lock          lock state obtained by \ref osKernelLock or \ref osKernelUnlock.
 /// \return new lock state (1 - locked, 0 - not locked, error code if negative).
 int32_t osKernelRestoreLock (int32_t lock);
 /// Suspend the RTOS Kernel scheduler.
 /// \return time in ticks, for how long the system can sleep or power-down.
 uint32_t osKernelSuspend (void);
 /// Resume the RTOS Kernel scheduler.
 /// \param[in]     sleep_ticks   time in ticks for how long the system was in sleep or power-down mode.
 void osKernelResume (uint32_t sleep_ticks);
 /// Get the RTOS kernel tick count.
 /// \return RTOS kernel current tick count.
 uint32_t osKernelGetTickCount (void);
 /// Get the RTOS kernel tick frequency.
 /// \return frequency of the kernel tick in hertz, i.e. kernel ticks per second.
 uint32_t osKernelGetTickFreq (void);
 /// Get the RTOS kernel system timer count.
 /// \return RTOS kernel current system timer count as 32-bit value.
 uint32_t osKernelGetSysTimerCount (void);
 /// Get the RTOS kernel system timer frequency.
 /// \return frequency of the system timer in hertz, i.e. timer ticks per second.
 uint32_t osKernelGetSysTimerFreq (void);
 //  ==== Thread Management Functions ====
 /// Create a thread and add it to Active Threads.
 /// \param[in]     func          thread function.
 /// \param[in]     argument      pointer that is passed to the thread function as start argument.
 /// \param[in]     attr          thread attributes; NULL: default values.
 /// \return thread ID for reference by other functions or NULL in case of error.
 osThreadId_t osThreadNew (osThreadFunc_t func, void *argument, const osThreadAttr_t *attr);
 /// Get name of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return name as null-terminated string.
 const char *osThreadGetName (osThreadId_t thread_id);
 /// Return the thread ID of the current running thread.
 /// \return thread ID for reference by other functions or NULL in case of error.
 osThreadId_t osThreadGetId (void);
 /// Get current thread state of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return current thread state of the specified thread.
 osThreadState_t osThreadGetState (osThreadId_t thread_id);
 /// Get stack size of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return stack size in bytes.
 uint32_t osThreadGetStackSize (osThreadId_t thread_id);
 /// Get available stack space of a thread based on stack watermark recording during execution.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return remaining stack space in bytes.
 uint32_t osThreadGetStackSpace (osThreadId_t thread_id);
 /// Change priority of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \param[in]     priority      new priority value for the thread function.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadSetPriority (osThreadId_t thread_id, osPriority_t priority);
 /// Get current priority of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return current priority value of the specified thread.
 osPriority_t osThreadGetPriority (osThreadId_t thread_id);
 /// Pass control to next thread that is in state \b READY.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadYield (void);
 /// Suspend execution of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadSuspend (osThreadId_t thread_id);
 /// Resume execution of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadResume (osThreadId_t thread_id);
 /// Detach a thread (thread storage can be reclaimed when thread terminates).
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadDetach (osThreadId_t thread_id);
 /// Wait for specified thread to terminate.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadJoin (osThreadId_t thread_id);
 /// Terminate execution of current running thread.
 __NO_RETURN void osThreadExit (void);
 /// Terminate execution of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osThreadTerminate (osThreadId_t thread_id);
 /// Get number of active threads.
 /// \return number of active threads.
 uint32_t osThreadGetCount (void);
 /// Enumerate active threads.
 /// \param[out]    thread_array  pointer to array for retrieving thread IDs.
 /// \param[in]     array_items   maximum number of items in array for retrieving thread IDs.
 /// \return number of enumerated threads.
 uint32_t osThreadEnumerate (osThreadId_t *thread_array, uint32_t array_items);
 //  ==== Thread Flags Functions ====
 /// Set the specified Thread Flags of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadNew or \ref osThreadGetId.
 /// \param[in]     flags         specifies the flags of the thread that shall be set.
 /// \return thread flags after setting or error code if highest bit set.
 uint32_t osThreadFlagsSet (osThreadId_t thread_id, uint32_t flags);
 /// Clear the specified Thread Flags of current running thread.
 /// \param[in]     flags         specifies the flags of the thread that shall be cleared.
 /// \return thread flags before clearing or error code if highest bit set.
 uint32_t osThreadFlagsClear (uint32_t flags);
 /// Get the current Thread Flags of current running thread.
 /// \return current thread flags.
 uint32_t osThreadFlagsGet (void);
 /// Wait for one or more Thread Flags of the current running thread to become signaled.
 /// \param[in]     flags         specifies the flags to wait for.
 /// \param[in]     options       specifies flags options (osFlagsXxxx).
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return thread flags before clearing or error code if highest bit set.
 uint32_t osThreadFlagsWait (uint32_t flags, uint32_t options, uint32_t timeout);
 //  ==== Generic Wait Functions ====
 /// Wait for Timeout (Time Delay).
 /// \param[in]     ticks         \ref CMSIS_RTOS_TimeOutValue "time ticks" value
 /// \return status code that indicates the execution status of the function.
 osStatus_t osDelay (uint32_t ticks);
 /// Wait until specified time.
 /// \param[in]     ticks         absolute time in ticks
 /// \return status code that indicates the execution status of the function.
 osStatus_t osDelayUntil (uint32_t ticks);
 //  ==== Timer Management Functions ====
 /// Create and Initialize a timer.
 /// \param[in]     func          function pointer to callback function.
 /// \param[in]     type          \ref osTimerOnce for one-shot or \ref osTimerPeriodic for periodic behavior.
 /// \param[in]     argument      argument to the timer callback function.
 /// \param[in]     attr          timer attributes; NULL: default values.
 /// \return timer ID for reference by other functions or NULL in case of error.
 osTimerId_t osTimerNew (osTimerFunc_t func, osTimerType_t type, void *argument, const osTimerAttr_t *attr);
 /// Get name of a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerNew.
 /// \return name as null-terminated string.
 const char *osTimerGetName (osTimerId_t timer_id);
 /// Start or restart a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerNew.
 /// \param[in]     ticks         \ref CMSIS_RTOS_TimeOutValue "time ticks" value of the timer.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osTimerStart (osTimerId_t timer_id, uint32_t ticks);
 /// Stop a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osTimerStop (osTimerId_t timer_id);
 /// Check if a timer is running.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerNew.
 /// \return 0 not running, 1 running.
 uint32_t osTimerIsRunning (osTimerId_t timer_id);
 /// Delete a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osTimerDelete (osTimerId_t timer_id);
 //  ==== Event Flags Management Functions ====
 /// Create and Initialize an Event Flags object.
 /// \param[in]     attr          event flags attributes; NULL: default values.
 /// \return event flags ID for reference by other functions or NULL in case of error.
 osEventFlagsId_t osEventFlagsNew (const osEventFlagsAttr_t *attr);
 /// Get name of an Event Flags object.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \return name as null-terminated string.
 const char *osEventFlagsGetName (osEventFlagsId_t ef_id);
 /// Set the specified Event Flags.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \param[in]     flags         specifies the flags that shall be set.
 /// \return event flags after setting or error code if highest bit set.
 uint32_t osEventFlagsSet (osEventFlagsId_t ef_id, uint32_t flags);
 /// Clear the specified Event Flags.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \param[in]     flags         specifies the flags that shall be cleared.
 /// \return event flags before clearing or error code if highest bit set.
 uint32_t osEventFlagsClear (osEventFlagsId_t ef_id, uint32_t flags);
 /// Get the current Event Flags.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \return current event flags.
 uint32_t osEventFlagsGet (osEventFlagsId_t ef_id);
 /// Wait for one or more Event Flags to become signaled.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \param[in]     flags         specifies the flags to wait for.
 /// \param[in]     options       specifies flags options (osFlagsXxxx).
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event flags before clearing or error code if highest bit set.
 uint32_t osEventFlagsWait (osEventFlagsId_t ef_id, uint32_t flags, uint32_t options, uint32_t timeout);
 /// Delete an Event Flags object.
 /// \param[in]     ef_id         event flags ID obtained by \ref osEventFlagsNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osEventFlagsDelete (osEventFlagsId_t ef_id);
 //  ==== Mutex Management Functions ====
 /// Create and Initialize a Mutex object.
 /// \param[in]     attr          mutex attributes; NULL: default values.
 /// \return mutex ID for reference by other functions or NULL in case of error.
 osMutexId_t osMutexNew (const osMutexAttr_t *attr);
 /// Get name of a Mutex object.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexNew.
 /// \return name as null-terminated string.
 const char *osMutexGetName (osMutexId_t mutex_id);
 /// Acquire a Mutex or timeout if it is locked.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexNew.
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMutexAcquire (osMutexId_t mutex_id, uint32_t timeout);
 /// Release a Mutex that was acquired by \ref osMutexAcquire.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMutexRelease (osMutexId_t mutex_id);
 /// Get Thread which owns a Mutex object.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexNew.
 /// \return thread ID of owner thread or NULL when mutex was not acquired.
 osThreadId_t osMutexGetOwner (osMutexId_t mutex_id);
 /// Delete a Mutex object.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMutexDelete (osMutexId_t mutex_id);
 //  ==== Semaphore Management Functions ====
 /// Create and Initialize a Semaphore object.
 /// \param[in]     max_count     maximum number of available tokens.
 /// \param[in]     initial_count initial number of available tokens.
 /// \param[in]     attr          semaphore attributes; NULL: default values.
 /// \return semaphore ID for reference by other functions or NULL in case of error.
 osSemaphoreId_t osSemaphoreNew (uint32_t max_count, uint32_t initial_count, const osSemaphoreAttr_t *attr);
 /// Get name of a Semaphore object.
 /// \param[in]     semaphore_id  semaphore ID obtained by \ref osSemaphoreNew.
 /// \return name as null-terminated string.
 const char *osSemaphoreGetName (osSemaphoreId_t semaphore_id);
 /// Acquire a Semaphore token or timeout if no tokens are available.
 /// \param[in]     semaphore_id  semaphore ID obtained by \ref osSemaphoreNew.
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osSemaphoreAcquire (osSemaphoreId_t semaphore_id, uint32_t timeout);
 /// Release a Semaphore token up to the initial maximum count.
 /// \param[in]     semaphore_id  semaphore ID obtained by \ref osSemaphoreNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osSemaphoreRelease (osSemaphoreId_t semaphore_id);
 /// Get current Semaphore token count.
 /// \param[in]     semaphore_id  semaphore ID obtained by \ref osSemaphoreNew.
 /// \return number of tokens available.
 uint32_t osSemaphoreGetCount (osSemaphoreId_t semaphore_id);
 /// Delete a Semaphore object.
 /// \param[in]     semaphore_id  semaphore ID obtained by \ref osSemaphoreNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osSemaphoreDelete (osSemaphoreId_t semaphore_id);
 //  ==== Memory Pool Management Functions ====
 /// Create and Initialize a Memory Pool object.
 /// \param[in]     block_count   maximum number of memory blocks in memory pool.
 /// \param[in]     block_size    memory block size in bytes.
 /// \param[in]     attr          memory pool attributes; NULL: default values.
 /// \return memory pool ID for reference by other functions or NULL in case of error.
 osMemoryPoolId_t osMemoryPoolNew (uint32_t block_count, uint32_t block_size, const osMemoryPoolAttr_t *attr);
 /// Get name of a Memory Pool object.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return name as null-terminated string.
 const char *osMemoryPoolGetName (osMemoryPoolId_t mp_id);
 /// Allocate a memory block from a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return address of the allocated memory block or NULL in case of no memory is available.
 void *osMemoryPoolAlloc (osMemoryPoolId_t mp_id, uint32_t timeout);
 /// Return an allocated memory block back to a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \param[in]     block         address of the allocated memory block to be returned to the memory pool.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMemoryPoolFree (osMemoryPoolId_t mp_id, void *block);
 /// Get maximum number of memory blocks in a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return maximum number of memory blocks.
 uint32_t osMemoryPoolGetCapacity (osMemoryPoolId_t mp_id);
 /// Get memory block size in a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return memory block size in bytes.
 uint32_t osMemoryPoolGetBlockSize (osMemoryPoolId_t mp_id);
 /// Get number of memory blocks used in a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return number of memory blocks used.
 uint32_t osMemoryPoolGetCount (osMemoryPoolId_t mp_id);
 /// Get number of memory blocks available in a Memory Pool.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return number of memory blocks available.
 uint32_t osMemoryPoolGetSpace (osMemoryPoolId_t mp_id);
 /// Delete a Memory Pool object.
 /// \param[in]     mp_id         memory pool ID obtained by \ref osMemoryPoolNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMemoryPoolDelete (osMemoryPoolId_t mp_id);
 //  ==== Message Queue Management Functions ====
 /// Create and Initialize a Message Queue object.
 /// \param[in]     msg_count     maximum number of messages in queue.
 /// \param[in]     msg_size      maximum message size in bytes.
 /// \param[in]     attr          message queue attributes; NULL: default values.
 /// \return message queue ID for reference by other functions or NULL in case of error.
 osMessageQueueId_t osMessageQueueNew (uint32_t msg_count, uint32_t msg_size, const osMessageQueueAttr_t *attr);
 /// Get name of a Message Queue object.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return name as null-terminated string.
 const char *osMessageQueueGetName (osMessageQueueId_t mq_id);
 /// Put a Message into a Queue or timeout if Queue is full.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \param[in]     msg_ptr       pointer to buffer with message to put into a queue.
 /// \param[in]     msg_prio      message priority.
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMessageQueuePut (osMessageQueueId_t mq_id, const void *msg_ptr, uint8_t msg_prio, uint32_t timeout);
 /// Get a Message from a Queue or timeout if Queue is empty.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \param[out]    msg_ptr       pointer to buffer for message to get from a queue.
 /// \param[out]    msg_prio      pointer to buffer for message priority or NULL.
 /// \param[in]     timeout       \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMessageQueueGet (osMessageQueueId_t mq_id, void *msg_ptr, uint8_t *msg_prio, uint32_t timeout);
 /// Get maximum number of messages in a Message Queue.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return maximum number of messages.
 uint32_t osMessageQueueGetCapacity (osMessageQueueId_t mq_id);
 /// Get maximum message size in a Message Queue.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return maximum message size in bytes.
 uint32_t osMessageQueueGetMsgSize (osMessageQueueId_t mq_id);
 /// Get number of queued messages in a Message Queue.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return number of queued messages.
 uint32_t osMessageQueueGetCount (osMessageQueueId_t mq_id);
 /// Get number of available slots for messages in a Message Queue.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return number of available slots for messages.
 uint32_t osMessageQueueGetSpace (osMessageQueueId_t mq_id);
 /// Reset a Message Queue to initial empty state.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMessageQueueReset (osMessageQueueId_t mq_id);
 /// Delete a Message Queue object.
 /// \param[in]     mq_id         message queue ID obtained by \ref osMessageQueueNew.
 /// \return status code that indicates the execution status of the function.
 osStatus_t osMessageQueueDelete (osMessageQueueId_t mq_id);
 #ifdef  __cplusplus
 }
 #endif
 #endif  // CMSIS_OS2_H_
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Include/os_tick.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Include/os_tick.h
@@ -1,71 +0,0 @@
 /**************************************************************************//**
 * @file     os_tick.h
 * @brief    CMSIS OS Tick header file
 * @version  V1.0.1
 * @date     24. November 2017
 ******************************************************************************/
 /*
 * Copyright (c) 2017-2017 ARM Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef OS_TICK_H
 #define OS_TICK_H
 #include <stdint.h>
 /// IRQ Handler: pointer to a function that takes no arguments and returns nothing.
 /// The IRQHANDLER_T guard skips the typedef when that macro is already defined.
 #ifndef IRQHANDLER_T
 #define IRQHANDLER_T
 typedef void (*IRQHandler_t) (void);
 #endif
 /// Setup OS Tick timer to generate periodic RTOS Kernel Ticks
 /// \param[in]     freq         tick frequency in Hz
 /// \param[in]     handler      tick IRQ handler
 /// \return 0 on success, -1 on error.
 int32_t  OS_Tick_Setup (uint32_t freq, IRQHandler_t handler);
 /// Enable OS Tick timer interrupt
 void     OS_Tick_Enable (void);
 /// Disable OS Tick timer interrupt
 void     OS_Tick_Disable (void);
 /// Acknowledge execution of OS Tick timer interrupt
 void     OS_Tick_AcknowledgeIRQ (void);
 /// Get OS Tick timer IRQ number
 /// \return OS Tick IRQ number
 int32_t  OS_Tick_GetIRQn (void);
 /// Get OS Tick timer clock frequency
 /// \return OS Tick timer clock frequency in Hz
 uint32_t OS_Tick_GetClock (void);
 /// Get OS Tick timer interval reload value
 /// \return OS Tick timer interval reload value
 uint32_t OS_Tick_GetInterval (void);
 /// Get OS Tick timer counter value
 /// \return OS Tick timer counter value
 uint32_t OS_Tick_GetCount (void);
 /// Get OS Tick timer overflow status
 /// \return OS Tick overflow status (1 - overflow, 0 - no overflow).
 uint32_t OS_Tick_GetOverflow (void);
 #endif  /* OS_TICK_H */
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_systick.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_systick.c
@@ -1,132 +0,0 @@
 /**************************************************************************//**
 * @file     os_systick.c
 * @brief    CMSIS OS Tick SysTick implementation
 * @version  V1.0.1
 * @date     24. November 2017
 ******************************************************************************/
 /*
 * Copyright (c) 2017-2017 ARM Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "os_tick.h"
 //lint -emacro((923,9078),SCB,SysTick) "cast from unsigned long to pointer"
 #include "RTE_Components.h"
 #include CMSIS_device_header
 #ifdef  SysTick
 // SysTick interrupt priority written by OS_Tick_Setup; may be overridden before this point.
 #ifndef SYSTICK_IRQ_PRIORITY
 #define SYSTICK_IRQ_PRIORITY    0xFFU
 #endif
 // Set by OS_Tick_Disable when it clears a pending SysTick interrupt;
 // OS_Tick_Enable re-pends the interrupt and clears the flag again.
 static uint8_t PendST;
 /// Setup OS Tick timer (SysTick) to generate periodic RTOS Kernel Ticks.
 /// Programs the priority, control, reload and current-value registers; the counter
 /// enable bit is not set here.
 /// \param[in]     freq         tick frequency in Hz
 /// \param[in]     handler      tick IRQ handler (ignored by this implementation)
 /// \return 0 on success, -1 on error (freq is 0 or the reload value exceeds 0x00FFFFFF).
 __WEAK int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) {
  uint32_t load;
  (void)handler;
  if (freq == 0U) {
    //lint -e{904} "Return statement before end of function"
    return (-1);
  }
  load = (SystemCoreClock / freq) - 1U;
  // Also catches freq > SystemCoreClock: the quotient is 0 and the subtraction wraps to 0xFFFFFFFF
  if (load > 0x00FFFFFFU) {
    //lint -e{904} "Return statement before end of function"
    return (-1);
  }
  // Set SysTick Interrupt Priority
 #if   ((defined(__ARM_ARCH_8M_MAIN__) && (__ARM_ARCH_8M_MAIN__ != 0)) || \
        (defined(__CORTEX_M)           && (__CORTEX_M           == 7U)))
  SCB->SHPR[11] = SYSTICK_IRQ_PRIORITY;
 #elif  (defined(__ARM_ARCH_8M_BASE__) && (__ARM_ARCH_8M_BASE__ != 0))
  // Replace only the byte at bits [31:24]; a plain OR could never clear bits that are already set
  SCB->SHPR[1] = (SCB->SHPR[1] & ~((uint32_t)0xFFU << 24)) |
                 (((uint32_t)SYSTICK_IRQ_PRIORITY & 0xFFU) << 24);
 #elif ((defined(__ARM_ARCH_7M__)      && (__ARM_ARCH_7M__      != 0)) || \
        (defined(__ARM_ARCH_7EM__)     && (__ARM_ARCH_7EM__     != 0)))
  SCB->SHP[11]  = SYSTICK_IRQ_PRIORITY;
 #elif  (defined(__ARM_ARCH_6M__)      && (__ARM_ARCH_6M__      != 0))
  // Replace only the byte at bits [31:24]; a plain OR could never clear bits that are already set
  SCB->SHP[1]   = (SCB->SHP[1] & ~((uint32_t)0xFFU << 24)) |
                  (((uint32_t)SYSTICK_IRQ_PRIORITY & 0xFFU) << 24);
 #else
 #error "Unknown ARM Core!"
 #endif
  // Select the clock source and enable the tick interrupt; the counter itself stays disabled
  SysTick->CTRL =  SysTick_CTRL_CLKSOURCE_Msk | SysTick_CTRL_TICKINT_Msk;
  SysTick->LOAD =  load;
  SysTick->VAL  =  0U;
  PendST = 0U;
  return (0);
 }
 /// Enable OS Tick: re-pend a tick interrupt remembered by OS_Tick_Disable (if any),
 /// then set the SysTick enable bit.
 __WEAK void OS_Tick_Enable (void) {
  const uint8_t replay_pending = PendST;
  if (replay_pending != 0U) {
    // Consume the remembered interrupt before pending it again
    PendST = 0U;
    SCB->ICSR = SCB_ICSR_PENDSTSET_Msk;
  }
  SysTick->CTRL = SysTick->CTRL | SysTick_CTRL_ENABLE_Msk;
 }
 /// Disable OS Tick: clear the SysTick enable bit and, when a SysTick interrupt is
 /// pending, clear it and remember it in PendST.
 __WEAK void OS_Tick_Disable (void) {
  SysTick->CTRL = SysTick->CTRL & ~SysTick_CTRL_ENABLE_Msk;
  if ((SCB->ICSR & SCB_ICSR_PENDSTSET_Msk) == 0U) {
    // Nothing pending, nothing to remember
    return;
  }
  SCB->ICSR = SCB_ICSR_PENDSTCLR_Msk;
  PendST = 1U;
 }
 /// Acknowledge OS Tick IRQ by performing a dummy read of SysTick->CTRL.
 // NOTE(review): presumably the read clears the count flag in CTRL - confirm against the core manual.
 __WEAK void OS_Tick_AcknowledgeIRQ (void) {
  const uint32_t discarded = SysTick->CTRL;
  (void)discarded;
 }
 /// Get OS Tick IRQ number.
 /// \return SysTick_IRQn converted to int32_t.
 __WEAK int32_t  OS_Tick_GetIRQn (void) {
  const int32_t irq_number = (int32_t)SysTick_IRQn;
  return irq_number;
 }
 /// Get OS Tick clock.
 /// \return current value of SystemCoreClock.
 __WEAK uint32_t OS_Tick_GetClock (void) {
  const uint32_t clock_hz = SystemCoreClock;
  return clock_hz;
 }
 /// Get OS Tick interval.
 /// \return SysTick reload value plus one.
 __WEAK uint32_t OS_Tick_GetInterval (void) {
  const uint32_t reload = SysTick->LOAD;
  return reload + 1U;
 }
 /// Get OS Tick count value.
 /// \return SysTick reload value minus the current counter value.
 __WEAK uint32_t OS_Tick_GetCount (void) {
  // Register read order is kept: LOAD first, then VAL
  const uint32_t reload  = SysTick->LOAD;
  const uint32_t current = SysTick->VAL;
  return reload - current;
 }
 /// Get OS Tick overflow status.
 /// \return bit 16 of SysTick->CTRL (1 - overflow, 0 - no overflow).
 __WEAK uint32_t OS_Tick_GetOverflow (void) {
  const uint32_t ctrl = SysTick->CTRL;
  return ((ctrl & ((uint32_t)1U << 16)) != 0U) ? 1U : 0U;
 }
 #endif  // SysTick
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c
@@ -1,187 +0,0 @@
 /**************************************************************************//**
 * @file     os_tick_gtim.c
 * @brief    CMSIS OS Tick implementation for Generic Timer
 * @version  V1.0.1
 * @date     24. November 2017
 ******************************************************************************/
 /*
 * Copyright (c) 2017 ARM Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "os_tick.h"
 #include "irq_ctrl.h"
 #include "RTE_Components.h"
 #include CMSIS_device_header
 // Configured timer interrupt priority; OS_Tick_Setup adjusts it to the implemented priority bits.
 #ifndef GTIM_IRQ_PRIORITY
 #define GTIM_IRQ_PRIORITY           0xFFU
 #endif
 // Interrupt number used for the tick timer; may be overridden before this point.
 #ifndef GTIM_IRQ_NUM
 #define GTIM_IRQ_NUM                SecurePhyTimer_IRQn
 #endif
 // Timer interrupt pending flag: set by OS_Tick_Disable when it clears a pending
 // interrupt, consumed by OS_Tick_Enable which pends the interrupt again
 static uint8_t GTIM_PendIRQ;
 // Timer tick frequency, determined in OS_Tick_Setup
 static uint32_t GTIM_Clock;
 // Timer load value: (GTIM_Clock / freq) - 1, computed in OS_Tick_Setup
 static uint32_t GTIM_Load;
 /// Setup OS Tick timer (Generic Timer) to generate periodic RTOS Kernel Ticks.
 /// \param[in]     freq         tick frequency in Hz
 /// \param[in]     handler      tick IRQ handler, registered for GTIM_IRQ_NUM
 /// \return 0 on success, -1 on error (freq is 0, freq exceeds the timer clock, or none
 ///         of the priority bits [7:4] read back as set).
 int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) {
  uint32_t prio, bits;
  if (freq == 0U) {
    return (-1);
  }
  GTIM_PendIRQ = 0U;
  // Get timer clock
 #ifdef SCTR_BASE
  // Memory-mapped register: access through a volatile pointer so the read is always performed
  GTIM_Clock = *(volatile uint32_t *)(SCTR_BASE+0x20);
 #else
  // FVP REFCLK CNTControl 100MHz
  GTIM_Clock = 100000000UL;
 #endif
  PL1_SetCounterFrequency(GTIM_Clock);
  // A tick rate above the timer clock gives a zero quotient and the load value would wrap
  if (freq > GTIM_Clock) {
    return (-1);
  }
  // Calculate load value
  GTIM_Load = (GTIM_Clock / freq) - 1U;
  // Disable Generic Timer and set load value
  PL1_SetControl(0U);
  PL1_SetLoadValue(GTIM_Load);
  // Disable corresponding IRQ
  IRQ_Disable(GTIM_IRQ_NUM);
  IRQ_ClearPending(GTIM_IRQ_NUM);
  // Determine number of implemented priority bits: write all ones and read back
  IRQ_SetPriority(GTIM_IRQ_NUM, 0xFFU);
  prio = IRQ_GetPriority(GTIM_IRQ_NUM);
  // At least bits [7:4] must be implemented
  if ((prio & 0xF0U) == 0U) {
    return (-1);
  }
  // Count the low-order bits that read back as zero (at most 4)
  for (bits = 0U; bits < 4U; bits++) {
    if ((prio & 0x01U) != 0U) {
      break;
    }
    prio >>= 1;
  }
  // Adjust configured priority to the number of implemented priority bits
  prio = (GTIM_IRQ_PRIORITY << bits) & 0xFFUL;
  // Set Generic Timer interrupt priority
  IRQ_SetPriority(GTIM_IRQ_NUM, prio-1U);
  // Set edge-triggered IRQ
  IRQ_SetMode(GTIM_IRQ_NUM, IRQ_MODE_TRIG_EDGE);
  // Register tick interrupt handler function
  IRQ_SetHandler(GTIM_IRQ_NUM, handler);
  // Enable corresponding interrupt
  IRQ_Enable(GTIM_IRQ_NUM);
  // Enable system counter and timer control
 #ifdef SCTR_BASE
  // Volatile read-modify-write so the register update cannot be optimized away
  *(volatile uint32_t *)SCTR_BASE |= 3U;
 #endif
  // Enable timer control
  PL1_SetControl(1U);
  return (0);
 }
 /// Enable OS Tick: re-pend a tick interrupt remembered by OS_Tick_Disable (if any),
 /// then set the timer enable bit (bit 0 of the control value).
 void OS_Tick_Enable (void) {
  const uint8_t replay_pending = GTIM_PendIRQ;
  // Set pending interrupt if flag set
  if (replay_pending != 0U) {
    GTIM_PendIRQ = 0U;
    IRQ_SetPending (GTIM_IRQ_NUM);
  }
  // Start the Generic Timer: read control, set bit 0, write back
  PL1_SetControl(PL1_GetControl() | 1U);
 }
 /// Disable OS Tick: clear the timer enable bit (bit 0 of the control value) and, when the
 /// timer interrupt is pending, clear it and remember it in GTIM_PendIRQ.
 void OS_Tick_Disable (void) {
  // Stop the Generic Timer: read control, clear bit 0, write back
  PL1_SetControl(PL1_GetControl() & ~1U);
  // Remember pending interrupt flag
  if (IRQ_GetPending(GTIM_IRQ_NUM) == 0) {
    return;
  }
  IRQ_ClearPending(GTIM_IRQ_NUM);
  GTIM_PendIRQ = 1U;
 }
 /// Acknowledge OS Tick IRQ: clear the pending timer interrupt, then write the
 /// load value computed in OS_Tick_Setup back to the timer.
 void OS_Tick_AcknowledgeIRQ (void) {
  IRQ_ClearPending (GTIM_IRQ_NUM);
  // NOTE(review): presumably reloading re-arms the timer for the next tick period - confirm
  PL1_SetLoadValue(GTIM_Load);
 }
 /// Get OS Tick IRQ number.
 /// \return GTIM_IRQ_NUM converted to int32_t.
 int32_t  OS_Tick_GetIRQn (void) {
  const int32_t irq_number = (int32_t)GTIM_IRQ_NUM;
  return irq_number;
 }
 /// Get OS Tick clock.
 /// \return timer clock frequency stored by OS_Tick_Setup.
 uint32_t OS_Tick_GetClock (void) {
  const uint32_t clock_hz = GTIM_Clock;
  return clock_hz;
 }
 /// Get OS Tick interval.
 /// \return stored timer load value plus one.
 uint32_t OS_Tick_GetInterval (void) {
  const uint32_t reload = GTIM_Load;
  return reload + 1U;
 }
 /// Get OS Tick count value.
 /// \return stored timer load value minus the current timer value.
 uint32_t OS_Tick_GetCount (void) {
  const uint32_t current = PL1_GetCurrentValue();
  return GTIM_Load - current;
 }
 /// Get OS Tick overflow status.
 /// \return ISTATUS field of the timer control value (1 - overflow, 0 - no overflow).
 uint32_t OS_Tick_GetOverflow (void) {
  CNTP_CTL_Type control;
  uint32_t      overflow;
  control.w = PL1_GetControl();
  overflow  = control.b.ISTATUS;
  return overflow;
 }
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c
@@ -1,165 +0,0 @@
 /**************************************************************************//**
 * @file     os_tick_ptim.c
 * @brief    CMSIS OS Tick implementation for Private Timer
 * @version  V1.0.2
 * @date     02. March 2018
 ******************************************************************************/
 /*
 * Copyright (c) 2017-2018 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "RTE_Components.h"
 #include CMSIS_device_header
 #if defined(PTIM)
 #include "os_tick.h"
 #include "irq_ctrl.h"
 #ifndef PTIM_IRQ_PRIORITY
 #define PTIM_IRQ_PRIORITY           0xFFU
 #endif
 static uint8_t PTIM_PendIRQ;        // Timer interrupt pending flag
 // Setup OS Tick.
 // Programs the Private Timer for the requested tick frequency, configures and
 // enables PrivTimer_IRQn with the given handler, and leaves the timer stopped
 // (OS_Tick_Enable sets the enable bit).
 // \param[in] freq     tick frequency; must be non-zero and must not exceed
 //                     SystemCoreClock (same unit as SystemCoreClock).
 // \param[in] handler  interrupt handler registered for PrivTimer_IRQn.
 // \return 0 on success; -1 if freq is out of range or none of the priority
 //         bits [7:4] read back as set.
 int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) {
  uint32_t load;
  uint32_t prio;
  uint32_t bits;
  // Reject frequencies the timer cannot produce: zero would divide by zero,
  // and a value above SystemCoreClock makes the quotient 0, so the "- 1U"
  // below would wrap the load value to 0xFFFFFFFF.
  if ((freq == 0U) || (freq > SystemCoreClock)) {
    return (-1);
  }
  PTIM_PendIRQ = 0U;
  // Private Timer runs with the system frequency
  load = (SystemCoreClock / freq) - 1U;
  // Disable Private Timer and set load value
  PTIM_SetControl   (0U);
  PTIM_SetLoadValue (load);
  // Disable corresponding IRQ
  IRQ_Disable     (PrivTimer_IRQn);
  IRQ_ClearPending(PrivTimer_IRQn);
  // Determine number of implemented priority bits: write all ones and read
  // the priority back
  IRQ_SetPriority (PrivTimer_IRQn, 0xFFU);
  prio = IRQ_GetPriority (PrivTimer_IRQn);
  // At least bits [7:4] must be implemented
  if ((prio & 0xF0U) == 0U) {
    return (-1);
  }
  // Count the low-order bits that read back as zero (at most 4)
  for (bits = 0; bits < 4; bits++) {
    if ((prio & 0x01) != 0) {
      break;
    }
    prio >>= 1;
  }
  // Adjust configured priority to the number of implemented priority bits
  prio = (PTIM_IRQ_PRIORITY << bits) & 0xFFUL;
  // Set Private Timer interrupt priority (one below the adjusted value)
  IRQ_SetPriority(PrivTimer_IRQn, prio-1U);
  // Set edge-triggered IRQ
  IRQ_SetMode(PrivTimer_IRQn, IRQ_MODE_TRIG_EDGE);
  // Register tick interrupt handler function
  IRQ_SetHandler(PrivTimer_IRQn, handler);
  // Enable corresponding interrupt
  IRQ_Enable (PrivTimer_IRQn);
  // Set bits: IRQ enable and Auto reload
  PTIM_SetControl (0x06U);
  return (0);
 }
 /// Enable OS Tick.
 /// Replays a tick interrupt that OS_Tick_Disable recorded in PTIM_PendIRQ,
 /// then sets bit 0 (timer enable) in the Private Timer control value.
 void OS_Tick_Enable (void) {
  // A tick that was pending when the timer was stopped is raised again first
  if (PTIM_PendIRQ != 0U) {
    PTIM_PendIRQ = 0U;
    IRQ_SetPending (PrivTimer_IRQn);
  }
  // Read-modify-write so that only the enable bit changes
  PTIM_SetControl (PTIM_GetControl() | 1U);
 }
 /// Disable OS Tick.
 /// Clears bit 0 (timer enable) in the Private Timer control value and, if
 /// the tick interrupt is pending at that moment, clears it and records the
 /// fact in PTIM_PendIRQ so that OS_Tick_Enable can raise it again.
 void OS_Tick_Disable (void) {
  // Read-modify-write so that only the enable bit changes
  PTIM_SetControl (PTIM_GetControl() & ~1U);
  // Nothing to remember when no tick interrupt is pending
  if (IRQ_GetPending(PrivTimer_IRQn) == 0) {
    return;
  }
  IRQ_ClearPending (PrivTimer_IRQn);
  PTIM_PendIRQ = 1U;
 }
 // Acknowledge OS Tick IRQ.
 // Clears the Private Timer event flag via PTIM_ClearEventFlag().
 void OS_Tick_AcknowledgeIRQ (void) {
  PTIM_ClearEventFlag();
 }
 // Get OS Tick IRQ number.
 // \return the interrupt number of the Private Timer (PrivTimer_IRQn).
 int32_t  OS_Tick_GetIRQn (void) {
  const int32_t tick_irqn = PrivTimer_IRQn;
  return tick_irqn;
 }
 // Get OS Tick clock.
 // \return the current value of SystemCoreClock.
 uint32_t OS_Tick_GetClock (void) {
  const uint32_t tick_clock = SystemCoreClock;
  return tick_clock;
 }
 // Get OS Tick interval.
 // \return the Private Timer load value plus one.
 uint32_t OS_Tick_GetInterval (void) {
  uint32_t interval = PTIM_GetLoadValue();
  interval += 1U;
  return interval;
 }
 // Get OS Tick count value.
 // \return the Private Timer load value minus its current value.
 uint32_t OS_Tick_GetCount (void) {
  // Read the load value before the current value, as two separate reads
  const uint32_t reload  = PTIM_GetLoadValue();
  const uint32_t current = PTIM_GetCurrentValue();
  return (reload - current);
 }
 // Get OS Tick overflow status.
 // \return bit 0 of the Private Timer ISR register (0 or 1).
 uint32_t OS_Tick_GetOverflow (void) {
  const uint32_t isr = PTIM->ISR;
  return (isr & 1);
 }
 #endif  // PTIM
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Template/cmsis_os.h
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Template/cmsis_os.h
@@ -1,922 +0,0 @@
 /*
 * Copyright (c) 2013-2018 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ----------------------------------------------------------------------
 *
 * $Date:        18. June 2018
 * $Revision:    V2.1.3
 *
 * Project:      CMSIS-RTOS API
 * Title:        cmsis_os.h template header file
 *
 * Version 0.02
 *    Initial Proposal Phase
 * Version 0.03
 *    osKernelStart added, optional feature: main started as thread
 *    osSemaphores have standard behavior
 *    osTimerCreate does not start the timer, added osTimerStart
 *    osThreadPass is renamed to osThreadYield
 * Version 1.01
 *    Support for C++ interface
 *     - const attribute removed from the osXxxxDef_t typedefs
 *     - const attribute added to the osXxxxDef macros
 *    Added: osTimerDelete, osMutexDelete, osSemaphoreDelete
 *    Added: osKernelInitialize
 * Version 1.02
 *    Control functions for short timeouts in microsecond resolution:
 *    Added: osKernelSysTick, osKernelSysTickFrequency, osKernelSysTickMicroSec
 *    Removed: osSignalGet 
 * Version 2.0.0
 *    OS objects creation without macros (dynamic creation and resource allocation):
 *     - added: osXxxxNew functions which replace osXxxxCreate
 *     - added: osXxxxAttr_t structures
 *     - deprecated: osXxxxCreate functions, osXxxxDef_t structures
 *     - deprecated: osXxxxDef and osXxxx macros
 *    osStatus codes simplified and renamed to osStatus_t
 *    osEvent return structure deprecated
 *    Kernel:
 *     - added: osKernelInfo_t and osKernelGetInfo
 *     - added: osKernelState_t and osKernelGetState (replaces osKernelRunning)
 *     - added: osKernelLock, osKernelUnlock
 *     - added: osKernelSuspend, osKernelResume
 *     - added: osKernelGetTickCount, osKernelGetTickFreq
 *     - renamed osKernelSysTick to osKernelGetSysTimerCount
 *     - replaced osKernelSysTickFrequency with osKernelGetSysTimerFreq
 *     - deprecated osKernelSysTickMicroSec
 *    Thread:
 *     - extended number of thread priorities
 *     - renamed osPriority to osPriority_t
 *     - replaced osThreadCreate with osThreadNew
 *     - added: osThreadGetName
 *     - added: osThreadState_t and osThreadGetState
 *     - added: osThreadGetStackSize, osThreadGetStackSpace
 *     - added: osThreadSuspend, osThreadResume
 *     - added: osThreadJoin, osThreadDetach, osThreadExit
 *     - added: osThreadGetCount, osThreadEnumerate
 *     - added: Thread Flags (moved from Signals) 
 *    Signals:
 *     - renamed osSignals to osThreadFlags (moved to Thread Flags)
 *     - changed return value of Set/Clear/Wait functions
 *     - Clear function limited to current running thread
 *     - extended Wait function (options)
 *     - added: osThreadFlagsGet
 *    Event Flags:
 *     - added new independent object for handling Event Flags
 *    Delay and Wait functions:
 *     - added: osDelayUntil
 *     - deprecated: osWait
 *    Timer:
 *     - replaced osTimerCreate with osTimerNew
 *     - added: osTimerGetName, osTimerIsRunning
 *    Mutex:
 *     - extended: attributes (Recursive, Priority Inherit, Robust)
 *     - replaced osMutexCreate with osMutexNew
 *     - renamed osMutexWait to osMutexAcquire
 *     - added: osMutexGetName, osMutexGetOwner
 *    Semaphore:
 *     - extended: maximum and initial token count
 *     - replaced osSemaphoreCreate with osSemaphoreNew
 *     - renamed osSemaphoreWait to osSemaphoreAcquire (changed return value)
 *     - added: osSemaphoreGetName, osSemaphoreGetCount
 *    Memory Pool:
 *     - using osMemoryPool prefix instead of osPool
 *     - replaced osPoolCreate with osMemoryPoolNew
 *     - extended osMemoryPoolAlloc (timeout)
 *     - added: osMemoryPoolGetName
 *     - added: osMemoryPoolGetCapacity, osMemoryPoolGetBlockSize
 *     - added: osMemoryPoolGetCount, osMemoryPoolGetSpace
 *     - added: osMemoryPoolDelete
 *     - deprecated: osPoolCAlloc
 *    Message Queue:
 *     - extended: fixed size message instead of a single 32-bit value
 *     - using osMessageQueue prefix instead of osMessage
 *     - replaced osMessageCreate with osMessageQueueNew
 *     - updated: osMessageQueuePut, osMessageQueueGet
 *     - added: osMessageQueueGetName
 *     - added: osMessageQueueGetCapacity, osMessageQueueGetMsgSize
 *     - added: osMessageQueueGetCount, osMessageQueueGetSpace
 *     - added: osMessageQueueReset, osMessageQueueDelete
 *    Mail Queue: 
 *     - deprecated (superseded by extended Message Queue functionality)
 * Version 2.1.0
 *    Support for critical and uncritical sections (nesting safe):
 *    - updated: osKernelLock, osKernelUnlock
 *    - added: osKernelRestoreLock
 *    Updated Thread and Event Flags:
 *    - changed flags parameter and return type from int32_t to uint32_t
 * Version 2.1.1
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osKernelGetTickCount, osKernelGetTickFreq
 *    Changed Kernel Tick type to uint32_t:
 *    - updated: osKernelGetTickCount, osDelayUntil
 * Version 2.1.2
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osKernelGetInfo, osKernelGetState
 * Version 2.1.3
 *    Additional functions allowed to be called from Interrupt Service Routines:
 *    - osThreadGetId
 *---------------------------------------------------------------------------*/
 #ifndef CMSIS_OS_H_
 #define CMSIS_OS_H_
 /// \b osCMSIS identifies the CMSIS-RTOS API version.
 #define osCMSIS             0x20001U    ///< API version (main[31:16].sub[15:0])
 /// \note CAN BE CHANGED: \b osCMSIS_KERNEL identifies the underlying RTOS kernel and version number.
 #define osCMSIS_KERNEL      0x10000U    ///< RTOS identification and version (main[31:16].sub[15:0])
 /// \note CAN BE CHANGED: \b osKernelSystemId identifies the underlying RTOS kernel.
 #define osKernelSystemId "KERNEL V1.0"  ///< RTOS identification string
 /// \note CAN BE CHANGED: \b osFeature_xxx identifies RTOS features.
 #define osFeature_MainThread  0         ///< main thread      1=main can be thread, 0=not available
 #define osFeature_Signals     16U       ///< maximum number of Signal Flags available per thread
 #define osFeature_Semaphore   65535U    ///< maximum count for \ref osSemaphoreCreate function
 #define osFeature_Wait        0         ///< osWait function: 1=available, 0=not available
 #define osFeature_SysTick     1         ///< osKernelSysTick functions: 1=available, 0=not available
 #define osFeature_Pool        1         ///< Memory Pools:    1=available, 0=not available
 #define osFeature_MessageQ    1         ///< Message Queues:  1=available, 0=not available
 #define osFeature_MailQ       1         ///< Mail Queues:     1=available, 0=not available
 #if (osCMSIS >= 0x20000U)
 #include "cmsis_os2.h"
 #else
 #include <stdint.h>
 #include <stddef.h>
 #endif
 #ifdef  __cplusplus
 extern "C"
 {
 #endif
 // ==== Enumerations, structures, defines ====
 /// Priority values.
 #if (osCMSIS < 0x20000U)
 typedef enum {
  osPriorityIdle          = -3,         ///< Priority: idle (lowest)
  osPriorityLow           = -2,         ///< Priority: low
  osPriorityBelowNormal   = -1,         ///< Priority: below normal
  osPriorityNormal        =  0,         ///< Priority: normal (default)
  osPriorityAboveNormal   = +1,         ///< Priority: above normal
  osPriorityHigh          = +2,         ///< Priority: high
  osPriorityRealtime      = +3,         ///< Priority: realtime (highest)
  osPriorityError         = 0x84,       ///< System cannot determine priority or illegal priority.
  osPriorityReserved      = 0x7FFFFFFF  ///< Prevents enum down-size compiler optimization.
 } osPriority;
 #else
 #define osPriority osPriority_t
 #endif
 /// Entry point of a thread.
 typedef void (*os_pthread) (void const *argument);
 /// Entry point of a timer call back function.
 typedef void (*os_ptimer) (void const *argument);
 /// Timer type.
 #if (osCMSIS < 0x20000U)
 typedef enum {
  osTimerOnce             = 0,          ///< One-shot timer.
  osTimerPeriodic         = 1           ///< Repeating timer.
 } os_timer_type;
 #else
 #define os_timer_type osTimerType_t
 #endif
 /// Timeout value.
 #define osWaitForever       0xFFFFFFFFU ///< Wait forever timeout value.
 /// Status code values returned by CMSIS-RTOS functions.
 #if (osCMSIS < 0x20000U)
 typedef enum {
  osOK                    =    0,       ///< Function completed; no error or event occurred.
  osEventSignal           = 0x08,       ///< Function completed; signal event occurred.
  osEventMessage          = 0x10,       ///< Function completed; message event occurred.
  osEventMail             = 0x20,       ///< Function completed; mail event occurred.
  osEventTimeout          = 0x40,       ///< Function completed; timeout occurred.
  osErrorParameter        = 0x80,       ///< Parameter error: a mandatory parameter was missing or specified an incorrect object.
  osErrorResource         = 0x81,       ///< Resource not available: a specified resource was not available.
  osErrorTimeoutResource  = 0xC1,       ///< Resource not available within given time: a specified resource was not available within the timeout period.
  osErrorISR              = 0x82,       ///< Not allowed in ISR context: the function cannot be called from interrupt service routines.
  osErrorISRRecursive     = 0x83,       ///< Function called multiple times from ISR with same object.
  osErrorPriority         = 0x84,       ///< System cannot determine priority or thread has illegal priority.
  osErrorNoMemory         = 0x85,       ///< System is out of memory: it was impossible to allocate or reserve memory for the operation.
  osErrorValue            = 0x86,       ///< Value of a parameter is out of range.
  osErrorOS               = 0xFF,       ///< Unspecified RTOS error: run-time error but no other error message fits.
  osStatusReserved        = 0x7FFFFFFF  ///< Prevents enum down-size compiler optimization.
 } osStatus;
 #else
 typedef int32_t                  osStatus;
 #define osEventSignal           (0x08)
 #define osEventMessage          (0x10)
 #define osEventMail             (0x20)
 #define osEventTimeout          (0x40)
 #define osErrorOS               osError
 #define osErrorTimeoutResource  osErrorTimeout
 #define osErrorISRRecursive     (-126)
 #define osErrorValue            (-127)
 #define osErrorPriority         (-128)
 #endif
 // >>> the following data type definitions may be adapted towards a specific RTOS
 /// Thread ID identifies the thread.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef void *osThreadId;
 #else
 #define osThreadId osThreadId_t
 #endif
 /// Timer ID identifies the timer.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef void *osTimerId;
 #else
 #define osTimerId osTimerId_t
 #endif
 /// Mutex ID identifies the mutex.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef void *osMutexId;
 #else
 #define osMutexId osMutexId_t
 #endif
 /// Semaphore ID identifies the semaphore.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef void *osSemaphoreId;
 #else
 #define osSemaphoreId osSemaphoreId_t
 #endif
 /// Pool ID identifies the memory pool.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 typedef void *osPoolId;
 /// Message ID identifies the message queue.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 typedef void *osMessageQId;
 /// Mail ID identifies the mail queue.
 /// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS.
 typedef void *osMailQId;
 /// Thread Definition structure contains startup information of a thread.
 /// \note CAN BE CHANGED: \b os_thread_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_thread_def {
  os_pthread                 pthread;   ///< start address of thread function
  osPriority               tpriority;   ///< initial thread priority
  uint32_t                 instances;   ///< maximum number of instances of that thread function
  uint32_t                 stacksize;   ///< stack size requirements in bytes; 0 is default stack size
 } osThreadDef_t;
 #else
 typedef struct os_thread_def {
  os_pthread                 pthread;   ///< start address of thread function
  osThreadAttr_t                attr;   ///< thread attributes
 } osThreadDef_t;
 #endif
 /// Timer Definition structure contains timer parameters.
 /// \note CAN BE CHANGED: \b os_timer_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_timer_def {
  os_ptimer                   ptimer;   ///< start address of a timer function
 } osTimerDef_t;
 #else
 typedef struct os_timer_def {
  os_ptimer                   ptimer;   ///< start address of a timer function
  osTimerAttr_t                 attr;   ///< timer attributes
 } osTimerDef_t;
 #endif
 /// Mutex Definition structure contains setup information for a mutex.
 /// \note CAN BE CHANGED: \b os_mutex_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_mutex_def {
  uint32_t                     dummy;   ///< dummy value
 } osMutexDef_t;
 #else
 #define osMutexDef_t osMutexAttr_t
 #endif
 /// Semaphore Definition structure contains setup information for a semaphore.
 /// \note CAN BE CHANGED: \b os_semaphore_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_semaphore_def {
  uint32_t                     dummy;   ///< dummy value
 } osSemaphoreDef_t;
 #else
 #define osSemaphoreDef_t osSemaphoreAttr_t
 #endif
 /// Definition structure for memory block allocation.
 /// \note CAN BE CHANGED: \b os_pool_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_pool_def {
  uint32_t                   pool_sz;   ///< number of items (elements) in the pool
  uint32_t                   item_sz;   ///< size of an item
  void                         *pool;   ///< pointer to memory for pool
 } osPoolDef_t;
 #else
 typedef struct os_pool_def {
  uint32_t                   pool_sz;   ///< number of items (elements) in the pool
  uint32_t                   item_sz;   ///< size of an item
  osMemoryPoolAttr_t            attr;   ///< memory pool attributes
 } osPoolDef_t;
 #endif
 /// Definition structure for message queue.
 /// \note CAN BE CHANGED: \b os_messageQ_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_messageQ_def {
  uint32_t                  queue_sz;   ///< number of elements in the queue
  void                         *pool;   ///< memory array for messages
 } osMessageQDef_t;
 #else
 typedef struct os_messageQ_def {
  uint32_t                  queue_sz;   ///< number of elements in the queue
  osMessageQueueAttr_t          attr;   ///< message queue attributes
 } osMessageQDef_t;
 #endif
 /// Definition structure for mail queue.
 /// \note CAN BE CHANGED: \b os_mailQ_def is implementation specific in every CMSIS-RTOS.
 #if (osCMSIS < 0x20000U)
 typedef struct os_mailQ_def {
  uint32_t                  queue_sz;   ///< number of elements in the queue
  uint32_t                   item_sz;   ///< size of an item
  void                         *pool;   ///< memory array for mail
 } osMailQDef_t;
 #else
 typedef struct os_mailQ_def {
  uint32_t                  queue_sz;   ///< number of elements in the queue
  uint32_t                   item_sz;   ///< size of an item
  void                         *mail;   ///< pointer to mail
  osMemoryPoolAttr_t         mp_attr;   ///< memory pool attributes
  osMessageQueueAttr_t       mq_attr;   ///< message queue attributes
 } osMailQDef_t;
 #endif
 /// Event structure contains detailed information about an event.
 /// \note \b value and \b def are unions: their members share storage, so only
 ///       one member of each is meaningful for a given event.
 ///       NOTE(review): presumably \b status tells which member applies — confirm against the RTOS implementation.
 typedef struct {
  osStatus                    status;   ///< status code: event or error information
  union {
    uint32_t                       v;   ///< message as 32-bit value
    void                          *p;   ///< message or mail as void pointer
    int32_t                  signals;   ///< signal flags
  } value;                              ///< event value
  union {
    osMailQId                mail_id;   ///< mail id obtained by \ref osMailCreate
    osMessageQId          message_id;   ///< message id obtained by \ref osMessageCreate
  } def;                                ///< event definition
 } osEvent;
 //  ==== Kernel Management Functions ====
 /// Initialize the RTOS Kernel for creating objects.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osKernelInitialize (void);
 #endif
 /// Start the RTOS Kernel scheduler.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osKernelStart (void);
 #endif
 /// Check if the RTOS kernel is already started.
 /// \return 0 RTOS is not started, 1 RTOS is started.
 #if (osCMSIS < 0x20000U)
 int32_t osKernelRunning(void);
 #endif
 #if (defined(osFeature_SysTick) && (osFeature_SysTick != 0))  // System Timer available
 /// Get the RTOS kernel system timer counter.
 /// \return RTOS kernel system timer as 32-bit value 
 #if (osCMSIS < 0x20000U)
 uint32_t osKernelSysTick (void);
 #else
 #define  osKernelSysTick osKernelGetSysTimerCount
 #endif
 /// The RTOS kernel system timer frequency in Hz.
 /// \note Reflects the system timer setting and is typically defined in a configuration file.
 #if (osCMSIS < 0x20000U)
 #define osKernelSysTickFrequency 100000000
 #endif
 /// Convert a microseconds value to a RTOS kernel system timer value.
 /// \param         microsec     time value in microseconds; may be any integer
 ///                             expression (it is parenthesized before the cast).
 /// \return time value normalized to the \ref osKernelSysTickFrequency
 /// \note The multiplication is carried out in 64 bits and the result is a
 ///       64-bit unsigned value.
 #if (osCMSIS < 0x20000U)
 #define osKernelSysTickMicroSec(microsec) (((uint64_t)(microsec) * (osKernelSysTickFrequency)) / 1000000)
 #else
 #define osKernelSysTickMicroSec(microsec) (((uint64_t)(microsec) *  osKernelGetSysTimerFreq()) / 1000000)
 #endif
 #endif  // System Timer available
 //  ==== Thread Management Functions ====
 /// Create a Thread Definition with function, priority, and stack requirements.
 /// \param         name          name of the thread function.
 /// \param         priority      initial priority of the thread function.
 /// \param         instances     number of possible thread instances.
 /// \param         stacksz       stack size (in bytes) requirements for the thread function.
 /// \note CAN BE CHANGED: The parameters to \b osThreadDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note In the API v2 variant \b stacksz is rounded up to a multiple of 8 and
 ///       \b instances is not used; \b stacksz is parenthesized so that any
 ///       integer expression may be passed.
 #if defined (osObjectsExternal)  // object is external
 #define osThreadDef(name, priority, instances, stacksz) \
 extern const osThreadDef_t os_thread_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osThreadDef(name, priority, instances, stacksz) \
 const osThreadDef_t os_thread_def_##name = \
 { (name), (priority), (instances), (stacksz) }
 #else
 #define osThreadDef(name, priority, instances, stacksz) \
 const osThreadDef_t os_thread_def_##name = \
 { (name), \
  { NULL, osThreadDetached, NULL, 0U, NULL, 8*(((stacksz)+7)/8), (priority), 0U, 0U } }
 #endif
 #endif
 /// Access a Thread definition.
 /// \param         name          name of the thread definition object.
 /// \note CAN BE CHANGED: The parameter to \b osThread shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #define osThread(name) \
 &os_thread_def_##name
 /// Create a thread and add it to Active Threads and set it to state READY.
 /// \param[in]     thread_def    thread definition referenced with \ref osThread.
 /// \param[in]     argument      pointer that is passed to the thread function as start argument.
 /// \return thread ID for reference by other functions or NULL in case of error.
 osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument);
 /// Return the thread ID of the current running thread.
 /// \return thread ID for reference by other functions or NULL in case of error.
 #if (osCMSIS < 0x20000U)
 osThreadId osThreadGetId (void);
 #endif
 /// Change priority of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     priority      new priority value for the thread function.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osThreadSetPriority (osThreadId thread_id, osPriority priority);
 #endif
 /// Get current priority of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \return current priority value of the specified thread.
 #if (osCMSIS < 0x20000U)
 osPriority osThreadGetPriority (osThreadId thread_id);
 #endif
 /// Pass control to next thread that is in state \b READY.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osThreadYield (void);
 #endif
 /// Terminate execution of a thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osThreadTerminate (osThreadId thread_id);
 #endif
 //  ==== Signal Management ====
 /// Set the specified Signal Flags of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     signals       specifies the signal flags of the thread that should be set.
 /// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters.
 int32_t osSignalSet (osThreadId thread_id, int32_t signals);
 /// Clear the specified Signal Flags of an active thread.
 /// \param[in]     thread_id     thread ID obtained by \ref osThreadCreate or \ref osThreadGetId.
 /// \param[in]     signals       specifies the signal flags of the thread that shall be cleared.
 /// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters or call from ISR.
 int32_t osSignalClear (osThreadId thread_id, int32_t signals);
 /// Wait for one or more Signal Flags to become signaled for the current \b RUNNING thread.
 /// \param[in]     signals       wait until all specified signal flags set or 0 for any single signal flag.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event flag information or error code.
 osEvent osSignalWait (int32_t signals, uint32_t millisec);
 //  ==== Generic Wait Functions ====
 /// Wait for Timeout (Time Delay).
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue "time delay" value
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osDelay (uint32_t millisec);
 #endif
 #if (defined (osFeature_Wait) && (osFeature_Wait != 0))  // Generic Wait available
 /// Wait for Signal, Message, Mail, or Timeout.
 /// \param[in] millisec          \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return event that contains signal, message, or mail information or error code.
 osEvent osWait (uint32_t millisec);
 #endif  // Generic Wait available
 //  ==== Timer Management Functions ====
 /// Define a Timer object.
 /// \param         name          name of the timer object.
 /// \param         function      name of the timer call back function.
 /// \note CAN BE CHANGED: The parameter to \b osTimerDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #if defined (osObjectsExternal)  // object is external
 #define osTimerDef(name, function) \
 extern const osTimerDef_t os_timer_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osTimerDef(name, function) \
 const osTimerDef_t os_timer_def_##name = { (function) }
 #else
 #define osTimerDef(name, function) \
 const osTimerDef_t os_timer_def_##name = \
 { (function), { NULL, 0U, NULL, 0U } }
 #endif
 #endif
 /// Access a Timer definition.
 /// \param         name          name of the timer object.
 /// \note CAN BE CHANGED: The parameter to \b osTimer shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #define osTimer(name) \
 &os_timer_def_##name
 /// Create and Initialize a timer.
 /// \param[in]     timer_def     timer object referenced with \ref osTimer.
 /// \param[in]     type          osTimerOnce for one-shot or osTimerPeriodic for periodic behavior.
 /// \param[in]     argument      argument to the timer call back function.
 /// \return timer ID for reference by other functions or NULL in case of error.
 osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument);
 /// Start or restart a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue "time delay" value of the timer.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osTimerStart (osTimerId timer_id, uint32_t millisec);
 #endif
 /// Stop a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osTimerStop (osTimerId timer_id);
 #endif
 /// Delete a timer.
 /// \param[in]     timer_id      timer ID obtained by \ref osTimerCreate.
 /// \return status code that indicates the execution status of the function.
 #if (osCMSIS < 0x20000U)
 osStatus osTimerDelete (osTimerId timer_id);
 #endif
 //  ==== Mutex Management Functions ====
 /// Define a Mutex.
 /// \param         name          name of the mutex object.
 /// \note CAN BE CHANGED: The parameter to \b osMutexDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #if defined (osObjectsExternal)  // object is external
 #define osMutexDef(name) \
 extern const osMutexDef_t os_mutex_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osMutexDef(name) \
 const osMutexDef_t os_mutex_def_##name = { 0 }
 #else
 #define osMutexDef(name) \
 const osMutexDef_t os_mutex_def_##name = \
 { NULL, osMutexRecursive | osMutexPrioInherit | osMutexRobust, NULL, 0U }
 #endif
 #endif
 /// Access a Mutex definition.
 /// \param         name          name of the mutex object.
 /// \note CAN BE CHANGED: The parameter to \b osMutex shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 #define osMutex(name) \
 &os_mutex_def_##name
 /// Create and Initialize a Mutex object.
 /// \param[in]     mutex_def     mutex definition referenced with \ref osMutex.
 /// \return mutex ID for reference by other functions or NULL in case of error.
 osMutexId osMutexCreate (const osMutexDef_t *mutex_def);
 /// Wait until a Mutex becomes available.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 /// \note For osCMSIS >= 0x20000U no prototype is declared; osMutexWait is then a macro
 ///       alias for osMutexAcquire.
 #if (osCMSIS < 0x20000U)
 osStatus osMutexWait (osMutexId mutex_id, uint32_t millisec);
 #else
 #define  osMutexWait osMutexAcquire
 #endif
 /// Release a Mutex that was obtained by \ref osMutexWait.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note This prototype is declared here only when osCMSIS < 0x20000U.
 #if (osCMSIS < 0x20000U)
 osStatus osMutexRelease (osMutexId mutex_id);
 #endif
 /// Delete a Mutex object.
 /// \param[in]     mutex_id      mutex ID obtained by \ref osMutexCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note This prototype is declared here only when osCMSIS < 0x20000U.
 #if (osCMSIS < 0x20000U)
 osStatus osMutexDelete (osMutexId mutex_id);
 #endif
 //  ==== Semaphore Management Functions ====
 #if (defined (osFeature_Semaphore) && (osFeature_Semaphore != 0U))  // Semaphore available
 /// Define a Semaphore object.
 /// \param         name          name of the semaphore object.
 /// \note CAN BE CHANGED: The parameter to \b osSemaphoreDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note With osObjectsExternal defined the macro only declares os_semaphore_def_<name> as
 ///       extern. Otherwise it defines the object: zero-initialized for osCMSIS < 0x20000U, or
 ///       initialized with { NULL, 0U, NULL, 0U } for newer versions.
 ///       The expansion has no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osSemaphoreDef(name) \
 extern const osSemaphoreDef_t os_semaphore_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osSemaphoreDef(name) \
 const osSemaphoreDef_t os_semaphore_def_##name = { 0 }
 #else  // osCMSIS >= 0x20000U
 #define osSemaphoreDef(name) \
 const osSemaphoreDef_t os_semaphore_def_##name = \
 { NULL, 0U, NULL, 0U }
 #endif
 #endif
 /// Access a Semaphore definition.
 /// \param         name          name of the semaphore object.
 /// \note CAN BE CHANGED: The parameter to \b osSemaphore shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note Expands to the address of the os_semaphore_def_<name> object created by \b osSemaphoreDef.
 #define osSemaphore(name) \
 &os_semaphore_def_##name
 /// Create and Initialize a Semaphore object.
 /// \param[in]     semaphore_def semaphore definition referenced with \ref osSemaphore.
 /// \param[in]     count         maximum and initial number of available tokens.
 /// \return semaphore ID for reference by other functions or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns NULL for a NULL
 ///       semaphore_def and passes count to osSemaphoreNew as both maximum and initial count.
 osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count);
 /// Wait until a Semaphore token becomes available.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return number of available tokens, or -1 in case of incorrect parameters.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns the remaining
 ///       token count plus one on success, 0 when the acquire ends with osErrorResource or
 ///       osErrorTimeout, and -1 for any other status.
 int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec);
 /// Release a Semaphore token.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note This prototype is declared here only when osCMSIS < 0x20000U.
 #if (osCMSIS < 0x20000U)
 osStatus osSemaphoreRelease (osSemaphoreId semaphore_id);
 #endif
 /// Delete a Semaphore object.
 /// \param[in]     semaphore_id  semaphore object referenced with \ref osSemaphoreCreate.
 /// \return status code that indicates the execution status of the function.
 /// \note This prototype is declared here only when osCMSIS < 0x20000U.
 #if (osCMSIS < 0x20000U)
 osStatus osSemaphoreDelete (osSemaphoreId semaphore_id);
 #endif
 #endif  // Semaphore available
 //  ==== Memory Pool Management Functions ====
 #if (defined(osFeature_Pool) && (osFeature_Pool != 0))  // Memory Pool available
 /// \brief Define a Memory Pool.
 /// \param         name          name of the memory pool.
 /// \param         no            maximum number of blocks (objects) in the memory pool.
 /// \param         type          data type of a single block (object).
 /// \note CAN BE CHANGED: The parameter to \b osPoolDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note With osObjectsExternal defined the macro only declares os_pool_def_<name> as extern.
 ///       Otherwise it defines the object with (no) and sizeof(type) as the first two members;
 ///       the third member is NULL for osCMSIS < 0x20000U and a nested
 ///       { NULL, 0U, NULL, 0U, NULL, 0U } initializer for newer versions.
 ///       The expansion has no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osPoolDef(name, no, type) \
 extern const osPoolDef_t os_pool_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osPoolDef(name, no, type) \
 const osPoolDef_t os_pool_def_##name = \
 { (no), sizeof(type), NULL }
 #else  // osCMSIS >= 0x20000U
 #define osPoolDef(name, no, type) \
 const osPoolDef_t os_pool_def_##name = \
 { (no), sizeof(type), { NULL, 0U, NULL, 0U, NULL, 0U } }
 #endif
 #endif
 /// \brief Access a Memory Pool definition.
 /// \param         name          name of the memory pool
 /// \note CAN BE CHANGED: The parameter to \b osPool shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note Expands to the address of the os_pool_def_<name> object created by \b osPoolDef.
 #define osPool(name) \
 &os_pool_def_##name
 /// Create and Initialize a Memory Pool object.
 /// \param[in]     pool_def      memory pool definition referenced with \ref osPool.
 /// \return memory pool ID for reference by other functions or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns NULL for a NULL
 ///       pool_def and otherwise creates the pool with osMemoryPoolNew.
 osPoolId osPoolCreate (const osPoolDef_t *pool_def);
 /// Allocate a memory block from a Memory Pool.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \return address of the allocated memory block or NULL in case of no memory available.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) calls osMemoryPoolAlloc
 ///       with a timeout of 0U.
 void *osPoolAlloc (osPoolId pool_id);
 /// Allocate a memory block from a Memory Pool and set memory block to zero.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \return address of the allocated memory block or NULL in case of no memory available.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) also returns NULL when
 ///       osMemoryPoolGetBlockSize reports a block size of 0.
 void *osPoolCAlloc (osPoolId pool_id);
 /// Return an allocated memory block back to a Memory Pool.
 /// \param[in]     pool_id       memory pool ID obtained with \ref osPoolCreate.
 /// \param[in]     block         address of the allocated memory block to be returned to the memory pool.
 /// \return status code that indicates the execution status of the function.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns the status of
 ///       osMemoryPoolFree unchanged.
 osStatus osPoolFree (osPoolId pool_id, void *block);
 #endif  // Memory Pool available
 //  ==== Message Queue Management Functions ====
 #if (defined(osFeature_MessageQ) && (osFeature_MessageQ != 0))  // Message Queue available
 /// \brief Create a Message Queue Definition.
 /// \param         name          name of the queue.
 /// \param         queue_sz      maximum number of messages in the queue.
 /// \param         type          data type of a single message element (for debugger).
 /// \note CAN BE CHANGED: The parameter to \b osMessageQDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note With osObjectsExternal defined the macro only declares os_messageQ_def_<name> as
 ///       extern. Otherwise it defines the object with (queue_sz) as first member; the second
 ///       member is NULL for osCMSIS < 0x20000U and a nested { NULL, 0U, NULL, 0U, NULL, 0U }
 ///       initializer for newer versions. None of the expansions uses the \a type argument.
 ///       The expansion has no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osMessageQDef(name, queue_sz, type) \
 extern const osMessageQDef_t os_messageQ_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osMessageQDef(name, queue_sz, type) \
 const osMessageQDef_t os_messageQ_def_##name = \
 { (queue_sz), NULL }
 #else  // osCMSIS >= 0x20000U
 #define osMessageQDef(name, queue_sz, type) \
 const osMessageQDef_t os_messageQ_def_##name = \
 { (queue_sz), { NULL, 0U, NULL, 0U, NULL, 0U } }
 #endif
 #endif
 /// \brief Access a Message Queue Definition.
 /// \param         name          name of the queue
 /// \note CAN BE CHANGED: The parameter to \b osMessageQ shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note Expands to the address of the os_messageQ_def_<name> object created by \b osMessageQDef.
 #define osMessageQ(name) \
 &os_messageQ_def_##name
 /// Create and Initialize a Message Queue object.
 /// \param[in]     queue_def     message queue definition referenced with \ref osMessageQ.
 /// \param[in]     thread_id     thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL.
 /// \return message queue ID for reference by other functions or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) ignores thread_id and
 ///       creates the queue with osMessageQueueNew using sizeof(uint32_t) as message size.
 osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id);
 /// Put a Message to a Queue.
 /// \param[in]     queue_id      message queue ID obtained with \ref osMessageCreate.
 /// \param[in]     info          message information.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return status code that indicates the execution status of the function.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) forwards info to
 ///       osMessageQueuePut with a priority argument of 0U and returns its status.
 osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec);
 /// Get a Message from a Queue or timeout if Queue is empty.
 /// \param[in]     queue_id      message queue ID obtained with \ref osMessageCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event information that includes status code.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) reports osEventMessage
 ///       with the message in value.v on success, osOK for osErrorResource, osEventTimeout for
 ///       osErrorTimeout, and any other status unchanged; value is only written on success.
 osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec);
 #endif  // Message Queue available
 //  ==== Mail Queue Management Functions ====
 #if (defined(osFeature_MailQ) && (osFeature_MailQ != 0))  // Mail Queue available
 /// \brief Create a Mail Queue Definition.
 /// \param         name          name of the queue.
 /// \param         queue_sz      maximum number of mails in the queue.
 /// \param         type          data type of a single mail element.
 /// \note CAN BE CHANGED: The parameter to \b osMailQDef shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note With osObjectsExternal defined the macro only declares os_mailQ_def_<name> as extern.
 ///       Otherwise it defines the object with (queue_sz) and sizeof(type) as the first two
 ///       members. For osCMSIS >= 0x20000U it additionally defines a static array of two
 ///       void pointers, os_mail_p_<name>, and stores its address as third member
 ///       (presumably the \c mail member that osMailCreate in cmsis_os1.c uses as its control
 ///       block -- confirm against the osMailQDef_t declaration).
 ///       The expansion has no trailing semicolon; the caller supplies it.
 #if defined (osObjectsExternal)  // object is external
 #define osMailQDef(name, queue_sz, type) \
 extern const osMailQDef_t os_mailQ_def_##name
 #else                            // define the object
 #if (osCMSIS < 0x20000U)
 #define osMailQDef(name, queue_sz, type) \
 const osMailQDef_t os_mailQ_def_##name = \
 { (queue_sz), sizeof(type), NULL }
 #else  // osCMSIS >= 0x20000U
 #define osMailQDef(name, queue_sz, type) \
 static void *os_mail_p_##name[2]; \
 const osMailQDef_t os_mailQ_def_##name = \
 { (queue_sz), sizeof(type), (&os_mail_p_##name), \
  { NULL, 0U, NULL, 0U, NULL, 0U }, \
  { NULL, 0U, NULL, 0U, NULL, 0U } }
 #endif
 #endif
 /// \brief Access a Mail Queue Definition.
 /// \param         name          name of the queue
 /// \note CAN BE CHANGED: The parameter to \b osMailQ shall be consistent but the
 ///       macro body is implementation specific in every CMSIS-RTOS.
 /// \note Expands to the address of the os_mailQ_def_<name> object created by \b osMailQDef.
 #define osMailQ(name) \
 &os_mailQ_def_##name
 /// Create and Initialize a Mail Queue object.
 /// \param[in]     queue_def     mail queue definition referenced with \ref osMailQ.
 /// \param[in]     thread_id     thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL.
 /// \return mail queue ID for reference by other functions or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) ignores thread_id and
 ///       builds the mail queue from a memory pool plus a message queue; it returns NULL when
 ///       either of them cannot be created.
 osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id);
 /// Allocate a memory block for mail from a mail memory pool.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return pointer to memory block that can be filled with mail or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns NULL for a NULL
 ///       queue_id and otherwise allocates from the queue's memory pool with osMemoryPoolAlloc.
 void *osMailAlloc (osMailQId queue_id, uint32_t millisec);
 /// Allocate a memory block for mail from a mail memory pool and set memory block to zero.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out
 /// \return pointer to memory block that can be filled with mail or NULL in case of error.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns NULL for a NULL
 ///       queue_id or when osMemoryPoolGetBlockSize reports a block size of 0.
 void *osMailCAlloc (osMailQId queue_id, uint32_t millisec);
 /// Put a Mail into a Queue.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     mail          pointer to memory with mail to put into a queue.
 /// \return status code that indicates the execution status of the function.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns osErrorParameter
 ///       for a NULL queue_id and osErrorValue for a NULL mail; otherwise it queues the mail
 ///       pointer with osMessageQueuePut using a timeout of 0U.
 osStatus osMailPut (osMailQId queue_id, const void *mail);
 /// Get a Mail from a Queue or timeout if Queue is empty.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     millisec      \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out.
 /// \return event information that includes status code.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) reports osErrorParameter
 ///       for a NULL queue_id, osEventMail with the mail pointer in value.p on success, osOK
 ///       for osErrorResource, osEventTimeout for osErrorTimeout, and any other status unchanged.
 osEvent osMailGet (osMailQId queue_id, uint32_t millisec);
 /// Free a memory block by returning it to a mail memory pool.
 /// \param[in]     queue_id      mail queue ID obtained with \ref osMailCreate.
 /// \param[in]     mail          pointer to memory block that was obtained with \ref osMailGet.
 /// \return status code that indicates the execution status of the function.
 /// \note The cmsis_os1.c wrapper (compiled when osCMSIS >= 0x20000U) returns osErrorParameter
 ///       for a NULL queue_id and osErrorValue for a NULL mail; otherwise it returns the
 ///       status of osMemoryPoolFree.
 osStatus osMailFree (osMailQId queue_id, void *mail);
 #endif  // Mail Queue available
 #ifdef  __cplusplus
 }
 #endif
 #endif  // CMSIS_OS_H_
--- a/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c
+++ b/firmware/stm32/smart_dormitory/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c
@@ -1,361 +0,0 @@
 /*
 * Copyright (c) 2013-2017 ARM Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * ----------------------------------------------------------------------
 *
 * $Date:        10. January 2017
 * $Revision:    V1.2
 *
 * Project:      CMSIS-RTOS API V1
 * Title:        cmsis_os_v1.c V1 module file
 *---------------------------------------------------------------------------*/
 #include <string.h>
 #include "cmsis_os.h"
 #if (osCMSIS >= 0x20000U)
 // Thread
 // Create a thread from a v1 thread definition by forwarding to osThreadNew.
 //   thread_def: thread definition; its pthread and attr members are passed on.
 //   argument:   handed to osThreadNew unchanged.
 // Returns the ID produced by osThreadNew, or NULL when thread_def is NULL.
 osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument) {
   osThreadId new_thread = (osThreadId)NULL;
   if (thread_def != NULL) {
     new_thread = osThreadNew((osThreadFunc_t)thread_def->pthread, argument, &thread_def->attr);
   }
   return new_thread;
 }
 // Signals
 // Mask with the lowest osFeature_Signals bits set; osSignalWait waits on it when called with signals == 0.
 #define SignalMask ((1U<<osFeature_Signals)-1U)
 // Set signal flags of a thread through osThreadFlagsSet.
 //   thread_id: target thread.
 //   signals:   flags to set.
 // Returns 0x80000000 when the result of osThreadFlagsSet has its top bit set (error);
 // otherwise that result with the requested bits masked out.
 int32_t osSignalSet (osThreadId thread_id, int32_t signals) {
   const uint32_t requested = (uint32_t)signals;
   const uint32_t result    = osThreadFlagsSet(thread_id, requested);
   if ((result & 0x80000000U) == 0U) {
     return ((int32_t)(result & ~requested));
   }
   return ((int32_t)0x80000000U);
 }
 // Clear signal flags of the calling thread through osThreadFlagsClear.
 //   thread_id: must equal osThreadGetId(); any other thread is rejected.
 //   signals:   flags to clear.
 // Returns the value reported by osThreadFlagsClear, or 0x80000000 when thread_id is not
 // the current thread or the reported value has its top bit set (error).
 int32_t osSignalClear (osThreadId thread_id, int32_t signals) {
   uint32_t result;
   if (thread_id == osThreadGetId()) {
     result = osThreadFlagsClear((uint32_t)signals);
     if ((result & 0x80000000U) == 0U) {
       return ((int32_t)result);
     }
   }
   return ((int32_t)0x80000000U);
 }
 osEvent osSignalWait (int32_t signals, uint32_t millisec) {
  osEvent  event;
  uint32_t flags;
  if (signals != 0) {
    flags = osThreadFlagsWait((uint32_t)signals, osFlagsWaitAll, millisec);
  } else {
    flags = osThreadFlagsWait(SignalMask,        osFlagsWaitAny, millisec);
  }
  if ((flags > 0U) && (flags < 0x80000000U)) {
    event.status = osEventSignal;
    event.value.signals = (int32_t)flags;
  } else {
    switch ((int32_t)flags) {
      case osErrorResource:
        event.status = osOK;
        break;
      case osErrorTimeout:
        event.status = osEventTimeout;
        break;
      case osErrorParameter:
        event.status = osErrorValue;
        break;
      default:
        event.status = (osStatus)flags;
        break;
    }
  }
  return event;
 }
 // Timer
 // Create a timer from a v1 timer definition by forwarding to osTimerNew.
 //   timer_def: timer definition; its ptimer and attr members are passed on.
 //   type:      timer type handed to osTimerNew unchanged.
 //   argument:  handed to osTimerNew unchanged.
 // Returns the ID produced by osTimerNew, or NULL when timer_def is NULL.
 osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument) {
   osTimerId new_timer = (osTimerId)NULL;
   if (timer_def != NULL) {
     new_timer = osTimerNew((osTimerFunc_t)timer_def->ptimer, type, argument, &timer_def->attr);
   }
   return new_timer;
 }
 // Mutex
 // Create a mutex from a v1 mutex definition by forwarding to osMutexNew.
 //   mutex_def: mutex definition, passed to osMutexNew as attribute block.
 // Returns the ID produced by osMutexNew, or NULL when mutex_def is NULL.
 osMutexId osMutexCreate (const osMutexDef_t *mutex_def) {
   osMutexId new_mutex = (osMutexId)NULL;
   if (mutex_def != NULL) {
     new_mutex = osMutexNew(mutex_def);
   }
   return new_mutex;
 }
 // Semaphore
 #if (defined (osFeature_Semaphore) && (osFeature_Semaphore != 0U))
 // Create a semaphore from a v1 semaphore definition by forwarding to osSemaphoreNew.
 //   semaphore_def: semaphore definition, passed to osSemaphoreNew as attribute block.
 //   count:         used as both the maximum and the initial token count.
 // Returns the ID produced by osSemaphoreNew, or NULL when semaphore_def is NULL or count
 // is negative.
 osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count) {
   // A negative count would wrap to a huge unsigned token count in the casts below,
   // so reject it here instead of relying on the callee to catch it.
   if ((semaphore_def == NULL) || (count < 0)) {
     return (osSemaphoreId)NULL;
   }
   return osSemaphoreNew((uint32_t)count, (uint32_t)count, semaphore_def);
 }
 // Acquire a semaphore token through osSemaphoreAcquire.
 //   semaphore_id: semaphore to acquire from.
 //   millisec:     timeout handed to osSemaphoreAcquire.
 // Returns osSemaphoreGetCount() + 1 after a successful acquire, 0 when the acquire ends
 // with osErrorResource or osErrorTimeout, and -1 for every other status.
 int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec) {
   const osStatus_t outcome = osSemaphoreAcquire(semaphore_id, millisec);
   if (outcome == osOK) {
     // The count is read after the acquire, so add the token just taken back in.
     const uint32_t remaining = osSemaphoreGetCount(semaphore_id);
     return ((int32_t)remaining + 1);
   }
   if ((outcome == osErrorResource) || (outcome == osErrorTimeout)) {
     return 0;
   }
   return -1;
 }
 #endif  // Semaphore
 // Memory Pool
 #if (defined(osFeature_Pool) && (osFeature_Pool != 0))
 // Create a memory pool from a v1 pool definition by forwarding to osMemoryPoolNew.
 //   pool_def: pool definition; its pool_sz, item_sz and attr members are passed on.
 // Returns the ID produced by osMemoryPoolNew, or NULL when pool_def is NULL.
 osPoolId osPoolCreate (const osPoolDef_t *pool_def) {
   osPoolId new_pool = (osPoolId)NULL;
   if (pool_def != NULL) {
     new_pool = ((osPoolId)(osMemoryPoolNew(pool_def->pool_sz, pool_def->item_sz, &pool_def->attr)));
   }
   return new_pool;
 }
 // Allocate one block from a memory pool without waiting (timeout 0U).
 //   pool_id: pool to allocate from.
 // Returns whatever osMemoryPoolAlloc returns.
 void *osPoolAlloc (osPoolId pool_id) {
   const osMemoryPoolId_t mp_id = (osMemoryPoolId_t)pool_id;
   return osMemoryPoolAlloc(mp_id, 0U);
 }
 // Allocate one block from a memory pool without waiting (timeout 0U) and zero it.
 //   pool_id: pool to allocate from.
 // Returns the zero-filled block, or NULL when the pool reports a block size of 0 or
 // osMemoryPoolAlloc returns NULL.
 void *osPoolCAlloc (osPoolId pool_id) {
   const osMemoryPoolId_t mp_id = (osMemoryPoolId_t)pool_id;
   const uint32_t         bytes = osMemoryPoolGetBlockSize(mp_id);
   void                  *mem   = NULL;
   if (bytes != 0U) {
     mem = osMemoryPoolAlloc(mp_id, 0U);
     if (mem != NULL) {
       memset(mem, 0, bytes);
     }
   }
   return mem;
 }
 // Return a block to its memory pool.
 //   pool_id: pool the block belongs to.
 //   block:   block to give back.
 // Returns the status of osMemoryPoolFree unchanged.
 osStatus osPoolFree (osPoolId pool_id, void *block) {
   const osMemoryPoolId_t mp_id = (osMemoryPoolId_t)pool_id;
   return osMemoryPoolFree(mp_id, block);
 }
 #endif  // Memory Pool
 // Message Queue
 #if (defined(osFeature_MessageQ) && (osFeature_MessageQ != 0))
 // Create a message queue from a v1 queue definition by forwarding to osMessageQueueNew.
 //   queue_def: queue definition; its queue_sz and attr members are passed on.
 //   thread_id: not used.
 // Every message slot is sizeof(uint32_t) bytes.
 // Returns the ID produced by osMessageQueueNew, or NULL when queue_def is NULL.
 osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id) {
   osMessageQId new_queue = (osMessageQId)NULL;
   (void)thread_id;
   if (queue_def != NULL) {
     new_queue = ((osMessageQId)(osMessageQueueNew(queue_def->queue_sz, sizeof(uint32_t), &queue_def->attr)));
   }
   return new_queue;
 }
 // Put one 32-bit message into a queue through osMessageQueuePut (priority argument 0U).
 //   queue_id: destination queue.
 //   info:     message value; it is handed to the queue by address.
 //   millisec: timeout handed to osMessageQueuePut.
 // Returns the status of osMessageQueuePut unchanged.
 osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec) {
   const osMessageQueueId_t mq_id = (osMessageQueueId_t)queue_id;
   return osMessageQueuePut(mq_id, &info, 0U, millisec);
 }
 // Fetch one 32-bit message from a queue through osMessageQueueGet.
 //   queue_id: source queue.
 //   millisec: timeout handed to osMessageQueueGet.
 // Returns an event with status osEventMessage and the message in value.v on success.
 // osErrorResource is reported as osOK, osErrorTimeout as osEventTimeout, and any other
 // status is stored unchanged; value is only written on success.
 osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec) {
   osEvent    result;
   uint32_t   payload;
   osStatus_t outcome;
   outcome = osMessageQueueGet((osMessageQueueId_t)queue_id, &payload, NULL, millisec);
   if (outcome == osOK) {
     result.status  = osEventMessage;
     result.value.v = payload;
   } else if (outcome == osErrorResource) {
     result.status = osOK;
   } else if (outcome == osErrorTimeout) {
     result.status = osEventTimeout;
   } else {
     result.status = outcome;
   }
   return result;
 }
 #endif  // Message Queue
 // Mail Queue
 #if (defined(osFeature_MailQ) && (osFeature_MailQ != 0))
 // Control block behind an osMailQId: osMailCreate fills it with a memory pool (source of
 // the mail blocks) and a message queue (carries pointers to those blocks).
 typedef struct os_mail_queue_s {
   osMemoryPoolId_t   mp_id;  // pool created with osMemoryPoolNew in osMailCreate
   osMessageQueueId_t mq_id;  // queue created with osMessageQueueNew in osMailCreate
 } os_mail_queue_t;
 // Create a mail queue: a memory pool for the mail blocks plus a message queue that
 // transports pointers to them.
 //   queue_def: mail queue definition; its mail member supplies the storage for the
 //              os_mail_queue_t control block.
 //   thread_id: not used.
 // Returns the control block as osMailQId, or NULL when queue_def or its mail member is
 // NULL, or when either RTOS2 object cannot be created (the one that was created is
 // deleted again).
 osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id) {
   os_mail_queue_t *mail_q;
   (void)thread_id;
   if ((queue_def == NULL) || (queue_def->mail == NULL)) {
     return (osMailQId)NULL;
   }
   mail_q = queue_def->mail;
   mail_q->mp_id = osMemoryPoolNew  (queue_def->queue_sz, queue_def->item_sz, &queue_def->mp_attr);
   mail_q->mq_id = osMessageQueueNew(queue_def->queue_sz, sizeof(void *), &queue_def->mq_attr);
   if ((mail_q->mp_id != (osMemoryPoolId_t)NULL) && (mail_q->mq_id != (osMessageQueueId_t)NULL)) {
     return (osMailQId)mail_q;
   }
   // At least one object is missing: release whichever one was created.
   if (mail_q->mp_id != (osMemoryPoolId_t)NULL) {
     osMemoryPoolDelete(mail_q->mp_id);
   }
   if (mail_q->mq_id != (osMessageQueueId_t)NULL) {
     osMessageQueueDelete(mail_q->mq_id);
   }
   return (osMailQId)NULL;
 }
 // Allocate a mail block from the mail queue's memory pool.
 //   queue_id: mail queue returned by osMailCreate.
 //   millisec: timeout handed to osMemoryPoolAlloc.
 // Returns the block from osMemoryPoolAlloc, or NULL when queue_id is NULL.
 void *osMailAlloc (osMailQId queue_id, uint32_t millisec) {
   os_mail_queue_t *mail_q = (os_mail_queue_t *)queue_id;
   void            *mem    = NULL;
   if (mail_q != NULL) {
     mem = osMemoryPoolAlloc(mail_q->mp_id, millisec);
   }
   return mem;
 }
 // Allocate a mail block from the mail queue's memory pool and zero it.
 //   queue_id: mail queue returned by osMailCreate.
 //   millisec: timeout handed to osMemoryPoolAlloc.
 // Returns the zero-filled block, or NULL when queue_id is NULL, the pool reports a block
 // size of 0, or osMemoryPoolAlloc returns NULL.
 void *osMailCAlloc (osMailQId queue_id, uint32_t millisec) {
   os_mail_queue_t *mail_q = (os_mail_queue_t *)queue_id;
   void            *mem    = NULL;
   uint32_t         bytes;
   if (mail_q == NULL) {
     return NULL;
   }
   bytes = osMemoryPoolGetBlockSize(mail_q->mp_id);
   if (bytes != 0U) {
     mem = osMemoryPoolAlloc(mail_q->mp_id, millisec);
     if (mem != NULL) {
       memset(mem, 0, bytes);
     }
   }
   return mem;
 }
 // Queue a mail block by putting its pointer into the mail queue's message queue
 // (priority argument 0U, timeout 0U).
 //   queue_id: mail queue returned by osMailCreate.
 //   mail:     mail block to queue.
 // Returns osErrorParameter when queue_id is NULL, osErrorValue when mail is NULL,
 // otherwise the status of osMessageQueuePut.
 osStatus osMailPut (osMailQId queue_id, const void *mail) {
   os_mail_queue_t *mail_q = (os_mail_queue_t *)queue_id;
   osStatus         outcome;
   if (mail_q == NULL) {
     outcome = osErrorParameter;
   } else if (mail == NULL) {
     outcome = osErrorValue;
   } else {
     // The queue element is the pointer itself, hence the address of 'mail'.
     outcome = osMessageQueuePut(mail_q->mq_id, &mail, 0U, 0U);
   }
   return outcome;
 }
 // Fetch the next mail pointer from the mail queue's message queue.
 //   queue_id: mail queue returned by osMailCreate.
 //   millisec: timeout handed to osMessageQueueGet.
 // Returns an event with status osErrorParameter when queue_id is NULL. Otherwise the
 // status is osEventMail with the pointer in value.p on success, osOK for
 // osErrorResource, osEventTimeout for osErrorTimeout, or the unchanged status for
 // anything else; value is only written on success.
 osEvent osMailGet (osMailQId queue_id, uint32_t millisec) {
   os_mail_queue_t *mail_q = (os_mail_queue_t *)queue_id;
   osEvent          result;
   osStatus_t       outcome;
   void            *received;
   if (mail_q == NULL) {
     result.status = osErrorParameter;
     return result;
   }
   outcome = osMessageQueueGet(mail_q->mq_id, &received, NULL, millisec);
   if (outcome == osOK) {
     result.status  = osEventMail;
     result.value.p = received;
   } else if (outcome == osErrorResource) {
     result.status = osOK;
   } else if (outcome == osErrorTimeout) {
     result.status = osEventTimeout;
   } else {
     result.status = outcome;
   }
   return result;
 }
 // Return a mail block to the mail queue's memory pool.
 //   queue_id: mail queue returned by osMailCreate.
 //   mail:     mail block to release.
 // Returns osErrorParameter when queue_id is NULL, osErrorValue when mail is NULL,
 // otherwise the status of osMemoryPoolFree.
 osStatus osMailFree (osMailQId queue_id, void *mail) {
   os_mail_queue_t *mail_q = (os_mail_queue_t *)queue_id;
   osStatus         outcome;
   if (mail_q == NULL) {
     outcome = osErrorParameter;
   } else if (mail == NULL) {
     outcome = osErrorValue;
   } else {
     outcome = osMemoryPoolFree(mail_q->mp_id, mail);
   }
   return outcome;
 }
 #endif  // Mail Queue
 #endif  // osCMSIS