# Week 16. Wildcard Week
## Design and produce something with a digital process (incorporating computer-aided design and manufacturing) not covered in another assignment, documenting the requirements that your assignment meets, and including everything necessary to reproduce it.
I decided to do "Machine Learning on an Embedded System" as my assignment for this week.

This project follows a tutorial from the Seeed Studio wiki, which I reproduced step by step. To support my learning of TinyML, I reuse some images and diagrams from that tutorial in this document and add my own labels; details can be found here.
## Machine Learning on an Embedded System
### What I ultimately want to achieve
I say "yes" to the microphone, and the LED turns on.
### What we need to understand before starting
### Introduction to KWS
Keyword Spotting (KWS) is critical to many voice assistants, enabling devices to respond to specific words or phrases.
### What is Edge Impulse?
Edge Impulse is a platform for developing machine learning models specifically designed for edge devices and embedded systems. It provides a comprehensive set of tools and services that enable developers to quickly create, train, and deploy machine learning models without requiring deep expertise in machine learning. (The description above comes from GPT-4.)
### What is the XIAO ESP32S3?
The Seeed Studio XIAO ESP32S3 is a powerful and versatile development board that features a variety of peripheral interfaces and GPIO pins. These pins can be used for various purposes, such as communicating with other devices, reading analog sensors, controlling LEDs, and more.
### KWS in Real Usage (reference: *Machine Learning Systems*)
(Image above comes from the Seeed wiki.)
Stage 1: A small microprocessor inside the Echo Dot or Google Home listens to the sound continuously, waiting for the keyword to be spotted. This detection runs on a TinyML model at the edge (the KWS application).

Stage 2: Only when the KWS application from Stage 1 triggers is the audio sent to the cloud and processed by a larger model.

Without KWS, the device would have to stream everything to the cloud continuously, which would waste a lot of energy.
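To make the two-stage idea concrete, here is a minimal sketch of the control flow. This is my own illustration, not code from the tutorial; `capture_audio_window()`, `kws_detects_keyword()`, and `send_audio_to_cloud()` are hypothetical placeholders for the real components:

```cpp
// Two-stage voice-assistant pipeline, sketched as a plain C++ loop.
// The three helpers below are hypothetical stand-ins.
#include <cstdio>

static void capture_audio_window() {}                     // fill a small audio buffer
static bool kws_detects_keyword()  { return false; }      // Stage 1: tiny on-device model
static void send_audio_to_cloud()  { puts("uploading"); } // Stage 2: big cloud model

int main() {
    while (true) {
        capture_audio_window();
        if (kws_detects_keyword()) {   // keyword gate: almost always closed
            send_audio_to_cloud();     // expensive path, taken rarely
        }
        // Otherwise the audio is dropped and the radio stays off,
        // which is where the energy saving comes from.
    }
}
```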
### KWS Introduction (reference: *Machine Learning Systems*)
(Image above comes from the Seeed wiki.)

The machine learning workflow:
## Recording Audio with the XIAO ESP32S3 Sense
Open the Arduino IDE.
The kit overview
### Step 1
Insert the microSD card into the XIAO.
### Step 2
Attach the microphone (Sense expansion) board.

Pay attention here to the direction of insertion: the side with the gold fingers should face inward.
### Recording the Training Audio Data
The recording code to flash:
```cpp
/*
 * WAV Recorder for Seeed XIAO ESP32S3 Sense
 *
 * NOTE: To execute this code, we will need to use the PSRAM
 * function of the ESP-32 chip, so please turn it on before uploading.
 * Tools > PSRAM: "OPI PSRAM"
 *
 * Adapted by M.Rovai @May23 from original Seeed code
 */

#include <I2S.h>
#include "FS.h"
#include "SD.h"
#include "SPI.h"

// make changes as needed
#define RECORD_TIME   10      // seconds, the maximum value is 240
#define WAV_FILE_NAME "data"

// do not change, for best results
#define SAMPLE_RATE     16000U
#define SAMPLE_BITS     16
#define WAV_HEADER_SIZE 44
#define VOLUME_GAIN     2

int fileNumber = 1;
String baseFileName;
bool isRecording = false;

void setup() {
  Serial.begin(115200);
  while (!Serial);

  I2S.setAllPins(-1, 42, 41, -1, -1);
  if (!I2S.begin(PDM_MONO_MODE, SAMPLE_RATE, SAMPLE_BITS)) {
    Serial.println("Failed to initialize I2S!");
    while (1);
  }
  if (!SD.begin(21)) {
    Serial.println("Failed to mount SD Card!");
    while (1);
  }
  Serial.printf("Enter with the label name\n");
  //record_wav();
}

void loop() {
  if (Serial.available() > 0) {
    String command = Serial.readStringUntil('\n');
    command.trim();
    if (command == "rec") {
      isRecording = true;
    } else {
      baseFileName = command;
      fileNumber = 1; // reset file number each time a new base file name is set
      Serial.printf("Send rec for starting recording label \n");
    }
  }
  if (isRecording && baseFileName != "") {
    String fileName = "/" + baseFileName + "." + String(fileNumber) + ".wav";
    fileNumber++;
    record_wav(fileName);
    delay(1000); // delay to avoid recording multiple files at once
    isRecording = false;
  }
}

void record_wav(String fileName) {
  uint32_t sample_size = 0;
  uint32_t record_size = (SAMPLE_RATE * SAMPLE_BITS / 8) * RECORD_TIME;
  uint8_t *rec_buffer = NULL;
  Serial.printf("Start recording ...\n");

  File file = SD.open(fileName.c_str(), FILE_WRITE);

  // Write the header to the WAV file
  uint8_t wav_header[WAV_HEADER_SIZE];
  generate_wav_header(wav_header, record_size, SAMPLE_RATE);
  file.write(wav_header, WAV_HEADER_SIZE);

  // PSRAM malloc for recording
  rec_buffer = (uint8_t *)ps_malloc(record_size);
  if (rec_buffer == NULL) {
    Serial.printf("malloc failed!\n");
    while (1);
  }
  Serial.printf("Buffer: %d bytes\n", ESP.getPsramSize() - ESP.getFreePsram());

  // Start recording
  esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, rec_buffer, record_size, &sample_size, portMAX_DELAY);
  if (sample_size == 0) {
    Serial.printf("Record Failed!\n");
  } else {
    Serial.printf("Record %d bytes\n", sample_size);
  }

  // Increase volume
  for (uint32_t i = 0; i < sample_size; i += SAMPLE_BITS / 8) {
    (*(uint16_t *)(rec_buffer + i)) <<= VOLUME_GAIN;
  }

  // Write data to the WAV file
  Serial.printf("Writing to the file ...\n");
  if (file.write(rec_buffer, record_size) != record_size)
    Serial.printf("Write file Failed!\n");

  free(rec_buffer);
  file.close();
  Serial.printf("Recording complete: \n");
  Serial.printf("Send rec for a new sample or enter a new label\n\n");
}

void generate_wav_header(uint8_t *wav_header, uint32_t wav_size, uint32_t sample_rate) {
  // See this for reference: http://soundfile.sapp.org/doc/WaveFormat/
  uint32_t file_size = wav_size + WAV_HEADER_SIZE - 8;
  uint32_t byte_rate = SAMPLE_RATE * SAMPLE_BITS / 8;
  const uint8_t set_wav_header[] = {
    'R', 'I', 'F', 'F',                                           // ChunkID
    file_size, file_size >> 8, file_size >> 16, file_size >> 24,  // ChunkSize
    'W', 'A', 'V', 'E',                                           // Format
    'f', 'm', 't', ' ',                                           // Subchunk1ID
    0x10, 0x00, 0x00, 0x00,                                       // Subchunk1Size (16 for PCM)
    0x01, 0x00,                                                   // AudioFormat (1 for PCM)
    0x01, 0x00,                                                   // NumChannels (1 channel)
    sample_rate, sample_rate >> 8, sample_rate >> 16, sample_rate >> 24,  // SampleRate
    byte_rate, byte_rate >> 8, byte_rate >> 16, byte_rate >> 24,  // ByteRate
    0x02, 0x00,                                                   // BlockAlign
    0x10, 0x00,                                                   // BitsPerSample (16 bits)
    'd', 'a', 't', 'a',                                           // Subchunk2ID
    wav_size, wav_size >> 8, wav_size >> 16, wav_size >> 24,      // Subchunk2Size
  };
  memcpy(wav_header, set_wav_header, sizeof(set_wav_header));
}
```
(Code above comes from the Seeed wiki.)
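As a quick sanity check of why the recorder buffers in PSRAM rather than internal RAM, here is a small back-of-the-envelope sketch of my own (not part of the tutorial) that reproduces the size arithmetic from the sketch's constants:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Same constants as the recorder sketch above
    const uint32_t SAMPLE_RATE     = 16000; // Hz
    const uint32_t SAMPLE_BITS     = 16;    // bits per sample
    const uint32_t RECORD_TIME     = 10;    // seconds
    const uint32_t WAV_HEADER_SIZE = 44;

    // Raw PCM payload the sketch must hold in memory at once
    uint32_t record_size = (SAMPLE_RATE * SAMPLE_BITS / 8) * RECORD_TIME;
    printf("PCM payload : %u bytes (~%u KB)\n",
           (unsigned)record_size, (unsigned)(record_size / 1024));
    printf("WAV file    : %u bytes\n",
           (unsigned)(record_size + WAV_HEADER_SIZE));
    // ~313 KB per recording will not fit comfortably in the ESP32-S3's
    // internal SRAM alongside everything else, hence ps_malloc() into PSRAM.
    return 0;
}
```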
Open the Serial Monitor and run the recording code.
### 1. Type "yes" as the recording label
Type "rec" to start recording.

Hold the XIAO close to my mouth and say "yes" once.

Recording... done!
### 2. Type "no" as the recording label
Type "rec" to start recording.

Hold the XIAO close to my mouth and say "no" once. Recording... done!
### 3. Type "test_yes_no" as the recording label
Repeat the same steps as above. Done.

Remove the SD card from the XIAO and insert it into a USB card reader. Then we have these three audio files.

Repeating the steps above, we can collect more audio files for the three labels.
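For reference, one recording cycle over the serial connection looks roughly like this. All the prompt lines come straight from the sketch's own printf calls; the arrows mark what I typed, and the byte counts are from my run (yours may differ slightly):

```
Enter with the label name
yes                                      <- label typed in the Serial Monitor
Send rec for starting recording label
rec                                      <- starts one 10-second recording
Start recording ...
Buffer: 320000 bytes
Record 320000 bytes
Writing to the file ...
Recording complete:
Send rec for a new sample or enter a new label
```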
## TinyML Quick Start on Edge Impulse
Register an account, then select the Upload Data tool in the Data Acquisition section and choose the files to be uploaded.

Find exactly where you say "yes" or "no" in each recording, and split out those segments.

Then click "Create impulse".
For the processing block there are two main options:

### Option 1: MFCC
Mel-Frequency Cepstral Coefficients (MFCCs): best for anything related to human speech.

- Speech recognition: understanding spoken words.
- Speaker identification: recognizing who is talking.
- Emotion recognition: detecting feelings from speech.
- Keyword spotting: finding specific words in speech.

### Option 2: MFE
Mel-Frequency Energies (MFEs): more focus on the intensity of sounds across frequencies.

- Audio quality assurance
- Environmental sound classification
- Music analysis

There is also a third processing block called Spectrogram.
I chose the MFE approach, then clicked MFE.
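To get a feel for what the MFE block actually hands to the classifier, here is a small sketch of the front-end geometry. The parameter values are assumptions I picked for illustration, not values read from my project; check the numbers shown on your own MFE page:

```cpp
#include <cstdio>

int main() {
    // Assumed MFE front-end parameters (illustrative only)
    const int window_ms = 1000; // model input window: 1 s of audio
    const int frame_ms  = 20;   // 20 ms analysis frame
    const int stride_ms = 10;   // 10 ms hop between frames
    const int n_mels    = 40;   // mel filterbank channels

    int frames = (window_ms - frame_ms) / stride_ms + 1; // = 99
    printf("feature matrix: %d frames x %d mel bands = %d values\n",
           frames, n_mels, frames * n_mels);
    // Each frame is a short FFT squeezed through mel-spaced filters,
    // so the classifier sees a small spectrogram-like matrix rather
    // than 16,000 raw samples.
    return 0;
}
```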
Then I can start training.

Before training:

After training:

I have my own audio ML model now!
## Deploying the Model to the XIAO ESP32S3 Sense
### 1. Using the trained model exported from Edge Impulse
Navigate to File > Preferences and fill in "Additional Boards Manager URLs" with the URL below:

https://raw.githubusercontent.com/espressif/arduino-esp32/gh-pages/package_esp32_index.json
Install the ESP32 board package so we can use this board. Be sure to select version 2.0.16 of the package, because only this version is compatible with the XIAO ESP32S3 Sense. The board package will take some time to download.
While it downloads, we can go back to the Edge Impulse page and download our model as an Arduino library.

Edge Impulse packages all the needed libraries, preprocessing functions, and the trained model, and downloads them to my computer. I select the option Arduino Library, choose Quantized (Int8) at the bottom, and press the Build button. A Zip file is then created and downloaded to my computer.

Import the Zip file into the Arduino IDE (Sketch > Include Library > Add .ZIP Library).
I have used two systems; the Arduino libraries folder can be found at the following paths:

- Windows: Documents/Arduino/libraries
- Mac: Documents/Arduino/libraries
Unzip it and you will see:
We need to add an AI acceleration component: ESP-NN, Espressif's optimized neural-network kernels.

Go to my ML model library -> src -> edge-impulse-sdk -> porting -> espressif -> ESP-NN. Find the ESP-NN folder, delete the original, and replace it with the new one.
I found the complete code in the project's GitHub repository, uploaded the sketch to my board, and tested some real inferences:

https://github.com/Mjrovai/XIAO-ESP32S3-Sense/tree/main/xiao_esp32s3_microphone_led

Here is the code:
```cpp
/* Edge Impulse Arduino examples
 * Copyright (c) 2022 EdgeImpulse Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

// If your target is limited in memory remove this macro to save 10K RAM
#define EIDSP_QUANTIZE_FILTERBANK 0

/*
 ** NOTE: If you run into TFLite arena allocation issues,
 ** this may be due to dynamic memory fragmentation.
 ** Try defining "-DEI_CLASSIFIER_ALLOCATION_STATIC" in boards.local.txt (create
 ** if it doesn't exist) and copy this file to
 ** `<ARDUINO_CORE_INSTALL_PATH>/arduino/hardware/<mbed_core>/<core_version>/`.
 **
 ** See
 ** (https://support.arduino.cc/hc/en-us/articles/360012076960-Where-are-the-installed-cores-located-)
 ** to find where Arduino installs cores on your machine.
 **
 ** If the problem persists then there's not enough memory for this model and application.
 */

/* Includes ---------------------------------------------------------------- */
//#include <XIAO-ESP32S3-KWS_inferencing_inferencing.h>
#include <Marco-KWS-KIC_inferencing.h>
#include <I2S.h>

#define SAMPLE_RATE  16000U
#define SAMPLE_BITS  16
#define LED_BUILT_IN 21

/** Audio buffers, pointers and selectors */
typedef struct {
    int16_t *buffer;
    uint8_t buf_ready;
    uint32_t buf_count;
    uint32_t n_samples;
} inference_t;

static inference_t inference;
static const uint32_t sample_buffer_size = 2048;
static signed short sampleBuffer[sample_buffer_size];
static bool debug_nn = false; // Set this to true to see e.g. features generated from the raw signal
static bool record_status = true;

/**
 * @brief Arduino setup function
 */
void setup()
{
    // put your setup code here, to run once:
    Serial.begin(115200);
    // comment out the below line to cancel the wait for USB connection (needed for native USB)
    while (!Serial);
    Serial.println("Edge Impulse Inferencing Demo");

    pinMode(LED_BUILT_IN, OUTPUT);    // Set the pin as output
    digitalWrite(LED_BUILT_IN, HIGH); // Turn off

    I2S.setAllPins(-1, 42, 41, -1, -1);
    if (!I2S.begin(PDM_MONO_MODE, SAMPLE_RATE, SAMPLE_BITS)) {
        Serial.println("Failed to initialize I2S!");
        while (1);
    }

    // summary of inferencing settings (from model_metadata.h)
    ei_printf("Inferencing settings:\n");
    ei_printf("\tInterval: ");
    ei_printf_float((float)EI_CLASSIFIER_INTERVAL_MS);
    ei_printf(" ms.\n");
    ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
    ei_printf("\tSample length: %d ms.\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT / 16);
    ei_printf("\tNo. of classes: %d\n", sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0]));

    ei_printf("\nStarting continuous inference in 2 seconds...\n");
    ei_sleep(2000);

    if (microphone_inference_start(EI_CLASSIFIER_RAW_SAMPLE_COUNT) == false) {
        ei_printf("ERR: Could not allocate audio buffer (size %d), this could be due to the window length of your model\r\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT);
        return;
    }

    ei_printf("Recording...\n");
}

/**
 * @brief Arduino main function. Runs the inferencing loop.
 */
void loop()
{
    bool m = microphone_inference_record();
    if (!m) {
        ei_printf("ERR: Failed to record audio...\n");
        return;
    }

    signal_t signal;
    signal.total_length = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
    signal.get_data = &microphone_audio_signal_get_data;
    ei_impulse_result_t result = { 0 };

    EI_IMPULSE_ERROR r = run_classifier(&signal, &result, debug_nn);
    if (r != EI_IMPULSE_OK) {
        ei_printf("ERR: Failed to run classifier (%d)\n", r);
        return;
    }

    int pred_index = 0;   // Initialize pred_index
    float pred_value = 0; // Initialize pred_value

    // print the predictions
    ei_printf("Predictions ");
    ei_printf("(DSP: %d ms., Classification: %d ms., Anomaly: %d ms.)",
        result.timing.dsp, result.timing.classification, result.timing.anomaly);
    ei_printf(": \n");
    for (size_t ix = 0; ix < EI_CLASSIFIER_LABEL_COUNT; ix++) {
        ei_printf("    %s: ", result.classification[ix].label);
        ei_printf_float(result.classification[ix].value);
        ei_printf("\n");
        if (result.classification[ix].value > pred_value) {
            pred_index = ix;
            pred_value = result.classification[ix].value;
        }
    }

    // Display inference result
    if ((pred_index == 1) && (pred_value > 0.8)) {
        digitalWrite(LED_BUILT_IN, LOW);  // Turn on
    }
    else {
        digitalWrite(LED_BUILT_IN, HIGH); // Turn off
    }

#if EI_CLASSIFIER_HAS_ANOMALY == 1
    ei_printf("    anomaly score: ");
    ei_printf_float(result.anomaly);
    ei_printf("\n");
#endif
}

static void audio_inference_callback(uint32_t n_bytes)
{
    for (int i = 0; i < n_bytes >> 1; i++) {
        inference.buffer[inference.buf_count++] = sampleBuffer[i];
        if (inference.buf_count >= inference.n_samples) {
            inference.buf_count = 0;
            inference.buf_ready = 1;
        }
    }
}

static void capture_samples(void* arg) {
    const int32_t i2s_bytes_to_read = (uint32_t)arg;
    size_t bytes_read = i2s_bytes_to_read;

    while (record_status) {
        /* read data at once from i2s - Modified for XIAO ESP32S3 Sense and I2S.h library */
        // i2s_read((i2s_port_t)1, (void*)sampleBuffer, i2s_bytes_to_read, &bytes_read, 100);
        esp_i2s::i2s_read(esp_i2s::I2S_NUM_0, (void*)sampleBuffer, i2s_bytes_to_read, &bytes_read, 100);

        if (bytes_read <= 0) {
            ei_printf("Error in I2S read : %d", bytes_read);
        }
        else {
            if (bytes_read < i2s_bytes_to_read) {
                ei_printf("Partial I2S read");
            }

            // scale the data (otherwise the sound is too quiet)
            for (int x = 0; x < i2s_bytes_to_read / 2; x++) {
                sampleBuffer[x] = (int16_t)(sampleBuffer[x]) * 8;
            }

            if (record_status) {
                audio_inference_callback(i2s_bytes_to_read);
            }
            else {
                break;
            }
        }
    }
    vTaskDelete(NULL);
}

/**
 * @brief      Init inferencing struct and setup/start PDM
 *
 * @param[in]  n_samples  The n samples
 *
 * @return     { description_of_the_return_value }
 */
static bool microphone_inference_start(uint32_t n_samples)
{
    inference.buffer = (int16_t *)malloc(n_samples * sizeof(int16_t));
    if (inference.buffer == NULL) {
        return false;
    }

    inference.buf_count = 0;
    inference.n_samples = n_samples;
    inference.buf_ready = 0;

//    if (i2s_init(EI_CLASSIFIER_FREQUENCY)) {
//        ei_printf("Failed to start I2S!");
//    }

    ei_sleep(100);
    record_status = true;
    xTaskCreate(capture_samples, "CaptureSamples", 1024 * 32, (void*)sample_buffer_size, 10, NULL);
    return true;
}

/**
 * @brief      Wait on new data
 *
 * @return     True when finished
 */
static bool microphone_inference_record(void)
{
    bool ret = true;
    while (inference.buf_ready == 0) {
        delay(10);
    }
    inference.buf_ready = 0;
    return ret;
}

/**
 * Get raw audio signal data
 */
static int microphone_audio_signal_get_data(size_t offset, size_t length, float *out_ptr)
{
    numpy::int16_to_float(&inference.buffer[offset], out_ptr, length);
    return 0;
}

/**
 * @brief      Stop PDM and release buffers
 */
static void microphone_inference_end(void)
{
    free(sampleBuffer);
    ei_free(inference.buffer);
}

//static int i2s_init(uint32_t sampling_rate) {
//    // Start listening for audio: MONO @ 8/16KHz
//    i2s_config_t i2s_config = {
//        .mode = (i2s_mode_t)(I2S_CHANNEL_MONO),
//        .sample_rate = sampling_rate,
//        .bits_per_sample = (i2s_bits_per_sample_t)16,
//        .channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
//        .communication_format = I2S_COMM_FORMAT_I2S,
//        .intr_alloc_flags = 0,
//        .dma_buf_count = 8,
//        .dma_buf_len = 512,
//        .use_apll = false,
//        .tx_desc_auto_clear = false,
//        .fixed_mclk = -1,
//    };
//    i2s_pin_config_t pin_config = {
//        .bck_io_num = -1,    // IIS_SCLK 26
//        .ws_io_num = 42,     // IIS_LCLK 32
//        .data_out_num = -1,  // IIS_DSIN -1
//        .data_in_num = 41,   // IIS_DOUT 33
//    };
//    esp_err_t ret = 0;
//
//    ret = i2s_driver_install((i2s_port_t)1, &i2s_config, 0, NULL);
//    if (ret != ESP_OK) {
//        ei_printf("Error in i2s_driver_install");
//    }
//
//    ret = i2s_set_pin((i2s_port_t)1, &pin_config);
//    if (ret != ESP_OK) {
//        ei_printf("Error in i2s_set_pin");
//    }
//
//    ret = i2s_zero_dma_buffer((i2s_port_t)1);
//    if (ret != ESP_OK) {
//        ei_printf("Error in initializing dma buffer with 0");
//    }
//
//    return int(ret);
//}
//
//static int i2s_deinit(void) {
//    i2s_driver_uninstall((i2s_port_t)1); //stop & destroy i2s driver
//    return 0;
//}

#if !defined(EI_CLASSIFIER_SENSOR) || EI_CLASSIFIER_SENSOR != EI_CLASSIFIER_SENSOR_MICROPHONE
#error "Invalid model for current sensor."
#endif
```
Key variables in the sketch:

- `pred_index`: index of the recognized label ("yes" or "no").
- `pred_value`: the confidence level of that prediction.
- `LED_BUILT_IN`: pin number of the on-board LED.
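The LED decision itself is just a threshold on the winning class. Here is that logic from the loop above pulled into a small annotated helper; note my assumption that index 1 corresponds to "yes" in my model's label order (check `model_metadata.h` for yours):

```cpp
// Annotated restatement of the LED logic from loop() above.
// Assumption: label index 1 is "yes", index 0 is "no" in my model.
void show_prediction(int pred_index, float pred_value) {
    // Trigger only on a confident "yes" (probability above 0.8).
    // On the XIAO ESP32S3 the user LED is active-low: LOW = on.
    if ((pred_index == 1) && (pred_value > 0.8)) {
        digitalWrite(LED_BUILT_IN, LOW);   // confident "yes": light up
    } else {
        digitalWrite(LED_BUILT_IN, HIGH);  // "no" or low confidence: off
    }
}
```

Raising the 0.8 threshold makes the trigger stricter (fewer false positives, more misses); lowering it does the opposite.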
Here is what appears after opening the Serial Monitor:

Let me demonstrate the actual effect below.
## What I gained
Studying TinyML gave me a preliminary understanding of AI models and made me realize how powerful this field really is. I can imagine that if this technology becomes more widespread in the future, our lives will change dramatically. At the same time, I also feel that the barrier to entry in this field is really high, and I hope to have the opportunity to learn more about it in the future.