I have an update on the ‘size’ optimized code. The code was running more than 20 minutes to end up with a segmentation fault. So, the SDK seems to be a bit undercooked at the moment. Perhaps this is just a ‘malloc()’ implementation not being optimized.
As a microcontroller, I think this is not much of an issue. But as a computer (running a desktop, server, etc.) core, this could be a very serious problem. I’ll take an approach with stack memory calculation variant of my code. Here is my code, btw…
/*
* Simple matrix calculation benchmark!
*
* Results from MilkV Duo (CV1800B) Oct 8th 2024
*
* Assignment Avg: 1792.65 [us]
* Destruction Avg: 546.30 [us]
* Multiplication Avg: 51592.88 [us]
* Transpose Avg: 1027.14 [us]
* Inverse Avg: 87142.32 [us]
*
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <time.h>
#include <wiringx.h>
#define LED_BUILTIN 25
/* Do not exceed 80x80 with float. It will deplete Pi Pico's memory real quick! */
#define MATRIX_ROWS 80
#define MATRIX_COLS 80
/* Benchmark control */
#define N_ITER 100
static uint16_t iter;
/* Note that integer matrices can't do inverse */
#define MAT_T float
#define MAT_SZ_T uint32_t
/* Serial baud rate */
#define SERIAL_BUAD_RATE 115200
/* Utility stuffs */
//char buffer[256];
/* Storing benchmark times */
uint32_t etMatGen[N_ITER];
uint32_t etMatDestroy[N_ITER];
uint32_t etMatMul[N_ITER];
uint32_t etMatTrans[N_ITER];
uint32_t etMatInv[N_ITER];
/* Codes to work with matrix */
// Matrix struct
typedef struct matrix_t {
MAT_T** numbers;
MAT_SZ_T rows;
MAT_SZ_T cols;
} Matrix;
// Initialization with Random Number
Matrix* InitMatrixRnd(MAT_SZ_T rows, MAT_SZ_T cols) {
MAT_SZ_T i, j;
Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
mat->rows = rows;
mat->cols = cols;
mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
for (i=0; i<rows; i++) {
mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
for (j=0; j<cols; j++) {
mat->numbers[i][j] = (MAT_T)rand() / (MAT_T)RAND_MAX;
}
}
return mat;
}
// Initialization with Zeros
Matrix* InitMatrixZero(MAT_SZ_T rows, MAT_SZ_T cols) {
MAT_SZ_T i, j;
Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
mat->rows = rows;
mat->cols = cols;
mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
for (i=0; i<rows; i++) {
mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
for (j=0; j<cols; j++) {
mat->numbers[i][j] = (MAT_T)0.0;
}
}
return mat;
}
// Identity Matrix
Matrix* InitMatrixIdentity(MAT_SZ_T rows, MAT_SZ_T cols) {
MAT_SZ_T i, j;
if (rows != cols) { return NULL; }
Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
mat->rows = rows;
mat->cols = cols;
mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
for (i=0; i<rows; i++) {
mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
for (j=0; j<cols; j++) {
if (i == j) { mat->numbers[i][j] = (MAT_T)1.0; }
else { mat->numbers[i][j] = (MAT_T)0.0; }
}
}
return mat;
}
// Init from another Matrix
Matrix* InitMatrixOtherMat(Matrix* A) {
if (A->rows == 0 || A->cols == 0) { return NULL; }
MAT_SZ_T i, j;
Matrix* newA = InitMatrixZero(A->rows, A->cols);
for (i=0; i<A->rows; i++) {
for (j=0; j<A->cols; j++) {
newA->numbers[i][j] = A->numbers[i][j];
}
}
return newA;
}
// Destruction
int DestroyMatrix(Matrix* mat) {
MAT_SZ_T i;
if (mat == NULL) { return 0; }
for (i=0; i < mat->rows; i++) {
free(mat->numbers[i]);
}
free(mat->numbers);
free(mat);
return 0;
}
// Multiplication
Matrix* MatMul(Matrix* A, Matrix* B) {
MAT_SZ_T i, j;
MAT_SZ_T k;
if (A->cols != B->rows) { return NULL; }
Matrix* Result = InitMatrixZero(A->rows, B->cols);
for (i=0; i<Result->rows; i++) {
for (j=0; j<Result->cols; j++) {
Result->numbers[i][j] = 0.0;
for (k=0; k<A->cols; k++) {
Result->numbers[i][j] += A->numbers[i][k]*B->numbers[k][j];
}
}
}
return Result;
}
// Transpose
void MatT(Matrix* A) {
MAT_SZ_T i, j;
MAT_T tmp;
MAT_T** new_numbers;
/* Square matrix case */
if (A->rows == A->cols) {
for (i=0; i<A->rows; i++) {
for (j=0; j<A->cols; j++) {
if (A->numbers[i][j] == A->numbers[j][i]) { continue; }
if (i == j) { continue; }
tmp = A->numbers[i][j];
A->numbers[i][j] = A->numbers[j][i];
A->numbers[j][i] = tmp;
}
}
}
/* Not square... */
else {
new_numbers = (MAT_T**)malloc(sizeof(MAT_T*)*A->cols);
for (i=0; i<A->cols; i++) {
new_numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*A->rows);
}
for (i=0; i<A->rows; i++) {
for (j=0; j<A->cols; j++) {
new_numbers[j][i] = A->numbers[i][j];
}
}
for (i=0; i<A->rows; i++) {
free(A->numbers[i]);
}
free(A->numbers);
A->numbers = new_numbers;
}
}
// Inverse - Gaussian Elimination
Matrix* MatInv(Matrix* A) {
if (A->rows != A->cols) { return NULL; }
Matrix* tmpA = InitMatrixOtherMat(A);
Matrix* IdA = InitMatrixIdentity(A->rows, A->cols);
MAT_T tmp = (MAT_T)0.0;
MAT_SZ_T msize = A->rows;
MAT_SZ_T i, j, k;
for (k=0; k<msize; k++) {
tmp = tmpA->numbers[k][k];
for (j=0; j<msize; j++) {
tmpA->numbers[k][j] /= tmp;
IdA->numbers[k][j] /= tmp;
}
for(i=0; i<msize; i++) {
tmp = A->numbers[i][k];
for (j=0; j<msize; j++) {
if (i==k) { break; }
tmpA->numbers[i][j] -= tmpA->numbers[k][j]*tmp;
IdA->numbers[i][j] -= IdA->numbers[k][j]*tmp;
}
}
} /* for (k=0; i<msize; k++) */
DestroyMatrix(tmpA);
return IdA;
}
/* Matrices to be played with */
static Matrix* matA;
static Matrix* matB;
/* Checking times */
//uint32_t start_time, elapsed_time;
static struct timespec start_time;
static struct timespec end_time;
static uint32_t elapsed_time;
static float avgAssign, avgDestroy, avgMul, avgInv, avgTrans;
/* When some strange number happened */
static int bad_number_flag = 0;
#define BAD_NUMBER 100000000
/* Averaging an array */
float ary_avg(uint32_t* ary, uint16_t len) {
if (len == 0) return 0.0f;
uint32_t i;
float sum = 0.0f;
for (i=0; i<len; ++i) {
sum += (float)ary[i];
}
return sum / (float)len;
}
/* Arduino codes for benchmarking */
void setup() {
// Serial.begin(SERIAL_BUAD_RATE);
pinMode(LED_BUILTIN, PINMODE_OUTPUT);
sleep(1);
digitalWrite(LED_BUILTIN, HIGH);
sleep(1);
digitalWrite(LED_BUILTIN, LOW);
iter = 0;
}
void loop() {
// put your main code here, to run repeatedly:
printf("\n<<Working with %lu x %lu matrix>>\n", (uint32_t)MATRIX_ROWS, (uint32_t)MATRIX_COLS);
// Generating matrix with random numbers
// Serial.println("Assigning Matrices");
clock_gettime(CLOCK_REALTIME, &start_time);
matA = InitMatrixRnd((uint32_t)MATRIX_ROWS, (uint32_t)MATRIX_COLS);
matB = InitMatrixRnd((uint32_t)MATRIX_COLS, (uint32_t)MATRIX_ROWS);
clock_gettime(CLOCK_REALTIME, &end_time);
elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
etMatGen[iter] = elapsed_time;
printf("2 Matrix Assignment Ended in %lu [us]\n", elapsed_time);
printf("Matrices Initialized!!\n");
// Testing multiplication
digitalWrite(LED_BUILTIN, HIGH);
clock_gettime(CLOCK_REALTIME, &start_time);
Matrix* matC = MatMul(matA, matB);
clock_gettime(CLOCK_REALTIME, &end_time);
elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
etMatMul[iter] = elapsed_time;
DestroyMatrix(matC);
digitalWrite(LED_BUILTIN, LOW);
printf("Multiplication Ended in %lu [us]\n", elapsed_time);
// Testing Transpose
digitalWrite(LED_BUILTIN, HIGH);
clock_gettime(CLOCK_REALTIME, &start_time);
MatT(matA);
clock_gettime(CLOCK_REALTIME, &end_time);
elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
etMatTrans[iter] = elapsed_time;
digitalWrite(LED_BUILTIN, LOW);
printf("Transpose Ended in %lu [us]\n", elapsed_time);
// Testing Inverse
digitalWrite(LED_BUILTIN, HIGH);
clock_gettime(CLOCK_REALTIME, &start_time);
Matrix* AInv = MatInv(matA);
DestroyMatrix(AInv);
clock_gettime(CLOCK_REALTIME, &end_time);
elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
etMatInv[iter] = elapsed_time;
digitalWrite(LED_BUILTIN, LOW);
printf("Inverse Ended in %lu [us]\n", elapsed_time);
// Serial.println("Destroying Matrices");
clock_gettime(CLOCK_REALTIME, &start_time);
DestroyMatrix(matA);
DestroyMatrix(matB);
clock_gettime(CLOCK_REALTIME, &end_time);
elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
etMatDestroy[iter] = elapsed_time;
printf("Destruction of 2 Matrices ended in %lu [us]\n", elapsed_time);
printf("[%d] Iteration Done!\n", iter+1);
if (iter == N_ITER-1) {
avgAssign = ary_avg(etMatGen, N_ITER);
avgMul = ary_avg(etMatMul, N_ITER);
avgTrans = ary_avg(etMatTrans, N_ITER);
avgInv = ary_avg(etMatInv, N_ITER);
avgDestroy = ary_avg(etMatDestroy, N_ITER);
printf("\n");
printf("Assignment Avg: %.2f [us]\n", avgAssign);
printf("Destruction Avg: %.2f [us]\n", avgDestroy);
printf("Multiplication Avg: %.2f [us]\n", avgMul);
printf("Transpose Avg: %.2f [us]\n", avgTrans);
printf("Inverse Avg: %.2f [us]\n", avgInv);
iter = 0;
}
else { iter++; }
if (bad_number_flag != 0) {
printf("Bad elapsed time happened!! Calculating again..\n");
iter--;
bad_number_flag = 0;
}
// Serial.println("");
sleep(1);
}
int main(void)
{
if (wiringXSetup("milkv_duo", NULL) == -1) {
wiringXGC();
return 1;
}
setup();
while(true) {
loop();
}
return 0;
}