Timer issue on Duo (CV1800B)

I have been playing with MilkV Duo (CV1800B) to see if how capable the embedded linux is. One of thest was running a simple Matrix calculation program to just see how it performs. While running the program, compiled with the SDK on a Ubuntu host machine, I found a strange behavior.

My program measures matrix calculation time. To do this, my method of checking time was ‘time.h’ which comes with a few convenient functions that employs ‘timespec’ struct.

Here is my code, well, part of it.

  clock_gettime(CLOCK_REALTIME, &start_time);
  matA = InitMatrixRnd((uint32_t)MATRIX_ROWS, (uint32_t)MATRIX_COLS);
  matB = InitMatrixRnd((uint32_t)MATRIX_COLS, (uint32_t)MATRIX_ROWS);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  etMatGen[iter] = elapsed_time;
  printf("2 Matrix Assignment Ended in %lu [us]\n", elapsed_time);
  printf("Matrices Initialized!!\n");

Yeah, simply measuring the time with ‘clock_gettime’ function provided by the ‘time.h’ header. However, not very often but often… I found a little bit strange behavior that shows tremendeous time taken. Like this:

2 Matrix Assignment Ended in 2011 [us]
Matrices Initialized!!
Multiplication Ended in 50391 [us]
Transpose Ended in 953 [us]
Inverse Ended in 18446744073708638228 [us]
Destruction of 2 Matrices ended in 482 [us]
[3] Iteration Done!

See that 184…[us] result? It doesn’t always happen but happens after… like 10 - 20 loops. It also happens to other calculation as well: I’ve seen multiplication and inverse shows the most occurance.

However, if I give in a size optimization, which results under-performing code, ‘-g -Os’ to CFLAGS, this behavior disappears completely while losing 10-20% of performance.

I use this same code on other microcontrollers, i.e. RP2040, but never seen this happening. Just providing some stuff I found. That’s all.

The firmware was ‘https://github.com/milkv-duo/duo-buildroot-sdk/releases/download/v1.1.3/milkv-duo-sd-v1.1.3-2024-0930.img.zip’.

2 Likes

I have an update on the ‘size’ optimized code. The code was running more than 20 minutes to end up with a segmentation fault. So, the SDK seems to be a bit undercooked at the moment. Perhaps this is just a ‘malloc()’ implementation not being optimized.

As a microcontroller, I think this is not much of an issue. But as a computer (running a desktop, server, etc.) core, this could be a very serious problem. I’ll take an approach with stack memory calculation variant of my code. Here is my code, btw…

/* 
 * Simple matrix calculation benchmark!
 * 
 * Results from MilkV Duo (CV1800B) Oct 8th 2024
 * 
 * Assignment Avg: 1792.65 [us]
 * Destruction Avg: 546.30 [us]
 * Multiplication Avg: 51592.88 [us]
 * Transpose Avg: 1027.14 [us]
 * Inverse Avg: 87142.32 [us]
 *
 */

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>
#include <time.h>

#include <wiringx.h>

#define LED_BUILTIN 25

/* Do not exceed 80x80 with float. It will deplete Pi Pico's memory real quick! */
#define MATRIX_ROWS 80
#define MATRIX_COLS 80

/* Benchmark control */
#define N_ITER 100
static uint16_t iter;

/* Note that integer matrices can't do inverse */
#define MAT_T float
#define MAT_SZ_T uint32_t

/* Serial baud rate */
#define SERIAL_BUAD_RATE 115200

/* Utility stuffs */
//char buffer[256];

/* Storing benchmark times */
uint32_t etMatGen[N_ITER];
uint32_t etMatDestroy[N_ITER];
uint32_t etMatMul[N_ITER];
uint32_t etMatTrans[N_ITER];
uint32_t etMatInv[N_ITER];

/* Codes to work with matrix */
// Matrix struct
typedef struct matrix_t {
  MAT_T** numbers;
  MAT_SZ_T rows;
  MAT_SZ_T cols;
} Matrix;

// Initialization with Random Number
Matrix* InitMatrixRnd(MAT_SZ_T rows, MAT_SZ_T cols) {
  MAT_SZ_T i, j;
  
  Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
  mat->rows = rows;
  mat->cols = cols;

  mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
  for (i=0; i<rows; i++) {
    mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
    for (j=0; j<cols; j++) {
      mat->numbers[i][j] = (MAT_T)rand() / (MAT_T)RAND_MAX;
    }
  }

  return mat;
}

// Initialization with Zeros
Matrix* InitMatrixZero(MAT_SZ_T rows, MAT_SZ_T cols) {
  MAT_SZ_T i, j;
  
  Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
  mat->rows = rows;
  mat->cols = cols;

  mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
  for (i=0; i<rows; i++) {
    mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
    for (j=0; j<cols; j++) {
      mat->numbers[i][j] = (MAT_T)0.0;
    }
  }
  return mat;
}

// Identity Matrix
Matrix* InitMatrixIdentity(MAT_SZ_T rows, MAT_SZ_T cols) {
  MAT_SZ_T i, j;

  if (rows != cols) { return NULL; }
  
  Matrix* mat = (Matrix*)malloc(sizeof(Matrix));
  mat->rows = rows;
  mat->cols = cols;

  mat->numbers = (MAT_T**)malloc(sizeof(MAT_T*)*rows);
  for (i=0; i<rows; i++) {
    mat->numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*cols);
    for (j=0; j<cols; j++) {
      if (i == j) { mat->numbers[i][j] = (MAT_T)1.0; }
      else { mat->numbers[i][j] = (MAT_T)0.0; }
    }
  }
  return mat;
}

// Init from another Matrix
Matrix* InitMatrixOtherMat(Matrix* A) {
  if (A->rows == 0 || A->cols == 0) { return NULL; }

  MAT_SZ_T i, j;
  Matrix* newA = InitMatrixZero(A->rows, A->cols);

  for (i=0; i<A->rows; i++) {
    for (j=0; j<A->cols; j++) {
      newA->numbers[i][j] = A->numbers[i][j];
    }
  }
  return newA;
}

// Destruction
int DestroyMatrix(Matrix* mat) {
  MAT_SZ_T i;

  if (mat == NULL) { return 0; }
  
  for (i=0; i < mat->rows; i++) {
    free(mat->numbers[i]);
  }
  free(mat->numbers);
  free(mat);
  
  return 0;
}

// Multiplication
Matrix* MatMul(Matrix* A, Matrix* B) {

  MAT_SZ_T i, j;
  MAT_SZ_T k;

  if (A->cols != B->rows) { return NULL; }

  Matrix* Result = InitMatrixZero(A->rows, B->cols);
  
  for (i=0; i<Result->rows; i++) {
    for (j=0; j<Result->cols; j++) {
      Result->numbers[i][j] = 0.0;
      for (k=0; k<A->cols; k++) {
        Result->numbers[i][j] += A->numbers[i][k]*B->numbers[k][j];
      }
    }
  }

  return Result;
}

// Transpose
void MatT(Matrix* A) {
  MAT_SZ_T i, j;
  MAT_T tmp;
  MAT_T** new_numbers;

  /* Square matrix case */
  if (A->rows == A->cols) {
    for (i=0; i<A->rows; i++) {
      for (j=0; j<A->cols; j++) {
        if (A->numbers[i][j] == A->numbers[j][i]) { continue; }
        if (i == j) { continue; }
        
        tmp = A->numbers[i][j];
        A->numbers[i][j] = A->numbers[j][i];
        A->numbers[j][i] = tmp;
      }
    }
  }
  /* Not square... */
  else {
    new_numbers = (MAT_T**)malloc(sizeof(MAT_T*)*A->cols);
    for (i=0; i<A->cols; i++) {
      new_numbers[i] = (MAT_T*)malloc(sizeof(MAT_T)*A->rows);
    }

    for (i=0; i<A->rows; i++) {
      for (j=0; j<A->cols; j++) {
        new_numbers[j][i] = A->numbers[i][j];
      }
    }

    for (i=0; i<A->rows; i++) {
      free(A->numbers[i]);
    }
    free(A->numbers);

    A->numbers = new_numbers;    
  }
  
}

// Inverse - Gaussian Elimination
Matrix* MatInv(Matrix* A) {

  if (A->rows != A->cols) { return NULL; }

  Matrix* tmpA = InitMatrixOtherMat(A);
  Matrix* IdA = InitMatrixIdentity(A->rows, A->cols);
  MAT_T tmp = (MAT_T)0.0;
  MAT_SZ_T msize = A->rows;
  MAT_SZ_T i, j, k;

  for (k=0; k<msize; k++) {
    tmp = tmpA->numbers[k][k];

    for (j=0; j<msize; j++) {
      tmpA->numbers[k][j] /= tmp;
      IdA->numbers[k][j] /= tmp;
    }

    for(i=0; i<msize; i++) {
      tmp = A->numbers[i][k];
      for (j=0; j<msize; j++) {
        if (i==k) { break; }
        tmpA->numbers[i][j] -= tmpA->numbers[k][j]*tmp;
        IdA->numbers[i][j] -= IdA->numbers[k][j]*tmp;
      }
    }
    
  } /* for (k=0; i<msize; k++) */

  DestroyMatrix(tmpA);
  
  return IdA;
}

/* Matrices to be played with */
static Matrix* matA;
static Matrix* matB;

/* Checking times */
//uint32_t start_time, elapsed_time;
static struct timespec start_time;
static struct timespec end_time;
static uint32_t elapsed_time;
static float avgAssign, avgDestroy, avgMul, avgInv, avgTrans;

/* When some strange number happened */
static int bad_number_flag = 0;
#define BAD_NUMBER 100000000

/* Averaging an array */
float ary_avg(uint32_t* ary, uint16_t len) {

  if (len == 0) return 0.0f;
  
  uint32_t i;
  float sum = 0.0f;

  for (i=0; i<len; ++i) {
    sum += (float)ary[i];
  }

  return sum / (float)len;
}

/* Arduino codes for benchmarking */
void setup() {
  // Serial.begin(SERIAL_BUAD_RATE);
  pinMode(LED_BUILTIN, PINMODE_OUTPUT);
  sleep(1);
  
  digitalWrite(LED_BUILTIN, HIGH);
  sleep(1);
  digitalWrite(LED_BUILTIN, LOW);

  iter = 0;
}


void loop() {
  // put your main code here, to run repeatedly:
  printf("\n<<Working with %lu x %lu matrix>>\n", (uint32_t)MATRIX_ROWS, (uint32_t)MATRIX_COLS);

  // Generating matrix with random numbers
  // Serial.println("Assigning Matrices");
  clock_gettime(CLOCK_REALTIME, &start_time);
  matA = InitMatrixRnd((uint32_t)MATRIX_ROWS, (uint32_t)MATRIX_COLS);
  matB = InitMatrixRnd((uint32_t)MATRIX_COLS, (uint32_t)MATRIX_ROWS);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
  etMatGen[iter] = elapsed_time;
  printf("2 Matrix Assignment Ended in %lu [us]\n", elapsed_time);
  printf("Matrices Initialized!!\n");

  // Testing multiplication
  digitalWrite(LED_BUILTIN, HIGH);
  clock_gettime(CLOCK_REALTIME, &start_time);
  Matrix* matC = MatMul(matA, matB);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
  etMatMul[iter] = elapsed_time;
  DestroyMatrix(matC);
  digitalWrite(LED_BUILTIN, LOW);
  printf("Multiplication Ended in %lu [us]\n", elapsed_time);
  
  // Testing Transpose
  digitalWrite(LED_BUILTIN, HIGH);
  clock_gettime(CLOCK_REALTIME, &start_time);
  MatT(matA);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
  etMatTrans[iter] = elapsed_time;
  digitalWrite(LED_BUILTIN, LOW);
  printf("Transpose Ended in %lu [us]\n", elapsed_time);
  
  // Testing Inverse
  digitalWrite(LED_BUILTIN, HIGH);
  clock_gettime(CLOCK_REALTIME, &start_time);
  Matrix* AInv = MatInv(matA);
  DestroyMatrix(AInv);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
  etMatInv[iter] = elapsed_time;
  digitalWrite(LED_BUILTIN, LOW);
  printf("Inverse Ended in %lu [us]\n", elapsed_time);
  
  // Serial.println("Destroying Matrices");
  clock_gettime(CLOCK_REALTIME, &start_time);
  DestroyMatrix(matA);
  DestroyMatrix(matB);
  clock_gettime(CLOCK_REALTIME, &end_time);
  elapsed_time = (end_time.tv_nsec - start_time.tv_nsec)/1000;
  if (elapsed_time >= BAD_NUMBER) bad_number_flag = 1;
  etMatDestroy[iter] = elapsed_time;
  printf("Destruction of 2 Matrices ended in %lu [us]\n", elapsed_time);
  
  printf("[%d] Iteration Done!\n", iter+1);
  
  if (iter == N_ITER-1) { 
    avgAssign = ary_avg(etMatGen, N_ITER);
    avgMul = ary_avg(etMatMul, N_ITER);
    avgTrans = ary_avg(etMatTrans, N_ITER);
    avgInv = ary_avg(etMatInv, N_ITER);
    avgDestroy = ary_avg(etMatDestroy, N_ITER); 

    printf("\n");
    printf("Assignment Avg: %.2f [us]\n", avgAssign);
    printf("Destruction Avg: %.2f [us]\n", avgDestroy);
    printf("Multiplication Avg: %.2f [us]\n", avgMul);
    printf("Transpose Avg: %.2f [us]\n", avgTrans);
    printf("Inverse Avg: %.2f [us]\n", avgInv);
    
    iter = 0; 
  }
  else { iter++; }

  if (bad_number_flag != 0) {
  	printf("Bad elapsed time happened!! Calculating again..\n");
  	iter--;
  	bad_number_flag = 0;
  }
  
  // Serial.println("");
  sleep(1);
}


int main(void)
{
  if (wiringXSetup("milkv_duo", NULL) == -1) {
  	wiringXGC();
  	return 1;
  }

  setup();

  while(true) {
  	loop();
  }

  return 0;
}
1 Like