API Reference Manual  1.46.0
odp_ml_run.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2024 Nokia
3  */
4 
14 #include <odp_api.h>
15 #include <odp/helper/odph_api.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <inttypes.h>
19 #include <errno.h>
20 
/* Upper bound on the number of model inputs, and likewise on model outputs */
#define MAX_IO 8

/* Size bookkeeping for one model input or output */
typedef struct io_size {
	/* Total number of elements */
	uint64_t elems;
	/* Total size in bytes: number of elements times the data type size */
	uint64_t size;
	/* Size of one element (model data type) in bytes */
	int elem_size;
} io_size;
28 
29 static struct {
30  struct {
31  char *model_name, *input_name, *output_name, *reference_name;
32  float scale_q, scale_d;
33  int num_batch;
34  } opt;
35  odp_ml_model_t mdl;
38  int num_inp, num_out;
39  odp_ml_input_info_t inp_info[MAX_IO];
40  io_size inp[MAX_IO];
41  odp_ml_output_info_t out_info[MAX_IO];
42  io_size out[MAX_IO];
43  uint64_t inp_size_q, inp_size_d, out_size_q, out_size_d;
44 } glb = { .opt = { .num_batch = 1 } };
45 
/*
 * Read a whole file into a newly allocated buffer.
 *
 * name: path of the file to read
 * size: output, set to the file size in bytes once the size is known
 *
 * Returns the buffer (caller frees it with free()), or NULL on failure.
 */
static void *read_file(const char *name, uint64_t *size)
{
	void *buf = NULL;
	long end = 0;
	FILE *fp;

	fp = fopen(name, "rb");
	if (fp == NULL) {
		ODPH_ERR("Failed to open file %s: %s\n", name, strerror(errno));
		return NULL;
	}

	/* File size is the stream position after seeking to the end */
	if (fseek(fp, 0, SEEK_END) != 0 || (end = ftell(fp)) < 0) {
		ODPH_ERR("Failed to get file size for file %s\n", name);
		goto fail;
	}

	rewind(fp);
	*size = end;

	buf = malloc(*size);
	if (buf == NULL) {
		ODPH_ERR("Allocating %" PRIu64 " bytes failed\n", *size);
		goto fail;
	}

	/* Read everything as one item, so anything short of 1 is a failure */
	if (fread(buf, *size, 1, fp) != 1) {
		ODPH_ERR("Reading %" PRIu64 " bytes failed\n", *size);
		goto fail;
	}

	fclose(fp);
	printf("Read %" PRIu64 " bytes from %s\n", *size, name);

	return buf;

fail:
	fclose(fp);
	free(buf);

	return NULL;
}
94 
/*
 * Write a buffer into a file, replacing any previous content.
 *
 * name: path of the file to write
 * addr: start of the data
 * size: number of bytes to write
 *
 * Returns 0 on success, -1 on failure.
 */
static int write_file(const char *name, uint8_t *addr, uint64_t size)
{
	FILE *file = fopen(name, "wb");

	if (!file) {
		ODPH_ERR("Failed to open file %s, %s\n", name, strerror(errno));
		return -1;
	}

	/* fwrite() returns 0 when the item size is 0, so skip it for empty data */
	if (size && fwrite(addr, size, 1, file) != 1) {
		ODPH_ERR("Writing %" PRIu64 " bytes failed\n", size);
		fclose(file);
		return -1;
	}

	/* Buffered data is flushed at close: a failure means the file is incomplete */
	if (fclose(file)) {
		ODPH_ERR("Closing file %s failed: %s\n", name, strerror(errno));
		return -1;
	}

	printf("Wrote %" PRIu64 " bytes to %s\n", size, name);

	return 0;
}
115 
/* Print command line help. 'prog' is the program name shown on the usage line. */
static void usage(const char *prog)
{
	/* Synopsis */
	printf("\n"
	       "Usage: %s [options]\n"
	       "\n",
	       prog);

	/* Options that must always be given */
	printf("Mandatory OPTIONS:\n"
	       " -m, --model <file> Model file\n"
	       " -i, --input <file> Input file\n"
	       "\n");

	/* Options that may be left out */
	printf("Optional OPTIONS\n"
	       " -o, --output <file> Output file\n"
	       " -r, --reference <file> Reference file\n"
	       " -q, --quant <scale> Quantization scale\n"
	       " -d, --dequant <scale> Dequantization scale\n"
	       " -b, --batches <num> Number of batches\n"
	       " -h, --help Help\n"
	       "\n");
}
135 
136 static void parse_args(int argc, char *argv[])
137 {
138  static const struct option longopts[] = {
139  { "model", required_argument, NULL, 'm' },
140  { "input", required_argument, NULL, 'i' },
141  { "output", required_argument, NULL, 'o' },
142  { "reference", required_argument, NULL, 'r' },
143  { "quant", required_argument, NULL, 'q' },
144  { "dequant", required_argument, NULL, 'd' },
145  { "batches", required_argument, NULL, 'b' },
146  { "help", no_argument, NULL, 'h' },
147  { NULL, 0, NULL, 0 } };
148 
149  static const char *shortopts = "+m:i:o:r:q:d:b:h";
150 
151  while (1) {
152  int c = getopt_long(argc, argv, shortopts, longopts, NULL);
153 
154  if (c == -1)
155  break; /* No more options */
156 
157  switch (c) {
158  case 'm':
159  glb.opt.model_name = optarg;
160  break;
161  case 'i':
162  glb.opt.input_name = optarg;
163  break;
164  case 'o':
165  glb.opt.output_name = optarg;
166  break;
167  case 'r':
168  glb.opt.reference_name = optarg;
169  break;
170  case 'q':
171  glb.opt.scale_q = atof(optarg);
172  break;
173  case 'd':
174  glb.opt.scale_d = atof(optarg);
175  break;
176  case 'b':
177  glb.opt.num_batch = atof(optarg);
178  break;
179  case 'h':
180  usage(argv[0]);
181  exit(EXIT_SUCCESS);
182  break;
183  default:
184  usage(argv[0]);
185  exit(EXIT_FAILURE);
186  break;
187  }
188  }
189 
190  optind = 1; /* reset 'extern optind' from the getopt lib */
191 
192  if (!glb.opt.model_name || !glb.opt.input_name) {
193  usage(argv[0]);
194  exit(EXIT_FAILURE);
195  }
196 }
197 
198 static int check_num_batch(void)
199 {
200  int min_batch = 1, max_batch = 1;
201 
202  for (int i = 0; i < glb.num_inp; i++) {
203  odp_ml_shape_info_t *shape = &glb.inp_info[i].shape;
204 
205  for (int j = 0; j < (int)shape->num_dim; j++) {
206  if (shape->dim[j] == ODP_ML_DIM_DYNAMIC) {
207  min_batch = shape->dim_min[j];
208  max_batch = shape->dim_max[j];
209  break;
210  }
211  }
212  }
213 
214  if (glb.opt.num_batch < min_batch || glb.opt.num_batch > max_batch) {
215  ODPH_ERR("Number of batches %d out of range [%d, %d]\n", glb.opt.num_batch,
216  min_batch, max_batch);
217  return -1;
218  }
219 
220  return 0;
221 }
222 
223 static void calc_io_size(void)
224 {
225  for (int i = 0; i < glb.num_inp; i++) {
226  uint64_t elems = 1;
227  odp_ml_input_info_t *info = &glb.inp_info[i];
228  odp_ml_shape_info_t *shape = &info->shape;
229  io_size *inp = &glb.inp[i];
230 
231  printf("Input %d: %s, shape:", i, info->name);
232 
233  for (int j = 0; j < (int)shape->num_dim; j++) {
234  printf(" %d", shape->dim[j]);
235  if (shape->dim[j] != ODP_ML_DIM_DYNAMIC)
236  elems *= shape->dim[j];
237  }
238 
239  if (shape->type == ODP_ML_SHAPE_BATCH)
240  elems *= glb.opt.num_batch;
241  inp->elems = elems;
242  inp->elem_size = info->data_type_size;
243  inp->size = elems * info->data_type_size;
244  glb.inp_size_q += inp->size;
245  glb.inp_size_d += elems * sizeof(float);
246 
247  printf(", elems: %" PRIu64 ", datatype size: %d, size: %" PRIu64 "\n",
248  inp->elems, inp->elem_size, inp->size);
249  }
250 
251  printf("Input size_q: %" PRIu64 ", size_d: %" PRIu64 "\n", glb.inp_size_q, glb.inp_size_d);
252 
253  for (int i = 0; i < glb.num_out; i++) {
254  uint64_t elems = 1;
255  odp_ml_output_info_t *info = &glb.out_info[i];
256  odp_ml_shape_info_t *shape = &info->shape;
257  io_size *out = &glb.out[i];
258 
259  printf("Output %d: %s, shape:", i, info->name);
260 
261  for (int j = 0; j < (int)shape->num_dim; j++) {
262  printf(" %d", shape->dim[j]);
263  if (shape->dim[j] != ODP_ML_DIM_DYNAMIC)
264  elems *= shape->dim[j];
265  }
266 
267  if (shape->type == ODP_ML_SHAPE_BATCH)
268  elems *= glb.opt.num_batch;
269  out->elems = elems;
270  out->elem_size = info->data_type_size;
271  out->size = elems * info->data_type_size;
272  glb.out_size_q += out->size;
273  glb.out_size_d += elems * sizeof(float);
274 
275  printf(", elems: %" PRIu64 ", datatype size: %d, size: %" PRIu64 "\n",
276  out->elems, out->elem_size, out->size);
277  }
278 
279  printf("Output size_q: %" PRIu64 ", size_d: %" PRIu64 "\n", glb.out_size_q, glb.out_size_d);
280 }
281 
282 static int quantize_input(uint8_t *inp_q_addr, uint8_t *inp_d_addr)
283 {
284  for (int i = 0; i < glb.num_inp; i++) {
285  float scale_q = glb.opt.scale_q;
286  uint64_t elems = glb.inp[i].elems;
287  odp_ml_input_info_t *info = &glb.inp_info[i];
288 
289  switch (info->data_type) {
291  odp_ml_fp32_to_int8((int8_t *)inp_q_addr, (float *)inp_d_addr, elems,
292  scale_q, 0);
293  break;
295  odp_ml_fp32_to_uint8((uint8_t *)inp_q_addr, (float *)inp_d_addr, elems,
296  scale_q, 0);
297  break;
299  odp_ml_fp32_to_fp16((uint16_t *)inp_q_addr, (float *)inp_d_addr, elems);
300  break;
301  default:
302  ODPH_ERR("Unsupported type %d for input %d\n", info->data_type, i);
303  return -1;
304  }
305 
306  inp_q_addr += glb.inp[i].size;
307  inp_d_addr += elems * sizeof(float);
308  }
309 
310  return 0;
311 }
312 
313 static int dequantize_output(uint8_t *out_d_addr, uint8_t *out_q_addr)
314 {
315  for (int i = 0; i < glb.num_out; i++) {
316  float scale_d = glb.opt.scale_d;
317  uint64_t elems = glb.out[i].elems;
318  odp_ml_output_info_t *info = &glb.out_info[i];
319 
320  switch (info->data_type) {
322  odp_ml_fp32_from_int8((float *)out_d_addr, (int8_t *)out_q_addr, elems,
323  scale_d, 0);
324  break;
326  odp_ml_fp32_from_uint8((float *)out_d_addr, (uint8_t *)out_q_addr, elems,
327  scale_d, 0);
328  break;
330  odp_ml_fp32_from_fp16((float *)out_d_addr, (uint16_t *)out_q_addr, elems);
331  break;
332  default:
333  ODPH_ERR("Unsupported type %d for output %d\n", info->data_type, i);
334  return -1;
335  }
336 
337  out_q_addr += glb.out[i].size;
338  out_d_addr += elems * sizeof(float);
339  }
340 
341  return 0;
342 }
343 
344 int main(int argc, char *argv[])
345 {
346  odp_instance_t inst;
347  odp_ml_config_t ml_config;
348  odp_ml_model_param_t model_param;
349  int ret = 0;
350  void *input_file = NULL, *output_file = NULL, *reference_file = NULL;
351  uint64_t input_file_size, reference_file_size;
352  uint8_t *input = NULL, *output = NULL;
353 
354  parse_args(argc, argv);
355 
356  if (odp_init_global(&inst, NULL, NULL)) {
357  ODPH_ERR("Global init failed\n");
358  return -1;
359  }
360 
361  if (odp_init_local(inst, ODP_THREAD_CONTROL)) {
362  ODPH_ERR("Local init failed\n");
363  return -1;
364  }
365 
366  if (odp_ml_capability(&glb.capa)) {
367  ODPH_ERR("odp_ml_capability() failed\n");
368  ret = -1;
369  goto odp_term;
370  }
371 
372  if (glb.capa.min_input_align > 1) {
373  ODPH_ERR("Minimum input alignment %d not supported\n", glb.capa.min_input_align);
374  ret = -1;
375  goto odp_term;
376  }
377 
378  if (glb.capa.min_output_align > 1) {
379  ODPH_ERR("Minimum output alignment %d not supported\n", glb.capa.min_output_align);
380  ret = -1;
381  goto odp_term;
382  }
383 
384  odp_ml_config_init(&ml_config);
385  ml_config.max_model_size = glb.capa.max_model_size;
388 
389  if (odp_ml_config(&ml_config)) {
390  ODPH_ERR("odp_ml_config() failed\n");
391  ret = -1;
392  goto odp_term;
393  }
394 
395  odp_ml_model_param_init(&model_param);
396 
397  model_param.model = read_file(glb.opt.model_name, &model_param.size);
398  if (!model_param.model) {
399  ODPH_ERR("Failed to read model file\n");
400  ret = -1;
401  goto odp_term;
402  }
403 
404  glb.mdl = odp_ml_model_create(glb.opt.model_name, &model_param);
405  free(model_param.model);
406  if (glb.mdl == ODP_ML_MODEL_INVALID) {
407  ODPH_ERR("odp_ml_model_create() failed\n");
408  ret = -1;
409  goto odp_term;
410  }
411 
412  odp_ml_model_print(glb.mdl);
413 
414  if (odp_ml_model_load(glb.mdl, NULL)) {
415  ODPH_ERR("odp_ml_model_load() failed\n");
416  ret = -1;
417  goto odp_term;
418  }
419 
420  if (odp_ml_model_info(glb.mdl, &glb.info)) {
421  ODPH_ERR("odp_ml_model_info() failed\n");
422  ret = -1;
423  goto odp_term;
424  }
425 
426  glb.num_inp = odp_ml_model_input_info(glb.mdl, glb.inp_info, MAX_IO);
427 
428  if (glb.num_inp < 0 || glb.num_inp > MAX_IO) {
429  ODPH_ERR("odp_ml_model_input_info() failed, or too many inputs\n");
430  ret = -1;
431  goto odp_term;
432  }
433 
434  glb.num_out = odp_ml_model_output_info(glb.mdl, glb.out_info, MAX_IO);
435 
436  if (glb.num_out < 0 || glb.num_out > MAX_IO) {
437  ODPH_ERR("odp_ml_model_output_info() failed, or too many outputs\n");
438  ret = -1;
439  goto odp_term;
440  }
441 
442  if (check_num_batch()) {
443  ret = -1;
444  goto odp_term;
445  }
446 
447  input_file = read_file(glb.opt.input_name, &input_file_size);
448  if (!input_file)
449  return -1;
450 
451  calc_io_size();
452 
453  if ((glb.opt.scale_q > 0.0 && input_file_size != glb.inp_size_d) ||
454  (!(glb.opt.scale_q > 0.0) && input_file_size != glb.inp_size_q)) {
455  ODPH_ERR("Input file size mismatch\n");
456  ret = -1;
457  goto odp_term;
458  }
459 
460  odp_ml_data_seg_t inp_seg[MAX_IO];
461  uint8_t *inp_addr = input_file;
462 
463  if (glb.opt.scale_q > 0.0) {
464  input = malloc(glb.inp_size_q);
465  if (!input) {
466  ODPH_ERR("Allocating %" PRIu64 " bytes failed\n", glb.inp_size_q);
467  ret = -1;
468  goto odp_term;
469  }
470 
471  if (quantize_input(input, input_file)) {
472  ret = -1;
473  goto odp_term;
474  }
475 
476  inp_addr = input;
477  }
478 
479  for (int i = 0; i < glb.num_inp; i++) {
480  inp_seg[i].addr = inp_addr;
481  inp_seg[i].size = glb.inp[i].size;
482  inp_addr += glb.inp[i].size;
483  }
484 
485  output = malloc(glb.out_size_q);
486 
487  if (!output) {
488  ODPH_ERR("Allocating %" PRIu64 " bytes failed\n", glb.out_size_q);
489  ret = -1;
490  goto odp_term;
491  }
492 
493  odp_ml_data_seg_t out_seg[MAX_IO];
494  uint8_t *out_addr = output;
495 
496  for (int i = 0; i < glb.num_out; i++) {
497  out_seg[i].addr = out_addr;
498  out_seg[i].size = glb.out[i].size;
499  out_addr += glb.out[i].size;
500  }
501 
502  odp_ml_data_t data = {
503  .input_seg = inp_seg,
504  .num_input_seg = glb.num_inp,
505  .output_seg = out_seg,
506  .num_output_seg = glb.num_out,
507  };
508  odp_ml_run_param_t run_param;
509 
510  odp_ml_run_param_init(&run_param);
511  run_param.batch_size = glb.opt.num_batch;
512 
513  if (odp_ml_run(glb.mdl, &data, &run_param) != 1) {
514  ODPH_ERR("odp_ml_run() failed\n");
515  ret = -1;
516  goto odp_term;
517  }
518 
519  void *output_final = output;
520  uint64_t out_size_final = glb.out_size_q;
521 
522  if (glb.opt.scale_d > 0.0) {
523  output_file = malloc(glb.out_size_d);
524  if (!output_file) {
525  ODPH_ERR("Allocating %" PRIu64 " bytes failed\n", glb.out_size_d);
526  ret = -1;
527  goto odp_term;
528  }
529 
530  if (dequantize_output(output_file, output)) {
531  ret = -1;
532  goto odp_term;
533  }
534 
535  output_final = output_file;
536  out_size_final = glb.out_size_d;
537  }
538 
539  if (glb.opt.output_name) {
540  if (write_file(glb.opt.output_name, output_final, out_size_final)) {
541  ret = -1;
542  goto odp_term;
543  }
544  }
545 
546  if (glb.opt.reference_name)
547  reference_file = read_file(glb.opt.reference_name, &reference_file_size);
548 
549  if (reference_file) {
550  if (out_size_final != reference_file_size) {
551  ODPH_ERR("Output size mismatch: %" PRIu64
552  " differs from reference file size %" PRIu64 "\n",
553  out_size_final, reference_file_size);
554  ret = -1;
555  goto odp_term;
556  }
557 
558  if (memcmp(reference_file, output_final, out_size_final)) {
559  ODPH_ERR("Output differs from reference\n");
560  ret = -1;
561  } else {
562  printf("Output matches reference\n");
563  }
564  }
565 
566  if (odp_ml_model_unload(glb.mdl, NULL)) {
567  ODPH_ERR("odp_ml_model_unload() failed\n");
568  ret = -1;
569  goto odp_term;
570  }
571 
572  if (odp_ml_model_destroy(glb.mdl)) {
573  ODPH_ERR("odp_ml_model_destroy() failed\n");
574  ret = -1;
575  goto odp_term;
576  }
577 
578 odp_term:
579  free(input);
580  free(output);
581  free(input_file);
582  free(output_file);
583  free(reference_file);
584 
585  if (odp_term_local()) {
586  ODPH_ERR("Local term failed\n");
587  return -1;
588  }
589 
590  if (odp_term_global(inst)) {
591  ODPH_ERR("Global term failed\n");
592  return -1;
593  }
594 
595  return ret;
596 }
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
void odp_ml_run_param_init(odp_ml_run_param_t *param)
Initialize model run parameters.
void odp_ml_model_param_init(odp_ml_model_param_t *param)
Initialize ML model parameters.
void odp_ml_config_init(odp_ml_config_t *config)
Initialize ML configuration parameters.
void odp_ml_fp32_to_fp16(uint16_t *dst_fp16, const float *src_fp32, uint32_t num)
Quantize 32-bit float to 16-bit float.
void odp_ml_fp32_to_int8(int8_t *dst_i8, const float *src_fp32, uint32_t num, float scale, int8_t zerop)
Quantize 32-bit float to int8_t.
#define ODP_ML_DIM_DYNAMIC
Dimension size is dynamic.
void odp_ml_fp32_from_fp16(float *dst_fp32, const uint16_t *src_fp16, uint32_t num)
De-quantize 32-bit float from 16-bit float.
void odp_ml_fp32_from_uint8(float *dst_fp32, const uint8_t *src_u8, uint32_t num, float scale, uint8_t zerop)
De-quantize 32-bit float from uint8_t.
#define ODP_ML_COMPL_MODE_SYNC
Synchronous operation.
int odp_ml_config(const odp_ml_config_t *config)
Configure ML offload.
#define ODP_ML_MODEL_INVALID
Invalid ML model.
int odp_ml_run(odp_ml_model_t model, const odp_ml_data_t *data, const odp_ml_run_param_t *param)
Run the model in synchronous mode.
uint32_t odp_ml_model_output_info(odp_ml_model_t model, odp_ml_output_info_t info[], uint32_t num)
Retrieve model output information.
odp_ml_model_t odp_ml_model_create(const char *name, const odp_ml_model_param_t *param)
Create an ML model.
void odp_ml_fp32_to_uint8(uint8_t *dst_u8, const float *src_fp32, uint32_t num, float scale, uint8_t zerop)
Quantize 32-bit float to uint8_t.
int odp_ml_model_load(odp_ml_model_t model, odp_ml_load_result_t *result)
Load ML model.
uint32_t odp_ml_model_input_info(odp_ml_model_t model, odp_ml_input_info_t info[], uint32_t num)
Retrieve model input information.
int odp_ml_model_unload(odp_ml_model_t model, odp_ml_load_result_t *result)
Unload ML model.
void odp_ml_model_print(odp_ml_model_t model)
Print debug information about the model.
int odp_ml_capability(odp_ml_capability_t *capa)
Query ML capabilities.
int odp_ml_model_info(odp_ml_model_t model, odp_ml_model_info_t *info)
Retrieve model information.
int odp_ml_model_destroy(odp_ml_model_t model)
Destroy an ML model.
void odp_ml_fp32_from_int8(float *dst_fp32, const int8_t *src_i8, uint32_t num, float scale, int8_t zerop)
De-quantize 32-bit float from int8_t.
@ ODP_ML_SHAPE_BATCH
Dynamic batch size.
@ ODP_ML_DATA_TYPE_FP16
16-bit floating point number
@ ODP_ML_DATA_TYPE_UINT8
8-bit unsigned integer
@ ODP_ML_DATA_TYPE_INT8
8-bit integer
@ ODP_THREAD_CONTROL
Control thread.
odp_api.h
The OpenDataPlane API.
odp_ml_capability_t
Machine learning capabilities.
odp_ml_config_t
Machine learning configuration parameters.
uint64_t max_model_size
Maximum model binary size in bytes.
odp_ml_compl_mode_t load_mode_mask
Load / unload completion modes.
odp_ml_compl_mode_t run_mode_mask
Run completion modes.
odp_ml_data_seg_t
Model input / output data segment.
void * addr
Segment start address.
uint64_t size
Segment size in bytes.
odp_ml_data_t
Model input / output data for a model inference run.
odp_ml_data_seg_t * input_seg
Model input data segments.
odp_ml_input_info_t
Model input information.
uint32_t data_type_size
Size of model input data type in bytes.
odp_ml_data_type_t data_type
Model input data type.
char name[ODP_ML_MODEL_IO_NAME_LEN]
Model input name.
odp_ml_shape_info_t shape
Model input data shape.
odp_ml_model_info_t
Model information.
odp_ml_model_param_t
Machine learning model parameters.
uint64_t size
Size of the model binary in bytes.
void * model
Model binary.
odp_ml_output_info_t
Model output information.
odp_ml_data_type_t data_type
Model output data type.
odp_ml_shape_info_t shape
Model output data shape.
uint32_t data_type_size
Size of model output data type in bytes.
char name[ODP_ML_MODEL_IO_NAME_LEN]
Model output name.
odp_ml_run_param_t
Parameters for model run.
uint32_t batch_size
Batch size.
odp_ml_shape_info_t
Model input / output data shape information.
uint32_t dim_min[ODP_ML_MAX_DIMS]
Minimum dimension sizes.
odp_ml_shape_type_t type
Shape type.
uint32_t dim_max[ODP_ML_MAX_DIMS]
Maximum dimension sizes.
uint32_t num_dim
Number of dimensions.
uint32_t dim[ODP_ML_MAX_DIMS]
Dimension sizes.