15 #include <odp/helper/odph_api.h>
24 typedef struct io_size {
31 char *model_name, *input_name, *output_name, *reference_name;
32 float scale_q, scale_d;
43 uint64_t inp_size_q, inp_size_d, out_size_q, out_size_d;
44 } glb = { .opt = { .num_batch = 1 } };
46 static void *read_file(
const char *name, uint64_t *size)
48 FILE *file = fopen(name,
"rb");
51 ODPH_ERR(
"Failed to open file %s: %s\n", name, strerror(errno));
57 if (fseek(file, 0, SEEK_END)) {
58 ODPH_ERR(
"Failed to get file size for file %s\n", name);
62 long pos = ftell(file);
65 ODPH_ERR(
"Failed to get file size for file %s\n", name);
74 ODPH_ERR(
"Allocating %" PRIu64
" bytes failed\n", *size);
78 if (fread(addr, *size, 1, file) != 1) {
79 ODPH_ERR(
"Reading %" PRIu64
" bytes failed\n", *size);
84 printf(
"Read %" PRIu64
" bytes from %s\n", *size, name);
95 static int write_file(
const char *name, uint8_t *addr, uint64_t size)
97 FILE *file = fopen(name,
"wb");
100 ODPH_ERR(
"Failed to open file %s, %s\n", name, strerror(errno));
104 if (fwrite(addr, size, 1, file) != 1) {
105 ODPH_ERR(
"Writing %" PRIu64
" bytes failed\n", size);
110 printf(
"Wrote %" PRIu64
" bytes to %s\n", size, name);
116 static void usage(
const char *prog)
119 "Usage: %s [options]\n"
121 "Mandatory OPTIONS:\n"
122 " -m, --model <file> Model file\n"
123 " -i, --input <file> Input file\n"
126 " -o, --output <file> Output file\n"
127 " -r, --reference <file> Reference file\n"
128 " -q, --quant <scale> Quantization scale\n"
129 " -d, --dequant <scale> Dequantization scale\n"
130 " -b, --batches <num> Number of batches\n"
136 static void parse_args(
int argc,
char *argv[])
138 static const struct option longopts[] = {
139 {
"model", required_argument, NULL,
'm' },
140 {
"input", required_argument, NULL,
'i' },
141 {
"output", required_argument, NULL,
'o' },
142 {
"reference", required_argument, NULL,
'r' },
143 {
"quant", required_argument, NULL,
'q' },
144 {
"dequant", required_argument, NULL,
'd' },
145 {
"batches", required_argument, NULL,
'b' },
146 {
"help", no_argument, NULL,
'h' },
147 { NULL, 0, NULL, 0 } };
149 static const char *shortopts =
"+m:i:o:r:q:d:b:h";
152 int c = getopt_long(argc, argv, shortopts, longopts, NULL);
159 glb.opt.model_name = optarg;
162 glb.opt.input_name = optarg;
165 glb.opt.output_name = optarg;
168 glb.opt.reference_name = optarg;
171 glb.opt.scale_q = atof(optarg);
174 glb.opt.scale_d = atof(optarg);
177 glb.opt.num_batch = atof(optarg);
192 if (!glb.opt.model_name || !glb.opt.input_name) {
198 static int check_num_batch(
void)
200 int min_batch = 1, max_batch = 1;
202 for (
int i = 0; i < glb.num_inp; i++) {
205 for (
int j = 0; j < (int)shape->
num_dim; j++) {
214 if (glb.opt.num_batch < min_batch || glb.opt.num_batch > max_batch) {
215 ODPH_ERR(
"Number of batches %d out of range [%d, %d]\n", glb.opt.num_batch,
216 min_batch, max_batch);
223 static void calc_io_size(
void)
225 for (
int i = 0; i < glb.num_inp; i++) {
229 io_size *inp = &glb.inp[i];
231 printf(
"Input %d: %s, shape:", i, info->
name);
233 for (
int j = 0; j < (int)shape->
num_dim; j++) {
234 printf(
" %d", shape->
dim[j]);
236 elems *= shape->
dim[j];
240 elems *= glb.opt.num_batch;
244 glb.inp_size_q += inp->size;
245 glb.inp_size_d += elems *
sizeof(float);
247 printf(
", elems: %" PRIu64
", datatype size: %d, size: %" PRIu64
"\n",
248 inp->elems, inp->elem_size, inp->size);
251 printf(
"Input size_q: %" PRIu64
", size_d: %" PRIu64
"\n", glb.inp_size_q, glb.inp_size_d);
253 for (
int i = 0; i < glb.num_out; i++) {
257 io_size *out = &glb.out[i];
259 printf(
"Output %d: %s, shape:", i, info->
name);
261 for (
int j = 0; j < (int)shape->
num_dim; j++) {
262 printf(
" %d", shape->
dim[j]);
264 elems *= shape->
dim[j];
268 elems *= glb.opt.num_batch;
272 glb.out_size_q += out->size;
273 glb.out_size_d += elems *
sizeof(float);
275 printf(
", elems: %" PRIu64
", datatype size: %d, size: %" PRIu64
"\n",
276 out->elems, out->elem_size, out->size);
279 printf(
"Output size_q: %" PRIu64
", size_d: %" PRIu64
"\n", glb.out_size_q, glb.out_size_d);
282 static int quantize_input(uint8_t *inp_q_addr, uint8_t *inp_d_addr)
284 for (
int i = 0; i < glb.num_inp; i++) {
285 float scale_q = glb.opt.scale_q;
286 uint64_t elems = glb.inp[i].elems;
302 ODPH_ERR(
"Unsupported type %d for input %d\n", info->
data_type, i);
306 inp_q_addr += glb.inp[i].size;
307 inp_d_addr += elems *
sizeof(float);
313 static int dequantize_output(uint8_t *out_d_addr, uint8_t *out_q_addr)
315 for (
int i = 0; i < glb.num_out; i++) {
316 float scale_d = glb.opt.scale_d;
317 uint64_t elems = glb.out[i].elems;
333 ODPH_ERR(
"Unsupported type %d for output %d\n", info->
data_type, i);
337 out_q_addr += glb.out[i].size;
338 out_d_addr += elems *
sizeof(float);
344 int main(
int argc,
char *argv[])
350 void *input_file = NULL, *output_file = NULL, *reference_file = NULL;
351 uint64_t input_file_size, reference_file_size;
352 uint8_t *input = NULL, *output = NULL;
354 parse_args(argc, argv);
357 ODPH_ERR(
"Global init failed\n");
362 ODPH_ERR(
"Local init failed\n");
367 ODPH_ERR(
"odp_ml_capability() failed\n");
372 if (glb.capa.min_input_align > 1) {
373 ODPH_ERR(
"Minimum input alignment %d not supported\n", glb.capa.min_input_align);
378 if (glb.capa.min_output_align > 1) {
379 ODPH_ERR(
"Minimum output alignment %d not supported\n", glb.capa.min_output_align);
390 ODPH_ERR(
"odp_ml_config() failed\n");
397 model_param.
model = read_file(glb.opt.model_name, &model_param.
size);
398 if (!model_param.
model) {
399 ODPH_ERR(
"Failed to read model file\n");
405 free(model_param.
model);
407 ODPH_ERR(
"odp_ml_model_create() failed\n");
415 ODPH_ERR(
"odp_ml_model_load() failed\n");
421 ODPH_ERR(
"odp_ml_model_info() failed\n");
428 if (glb.num_inp < 0 || glb.num_inp > MAX_IO) {
429 ODPH_ERR(
"odp_ml_model_input_info() failed, or too many inputs\n");
436 if (glb.num_out < 0 || glb.num_out > MAX_IO) {
437 ODPH_ERR(
"odp_ml_model_output_info() failed, or too many outputs\n");
442 if (check_num_batch()) {
447 input_file = read_file(glb.opt.input_name, &input_file_size);
453 if ((glb.opt.scale_q > 0.0 && input_file_size != glb.inp_size_d) ||
454 (!(glb.opt.scale_q > 0.0) && input_file_size != glb.inp_size_q)) {
455 ODPH_ERR(
"Input file size mismatch\n");
461 uint8_t *inp_addr = input_file;
463 if (glb.opt.scale_q > 0.0) {
464 input = malloc(glb.inp_size_q);
466 ODPH_ERR(
"Allocating %" PRIu64
" bytes failed\n", glb.inp_size_q);
471 if (quantize_input(input, input_file)) {
479 for (
int i = 0; i < glb.num_inp; i++) {
480 inp_seg[i].
addr = inp_addr;
481 inp_seg[i].
size = glb.inp[i].size;
482 inp_addr += glb.inp[i].size;
485 output = malloc(glb.out_size_q);
488 ODPH_ERR(
"Allocating %" PRIu64
" bytes failed\n", glb.out_size_q);
494 uint8_t *out_addr = output;
496 for (
int i = 0; i < glb.num_out; i++) {
497 out_seg[i].
addr = out_addr;
498 out_seg[i].
size = glb.out[i].size;
499 out_addr += glb.out[i].size;
504 .num_input_seg = glb.num_inp,
505 .output_seg = out_seg,
506 .num_output_seg = glb.num_out,
513 if (
odp_ml_run(glb.mdl, &data, &run_param) != 1) {
514 ODPH_ERR(
"odp_ml_run() failed\n");
519 void *output_final = output;
520 uint64_t out_size_final = glb.out_size_q;
522 if (glb.opt.scale_d > 0.0) {
523 output_file = malloc(glb.out_size_d);
525 ODPH_ERR(
"Allocating %" PRIu64
" bytes failed\n", glb.out_size_d);
530 if (dequantize_output(output_file, output)) {
535 output_final = output_file;
536 out_size_final = glb.out_size_d;
539 if (glb.opt.output_name) {
540 if (write_file(glb.opt.output_name, output_final, out_size_final)) {
546 if (glb.opt.reference_name)
547 reference_file = read_file(glb.opt.reference_name, &reference_file_size);
549 if (reference_file) {
550 if (out_size_final != reference_file_size) {
551 ODPH_ERR(
"Output size mismatch: %" PRIu64
552 " differs from reference file size %" PRIu64
"\n",
553 out_size_final, reference_file_size);
558 if (memcmp(reference_file, output_final, out_size_final)) {
559 ODPH_ERR(
"Output differs from reference\n");
562 printf(
"Output matches reference\n");
567 ODPH_ERR(
"odp_ml_model_unload() failed\n");
573 ODPH_ERR(
"odp_ml_model_destroy() failed\n");
583 free(reference_file);
586 ODPH_ERR(
"Local term failed\n");
591 ODPH_ERR(
"Global term failed\n");
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
void odp_ml_run_param_init(odp_ml_run_param_t *param)
Initialize model run parameters.
void odp_ml_model_param_init(odp_ml_model_param_t *param)
Initialize ML model parameters.
void odp_ml_config_init(odp_ml_config_t *config)
Initialize ML configuration parameters.
void odp_ml_fp32_to_fp16(uint16_t *dst_fp16, const float *src_fp32, uint32_t num)
Quantize 32-bit float to 16-bit float.
void odp_ml_fp32_to_int8(int8_t *dst_i8, const float *src_fp32, uint32_t num, float scale, int8_t zerop)
Quantize 32-bit float to int8_t.
#define ODP_ML_DIM_DYNAMIC
Dimension size is dynamic.
void odp_ml_fp32_from_fp16(float *dst_fp32, const uint16_t *src_fp16, uint32_t num)
De-quantize 32-bit float from 16-bit float.
void odp_ml_fp32_from_uint8(float *dst_fp32, const uint8_t *src_u8, uint32_t num, float scale, uint8_t zerop)
De-quantize 32-bit float from uint8_t.
#define ODP_ML_COMPL_MODE_SYNC
Synchronous operation.
int odp_ml_config(const odp_ml_config_t *config)
Configure ML offload.
#define ODP_ML_MODEL_INVALID
Invalid ML model.
int odp_ml_run(odp_ml_model_t model, const odp_ml_data_t *data, const odp_ml_run_param_t *param)
Run the model in synchronous mode.
uint32_t odp_ml_model_output_info(odp_ml_model_t model, odp_ml_output_info_t info[], uint32_t num)
Retrieve model output information.
odp_ml_model_t odp_ml_model_create(const char *name, const odp_ml_model_param_t *param)
Create an ML model.
void odp_ml_fp32_to_uint8(uint8_t *dst_u8, const float *src_fp32, uint32_t num, float scale, uint8_t zerop)
Quantize 32-bit float to uint8_t.
int odp_ml_model_load(odp_ml_model_t model, odp_ml_load_result_t *result)
Load ML model.
uint32_t odp_ml_model_input_info(odp_ml_model_t model, odp_ml_input_info_t info[], uint32_t num)
Retrieve model input information.
int odp_ml_model_unload(odp_ml_model_t model, odp_ml_load_result_t *result)
Unload ML model.
void odp_ml_model_print(odp_ml_model_t model)
Print debug information about the model.
int odp_ml_capability(odp_ml_capability_t *capa)
Query ML capabilities.
int odp_ml_model_info(odp_ml_model_t model, odp_ml_model_info_t *info)
Retrieve model information.
int odp_ml_model_destroy(odp_ml_model_t model)
Destroy an ML model.
void odp_ml_fp32_from_int8(float *dst_fp32, const int8_t *src_i8, uint32_t num, float scale, int8_t zerop)
De-quantize 32-bit float from int8_t.
ODP_ML_SHAPE_BATCH
Dynamic batch size.
ODP_ML_DATA_TYPE_FP16
16-bit floating point number.
ODP_ML_DATA_TYPE_UINT8
8-bit unsigned integer.
ODP_ML_DATA_TYPE_INT8
8-bit signed integer.
ODP_THREAD_CONTROL
Control thread.
odp_ml_capability_t
Machine learning capabilities.
odp_ml_config_t
Machine learning configuration parameters.
uint64_t max_model_size
Maximum model binary size in bytes.
odp_ml_compl_mode_t load_mode_mask
Load / unload completion modes.
odp_ml_compl_mode_t run_mode_mask
Run completion modes.
odp_ml_data_seg_t
Model input / output data segment.
void * addr
Segment start address.
uint64_t size
Segment size in bytes.
odp_ml_data_t
Model input / output data for a model inference run.
odp_ml_data_seg_t * input_seg
Model input data segments.
odp_ml_model_param_t
Machine learning model parameters.
uint64_t size
Size of the model binary in bytes.
void * model
Model binary.
odp_ml_output_info_t
Model output information.
odp_ml_data_type_t data_type
Model output data type.
odp_ml_shape_info_t shape
Model output data shape.
uint32_t data_type_size
Size of model output data type in bytes.
char name[ODP_ML_MODEL_IO_NAME_LEN]
Model output name.
odp_ml_run_param_t
Parameters for model run.
uint32_t batch_size
Batch size.
odp_ml_shape_info_t
Model input / output data shape information.
uint32_t dim_min[ODP_ML_MAX_DIMS]
Minimum dimension sizes.
odp_ml_shape_type_t type
Shape type.
uint32_t dim_max[ODP_ML_MAX_DIMS]
Maximum dimension sizes.
uint32_t num_dim
Number of dimensions.
uint32_t dim[ODP_ML_MAX_DIMS]
Dimension sizes.