API Reference Manual  1.46.0
odp_dma_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2021-2024 Nokia
3  */
4 
16 #ifndef _GNU_SOURCE
17 #define _GNU_SOURCE
18 #endif
19 
20 #include <inttypes.h>
21 #include <stdlib.h>
22 #include <signal.h>
23 #include <stdint.h>
24 #include <unistd.h>
25 
26 #include <odp_api.h>
27 #include <odp/helper/odph_api.h>
28 
29 #include <export_results.h>
30 
31 #define EXIT_NOT_SUP 2
32 #define PROG_NAME "odp_dma_perf"
33 
/* Transfer type selected with -t/--trs_type. */
enum {
	SYNC_DMA = 0U,  /* Synchronous DMA transfers (odp_dma_transfer()) */
	ASYNC_DMA,      /* Asynchronous DMA transfers (odp_dma_transfer_start()) */
	SW_COPY         /* Plain software memory copy, no DMA offload */
};
39 
/* Segment data type selected with -S/--in_seg_type. "Dense" segments are
 * pre-configured once during setup, "sparse" segments are (re)configured at
 * runtime before each transfer. */
enum {
	DENSE_PACKET = 0U,
	SPARSE_PACKET,
	DENSE_MEMORY,
	SPARSE_MEMORY
};
46 
/* Async completion mode selected with -m/--compl_mode (maps to
 * ODP_DMA_COMPL_POLL/ODP_DMA_COMPL_EVENT via mode_map[]). */
enum {
	POLL = 0U,
	EVENT
};
51 
/* DMA session policy selected with -p/--policy. */
enum {
	SINGLE = 0U, /* One DMA session shared by all workers */
	MANY         /* One DMA session per worker */
};
56 
/* Default values for command line options (see print_usage()). */
#define DEF_TRS_TYPE SYNC_DMA
#define DEF_SEG_CNT 1U
#define DEF_LEN 1024U
#define DEF_SEG_TYPE DENSE_PACKET
#define DEF_MODE POLL
#define DEF_INFLIGHT 1U
#define DEF_TIME 10U
#define DEF_WORKERS 1U
#define DEF_POLICY SINGLE

/* Tester-imposed upper limits. */
#define MAX_SEGS 1024U
#define MAX_WORKERS 32
#define MAX_MEMORY (256U * 1024U * 1024U)

/* Unit dividers for result reporting. */
#define GIGAS 1000000000
#define MEGAS 1000000
#define KILOS 1000

/* Byte pattern written to source segments and checked on verification. */
#define DATA 0xAA
76 
/* Outcome of command line parsing/validation. */
typedef enum {
	PRS_OK,      /* Options valid, proceed with test */
	PRS_NOK,     /* Invalid options, exit with failure */
	PRS_TERM,    /* Help requested, exit successfully */
	PRS_NOT_SUP  /* Options valid but unsupported by implementation (EXIT_NOT_SUP) */
} parse_result_t;
83 
/* Per-worker runtime statistics, aggregated after the test run.
 * "cc" fields are CPU cycle counts, "tm" fields are nanoseconds. */
typedef struct {
	uint64_t completed;          /* Successfully completed transfers */
	uint64_t start_errs;         /* Failed transfer (start) calls */
	uint64_t poll_errs;          /* Failed odp_dma_transfer_done() calls */
	uint64_t scheduler_timeouts; /* Scheduler wait timeouts (presumably event mode; updated outside this chunk) */
	uint64_t transfer_errs;      /* Transfers that completed unsuccessfully */
	uint64_t data_errs;          /* Destination data verification failures */
	uint64_t tot_tm;             /* Total runtime */
	uint64_t trs_tm;             /* Cumulative transfer duration */
	uint64_t max_trs_tm;         /* Longest single transfer duration */
	uint64_t min_trs_tm;         /* Shortest single transfer duration */
	uint64_t start_cc;           /* Cumulative cycles spent starting transfers */
	uint64_t max_start_cc;
	uint64_t min_start_cc;
	uint64_t wait_cc;            /* Cumulative cycles spent waiting/polling completion */
	uint64_t max_wait_cc;
	uint64_t min_wait_cc;
	uint64_t trs_cc;             /* Cumulative cycles from transfer start to completion */
	uint64_t max_trs_cc;
	uint64_t min_trs_cc;
	uint64_t start_cnt;          /* Samples accumulated into start_cc */
	uint64_t wait_cnt;           /* Samples accumulated into wait_cc */
	uint64_t trs_poll_cnt;       /* Total completion polls over finished transfers */
	uint64_t trs_cnt;            /* Samples accumulated into trs_cc/trs_tm */
} stats_t;
109 
/* State of one in-flight transfer slot. */
typedef struct {
	odp_dma_transfer_param_t trs_param; /* Source/destination segment configuration */
	odp_dma_compl_param_t compl_param;  /* Completion configuration (mode, ID, event, queue) */
	odp_ticketlock_t lock;              /* Serializes slot access when a session is shared (MANY-worker SINGLE policy) */
	odp_time_t trs_start_tm;            /* Start timestamp of the currently running transfer */
	uint64_t trs_start_cc;              /* Start CPU cycles of the currently running transfer */
	uint64_t trs_poll_cnt;              /* Completion polls issued for the current transfer */
	odp_bool_t is_running;              /* True while an async transfer is in flight in this slot */
} trs_info_t;
119 
120 typedef struct sd_s sd_t;
121 typedef void (*ver_fn_t)(trs_info_t *info, stats_t *stats);
122 
123 typedef struct ODP_ALIGNED_CACHE sd_s {
124  struct {
125  trs_info_t infos[MAX_SEGS];
126  odp_dma_seg_t src_seg[MAX_SEGS];
127  odp_dma_seg_t dst_seg[MAX_SEGS];
128  odp_dma_t handle;
129  odp_pool_t pool;
130  odp_queue_t compl_q;
131  uint32_t num_in_segs;
132  uint32_t num_out_segs;
133  uint32_t src_seg_len;
134  uint32_t dst_seg_len;
135  uint32_t num_inflight;
136  uint8_t trs_type;
137  uint8_t compl_mode;
138  } dma;
139 
140  struct {
141  odp_packet_t src_pkt[MAX_SEGS];
142  odp_packet_t dst_pkt[MAX_SEGS];
143  odp_pool_t src_pool;
144  odp_pool_t dst_pool;
145  odp_shm_t src_shm;
146  odp_shm_t dst_shm;
147  void *src;
148  void *dst;
149  void *src_high;
150  void *dst_high;
151  void *cur_src;
152  void *cur_dst;
153  uint64_t shm_size;
154  uint8_t seg_type;
155  } seg;
156 
158  /* Prepare single transfer. */
159  void (*prep_trs_fn)(sd_t *sd, trs_info_t *info);
160  /* Verify single transfer. */
161  ver_fn_t ver_fn;
162 } sd_t;
163 
typedef struct prog_config_s prog_config_t;

/* Per-worker thread context passed to the worker entry function. */
typedef struct ODP_ALIGNED_CACHE {
	stats_t stats;              /* Worker-local statistics */
	prog_config_t *prog_config; /* Back-pointer to the global configuration */
	sd_t *sd;                   /* DMA session assigned to this worker */
} thread_config_t;
171 
/* Function table binding one test configuration (transfer type, segment type,
 * completion mode, policy) to its implementation. Entries may be NULL when a
 * step does not apply to the selected configuration. */
typedef struct {
	/* Configure DMA session specific resources. */
	odp_bool_t (*session_cfg_fn)(sd_t *sd);
	/* Setup transfer elements (memory/packet segments). */
	odp_bool_t (*setup_fn)(sd_t *sd);
	/* Configure DMA transfers (segment addresses etc.). */
	void (*trs_fn)(sd_t *sd);
	/* Configure transfer completion resources (transfer IDs, events etc.). */
	odp_bool_t (*compl_fn)(sd_t *sd);
	/* Initiate required initial transfers. */
	odp_bool_t (*bootstrap_fn)(sd_t *sd);
	/* Wait and handle finished transfer. */
	void (*wait_fn)(sd_t *sd, stats_t *stats);
	/* Handle all unfinished transfers after main test has been stopped. */
	void (*drain_fn)(sd_t *sd);
	/* Free any resources that might have been allocated during setup phase. */
	void (*free_fn)(const sd_t *sd);
} test_api_t;
190 
/* Global program configuration and resources, shared by all workers. */
typedef struct prog_config_s {
	odph_thread_t threads[MAX_WORKERS];
	thread_config_t thread_config[MAX_WORKERS];
	sd_t sds[MAX_WORKERS];               /* One session per worker at most */
	test_api_t api;                      /* Implementation selected by options */
	odp_atomic_u32_t is_running;         /* Cleared by signal handler to stop the test */

	odp_barrier_t init_barrier;
	odp_barrier_t term_barrier;
	odp_dma_compl_mode_t compl_mode_mask; /* Completion modes requested from DMA sessions */
	odp_pool_t src_pool;                 /* Shared source packet pool (packet modes) */
	odp_pool_t dst_pool;                 /* Shared destination packet pool (packet modes) */
	uint64_t shm_size;                   /* Per-session SHM block size (memory modes) */
	uint32_t num_in_segs;
	uint32_t num_out_segs;
	uint32_t src_seg_len;
	uint32_t dst_seg_len;                /* Derived from src_seg_len and segment counts */
	uint32_t num_inflight;
	double time_sec;                     /* Test duration, 0 means infinite */
	uint32_t num_sessions;               /* 1 (SINGLE) or num_workers (MANY) */
	uint32_t src_cache_size;             /* Source pool cache size, derived in check_options() */
	uint32_t dst_cache_size;             /* Destination pool cache size */
	int num_workers;
	odp_bool_t is_verify;                /* Verify destination data after transfers */
	uint8_t trs_type;                    /* SYNC_DMA/ASYNC_DMA/SW_COPY */
	uint8_t seg_type;                    /* DENSE/SPARSE x PACKET/MEMORY */
	uint8_t compl_mode;                  /* POLL/EVENT */
	uint8_t policy;                      /* SINGLE/MANY */
	test_common_options_t common_options; /* Result export options */
} prog_config_t;
221 
/* Global configuration pointer; also accessed from the signal handler. */
static prog_config_t *prog_conf;

/* Maps internal completion mode (POLL/EVENT) to ODP completion mode flags. */
static const int mode_map[] = { ODP_DMA_COMPL_POLL, ODP_DMA_COMPL_EVENT };
225 
/* Signal handler (SIGINT/SIGTERM/SIGHUP): request test stop by clearing the
 * global run flag; workers poll it in their main loop. */
static void terminate(int signal ODP_UNUSED)
{
	odp_atomic_store_u32(&prog_conf->is_running, 0U);
}
230 
/* Reset 'config' to default option values and put every handle/statistic
 * into a known initial (invalid/zero) state. */
static void init_config(prog_config_t *config)
{
	sd_t *sd;
	trs_info_t *info;
	stats_t *stats;

	memset(config, 0, sizeof(*config));
	/* Synchronous completion is always required for DMA sessions. */
	config->compl_mode_mask |= ODP_DMA_COMPL_SYNC;
	config->src_pool = ODP_POOL_INVALID;
	config->dst_pool = ODP_POOL_INVALID;
	config->num_in_segs = DEF_SEG_CNT;
	config->num_out_segs = DEF_SEG_CNT;
	config->src_seg_len = DEF_LEN;
	config->num_inflight = DEF_INFLIGHT;
	config->time_sec = DEF_TIME;
	config->num_workers = DEF_WORKERS;
	config->trs_type = DEF_TRS_TYPE;
	config->seg_type = DEF_SEG_TYPE;
	config->compl_mode = DEF_MODE;
	config->policy = DEF_POLICY;

	for (uint32_t i = 0U; i < MAX_WORKERS; ++i) {
		sd = &config->sds[i];
		stats = &config->thread_config[i].stats;
		memset(sd, 0, sizeof(*sd));

		for (uint32_t j = 0U; j < MAX_SEGS; ++j) {
			info = &sd->dma.infos[j];
			info->compl_param.transfer_id = ODP_DMA_TRANSFER_ID_INVALID;
			info->compl_param.event = ODP_EVENT_INVALID;
			info->compl_param.queue = ODP_QUEUE_INVALID;
			odp_ticketlock_init(&info->lock);
			sd->seg.src_pkt[j] = ODP_PACKET_INVALID;
			sd->seg.dst_pkt[j] = ODP_PACKET_INVALID;
		}

		sd->dma.handle = ODP_DMA_INVALID;
		sd->dma.pool = ODP_POOL_INVALID;
		sd->dma.compl_q = ODP_QUEUE_INVALID;
		sd->seg.src_shm = ODP_SHM_INVALID;
		sd->seg.dst_shm = ODP_SHM_INVALID;
		sd->grp = ODP_SCHED_GROUP_INVALID;
		/* Minimums start at UINT64_MAX so the first sample always wins. */
		stats->min_trs_tm = UINT64_MAX;
		stats->min_start_cc = UINT64_MAX;
		stats->min_wait_cc = UINT64_MAX;
		stats->min_trs_cc = UINT64_MAX;
	}
}
279 
/* Print command line help with the compiled-in default values. */
static void print_usage(void)
{
	printf("\n"
	       "DMA performance test. Load DMA subsystem from several workers.\n"
	       "\n"
	       "Usage: " PROG_NAME " [OPTIONS]\n"
	       "\n"
	       "  E.g. " PROG_NAME "\n"
	       "       " PROG_NAME " -s 10240\n"
	       "       " PROG_NAME " -t 0 -i 1 -o 1 -s 51200 -S 2 -f 64 -T 10\n"
	       "       " PROG_NAME " -t 1 -i 10 -o 10 -s 4096 -S 0 -m 1 -f 10 -c 4 -p 1\n"
	       "       " PROG_NAME " -t 2 -i 10 -o 1 -s 1024 -S 3 -f 10 -c 4 -p 1\n"
	       "\n"
	       "Optional OPTIONS:\n"
	       "\n"
	       "  -t, --trs_type      Transfer type for test data. %u by default.\n"
	       "                      Types:\n"
	       "                          0: synchronous DMA\n"
	       "                          1: asynchronous DMA\n"
	       "                          2: SW memory copy\n"
	       "  -i, --num_in_seg    Number of input segments to transfer. 0 means the maximum\n"
	       "                      count supported by the implementation. %u by default.\n"
	       "  -o, --num_out_seg   Number of output segments to transfer to. 0 means the\n"
	       "                      maximum count supported by the implementation. %u by\n"
	       "                      default.\n"
	       "  -s, --in_seg_len    Input segment length in bytes. 0 length means the maximum\n"
	       "                      segment length supported by the implementation. The actual\n"
	       "                      maximum might be limited by what type of data is\n"
	       "                      transferred (packet/memory). %u by default.\n"
	       "  -S, --in_seg_type   Input segment data type. Dense types can load the DMA\n"
	       "                      subsystem more heavily as transfer resources are\n"
	       "                      pre-configured. Sparse types might on the other hand\n"
	       "                      reflect application usage more precisely as transfer\n"
	       "                      resources are configured in runtime. %u by default.\n"
	       "                      Types:\n"
	       "                          0: dense packet\n"
	       "                          1: sparse packet\n"
	       "                          2: dense memory\n"
	       "                          3: sparse memory\n"
	       "  -m, --compl_mode    Completion mode for transfers. %u by default.\n"
	       "                      Modes:\n"
	       "                          0: poll\n"
	       "                          1: event\n"
	       "  -f, --max_in_flight Maximum transfers in-flight per session. 0 means the\n"
	       "                      maximum supported by the tester/implementation. %u by\n"
	       "                      default.\n"
	       "  -T, --time_sec      Time in seconds to run. 0 means infinite. %u by default.\n"
	       "  -c, --worker_count  Amount of workers. %u by default.\n"
	       "  -p, --policy        DMA session policy. %u by default.\n"
	       "                      Policies:\n"
	       "                          0: One session shared by workers\n"
	       "                          1: One session per worker\n"
	       "  -v, --verify        Verify transfers. Checks correctness of destination data\n"
	       "                      after successful transfers.\n"
	       "  -h, --help          This help.\n"
	       "\n", DEF_TRS_TYPE, DEF_SEG_CNT, DEF_SEG_CNT, DEF_LEN, DEF_SEG_TYPE, DEF_MODE,
	       DEF_INFLIGHT, DEF_TIME, DEF_WORKERS, DEF_POLICY);
}
338 
/* Validate parsed options against implementation capabilities and derive the
 * remaining configuration (session count, destination segment length, pool
 * cache sizes, SHM size).
 *
 * Returns PRS_OK on success, PRS_NOK for invalid input and PRS_NOT_SUP when
 * the requested configuration exceeds implementation capabilities. */
static parse_result_t check_options(prog_config_t *config)
{
	int max_workers;
	odp_dma_capability_t dma_capa;
	uint32_t num_sessions, max_seg_len, max_trs, max_in, max_out, max_segs;
	odp_schedule_capability_t sched_capa;
	odp_pool_capability_t pool_capa;
	odp_shm_capability_t shm_capa;
	uint64_t shm_size = 0U;

	if (config->trs_type != SYNC_DMA && config->trs_type != ASYNC_DMA &&
	    config->trs_type != SW_COPY) {
		ODPH_ERR("Invalid transfer type: %u\n", config->trs_type);
		return PRS_NOK;
	}

	if (config->seg_type != DENSE_PACKET && config->seg_type != SPARSE_PACKET &&
	    config->seg_type != DENSE_MEMORY && config->seg_type != SPARSE_MEMORY) {
		ODPH_ERR("Invalid segment type: %u\n", config->seg_type);
		return PRS_NOK;
	}

	/* One thread slot is reserved for the main/control thread. */
	max_workers = ODPH_MIN(odp_thread_count_max() - 1, MAX_WORKERS);

	if (config->num_workers <= 0 || config->num_workers > max_workers) {
		ODPH_ERR("Invalid thread count: %d (min: 1, max: %d)\n", config->num_workers,
			 max_workers);
		return PRS_NOK;
	}

	if (config->policy != SINGLE && config->policy != MANY) {
		ODPH_ERR("Invalid DMA session policy: %u\n", config->policy);
		return PRS_NOK;
	}

	if (odp_dma_capability(&dma_capa) < 0) {
		ODPH_ERR("Error querying DMA capabilities\n");
		return PRS_NOK;
	}

	/* SINGLE: one shared session; MANY: one session per worker. */
	num_sessions = config->policy == SINGLE ? 1 : config->num_workers;

	if (num_sessions > dma_capa.max_sessions) {
		ODPH_ERR("Not enough DMA sessions supported: %u (max: %u)\n", num_sessions,
			 dma_capa.max_sessions);
		return PRS_NOT_SUP;
	}

	config->num_sessions = num_sessions;

	/* Zero segment count/length options mean implementation maximums. */
	if (config->num_in_segs == 0U)
		config->num_in_segs = dma_capa.max_src_segs;

	if (config->num_out_segs == 0U)
		config->num_out_segs = dma_capa.max_dst_segs;

	if (config->num_in_segs > dma_capa.max_src_segs ||
	    config->num_out_segs > dma_capa.max_dst_segs ||
	    config->num_in_segs + config->num_out_segs > dma_capa.max_segs) {
		ODPH_ERR("Unsupported segment count configuration, in: %u, out: %u (max in: %u, "
			 "max out: %u, max tot: %u)\n", config->num_in_segs, config->num_out_segs,
			 dma_capa.max_src_segs, dma_capa.max_dst_segs, dma_capa.max_segs);
		return PRS_NOT_SUP;
	}

	if (config->src_seg_len == 0U)
		config->src_seg_len = dma_capa.max_seg_len;

	/* Destination segment length: total input bytes divided over output
	 * segments, with the division remainder added to each segment. */
	config->dst_seg_len = config->src_seg_len * config->num_in_segs /
			      config->num_out_segs + config->src_seg_len *
			      config->num_in_segs % config->num_out_segs;

	max_seg_len = ODPH_MAX(config->src_seg_len, config->dst_seg_len);

	if (max_seg_len > dma_capa.max_seg_len) {
		ODPH_ERR("Unsupported total DMA segment length: %u (max: %u)\n", max_seg_len,
			 dma_capa.max_seg_len);
		return PRS_NOT_SUP;
	}

	/* Completion mode is only relevant for asynchronous DMA. */
	if (config->trs_type == ASYNC_DMA) {
		if (config->compl_mode != POLL && config->compl_mode != EVENT) {
			ODPH_ERR("Invalid completion mode: %u\n", config->compl_mode);
			return PRS_NOK;
		}

		if (config->compl_mode == POLL && (dma_capa.compl_mode_mask & ODP_DMA_COMPL_POLL)
		    == 0U) {
			ODPH_ERR("Unsupported DMA completion mode, poll\n");
			return PRS_NOT_SUP;
		}

		if (config->compl_mode == EVENT) {
			/* Event mode needs a completion pool, a scheduled
			 * completion queue and a scheduler group per session. */
			if (config->num_sessions > dma_capa.pool.max_pools) {
				ODPH_ERR("Unsupported amount of completion pools: %u (max: %u)\n",
					 config->num_sessions, dma_capa.pool.max_pools);
				return PRS_NOT_SUP;
			}

			if ((dma_capa.compl_mode_mask & ODP_DMA_COMPL_EVENT) == 0U) {
				ODPH_ERR("Unsupported DMA completion mode, event\n");
				return PRS_NOT_SUP;
			}

			if (dma_capa.queue_type_sched == 0) {
				ODPH_ERR("Unsupported DMA queueing type, scheduled\n");
				return PRS_NOT_SUP;
			}

			if (config->num_inflight > dma_capa.pool.max_num) {
				ODPH_ERR("Unsupported amount of completion events: %u (max: %u)\n",
					 config->num_inflight, dma_capa.pool.max_num);
				return PRS_NOT_SUP;
			}

			if (odp_schedule_capability(&sched_capa) < 0) {
				ODPH_ERR("Error querying scheduler capabilities\n");
				return PRS_NOK;
			}

			/* Three scheduler groups are presumably consumed by the
			 * predefined ALL/WORKER/CONTROL groups — hence the -3. */
			if (config->num_sessions > sched_capa.max_groups - 3U) {
				ODPH_ERR("Unsupported amount of scheduler groups: %u (max: %u)\n",
					 config->num_sessions, sched_capa.max_groups - 3U);
				return PRS_NOT_SUP;
			}
		}

		config->compl_mode_mask |= mode_map[config->compl_mode];
	}

	max_trs = ODPH_MIN(dma_capa.max_transfers, MAX_SEGS);

	if (config->num_inflight == 0U)
		config->num_inflight = max_trs;

	if (config->num_inflight > max_trs) {
		ODPH_ERR("Unsupported amount of in-flight DMA transfers: %u (max: %u)\n",
			 config->num_inflight, max_trs);
		return PRS_NOT_SUP;
	}

	max_in = config->num_in_segs * config->num_inflight;
	max_out = config->num_out_segs * config->num_inflight;
	max_segs = ODPH_MAX(max_in, max_out);

	if (max_segs > MAX_SEGS) {
		ODPH_ERR("Unsupported input/output * inflight segment combination: %u (max: %u)\n",
			 max_segs, MAX_SEGS);
		return PRS_NOT_SUP;
	}

	if (config->seg_type == DENSE_PACKET || config->seg_type == SPARSE_PACKET) {
		if (odp_pool_capability(&pool_capa) < 0) {
			ODPH_ERR("Error querying pool capabilities\n");
			return PRS_NOK;
		}

		if (pool_capa.pkt.max_pools < 2U) {
			ODPH_ERR("Unsupported amount of packet pools: 2 (max: %u)\n",
				 pool_capa.pkt.max_pools);
			return PRS_NOT_SUP;
		}

		if (pool_capa.pkt.max_len != 0U && max_seg_len > pool_capa.pkt.max_len) {
			ODPH_ERR("Unsupported packet size: %u (max: %u)\n", max_seg_len,
				 pool_capa.pkt.max_len);
			return PRS_NOT_SUP;
		}

		if (pool_capa.pkt.max_num != 0U &&
		    max_segs * num_sessions > pool_capa.pkt.max_num) {
			ODPH_ERR("Unsupported amount of packet pool elements: %u (max: %u)\n",
				 max_segs * num_sessions, pool_capa.pkt.max_num);
			return PRS_NOT_SUP;
		}

		/* Clamp desired cache size into the supported range. */
		config->src_cache_size = ODPH_MIN(ODPH_MAX(max_in, pool_capa.pkt.min_cache_size),
						  pool_capa.pkt.max_cache_size);
		config->dst_cache_size = ODPH_MIN(ODPH_MAX(max_out, pool_capa.pkt.min_cache_size),
						  pool_capa.pkt.max_cache_size);
	} else {
		/* If SHM implementation capabilities are very puny, program will have already
		 * failed when reserving memory for global program configuration. */
		if (odp_shm_capability(&shm_capa) < 0) {
			ODPH_ERR("Error querying SHM capabilities\n");
			return PRS_NOK;
		}

		/* One block for program configuration, one for source memory and one for
		 * destination memory. */
		if (shm_capa.max_blocks < 3U) {
			ODPH_ERR("Unsupported amount of SHM blocks: 3 (max: %u)\n",
				 shm_capa.max_blocks);
			return PRS_NOT_SUP;
		}

		shm_size = (uint64_t)config->dst_seg_len * config->num_out_segs *
			   config->num_inflight;

		if (shm_capa.max_size != 0U && shm_size > shm_capa.max_size) {
			ODPH_ERR("Unsupported total SHM block size: %" PRIu64 ""
				 " (max: %" PRIu64 ")\n", shm_size, shm_capa.max_size);
			return PRS_NOT_SUP;
		}

		/* Sparse memory mode uses a large address range to randomize
		 * segment placement. */
		if (config->seg_type == SPARSE_MEMORY && shm_size < MAX_MEMORY)
			shm_size = shm_capa.max_size != 0U ?
					ODPH_MIN(shm_capa.max_size, MAX_MEMORY) : MAX_MEMORY;

		config->shm_size = shm_size;
	}

	return PRS_OK;
}
553 
/* Parse command line options into 'config' (after applying defaults) and
 * validate the result with check_options(). */
static parse_result_t parse_options(int argc, char **argv, prog_config_t *config)
{
	int opt;
	static const struct option longopts[] = {
		{ "trs_type", required_argument, NULL, 't' },
		{ "num_in_seg", required_argument, NULL, 'i' },
		{ "num_out_seg", required_argument, NULL, 'o' },
		{ "in_seg_len", required_argument, NULL, 's' },
		{ "in_seg_type", required_argument, NULL, 'S' },
		{ "compl_mode", required_argument, NULL, 'm' },
		{ "max_in_flight", required_argument, NULL, 'f'},
		{ "time_sec", required_argument, NULL, 'T' },
		{ "worker_count", required_argument, NULL, 'c' },
		{ "policy", required_argument, NULL, 'p' },
		{ "verify", no_argument, NULL, 'v' },
		{ "help", no_argument, NULL, 'h' },
		{ NULL, 0, NULL, 0 }
	};
	static const char *shortopts = "t:i:o:s:S:m:f:T:c:p:vh";

	init_config(config);

	while (1) {
		opt = getopt_long(argc, argv, shortopts, longopts, NULL);

		if (opt == -1)
			break;

		switch (opt) {
		case 't':
			config->trs_type = atoi(optarg);
			break;
		case 'i':
			config->num_in_segs = atoi(optarg);
			break;
		case 'o':
			config->num_out_segs = atoi(optarg);
			break;
		case 's':
			config->src_seg_len = atoi(optarg);
			break;
		case 'S':
			config->seg_type = atoi(optarg);
			break;
		case 'm':
			config->compl_mode = atoi(optarg);
			break;
		case 'f':
			config->num_inflight = atoi(optarg);
			break;
		case 'T':
			config->time_sec = atof(optarg);
			break;
		case 'c':
			config->num_workers = atoi(optarg);
			break;
		case 'p':
			config->policy = atoi(optarg);
			break;
		case 'v':
			config->is_verify = true;
			break;
		case 'h':
			print_usage();
			return PRS_TERM;
		case '?':
		default:
			print_usage();
			return PRS_NOK;
		}
	}

	return check_options(config);
}
628 
/* Install the termination signal handler (SIGINT/SIGTERM/SIGHUP) and parse
 * command line options. */
static parse_result_t setup_program(int argc, char **argv, prog_config_t *config)
{
	struct sigaction action = { .sa_handler = terminate };

	/* Block the other termination signals while the handler runs. */
	if (sigemptyset(&action.sa_mask) == -1 || sigaddset(&action.sa_mask, SIGINT) == -1 ||
	    sigaddset(&action.sa_mask, SIGTERM) == -1 ||
	    sigaddset(&action.sa_mask, SIGHUP) == -1 || sigaction(SIGINT, &action, NULL) == -1 ||
	    sigaction(SIGTERM, &action, NULL) == -1 || sigaction(SIGHUP, &action, NULL) == -1) {
		ODPH_ERR("Error installing signal handler\n");
		return PRS_NOK;
	}

	return parse_options(argc, argv, config);
}
643 
644 static odp_pool_t get_src_packet_pool(void)
645 {
646  odp_pool_param_t param;
647  uint32_t num_pkts_per_worker = ODPH_MAX(prog_conf->num_inflight * prog_conf->num_in_segs,
648  prog_conf->src_cache_size);
649 
650  if (prog_conf->src_pool != ODP_POOL_INVALID)
651  return prog_conf->src_pool;
652 
653  odp_pool_param_init(&param);
654  param.type = ODP_POOL_PACKET;
655  param.pkt.num = num_pkts_per_worker * prog_conf->num_workers;
656  param.pkt.len = prog_conf->src_seg_len;
657  param.pkt.seg_len = prog_conf->src_seg_len;
658  param.pkt.cache_size = prog_conf->src_cache_size;
659  prog_conf->src_pool = odp_pool_create(PROG_NAME "_src_pkts", &param);
660 
661  return prog_conf->src_pool;
662 }
663 
664 static odp_pool_t get_dst_packet_pool(void)
665 {
666  odp_pool_param_t param;
667  uint32_t num_pkts_per_worker = ODPH_MAX(prog_conf->num_inflight * prog_conf->num_out_segs,
668  prog_conf->dst_cache_size);
669 
670  if (prog_conf->dst_pool != ODP_POOL_INVALID)
671  return prog_conf->dst_pool;
672 
673  odp_pool_param_init(&param);
674  param.type = ODP_POOL_PACKET;
675  param.pkt.num = num_pkts_per_worker * prog_conf->num_workers;
676  param.pkt.len = prog_conf->dst_seg_len;
677  param.pkt.seg_len = prog_conf->dst_seg_len;
678  param.pkt.cache_size = prog_conf->dst_cache_size;
679  prog_conf->dst_pool = odp_pool_create(PROG_NAME "_dst_pkts", &param);
680 
681  return prog_conf->dst_pool;
682 }
683 
684 static odp_bool_t configure_packets(sd_t *sd)
685 {
686  sd->seg.src_pool = get_src_packet_pool();
687 
688  if (sd->seg.src_pool == ODP_POOL_INVALID) {
689  ODPH_ERR("Error creating source packet pool\n");
690  return false;
691  }
692 
693  sd->seg.dst_pool = get_dst_packet_pool();
694 
695  if (sd->seg.dst_pool == ODP_POOL_INVALID) {
696  ODPH_ERR("Error creating destination packet pool\n");
697  return false;
698  }
699 
700  return true;
701 }
702 
703 static odp_bool_t allocate_packets(sd_t *sd)
704 {
705  for (uint32_t i = 0U; i < sd->dma.num_inflight * sd->dma.num_in_segs; ++i) {
706  sd->seg.src_pkt[i] = odp_packet_alloc(sd->seg.src_pool, sd->dma.src_seg_len);
707 
708  if (sd->seg.src_pkt[i] == ODP_PACKET_INVALID) {
709  ODPH_ERR("Error allocating source segment packets\n");
710  return false;
711  }
712  }
713 
714  for (uint32_t i = 0U; i < sd->dma.num_inflight * sd->dma.num_out_segs; ++i) {
715  sd->seg.dst_pkt[i] = odp_packet_alloc(sd->seg.dst_pool, sd->dma.dst_seg_len);
716 
717  if (sd->seg.dst_pkt[i] == ODP_PACKET_INVALID) {
718  ODPH_ERR("Error allocating destination segment packets\n");
719  return false;
720  }
721  }
722 
723  return true;
724 }
725 
726 static odp_bool_t setup_packet_segments(sd_t *sd)
727 {
728  return configure_packets(sd) &&
729  (sd->seg.seg_type == DENSE_PACKET ? allocate_packets(sd) : true);
730 }
731 
/* Fill a buffer with the known test pattern used by transfer verification. */
static inline void fill_data(uint8_t *data, uint32_t len)
{
	memset(data, DATA, len);
}
736 
737 static void configure_packet_transfer(sd_t *sd)
738 {
739  odp_dma_seg_t *start_src_seg, *start_dst_seg, *seg;
740  uint32_t k = 0U, z = 0U, len;
741  odp_packet_t pkt;
743 
744  for (uint32_t i = 0U; i < sd->dma.num_inflight; ++i) {
745  start_src_seg = &sd->dma.src_seg[k];
746  start_dst_seg = &sd->dma.dst_seg[z];
747 
748  for (uint32_t j = 0U; j < sd->dma.num_in_segs; ++j, ++k) {
749  pkt = sd->seg.src_pkt[k];
750  seg = &start_src_seg[j];
751  seg->packet = pkt;
752  seg->offset = 0U;
753  seg->len = sd->dma.src_seg_len;
754 
755  if (seg->packet != ODP_PACKET_INVALID)
756  fill_data(odp_packet_data(seg->packet), seg->len);
757  }
758 
759  len = sd->dma.num_in_segs * sd->dma.src_seg_len;
760 
761  for (uint32_t j = 0U; j < sd->dma.num_out_segs; ++j, ++z) {
762  pkt = sd->seg.dst_pkt[z];
763  seg = &start_dst_seg[j];
764  seg->packet = pkt;
765  seg->offset = 0U;
766  seg->len = ODPH_MIN(len, sd->dma.dst_seg_len);
767  len -= sd->dma.dst_seg_len;
768  }
769 
770  param = &sd->dma.infos[i].trs_param;
774  param->num_src = sd->dma.num_in_segs;
775  param->num_dst = sd->dma.num_out_segs;
776  param->src_seg = start_src_seg;
777  param->dst_seg = start_dst_seg;
778  }
779 }
780 
781 static void free_packets(const sd_t *sd)
782 {
783  for (uint32_t i = 0U; i < sd->dma.num_inflight * sd->dma.num_in_segs; ++i) {
784  if (sd->seg.src_pkt[i] != ODP_PACKET_INVALID)
785  odp_packet_free(sd->seg.src_pkt[i]);
786  }
787 
788  for (uint32_t i = 0U; i < sd->dma.num_inflight * sd->dma.num_out_segs; ++i) {
789  if (sd->seg.dst_pkt[i] != ODP_PACKET_INVALID)
790  odp_packet_free(sd->seg.dst_pkt[i]);
791  }
792 }
793 
794 static odp_bool_t allocate_memory(sd_t *sd)
795 {
796  sd->seg.src_shm = odp_shm_reserve(PROG_NAME "_src_shm", sd->seg.shm_size,
797  ODP_CACHE_LINE_SIZE, 0U);
798  sd->seg.dst_shm = odp_shm_reserve(PROG_NAME "_dst_shm", sd->seg.shm_size,
799  ODP_CACHE_LINE_SIZE, 0U);
800 
801  if (sd->seg.src_shm == ODP_SHM_INVALID || sd->seg.dst_shm == ODP_SHM_INVALID) {
802  ODPH_ERR("Error allocating SHM block\n");
803  return false;
804  }
805 
806  sd->seg.src = odp_shm_addr(sd->seg.src_shm);
807  sd->seg.dst = odp_shm_addr(sd->seg.dst_shm);
808 
809  if (sd->seg.src == NULL || sd->seg.dst == NULL) {
810  ODPH_ERR("Error resolving SHM block address\n");
811  return false;
812  }
813 
814  sd->seg.src_high = (uint8_t *)sd->seg.src + sd->seg.shm_size - sd->dma.src_seg_len;
815  sd->seg.dst_high = (uint8_t *)sd->seg.dst + sd->seg.shm_size - sd->dma.dst_seg_len;
816  sd->seg.cur_src = sd->seg.src;
817  sd->seg.cur_dst = sd->seg.dst;
818 
819  return true;
820 }
821 
/* Memory segment setup: only SHM allocation is needed (both dense and sparse
 * memory modes). */
static odp_bool_t setup_memory_segments(sd_t *sd)
{
	return allocate_memory(sd);
}
826 
827 static void configure_address_transfer(sd_t *sd)
828 {
829  odp_dma_seg_t *start_src_seg, *start_dst_seg, *seg;
830  uint32_t k = 0U, z = 0U, len;
832 
833  for (uint32_t i = 0U; i < sd->dma.num_inflight; ++i) {
834  start_src_seg = &sd->dma.src_seg[k];
835  start_dst_seg = &sd->dma.dst_seg[z];
836 
837  for (uint32_t j = 0U; j < sd->dma.num_in_segs; ++j, ++k) {
838  seg = &start_src_seg[j];
839  seg->addr = sd->seg.seg_type == SPARSE_MEMORY ?
840  NULL : (uint8_t *)sd->seg.src + k * sd->dma.src_seg_len;
841  seg->len = sd->dma.src_seg_len;
842 
843  if (seg->addr != NULL)
844  fill_data(seg->addr, seg->len);
845  }
846 
847  len = sd->dma.num_in_segs * sd->dma.src_seg_len;
848 
849  for (uint32_t j = 0U; j < sd->dma.num_out_segs; ++j, ++z) {
850  seg = &start_dst_seg[j];
851  seg->addr = sd->seg.seg_type == SPARSE_MEMORY ?
852  NULL : (uint8_t *)sd->seg.dst + z * sd->dma.dst_seg_len;
853  seg->len = ODPH_MIN(len, sd->dma.dst_seg_len);
854  len -= sd->dma.dst_seg_len;
855  }
856 
857  param = &sd->dma.infos[i].trs_param;
861  param->num_src = sd->dma.num_in_segs;
862  param->num_dst = sd->dma.num_out_segs;
863  param->src_seg = start_src_seg;
864  param->dst_seg = start_dst_seg;
865  }
866 }
867 
868 static void free_memory(const sd_t *sd)
869 {
870  if (sd->seg.src_shm != ODP_SHM_INVALID)
871  (void)odp_shm_free(sd->seg.src_shm);
872 
873  if (sd->seg.dst_shm != ODP_SHM_INVALID)
874  (void)odp_shm_free(sd->seg.dst_shm);
875 }
876 
/* Run one synchronous DMA transfer and record its statistics. Optionally
 * verify the destination data via 'ver_fn' on success. */
static void run_transfer(odp_dma_t handle, trs_info_t *info, stats_t *stats, ver_fn_t ver_fn)
{
	odp_time_t start_tm, end_tm;
	uint64_t start_cc, end_cc, trs_tm, trs_cc;
	odp_dma_result_t res;
	int ret;

	start_tm = odp_time_local_strict();
	start_cc = odp_cpu_cycles();
	ret = odp_dma_transfer(handle, &info->trs_param, &res);
	end_cc = odp_cpu_cycles();
	end_tm = odp_time_local_strict();

	if (odp_unlikely(ret <= 0)) {
		++stats->start_errs;
	} else {
		trs_tm = odp_time_diff_ns(end_tm, start_tm);
		stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm);
		stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm);
		stats->trs_tm += trs_tm;
		trs_cc = odp_cpu_cycles_diff(end_cc, start_cc);
		stats->max_trs_cc = ODPH_MAX(trs_cc, stats->max_trs_cc);
		stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc);
		stats->trs_cc += trs_cc;
		++stats->trs_cnt;
		/* Synchronous transfers have no separate start phase, so the
		 * start statistics mirror the whole-transfer cycle counts. */
		stats->max_start_cc = stats->max_trs_cc;
		stats->min_start_cc = stats->min_trs_cc;
		stats->start_cc += trs_cc;
		++stats->start_cnt;

		if (odp_unlikely(!res.success)) {
			++stats->transfer_errs;
		} else {
			++stats->completed;

			if (ver_fn != NULL)
				ver_fn(info, stats);
		}
	}
}
917 
918 static void run_transfers_mt_unsafe(sd_t *sd, stats_t *stats)
919 {
920  const uint32_t count = sd->dma.num_inflight;
921  odp_dma_t handle = sd->dma.handle;
922  trs_info_t *infos = sd->dma.infos, *info;
923 
924  for (uint32_t i = 0U; i < count; ++i) {
925  info = &infos[i];
926 
927  if (sd->prep_trs_fn != NULL)
928  sd->prep_trs_fn(sd, info);
929 
930  run_transfer(handle, info, stats, sd->ver_fn);
931  }
932 }
933 
934 static void run_transfers_mt_safe(sd_t *sd, stats_t *stats)
935 {
936  const uint32_t count = sd->dma.num_inflight;
937  odp_dma_t handle = sd->dma.handle;
938  trs_info_t *infos = sd->dma.infos, *info;
939 
940  for (uint32_t i = 0U; i < count; ++i) {
941  info = &infos[i];
942 
943  if (odp_ticketlock_trylock(&info->lock)) {
944  if (sd->prep_trs_fn != NULL)
945  sd->prep_trs_fn(sd, info);
946 
947  run_transfer(handle, info, stats, sd->ver_fn);
948  odp_ticketlock_unlock(&info->lock);
949  }
950  }
951 }
952 
953 static odp_bool_t configure_poll_compl(sd_t *sd)
954 {
955  odp_dma_compl_param_t *param;
956 
957  for (uint32_t i = 0U; i < sd->dma.num_inflight; ++i) {
958  param = &sd->dma.infos[i].compl_param;
959 
961  param->compl_mode = mode_map[sd->dma.compl_mode];
962  param->transfer_id = odp_dma_transfer_id_alloc(sd->dma.handle);
963 
965  ODPH_ERR("Error allocating transfer ID\n");
966  return false;
967  }
968  }
969 
970  return true;
971 }
972 
/* Advance one transfer slot in poll completion mode. If the slot has a
 * transfer in flight, poll it once and, when finished, record the transfer
 * statistics and optionally verify the data. If the slot is idle, start a
 * new transfer. */
static void poll_transfer(sd_t *sd, trs_info_t *info, stats_t *stats)
{
	uint64_t start_cc, end_cc, trs_tm, trs_cc, wait_cc, start_cc_diff;
	odp_time_t start_tm;
	odp_dma_t handle = sd->dma.handle;
	odp_dma_result_t res;
	int ret;

	if (info->is_running) {
		start_cc = odp_cpu_cycles();
		ret = odp_dma_transfer_done(handle, info->compl_param.transfer_id, &res);
		end_cc = odp_cpu_cycles();

		if (odp_unlikely(ret < 0)) {
			++stats->poll_errs;
			return;
		}

		++info->trs_poll_cnt;
		wait_cc = odp_cpu_cycles_diff(end_cc, start_cc);
		stats->max_wait_cc = ODPH_MAX(wait_cc, stats->max_wait_cc);
		stats->min_wait_cc = ODPH_MIN(wait_cc, stats->min_wait_cc);
		stats->wait_cc += wait_cc;
		++stats->wait_cnt;

		/* Not finished yet, poll again on a later round. */
		if (ret == 0)
			return;

		/* Transfer finished: account time/cycles from its start. */
		trs_tm = odp_time_diff_ns(odp_time_global_strict(), info->trs_start_tm);
		stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm);
		stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm);
		stats->trs_tm += trs_tm;
		trs_cc = odp_cpu_cycles_diff(odp_cpu_cycles(), info->trs_start_cc);
		stats->max_trs_cc = ODPH_MAX(trs_cc, stats->max_trs_cc);
		stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc);
		stats->trs_cc += trs_cc;
		stats->trs_poll_cnt += info->trs_poll_cnt;
		++stats->trs_cnt;

		if (odp_unlikely(!res.success)) {
			++stats->transfer_errs;
		} else {
			++stats->completed;

			if (sd->ver_fn != NULL)
				sd->ver_fn(info, stats);
		}

		info->is_running = false;
	} else {
		if (sd->prep_trs_fn != NULL)
			sd->prep_trs_fn(sd, info);

		start_tm = odp_time_global_strict();
		start_cc = odp_cpu_cycles();
		ret = odp_dma_transfer_start(handle, &info->trs_param, &info->compl_param);
		end_cc = odp_cpu_cycles();

		if (odp_unlikely(ret <= 0)) {
			++stats->start_errs;
		} else {
			info->trs_start_tm = start_tm;
			info->trs_start_cc = start_cc;
			info->trs_poll_cnt = 0U;
			start_cc_diff = odp_cpu_cycles_diff(end_cc, start_cc);
			stats->max_start_cc = ODPH_MAX(start_cc_diff, stats->max_start_cc);
			stats->min_start_cc = ODPH_MIN(start_cc_diff, stats->min_start_cc);
			stats->start_cc += start_cc_diff;
			++stats->start_cnt;
			info->is_running = true;
		}
	}
}
1046 
1047 static void poll_transfers_mt_unsafe(sd_t *sd, stats_t *stats)
1048 {
1049  const uint32_t count = sd->dma.num_inflight;
1050  trs_info_t *infos = sd->dma.infos;
1051 
1052  for (uint32_t i = 0U; i < count; ++i)
1053  poll_transfer(sd, &infos[i], stats);
1054 }
1055 
1056 static void poll_transfers_mt_safe(sd_t *sd, stats_t *stats)
1057 {
1058  const uint32_t count = sd->dma.num_inflight;
1059  trs_info_t *infos = sd->dma.infos, *info;
1060 
1061  for (uint32_t i = 0U; i < count; ++i) {
1062  info = &infos[i];
1063 
1064  if (odp_ticketlock_trylock(&info->lock)) {
1065  poll_transfer(sd, info, stats);
1066  odp_ticketlock_unlock(&info->lock);
1067  }
1068  }
1069 }
1070 
1071 static void drain_poll_transfers(sd_t *sd)
1072 {
1073  const uint32_t count = sd->dma.num_inflight;
1074  trs_info_t *infos = sd->dma.infos, *info;
1075  odp_dma_t handle = sd->dma.handle;
1076  int rc;
1077 
1078  for (uint32_t i = 0U; i < count; ++i) {
1079  info = &infos[i];
1080 
1081  if (info->is_running) {
1082  do {
1083  rc = odp_dma_transfer_done(handle, info->compl_param.transfer_id,
1084  NULL);
1085  } while (rc == 0);
1086  }
1087  }
1088 }
1089 
1090 static odp_bool_t configure_event_compl_session(sd_t *sd)
1091 {
1092  odp_thrmask_t zero;
1093  odp_dma_pool_param_t pool_param;
1094  odp_queue_param_t queue_param;
1095 
1096  odp_thrmask_zero(&zero);
1097  sd->grp = odp_schedule_group_create(PROG_NAME "_scd_grp", &zero);
1098 
1099  if (sd->grp == ODP_SCHED_GROUP_INVALID) {
1100  ODPH_ERR("Error creating scheduler group for DMA session\n");
1101  return false;
1102  }
1103 
1104  odp_dma_pool_param_init(&pool_param);
1105  pool_param.num = sd->dma.num_inflight;
1106  sd->dma.pool = odp_dma_pool_create(PROG_NAME "_dma_evs", &pool_param);
1107 
1108  if (sd->dma.pool == ODP_POOL_INVALID) {
1109  ODPH_ERR("Error creating DMA event completion pool\n");
1110  return false;
1111  }
1112 
1113  odp_queue_param_init(&queue_param);
1114  queue_param.type = ODP_QUEUE_TYPE_SCHED;
1115  queue_param.sched.sync = ODP_SCHED_SYNC_PARALLEL;
1116  queue_param.sched.prio = odp_schedule_default_prio();
1117  queue_param.sched.group = sd->grp;
1118  sd->dma.compl_q = odp_queue_create(PROG_NAME, &queue_param);
1119 
1120  if (sd->dma.compl_q == ODP_QUEUE_INVALID) {
1121  ODPH_ERR("Error creating DMA completion queue\n");
1122  return false;
1123  }
1124 
1125  return true;
1126 }
1127 
1128 static odp_bool_t configure_event_compl(sd_t *sd)
1129 {
1130  odp_dma_compl_param_t *param;
1131  odp_dma_compl_t c_ev;
1132 
1133  for (uint32_t i = 0U; i < sd->dma.num_inflight; ++i) {
1134  param = &sd->dma.infos[i].compl_param;
1135 
1136  odp_dma_compl_param_init(param);
1137  param->compl_mode = mode_map[sd->dma.compl_mode];
1138  c_ev = odp_dma_compl_alloc(sd->dma.pool);
1139 
1140  if (c_ev == ODP_DMA_COMPL_INVALID) {
1141  ODPH_ERR("Error allocating completion event\n");
1142  return false;
1143  }
1144 
1145  param->event = odp_dma_compl_to_event(c_ev);
1146  param->queue = sd->dma.compl_q;
1147  param->user_ptr = &sd->dma.infos[i];
1148  }
1149 
1150  return true;
1151 }
1152 
1153 static odp_bool_t start_initial_transfers(sd_t *sd)
1154 {
1155  odp_time_t start_tm;
1156  uint64_t start_cc;
1157  trs_info_t *info;
1158  int ret;
1159 
1160  for (uint32_t i = 0U; i < sd->dma.num_inflight; ++i) {
1161  info = &sd->dma.infos[i];
1162 
1163  if (sd->prep_trs_fn != NULL)
1164  sd->prep_trs_fn(sd, info);
1165 
1166  start_tm = odp_time_global_strict();
1167  start_cc = odp_cpu_cycles();
1168  ret = odp_dma_transfer_start(sd->dma.handle, &info->trs_param, &info->compl_param);
1169 
1170  if (ret <= 0) {
1171  ODPH_ERR("Error starting DMA transfer\n");
1172  return false;
1173  }
1174 
1175  info->trs_start_tm = start_tm;
1176  info->trs_start_cc = start_cc;
1177  }
1178 
1179  return true;
1180 }
1181 
/* Wait for one DMA completion event, account wait/transfer statistics, and
 * restart a new transfer on the same inflight slot.
 *
 * NOTE(review): two calls are not visible in this extract and must be
 * confirmed against the full source — an odp_schedule() call assigning 'ev'
 * between the two cycle reads, and an odp_dma_compl_result() call filling
 * 'res' from 'ev' before 'res.user_ptr' is read. */
static void wait_compl_event(sd_t *sd, stats_t *stats)
{
	uint64_t start_cc, end_cc, wait_cc, trs_tm, trs_cc, start_cc_diff;
	odp_time_t start_tm;
	odp_event_t ev;
	odp_dma_result_t res;
	trs_info_t *info;
	int ret;

	/* Cycle count brackets the scheduler wait. */
	start_cc = odp_cpu_cycles();
	end_cc = odp_cpu_cycles();

	/* No completion event arrived within the scheduler wait time. */
	if (odp_unlikely(ev == ODP_EVENT_INVALID)) {
		++stats->scheduler_timeouts;
		return;
	}

	info = res.user_ptr;
	/* Transfer latency: wall-clock ns and CPU cycles since the start call. */
	trs_tm = odp_time_diff_ns(odp_time_global_strict(), info->trs_start_tm);
	stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm);
	stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm);
	stats->trs_tm += trs_tm;
	trs_cc = odp_cpu_cycles_diff(odp_cpu_cycles(), info->trs_start_cc);
	stats->max_trs_cc = ODPH_MAX(trs_cc, stats->max_trs_cc);
	stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc);
	stats->trs_cc += trs_cc;
	++stats->trs_cnt;
	/* Cycles spent waiting in the scheduler for this event. */
	wait_cc = odp_cpu_cycles_diff(end_cc, start_cc);
	stats->max_wait_cc = ODPH_MAX(wait_cc, stats->max_wait_cc);
	stats->min_wait_cc = ODPH_MIN(wait_cc, stats->min_wait_cc);
	stats->wait_cc += wait_cc;
	++stats->wait_cnt;

	if (odp_unlikely(!res.success)) {
		++stats->transfer_errs;
	} else {
		++stats->completed;

		/* Optional verification of destination segment contents. */
		if (sd->ver_fn != NULL)
			sd->ver_fn(info, stats);
	}

	/* Re-prepare sparse segments if needed, then restart this slot. */
	if (sd->prep_trs_fn != NULL)
		sd->prep_trs_fn(sd, info);

	start_tm = odp_time_global_strict();
	start_cc = odp_cpu_cycles();
	ret = odp_dma_transfer_start(sd->dma.handle, &info->trs_param, &info->compl_param);
	end_cc = odp_cpu_cycles();

	if (odp_unlikely(ret <= 0)) {
		++stats->start_errs;
	} else {
		info->trs_start_tm = start_tm;
		info->trs_start_cc = start_cc;
		/* Cycles spent inside the start call itself. */
		start_cc_diff = odp_cpu_cycles_diff(end_cc, start_cc);
		stats->max_start_cc = ODPH_MAX(start_cc_diff, stats->max_start_cc);
		stats->min_start_cc = ODPH_MIN(start_cc_diff, stats->min_start_cc);
		stats->start_cc += start_cc_diff;
		++stats->start_cnt;
	}
}
1246 
/* Drain any completion events still queued in the scheduler after the test
 * stops, so no events leak past teardown.
 *
 * NOTE(review): the odp_schedule() call assigning 'ev' inside the loop is
 * not visible in this extract — confirm against the full source. */
static void drain_compl_events(ODP_UNUSED sd_t *sd)
{
	odp_event_t ev;

	while (true) {

		if (ev == ODP_EVENT_INVALID)
			break;
	}
}
1258 
/* Perform the SW-copy equivalent of one DMA transfer with memcpy() and
 * account the same timing statistics the DMA paths do. Walks the source
 * (index i) and destination (index j) segment lists simultaneously, copying
 * at most min(src_len, dst_len) bytes per step. For SW copy, the "start"
 * cycle statistics are aliased to the transfer cycle statistics, since there
 * is no separate start call. */
static void run_memcpy(trs_info_t *info, stats_t *stats, ver_fn_t ver_fn)
{
	odp_time_t start_tm;
	uint64_t start_cc, end_cc, trs_tm, trs_cc;
	const odp_dma_transfer_param_t *param = &info->trs_param;
	uint32_t tot_len, src_len, dst_len, min_len, len, i = 0U, j = 0U, src_off = 0U,
	dst_off = 0U, src_rem, dst_rem;
	const odp_bool_t is_addr = param->src_format == ODP_DMA_FORMAT_ADDR;
	uint8_t *src_data, *dst_data;

	/* Test data is configured so that total source and total destination sizes always match,
	 * all source and all destination segments have the same size and in case of packets,
	 * there's always just a single segment. */
	tot_len = param->num_src * param->src_seg->len;
	src_len = param->src_seg->len;
	dst_len = param->dst_seg->len;
	min_len = ODPH_MIN(src_len, dst_len);
	len = min_len;
	start_tm = odp_time_local_strict();
	start_cc = odp_cpu_cycles();

	while (tot_len > 0U) {
		/* Resolve current segment base: plain address or packet data. */
		if (is_addr) {
			src_data = param->src_seg[i].addr;
			dst_data = param->dst_seg[j].addr;
		} else {
			src_data = odp_packet_data(param->src_seg[i].packet);
			dst_data = odp_packet_data(param->dst_seg[j].packet);
		}

		memcpy(dst_data + dst_off, src_data + src_off, len);
		dst_off += len;
		src_off += len;
		src_rem = src_len - src_off;
		dst_rem = dst_len - dst_off;
		tot_len -= len;
		/* Next chunk: remainder of whichever segment is unfinished,
		 * capped at min_len. */
		len = ODPH_MIN(ODPH_MAX(src_rem, dst_rem), min_len);

		/* Destination still has room -> source segment exhausted:
		 * advance source; otherwise advance destination. */
		if (dst_rem > 0U) {
			++i;
			src_off = 0U;
		} else {
			++j;
			dst_off = 0U;
		}
	}

	end_cc = odp_cpu_cycles();
	trs_tm = odp_time_diff_ns(odp_time_local_strict(), start_tm);
	stats->max_trs_tm = ODPH_MAX(trs_tm, stats->max_trs_tm);
	stats->min_trs_tm = ODPH_MIN(trs_tm, stats->min_trs_tm);
	stats->trs_tm += trs_tm;
	trs_cc = odp_cpu_cycles_diff(end_cc, start_cc);
	stats->max_trs_cc = ODPH_MAX(trs_cc, stats->max_trs_cc);
	stats->min_trs_cc = ODPH_MIN(trs_cc, stats->min_trs_cc);
	stats->trs_cc += trs_cc;
	++stats->trs_cnt;
	/* SW copy has no separate start phase: mirror transfer cycles into
	 * the start counters so reporting stays uniform across modes. */
	stats->max_start_cc = stats->max_trs_cc;
	stats->min_start_cc = stats->min_trs_cc;
	stats->start_cc += trs_cc;
	++stats->start_cnt;
	++stats->completed;

	if (ver_fn != NULL)
		ver_fn(info, stats);
}
1325 
1326 static void run_memcpy_mt_unsafe(sd_t *sd, stats_t *stats)
1327 {
1328  const uint32_t count = sd->dma.num_inflight;
1329  trs_info_t *infos = sd->dma.infos, *info;
1330 
1331  for (uint32_t i = 0U; i < count; ++i) {
1332  info = &infos[i];
1333 
1334  if (sd->prep_trs_fn != NULL)
1335  sd->prep_trs_fn(sd, info);
1336 
1337  run_memcpy(info, stats, sd->ver_fn);
1338  }
1339 }
1340 
1341 static void run_memcpy_mt_safe(sd_t *sd, stats_t *stats)
1342 {
1343  const uint32_t count = sd->dma.num_inflight;
1344  trs_info_t *infos = sd->dma.infos, *info;
1345 
1346  for (uint32_t i = 0U; i < count; ++i) {
1347  info = &infos[i];
1348 
1349  if (odp_ticketlock_trylock(&info->lock)) {
1350  if (sd->prep_trs_fn != NULL)
1351  sd->prep_trs_fn(sd, info);
1352 
1353  run_memcpy(info, stats, sd->ver_fn);
1354  odp_ticketlock_unlock(&info->lock);
1355  }
1356  }
1357 }
1358 
1359 static void setup_api(prog_config_t *config)
1360 {
1361  if (config->seg_type == DENSE_PACKET || config->seg_type == SPARSE_PACKET) {
1362  config->api.setup_fn = setup_packet_segments;
1363  config->api.trs_fn = configure_packet_transfer;
1364  config->api.free_fn = free_packets;
1365  } else {
1366  config->api.setup_fn = setup_memory_segments;
1367  config->api.trs_fn = configure_address_transfer;
1368  config->api.free_fn = free_memory;
1369  }
1370 
1371  if (config->trs_type == SYNC_DMA) {
1372  config->api.session_cfg_fn = NULL;
1373  config->api.compl_fn = NULL;
1374  config->api.bootstrap_fn = NULL;
1375  config->api.wait_fn = config->num_workers == 1 || config->policy == MANY ?
1376  run_transfers_mt_unsafe : run_transfers_mt_safe;
1377  config->api.drain_fn = NULL;
1378  } else if (config->trs_type == ASYNC_DMA) {
1379  if (config->compl_mode == POLL) {
1380  config->api.session_cfg_fn = NULL;
1381  config->api.compl_fn = configure_poll_compl;
1382  config->api.bootstrap_fn = NULL;
1383  config->api.wait_fn = config->num_workers == 1 || config->policy == MANY ?
1384  poll_transfers_mt_unsafe : poll_transfers_mt_safe;
1385  config->api.drain_fn = drain_poll_transfers;
1386  } else {
1387  config->api.session_cfg_fn = configure_event_compl_session;
1388  config->api.compl_fn = configure_event_compl;
1389  config->api.bootstrap_fn = start_initial_transfers;
1390  config->api.wait_fn = wait_compl_event;
1391  config->api.drain_fn = drain_compl_events;
1392  }
1393  } else {
1394  config->api.session_cfg_fn = NULL;
1395  config->api.compl_fn = NULL;
1396  config->api.bootstrap_fn = NULL;
1397  config->api.wait_fn = config->num_workers == 1 || config->policy == MANY ?
1398  run_memcpy_mt_unsafe : run_memcpy_mt_safe;
1399  config->api.drain_fn = NULL;
1400  }
1401 }
1402 
1403 static void prepare_packet_transfer(sd_t *sd, trs_info_t *info)
1404 {
1405  odp_dma_transfer_param_t *param = &info->trs_param;
1406  odp_dma_seg_t *seg;
1407 
1408  for (uint32_t i = 0U; i < param->num_src; ++i) {
1409  seg = &param->src_seg[i];
1410 
1411  if (odp_likely(seg->packet != ODP_PACKET_INVALID))
1412  odp_packet_free(seg->packet);
1413 
1414  seg->packet = odp_packet_alloc(sd->seg.src_pool, seg->len);
1415 
1416  if (odp_unlikely(seg->packet == ODP_PACKET_INVALID))
1417  /* There should always be enough packets. */
1418  ODPH_ABORT("Failed to allocate packet, aborting\n");
1419 
1420  fill_data(odp_packet_data(seg->packet), seg->len);
1421  }
1422 
1423  for (uint32_t i = 0U; i < param->num_dst; ++i) {
1424  seg = &param->dst_seg[i];
1425 
1426  if (odp_likely(seg->packet != ODP_PACKET_INVALID))
1427  odp_packet_free(seg->packet);
1428 
1429  seg->packet = odp_packet_alloc(sd->seg.dst_pool, seg->len);
1430 
1431  if (odp_unlikely(seg->packet == ODP_PACKET_INVALID))
1432  /* There should always be enough packets. */
1433  ODPH_ABORT("Failed to allocate packet, aborting\n");
1434  }
1435 }
1436 
1437 static void prepare_address_transfer(sd_t *sd, trs_info_t *info)
1438 {
1439  odp_dma_transfer_param_t *param = &info->trs_param;
1440  uint8_t *addr = sd->seg.cur_src;
1441  odp_dma_seg_t *seg;
1442 
1443  for (uint32_t i = 0U; i < param->num_src; ++i) {
1444  seg = &param->src_seg[i];
1445 
1446  if (odp_unlikely(addr > (uint8_t *)sd->seg.src_high))
1447  addr = sd->seg.src;
1448 
1449  seg->addr = addr;
1450  addr += sd->dma.src_seg_len;
1451  fill_data(seg->addr, seg->len);
1452  }
1453 
1454  sd->seg.cur_src = addr + ODP_CACHE_LINE_SIZE;
1455  addr = sd->seg.cur_dst;
1456 
1457  for (uint32_t i = 0U; i < param->num_dst; ++i) {
1458  if (odp_unlikely(addr > (uint8_t *)sd->seg.dst_high))
1459  addr = sd->seg.dst;
1460 
1461  param->dst_seg[i].addr = addr;
1462  addr += sd->dma.dst_seg_len;
1463  }
1464 
1465  sd->seg.cur_dst = addr + ODP_CACHE_LINE_SIZE;
1466 }
1467 
1468 static void verify_transfer(trs_info_t *info, stats_t *stats)
1469 {
1470  odp_dma_transfer_param_t *param = &info->trs_param;
1471  odp_dma_seg_t *seg;
1472  const odp_bool_t is_addr = param->dst_format == ODP_DMA_FORMAT_ADDR;
1473  uint8_t *data;
1474 
1475  for (uint32_t i = 0U; i < param->num_dst; ++i) {
1476  seg = &param->dst_seg[i];
1477  data = is_addr ? seg->addr : odp_packet_data(seg->packet);
1478 
1479  for (uint32_t j = 0U; j < seg->len; ++j)
1480  if (odp_unlikely(data[j] != DATA)) {
1481  ++stats->data_errs;
1482  return;
1483  }
1484  }
1485 }
1486 
/* Create one odp_dma_t per configured session, copy the per-session segment
 * and transfer settings into the descriptor, and pick the prepare/verify
 * callbacks for sparse and verification modes. Returns false on failure.
 *
 * NOTE(review): the dma_params initializer is partially missing from this
 * extract (at least the '.mt_mode' ternary result values and possibly other
 * designated members) — confirm against the full source. */
static odp_bool_t setup_session_descriptors(prog_config_t *config)
{
	sd_t *sd;
	const odp_dma_param_t dma_params = {
		.type = ODP_DMA_TYPE_COPY,
		.compl_mode_mask = config->compl_mode_mask,
		.mt_mode = config->num_workers == 1 || config->policy == MANY ?
		.order = ODP_DMA_ORDER_NONE };

	for (uint32_t i = 0U; i < config->num_sessions; ++i) {
		char name[ODP_DMA_NAME_LEN];

		sd = &config->sds[i];
		sd->dma.num_in_segs = config->num_in_segs;
		sd->dma.num_out_segs = config->num_out_segs;
		sd->dma.src_seg_len = config->src_seg_len;
		sd->dma.dst_seg_len = config->dst_seg_len;
		sd->dma.num_inflight = config->num_inflight;
		sd->dma.trs_type = config->trs_type;
		sd->dma.compl_mode = config->compl_mode;
		snprintf(name, sizeof(name), PROG_NAME "_dma_%u", i);
		sd->dma.handle = odp_dma_create(name, &dma_params);

		if (sd->dma.handle == ODP_DMA_INVALID) {
			ODPH_ERR("Error creating DMA session\n");
			return false;
		}

		/* Event-completion mode sets up its group/pool/queue here. */
		if (config->api.session_cfg_fn != NULL && !config->api.session_cfg_fn(sd))
			return false;

		sd->seg.shm_size = config->shm_size;
		sd->seg.seg_type = config->seg_type;
		/* Sparse modes re-prepare segments before every transfer. */
		sd->prep_trs_fn = config->seg_type == SPARSE_PACKET ? prepare_packet_transfer :
					config->seg_type == SPARSE_MEMORY ?
						prepare_address_transfer : NULL;
		sd->ver_fn = config->is_verify ? verify_transfer : NULL;
	}

	return true;
}
1530 
1531 static odp_bool_t setup_data(prog_config_t *config)
1532 {
1533  sd_t *sd;
1534 
1535  for (uint32_t i = 0U; i < config->num_sessions; ++i) {
1536  sd = &config->sds[i];
1537 
1538  if (!config->api.setup_fn(sd))
1539  return false;
1540 
1541  config->api.trs_fn(sd);
1542 
1543  if (config->api.compl_fn != NULL && !config->api.compl_fn(sd))
1544  return false;
1545  }
1546 
1547  return true;
1548 }
1549 
1550 static int transfer(void *args)
1551 {
1552  thread_config_t *thr_config = args;
1553  prog_config_t *prog_config = thr_config->prog_config;
1554  sd_t *sd = thr_config->sd;
1555  stats_t *stats = &thr_config->stats;
1556  test_api_t *api = &prog_conf->api;
1557  odp_thrmask_t mask;
1558  odp_time_t start_tm;
1559 
1560  odp_barrier_wait(&prog_config->init_barrier);
1561 
1562  if (sd->grp != ODP_SCHED_GROUP_INVALID) {
1563  odp_thrmask_zero(&mask);
1564  odp_thrmask_set(&mask, odp_thread_id());
1565 
1566  if (odp_schedule_group_join(sd->grp, &mask) < 0) {
1567  ODPH_ERR("Error joining scheduler group\n");
1568  goto out;
1569  }
1570  }
1571 
1572  start_tm = odp_time_local_strict();
1573 
1574  while (odp_atomic_load_u32(&prog_config->is_running))
1575  api->wait_fn(sd, stats);
1576 
1577  thr_config->stats.tot_tm = odp_time_diff_ns(odp_time_local_strict(), start_tm);
1578 
1579  if (api->drain_fn != NULL)
1580  api->drain_fn(sd);
1581 
1582 out:
1583  odp_barrier_wait(&prog_config->term_barrier);
1584 
1585  return 0;
1586 }
1587 
1588 static odp_bool_t setup_workers(prog_config_t *config)
1589 {
1590  odp_cpumask_t cpumask;
1591  int num_workers;
1592  odph_thread_common_param_t thr_common;
1593  odph_thread_param_t thr_params[config->num_workers], *thr_param;
1594  thread_config_t *thr_config;
1595  sd_t *sd;
1596 
1597  /* Barrier init count for control and worker. */
1598  odp_barrier_init(&config->init_barrier, config->num_workers + 1);
1599  odp_barrier_init(&config->term_barrier, config->num_workers);
1600  num_workers = odp_cpumask_default_worker(&cpumask, config->num_workers);
1601  odph_thread_common_param_init(&thr_common);
1602  thr_common.instance = config->odp_instance;
1603  thr_common.cpumask = &cpumask;
1604 
1605  for (int i = 0; i < config->num_workers; ++i) {
1606  thr_param = &thr_params[i];
1607  thr_config = &config->thread_config[i];
1608  sd = config->policy == SINGLE ? &config->sds[0U] : &config->sds[i];
1609 
1610  odph_thread_param_init(thr_param);
1611  thr_param->start = transfer;
1612  thr_param->thr_type = ODP_THREAD_WORKER;
1613  thr_config->prog_config = config;
1614  thr_config->sd = sd;
1615  thr_param->arg = thr_config;
1616  }
1617 
1618  num_workers = odph_thread_create(config->threads, &thr_common, thr_params, num_workers);
1619 
1620  if (num_workers != config->num_workers) {
1621  ODPH_ERR("Error configuring worker threads\n");
1622  return false;
1623  }
1624 
1625  for (uint32_t i = 0U; i < config->num_sessions; ++i) {
1626  if (config->api.bootstrap_fn != NULL && !config->api.bootstrap_fn(&config->sds[i]))
1627  return false;
1628  }
1629 
1630  odp_barrier_wait(&config->init_barrier);
1631 
1632  return true;
1633 }
1634 
1635 static odp_bool_t setup_test(prog_config_t *config)
1636 {
1637  setup_api(config);
1638 
1639  return setup_session_descriptors(config) && setup_data(config) && setup_workers(config);
1640 }
1641 
/* Wait for all worker threads to exit; the join result is intentionally
 * ignored since teardown proceeds regardless. */
static void stop_test(prog_config_t *config)
{
	(void)odph_thread_join(config->threads, config->num_workers);
}
1646 
1647 static void teardown_data(const sd_t *sd, void (*free_fn)(const sd_t *sd))
1648 {
1649  const odp_dma_compl_param_t *compl_param;
1650 
1651  for (uint32_t i = 0U; i < MAX_SEGS; ++i) {
1652  compl_param = &sd->dma.infos[i].compl_param;
1653 
1654  if (compl_param->transfer_id != ODP_DMA_TRANSFER_ID_INVALID)
1655  odp_dma_transfer_id_free(sd->dma.handle, compl_param->transfer_id);
1656 
1657  if (compl_param->event != ODP_EVENT_INVALID)
1658  odp_event_free(compl_param->event);
1659  }
1660 
1661  free_fn(sd);
1662 }
1663 
1664 static void teardown_test(prog_config_t *config)
1665 {
1666  sd_t *sd;
1667 
1668  for (uint32_t i = 0U; i < config->num_sessions; ++i) {
1669  sd = &config->sds[i];
1670  teardown_data(sd, config->api.free_fn);
1671 
1672  if (sd->dma.compl_q != ODP_QUEUE_INVALID)
1673  (void)odp_queue_destroy(sd->dma.compl_q);
1674 
1675  if (sd->dma.pool != ODP_POOL_INVALID)
1676  (void)odp_pool_destroy(sd->dma.pool);
1677 
1678  if (sd->grp != ODP_SCHED_GROUP_INVALID)
1679  (void)odp_schedule_group_destroy(sd->grp);
1680 
1681  if (sd->dma.handle != ODP_DMA_INVALID)
1682  (void)odp_dma_destroy(sd->dma.handle);
1683  }
1684 
1685  if (config->src_pool != ODP_POOL_INVALID)
1686  (void)odp_pool_destroy(config->src_pool);
1687 
1688  if (config->dst_pool != ODP_POOL_INVALID)
1689  (void)odp_pool_destroy(config->dst_pool);
1690 }
1691 
1692 static void print_humanised(uint64_t value, const char *type)
1693 {
1694  if (value > GIGAS)
1695  printf("%.2f G%s\n", (double)value / GIGAS, type);
1696  else if (value > MEGAS)
1697  printf("%.2f M%s\n", (double)value / MEGAS, type);
1698  else if (value > KILOS)
1699  printf("%.2f k%s\n", (double)value / KILOS, type);
1700  else
1701  printf("%" PRIu64 " %s\n", value, type);
1702 }
1703 
/* Print per-worker and aggregated results, and optionally export them as
 * CSV via the common export facility. Per-session breakdown is printed only
 * with the MANY policy (one session per worker). Returns 0 on success, -1
 * when CSV export fails. */
static int output_results(const prog_config_t *config)
{
	const stats_t *stats;
	uint64_t data_cnt = config->num_in_segs * config->src_seg_len, tot_completed = 0U,
	tot_tm = 0U, tot_trs_tm = 0U, tot_trs_cc = 0U, tot_trs_cnt = 0U, tot_min_tm = UINT64_MAX,
	tot_max_tm = 0U, tot_min_cc = UINT64_MAX, tot_max_cc = 0U, avg_start_cc,
	avg_start_cc_tot = 0U, min_start = UINT64_MAX, max_start = 0U, avg_wait_cc,
	avg_wait_cc_tot = 0U, min_wait = UINT64_MAX, max_wait = 0U, start_cnt_sum = 0U,
	wait_cnt_sum = 0U;
	double avg_tot_tm;

	/* Header: describe the tested configuration in human terms. */
	printf("\n======================\n\n"
	       "DMA performance test done\n\n"
	       " mode: %s\n"
	       " input segment count: %u\n"
	       " output segment count: %u\n"
	       " segment length: %u\n"
	       " segment type: %s\n"
	       " inflight count: %u\n"
	       " session policy: %s\n\n",
	       config->trs_type == SYNC_DMA ? "DMA synchronous" :
	       config->trs_type == ASYNC_DMA && config->compl_mode == POLL ?
	       "DMA asynchronous-poll" :
	       config->trs_type == ASYNC_DMA && config->compl_mode == EVENT ?
	       "DMA asynchronous-event" : "SW", config->num_in_segs,
	       config->num_out_segs, config->src_seg_len,
	       config->seg_type == DENSE_PACKET ? "dense packet" :
	       config->seg_type == SPARSE_PACKET ? "sparse packet" :
	       config->seg_type == DENSE_MEMORY ? "dense memory" : "sparse memory",
	       config->num_inflight, config->policy == SINGLE ? "shared" : "per-worker");

	/* Per-worker reporting; totals are accumulated along the way. */
	for (int i = 0; i < config->num_workers; ++i) {
		stats = &config->thread_config[i].stats;
		tot_completed += stats->completed;
		tot_tm += stats->tot_tm;
		tot_trs_tm += stats->trs_tm;
		tot_trs_cc += stats->trs_cc;
		tot_trs_cnt += stats->trs_cnt;
		tot_min_tm = ODPH_MIN(tot_min_tm, stats->min_trs_tm);
		tot_max_tm = ODPH_MAX(tot_max_tm, stats->max_trs_tm);
		tot_min_cc = ODPH_MIN(tot_min_cc, stats->min_trs_cc);
		tot_max_cc = ODPH_MAX(tot_max_cc, stats->max_trs_cc);
		avg_start_cc = 0U;
		avg_wait_cc = 0U;

		printf(" worker %d:\n", i);
		printf(" successful transfers: %" PRIu64 "\n"
		       " start errors: %" PRIu64 "\n",
		       stats->completed, stats->start_errs);

		if (config->trs_type == ASYNC_DMA) {
			if (config->compl_mode == POLL)
				printf(" poll errors: %" PRIu64 "\n",
				       stats->poll_errs);
			else
				printf(" scheduler timeouts: %" PRIu64 "\n",
				       stats->scheduler_timeouts);
		}

		printf(" transfer errors: %" PRIu64 "\n", stats->transfer_errs);

		if (config->is_verify)
			printf(" data errors: %" PRIu64 "\n", stats->data_errs);

		printf(" run time: %" PRIu64 " ns\n", stats->tot_tm);

		/* Per-session numbers are meaningful only with one session
		 * per worker (MANY policy). */
		if (config->policy == MANY) {
			printf(" session:\n"
			       " average time per transfer: %" PRIu64 " "
			       "(min: %" PRIu64 ", max: %" PRIu64 ") ns\n"
			       " average cycles per transfer: %" PRIu64 " "
			       "(min: %" PRIu64 ", max: %" PRIu64 ")\n"
			       " ops: ",
			       stats->trs_cnt > 0U ? stats->trs_tm / stats->trs_cnt : 0U,
			       stats->trs_cnt > 0U ? stats->min_trs_tm : 0U,
			       stats->trs_cnt > 0U ? stats->max_trs_tm : 0U,
			       stats->trs_cnt > 0U ? stats->trs_cc / stats->trs_cnt : 0U,
			       stats->trs_cnt > 0U ? stats->min_trs_cc : 0U,
			       stats->trs_cnt > 0U ? stats->max_trs_cc : 0U);
			print_humanised(stats->completed /
					((double)stats->tot_tm / ODP_TIME_SEC_IN_NS),
					"OPS");
			printf(" speed: ");
			print_humanised(stats->completed * data_cnt /
					((double)stats->tot_tm / ODP_TIME_SEC_IN_NS), "B/s");
		}

		if (stats->start_cnt > 0U) {
			avg_start_cc = stats->start_cc / stats->start_cnt;
			start_cnt_sum += stats->start_cnt;
			avg_start_cc_tot += stats->start_cc;
			min_start = stats->min_start_cc < min_start ?
					stats->min_start_cc : min_start;
			max_start = stats->max_start_cc > max_start ?
					stats->max_start_cc : max_start;
		}

		printf(" average cycles breakdown:\n");

		/* The "start" label depends on the transfer engine in use. */
		if (config->trs_type == SYNC_DMA) {
			printf(" odp_dma_transfer(): %" PRIu64 " "
			       "(min: %" PRIu64 ", max: %" PRIu64 ")\n", avg_start_cc,
			       avg_start_cc > 0U ? stats->min_start_cc : 0U,
			       avg_start_cc > 0U ? stats->max_start_cc : 0U);
		} else if (config->trs_type == SW_COPY) {
			printf(" memcpy(): %" PRIu64 " "
			       "(min: %" PRIu64 ", max: %" PRIu64 ")\n", avg_start_cc,
			       avg_start_cc > 0U ? stats->min_start_cc : 0U,
			       avg_start_cc > 0U ? stats->max_start_cc : 0U);
		} else {
			printf(" odp_dma_transfer_start(): %" PRIu64 " "
			       "(min: %" PRIu64 ", max: %" PRIu64 ")\n", avg_start_cc,
			       avg_start_cc > 0U ? stats->min_start_cc : 0U,
			       avg_start_cc > 0U ? stats->max_start_cc : 0U);

			if (stats->wait_cnt > 0U) {
				avg_wait_cc = stats->wait_cc / stats->wait_cnt;
				wait_cnt_sum += stats->wait_cnt;
				avg_wait_cc_tot += stats->wait_cc;
				min_wait = stats->min_wait_cc < min_wait ?
						stats->min_wait_cc : min_wait;
				max_wait = stats->max_wait_cc > max_wait ?
						stats->max_wait_cc : max_wait;
			}

			if (config->compl_mode == POLL) {
				printf(" odp_dma_transfer_done(): %" PRIu64 ""
				       " (min: %" PRIu64 ", max: %" PRIu64 ", x%" PRIu64 ""
				       " per transfer)\n", avg_wait_cc,
				       avg_wait_cc > 0U ? stats->min_wait_cc : 0U,
				       avg_wait_cc > 0U ? stats->max_wait_cc : 0U,
				       stats->trs_cnt > 0U ?
						stats->trs_poll_cnt / stats->trs_cnt : 0U);
			} else {
				printf(" odp_schedule(): %" PRIu64 " "
				       " (min: %" PRIu64 ", max: %" PRIu64 ")\n", avg_wait_cc,
				       avg_wait_cc > 0U ? stats->min_wait_cc : 0U,
				       avg_wait_cc > 0U ? stats->max_wait_cc : 0U);
			}
		}

		printf("\n");
	}
	avg_start_cc_tot = start_cnt_sum > 0U ? avg_start_cc_tot / start_cnt_sum : 0U;
	avg_wait_cc_tot = wait_cnt_sum > 0U ? avg_wait_cc_tot / wait_cnt_sum : 0U;

	/* Aggregate across workers: average run time in seconds. */
	avg_tot_tm = (double)tot_tm / config->num_workers / ODP_TIME_SEC_IN_NS;
	printf(" total:\n"
	       " average time per transfer: %" PRIu64 " (min: %" PRIu64
	       ", max: %" PRIu64 ") ns\n"
	       " average cycles per transfer: %" PRIu64 " (min: %" PRIu64
	       ", max: %" PRIu64 ")\n"
	       " ops: ",
	       tot_trs_cnt > 0U ? tot_trs_tm / tot_trs_cnt : 0U,
	       tot_trs_cnt > 0U ? tot_min_tm : 0U,
	       tot_trs_cnt > 0U ? tot_max_tm : 0U,
	       tot_trs_cnt > 0U ? tot_trs_cc / tot_trs_cnt : 0U,
	       tot_trs_cnt > 0U ? tot_min_cc : 0U,
	       tot_trs_cnt > 0U ? tot_max_cc : 0U);
	print_humanised(avg_tot_tm > 0U ? tot_completed / avg_tot_tm : 0U, "OPS");
	printf(" speed: ");
	print_humanised(avg_tot_tm > 0U ? tot_completed * data_cnt / avg_tot_tm : 0U, "B/s");
	printf("\n");
	printf("======================\n");

	/* Optional CSV export: header row, common columns, then the columns
	 * specific to the transfer engine/completion mode. */
	if (config->common_options.is_export) {
		/* Write header */
		if (test_common_write("time per transfer avg (ns),time per transfer min (ns),"
				      "time per transfer max (ns),cycles per transfer avg,"
				      "cycles per transfer min,cycles per transfer max,"
				      "ops (OPS),speed (B/s),dma_transfer avg,"
				      "dma_transfer min,dma_transfer max,memcpy avg,memcpy min,"
				      "memcpy max,dma_transfer_start avg,dma_transfer_start min,"
				      "dma_transfer_start max,dma_transfer_done avg,"
				      "dma_transfer_done min,dma_transfer_done max,schedule avg,"
				      "schedule min,schedule max\n"))
			goto exit;
		/* Write the values always present, disregarding parameters */
		if (test_common_write("%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ","
				      "%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",",
				      tot_trs_cnt > 0U ? tot_trs_tm / tot_trs_cnt : 0U,
				      tot_trs_cnt > 0U ? tot_min_tm : 0U,
				      tot_trs_cnt > 0U ? tot_max_tm : 0U,
				      tot_trs_cnt > 0U ? tot_trs_cc / tot_trs_cnt : 0U,
				      tot_trs_cnt > 0U ? tot_min_cc : 0U,
				      tot_trs_cnt > 0U ? tot_max_cc : 0U,
				      avg_tot_tm > 0U ? (uint64_t)(tot_completed / avg_tot_tm) : 0U,
				      avg_tot_tm > 0U ?
				      (uint64_t)(tot_completed * data_cnt / avg_tot_tm) : 0U))
			goto exit;
		/* Write the function specific values */
		if (config->trs_type == SYNC_DMA) {
			if (test_common_write("%" PRIu64 ",%" PRIu64 ",%" PRIu64 ","
					      "0,0,0,0,0,0,0,0,0,0,0,0\n",
					      avg_start_cc_tot,
					      avg_start_cc_tot > 0U ? min_start : 0U,
					      avg_start_cc_tot > 0U ? max_start : 0U))
				goto exit;
		} else if (config->trs_type == SW_COPY) {
			if (test_common_write("0,0,0 %" PRIu64 ",%" PRIu64 ",%" PRIu64 ","
					      "0,0,0,0,0,0,0,0,0\n",
					      avg_start_cc_tot,
					      avg_start_cc_tot > 0U ? min_start : 0U,
					      avg_start_cc_tot > 0U ? max_start : 0U))
				goto exit;
		} else if (config->trs_type == ASYNC_DMA) {
			if (test_common_write("0,0,0,0,0,0, %" PRIu64 ",%" PRIu64 ",%" PRIu64 ",",
					      avg_start_cc_tot,
					      avg_start_cc_tot > 0U ? min_start : 0U,
					      avg_start_cc_tot > 0U ? max_start : 0U))
				goto exit;

			if (config->compl_mode == POLL) {
				if (test_common_write("%" PRIu64 ",%" PRIu64 ",%" PRIu64 ","
						      "0,0,0\n",
						      avg_wait_cc_tot,
						      avg_wait_cc_tot > 0U ? min_wait : 0U,
						      avg_wait_cc_tot > 0U ? max_wait : 0U))
					goto exit;
			} else if (config->compl_mode == EVENT) {
				if (test_common_write("0,0,0 %" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
						      avg_wait_cc_tot,
						      avg_wait_cc_tot > 0U ? min_wait : 0U,
						      avg_wait_cc_tot > 0U ? max_wait : 0U))
					goto exit;
			}
		}
		test_common_write_term();
	}

	return 0;

exit:
	ODPH_ERR("Export failed\n");
	test_common_write_term();
	return -1;
}
1941 
/* Program entry point: parse helper/common/program options, initialize ODP,
 * reserve the program config in shared memory, run the timed test and print
 * the results. Exit code: EXIT_SUCCESS, EXIT_FAILURE, or EXIT_NOT_SUP when
 * the configuration is not supported by the implementation.
 *
 * NOTE(review): three lines are missing from this extract — the
 * 'odp_instance_t odp_instance;' declaration, the 'if (odp_init_local(...))'
 * line, and the 'if (odp_term_global(...))' line — confirm against the full
 * source. */
int main(int argc, char **argv)
{
	odph_helper_options_t odph_opts;
	odp_init_t init_param;
	odp_shm_t shm_cfg = ODP_SHM_INVALID;
	parse_result_t parse_res;
	int ret = EXIT_SUCCESS;
	test_common_options_t common_options;

	argc = odph_parse_options(argc, argv);

	if (odph_options(&odph_opts)) {
		ODPH_ERR("Error while reading ODP helper options, exiting\n");
		exit(EXIT_FAILURE);
	}

	argc = test_common_parse_options(argc, argv);
	if (test_common_options(&common_options)) {
		ODPH_ERR("Error while reading test options, exiting\n");
		exit(EXIT_FAILURE);
	}

	odp_init_param_init(&init_param);
	init_param.mem_model = odph_opts.mem_model;

	if (odp_init_global(&odp_instance, &init_param, NULL)) {
		ODPH_ERR("ODP global init failed, exiting\n");
		exit(EXIT_FAILURE);
	}

	/* NOTE(review): the 'if (odp_init_local(...))' line presumably opens
	 * this error branch; it is missing from this extract. */
		ODPH_ERR("ODP local init failed, exiting\n");
		exit(EXIT_FAILURE);
	}

	/* Program configuration lives in SHM so workers can share it. */
	shm_cfg = odp_shm_reserve(PROG_NAME "_cfg", sizeof(prog_config_t), ODP_CACHE_LINE_SIZE,
				  0U);

	if (shm_cfg == ODP_SHM_INVALID) {
		ODPH_ERR("Error reserving shared memory\n");
		ret = EXIT_FAILURE;
		goto out;
	}

	prog_conf = odp_shm_addr(shm_cfg);

	if (prog_conf == NULL) {
		ODPH_ERR("Error resolving shared memory address\n");
		ret = EXIT_FAILURE;
		goto out;
	}

	parse_res = setup_program(argc, argv, prog_conf);

	if (parse_res == PRS_NOK) {
		ret = EXIT_FAILURE;
		goto out;
	}

	if (parse_res == PRS_TERM) {
		ret = EXIT_SUCCESS;
		goto out;
	}

	if (parse_res == PRS_NOT_SUP) {
		ret = EXIT_NOT_SUP;
		goto out;
	}

	if (odp_schedule_config(NULL) < 0) {
		ODPH_ERR("Error configuring scheduler\n");
		ret = EXIT_FAILURE;
		goto out;
	}

	prog_conf->odp_instance = odp_instance;
	odp_atomic_init_u32(&prog_conf->is_running, 1U);

	if (!setup_test(prog_conf)) {
		ret = EXIT_FAILURE;
		goto out_test;
	}

	/* Timed run: sleep for the configured duration, then signal stop. */
	if (prog_conf->time_sec > 0.001) {
		struct timespec ts;

		ts.tv_sec = prog_conf->time_sec;
		ts.tv_nsec = (prog_conf->time_sec - ts.tv_sec) * ODP_TIME_SEC_IN_NS;
		nanosleep(&ts, NULL);
		odp_atomic_store_u32(&prog_conf->is_running, 0U);
	}

	stop_test(prog_conf);

	prog_conf->common_options = common_options;

	output_results(prog_conf);

out_test:
	/* Release all resources that have been allocated during 'setup_test()'. */
	teardown_test(prog_conf);

out:
	if (shm_cfg != ODP_SHM_INVALID)
		(void)odp_shm_free(shm_cfg);

	if (odp_term_local()) {
		ODPH_ERR("ODP local terminate failed, exiting\n");
		exit(EXIT_FAILURE);
	}

	/* NOTE(review): the 'if (odp_term_global(odp_instance))' line
	 * presumably opens this error branch; it is missing from this
	 * extract. */
		ODPH_ERR("ODP global terminate failed, exiting\n");
		exit(EXIT_FAILURE);
	}

	return ret;
}
void odp_atomic_init_u32(odp_atomic_u32_t *atom, uint32_t val)
Initialize atomic uint32 variable.
uint32_t odp_atomic_load_u32(odp_atomic_u32_t *atom)
Load value of atomic uint32 variable.
void odp_atomic_store_u32(odp_atomic_u32_t *atom, uint32_t val)
Store value to atomic uint32 variable.
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
#define ODP_ALIGNED_CACHE
Defines type/struct/variable to be cache line size aligned.
#define odp_unlikely(x)
Branch unlikely taken.
Definition: spec/hints.h:64
#define ODP_UNUSED
Intentionally unused variables of functions.
Definition: spec/hints.h:54
#define odp_likely(x)
Branch likely taken.
Definition: spec/hints.h:59
uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
CPU cycle count difference.
uint64_t odp_cpu_cycles(void)
Current CPU cycle count.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
odp_dma_t odp_dma_create(const char *name, const odp_dma_param_t *param)
Create DMA session.
#define ODP_DMA_COMPL_SYNC
Synchronous transfer.
odp_pool_t odp_dma_pool_create(const char *name, const odp_dma_pool_param_t *pool_param)
Create DMA completion event pool.
uint32_t odp_dma_compl_mode_t
DMA transfer completion mode.
int odp_dma_transfer_done(odp_dma_t dma, odp_dma_transfer_id_t transfer_id, odp_dma_result_t *result)
Check if DMA transfer has completed.
#define ODP_DMA_TYPE_COPY
Copy data.
void odp_dma_transfer_id_free(odp_dma_t dma, odp_dma_transfer_id_t transfer_id)
Free DMA transfer identifier.
#define ODP_DMA_COMPL_EVENT
Asynchronous transfer with completion event.
void odp_dma_transfer_param_init(odp_dma_transfer_param_t *trs_param)
Initialize DMA transfer parameters.
int odp_dma_destroy(odp_dma_t dma)
Destroy DMA session.
int odp_dma_compl_result(odp_dma_compl_t dma_compl, odp_dma_result_t *result)
Check DMA completion event.
void odp_dma_compl_param_init(odp_dma_compl_param_t *compl_param)
Initialize DMA transfer completion parameters.
#define ODP_DMA_NAME_LEN
Maximum DMA name length, including the null character.
#define ODP_DMA_TRANSFER_ID_INVALID
Invalid DMA transfer identifier.
#define ODP_DMA_COMPL_INVALID
Invalid DMA completion event.
odp_event_t odp_dma_compl_to_event(odp_dma_compl_t dma_compl)
Convert DMA completion event to event.
void odp_dma_pool_param_init(odp_dma_pool_param_t *pool_param)
Initialize DMA completion event pool parameters.
int odp_dma_transfer_start(odp_dma_t dma, const odp_dma_transfer_param_t *trs_param, const odp_dma_compl_param_t *compl_param)
Start DMA transfer.
int odp_dma_transfer(odp_dma_t dma, const odp_dma_transfer_param_t *trs_param, odp_dma_result_t *result)
Perform DMA transfer.
odp_dma_compl_t odp_dma_compl_from_event(odp_event_t ev)
Convert event to DMA completion event.
#define ODP_DMA_MAIN_TO_MAIN
DMA transfer within the main memory.
int odp_dma_capability(odp_dma_capability_t *capa)
Query DMA capabilities.
odp_dma_transfer_id_t odp_dma_transfer_id_alloc(odp_dma_t dma)
Allocate DMA transfer identifier.
#define ODP_DMA_INVALID
Invalid DMA session.
#define ODP_DMA_COMPL_POLL
Asynchronous transfer with completion polling.
odp_dma_compl_t odp_dma_compl_alloc(odp_pool_t pool)
Allocate DMA completion event.
@ ODP_DMA_MT_SAFE
Multi-thread safe operation.
@ ODP_DMA_MT_SERIAL
Application serializes operations.
@ ODP_DMA_FORMAT_PACKET
Data format is odp_packet_t.
@ ODP_DMA_FORMAT_ADDR
Data format is raw memory address.
@ ODP_DMA_ORDER_NONE
No specific ordering between transfers.
void odp_event_free(odp_event_t event)
Free event.
#define ODP_EVENT_INVALID
Invalid event.
int odp_instance(odp_instance_t *instance)
Get instance handle.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
void odp_ticketlock_init(odp_ticketlock_t *tklock)
Initialize ticket lock.
int odp_ticketlock_trylock(odp_ticketlock_t *tklock)
Try to acquire ticket lock.
void odp_ticketlock_unlock(odp_ticketlock_t *tklock)
Release ticket lock.
void * odp_packet_data(odp_packet_t pkt)
Packet data pointer.
odp_packet_t odp_packet_alloc(odp_pool_t pool, uint32_t len)
Allocate a packet from a packet pool.
void odp_packet_free(odp_packet_t pkt)
Free packet.
#define ODP_PACKET_INVALID
Invalid packet.
odp_pool_t odp_pool_create(const char *name, const odp_pool_param_t *param)
Create a pool.
int odp_pool_capability(odp_pool_capability_t *capa)
Query pool capabilities.
void odp_pool_param_init(odp_pool_param_t *param)
Initialize pool params.
int odp_pool_destroy(odp_pool_t pool)
Destroy a pool previously created by odp_pool_create()
#define ODP_POOL_INVALID
Invalid pool.
@ ODP_POOL_PACKET
Packet pool.
void odp_queue_param_init(odp_queue_param_t *param)
Initialize queue params.
#define ODP_QUEUE_INVALID
Invalid queue.
odp_queue_t odp_queue_create(const char *name, const odp_queue_param_t *param)
Queue create.
int odp_queue_destroy(odp_queue_t queue)
Destroy ODP queue.
@ ODP_QUEUE_TYPE_SCHED
Scheduled queue.
#define ODP_SCHED_SYNC_PARALLEL
Parallel scheduled queues.
int odp_schedule_group_t
Scheduler thread group.
int odp_schedule_group_join(odp_schedule_group_t group, const odp_thrmask_t *mask)
Join a schedule group.
int odp_schedule_group_destroy(odp_schedule_group_t group)
Schedule group destroy.
#define ODP_SCHED_GROUP_INVALID
Invalid scheduler group.
int odp_schedule_default_prio(void)
Default scheduling priority level.
int odp_schedule_config(const odp_schedule_config_t *config)
Global schedule configuration.
uint64_t odp_schedule_wait_time(uint64_t ns)
Schedule wait time.
int odp_schedule_capability(odp_schedule_capability_t *capa)
Query scheduler capabilities.
odp_schedule_group_t odp_schedule_group_create(const char *name, const odp_thrmask_t *mask)
Schedule group create.
odp_event_t odp_schedule(odp_queue_t *from, uint64_t wait)
Schedule an event.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
int odp_shm_capability(odp_shm_capability_t *capa)
Query shared memory capabilities.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
bool odp_bool_t
Boolean type.
void odp_thrmask_set(odp_thrmask_t *mask, int thr)
Add thread to mask.
int odp_thread_count_max(void)
Maximum thread count.
int odp_thread_id(void)
Get thread identifier.
void odp_thrmask_zero(odp_thrmask_t *mask)
Clear entire thread mask.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
#define ODP_TIME_SEC_IN_NS
A second in nanoseconds.
odp_time_t odp_time_global_strict(void)
Current global time (strict)
#define ODP_TIME_MSEC_IN_NS
A millisecond in nanoseconds.
odp_time_t odp_time_local_strict(void)
Current local time (strict)
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
The OpenDataPlane API.
uint32_t max_sessions
Maximum number of DMA sessions.
uint32_t max_transfers
Maximum number of transfers per DMA session.
odp_dma_pool_capability_t pool
DMA completion event pool capabilities.
uint32_t max_segs
Maximum number of destination and source segments combined in a single transfer.
uint32_t max_dst_segs
Maximum number of destination segments in a single transfer.
uint32_t max_src_segs
Maximum number of source segments in a single transfer.
odp_bool_t queue_type_sched
Scheduled queue support.
odp_dma_compl_mode_t compl_mode_mask
Supported completion modes.
uint32_t max_seg_len
Maximum segment length in bytes.
DMA transfer completion parameters.
odp_dma_transfer_id_t transfer_id
Transfer identifier.
void * user_ptr
User context pointer.
odp_event_t event
Completion event.
odp_dma_compl_mode_t compl_mode
Completion mode.
odp_queue_t queue
Completion queue.
DMA session parameters.
odp_dma_direction_t direction
Transfer direction.
uint32_t max_pools
Maximum number of DMA completion event pools.
uint32_t max_num
Maximum number of DMA completion events in a pool.
DMA completion event pool parameters.
uint32_t num
Number of DMA completion events in the pool.
DMA transfer results.
void * user_ptr
User context pointer.
odp_bool_t success
DMA transfer success.
odp_packet_t packet
Packet handle.
uint32_t len
Segment length in bytes.
uint32_t offset
Segment start offset into the packet.
void * addr
Segment start address in memory.
DMA transfer parameters.
odp_dma_seg_t * dst_seg
Table of destination segments.
odp_dma_data_format_t dst_format
Destination data format.
uint32_t num_dst
Number of destination segments.
uint32_t num_src
Number of source segments.
odp_dma_seg_t * src_seg
Table of source segments.
odp_dma_data_format_t src_format
Source data format.
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
struct odp_pool_capability_t::@122 pkt
Packet pool capabilities
uint32_t max_num
Maximum number of buffers of any size.
uint32_t min_cache_size
Minimum size of thread local cache.
uint32_t max_cache_size
Maximum size of thread local cache.
uint32_t max_pools
Maximum number of pools of any type (odp_pool_type_t)
uint32_t max_len
Maximum packet data length in bytes.
Pool parameters.
uint32_t num
Number of buffers in the pool.
uint32_t cache_size
Maximum number of buffers cached locally per thread.
odp_pool_type_t type
Pool type.
uint32_t len
Minimum length of 'num' packets.
uint32_t seg_len
Minimum number of packet data bytes that can be stored in the first segment of a newly allocated packet.
struct odp_pool_param_t::@126 pkt
Parameters for packet pools.
ODP Queue parameters.
odp_schedule_param_t sched
Scheduler parameters.
odp_queue_type_t type
Queue type.
uint32_t max_groups
Maximum number of scheduling groups.
odp_schedule_group_t group
Thread group.
odp_schedule_prio_t prio
Priority level.
odp_schedule_sync_t sync
Synchronization method.
Shared memory capabilities.
uint32_t max_blocks
Maximum number of shared memory blocks.
uint64_t max_size
Maximum memory block size in bytes.