API Reference Manual  1.46.0
odp_sched_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2018 Linaro Limited
3  * Copyright (c) 2020-2024 Nokia
4  */
5 
14 #ifndef _GNU_SOURCE
15 #define _GNU_SOURCE /* Needed for sigaction */
16 #endif
17 
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <stdint.h>
22 #include <inttypes.h>
23 #include <stdlib.h>
24 #include <getopt.h>
25 
26 #include <odp_api.h>
27 #include <odp/helper/odph_api.h>
28 
29 #include <export_results.h>
30 
31 #define MAX_QUEUES (256 * 1024)
32 #define MAX_GROUPS 256
33 
34 /* Limit data values to 16 bits. Large data values make square root calculations costly. */
35 #define DATA_MASK 0xffff
36 
37 /* Max time to wait for new events in nanoseconds */
38 #define MAX_SCHED_WAIT_NS (10 * ODP_TIME_SEC_IN_NS)
39 
40 /* Scheduling round interval to check for MAX_SCHED_WAIT_NS */
41 #define TIME_CHECK_INTERVAL (1024 * 1024)
42 
43 /* Round up 'X' to a multiple of 'NUM' */
44 #define ROUNDUP(X, NUM) ((NUM) * (((X) + (NUM) - 1) / (NUM)))
45 
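/* Example: ROUNDUP(10, 8) = 8 * ((10 + 7) / 8) = 16 and ROUNDUP(16, 8) = 16.
 * The macro is used below to pad context sizes up to 8 byte and cache line
 * boundaries. */
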
46 typedef struct test_options_t {
47  uint32_t num_cpu;
48  uint32_t num_queue;
49  uint32_t num_low;
50  uint32_t num_high;
51  uint32_t num_dummy;
52  uint32_t num_event;
53  uint32_t num_sched;
54  int num_group;
55  uint32_t num_join;
56  uint32_t max_burst;
57  odp_pool_type_t pool_type;
58  int queue_type;
59  int forward;
60  int fairness;
61  uint32_t event_size;
62  uint32_t queue_size;
63  uint32_t tot_queue;
64  uint32_t tot_event;
65  int touch_data;
66  uint32_t stress;
67  uint32_t rd_words;
68  uint32_t rw_words;
69  uint32_t ctx_size;
70  uint32_t ctx_rd_words;
71  uint32_t ctx_rw_words;
72  uint32_t tot_rd_size;
73  uint32_t tot_rw_size;
74  uint32_t uarea_rd;
75  uint32_t uarea_rw;
76  uint32_t uarea_size;
77  uint64_t wait_ns;
78  int verbose;
79 
80 } test_options_t;
81 
82 typedef struct test_stat_t {
83  uint64_t rounds;
84  uint64_t enqueues;
85  uint64_t events;
86  uint64_t nsec;
87  uint64_t cycles;
88  uint64_t waits;
89  uint64_t dummy_sum;
90  uint8_t failed;
91 
92 } test_stat_t;
93 
94 typedef struct thread_arg_t {
95  void *global;
96  int first_group;
97 
98 } thread_arg_t;
99 
100 typedef struct test_global_t {
101  test_options_t test_options;
102  odp_schedule_config_t schedule_config;
103  odp_barrier_t barrier;
104  odp_pool_t pool;
105  odp_cpumask_t cpumask;
106  odp_shm_t ctx_shm;
107  odp_queue_t queue[MAX_QUEUES];
108  odp_schedule_group_t group[MAX_GROUPS];
109  odph_thread_t thread_tbl[ODP_THREAD_COUNT_MAX];
110  test_stat_t stat[ODP_THREAD_COUNT_MAX];
111  thread_arg_t thread_arg[ODP_THREAD_COUNT_MAX];
112  odp_atomic_u32_t num_worker;
113  odp_atomic_u32_t exit_threads;
114  test_common_options_t common_options;
115 
116 } test_global_t;
117 
118 typedef struct {
119  odp_queue_t next;
120  odp_atomic_u64_t count;
121 } queue_context_t;
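/* Per-queue context: this struct is used when --forward (next queue handle)
 * or --fairness (event counter) is enabled. Extra context data selected with
 * --ctx_rd_words / --ctx_rw_words is stored after it at an 8 byte aligned
 * offset (or at offset zero when this struct is not needed). See ctx_size in
 * parse_options() and ctx_offset in test_sched(). */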
122 
123 static test_global_t *test_globals;
124 
125 static void sig_handler(int signum ODP_UNUSED)
126 {
127  odp_atomic_store_u32(&test_globals->exit_threads, 1);
128 }
129 
130 static int setup_sig_handler(void)
131 {
132  struct sigaction action = { .sa_handler = sig_handler };
133 
134  if (sigemptyset(&action.sa_mask) || sigaction(SIGINT, &action, NULL))
135  return -1;
136 
137  return 0;
138 }
139 
140 static void print_usage(void)
141 {
142  printf("\n"
143  "Scheduler performance test\n"
144  "\n"
145  "Usage: odp_sched_perf [options]\n"
146  "\n"
147  " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default: 1.\n"
148  " -q, --num_queue Number of queues. Default: 1.\n"
149  " -L, --num_low Number of lowest priority queues out of '--num_queue' queues. Rest of\n"
150  " the queues are default (or highest) priority. Default: 0.\n"
151  " -H, --num_high Number of highest priority queues out of '--num_queue' queues. Rest of\n"
152  " the queues are default (or lowest) priority. Default: 0.\n"
153  " -d, --num_dummy Number of empty queues. Default: 0.\n"
154  " -e, --num_event Number of events per queue. Default: 100.\n"
155  " -s, --num_sched Number of events to schedule per thread. If zero, the application runs\n"
156  " until SIGINT is received. Default: 100 000.\n"
157  " -g, --num_group Number of schedule groups. Round robins threads and queues into groups.\n"
158  " -1: SCHED_GROUP_WORKER\n"
159  " 0: SCHED_GROUP_ALL (default)\n"
160  " -j, --num_join Number of groups a thread joins. Threads are divided evenly into groups,\n"
161  " if num_cpu is a multiple of num_group and num_group is a multiple of num_join.\n"
162  " 0: join all groups (default)\n"
163  " -b, --burst Maximum number of events per operation. Default: 100.\n"
164  " -t, --type Queue type. 0: parallel, 1: atomic, 2: ordered. Default: 0.\n"
165  " -f, --forward 0: Keep event in the original queue, 1: Forward event to the next queue. Default: 0.\n"
166  " -F, --fairness 0: Don't count events per queue, 1: Count and report events relative to average. Default: 0.\n"
167  " -w, --wait_ns Number of nsec to wait before enqueueing events. Default: 0.\n"
168  " -S, --stress CPU stress function(s) to be called for each event data word (requires -n or -m).\n"
169  " Data is processed as uint32_t words. Multiple flags may be selected.\n"
170  " 0: No extra data processing (default)\n"
171  " 0x1: Calculate square of each uint32_t\n"
172  " 0x2: Calculate log2 of each uint32_t\n"
173  " 0x4: Calculate square root of each uint32_t\n"
174  " 0x8: Calculate square root of each uint32_t in floating point\n"
175  " -k, --ctx_rd_words Number of queue context words (uint64_t) to read on every event. Default: 0.\n"
176  " -l, --ctx_rw_words Number of queue context words (uint64_t) to modify on every event. Default: 0.\n"
177  " -n, --rd_words Number of event data words (uint64_t) to read before enqueueing it. Default: 0.\n"
178  " -m, --rw_words Number of event data words (uint64_t) to modify before enqueueing it. Default: 0.\n"
179  " -u, --uarea_rd Number of user area words (uint64_t) to read on every event. Default: 0.\n"
180  " -U, --uarea_rw Number of user area words (uint64_t) to modify on every event. Default: 0.\n"
181  " -p, --pool_type Pool type. 0: buffer, 1: packet. Default: 0.\n"
182  " -v, --verbose Verbose output.\n"
183  " -h, --help This help\n"
184  "\n");
185 }
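/* A typical invocation (hypothetical values, using the options listed above):
 *
 *   ./odp_sched_perf -c 4 -q 32 -e 64 -t 1 -f 1
 *
 * runs four worker threads on 32 atomic queues with 64 events per queue,
 * forwarding events to the next queue instead of back to the original one. */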
186 
187 static int parse_options(int argc, char *argv[], test_options_t *test_options)
188 {
189  int opt, num_group, num_join;
190  int ret = 0;
191  uint32_t ctx_size = 0;
192  int pool_type = 0;
193 
194  static const struct option longopts[] = {
195  {"num_cpu", required_argument, NULL, 'c'},
196  {"num_queue", required_argument, NULL, 'q'},
197  {"num_low", required_argument, NULL, 'L'},
198  {"num_high", required_argument, NULL, 'H'},
199  {"num_dummy", required_argument, NULL, 'd'},
200  {"num_event", required_argument, NULL, 'e'},
201  {"num_sched", required_argument, NULL, 's'},
202  {"num_group", required_argument, NULL, 'g'},
203  {"num_join", required_argument, NULL, 'j'},
204  {"burst", required_argument, NULL, 'b'},
205  {"type", required_argument, NULL, 't'},
206  {"forward", required_argument, NULL, 'f'},
207  {"fairness", required_argument, NULL, 'F'},
208  {"wait_ns", required_argument, NULL, 'w'},
209  {"stress", required_argument, NULL, 'S'},
210  {"ctx_rd_words", required_argument, NULL, 'k'},
211  {"ctx_rw_words", required_argument, NULL, 'l'},
212  {"rd_words", required_argument, NULL, 'n'},
213  {"rw_words", required_argument, NULL, 'm'},
214  {"uarea_rd", required_argument, NULL, 'u'},
215  {"uarea_rw", required_argument, NULL, 'U'},
216  {"pool_type", required_argument, NULL, 'p'},
217  {"verbose", no_argument, NULL, 'v'},
218  {"help", no_argument, NULL, 'h'},
219  {NULL, 0, NULL, 0}
220  };
221 
222  static const char *shortopts = "+c:q:L:H:d:e:s:g:j:b:t:f:F:w:S:k:l:n:m:p:u:U:vh";
223 
224  test_options->num_cpu = 1;
225  test_options->num_queue = 1;
226  test_options->num_low = 0;
227  test_options->num_high = 0;
228  test_options->num_dummy = 0;
229  test_options->num_event = 100;
230  test_options->num_sched = 100000;
231  test_options->num_group = 0;
232  test_options->num_join = 0;
233  test_options->max_burst = 100;
234  test_options->queue_type = 0;
235  test_options->forward = 0;
236  test_options->fairness = 0;
237  test_options->stress = 0;
238  test_options->ctx_rd_words = 0;
239  test_options->ctx_rw_words = 0;
240  test_options->rd_words = 0;
241  test_options->rw_words = 0;
242  test_options->uarea_rd = 0;
243  test_options->uarea_rw = 0;
244  test_options->wait_ns = 0;
245  test_options->verbose = 0;
246 
247  while (1) {
248  opt = getopt_long(argc, argv, shortopts, longopts, NULL);
249 
250  if (opt == -1)
251  break;
252 
253  switch (opt) {
254  case 'c':
255  test_options->num_cpu = atoi(optarg);
256  break;
257  case 'q':
258  test_options->num_queue = atoi(optarg);
259  break;
260  case 'L':
261  test_options->num_low = atoi(optarg);
262  break;
263  case 'H':
264  test_options->num_high = atoi(optarg);
265  break;
266  case 'd':
267  test_options->num_dummy = atoi(optarg);
268  break;
269  case 'e':
270  test_options->num_event = atoi(optarg);
271  break;
272  case 's':
273  test_options->num_sched = atoi(optarg);
274  break;
275  case 'g':
276  test_options->num_group = atoi(optarg);
277  break;
278  case 'j':
279  test_options->num_join = atoi(optarg);
280  break;
281  case 'b':
282  test_options->max_burst = atoi(optarg);
283  break;
284  case 't':
285  test_options->queue_type = atoi(optarg);
286  break;
287  case 'f':
288  test_options->forward = atoi(optarg);
289  break;
290  case 'F':
291  test_options->fairness = atoi(optarg);
292  break;
293  case 'S':
294  test_options->stress = strtoul(optarg, NULL, 0);
295  break;
296  case 'k':
297  test_options->ctx_rd_words = atoi(optarg);
298  break;
299  case 'l':
300  test_options->ctx_rw_words = atoi(optarg);
301  break;
302  case 'n':
303  test_options->rd_words = atoi(optarg);
304  break;
305  case 'm':
306  test_options->rw_words = atoi(optarg);
307  break;
308  case 'u':
309  test_options->uarea_rd = atoi(optarg);
310  break;
311  case 'U':
312  test_options->uarea_rw = atoi(optarg);
313  break;
314  case 'p':
315  pool_type = atoi(optarg);
316  break;
317  case 'w':
318  test_options->wait_ns = atoll(optarg);
319  break;
320  case 'v':
321  test_options->verbose = 1;
322  break;
323  case 'h':
324  /* fall through */
325  default:
326  print_usage();
327  ret = -1;
328  break;
329  }
330  }
331  if (pool_type == 0) {
332  test_options->pool_type = ODP_POOL_BUFFER;
333  } else if (pool_type == 1) {
334  test_options->pool_type = ODP_POOL_PACKET;
335  } else {
336  ODPH_ERR("Invalid pool type: %d.\n", pool_type);
337  ret = -1;
338  }
339 
340  test_options->touch_data = test_options->rd_words ||
341  test_options->rw_words;
342 
343  if (test_options->stress && test_options->touch_data == 0) {
344  ODPH_ERR("Use -n and/or -m to select event data size with a stress function\n");
345  ret = -1;
346  }
347 
348  if ((test_options->num_queue + test_options->num_dummy) > MAX_QUEUES) {
349  ODPH_ERR("Too many queues. Max supported %i.\n", MAX_QUEUES);
350  ret = -1;
351  }
352 
353  if ((test_options->num_low + test_options->num_high) > test_options->num_queue) {
354  ODPH_ERR("Number of low/high prio %u/%u exceeds number of queues %u.\n",
355  test_options->num_low, test_options->num_high, test_options->num_queue);
356  ret = -1;
357  }
358 
359  num_group = test_options->num_group;
360  num_join = test_options->num_join;
361  if (num_group > MAX_GROUPS) {
362  ODPH_ERR("Too many groups. Max supported %i.\n", MAX_GROUPS);
363  ret = -1;
364  }
365 
366  if (num_group > 0 && num_join > num_group) {
367  ODPH_ERR("num_join (%i) larger than num_group (%i).\n", num_join, num_group);
368  ret = -1;
369  }
370 
371  if (num_join && num_group > (int)(test_options->num_cpu * num_join)) {
372  printf("WARNING: Too many groups (%i). Some groups (%i) are not served.\n\n",
373  num_group, num_group - (test_options->num_cpu * num_join));
374 
375  if (test_options->forward) {
376  printf("Error: Cannot forward when some queues are not served.\n");
377  ret = -1;
378  }
379  }
380 
381  test_options->tot_queue = test_options->num_queue +
382  test_options->num_dummy;
383  test_options->tot_event = test_options->num_queue *
384  test_options->num_event;
385 
386  test_options->queue_size = test_options->num_event;
387 
388  if (test_options->forward) {
389  /* When forwarding, all events may end up in
390  * a single queue */
391  test_options->queue_size = test_options->tot_event;
392  }
393 
394  if (test_options->forward || test_options->fairness)
395  ctx_size = sizeof(queue_context_t);
396 
397  if (test_options->ctx_rd_words || test_options->ctx_rw_words) {
398  /* Round up queue handle size to a multiple of 8 for correct
399  * context data alignment */
400  ctx_size = ROUNDUP(ctx_size, 8);
401  ctx_size += 8 * test_options->ctx_rd_words;
402  ctx_size += 8 * test_options->ctx_rw_words;
403  }
404 
405  /* When context data is modified, round up to cache line size to avoid
406  * false sharing */
407  if (test_options->fairness || test_options->ctx_rw_words)
408  ctx_size = ROUNDUP(ctx_size, ODP_CACHE_LINE_SIZE);
409 
410  test_options->ctx_size = ctx_size;
411  test_options->uarea_size = 8 * (test_options->uarea_rd + test_options->uarea_rw);
412  test_options->tot_rd_size = 8 * (test_options->ctx_rd_words + test_options->uarea_rd +
413  test_options->rd_words);
414  test_options->tot_rw_size = 8 * (test_options->ctx_rw_words + test_options->uarea_rw +
415  test_options->rw_words);
416 
417  return ret;
418 }
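/* Context size example (assumed option values): with --forward 1,
 * --ctx_rd_words 2 and --ctx_rw_words 1, ctx_size above starts from
 * sizeof(queue_context_t), is rounded up to a multiple of 8, grows by
 * 8 * (2 + 1) = 24 bytes of context data and is finally rounded up to
 * ODP_CACHE_LINE_SIZE, since modified context data is padded to avoid
 * false sharing. */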
419 
420 static int set_num_cpu(test_global_t *global)
421 {
422  int ret;
423  test_options_t *test_options = &global->test_options;
424  int num_cpu = test_options->num_cpu;
425 
426  /* One thread slot is reserved for the main thread */
427  if (num_cpu > ODP_THREAD_COUNT_MAX - 1) {
428  printf("Error: Too many workers. Maximum is %i.\n",
429  ODP_THREAD_COUNT_MAX - 1);
430  return -1;
431  }
432 
433  ret = odp_cpumask_default_worker(&global->cpumask, num_cpu);
434 
435  if (num_cpu && ret != num_cpu) {
436  printf("Error: Too many workers. Max supported %i.\n", ret);
437  return -1;
438  }
439 
440  /* Zero: all available workers */
441  if (num_cpu == 0) {
442  num_cpu = ret;
443  test_options->num_cpu = num_cpu;
444  }
445 
446  odp_barrier_init(&global->barrier, num_cpu);
447 
448  return 0;
449 }
450 
451 static uint64_t init_data(uint64_t init, uint64_t *data, uint32_t words)
452 {
453  uint32_t i;
454  uint64_t val = init;
455 
456  for (i = 0; i < words; i++) {
457  data[i] = val;
458  val = (val + 1) & DATA_MASK;
459  }
460 
461  return val;
462 }
463 
464 static void print_options(test_options_t *options)
465 {
466  printf("\nScheduler performance test\n");
467  printf(" num sched %u\n", options->num_sched);
468  printf(" num cpu %u\n", options->num_cpu);
469  printf(" num queues %u\n", options->num_queue);
470  printf(" num lowest prio queues %u\n", options->num_low);
471  printf(" num highest prio queues %u\n", options->num_high);
472  printf(" num empty queues %u\n", options->num_dummy);
473  printf(" total queues %u\n", options->tot_queue);
474  printf(" num groups %i", options->num_group);
475 
476  if (options->num_group == -1)
477  printf(" (ODP_SCHED_GROUP_WORKER)\n");
478  else if (options->num_group == 0)
479  printf(" (ODP_SCHED_GROUP_ALL)\n");
480  else
481  printf("\n");
482 
483  printf(" num join %u\n", options->num_join);
484  printf(" forward events %i\n", options->forward ? 1 : 0);
485  printf(" wait %" PRIu64 " nsec\n", options->wait_ns);
486  printf(" events per queue %u\n", options->num_event);
487  printf(" queue size %u\n", options->queue_size);
488  printf(" max burst size %u\n", options->max_burst);
489  printf(" total events %u\n", options->tot_event);
490  printf(" stress 0x%x\n", options->stress);
491 
492  printf(" event size %u bytes", options->event_size);
493  if (options->touch_data)
494  printf(" (rd: %u, rw: %u)", 8 * options->rd_words, 8 * options->rw_words);
495  printf("\n");
496 
497  printf(" queue context size %u bytes", options->ctx_size);
498  if (options->ctx_rd_words || options->ctx_rw_words) {
499  printf(" (rd: %u, rw: %u)",
500  8 * options->ctx_rd_words,
501  8 * options->ctx_rw_words);
502  }
503  printf("\n");
504 
505  printf(" user area size %u bytes", options->uarea_size);
506  if (options->uarea_size)
507  printf(" (rd: %u, rw: %u)", 8 * options->uarea_rd, 8 * options->uarea_rw);
508  printf("\n");
509 
510  printf(" pool type %s\n", options->pool_type == ODP_POOL_BUFFER ?
511  "buffer" : "packet");
512 
513  printf(" queue type %s\n\n", options->queue_type == 0 ? "parallel" :
514  options->queue_type == 1 ? "atomic" :
515  "ordered");
516 
517  printf("Extra rd/rw ops per event (queue context + user area + event data)\n");
518  printf(" read %u bytes\n", options->tot_rd_size);
519  printf(" write %u bytes\n\n", options->tot_rw_size);
520 }
521 
522 static int create_pool(test_global_t *global)
523 {
524  odp_pool_capability_t pool_capa;
525  odp_pool_param_t pool_param;
526  odp_pool_t pool;
527  uint32_t max_num, max_size, max_uarea;
528  test_options_t *test_options = &global->test_options;
529  uint32_t tot_event = test_options->tot_event;
530  uint32_t event_size = 16;
531  uint32_t uarea_size = test_options->uarea_size;
532 
533  if (test_options->touch_data) {
534  event_size = test_options->rd_words + test_options->rw_words;
535  event_size = 8 * event_size;
536  }
537  test_options->event_size = event_size;
538 
539  if (odp_pool_capability(&pool_capa)) {
540  ODPH_ERR("Error: pool capa failed\n");
541  return -1;
542  }
543 
544  if (test_options->pool_type == ODP_POOL_BUFFER) {
545  max_num = pool_capa.buf.max_num;
546  max_size = pool_capa.buf.max_size;
547  max_uarea = pool_capa.buf.max_uarea_size;
548  } else {
549  max_num = pool_capa.pkt.max_num;
550  max_size = pool_capa.pkt.max_seg_len;
551  max_uarea = pool_capa.pkt.max_uarea_size;
552  }
553 
554  if (max_num && tot_event > max_num) {
555  ODPH_ERR("Error: max events supported %u\n", max_num);
556  return -1;
557  }
558 
559  if (max_size && event_size > max_size) {
560  ODPH_ERR("Error: max supported event size %u\n", max_size);
561  return -1;
562  }
563 
564  if (uarea_size > max_uarea) {
565  ODPH_ERR("Error: max supported user area size %u\n", max_uarea);
566  return -1;
567  }
568 
569  odp_pool_param_init(&pool_param);
570  if (test_options->pool_type == ODP_POOL_BUFFER) {
571  pool_param.type = ODP_POOL_BUFFER;
572  pool_param.buf.num = tot_event;
573  pool_param.buf.size = event_size;
574  pool_param.buf.align = 8;
575  pool_param.buf.uarea_size = uarea_size;
576  } else {
577  pool_param.type = ODP_POOL_PACKET;
578  pool_param.pkt.num = tot_event;
579  pool_param.pkt.len = event_size;
580  pool_param.pkt.seg_len = event_size;
581  pool_param.pkt.align = 8;
582  pool_param.pkt.uarea_size = uarea_size;
583  }
584 
585  pool = odp_pool_create("sched perf", &pool_param);
586  if (pool == ODP_POOL_INVALID) {
587  ODPH_ERR("Error: pool create failed\n");
588  return -1;
589  }
590 
591  global->pool = pool;
592 
593  return 0;
594 }
595 
596 static int create_groups(test_global_t *global)
597 {
598  odp_schedule_capability_t sched_capa;
599  odp_thrmask_t thrmask;
600  uint32_t i;
601  test_options_t *test_options = &global->test_options;
602  uint32_t num_group = test_options->num_group;
603 
604  if (test_options->num_group <= 0)
605  return 0;
606 
607  if (odp_schedule_capability(&sched_capa)) {
608  printf("Error: schedule capability failed\n");
609  return -1;
610  }
611 
612  if (num_group > sched_capa.max_groups) {
613  printf("Error: Too many sched groups (max_groups capa %u)\n",
614  sched_capa.max_groups);
615  return -1;
616  }
617 
618  odp_thrmask_zero(&thrmask);
619 
620  for (i = 0; i < num_group; i++) {
621  odp_schedule_group_t group;
622 
623  group = odp_schedule_group_create("test_group", &thrmask);
624 
625  if (group == ODP_SCHED_GROUP_INVALID) {
626  printf("Error: Group create failed %u\n", i);
627  return -1;
628  }
629 
630  global->group[i] = group;
631  }
632 
633  return 0;
634 }
635 
636 static int create_queues(test_global_t *global)
637 {
638  odp_queue_param_t queue_param;
639  odp_queue_t queue;
640  odp_schedule_sync_t sync;
641  odp_schedule_prio_t prio;
642  uint32_t i, j, first;
643  test_options_t *test_options = &global->test_options;
644  uint32_t event_size = test_options->event_size;
645  uint32_t num_event = test_options->num_event;
646  uint32_t queue_size = test_options->queue_size;
647  uint32_t tot_queue = test_options->tot_queue;
648  uint32_t num_low = test_options->num_low;
649  uint32_t num_high = test_options->num_high;
650  uint32_t num_default = test_options->num_queue - num_low - num_high;
651  int num_group = test_options->num_group;
652  int type = test_options->queue_type;
653  odp_pool_t pool = global->pool;
654  uint8_t *ctx = NULL;
655  uint32_t ctx_size = test_options->ctx_size;
656  uint64_t init_val = 0;
657 
658  if (type == 0)
659  sync = ODP_SCHED_SYNC_PARALLEL;
660  else if (type == 1)
661  sync = ODP_SCHED_SYNC_ATOMIC;
662  else
663  sync = ODP_SCHED_SYNC_ORDERED;
664 
665  if (tot_queue > global->schedule_config.num_queues) {
666  printf("Max queues supported %u\n",
667  global->schedule_config.num_queues);
668  return -1;
669  }
670 
671  if (global->schedule_config.queue_size &&
672  queue_size > global->schedule_config.queue_size) {
673  printf("Max queue size %u\n",
674  global->schedule_config.queue_size);
675  return -1;
676  }
677 
678  if (ctx_size) {
679  ctx = odp_shm_addr(global->ctx_shm);
680  if (ctx == NULL) {
681  printf("Bad queue context\n");
682  return -1;
683  }
684  }
685 
686  odp_queue_param_init(&queue_param);
687  queue_param.type = ODP_QUEUE_TYPE_SCHED;
688  queue_param.sched.sync = sync;
689  queue_param.size = queue_size;
690  if (num_group == -1)
691  queue_param.sched.group = ODP_SCHED_GROUP_WORKER;
692  else
693  queue_param.sched.group = ODP_SCHED_GROUP_ALL;
694 
695  first = test_options->num_dummy;
696 
697  for (i = 0; i < tot_queue; i++) {
698  if (num_group > 0) {
699  odp_schedule_group_t group;
700 
701  /* Divide all queues evenly into groups */
702  group = global->group[i % num_group];
703  queue_param.sched.group = group;
704  }
705 
706  /* Create low, high and default queues in a mixed order. Dummy queues are created
707  * first and with default priority. */
708  prio = odp_schedule_default_prio();
709  if (i >= first) {
710  switch (i % 3) {
711  case 0:
712  if (num_low) {
713  num_low--;
714  prio = odp_schedule_min_prio();
715  } else if (num_high) {
716  num_high--;
717  prio = odp_schedule_max_prio();
718  } else {
719  num_default--;
720  }
721  break;
722  case 1:
723  if (num_high) {
724  num_high--;
725  prio = odp_schedule_max_prio();
726  } else if (num_low) {
727  num_low--;
728  prio = odp_schedule_min_prio();
729  } else {
730  num_default--;
731  }
732  break;
733  default:
734  if (num_default) {
735  num_default--;
736  } else if (num_high) {
737  num_high--;
738  prio = odp_schedule_max_prio();
739  } else {
740  num_low--;
741  prio = odp_schedule_min_prio();
742  }
743  break;
744  }
745  }
746 
747  queue_param.sched.prio = prio;
748 
749  queue = odp_queue_create(NULL, &queue_param);
750 
751  global->queue[i] = queue;
752 
753  if (queue == ODP_QUEUE_INVALID) {
754  printf("Error: Queue create failed %u\n", i);
755  return -1;
756  }
757  }
758 
759  /* Store events into queues. Dummy queues are allocated from
760  * the beginning of the array, so that their allocation affects the
761  * allocation of the active queues. Dummy queues are left empty. */
762  for (i = first; i < tot_queue; i++) {
763  queue = global->queue[i];
764 
765  if (ctx_size) {
766  /*
767  * Cast increases alignment, but it's ok, since ctx and
768  * ctx_size are both cache line aligned.
769  */
770  queue_context_t *qc = (queue_context_t *)(uintptr_t)ctx;
771 
772  if (test_options->forward) {
773  uint32_t next = i + 1;
774 
775  if (next == tot_queue)
776  next = first;
777 
778  qc->next = global->queue[next];
779  }
780 
781  if (test_options->fairness)
782  odp_atomic_init_u64(&qc->count, 0);
783 
784  if (odp_queue_context_set(queue, ctx, ctx_size)) {
785  printf("Error: Context set failed %u\n", i);
786  return -1;
787  }
788 
789  ctx += ctx_size;
790  }
791 
792  for (j = 0; j < num_event; j++) {
793  odp_event_t ev;
794  uint64_t *data;
795  uint32_t words;
796 
797  if (test_options->pool_type == ODP_POOL_BUFFER) {
798  odp_buffer_t buf = odp_buffer_alloc(pool);
799 
800  if (buf == ODP_BUFFER_INVALID) {
801  ODPH_ERR("Error: alloc failed %u/%u\n", i, j);
802  return -1;
803  }
804  ev = odp_buffer_to_event(buf);
805 
806  data = odp_buffer_addr(buf);
807  words = odp_buffer_size(buf) / 8;
808  } else {
809  odp_packet_t pkt = odp_packet_alloc(pool, event_size);
810 
811  if (pkt == ODP_PACKET_INVALID) {
812  ODPH_ERR("Error: alloc failed %u/%u\n", i, j);
813  return -1;
814  }
815  ev = odp_packet_to_event(pkt);
816 
817  data = odp_packet_data(pkt);
818  words = odp_packet_seg_len(pkt) / 8;
819  }
820 
821  init_val = init_data(init_val, data, words);
822 
823  if (odp_queue_enq(queue, ev)) {
824  ODPH_ERR("Error: enqueue failed %u/%u\n", i, j);
825  return -1;
826  }
827  }
828  }
829 
830  return 0;
831 }
832 
833 static int join_group(test_global_t *global, int grp_index, int thr)
834 {
835  odp_thrmask_t thrmask;
836  odp_schedule_group_t group;
837 
838  odp_thrmask_zero(&thrmask);
839  odp_thrmask_set(&thrmask, thr);
840  group = global->group[grp_index];
841 
842  if (odp_schedule_group_join(group, &thrmask)) {
843  printf("Error: Group %i join failed (thr %i)\n",
844  grp_index, thr);
845  return -1;
846  }
847 
848  return 0;
849 }
850 
851 static int join_all_groups(test_global_t *global, int thr)
852 {
853  int i;
854  test_options_t *test_options = &global->test_options;
855  int num_group = test_options->num_group;
856 
857  if (num_group <= 0)
858  return 0;
859 
860  for (i = 0; i < num_group; i++) {
861  if (join_group(global, i, thr)) {
862  printf("Error: Group %u join failed (thr %i)\n",
863  i, thr);
864  return -1;
865  }
866  }
867 
868  return 0;
869 }
870 
871 static void print_queue_fairness(test_global_t *global)
872 {
873  uint32_t i;
874  queue_context_t *ctx;
875  test_options_t *test_options = &global->test_options;
876  uint32_t first = test_options->num_dummy;
877  uint32_t num_queue = test_options->num_queue;
878  uint32_t tot_queue = test_options->tot_queue;
879  uint64_t total = 0;
880  double average;
881 
882  if (!test_options->fairness)
883  return;
884 
885  for (i = first; i < tot_queue; i++) {
886  ctx = odp_queue_context(global->queue[i]);
887  total += odp_atomic_load_u64(&ctx->count);
888  }
889 
890  average = (double)total / (double)num_queue;
891 
892  printf("\n");
893  printf("RESULTS - events per queue (percent of average):\n");
894  printf("------------------------------------------------\n");
895  printf(" 1 2 3 4 5 6 7 8 9 10");
896 
897  for (i = first; i < tot_queue; i++) {
898  ctx = odp_queue_context(global->queue[i]);
899 
900  if ((i % 10) == 0)
901  printf("\n ");
902 
903  printf("%6.1f ", (double)odp_atomic_load_u64(&ctx->count) /
904  average * 100.0);
905  }
906 
907  printf("\n");
908 }
909 
910 static int destroy_queues(test_global_t *global)
911 {
912  uint32_t i;
913  odp_event_t ev;
914  uint64_t wait;
915  test_options_t *test_options = &global->test_options;
916  uint32_t tot_queue = test_options->tot_queue;
917  int thr = odp_thread_id();
918 
919  if (join_all_groups(global, thr))
920  return -1;
921 
922  wait = odp_schedule_wait_time(200 * ODP_TIME_MSEC_IN_NS);
923 
924  while ((ev = odp_schedule(NULL, wait)) != ODP_EVENT_INVALID)
925  odp_event_free(ev);
926 
927  for (i = 0; i < tot_queue; i++) {
928  if (global->queue[i] != ODP_QUEUE_INVALID) {
929  if (odp_queue_destroy(global->queue[i])) {
930  printf("Error: Queue destroy failed %u\n", i);
931  return -1;
932  }
933  }
934  }
935 
936  return 0;
937 }
938 
939 static int destroy_groups(test_global_t *global)
940 {
941  int i;
942  test_options_t *test_options = &global->test_options;
943  int num_group = test_options->num_group;
944 
945  if (num_group <= 0)
946  return 0;
947 
948  for (i = 0; i < num_group; i++) {
949  odp_schedule_group_t group = global->group[i];
950 
951  if (odp_schedule_group_destroy(group)) {
952  printf("Error: Group destroy failed %u\n", i);
953  return -1;
954  }
955  }
956 
957  return 0;
958 }
959 
960 static uint64_t rw_uarea(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words)
961 {
962  uint64_t *data;
963  int i;
964  uint32_t j;
965  uint64_t sum = 0;
966 
967  for (i = 0; i < num; i++) {
968  data = odp_event_user_area(ev[i]);
969 
970  for (j = 0; j < rd_words; j++)
971  sum += data[j];
972 
973  for (; j < rd_words + rw_words; j++) {
974  sum += data[j];
975  data[j] += 1;
976  }
977  }
978 
979  return sum;
980 }
981 
982 static inline uint64_t rw_ctx_data(void *ctx, uint32_t offset,
983  uint32_t rd_words, uint32_t rw_words)
984 {
985  uint64_t *data;
986  uint32_t i;
987  uint64_t sum = 0;
988 
989  data = (uint64_t *)(uintptr_t)((uint8_t *)ctx + offset);
990 
991  for (i = 0; i < rd_words; i++)
992  sum += data[i];
993 
994  for (; i < rd_words + rw_words; i++) {
995  sum += data[i];
996  data[i] += 1;
997  }
998 
999  return sum;
1000 }
1001 
1002 static uint64_t rw_data(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words,
1003  odp_pool_type_t pool_type)
1004 {
1005  uint64_t *data;
1006  uint32_t j;
1007  uint64_t sum = 0;
1008 
1009  for (int i = 0; i < num; i++) {
1010  if (pool_type == ODP_POOL_BUFFER)
1011  data = odp_buffer_addr(odp_buffer_from_event(ev[i]));
1012  else
1013  data = odp_packet_data(odp_packet_from_event(ev[i]));
1014 
1015  for (j = 0; j < rd_words; j++)
1016  sum += data[j];
1017 
1018  for (; j < rd_words + rw_words; j++) {
1019  sum += data[j];
1020  data[j] += 1;
1021  }
1022  }
1023 
1024  return sum;
1025 }
1026 
1027 static uint64_t rw_data_stress(odp_event_t ev[], int num, uint32_t rd_words, uint32_t rw_words,
1028  uint32_t stress, odp_pool_type_t pool_type)
1029 {
1030  uint64_t *data;
1031  uint64_t word;
1032  uint32_t j;
1033  uint64_t sum = 0;
1034 
1035  for (int i = 0; i < num; i++) {
1036  if (pool_type == ODP_POOL_BUFFER)
1037  data = odp_buffer_addr(odp_buffer_from_event(ev[i]));
1038  else
1039  data = odp_packet_data(odp_packet_from_event(ev[i]));
1040 
1041  for (j = 0; j < rd_words + rw_words; j++) {
1042  word = data[j];
1043 
1044  if (stress & 0x1)
1045  sum += odph_stress_pow2_u32(word);
1046  if (stress & 0x2)
1047  sum += odph_stress_log2_u32(word);
1048  if (stress & 0x4)
1049  sum += odph_stress_sqrt_u32(word);
1050  if (stress & 0x8)
1051  sum += odph_stress_sqrt_f32(word);
1052 
1053  if (j >= rd_words)
1054  data[j] = (word + 1) & DATA_MASK;
1055  }
1056  }
1057 
1058  return sum;
1059 }
1060 
1061 static int test_sched(void *arg)
1062 {
1063  int num, num_enq, ret, thr;
1064  uint32_t i, rounds;
1065  uint64_t c1, c2, cycles, nsec;
1066  uint64_t events, enqueues, waits, events_prev;
1067  odp_time_t t1, t2, last_retry_ts;
1068  odp_queue_t queue;
1069  thread_arg_t *thread_arg = arg;
1070  test_global_t *global = thread_arg->global;
1071  test_options_t *test_options = &global->test_options;
1072  uint32_t num_sched = test_options->num_sched;
1073  uint32_t max_burst = test_options->max_burst;
1074  int num_group = test_options->num_group;
1075  int forward = test_options->forward;
1076  int fairness = test_options->fairness;
1077  const int touch_data = test_options->touch_data;
1078  const uint32_t stress = test_options->stress;
1079  const uint32_t rd_words = test_options->rd_words;
1080  const uint32_t rw_words = test_options->rw_words;
1081  uint32_t ctx_size = test_options->ctx_size;
1082  uint32_t ctx_rd_words = test_options->ctx_rd_words;
1083  uint32_t ctx_rw_words = test_options->ctx_rw_words;
1084  const uint32_t uarea_size = test_options->uarea_size;
1085  const uint32_t uarea_rd = test_options->uarea_rd;
1086  const uint32_t uarea_rw = test_options->uarea_rw;
1087  const odp_pool_type_t pool_type = test_options->pool_type;
1088  int touch_ctx = ctx_rd_words || ctx_rw_words;
1089  odp_atomic_u32_t *exit_threads = &global->exit_threads;
1090  uint32_t ctx_offset = 0;
1091  uint32_t sched_retries = 0;
1092  uint64_t data_sum = 0;
1093  uint64_t ctx_sum = 0;
1094  uint64_t uarea_sum = 0;
1095  uint64_t wait_ns = test_options->wait_ns;
1096  odp_event_t ev[max_burst];
1097 
1098  thr = odp_thread_id();
1099 
1100  if (forward || fairness)
1101  ctx_offset = ROUNDUP(sizeof(queue_context_t), 8);
1102 
1103  if (num_group > 0) {
1104  uint32_t num_join = test_options->num_join;
1105 
1106  if (num_join) {
1107  int pos = 0;
1108  int n = 512;
1109  char str[n];
1110  int group_index = thread_arg->first_group;
1111 
1112  pos += snprintf(&str[pos], n - pos,
1113  "Thread %i joined groups:", thr);
1114 
1115  for (i = 0; i < num_join; i++) {
1116  if (join_group(global, group_index, thr))
1117  return -1;
1118 
1119  pos += snprintf(&str[pos], n - pos, " %i",
1120  group_index);
1121 
1122  group_index = (group_index + 1) % num_group;
1123  }
1124 
1125  printf("%s\n", str);
1126 
1127  } else {
1128  if (join_all_groups(global, thr))
1129  return -1;
1130  }
1131  }
1132 
1133  for (i = 0; i < max_burst; i++)
1134  ev[i] = ODP_EVENT_INVALID;
1135 
1136  enqueues = 0;
1137  events = 0;
1138  events_prev = 0;
1139  waits = 0;
1140  ret = 0;
1141 
1142  /* Start all workers at the same time */
1143  odp_barrier_wait(&global->barrier);
1144 
1145  t1 = odp_time_local();
1146  c1 = odp_cpu_cycles();
1147  last_retry_ts = t1;
1148 
1149  for (rounds = 0; odp_likely(!odp_atomic_load_u32(exit_threads)); rounds++) {
1150  if (odp_unlikely(num_sched && events >= num_sched))
1151  break;
1152 
1153  num = odp_schedule_multi(&queue, ODP_SCHED_NO_WAIT,
1154  ev, max_burst);
1155 
1156  if (odp_likely(num > 0)) {
1157  sched_retries = 0;
1158  events += num;
1159  i = 0;
1160 
1161  if (odp_unlikely(uarea_size))
1162  uarea_sum += rw_uarea(ev, num, uarea_rd, uarea_rw);
1163 
1164  if (odp_unlikely(ctx_size)) {
1165  queue_context_t *ctx = odp_queue_context(queue);
1166 
1167  if (forward)
1168  queue = ctx->next;
1169 
1170  if (fairness)
1171  odp_atomic_add_u64(&ctx->count, num);
1172 
1173  if (odp_unlikely(touch_ctx))
1174  ctx_sum += rw_ctx_data(ctx, ctx_offset,
1175  ctx_rd_words,
1176  ctx_rw_words);
1177  }
1178 
1179  if (odp_unlikely(touch_data)) {
1180  if (stress) {
1181  data_sum += rw_data_stress(ev, num, rd_words, rw_words,
1182  stress, pool_type);
1183  } else {
1184  data_sum += rw_data(ev, num, rd_words, rw_words, pool_type);
1185  }
1186  }
1187 
1188  if (odp_unlikely(wait_ns)) {
1189  waits++;
1190  odp_time_wait_ns(wait_ns);
1191  }
1192 
1193  while (num) {
1194  num_enq = odp_queue_enq_multi(queue, &ev[i],
1195  num);
1196 
1197  if (num_enq < 0) {
1198  printf("Error: Enqueue failed. Round %u\n",
1199  rounds);
1200  odp_event_free_multi(&ev[i], num);
1201  ret = -1;
1202  break;
1203  }
1204 
1205  num -= num_enq;
1206  i += num_enq;
1207  enqueues++;
1208  }
1209 
1210  if (odp_unlikely(ret))
1211  break;
1212 
1213  continue;
1214  } else if (num == 0) {
1215  sched_retries++;
1216  if (odp_unlikely(sched_retries > TIME_CHECK_INTERVAL)) {
1217  odp_time_t cur_time = odp_time_local();
1218 
1219  /* Measure time from the last received event and
1220  * break if MAX_SCHED_WAIT_NS is exceeded */
1221  sched_retries = 0;
1222  if (events_prev != events) {
1223  events_prev = events;
1224  last_retry_ts = cur_time;
1225  } else if (odp_time_diff_ns(cur_time,
1226  last_retry_ts) >
1227  MAX_SCHED_WAIT_NS) {
1228  printf("Error: scheduling timed out\n");
1229  ret = -1;
1230  break;
1231  }
1232  }
1233  }
1234 
1235  /* <0 not specified as an error but checking anyway */
1236  if (num < 0) {
1237  printf("Error: Sched failed. Round %u\n", rounds);
1238  ret = -1;
1239  break;
1240  }
1241  }
1242 
1243  c2 = odp_cpu_cycles();
1244  t2 = odp_time_local();
1245 
1246  nsec = odp_time_diff_ns(t2, t1);
1247  cycles = odp_cpu_cycles_diff(c2, c1);
1248 
1249  /* Update stats */
1250  global->stat[thr].rounds = rounds;
1251  global->stat[thr].enqueues = enqueues;
1252  global->stat[thr].events = events;
1253  global->stat[thr].nsec = nsec;
1254  global->stat[thr].cycles = cycles;
1255  global->stat[thr].waits = waits;
1256  global->stat[thr].dummy_sum = data_sum + ctx_sum + uarea_sum;
1257  global->stat[thr].failed = ret;
1258 
1259  if (odp_atomic_fetch_dec_u32(&global->num_worker) == 1) {
1260  /* The last worker frees all events. This is needed when the main
1261  * thread cannot do the clean up (ODP_SCHED_GROUP_WORKER). */
1262  odp_event_t event;
1263  uint64_t sched_wait = odp_schedule_wait_time(200 * ODP_TIME_MSEC_IN_NS);
1264 
1265  /* Print queue and scheduler status at the end of the test, before any queues
1266  * are emptied or destroyed. */
1267  if (test_options->verbose) {
1268  odp_queue_print_all();
1269  odp_schedule_print();
1270  }
1271 
1272  while ((event = odp_schedule(NULL, sched_wait)) != ODP_EVENT_INVALID)
1273  odp_event_free(event);
1274  }
1275 
1276  /* Pause scheduling before thread exit */
1277  odp_schedule_pause();
1278 
1279  while (1) {
1280  ev[0] = odp_schedule(&queue, ODP_SCHED_NO_WAIT);
1281 
1282  if (ev[0] == ODP_EVENT_INVALID)
1283  break;
1284 
1285  if (odp_unlikely(forward))
1286  queue = ((queue_context_t *)odp_queue_context(queue))->next;
1287 
1288  if (odp_queue_enq(queue, ev[0])) {
1289  printf("Error: Queue enqueue failed\n");
1290  odp_event_free(ev[0]);
1291  ret = -1;
1292  }
1293  }
1294 
1295  return ret;
1296 }
1297 
1298 static int start_workers(test_global_t *global, odp_instance_t instance)
1299 {
1300  odph_thread_common_param_t thr_common;
1301  int i, ret;
1302  test_options_t *test_options = &global->test_options;
1303  int num_group = test_options->num_group;
1304  uint32_t num_join = test_options->num_join;
1305  int num_cpu = test_options->num_cpu;
1306  odph_thread_param_t thr_param[num_cpu];
1307 
1308  odp_atomic_init_u32(&global->num_worker, num_cpu);
1309 
1310  memset(global->thread_tbl, 0, sizeof(global->thread_tbl));
1311  odph_thread_common_param_init(&thr_common);
1312 
1313  thr_common.instance = instance;
1314  thr_common.cpumask = &global->cpumask;
1315 
1316  for (i = 0; i < num_cpu; i++) {
1317  odph_thread_param_init(&thr_param[i]);
1318  thr_param[i].start = test_sched;
1319  thr_param[i].arg = &global->thread_arg[i];
1320  thr_param[i].thr_type = ODP_THREAD_WORKER;
1321 
1322  global->thread_arg[i].global = global;
1323  global->thread_arg[i].first_group = 0;
1324 
1325  if (num_group > 0 && num_join) {
1326  /* Each thread joins only num_join groups, starting
1327  * from this group index and wrapping around the group
1328  * table. */
1329  int first_group = (i * num_join) % num_group;
1330 
1331  global->thread_arg[i].first_group = first_group;
1332  }
1333  }
1334 
1335  ret = odph_thread_create(global->thread_tbl, &thr_common, thr_param,
1336  num_cpu);
1337 
1338  if (ret != num_cpu) {
1339  printf("Error: thread create failed %i\n", ret);
1340  return -1;
1341  }
1342 
1343  return 0;
1344 }
1345 
1346 static double measure_wait_time_cycles(uint64_t wait_ns)
1347 {
1348  uint64_t i, c1, c2, diff;
1349  uint64_t rounds;
1350  double wait_cycles;
1351 
1352  if (wait_ns == 0)
1353  return 0.0;
1354 
1355  /* Run measurement for 100msec or at least two times, so that effect
1356  * from CPU frequency scaling is minimized. */
1357  rounds = (100 * ODP_TIME_MSEC_IN_NS) / wait_ns;
1358  if (rounds == 0)
1359  rounds = 2;
1360 
1361  c1 = odp_cpu_cycles();
1362 
1363  for (i = 0; i < rounds; i++)
1364  odp_time_wait_ns(wait_ns);
1365 
1366  c2 = odp_cpu_cycles();
1367  diff = odp_cpu_cycles_diff(c2, c1);
1368  wait_cycles = (double)diff / rounds;
1369 
1370  printf("\nMeasured wait cycles: %.3f\n", wait_cycles);
1371 
1372  return wait_cycles;
1373 }
1374 
1375 static int output_results(test_global_t *global)
1376 {
1377  int i, num;
1378  double rounds_ave, enqueues_ave, events_ave, events_per_sec, nsec_ave, cycles_ave;
1379  double waits_ave, wait_cycles, wait_cycles_ave;
1380  test_options_t *test_options = &global->test_options;
1381  int num_cpu = test_options->num_cpu;
1382  uint64_t wait_ns = test_options->wait_ns;
1383  uint64_t rounds_sum = 0;
1384  uint64_t enqueues_sum = 0;
1385  uint64_t events_sum = 0;
1386  uint64_t nsec_sum = 0;
1387  uint64_t cycles_sum = 0;
1388  uint64_t waits_sum = 0;
1389  uint32_t tot_rd = test_options->tot_rd_size;
1390  uint32_t tot_rw = test_options->tot_rw_size;
1391 
1392  wait_cycles = measure_wait_time_cycles(wait_ns);
1393 
1394  /* Averages */
1395  for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) {
1396  if (global->stat[i].failed) {
1397  num_cpu--;
1398  continue;
1399  }
1400  rounds_sum += global->stat[i].rounds;
1401  enqueues_sum += global->stat[i].enqueues;
1402  events_sum += global->stat[i].events;
1403  nsec_sum += global->stat[i].nsec;
1404  cycles_sum += global->stat[i].cycles;
1405  waits_sum += global->stat[i].waits;
1406  }
1407 
1408  if (rounds_sum == 0 || num_cpu <= 0) {
1409  printf("No results.\n");
1410  return 0;
1411  }
1412 
1413  rounds_ave = rounds_sum / num_cpu;
1414  enqueues_ave = enqueues_sum / num_cpu;
1415  events_ave = events_sum / num_cpu;
1416  nsec_ave = nsec_sum / num_cpu;
1417  cycles_ave = cycles_sum / num_cpu;
1418  waits_ave = waits_sum / num_cpu;
1419  wait_cycles_ave = waits_ave * wait_cycles;
1420  num = 0;
1421 
1422  printf("\n");
1423  printf("RESULTS - per thread (Million events per sec):\n");
1424  printf("----------------------------------------------\n");
1425  printf(" 1 2 3 4 5 6 7 8 9 10");
1426 
1427  for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) {
1428  if (global->stat[i].rounds) {
1429  if ((num % 10) == 0)
1430  printf("\n ");
1431 
1432  if (global->stat[i].failed)
1433  printf(" n/a ");
1434  else
1435  printf("%6.1f ",
1436  (1000.0 * global->stat[i].events) /
1437  global->stat[i].nsec);
1438 
1439  num++;
1440  }
1441  }
1442  printf("\n\n");
1443 
1444  printf("RESULTS - average over %i threads:\n", num_cpu);
1445  printf("----------------------------------\n");
1446  printf(" schedule calls: %.3f\n", rounds_ave);
1447  printf(" enqueue calls: %.3f\n", enqueues_ave);
1448  printf(" duration: %.3f msec\n", nsec_ave / 1000000);
1449  printf(" num cycles: %.3f M\n", cycles_ave / 1000000);
1450  printf(" cycles per round: %.3f\n",
1451  cycles_ave / rounds_ave);
1452  printf(" cycles per event: %.3f\n",
1453  cycles_ave / events_ave);
1454  if (wait_ns) {
1455  printf(" without wait_ns cycles: %.3f\n",
1456  (cycles_ave - wait_cycles_ave) / events_ave);
1457  }
1458  printf(" ave events received: %.3f\n",
1459  events_ave / rounds_ave);
1460  printf(" rounds per sec: %.3f M\n",
1461  (1000.0 * rounds_ave) / nsec_ave);
1462 
1463  events_per_sec = (1000.0 * events_ave) / nsec_ave;
1464  printf(" events per sec: %.3f M\n", events_per_sec);
1465 
1466  printf(" extra reads per sec: %.3f MB\n", tot_rd * events_per_sec);
1467  printf(" extra writes per sec: %.3f MB\n", tot_rw * events_per_sec);
1468 
1469  printf("TOTAL events per sec: %.3f M\n\n",
1470  (1000.0 * events_sum) / nsec_ave);
1471 
1472  if (global->common_options.is_export) {
1473  if (test_common_write("schedule calls,enqueue calls,duration (msec),"
1474  "num cycles (M),cycles per round,cycles per event,"
1475  "ave events received,rounds per sec (M),"
1476  "events per sec (M), TOTAL events per sec (M)\n")) {
1477  ODPH_ERR("Export failed\n");
1478  test_common_write_term();
1479  return -1;
1480  }
1481 
1482  if (test_common_write("%f,%f,%f,%f,%f,%f,%f,%f,%f,%f\n",
1483  rounds_ave, enqueues_ave, nsec_ave / 1000000,
1484  cycles_ave / 1000000, cycles_ave / rounds_ave,
1485  cycles_ave / events_ave, events_ave / rounds_ave,
1486  (1000.0 * rounds_ave) / nsec_ave,
1487  (1000.0 * events_ave) / nsec_ave,
1488  (1000.0 * events_sum) / nsec_ave)) {
1489  ODPH_ERR("Export failed\n");
1490  test_common_write_term();
1491  return -1;
1492  }
1493 
1494  test_common_write_term();
1495  }
1496 
1497  return 0;
1498 }
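/* Unit note for the results above: events and nsec are per-thread totals, so
 * events / nsec is events per nanosecond and multiplying by 1000 gives
 * millions of events per second. For example, 2500 events in 1000000 nsec
 * prints as 2.5 (M events/s). */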
1499 
1500 int main(int argc, char **argv)
1501 {
1502  odph_helper_options_t helper_options;
1503  odp_instance_t instance;
1504  odp_init_t init;
1505  odp_shm_t shm;
1506  test_global_t *global;
1507  test_common_options_t common_options;
1508 
1509  /* Let helper collect its own arguments (e.g. --odph_proc) */
1510  argc = odph_parse_options(argc, argv);
1511  if (odph_options(&helper_options)) {
1512  ODPH_ERR("Error: Reading ODP helper options failed.\n");
1513  exit(EXIT_FAILURE);
1514  }
1515 
1516  argc = test_common_parse_options(argc, argv);
1517  if (test_common_options(&common_options)) {
1518  ODPH_ERR("Error: Reading test options failed\n");
1519  exit(EXIT_FAILURE);
1520  }
1521 
1522  /* List features not to be used */
1523  odp_init_param_init(&init);
1524  init.not_used.feat.cls = 1;
1525  init.not_used.feat.compress = 1;
1526  init.not_used.feat.crypto = 1;
1527  init.not_used.feat.ipsec = 1;
1528  init.not_used.feat.timer = 1;
1529  init.not_used.feat.tm = 1;
1530 
1531  init.mem_model = helper_options.mem_model;
1532 
1533  /* Init ODP before calling anything else */
1534  if (odp_init_global(&instance, &init, NULL)) {
1535  printf("Error: Global init failed.\n");
1536  return -1;
1537  }
1538 
1539  /* Init this thread */
1540  if (odp_init_local(instance, ODP_THREAD_CONTROL)) {
1541  printf("Error: Local init failed.\n");
1542  return -1;
1543  }
1544 
1545  shm = odp_shm_reserve("sched_perf_global", sizeof(test_global_t), ODP_CACHE_LINE_SIZE, 0);
1546  if (shm == ODP_SHM_INVALID) {
1547  ODPH_ERR("Error: SHM reserve failed.\n");
1548  exit(EXIT_FAILURE);
1549  }
1550 
1551  global = odp_shm_addr(shm);
1552  if (global == NULL) {
1553  ODPH_ERR("Error: SHM alloc failed\n");
1554  exit(EXIT_FAILURE);
1555  }
1556  test_globals = global;
1557 
1558  memset(global, 0, sizeof(test_global_t));
1559  global->pool = ODP_POOL_INVALID;
1560  global->ctx_shm = ODP_SHM_INVALID;
1561  odp_atomic_init_u32(&global->exit_threads, 0);
1562 
1563  global->common_options = common_options;
1564 
1565  if (setup_sig_handler()) {
1566  ODPH_ERR("Error: signal handler setup failed\n");
1567  exit(EXIT_FAILURE);
1568  }
1569 
1570  if (parse_options(argc, argv, &global->test_options))
1571  return -1;
1572 
1574 
1575  if (global->test_options.ctx_size) {
1576  uint64_t size = (uint64_t)global->test_options.ctx_size *
1577  global->test_options.tot_queue;
1578 
1579  global->ctx_shm = odp_shm_reserve("queue contexts", size,
1580  ODP_CACHE_LINE_SIZE, 0);
1581  if (global->ctx_shm == ODP_SHM_INVALID) {
1582  printf("Error: SHM reserve %" PRIu64 " bytes failed\n",
1583  size);
1584  return -1;
1585  }
1586  }
1587 
1588  odp_schedule_config_init(&global->schedule_config);
1589  odp_schedule_config(&global->schedule_config);
1590 
1591  if (set_num_cpu(global))
1592  return -1;
1593 
1594  if (create_pool(global))
1595  return -1;
1596 
1597  if (create_groups(global))
1598  return -1;
1599 
1600  if (create_queues(global))
1601  return -1;
1602 
1603  if (global->test_options.verbose)
1604  odp_shm_print_all();
1605 
1606  print_options(&global->test_options);
1607 
1608  /* Start workers */
1609  start_workers(global, instance);
1610 
1611  /* Wait for workers to exit */
1612  odph_thread_join(global->thread_tbl, global->test_options.num_cpu);
1613 
1614  print_queue_fairness(global);
1615 
1616  if (destroy_queues(global))
1617  return -1;
1618 
1619  if (destroy_groups(global))
1620  return -1;
1621 
1622  if (output_results(global))
1623  return -1;
1624 
1625  if (odp_pool_destroy(global->pool)) {
1626  printf("Error: Pool destroy failed.\n");
1627  return -1;
1628  }
1629 
1630  if (global->ctx_shm != ODP_SHM_INVALID)
1631  odp_shm_free(global->ctx_shm);
1632 
1633  if (odp_shm_free(shm)) {
1634  ODPH_ERR("Error: SHM free failed.\n");
1635  exit(EXIT_FAILURE);
1636  }
1637 
1638  if (odp_term_local()) {
1639  printf("Error: term local failed.\n");
1640  return -1;
1641  }
1642 
1643  if (odp_term_global(instance)) {
1644  printf("Error: term global failed.\n");
1645  return -1;
1646  }
1647 
1648  return 0;
1649 }
void odp_atomic_init_u32(odp_atomic_u32_t *atom, uint32_t val)
Initialize atomic uint32 variable.
uint32_t odp_atomic_load_u32(odp_atomic_u32_t *atom)
Load value of atomic uint32 variable.
void odp_atomic_init_u64(odp_atomic_u64_t *atom, uint64_t val)
Initialize atomic uint64 variable.
void odp_atomic_store_u32(odp_atomic_u32_t *atom, uint32_t val)
Store value to atomic uint32 variable.
uint32_t odp_atomic_fetch_dec_u32(odp_atomic_u32_t *atom)
Fetch and decrement atomic uint32 variable.
void odp_atomic_add_u64(odp_atomic_u64_t *atom, uint64_t val)
Add to atomic uint64 variable.
uint64_t odp_atomic_load_u64(odp_atomic_u64_t *atom)
Load value of atomic uint64 variable.
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
uint32_t odp_buffer_size(odp_buffer_t buf)
Buffer maximum data size.
odp_event_t odp_buffer_to_event(odp_buffer_t buf)
Convert buffer handle to event.
odp_buffer_t odp_buffer_alloc(odp_pool_t pool)
Buffer alloc.
void * odp_buffer_addr(odp_buffer_t buf)
Buffer start address.
odp_buffer_t odp_buffer_from_event(odp_event_t ev)
Get buffer handle from event.
#define ODP_BUFFER_INVALID
Invalid buffer.
#define odp_unlikely(x)
Branch unlikely taken.
Definition: spec/hints.h:64
#define ODP_UNUSED
Intentionally unused variables of functions.
Definition: spec/hints.h:54
#define odp_likely(x)
Branch likely taken.
Definition: spec/hints.h:59
uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
CPU cycle count difference.
uint64_t odp_cpu_cycles(void)
Current CPU cycle count.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
void odp_event_free_multi(const odp_event_t event[], int num)
Free multiple events.
void odp_event_free(odp_event_t event)
Free event.
void * odp_event_user_area(odp_event_t event)
Event user area.
#define ODP_EVENT_INVALID
Invalid event.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
odp_event_t odp_packet_to_event(odp_packet_t pkt)
Convert packet handle to event.
uint32_t odp_packet_seg_len(odp_packet_t pkt)
Packet data length following the data pointer.
void * odp_packet_data(odp_packet_t pkt)
Packet data pointer.
odp_packet_t odp_packet_alloc(odp_pool_t pool, uint32_t len)
Allocate a packet from a packet pool.
odp_packet_t odp_packet_from_event(odp_event_t ev)
Get packet handle from event.
#define ODP_PACKET_INVALID
Invalid packet.
odp_pool_t odp_pool_create(const char *name, const odp_pool_param_t *param)
Create a pool.
int odp_pool_capability(odp_pool_capability_t *capa)
Query pool capabilities.
void odp_pool_param_init(odp_pool_param_t *param)
Initialize pool params.
int odp_pool_destroy(odp_pool_t pool)
Destroy a pool previously created by odp_pool_create()
odp_pool_type_t
Pool types.
#define ODP_POOL_INVALID
Invalid pool.
@ ODP_POOL_BUFFER
Buffer pool.
@ ODP_POOL_PACKET
Packet pool.
int odp_queue_context_set(odp_queue_t queue, void *context, uint32_t len)
Set queue context.
int odp_queue_enq_multi(odp_queue_t queue, const odp_event_t events[], int num)
Enqueue multiple events to a queue.
void odp_queue_param_init(odp_queue_param_t *param)
Initialize queue params.
#define ODP_QUEUE_INVALID
Invalid queue.
void * odp_queue_context(odp_queue_t queue)
Get queue context.
int odp_queue_enq(odp_queue_t queue, odp_event_t ev)
Enqueue an event to a queue.
void odp_queue_print_all(void)
Print debug info about all queues.
odp_queue_t odp_queue_create(const char *name, const odp_queue_param_t *param)
Queue create.
int odp_queue_destroy(odp_queue_t queue)
Destroy ODP queue.
@ ODP_QUEUE_TYPE_SCHED
Scheduled queue.
int odp_schedule_sync_t
Scheduler synchronization method.
#define ODP_SCHED_SYNC_PARALLEL
Parallel scheduled queues.
int odp_schedule_prio_t
Scheduling priority level.
int odp_schedule_multi(odp_queue_t *from, uint64_t wait, odp_event_t events[], int num)
Schedule multiple events.
int odp_schedule_group_t
Scheduler thread group.
void odp_schedule_config_init(odp_schedule_config_t *config)
Initialize schedule configuration options.
int odp_schedule_group_join(odp_schedule_group_t group, const odp_thrmask_t *mask)
Join a schedule group.
#define ODP_SCHED_SYNC_ATOMIC
Atomic queue synchronization.
#define ODP_SCHED_SYNC_ORDERED
Ordered queue synchronization.
int odp_schedule_min_prio(void)
Minimum scheduling priority level.
#define ODP_SCHED_GROUP_WORKER
Group of all worker threads.
int odp_schedule_group_destroy(odp_schedule_group_t group)
Schedule group destroy.
#define ODP_SCHED_GROUP_INVALID
Invalid scheduler group.
#define ODP_SCHED_NO_WAIT
Do not wait.
int odp_schedule_default_prio(void)
Default scheduling priority level.
void odp_schedule_pause(void)
Pause scheduling.
int odp_schedule_max_prio(void)
Maximum scheduling priority level.
int odp_schedule_config(const odp_schedule_config_t *config)
Global schedule configuration.
uint64_t odp_schedule_wait_time(uint64_t ns)
Schedule wait time.
int odp_schedule_capability(odp_schedule_capability_t *capa)
Query scheduler capabilities.
odp_schedule_group_t odp_schedule_group_create(const char *name, const odp_thrmask_t *mask)
Schedule group create.
odp_event_t odp_schedule(odp_queue_t *from, uint64_t wait)
Schedule an event.
void odp_schedule_print(void)
Print debug info about scheduler.
#define ODP_SCHED_GROUP_ALL
Group of all threads.
void odp_shm_print_all(void)
Print all shared memory blocks.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
void odp_sys_info_print(void)
Print system info.
#define ODP_THREAD_COUNT_MAX
Maximum number of threads supported in build time.
void odp_thrmask_set(odp_thrmask_t *mask, int thr)
Add thread to mask.
int odp_thread_id(void)
Get thread identifier.
void odp_thrmask_zero(odp_thrmask_t *mask)
Clear entire thread mask.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
void odp_time_wait_ns(uint64_t ns)
Wait the specified number of nanoseconds.
odp_time_t odp_time_local(void)
Current local time.
#define ODP_TIME_MSEC_IN_NS
A millisecond in nanoseconds.
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
The OpenDataPlane API.
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
odp_feature_t not_used
Unused features.
struct odp_pool_capability_t::@121 buf
Buffer pool capabilities
struct odp_pool_capability_t::@122 pkt
Packet pool capabilities
uint32_t max_num
Maximum number of buffers of any size.
uint32_t max_uarea_size
Maximum user area size in bytes.
uint32_t max_size
Maximum buffer data size in bytes.
uint32_t max_seg_len
Maximum packet segment data length in bytes.
Pool parameters.
uint32_t uarea_size
Minimum user area size in bytes.
uint32_t num
Number of buffers in the pool.
uint32_t align
Minimum buffer alignment in bytes.
uint32_t size
Minimum buffer size in bytes.
odp_pool_type_t type
Pool type.
uint32_t len
Minimum length of 'num' packets.
uint32_t seg_len
Minimum number of packet data bytes that can be stored in the first segment of a newly allocated pack...
struct odp_pool_param_t::@126 pkt
Parameters for packet pools.
struct odp_pool_param_t::@125 buf
Parameters for buffer pools.
ODP Queue parameters.
odp_schedule_param_t sched
Scheduler parameters.
uint32_t size
Queue size.
odp_queue_type_t type
Queue type.
uint32_t max_groups
Maximum number of scheduling groups.
Schedule configuration.
odp_schedule_group_t group
Thread group.
odp_schedule_prio_t prio
Priority level.
odp_schedule_sync_t sync
Synchronization method.
uint32_t tm
Traffic Manager APIs, e.g., odp_tm_xxx()
uint32_t crypto
Crypto APIs, e.g., odp_crypto_xxx()
uint32_t ipsec
IPsec APIs, e.g., odp_ipsec_xxx()
uint32_t timer
Timer APIs, e.g., odp_timer_xxx(), odp_timeout_xxx()
uint32_t cls
Classifier APIs, e.g., odp_cls_xxx(), odp_cos_xxx()
struct odp_feature_t::@148 feat
Individual feature bits.
uint32_t compress
Compression APIs, e.g., odp_comp_xxx()