27 #include <odp/helper/odph_api.h>
29 #include <export_results.h>
31 #define MAX_QUEUES (256 * 1024)
32 #define MAX_GROUPS 256
35 #define DATA_MASK 0xffff
38 #define MAX_SCHED_WAIT_NS (10 * ODP_TIME_SEC_IN_NS)
41 #define TIME_CHECK_INTERVAL (1024 * 1024)
44 #define ROUNDUP(X, NUM) ((NUM) * (((X) + (NUM) - 1) / (NUM)))
46 typedef struct test_options_t {
70 uint32_t ctx_rd_words;
71 uint32_t ctx_rw_words;
82 typedef struct test_stat_t {
94 typedef struct thread_arg_t {
100 typedef struct test_global_t {
101 test_options_t test_options;
114 test_common_options_t common_options;
123 static test_global_t *test_globals;
125 static void sig_handler(
int signum
ODP_UNUSED)
130 static int setup_sig_handler(
void)
132 struct sigaction action = { .sa_handler = sig_handler };
134 if (sigemptyset(&action.sa_mask) || sigaction(SIGINT, &action, NULL))
140 static void print_usage(
void)
143 "Scheduler performance test\n"
145 "Usage: odp_sched_perf [options]\n"
147 " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default: 1.\n"
148 " -q, --num_queue Number of queues. Default: 1.\n"
149 " -L, --num_low Number of lowest priority queues out of '--num_queue' queues. Rest of\n"
150 " the queues are default (or highest) priority. Default: 0.\n"
151 " -H, --num_high Number of highest priority queues out of '--num_queue' queues. Rest of\n"
152 " the queues are default (or lowest) priority. Default: 0.\n"
153 " -d, --num_dummy Number of empty queues. Default: 0.\n"
154 " -e, --num_event Number of events per queue. Default: 100.\n"
155 " -s, --num_sched Number of events to schedule per thread. If zero, the application runs\n"
156 " until SIGINT is received. Default: 100 000.\n"
157 " -g, --num_group Number of schedule groups. Round robins threads and queues into groups.\n"
158 " -1: SCHED_GROUP_WORKER\n"
159 " 0: SCHED_GROUP_ALL (default)\n"
160 " -j, --num_join Number of groups a thread joins. Threads are divide evenly into groups,\n"
161 " if num_cpu is multiple of num_group and num_group is multiple of num_join.\n"
162 " 0: join all groups (default)\n"
163 " -b, --burst Maximum number of events per operation. Default: 100.\n"
164 " -t, --type Queue type. 0: parallel, 1: atomic, 2: ordered. Default: 0.\n"
165 " -f, --forward 0: Keep event in the original queue, 1: Forward event to the next queue. Default: 0.\n"
166 " -F, --fairness 0: Don't count events per queue, 1: Count and report events relative to average. Default: 0.\n"
167 " -w, --wait_ns Number of nsec to wait before enqueueing events. Default: 0.\n"
168 " -S, --stress CPU stress function(s) to be called for each event data word (requires -n or -m).\n"
169 " Data is processed as uint32_t words. Multiple flags may be selected.\n"
170 " 0: No extra data processing (default)\n"
171 " 0x1: Calculate square of each uint32_t\n"
172 " 0x2: Calculate log2 of each uint32_t\n"
173 " 0x4: Calculate square root of each uint32_t\n"
174 " 0x8: Calculate square root of each uint32_t in floating point\n"
175 " -k, --ctx_rd_words Number of queue context words (uint64_t) to read on every event. Default: 0.\n"
176 " -l, --ctx_rw_words Number of queue context words (uint64_t) to modify on every event. Default: 0.\n"
177 " -n, --rd_words Number of event data words (uint64_t) to read before enqueueing it. Default: 0.\n"
178 " -m, --rw_words Number of event data words (uint64_t) to modify before enqueueing it. Default: 0.\n"
179 " -u, --uarea_rd Number of user area words (uint64_t) to read on every event. Default: 0.\n"
180 " -U, --uarea_rw Number of user area words (uint64_t) to modify on every event. Default: 0.\n"
181 " -p, --pool_type Pool type. 0: buffer, 1: packet. Default: 0.\n"
182 " -v, --verbose Verbose output.\n"
183 " -h, --help This help\n"
187 static int parse_options(
int argc,
char *argv[], test_options_t *test_options)
189 int opt, num_group, num_join;
191 uint32_t ctx_size = 0;
194 static const struct option longopts[] = {
195 {
"num_cpu", required_argument, NULL,
'c'},
196 {
"num_queue", required_argument, NULL,
'q'},
197 {
"num_low", required_argument, NULL,
'L'},
198 {
"num_high", required_argument, NULL,
'H'},
199 {
"num_dummy", required_argument, NULL,
'd'},
200 {
"num_event", required_argument, NULL,
'e'},
201 {
"num_sched", required_argument, NULL,
's'},
202 {
"num_group", required_argument, NULL,
'g'},
203 {
"num_join", required_argument, NULL,
'j'},
204 {
"burst", required_argument, NULL,
'b'},
205 {
"type", required_argument, NULL,
't'},
206 {
"forward", required_argument, NULL,
'f'},
207 {
"fairness", required_argument, NULL,
'F'},
208 {
"wait_ns", required_argument, NULL,
'w'},
209 {
"stress", required_argument, NULL,
'S'},
210 {
"ctx_rd_words", required_argument, NULL,
'k'},
211 {
"ctx_rw_words", required_argument, NULL,
'l'},
212 {
"rd_words", required_argument, NULL,
'n'},
213 {
"rw_words", required_argument, NULL,
'm'},
214 {
"uarea_rd", required_argument, NULL,
'u'},
215 {
"uarea_rw", required_argument, NULL,
'U'},
216 {
"pool_type", required_argument, NULL,
'p'},
217 {
"verbose", no_argument, NULL,
'v'},
218 {
"help", no_argument, NULL,
'h'},
222 static const char *shortopts =
"+c:q:L:H:d:e:s:g:j:b:t:f:F:w:S:k:l:n:m:p:u:U:vh";
224 test_options->num_cpu = 1;
225 test_options->num_queue = 1;
226 test_options->num_low = 0;
227 test_options->num_high = 0;
228 test_options->num_dummy = 0;
229 test_options->num_event = 100;
230 test_options->num_sched = 100000;
231 test_options->num_group = 0;
232 test_options->num_join = 0;
233 test_options->max_burst = 100;
234 test_options->queue_type = 0;
235 test_options->forward = 0;
236 test_options->fairness = 0;
237 test_options->stress = 0;
238 test_options->ctx_rd_words = 0;
239 test_options->ctx_rw_words = 0;
240 test_options->rd_words = 0;
241 test_options->rw_words = 0;
242 test_options->uarea_rd = 0;
243 test_options->uarea_rw = 0;
244 test_options->wait_ns = 0;
245 test_options->verbose = 0;
248 opt = getopt_long(argc, argv, shortopts, longopts, NULL);
255 test_options->num_cpu = atoi(optarg);
258 test_options->num_queue = atoi(optarg);
261 test_options->num_low = atoi(optarg);
264 test_options->num_high = atoi(optarg);
267 test_options->num_dummy = atoi(optarg);
270 test_options->num_event = atoi(optarg);
273 test_options->num_sched = atoi(optarg);
276 test_options->num_group = atoi(optarg);
279 test_options->num_join = atoi(optarg);
282 test_options->max_burst = atoi(optarg);
285 test_options->queue_type = atoi(optarg);
288 test_options->forward = atoi(optarg);
291 test_options->fairness = atoi(optarg);
294 test_options->stress = strtoul(optarg, NULL, 0);
297 test_options->ctx_rd_words = atoi(optarg);
300 test_options->ctx_rw_words = atoi(optarg);
303 test_options->rd_words = atoi(optarg);
306 test_options->rw_words = atoi(optarg);
309 test_options->uarea_rd = atoi(optarg);
312 test_options->uarea_rw = atoi(optarg);
315 pool_type = atoi(optarg);
318 test_options->wait_ns = atoll(optarg);
321 test_options->verbose = 1;
331 if (pool_type == 0) {
333 }
else if (pool_type == 1) {
336 ODPH_ERR(
"Invalid pool type: %d.\n", pool_type);
340 test_options->touch_data = test_options->rd_words ||
341 test_options->rw_words;
343 if (test_options->stress && test_options->touch_data == 0) {
344 ODPH_ERR(
"Use -n or/and -m to select event data size with a stress function\n");
348 if ((test_options->num_queue + test_options->num_dummy) > MAX_QUEUES) {
349 ODPH_ERR(
"Too many queues. Max supported %i.\n", MAX_QUEUES);
353 if ((test_options->num_low + test_options->num_high) > test_options->num_queue) {
354 ODPH_ERR(
"Number of low/high prio %u/%u exceed number of queues %u.\n",
355 test_options->num_low, test_options->num_high, test_options->num_queue);
359 num_group = test_options->num_group;
360 num_join = test_options->num_join;
361 if (num_group > MAX_GROUPS) {
362 ODPH_ERR(
"Too many groups. Max supported %i.\n", MAX_GROUPS);
366 if (num_group > 0 && num_join > num_group) {
367 ODPH_ERR(
"num_join (%i) larger than num_group (%i).\n", num_join, num_group);
371 if (num_join && num_group > (
int)(test_options->num_cpu * num_join)) {
372 printf(
"WARNING: Too many groups (%i). Some groups (%i) are not served.\n\n",
373 num_group, num_group - (test_options->num_cpu * num_join));
375 if (test_options->forward) {
376 printf(
"Error: Cannot forward when some queues are not served.\n");
381 test_options->tot_queue = test_options->num_queue +
382 test_options->num_dummy;
383 test_options->tot_event = test_options->num_queue *
384 test_options->num_event;
386 test_options->queue_size = test_options->num_event;
388 if (test_options->forward) {
391 test_options->queue_size = test_options->tot_event;
394 if (test_options->forward || test_options->fairness)
395 ctx_size =
sizeof(queue_context_t);
397 if (test_options->ctx_rd_words || test_options->ctx_rw_words) {
400 ctx_size = ROUNDUP(ctx_size, 8);
401 ctx_size += 8 * test_options->ctx_rd_words;
402 ctx_size += 8 * test_options->ctx_rw_words;
407 if (test_options->fairness || test_options->ctx_rw_words)
408 ctx_size = ROUNDUP(ctx_size, ODP_CACHE_LINE_SIZE);
410 test_options->ctx_size = ctx_size;
411 test_options->uarea_size = 8 * (test_options->uarea_rd + test_options->uarea_rw);
412 test_options->tot_rd_size = 8 * (test_options->ctx_rd_words + test_options->uarea_rd +
413 test_options->rd_words);
414 test_options->tot_rw_size = 8 * (test_options->ctx_rw_words + test_options->uarea_rw +
415 test_options->rw_words);
420 static int set_num_cpu(test_global_t *global)
423 test_options_t *test_options = &global->test_options;
424 int num_cpu = test_options->num_cpu;
428 printf(
"Error: Too many workers. Maximum is %i.\n",
435 if (num_cpu && ret != num_cpu) {
436 printf(
"Error: Too many workers. Max supported %i\n.", ret);
443 test_options->num_cpu = num_cpu;
451 static uint64_t init_data(uint64_t init, uint64_t *data, uint32_t words)
456 for (i = 0; i < words; i++) {
458 val = (val + 1) & DATA_MASK;
464 static void print_options(test_options_t *options)
466 printf(
"\nScheduler performance test\n");
467 printf(
" num sched %u\n", options->num_sched);
468 printf(
" num cpu %u\n", options->num_cpu);
469 printf(
" num queues %u\n", options->num_queue);
470 printf(
" num lowest prio queues %u\n", options->num_low);
471 printf(
" num highest prio queues %u\n", options->num_high);
472 printf(
" num empty queues %u\n", options->num_dummy);
473 printf(
" total queues %u\n", options->tot_queue);
474 printf(
" num groups %i", options->num_group);
476 if (options->num_group == -1)
477 printf(
" (ODP_SCHED_GROUP_WORKER)\n");
478 else if (options->num_group == 0)
479 printf(
" (ODP_SCHED_GROUP_ALL)\n");
483 printf(
" num join %u\n", options->num_join);
484 printf(
" forward events %i\n", options->forward ? 1 : 0);
485 printf(
" wait %" PRIu64
" nsec\n", options->wait_ns);
486 printf(
" events per queue %u\n", options->num_event);
487 printf(
" queue size %u\n", options->queue_size);
488 printf(
" max burst size %u\n", options->max_burst);
489 printf(
" total events %u\n", options->tot_event);
490 printf(
" stress 0x%x\n", options->stress);
492 printf(
" event size %u bytes", options->event_size);
493 if (options->touch_data)
494 printf(
" (rd: %u, rw: %u)", 8 * options->rd_words, 8 * options->rw_words);
497 printf(
" queue context size %u bytes", options->ctx_size);
498 if (options->ctx_rd_words || options->ctx_rw_words) {
499 printf(
" (rd: %u, rw: %u)",
500 8 * options->ctx_rd_words,
501 8 * options->ctx_rw_words);
505 printf(
" user area size %u bytes", options->uarea_size);
506 if (options->uarea_size)
507 printf(
" (rd: %u, rw: %u)", 8 * options->uarea_rd, 8 * options->uarea_rw);
511 "buffer" :
"packet");
513 printf(
" queue type %s\n\n", options->queue_type == 0 ?
"parallel" :
514 options->queue_type == 1 ?
"atomic" :
517 printf(
"Extra rd/rw ops per event (queue context + user area + event data)\n");
518 printf(
" read %u bytes\n", options->tot_rd_size);
519 printf(
" write %u bytes\n\n", options->tot_rw_size);
522 static int create_pool(test_global_t *global)
527 uint32_t max_num, max_size, max_uarea;
528 test_options_t *test_options = &global->test_options;
529 uint32_t tot_event = test_options->tot_event;
530 uint32_t event_size = 16;
531 uint32_t uarea_size = test_options->uarea_size;
533 if (test_options->touch_data) {
534 event_size = test_options->rd_words + test_options->rw_words;
535 event_size = 8 * event_size;
537 test_options->event_size = event_size;
540 ODPH_ERR(
"Error: pool capa failed\n");
554 if (max_num && tot_event > max_num) {
555 ODPH_ERR(
"Error: max events supported %u\n", max_num);
559 if (max_size && event_size > max_size) {
560 ODPH_ERR(
"Error: max supported event size %u\n", max_size);
564 if (uarea_size > max_uarea) {
565 ODPH_ERR(
"Error: max supported user area size %u\n", max_uarea);
572 pool_param.
buf.
num = tot_event;
573 pool_param.
buf.
size = event_size;
578 pool_param.
pkt.
num = tot_event;
579 pool_param.
pkt.
len = event_size;
587 ODPH_ERR(
"Error: pool create failed\n");
596 static int create_groups(test_global_t *global)
601 test_options_t *test_options = &global->test_options;
602 uint32_t num_group = test_options->num_group;
604 if (test_options->num_group <= 0)
608 printf(
"Error: schedule capability failed\n");
613 printf(
"Error: Too many sched groups (max_groups capa %u)\n",
620 for (i = 0; i < num_group; i++) {
626 printf(
"Error: Group create failed %u\n", i);
630 global->group[i] = group;
636 static int create_queues(test_global_t *global)
642 uint32_t i, j, first;
643 test_options_t *test_options = &global->test_options;
644 uint32_t event_size = test_options->event_size;
645 uint32_t num_event = test_options->num_event;
646 uint32_t queue_size = test_options->queue_size;
647 uint32_t tot_queue = test_options->tot_queue;
648 uint32_t num_low = test_options->num_low;
649 uint32_t num_high = test_options->num_high;
650 uint32_t num_default = test_options->num_queue - num_low - num_high;
651 int num_group = test_options->num_group;
652 int type = test_options->queue_type;
655 uint32_t ctx_size = test_options->ctx_size;
656 uint64_t init_val = 0;
665 if (tot_queue > global->schedule_config.num_queues) {
666 printf(
"Max queues supported %u\n",
667 global->schedule_config.num_queues);
671 if (global->schedule_config.queue_size &&
672 queue_size > global->schedule_config.queue_size) {
673 printf(
"Max queue size %u\n",
674 global->schedule_config.queue_size);
681 printf(
"Bad queue context\n");
689 queue_param.
size = queue_size;
695 first = test_options->num_dummy;
697 for (i = 0; i < tot_queue; i++) {
702 group = global->group[i % num_group];
715 }
else if (num_high) {
726 }
else if (num_low) {
736 }
else if (num_high) {
751 global->queue[i] = queue;
754 printf(
"Error: Queue create failed %u\n", i);
762 for (i = first; i < tot_queue; i++) {
763 queue = global->queue[i];
770 queue_context_t *qc = (queue_context_t *)(uintptr_t)ctx;
772 if (test_options->forward) {
773 uint32_t next = i + 1;
775 if (next == tot_queue)
778 qc->next = global->queue[next];
781 if (test_options->fairness)
785 printf(
"Error: Context set failed %u\n", i);
792 for (j = 0; j < num_event; j++) {
801 ODPH_ERR(
"Error: alloc failed %u/%u\n", i, j);
812 ODPH_ERR(
"Error: alloc failed %u/%u\n", i, j);
821 init_val = init_data(init_val, data, words);
824 ODPH_ERR(
"Error: enqueue failed %u/%u\n", i, j);
833 static int join_group(test_global_t *global,
int grp_index,
int thr)
840 group = global->group[grp_index];
843 printf(
"Error: Group %i join failed (thr %i)\n",
851 static int join_all_groups(test_global_t *global,
int thr)
854 test_options_t *test_options = &global->test_options;
855 int num_group = test_options->num_group;
860 for (i = 0; i < num_group; i++) {
861 if (join_group(global, i, thr)) {
862 printf(
"Error: Group %u join failed (thr %i)\n",
871 static void print_queue_fairness(test_global_t *global)
874 queue_context_t *ctx;
875 test_options_t *test_options = &global->test_options;
876 uint32_t first = test_options->num_dummy;
877 uint32_t num_queue = test_options->num_queue;
878 uint32_t tot_queue = test_options->tot_queue;
882 if (!test_options->fairness)
885 for (i = first; i < tot_queue; i++) {
890 average = (double)total / (
double)num_queue;
893 printf(
"RESULTS - events per queue (percent of average):\n");
894 printf(
"------------------------------------------------\n");
895 printf(
" 1 2 3 4 5 6 7 8 9 10");
897 for (i = first; i < tot_queue; i++) {
910 static int destroy_queues(test_global_t *global)
915 test_options_t *test_options = &global->test_options;
916 uint32_t tot_queue = test_options->tot_queue;
919 if (join_all_groups(global, thr))
927 for (i = 0; i < tot_queue; i++) {
930 printf(
"Error: Queue destroy failed %u\n", i);
939 static int destroy_groups(test_global_t *global)
942 test_options_t *test_options = &global->test_options;
943 int num_group = test_options->num_group;
948 for (i = 0; i < num_group; i++) {
952 printf(
"Error: Group destroy failed %u\n", i);
960 static uint64_t rw_uarea(
odp_event_t ev[],
int num, uint32_t rd_words, uint32_t rw_words)
967 for (i = 0; i < num; i++) {
970 for (j = 0; j < rd_words; j++)
973 for (; j < rd_words + rw_words; j++) {
982 static inline uint64_t rw_ctx_data(
void *ctx, uint32_t offset,
983 uint32_t rd_words, uint32_t rw_words)
989 data = (uint64_t *)(uintptr_t)((uint8_t *)ctx + offset);
991 for (i = 0; i < rd_words; i++)
994 for (; i < rd_words + rw_words; i++) {
1002 static uint64_t rw_data(
odp_event_t ev[],
int num, uint32_t rd_words, uint32_t rw_words,
1009 for (
int i = 0; i < num; i++) {
1015 for (j = 0; j < rd_words; j++)
1018 for (; j < rd_words + rw_words; j++) {
1027 static uint64_t rw_data_stress(
odp_event_t ev[],
int num, uint32_t rd_words, uint32_t rw_words,
1035 for (
int i = 0; i < num; i++) {
1041 for (j = 0; j < rd_words + rw_words; j++) {
1045 sum += odph_stress_pow2_u32(word);
1047 sum += odph_stress_log2_u32(word);
1049 sum += odph_stress_sqrt_u32(word);
1051 sum += odph_stress_sqrt_f32(word);
1054 data[j] = (word + 1) & DATA_MASK;
1061 static int test_sched(
void *arg)
1063 int num, num_enq, ret, thr;
1065 uint64_t c1, c2, cycles, nsec;
1066 uint64_t events, enqueues, waits, events_prev;
1069 thread_arg_t *thread_arg = arg;
1070 test_global_t *global = thread_arg->global;
1071 test_options_t *test_options = &global->test_options;
1072 uint32_t num_sched = test_options->num_sched;
1073 uint32_t max_burst = test_options->max_burst;
1074 int num_group = test_options->num_group;
1075 int forward = test_options->forward;
1076 int fairness = test_options->fairness;
1077 const int touch_data = test_options->touch_data;
1078 const uint32_t stress = test_options->stress;
1079 const uint32_t rd_words = test_options->rd_words;
1080 const uint32_t rw_words = test_options->rw_words;
1081 uint32_t ctx_size = test_options->ctx_size;
1082 uint32_t ctx_rd_words = test_options->ctx_rd_words;
1083 uint32_t ctx_rw_words = test_options->ctx_rw_words;
1084 const uint32_t uarea_size = test_options->uarea_size;
1085 const uint32_t uarea_rd = test_options->uarea_rd;
1086 const uint32_t uarea_rw = test_options->uarea_rw;
1088 int touch_ctx = ctx_rd_words || ctx_rw_words;
1090 uint32_t ctx_offset = 0;
1091 uint32_t sched_retries = 0;
1092 uint64_t data_sum = 0;
1093 uint64_t ctx_sum = 0;
1094 uint64_t uarea_sum = 0;
1095 uint64_t wait_ns = test_options->wait_ns;
1100 if (forward || fairness)
1101 ctx_offset = ROUNDUP(
sizeof(queue_context_t), 8);
1103 if (num_group > 0) {
1104 uint32_t num_join = test_options->num_join;
1110 int group_index = thread_arg->first_group;
1112 pos += snprintf(&str[pos], n - pos,
1113 "Thread %i joined groups:", thr);
1115 for (i = 0; i < num_join; i++) {
1116 if (join_group(global, group_index, thr))
1119 pos += snprintf(&str[pos], n - pos,
" %i",
1122 group_index = (group_index + 1) % num_group;
1125 printf(
"%s\n", str);
1128 if (join_all_groups(global, thr))
1133 for (i = 0; i < max_burst; i++)
1162 uarea_sum += rw_uarea(ev, num, uarea_rd, uarea_rw);
1174 ctx_sum += rw_ctx_data(ctx, ctx_offset,
1181 data_sum += rw_data_stress(ev, num, rd_words, rw_words,
1184 data_sum += rw_data(ev, num, rd_words, rw_words, pool_type);
1198 printf(
"Error: Enqueue failed. Round %u\n",
1214 }
else if (num == 0) {
1216 if (
odp_unlikely(sched_retries > TIME_CHECK_INTERVAL)) {
1222 if (events_prev != events) {
1223 events_prev = events;
1224 last_retry_ts = cur_time;
1227 MAX_SCHED_WAIT_NS) {
1228 printf(
"Error: scheduling timed out\n");
1237 printf(
"Error: Sched failed. Round %u\n", rounds);
1250 global->stat[thr].rounds = rounds;
1251 global->stat[thr].enqueues = enqueues;
1252 global->stat[thr].events = events;
1253 global->stat[thr].nsec = nsec;
1254 global->stat[thr].cycles = cycles;
1255 global->stat[thr].waits = waits;
1256 global->stat[thr].dummy_sum = data_sum + ctx_sum + uarea_sum;
1257 global->stat[thr].failed = ret;
1267 if (test_options->verbose) {
1289 printf(
"Error: Queue enqueue failed\n");
1298 static int start_workers(test_global_t *global,
odp_instance_t instance)
1300 odph_thread_common_param_t thr_common;
1302 test_options_t *test_options = &global->test_options;
1303 int num_group = test_options->num_group;
1304 uint32_t num_join = test_options->num_join;
1305 int num_cpu = test_options->num_cpu;
1306 odph_thread_param_t thr_param[num_cpu];
1310 memset(global->thread_tbl, 0,
sizeof(global->thread_tbl));
1311 odph_thread_common_param_init(&thr_common);
1313 thr_common.instance = instance;
1314 thr_common.cpumask = &global->cpumask;
1316 for (i = 0; i < num_cpu; i++) {
1317 odph_thread_param_init(&thr_param[i]);
1318 thr_param[i].start = test_sched;
1319 thr_param[i].arg = &global->thread_arg[i];
1322 global->thread_arg[i].global = global;
1323 global->thread_arg[i].first_group = 0;
1325 if (num_group > 0 && num_join) {
1329 int first_group = (i * num_join) % num_group;
1331 global->thread_arg[i].first_group = first_group;
1335 ret = odph_thread_create(global->thread_tbl, &thr_common, thr_param,
1338 if (ret != num_cpu) {
1339 printf(
"Error: thread create failed %i\n", ret);
1346 static double measure_wait_time_cycles(uint64_t wait_ns)
1348 uint64_t i, c1, c2, diff;
1363 for (i = 0; i < rounds; i++)
1368 wait_cycles = (double)diff / rounds;
1370 printf(
"\nMeasured wait cycles: %.3f\n", wait_cycles);
1375 static int output_results(test_global_t *global)
1378 double rounds_ave, enqueues_ave, events_ave, events_per_sec, nsec_ave, cycles_ave;
1379 double waits_ave, wait_cycles, wait_cycles_ave;
1380 test_options_t *test_options = &global->test_options;
1381 int num_cpu = test_options->num_cpu;
1382 uint64_t wait_ns = test_options->wait_ns;
1383 uint64_t rounds_sum = 0;
1384 uint64_t enqueues_sum = 0;
1385 uint64_t events_sum = 0;
1386 uint64_t nsec_sum = 0;
1387 uint64_t cycles_sum = 0;
1388 uint64_t waits_sum = 0;
1389 uint32_t tot_rd = test_options->tot_rd_size;
1390 uint32_t tot_rw = test_options->tot_rw_size;
1392 wait_cycles = measure_wait_time_cycles(wait_ns);
1396 if (global->stat[i].failed) {
1400 rounds_sum += global->stat[i].rounds;
1401 enqueues_sum += global->stat[i].enqueues;
1402 events_sum += global->stat[i].events;
1403 nsec_sum += global->stat[i].nsec;
1404 cycles_sum += global->stat[i].cycles;
1405 waits_sum += global->stat[i].waits;
1408 if (rounds_sum == 0 || num_cpu <= 0) {
1409 printf(
"No results.\n");
1413 rounds_ave = rounds_sum / num_cpu;
1414 enqueues_ave = enqueues_sum / num_cpu;
1415 events_ave = events_sum / num_cpu;
1416 nsec_ave = nsec_sum / num_cpu;
1417 cycles_ave = cycles_sum / num_cpu;
1418 waits_ave = waits_sum / num_cpu;
1419 wait_cycles_ave = waits_ave * wait_cycles;
1423 printf(
"RESULTS - per thread (Million events per sec):\n");
1424 printf(
"----------------------------------------------\n");
1425 printf(
" 1 2 3 4 5 6 7 8 9 10");
1428 if (global->stat[i].rounds) {
1429 if ((num % 10) == 0)
1432 if (global->stat[i].failed)
1436 (1000.0 * global->stat[i].events) /
1437 global->stat[i].nsec);
1444 printf(
"RESULTS - average over %i threads:\n", num_cpu);
1445 printf(
"----------------------------------\n");
1446 printf(
" schedule calls: %.3f\n", rounds_ave);
1447 printf(
" enqueue calls: %.3f\n", enqueues_ave);
1448 printf(
" duration: %.3f msec\n", nsec_ave / 1000000);
1449 printf(
" num cycles: %.3f M\n", cycles_ave / 1000000);
1450 printf(
" cycles per round: %.3f\n",
1451 cycles_ave / rounds_ave);
1452 printf(
" cycles per event: %.3f\n",
1453 cycles_ave / events_ave);
1455 printf(
" without wait_ns cycles: %.3f\n",
1456 (cycles_ave - wait_cycles_ave) / events_ave);
1458 printf(
" ave events received: %.3f\n",
1459 events_ave / rounds_ave);
1460 printf(
" rounds per sec: %.3f M\n",
1461 (1000.0 * rounds_ave) / nsec_ave);
1463 events_per_sec = (1000.0 * events_ave) / nsec_ave;
1464 printf(
" events per sec: %.3f M\n", events_per_sec);
1466 printf(
" extra reads per sec: %.3f MB\n", tot_rd * events_per_sec);
1467 printf(
" extra writes per sec: %.3f MB\n", tot_rw * events_per_sec);
1469 printf(
"TOTAL events per sec: %.3f M\n\n",
1470 (1000.0 * events_sum) / nsec_ave);
1472 if (global->common_options.is_export) {
1473 if (test_common_write(
"schedule calls,enqueue calls,duration (msec),"
1474 "num cycles (M),cycles per round,cycles per event,"
1475 "ave events received,rounds per sec (M),"
1476 "events per sec (M), TOTAL events per sec (M)\n")) {
1477 ODPH_ERR(
"Export failed\n");
1478 test_common_write_term();
1482 if (test_common_write(
"%f,%f,%f,%f,%f,%f,%f,%f,%f,%f\n",
1483 rounds_ave, enqueues_ave, nsec_ave / 1000000,
1484 cycles_ave / 1000000, cycles_ave / rounds_ave,
1485 cycles_ave / events_ave, events_ave / rounds_ave,
1486 (1000.0 * rounds_ave) / nsec_ave,
1487 (1000.0 * events_ave) / nsec_ave,
1488 (1000.0 * events_sum) / nsec_ave)) {
1489 ODPH_ERR(
"Export failed\n");
1490 test_common_write_term();
1494 test_common_write_term();
1500 int main(
int argc,
char **argv)
1502 odph_helper_options_t helper_options;
1506 test_global_t *global;
1507 test_common_options_t common_options;
1510 argc = odph_parse_options(argc, argv);
1511 if (odph_options(&helper_options)) {
1512 ODPH_ERR(
"Error: Reading ODP helper options failed.\n");
1516 argc = test_common_parse_options(argc, argv);
1517 if (test_common_options(&common_options)) {
1518 ODPH_ERR(
"Error: Reading test options failed\n");
1531 init.
mem_model = helper_options.mem_model;
1535 printf(
"Error: Global init failed.\n");
1541 printf(
"Error: Local init failed.\n");
1545 shm =
odp_shm_reserve(
"sched_perf_global",
sizeof(test_global_t), ODP_CACHE_LINE_SIZE, 0);
1547 ODPH_ERR(
"Error: SHM reserve failed.\n");
1552 if (global == NULL) {
1553 ODPH_ERR(
"Error: SHM alloc failed\n");
1556 test_globals = global;
1558 memset(global, 0,
sizeof(test_global_t));
1563 global->common_options = common_options;
1565 if (setup_sig_handler()) {
1566 ODPH_ERR(
"Error: signal handler setup failed\n");
1570 if (parse_options(argc, argv, &global->test_options))
1575 if (global->test_options.ctx_size) {
1576 uint64_t size = (uint64_t)global->test_options.ctx_size *
1577 global->test_options.tot_queue;
1580 ODP_CACHE_LINE_SIZE, 0);
1582 printf(
"Error: SHM reserve %" PRIu64
" bytes failed\n",
1591 if (set_num_cpu(global))
1594 if (create_pool(global))
1597 if (create_groups(global))
1600 if (create_queues(global))
1603 if (global->test_options.verbose)
1606 print_options(&global->test_options);
1609 start_workers(global, instance);
1612 odph_thread_join(global->thread_tbl, global->test_options.num_cpu);
1614 print_queue_fairness(global);
1616 if (destroy_queues(global))
1619 if (destroy_groups(global))
1622 if (output_results(global))
1626 printf(
"Error: Pool destroy failed.\n");
1634 ODPH_ERR(
"Error: SHM free failed.\n");
1639 printf(
"Error: term local failed.\n");
1644 printf(
"Error: term global failed.\n");
void odp_atomic_init_u32(odp_atomic_u32_t *atom, uint32_t val)
Initialize atomic uint32 variable.
uint32_t odp_atomic_load_u32(odp_atomic_u32_t *atom)
Load value of atomic uint32 variable.
void odp_atomic_init_u64(odp_atomic_u64_t *atom, uint64_t val)
Initialize atomic uint64 variable.
void odp_atomic_store_u32(odp_atomic_u32_t *atom, uint32_t val)
Store value to atomic uint32 variable.
uint32_t odp_atomic_fetch_dec_u32(odp_atomic_u32_t *atom)
Fetch and decrement atomic uint32 variable.
void odp_atomic_add_u64(odp_atomic_u64_t *atom, uint64_t val)
Add to atomic uint64 variable.
uint64_t odp_atomic_load_u64(odp_atomic_u64_t *atom)
Load value of atomic uint64 variable.
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
uint32_t odp_buffer_size(odp_buffer_t buf)
Buffer maximum data size.
odp_event_t odp_buffer_to_event(odp_buffer_t buf)
Convert buffer handle to event.
odp_buffer_t odp_buffer_alloc(odp_pool_t pool)
Buffer alloc.
void * odp_buffer_addr(odp_buffer_t buf)
Buffer start address.
odp_buffer_t odp_buffer_from_event(odp_event_t ev)
Get buffer handle from event.
#define ODP_BUFFER_INVALID
Invalid buffer.
#define odp_unlikely(x)
Branch unlikely taken.
#define ODP_UNUSED
Intentionally unused variables of functions.
#define odp_likely(x)
Branch likely taken.
uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
CPU cycle count difference.
uint64_t odp_cpu_cycles(void)
Current CPU cycle count.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
void odp_event_free_multi(const odp_event_t event[], int num)
Free multiple events.
void odp_event_free(odp_event_t event)
Free event.
void * odp_event_user_area(odp_event_t event)
Event user area.
#define ODP_EVENT_INVALID
Invalid event.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
odp_event_t odp_packet_to_event(odp_packet_t pkt)
Convert packet handle to event.
uint32_t odp_packet_seg_len(odp_packet_t pkt)
Packet data length following the data pointer.
void * odp_packet_data(odp_packet_t pkt)
Packet data pointer.
odp_packet_t odp_packet_alloc(odp_pool_t pool, uint32_t len)
Allocate a packet from a packet pool.
odp_packet_t odp_packet_from_event(odp_event_t ev)
Get packet handle from event.
#define ODP_PACKET_INVALID
Invalid packet.
odp_pool_t odp_pool_create(const char *name, const odp_pool_param_t *param)
Create a pool.
int odp_pool_capability(odp_pool_capability_t *capa)
Query pool capabilities.
void odp_pool_param_init(odp_pool_param_t *param)
Initialize pool params.
int odp_pool_destroy(odp_pool_t pool)
Destroy a pool previously created by odp_pool_create()
odp_pool_type_t
Pool types.
#define ODP_POOL_INVALID
Invalid pool.
@ ODP_POOL_BUFFER
Buffer pool.
@ ODP_POOL_PACKET
Packet pool.
int odp_queue_context_set(odp_queue_t queue, void *context, uint32_t len)
Set queue context.
int odp_queue_enq_multi(odp_queue_t queue, const odp_event_t events[], int num)
Enqueue multiple events to a queue.
void odp_queue_param_init(odp_queue_param_t *param)
Initialize queue params.
#define ODP_QUEUE_INVALID
Invalid queue.
void * odp_queue_context(odp_queue_t queue)
Get queue context.
int odp_queue_enq(odp_queue_t queue, odp_event_t ev)
Enqueue an event to a queue.
void odp_queue_print_all(void)
Print debug info about all queues.
odp_queue_t odp_queue_create(const char *name, const odp_queue_param_t *param)
Queue create.
int odp_queue_destroy(odp_queue_t queue)
Destroy ODP queue.
@ ODP_QUEUE_TYPE_SCHED
Scheduled queue.
int odp_schedule_sync_t
Scheduler synchronization method.
#define ODP_SCHED_SYNC_PARALLEL
Parallel scheduled queues.
int odp_schedule_prio_t
Scheduling priority level.
int odp_schedule_multi(odp_queue_t *from, uint64_t wait, odp_event_t events[], int num)
Schedule multiple events.
int odp_schedule_group_t
Scheduler thread group.
void odp_schedule_config_init(odp_schedule_config_t *config)
Initialize schedule configuration options.
int odp_schedule_group_join(odp_schedule_group_t group, const odp_thrmask_t *mask)
Join a schedule group.
#define ODP_SCHED_SYNC_ATOMIC
Atomic queue synchronization.
#define ODP_SCHED_SYNC_ORDERED
Ordered queue synchronization.
int odp_schedule_min_prio(void)
Minimum scheduling priority level.
#define ODP_SCHED_GROUP_WORKER
Group of all worker threads.
int odp_schedule_group_destroy(odp_schedule_group_t group)
Schedule group destroy.
#define ODP_SCHED_GROUP_INVALID
Invalid scheduler group.
#define ODP_SCHED_NO_WAIT
Do not wait.
int odp_schedule_default_prio(void)
Default scheduling priority level.
void odp_schedule_pause(void)
Pause scheduling.
int odp_schedule_max_prio(void)
Maximum scheduling priority level.
int odp_schedule_config(const odp_schedule_config_t *config)
Global schedule configuration.
uint64_t odp_schedule_wait_time(uint64_t ns)
Schedule wait time.
int odp_schedule_capability(odp_schedule_capability_t *capa)
Query scheduler capabilities.
odp_schedule_group_t odp_schedule_group_create(const char *name, const odp_thrmask_t *mask)
Schedule group create.
odp_event_t odp_schedule(odp_queue_t *from, uint64_t wait)
Schedule an event.
void odp_schedule_print(void)
Print debug info about scheduler.
#define ODP_SCHED_GROUP_ALL
Group of all threads.
void odp_shm_print_all(void)
Print all shared memory blocks.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
void odp_sys_info_print(void)
Print system info.
#define ODP_THREAD_COUNT_MAX
Maximum number of threads supported in build time.
void odp_thrmask_set(odp_thrmask_t *mask, int thr)
Add thread to mask.
int odp_thread_id(void)
Get thread identifier.
void odp_thrmask_zero(odp_thrmask_t *mask)
Clear entire thread mask.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
void odp_time_wait_ns(uint64_t ns)
Wait the specified number of nanoseconds.
odp_time_t odp_time_local(void)
Current local time.
#define ODP_TIME_MSEC_IN_NS
A millisecond in nanoseconds.
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
odp_feature_t not_used
Unused features.
struct odp_pool_capability_t::@121 buf
Buffer pool capabilities.
struct odp_pool_capability_t::@122 pkt
Packet pool capabilities.
uint32_t max_num
Maximum number of buffers of any size.
uint32_t max_uarea_size
Maximum user area size in bytes.
uint32_t max_size
Maximum buffer data size in bytes.
uint32_t max_seg_len
Maximum packet segment data length in bytes.
uint32_t uarea_size
Minimum user area size in bytes.
uint32_t num
Number of buffers in the pool.
uint32_t align
Minimum buffer alignment in bytes.
uint32_t size
Minimum buffer size in bytes.
odp_pool_type_t type
Pool type.
uint32_t len
Minimum length of 'num' packets.
uint32_t seg_len
Minimum number of packet data bytes that can be stored in the first segment of a newly allocated packet.
struct odp_pool_param_t::@126 pkt
Parameters for packet pools.
struct odp_pool_param_t::@125 buf
Parameters for buffer pools.
odp_schedule_param_t sched
Scheduler parameters.
odp_queue_type_t type
Queue type.
uint32_t max_groups
Maximum number of scheduling groups.
odp_schedule_group_t group
Thread group.
odp_schedule_prio_t prio
Priority level.
odp_schedule_sync_t sync
Synchronization method.
uint32_t tm
Traffic Manager APIs, e.g., odp_tm_xxx().
uint32_t crypto
Crypto APIs, e.g., odp_crypto_xxx().
uint32_t ipsec
IPsec APIs, e.g., odp_ipsec_xxx().
uint32_t timer
Timer APIs, e.g., odp_timer_xxx(), odp_timeout_xxx().
uint32_t cls
Classifier APIs, e.g., odp_cls_xxx(), odp_cos_xxx().
struct odp_feature_t::@148 feat
Individual feature bits.
uint32_t compress
Compression APIs, e.g., odp_comp_xxx().