21 #include <odp/helper/odph_api.h>
23 #include <export_results.h>
/* Upper bound on the worker count: set_num_cpu() clamps its maximum to this value */
26 #define DEFAULT_MAX_WORKERS 10
/* Capacity of every per-lock counter array; parse_options() rejects num_counter above it */
29 #define MAX_COUNTERS 8
/* Expands to a brace initializer { name, test, validate }; used for the test_suite[] entries */
31 #define TEST_INFO(name, test, validate) { name, test, validate }
/* Number of slots in the results[] array of test_global_t */
34 #define TEST_MAX_BENCH 50
36 typedef enum repeat_t {
42 typedef enum place_t {
49 typedef struct test_options_t {
59 static test_options_t test_options_def = {
/* Forward declaration so the function pointer types below can refer to the
 * global state; the struct body is defined further down in this file. */
68 typedef struct test_global_t test_global_t;
/* Test body signature: receives the global state, an array of pointers to the
 * counters and the number of counters in use (run_test() passes
 * test_options->num_counter). */
71 typedef void (*test_fn_t)(test_global_t *g, uint64_t **counter,
72 uint32_t num_counter);
/* Validation signature: takes the same arguments as test_fn_t.
 * NOTE(review): validate_results() branches on a non-zero return, which looks
 * like the failure path - confirm against the full source. */
74 typedef int (*validate_fn_t)(test_global_t *g, uint64_t **counter,
75 uint32_t num_counter);
78 typedef struct test_thread_ctx_t {
79 test_global_t *global;
86 typedef struct results_t {
87 const char *test_name;
88 double cycles_per_round;
90 double rounds_per_cpu;
95 struct test_global_t {
96 test_options_t test_options;
102 test_common_options_t common_options;
103 results_t results[TEST_MAX_BENCH];
107 uint64_t counter[MAX_COUNTERS];
111 uint64_t counter[MAX_COUNTERS];
112 } spinlock_recursive;
115 uint64_t counter[MAX_COUNTERS];
119 uint64_t counter[MAX_COUNTERS];
123 uint64_t counter[MAX_COUNTERS];
126 uint64_t counter[MAX_COUNTERS];
130 } all_separate[MAX_COUNTERS];
137 validate_fn_t validate_fn;
/* File-scope pointer to the test state; main() NULL-checks it, zero-fills the
 * structure it points to and then copies the parsed options into it. */
140 static test_global_t *test_global;
142 static inline void test_spinlock(test_global_t *g, uint64_t **counter,
143 uint32_t num_counter)
147 for (uint64_t i = 0; i < g->test_options.num_round; i++) {
149 for (uint32_t j = 0; j < num_counter; j++)
155 static inline void test_spinlock_recursive(test_global_t *g, uint64_t **counter,
156 uint32_t num_counter)
160 for (uint64_t i = 0; i < g->test_options.num_round; i++) {
163 for (uint32_t j = 0; j < num_counter; j++)
170 static inline void test_rwlock(test_global_t *g, uint64_t **counter,
171 uint32_t num_counter)
175 for (uint64_t i = 0; i < g->test_options.num_round; i++) {
177 for (uint32_t j = 0; j < num_counter; j++)
181 for (uint32_t j = 1; j < num_counter; j++)
182 if (*counter[0] != *counter[j]) {
184 ODPH_ERR(
"Error: Counter mismatch\n");
191 static inline void test_rwlock_recursive(test_global_t *g, uint64_t **counter,
192 uint32_t num_counter)
196 for (uint64_t i = 0; i < g->test_options.num_round; i++) {
199 for (uint32_t j = 0; j < num_counter; j++)
205 for (uint32_t j = 1; j < num_counter; j++)
206 if (*counter[0] != *counter[j]) {
209 ODPH_ERR(
"Error: Counter mismatch\n");
217 static inline void test_ticketlock(test_global_t *g, uint64_t **counter,
218 uint32_t num_counter)
222 for (uint64_t i = 0; i < g->test_options.num_round; i++) {
224 for (uint32_t j = 0; j < num_counter; j++)
230 static inline int validate_generic(test_global_t *g, uint64_t **counter,
231 uint32_t num_counter)
234 uint64_t total = (uint64_t)g->test_options.num_cpu * g->test_options.num_round;
236 for (uint32_t i = 0; i < num_counter; i++) {
237 if (*counter[i] != total) {
239 ODPH_ERR(
"Error: Counter %d value %" PRIu64
" expected %" PRIu64
"\n",
240 i, *counter[i], total);
247 static void print_usage(
void)
250 "Lock performance test\n"
252 "Usage: odp_lock_perf [options]\n"
254 " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs (or max %d) (default)\n"
255 " -t, --type Lock type to test. 0: all (default %u)\n"
256 " 1: odp_spinlock_t\n"
257 " 2: odp_spinlock_recursive_t\n"
259 " 4: odp_rwlock_recursive_t\n"
260 " 5: odp_ticketlock_t\n"
261 " -r, --num_round Number of rounds (default %" PRIu64
")\n"
262 " -e, --repeat Repeat the tests (default %u)\n"
263 " 0: no repeat, run the tests once\n"
264 " 1: repeat until failure\n"
265 " 2: repeat forever\n"
266 " -o, --num_counter Number of counters (default %u)\n"
267 " -p, --place Counter placement (default %d)\n"
268 " 0: pack to same cache line with lock\n"
269 " 1: pack to separate cache line\n"
270 " 2: place each counter to separate cache line\n"
271 " -h, --help This help\n"
273 DEFAULT_MAX_WORKERS, test_options_def.type,
274 test_options_def.num_round, test_options_def.repeat,
275 test_options_def.num_counter, test_options_def.place);
278 static void print_info(test_options_t *test_options)
280 printf(
"\nLock performance test configuration:\n");
281 printf(
" num cpu %u\n", test_options->num_cpu);
282 printf(
" type %u\n", test_options->type);
283 printf(
" num rounds %" PRIu64
"\n", test_options->num_round);
284 printf(
" repeat %u\n", test_options->repeat);
285 printf(
" num counters %u\n", test_options->num_counter);
286 printf(
" place %u\n", test_options->place);
290 static int output_summary(test_global_t *global)
292 int results_size = ODPH_ARRAY_SIZE(global->results);
295 if (global->common_options.is_export) {
296 if (test_common_write(
"function name,rounds/cpu (M/s),"
297 "total rounds (M/s),cycles/round,nsec/round\n")) {
298 test_common_write_term();
303 printf(
"Average results over %i threads:\n", global->test_options.num_cpu);
304 printf(
"%-33s %-18s %-20s %-14s %-12s\n",
"function name",
"rounds/cpu (M/s)",
305 "total rounds (M/s)",
"cycles/round",
"nsec/round");
306 printf(
"----------------------------------------------------------------------------"
307 "---------------------\n");
308 for (
int i = 0; i < results_size && global->results[i].test_name; i++) {
309 res = global->results[i];
310 printf(
"[%02d] %-28s %-18.2f %-20.2f %-14.2f %-12.2f\n", i + 1,
311 res.test_name, res.rounds_per_cpu, res.total_rounds,
312 res.cycles_per_round, res.nsec_per_op);
313 if (global->common_options.is_export) {
314 if (test_common_write(
"%s,%f,%f,%f,%f\n", res.test_name, res.rounds_per_cpu,
315 res.total_rounds, res.cycles_per_round,
317 test_common_write_term();
323 if (global->common_options.is_export)
324 test_common_write_term();
329 static int parse_options(
int argc,
char *argv[], test_options_t *test_options)
334 static const struct option longopts[] = {
335 {
"num_cpu", required_argument, NULL,
'c' },
336 {
"type", required_argument, NULL,
't' },
337 {
"num_round", required_argument, NULL,
'r' },
338 {
"repeat", required_argument, NULL,
'e' },
339 {
"num_counter", required_argument, NULL,
'o' },
340 {
"place", required_argument, NULL,
'p' },
341 {
"help", no_argument, NULL,
'h' },
345 static const char *shortopts =
"+c:t:r:e:o:p:h";
347 *test_options = test_options_def;
350 opt = getopt_long(argc, argv, shortopts, longopts, NULL);
357 test_options->num_cpu = atoi(optarg);
360 test_options->type = atoi(optarg);
363 test_options->num_round = atoll(optarg);
366 test_options->repeat = atoi(optarg);
369 test_options->num_counter = atoi(optarg);
372 test_options->place = atoi(optarg);
383 if (test_options->num_round < 1) {
384 ODPH_ERR(
"Invalid number of test rounds: %" PRIu64
"\n",
385 test_options->num_round);
389 if (test_options->num_counter < 1 ||
390 test_options->num_counter > MAX_COUNTERS) {
391 ODPH_ERR(
"Invalid number of counters: %" PRIu32
"\n",
392 test_options->num_counter);
399 static int set_num_cpu(test_global_t *global)
402 test_options_t *test_options = &global->test_options;
403 int num_cpu = test_options->num_cpu;
414 if (max_num > DEFAULT_MAX_WORKERS)
415 max_num = DEFAULT_MAX_WORKERS;
420 if (num_cpu && ret != num_cpu) {
421 ODPH_ERR(
"Too many workers. Max supported %i.\n", ret);
428 ODPH_ERR(
"Too many cpus from odp_cpumask_default_worker(): %i\n", ret);
433 test_options->num_cpu = num_cpu;
441 static int init_test(test_global_t *g,
const char *name)
443 printf(
"TEST: %s\n", name);
445 memset(&g->item, 0,
sizeof(g->item));
455 static void fill_counter_ptrs(test_global_t *g, uint64_t **counter_out)
457 test_options_t *test_options = &g->test_options;
459 memset(counter_out, 0,
sizeof(uint64_t *) * MAX_COUNTERS);
461 switch (test_options->place) {
463 for (uint32_t i = 0; i < test_options->num_counter; i++) {
464 switch (g->cur_type) {
466 counter_out[i] = &g->item.spinlock.counter[i];
469 counter_out[i] = &g->item.spinlock_recursive.counter[i];
472 counter_out[i] = &g->item.rwlock.counter[i];
475 counter_out[i] = &g->item.rwlock_recursive.counter[i];
478 counter_out[i] = &g->item.ticketlock.counter[i];
484 for (uint32_t i = 0; i < test_options->num_counter; i++)
485 counter_out[i] = &g->item.separate.counter[i];
487 case PLACE_ALL_SEPARATE:
488 for (uint32_t i = 0; i < test_options->num_counter; i++)
489 counter_out[i] = &g->item.all_separate[i].counter;
494 static int run_test(
void *arg)
500 test_thread_ctx_t *thread_ctx = arg;
501 test_global_t *global = thread_ctx->global;
502 test_options_t *test_options = &global->test_options;
503 test_fn_t test_func = thread_ctx->func;
504 uint64_t *counter[MAX_COUNTERS];
506 fill_counter_ptrs(global, counter);
514 test_func(global, counter, test_options->num_counter);
523 thread_ctx->nsec = nsec;
524 thread_ctx->cycles = cycles;
529 static int start_workers(test_global_t *global,
odp_instance_t instance,
532 odph_thread_common_param_t param;
534 test_options_t *test_options = &global->test_options;
535 int num_cpu = test_options->num_cpu;
536 odph_thread_param_t thr_param[num_cpu];
538 odph_thread_common_param_init(&param);
539 param.instance = instance;
540 param.cpumask = &global->cpumask;
542 for (i = 0; i < num_cpu; i++) {
543 test_thread_ctx_t *thread_ctx = &global->thread_ctx[i];
545 thread_ctx->global = global;
547 thread_ctx->func = func;
549 odph_thread_param_init(&thr_param[i]);
551 thr_param[i].start = run_test;
552 thr_param[i].arg = thread_ctx;
555 ret = odph_thread_create(global->thread_tbl, &param, thr_param,
557 if (ret != num_cpu) {
558 ODPH_ERR(
"Failed to create all threads %i\n", ret);
565 static int validate_results(test_global_t *global, validate_fn_t validate)
567 test_options_t *test_options = &global->test_options;
568 uint64_t *counter[MAX_COUNTERS];
570 fill_counter_ptrs(global, counter);
572 if (validate(global, counter, test_options->num_counter))
581 static test_case_t test_suite[] = {
582 TEST_INFO(
"odp_spinlock", test_spinlock, validate_generic),
583 TEST_INFO(
"odp_spinlock_recursive", test_spinlock_recursive, validate_generic),
584 TEST_INFO(
"odp_rwlock", test_rwlock, validate_generic),
585 TEST_INFO(
"odp_rwlock_recursive", test_rwlock_recursive, validate_generic),
586 TEST_INFO(
"odp_ticketlock", test_ticketlock, validate_generic),
590 "Result array is too small to hold all the results");
592 static void output_results(test_global_t *global,
int idx)
595 double cycles_per_round, nsec_ave, nsec_per_round, rounds_per_cpu, total_rounds;
596 test_options_t *test_options = &global->test_options;
597 int num_cpu = test_options->num_cpu;
598 uint64_t num_round = test_options->num_round;
599 uint64_t nsec_sum = 0;
600 uint64_t cycles_sum = 0;
602 global->results[idx].test_name = test_suite[idx].name;
605 nsec_sum += global->thread_ctx[i].nsec;
606 cycles_sum += global->thread_ctx[i].cycles;
610 printf(
"No results.\n");
614 nsec_ave = (double)nsec_sum / num_cpu;
615 nsec_per_round = (double)nsec_sum / (num_cpu * num_round);
616 cycles_per_round = (double)cycles_sum / (num_cpu * num_round);
619 rounds_per_cpu = num_round / (nsec_ave / 1000.0);
620 total_rounds = ((uint64_t)num_cpu * num_round) / (nsec_ave / 1000.0);
622 global->results[idx].cycles_per_round = cycles_per_round;
623 global->results[idx].rounds_per_cpu = rounds_per_cpu;
624 global->results[idx].nsec_per_op = nsec_per_round;
625 global->results[idx].total_rounds = total_rounds;
627 printf(
"------------------------------------------------\n");
628 printf(
"Per thread results (Millions of rounds per sec):\n");
629 printf(
"------------------------------------------------\n");
630 printf(
" 1 2 3 4 5 6 7 8 9 10");
633 if (global->thread_ctx[i].nsec) {
637 printf(
"%8.3f ", num_round / (global->thread_ctx[i].nsec / 1000.0));
643 printf(
"Average results over %i threads:\n", num_cpu);
644 printf(
"------------------------------------------\n");
646 printf(
" cycles per round %8.2f\n", cycles_per_round);
647 printf(
" nsec per round: %8.2f\n", nsec_per_round);
648 printf(
" rounds per cpu: %8.3fM rounds/sec\n", rounds_per_cpu);
649 printf(
" total rounds: %8.3fM rounds/sec\n", total_rounds);
653 int main(
int argc,
char **argv)
655 odph_helper_options_t helper_options;
659 test_options_t test_options;
661 test_common_options_t common_options;
664 argc = odph_parse_options(argc, argv);
665 if (odph_options(&helper_options)) {
666 ODPH_ERR(
"Error: reading ODP helper options failed.\n");
670 argc = test_common_parse_options(argc, argv);
671 if (test_common_options(&common_options)) {
672 ODPH_ERR(
"Error: reading test common options failed\n");
676 if (parse_options(argc, argv, &test_options))
690 init.
mem_model = helper_options.mem_model;
694 ODPH_ERR(
"Global init failed.\n");
700 ODPH_ERR(
"Local init failed.\n");
706 ODP_CACHE_LINE_SIZE, 0);
709 ODPH_ERR(
"Shared memory reserve failed.\n");
714 if (test_global == NULL) {
715 ODPH_ERR(
"Shared memory alloc failed.\n");
718 memset(test_global, 0,
sizeof(test_global_t));
719 test_global->test_options = test_options;
720 test_global->common_options = common_options;
724 if (set_num_cpu(test_global))
727 print_info(&test_global->test_options);
730 num_tests = ODPH_ARRAY_SIZE(test_suite);
733 for (i = 0; i < num_tests; i++) {
734 if (test_options.type && test_options.type != (uint32_t)i + 1)
737 test_global->cur_type = i;
740 if (init_test(test_global, test_suite[i].name)) {
741 ODPH_ERR(
"Failed to initialize test.\n");
746 if (start_workers(test_global, instance, test_suite[i].test_fn))
750 odph_thread_join(test_global->thread_tbl,
751 test_global->test_options.num_cpu);
753 output_results(test_global, i);
756 if (validate_results(test_global, test_suite[i].validate_fn)) {
757 ODPH_ERR(
"Test %s result validation failed.\n",
759 if (test_options.repeat != REPEAT_FOREVER)
764 if (test_options.repeat == REPEAT_NO)
768 if (output_summary(test_global)) {
769 ODPH_ERR(
"Outputting summary failed.\n");
774 ODPH_ERR(
"Shm free failed.\n");
779 ODPH_ERR(
"Local terminate failed.\n");
784 ODPH_ERR(
"Global terminate failed.\n");
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
#define ODP_ALIGNED_CACHE
Defines type/struct/variable to be cache line size aligned.
uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
CPU cycle count difference.
uint64_t odp_cpu_cycles(void)
Current CPU cycle count.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
#define ODP_STATIC_ASSERT(cond, msg)
Compile time assertion macro.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
void odp_ticketlock_init(odp_ticketlock_t *tklock)
Initialize ticket lock.
void odp_spinlock_lock(odp_spinlock_t *splock)
Acquire spin lock.
void odp_rwlock_recursive_read_unlock(odp_rwlock_recursive_t *lock)
Release recursive rwlock after reading.
void odp_spinlock_init(odp_spinlock_t *splock)
Initialize spin lock.
void odp_ticketlock_lock(odp_ticketlock_t *tklock)
Acquire ticket lock.
void odp_rwlock_read_lock(odp_rwlock_t *rwlock)
Acquire read permission on a reader/writer lock.
void odp_spinlock_recursive_init(odp_spinlock_recursive_t *lock)
Initialize recursive spinlock.
void odp_rwlock_read_unlock(odp_rwlock_t *rwlock)
Release read permission on a reader/writer lock.
void odp_rwlock_recursive_init(odp_rwlock_recursive_t *lock)
Initialize recursive rwlock.
void odp_rwlock_recursive_write_unlock(odp_rwlock_recursive_t *lock)
Release recursive rwlock after writing.
void odp_ticketlock_unlock(odp_ticketlock_t *tklock)
Release ticket lock.
void odp_rwlock_recursive_read_lock(odp_rwlock_recursive_t *lock)
Acquire recursive rwlock for reading.
void odp_spinlock_recursive_unlock(odp_spinlock_recursive_t *lock)
Release recursive spinlock.
void odp_rwlock_write_unlock(odp_rwlock_t *rwlock)
Release write permission on a reader/writer lock.
void odp_rwlock_write_lock(odp_rwlock_t *rwlock)
Acquire write permission on a reader/writer lock.
void odp_spinlock_unlock(odp_spinlock_t *splock)
Release spin lock.
void odp_spinlock_recursive_lock(odp_spinlock_recursive_t *lock)
Acquire recursive spinlock.
void odp_rwlock_init(odp_rwlock_t *rwlock)
Initialize a reader/writer lock.
void odp_rwlock_recursive_write_lock(odp_rwlock_recursive_t *lock)
Acquire recursive rwlock for writing.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
void odp_sys_info_print(void)
Print system info.
#define ODP_THREAD_COUNT_MAX
Maximum number of threads supported in build time.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
#define ODP_TIME_SEC_IN_NS
A second in nanoseconds.
odp_time_t odp_time_local_strict(void)
Current local time (strict).
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
odp_feature_t not_used
Unused features.
odp_rwlock_t lock
the lock
odp_spinlock_t lock
the lock
char lock
lock flag, should match odp_atomic_flag_t
uint32_t tm
Traffic Manager APIs, e.g., odp_tm_xxx()
uint32_t stash
Stash APIs, e.g., odp_stash_xxx()
uint32_t crypto
Crypto APIs, e.g., odp_crypto_xxx()
uint32_t ipsec
IPsec APIs, e.g., odp_ipsec_xxx()
uint32_t timer
Timer APIs, e.g., odp_timer_xxx(), odp_timeout_xxx()
uint32_t cls
Classifier APIs, e.g., odp_cls_xxx(), odp_cos_xxx()
uint32_t schedule
Scheduler APIs, e.g., odp_schedule_xxx()
struct odp_feature_t::@148 feat
Individual feature bits.
uint32_t compress
Compression APIs, e.g., odp_comp_xxx()