22 #include <odp/helper/odph_api.h>
24 #include <export_results.h>
26 #define DEFAULT_BURST_SIZE 32
28 #define STAT_AVAILABLE 0x1
29 #define STAT_CACHE 0x2
30 #define STAT_THR_CACHE 0x4
31 #define STAT_ALLOC_OPS 0x10
32 #define STAT_FREE_OPS 0x20
33 #define STAT_TOTAL_OPS 0x40
35 typedef struct test_options_t {
48 typedef struct test_stat_t {
57 typedef struct test_global_t {
58 test_options_t test_options;
65 test_common_options_t common_options;
69 static void print_usage(
void)
72 "Pool performance test\n"
74 "Usage: odp_pool_perf [options]\n"
76 " -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default 1.\n"
77 " -e, --num_event Number of events\n"
78 " -r, --num_round Number of rounds\n"
79 " -b, --burst Maximum number of events per operation (default %d). When 0, single\n"
80 " event alloc/free functions are used instead of multi event variants.\n"
81 " -n, --num_burst Number of bursts allocated/freed back-to-back\n"
82 " -s, --data_size Data size in bytes\n"
83 " -S, --stats_mode Pool statistics usage. Enable counters with combination of these flags:\n"
84 " 0: no pool statistics (default)\n"
86 " 0x2: cache_available\n"
87 " 0x4: thread_cache_available\n"
91 " -t, --pool_type 0: Buffer pool (default)\n"
93 " -C, --cache_size Pool cache size (per thread)\n"
94 " -h, --help This help\n"
95 "\n", DEFAULT_BURST_SIZE);
98 static int parse_options(
int argc,
char *argv[], test_options_t *test_options)
103 static const struct option longopts[] = {
104 {
"num_cpu", required_argument, NULL,
'c'},
105 {
"num_event", required_argument, NULL,
'e'},
106 {
"num_round", required_argument, NULL,
'r'},
107 {
"burst", required_argument, NULL,
'b'},
108 {
"num_burst", required_argument, NULL,
'n'},
109 {
"data_size", required_argument, NULL,
's'},
110 {
"stats_mode", required_argument, NULL,
'S'},
111 {
"pool_type", required_argument, NULL,
't'},
112 {
"cache_size", required_argument, NULL,
'C'},
113 {
"help", no_argument, NULL,
'h'},
117 static const char *shortopts =
"+c:e:r:b:n:s:S:t:C:h";
119 test_options->num_cpu = 1;
120 test_options->num_event = 1000;
121 test_options->num_round = 100000;
122 test_options->max_burst = DEFAULT_BURST_SIZE;
123 test_options->num_burst = 1;
124 test_options->data_size = 64;
125 test_options->stats_mode = 0;
126 test_options->pool_type = 0;
127 test_options->cache_size = UINT32_MAX;
130 opt = getopt_long(argc, argv, shortopts, longopts, NULL);
137 test_options->num_cpu = atoi(optarg);
140 test_options->num_event = atoi(optarg);
143 test_options->num_round = atoi(optarg);
146 test_options->max_burst = atoi(optarg);
149 test_options->num_burst = atoi(optarg);
152 test_options->data_size = atoi(optarg);
155 test_options->stats_mode = strtoul(optarg, NULL, 0);
158 test_options->pool_type = atoi(optarg);
161 test_options->cache_size = atoi(optarg);
172 if (test_options->num_burst * ODPH_MAX(test_options->max_burst, 1U) >
173 test_options->num_event) {
174 printf(
"Not enough events (%u) for the burst configuration.\n"
175 "Use smaller burst size (%u) or less bursts (%u)\n",
176 test_options->num_event, test_options->max_burst,
177 test_options->num_burst);
184 static int set_num_cpu(test_global_t *global)
187 test_options_t *test_options = &global->test_options;
188 int num_cpu = test_options->num_cpu;
192 printf(
"Error: Too many workers. Maximum is %i.\n",
199 if (num_cpu && ret != num_cpu) {
200 printf(
"Error: Too many workers. Max supported %i.\n", ret);
207 test_options->num_cpu = num_cpu;
215 static int create_pool(test_global_t *global)
221 uint32_t max_num, max_size, min_cache_size, max_cache_size;
222 test_options_t *test_options = &global->test_options;
223 uint32_t num_event = test_options->num_event;
224 uint32_t num_round = test_options->num_round;
225 uint32_t max_burst = test_options->max_burst;
226 uint32_t num_burst = test_options->num_burst;
227 uint32_t num_cpu = test_options->num_cpu;
228 uint32_t data_size = test_options->data_size;
229 uint32_t cache_size = test_options->cache_size;
230 uint32_t stats_mode = test_options->stats_mode;
231 int packet_pool = test_options->pool_type;
237 if (cache_size == UINT32_MAX)
241 if (stats_mode & STAT_AVAILABLE)
243 if (stats_mode & STAT_CACHE)
245 if (stats_mode & STAT_THR_CACHE)
247 if (stats_mode & STAT_ALLOC_OPS)
249 if (stats_mode & STAT_FREE_OPS)
251 if (stats_mode & STAT_TOTAL_OPS)
254 printf(
"\nPool performance test\n");
255 printf(
" num cpu %u\n", num_cpu);
256 printf(
" num rounds %u\n", num_round);
257 printf(
" num events %u\n", num_event);
258 printf(
" max burst %u\n", max_burst);
259 printf(
" num bursts %u\n", num_burst);
260 printf(
" data size %u\n", data_size);
261 printf(
" cache size %u\n", cache_size);
262 printf(
" stats mode 0x%x\n", stats_mode);
263 printf(
" pool type %s\n", packet_pool ?
"packet" :
"buffer");
264 printf(
" op type %s\n\n", test_options->max_burst == 0 ?
"single" :
"multi");
267 printf(
"Error: Pool capa failed.\n");
285 if ((stats_capa.
all & stats.
all) != stats.
all) {
286 printf(
"Error: requested statistics not supported (0x%" PRIx64
" / 0x%" PRIx64
")\n",
287 stats.
all, stats_capa.
all);
291 if (cache_size < min_cache_size) {
292 printf(
"Error: min cache size supported %u\n", min_cache_size);
296 if (cache_size > max_cache_size) {
297 printf(
"Error: max cache size supported %u\n", max_cache_size);
301 if (max_num && num_event > max_num) {
302 printf(
"Error: max events supported %u\n", max_num);
306 if (max_size && data_size > max_size) {
307 printf(
"Error: max data size supported %u\n", max_size);
313 pool_param.
pkt.
num = num_event;
314 pool_param.
pkt.
len = data_size;
320 pool_param.
buf.
num = num_event;
321 pool_param.
buf.
size = data_size;
330 printf(
"Error: Pool create failed.\n");
339 static inline void record_results(test_stat_t *stats, uint64_t start_cycles,
odp_time_t start_time,
340 uint64_t rounds, uint64_t frees, uint64_t events)
345 stats->rounds = rounds;
346 stats->frees = frees;
347 stats->events = events;
352 static int test_buffer_pool(
void *arg)
355 uint32_t num, num_free, num_freed, i, rounds;
356 uint64_t start_cycles, events, frees;
358 test_global_t *global = arg;
359 test_options_t *test_options = &global->test_options;
360 uint32_t num_round = test_options->num_round;
361 uint32_t max_burst = test_options->max_burst;
362 uint32_t num_burst = test_options->num_burst;
363 uint32_t max_num = num_burst * max_burst;
369 for (i = 0; i < max_num; i++)
382 for (rounds = 0; rounds < num_round; rounds++) {
385 for (i = 0; i < num_burst; i++) {
389 printf(
"Error: Alloc failed. Round %u\n",
406 while (num_freed < num) {
407 num_free = num - num_freed;
408 if (num_free > max_burst)
409 num_free = max_burst;
413 num_freed += num_free;
417 record_results(&global->stat[thr], start_cycles, start_time, rounds, frees, events);
422 static int test_buffer_pool_single(
void *arg)
425 uint32_t num, num_freed, i, rounds;
426 uint64_t start_cycles, events, frees;
428 test_global_t *global = arg;
429 test_options_t *test_options = &global->test_options;
430 uint32_t num_round = test_options->num_round;
431 uint32_t num_burst = test_options->num_burst;
437 for (i = 0; i < num_burst; i++)
449 for (rounds = 0; rounds < num_round; rounds++) {
452 for (i = 0; i < num_burst; i++) {
466 while (num_freed < num) {
473 record_results(&global->stat[thr], start_cycles, start_time, rounds, frees, events);
478 static int test_packet_pool(
void *arg)
481 uint32_t num, num_free, num_freed, i, rounds;
482 uint64_t start_cycles, events, frees;
484 test_global_t *global = arg;
485 test_options_t *test_options = &global->test_options;
486 uint32_t num_round = test_options->num_round;
487 uint32_t max_burst = test_options->max_burst;
488 uint32_t num_burst = test_options->num_burst;
489 uint32_t max_num = num_burst * max_burst;
490 uint32_t data_size = test_options->data_size;
496 for (i = 0; i < max_num; i++)
509 for (rounds = 0; rounds < num_round; rounds++) {
512 for (i = 0; i < num_burst; i++) {
516 printf(
"Error: Alloc failed. Round %u\n",
534 while (num_freed < num) {
535 num_free = num - num_freed;
536 if (num_free > max_burst)
537 num_free = max_burst;
541 num_freed += num_free;
545 record_results(&global->stat[thr], start_cycles, start_time, rounds, frees, events);
550 static int test_packet_pool_single(
void *arg)
553 uint32_t num, num_freed, i, rounds;
554 uint64_t start_cycles, events, frees;
556 test_global_t *global = arg;
557 test_options_t *test_options = &global->test_options;
558 uint32_t num_round = test_options->num_round;
559 uint32_t num_burst = test_options->num_burst;
560 uint32_t data_size = test_options->data_size;
566 for (i = 0; i < num_burst; i++)
578 for (rounds = 0; rounds < num_round; rounds++) {
581 for (i = 0; i < num_burst; i++) {
596 while (num_freed < num) {
603 record_results(&global->stat[thr], start_cycles, start_time, rounds, frees, events);
608 static int start_workers(test_global_t *global,
odp_instance_t instance)
610 odph_thread_common_param_t thr_common;
611 odph_thread_param_t thr_param;
612 test_options_t *test_options = &global->test_options;
613 int num_cpu = test_options->num_cpu;
614 int packet_pool = test_options->pool_type;
616 odph_thread_common_param_init(&thr_common);
617 thr_common.instance = instance;
618 thr_common.cpumask = &global->cpumask;
619 thr_common.share_param = 1;
621 odph_thread_param_init(&thr_param);
622 thr_param.arg = global;
625 if (test_options->max_burst == 0)
626 thr_param.start = packet_pool ? test_packet_pool_single : test_buffer_pool_single;
628 thr_param.start = packet_pool ? test_packet_pool : test_buffer_pool;
630 if (odph_thread_create(global->thread_tbl, &thr_common, &thr_param,
637 static void test_stats_perf(test_global_t *global)
643 int num_thr = global->test_options.num_cpu + 1;
645 double nsec_ave = 0.0;
646 const int rounds = 1000;
657 for (i = 0; i < rounds; i++) {
659 printf(
"Error: Stats request failed on round %i\n", i);
668 nsec_ave = (double)nsec / i;
670 printf(
"Pool statistics:\n");
671 printf(
" odp_pool_stats() calls %i\n", i);
672 printf(
" ave call latency %.2f nsec\n", nsec_ave);
673 printf(
" num threads %i\n", num_thr);
674 printf(
" alloc_ops %" PRIu64
"\n", stats.
alloc_ops);
675 printf(
" free_ops %" PRIu64
"\n", stats.
free_ops);
676 printf(
" total_ops %" PRIu64
"\n", stats.
total_ops);
677 printf(
" available %" PRIu64
"\n", stats.
available);
679 for (i = 0; i < num_thr; i++) {
680 printf(
" thr[%2i] cache_available %" PRIu64
"\n",
687 static int output_results(test_global_t *global)
690 double rounds_ave, allocs_ave, frees_ave;
691 double events_ave, nsec_ave, cycles_ave;
692 test_options_t *test_options = &global->test_options;
693 int num_cpu = test_options->num_cpu;
694 uint32_t num_burst = test_options->num_burst;
695 uint64_t rounds_sum = 0;
696 uint64_t frees_sum = 0;
697 uint64_t events_sum = 0;
698 uint64_t nsec_sum = 0;
699 uint64_t cycles_sum = 0;
703 rounds_sum += global->stat[i].rounds;
704 frees_sum += global->stat[i].frees;
705 events_sum += global->stat[i].events;
706 nsec_sum += global->stat[i].nsec;
707 cycles_sum += global->stat[i].cycles;
710 if (rounds_sum == 0) {
711 printf(
"No results.\n");
715 rounds_ave = rounds_sum / num_cpu;
716 allocs_ave = (num_burst * rounds_sum) / num_cpu;
717 frees_ave = frees_sum / num_cpu;
718 events_ave = events_sum / num_cpu;
719 nsec_ave = nsec_sum / num_cpu;
720 cycles_ave = cycles_sum / num_cpu;
723 printf(
"RESULTS - per thread (Million events per sec):\n");
724 printf(
"----------------------------------------------\n");
725 printf(
" 1 2 3 4 5 6 7 8 9 10");
728 if (global->stat[i].rounds) {
732 printf(
"%6.1f ", (1000.0 * global->stat[i].events) /
733 global->stat[i].nsec);
739 printf(
"RESULTS - average over %i threads:\n", num_cpu);
740 printf(
"----------------------------------\n");
741 printf(
" alloc calls: %.3f\n", allocs_ave);
742 printf(
" free calls: %.3f\n", frees_ave);
743 printf(
" duration: %.3f msec\n", nsec_ave / 1000000);
744 printf(
" num cycles: %.3f M\n", cycles_ave / 1000000);
745 printf(
" cycles per round: %.3f\n",
746 cycles_ave / rounds_ave);
747 printf(
" cycles per event: %.3f\n",
748 cycles_ave / events_ave);
749 printf(
" ave events allocated: %.3f\n",
750 events_ave / allocs_ave);
751 printf(
" allocs per sec: %.3f M\n",
752 (1000.0 * allocs_ave) / nsec_ave);
753 printf(
" frees per sec: %.3f M\n",
754 (1000.0 * frees_ave) / nsec_ave);
755 printf(
" events per sec: %.3f M\n\n",
756 (1000.0 * events_ave) / nsec_ave);
758 printf(
"TOTAL events per sec: %.3f M\n\n",
759 (1000.0 * events_sum) / nsec_ave);
761 if (global->common_options.is_export) {
762 if (test_common_write(
"alloc calls,free calls,duration (msec),"
763 "num cycles (M),cycles per round,cycles per event,"
764 "ave events allocated,allocs per sec (M),frees per sec (M),"
765 "events per sec (M),total events per sec (M)\n")) {
766 ODPH_ERR(
"Export failed\n");
767 test_common_write_term();
771 if (test_common_write(
"%f,%f,%f,%f,%f,%f,%f,%f,%f,%f,%f\n",
772 allocs_ave, frees_ave, nsec_ave / 1000000,
773 cycles_ave / 1000000, cycles_ave / rounds_ave,
774 cycles_ave / events_ave, events_ave / allocs_ave,
775 (1000.0 * allocs_ave) / nsec_ave,
776 (1000.0 * frees_ave) / nsec_ave,
777 (1000.0 * events_ave) / nsec_ave,
778 (1000.0 * events_sum) / nsec_ave)) {
779 ODPH_ERR(
"Export failed\n");
780 test_common_write_term();
784 test_common_write_term();
790 int main(
int argc,
char **argv)
792 odph_helper_options_t helper_options;
796 test_global_t *global;
797 test_common_options_t common_options;
800 argc = odph_parse_options(argc, argv);
801 if (odph_options(&helper_options)) {
802 ODPH_ERR(
"Error: Reading ODP helper options failed.\n");
806 argc = test_common_parse_options(argc, argv);
807 if (test_common_options(&common_options)) {
808 ODPH_ERR(
"Error: Reading test options failed\n");
822 init.
mem_model = helper_options.mem_model;
826 printf(
"Error: Global init failed.\n");
832 printf(
"Error: Local init failed.\n");
836 shm =
odp_shm_reserve(
"pool_perf_global",
sizeof(test_global_t), ODP_CACHE_LINE_SIZE, 0);
838 ODPH_ERR(
"Error: Shared mem reserve failed.\n");
843 if (global == NULL) {
844 ODPH_ERR(
"Error: Shared mem alloc failed\n");
848 memset(global, 0,
sizeof(test_global_t));
851 global->common_options = common_options;
853 if (parse_options(argc, argv, &global->test_options))
858 if (set_num_cpu(global))
861 if (create_pool(global))
865 start_workers(global, instance);
868 odph_thread_join(global->thread_tbl, global->test_options.num_cpu);
870 if (global->test_options.stats_mode)
871 test_stats_perf(global);
873 if (output_results(global))
877 printf(
"Error: Pool destroy failed.\n");
882 ODPH_ERR(
"Error: Shared mem free failed.\n");
887 printf(
"Error: term local failed.\n");
892 printf(
"Error: term global failed.\n");
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
odp_buffer_t odp_buffer_alloc(odp_pool_t pool)
Buffer alloc.
void odp_buffer_free(odp_buffer_t buf)
Buffer free.
int odp_buffer_alloc_multi(odp_pool_t pool, odp_buffer_t buf[], int num)
Allocate multiple buffers.
#define ODP_BUFFER_INVALID
Invalid buffer.
void odp_buffer_free_multi(const odp_buffer_t buf[], int num)
Free multiple buffers.
#define odp_unlikely(x)
Branch unlikely taken.
uint64_t odp_cpu_cycles_diff(uint64_t c2, uint64_t c1)
CPU cycle count difference.
uint64_t odp_cpu_cycles(void)
Current CPU cycle count.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
odp_packet_t odp_packet_alloc(odp_pool_t pool, uint32_t len)
Allocate a packet from a packet pool.
void odp_packet_free(odp_packet_t pkt)
Free packet.
#define ODP_PACKET_INVALID
Invalid packet.
void odp_packet_free_multi(const odp_packet_t pkt[], int num)
Free multiple packets.
int odp_packet_alloc_multi(odp_pool_t pool, uint32_t len, odp_packet_t pkt[], int num)
Allocate multiple packets from a packet pool.
odp_pool_t odp_pool_create(const char *name, const odp_pool_param_t *param)
Create a pool.
int odp_pool_capability(odp_pool_capability_t *capa)
Query pool capabilities.
void odp_pool_param_init(odp_pool_param_t *param)
Initialize pool params.
int odp_pool_destroy(odp_pool_t pool)
Destroy a pool previously created by odp_pool_create().
#define ODP_POOL_MAX_THREAD_STATS
Maximum number of per thread statistics a single odp_pool_stats() call can read.
int odp_pool_stats(odp_pool_t pool, odp_pool_stats_t *stats)
Read pool statistics.
#define ODP_POOL_INVALID
Invalid pool.
@ ODP_POOL_BUFFER
Buffer pool.
@ ODP_POOL_PACKET
Packet pool.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
void odp_sys_info_print(void)
Print system info.
#define ODP_THREAD_COUNT_MAX
Maximum number of threads supported in build time.
int odp_thread_id(void)
Get thread identifier.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
odp_time_t odp_time_local(void)
Current local time.
odp_time_t odp_time_local_strict(void)
Current local time (strict).
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
odp_feature_t not_used
Unused features.
struct odp_pool_capability_t::@133 pkt
Packet pool capabilities.
uint32_t max_num
Maximum number of buffers of any size.
uint32_t min_cache_size
Minimum size of thread local cache.
uint32_t max_size
Maximum buffer data size in bytes.
odp_pool_stats_opt_t stats
Supported statistics counters.
struct odp_pool_capability_t::@132 buf
Buffer pool capabilities.
uint32_t max_cache_size
Maximum size of thread local cache.
uint32_t max_len
Maximum packet data length in bytes.
uint32_t num
Number of buffers in the pool.
struct odp_pool_param_t::@137 buf
Parameters for buffer pools.
uint32_t max_len
Maximum packet length that will be allocated from the pool.
uint32_t cache_size
Maximum number of buffers cached locally per thread.
struct odp_pool_param_t::@138 pkt
Parameters for packet pools.
uint32_t size
Minimum buffer size in bytes.
uint32_t max_num
Maximum number of packets.
odp_pool_type_t type
Pool type.
uint32_t len
Minimum length of 'num' packets.
odp_pool_stats_opt_t stats
Configure statistics counters.
Pool statistics counters.
uint64_t alloc_ops
The number of alloc operations from the pool.
uint64_t available
The number of available events in the pool.
uint16_t first
First thread identifier to read counters from.
uint64_t free_ops
The number of free operations to the pool.
uint64_t total_ops
The total number of alloc and free operations.
uint64_t cache_available
The number of available events in the local caches of all threads.
uint16_t last
Last thread identifier to read counters from.
struct odp_pool_stats_t::@131 thread
Per thread counters.
uint32_t tm
Traffic Manager APIs, e.g., odp_tm_xxx()
uint32_t crypto
Crypto APIs, e.g., odp_crypto_xxx()
uint32_t ipsec
IPsec APIs, e.g., odp_ipsec_xxx()
uint32_t timer
Timer APIs, e.g., odp_timer_xxx(), odp_timeout_xxx()
uint32_t cls
Classifier APIs, e.g., odp_cls_xxx(), odp_cos_xxx()
struct odp_feature_t::@173 feat
Individual feature bits.
uint32_t schedule
Scheduler APIs, e.g., odp_schedule_xxx()
uint32_t compress
Compression APIs, e.g., odp_comp_xxx()
Pool statistics counters options.
uint64_t total_ops
See odp_pool_stats_t::total_ops.
uint64_t cache_available
See odp_pool_stats_t::cache_available.
struct odp_pool_stats_opt_t::@130 bit
Option flags.
uint64_t thread_cache_available
See odp_pool_stats_t::thread::cache_available.
uint64_t free_ops
See odp_pool_stats_t::free_ops.
uint64_t alloc_ops
See odp_pool_stats_t::alloc_ops.
uint64_t available
See odp_pool_stats_t::available.
uint64_t all
All bits of the bit field structure.