API Reference Manual  1.46.0
odp_mem_perf.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2021 Nokia
3  */
4 
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdint.h>
16 #include <inttypes.h>
17 #include <stdlib.h>
18 #include <getopt.h>
19 
20 #include <odp_api.h>
21 #include <odp/helper/odph_api.h>
22 
/* Command line options of the test. Defaults are set in parse_options(). */
typedef struct test_options_t {
	/* Number of worker threads. 0: use all available worker CPUs. */
	uint32_t num_cpu;
	/* Number of memset/memcpy rounds each worker runs */
	uint32_t num_round;
	/* Size of one memory area in bytes */
	uint64_t data_len;
	/* Flags passed as-is to odp_shm_reserve() */
	uint32_t shm_flags;
	/* 0: all workers share one memory area, non-zero: one area per worker */
	int private;
	/* 0: memset the area, non-zero: memcpy between the two halves of the area */
	int mode;
} test_options_t;
32 
/* Forward declaration: the thread context points back to the global state */
typedef struct test_global_t test_global_t;

/* Per worker context: inputs for run_test() and the result it reports */
typedef struct test_thread_ctx_t {
	/* Shared global test state */
	test_global_t *global;
	/* Start of the memory area this worker operates on */
	void *shm_addr;
	/* Measured duration of the worker's test loop in nanoseconds */
	uint64_t nsec;
} test_thread_ctx_t;
41 
42 struct test_global_t {
43  test_options_t test_options;
44 
45  odp_barrier_t barrier;
46  uint32_t num_shm;
48  void *shm_addr[ODP_THREAD_COUNT_MAX];
49  odp_cpumask_t cpumask;
50  odph_thread_t thread_tbl[ODP_THREAD_COUNT_MAX];
51  test_thread_ctx_t thread_ctx[ODP_THREAD_COUNT_MAX];
52 
53 };
54 
/* Print the command line help to stdout */
static void print_usage(void)
{
	static const char usage[] =
		"\n"
		"Memory performance test\n"
		"\n"
		"Usage: odp_mem_perf [options]\n"
		"\n"
		" -c, --num_cpu Number of CPUs (worker threads). 0: all available CPUs. Default 1.\n"
		" -r, --num_round Number of rounds\n"
		" -l, --data_len Data length in bytes\n"
		" -f, --flags SHM flags parameter. Default 0.\n"
		" -p, --private 0: The same memory area is shared between threads (default)\n"
		" 1: Memory areas are private to each thread. This increases\n"
		" memory consumption to num_cpu * data_len.\n"
		" -m, --mode 0: Memset data (default)\n"
		" 1: Memcpy data. On each round, reads data from one half of the memory area\n"
		" and writes it to the other half.\n"
		" -h, --help This help\n"
		"\n";

	/* The text has no conversion specifiers, so it is written verbatim */
	fputs(usage, stdout);
}
75 
76 static int parse_options(int argc, char *argv[], test_options_t *test_options)
77 {
78  int opt;
79  int ret = 0;
80 
81  static const struct option longopts[] = {
82  {"num_cpu", required_argument, NULL, 'c'},
83  {"num_round", required_argument, NULL, 'r'},
84  {"data_len", required_argument, NULL, 'l'},
85  {"flags", required_argument, NULL, 'f'},
86  {"private", required_argument, NULL, 'p'},
87  {"mode", required_argument, NULL, 'm'},
88  {"help", no_argument, NULL, 'h'},
89  {NULL, 0, NULL, 0}
90  };
91 
92  static const char *shortopts = "+c:r:l:f:p:m:h";
93 
94  test_options->num_cpu = 1;
95  test_options->num_round = 1000;
96  test_options->data_len = 10 * 1024 * 1024;
97  test_options->shm_flags = 0;
98  test_options->private = 0;
99  test_options->mode = 0;
100 
101  while (1) {
102  opt = getopt_long(argc, argv, shortopts, longopts, NULL);
103 
104  if (opt == -1)
105  break;
106 
107  switch (opt) {
108  case 'c':
109  test_options->num_cpu = atoi(optarg);
110  break;
111  case 'r':
112  test_options->num_round = atoi(optarg);
113  break;
114  case 'l':
115  test_options->data_len = strtoull(optarg, NULL, 0);
116  break;
117  case 'f':
118  test_options->shm_flags = strtoul(optarg, NULL, 0);
119  break;
120  case 'p':
121  test_options->private = atoi(optarg);
122  break;
123  case 'm':
124  test_options->mode = atoi(optarg);
125  break;
126  case 'h':
127  /* fall through */
128  default:
129  print_usage();
130  ret = -1;
131  break;
132  }
133  }
134 
135  return ret;
136 }
137 
138 static int set_num_cpu(test_global_t *global)
139 {
140  int ret, max_num;
141  test_options_t *test_options = &global->test_options;
142  int num_cpu = test_options->num_cpu;
143 
144  /* One thread used for the main thread */
145  if (num_cpu > ODP_THREAD_COUNT_MAX - 1) {
146  ODPH_ERR("Too many workers. Maximum is %i.\n", ODP_THREAD_COUNT_MAX - 1);
147  return -1;
148  }
149 
150  max_num = num_cpu;
151  if (num_cpu == 0)
152  max_num = ODP_THREAD_COUNT_MAX - 1;
153 
154  ret = odp_cpumask_default_worker(&global->cpumask, max_num);
155 
156  if (num_cpu && ret != num_cpu) {
157  ODPH_ERR("Too many workers. Max supported %i.\n", ret);
158  return -1;
159  }
160 
161  /* Zero: all available workers */
162  if (num_cpu == 0) {
163  if (ret > max_num) {
164  ODPH_ERR("Too many cpus from odp_cpumask_default_worker(): %i\n", ret);
165  return -1;
166  }
167 
168  num_cpu = ret;
169  test_options->num_cpu = num_cpu;
170  }
171 
172  odp_barrier_init(&global->barrier, num_cpu);
173 
174  return 0;
175 }
176 
177 static int create_shm(test_global_t *global)
178 {
179  odp_shm_capability_t shm_capa;
180  odp_shm_t shm;
181  void *addr;
182  uint32_t i, num_shm;
183  test_options_t *test_options = &global->test_options;
184  uint32_t num_round = test_options->num_round;
185  uint32_t num_cpu = test_options->num_cpu;
186  uint64_t data_len = test_options->data_len;
187  uint32_t shm_flags = test_options->shm_flags;
188  int private = test_options->private;
189  char name[] = "mem_perf_00";
190 
191  num_shm = 1;
192  if (private)
193  num_shm = num_cpu;
194 
195  printf("\nMemory performance test\n");
196  printf(" num cpu %u\n", num_cpu);
197  printf(" num rounds %u\n", num_round);
198  printf(" data len %" PRIu64 "\n", data_len);
199  printf(" memory footprint %" PRIu64 "\n", num_shm * data_len);
200  printf(" shm flags 0x%x\n", shm_flags);
201  printf(" num shm %u\n", num_shm);
202  printf(" private %i\n", private);
203  printf(" mode %i\n", test_options->mode);
204 
205  if (odp_shm_capability(&shm_capa)) {
206  ODPH_ERR("SHM capa failed.\n");
207  return -1;
208  }
209 
210  if (shm_capa.max_size && data_len > shm_capa.max_size) {
211  ODPH_ERR("Data len too large. Maximum len is %" PRIu64 "\n", shm_capa.max_size);
212  return -1;
213  }
214 
215  if (num_shm > shm_capa.max_blocks) {
216  ODPH_ERR("Too many SHM blocks. Maximum is %u\n", shm_capa.max_blocks);
217  return -1;
218  }
219 
220  for (i = 0; i < num_shm; i++) {
221  name[9] = '0' + i / 10;
222  name[10] = '0' + i % 10;
223 
224  shm = odp_shm_reserve(name, data_len, ODP_CACHE_LINE_SIZE, shm_flags);
225 
226  if (shm == ODP_SHM_INVALID) {
227  ODPH_ERR("SHM[%u] reserve failed.\n", i);
228  return -1;
229  }
230 
231  global->shm[i] = shm;
232 
233  addr = odp_shm_addr(shm);
234  if (addr == NULL) {
235  ODPH_ERR("SHM[%u] addr failed.\n", i);
236  return -1;
237  }
238 
239  global->shm_addr[i] = addr;
240 
241  printf(" shm addr[%u] %p\n", i, addr);
242  }
243 
244  printf("\n");
245  global->num_shm = num_shm;
246 
248 
249  return 0;
250 }
251 
252 static int free_shm(test_global_t *global)
253 {
254  uint32_t i;
255 
256  for (i = 0; i < global->num_shm; i++) {
257  if (odp_shm_free(global->shm[i])) {
258  ODPH_ERR("SHM[%u] free failed.\n", i);
259  return -1;
260  }
261  }
262 
263  return 0;
264 }
265 
266 static int run_test(void *arg)
267 {
268  int thr;
269  uint32_t i;
270  uint64_t nsec;
271  odp_time_t t1, t2;
272  test_thread_ctx_t *thread_ctx = arg;
273  test_global_t *global = thread_ctx->global;
274  test_options_t *test_options = &global->test_options;
275  uint32_t num_round = test_options->num_round;
276  uint64_t data_len = test_options->data_len;
277  uint64_t half_len = data_len / 2;
278  int mode = test_options->mode;
279  uint8_t *addr = thread_ctx->shm_addr;
280 
281  thr = odp_thread_id();
282 
283  /* Start all workers at the same time */
284  odp_barrier_wait(&global->barrier);
285 
286  t1 = odp_time_local();
287 
288  if (mode == 0) {
289  for (i = 0; i < num_round; i++)
290  memset(addr, thr + i, data_len);
291  } else {
292  for (i = 0; i < num_round; i++) {
293  if ((i & 0x1) == 0)
294  memcpy(&addr[half_len], addr, half_len);
295  else
296  memcpy(addr, &addr[half_len], half_len);
297  }
298  }
299 
300  t2 = odp_time_local();
301 
302  nsec = odp_time_diff_ns(t2, t1);
303 
304  /* Update stats */
305  thread_ctx->nsec = nsec;
306 
307  return 0;
308 }
309 
310 static int start_workers(test_global_t *global, odp_instance_t instance)
311 {
312  odph_thread_common_param_t param;
313  int i, ret;
314  test_options_t *test_options = &global->test_options;
315  int num_cpu = test_options->num_cpu;
316  odph_thread_param_t thr_param[num_cpu];
317 
318  odph_thread_common_param_init(&param);
319  param.instance = instance;
320  param.cpumask = &global->cpumask;
321 
322  for (i = 0; i < num_cpu; i++) {
323  test_thread_ctx_t *thread_ctx = &global->thread_ctx[i];
324 
325  thread_ctx->global = global;
326  thread_ctx->shm_addr = global->shm_addr[0];
327  if (global->test_options.private)
328  thread_ctx->shm_addr = global->shm_addr[i];
329 
330  odph_thread_param_init(&thr_param[i]);
331  thr_param[i].thr_type = ODP_THREAD_WORKER;
332  thr_param[i].start = run_test;
333  thr_param[i].arg = thread_ctx;
334  }
335 
336  ret = odph_thread_create(global->thread_tbl, &param, thr_param, num_cpu);
337  if (ret != num_cpu) {
338  ODPH_ERR("Failed to create all threads %i\n", ret);
339  return -1;
340  }
341 
342  return 0;
343 }
344 
345 static void print_stat(test_global_t *global)
346 {
347  int i, num;
348  double nsec_ave;
349  uint64_t data_touch;
350  test_options_t *test_options = &global->test_options;
351  int num_cpu = test_options->num_cpu;
352  uint32_t num_round = test_options->num_round;
353  uint64_t data_len = test_options->data_len;
354  uint64_t nsec_sum = 0;
355 
356  for (i = 0; i < ODP_THREAD_COUNT_MAX; i++)
357  nsec_sum += global->thread_ctx[i].nsec;
358 
359  if (nsec_sum == 0) {
360  printf("No results.\n");
361  return;
362  }
363 
364  data_touch = num_round * data_len;
365  nsec_ave = nsec_sum / num_cpu;
366  num = 0;
367 
368  printf("RESULTS - per thread (MB per sec):\n");
369  printf("----------------------------------\n");
370  printf(" 1 2 3 4 5 6 7 8 9 10");
371 
372  for (i = 0; i < ODP_THREAD_COUNT_MAX; i++) {
373  if (global->thread_ctx[i].nsec) {
374  if ((num % 10) == 0)
375  printf("\n ");
376 
377  printf("%8.1f ", data_touch / (global->thread_ctx[i].nsec / 1000.0));
378  num++;
379  }
380  }
381  printf("\n\n");
382 
383  printf("RESULTS - average over %i threads:\n", num_cpu);
384  printf("----------------------------------\n");
385  printf(" duration: %.6f sec\n", nsec_ave / 1000000000);
386  printf(" bandwidth per cpu: %.3f MB/s\n", data_touch / (nsec_ave / 1000.0));
387  printf(" total bandwidth: %.3f MB/s\n", (num_cpu * data_touch) / (nsec_ave / 1000.0));
388  printf("\n");
389 }
390 
391 int main(int argc, char **argv)
392 {
393  odph_helper_options_t helper_options;
394  odp_instance_t instance;
395  odp_init_t init;
396  odp_shm_t shm;
397  test_global_t *global;
398 
399  /* Let helper collect its own arguments (e.g. --odph_proc) */
400  argc = odph_parse_options(argc, argv);
401  if (odph_options(&helper_options)) {
402  ODPH_ERR("Reading ODP helper options failed.\n");
403  exit(EXIT_FAILURE);
404  }
405 
406  /* List features not to be used */
407  odp_init_param_init(&init);
408  init.not_used.feat.cls = 1;
409  init.not_used.feat.compress = 1;
410  init.not_used.feat.crypto = 1;
411  init.not_used.feat.ipsec = 1;
412  init.not_used.feat.schedule = 1;
413  init.not_used.feat.timer = 1;
414  init.not_used.feat.tm = 1;
415 
416  init.mem_model = helper_options.mem_model;
417 
418  /* Init ODP before calling anything else */
419  if (odp_init_global(&instance, &init, NULL)) {
420  ODPH_ERR("Global init failed.\n");
421  return -1;
422  }
423 
424  /* Init this thread */
425  if (odp_init_local(instance, ODP_THREAD_CONTROL)) {
426  ODPH_ERR("Local init failed.\n");
427  return -1;
428  }
429 
430  shm = odp_shm_reserve("mem_perf_global", sizeof(test_global_t), ODP_CACHE_LINE_SIZE, 0);
431  if (shm == ODP_SHM_INVALID) {
432  ODPH_ERR("Shared mem reserve failed.\n");
433  exit(EXIT_FAILURE);
434  }
435 
436  global = odp_shm_addr(shm);
437  if (global == NULL) {
438  ODPH_ERR("Shared mem alloc failed\n");
439  exit(EXIT_FAILURE);
440  }
441 
442  memset(global, 0, sizeof(test_global_t));
443 
444  if (parse_options(argc, argv, &global->test_options))
445  return -1;
446 
448 
449  if (set_num_cpu(global))
450  return -1;
451 
452  if (create_shm(global))
453  return -1;
454 
455  /* Start workers */
456  if (start_workers(global, instance))
457  return -1;
458 
459  /* Wait workers to exit */
460  odph_thread_join(global->thread_tbl, global->test_options.num_cpu);
461 
462  print_stat(global);
463 
464  if (free_shm(global))
465  return -1;
466 
467  if (odp_shm_free(shm)) {
468  ODPH_ERR("Shared mem free failed.\n");
469  exit(EXIT_FAILURE);
470  }
471 
472  if (odp_term_local()) {
473  ODPH_ERR("term local failed.\n");
474  return -1;
475  }
476 
477  if (odp_term_global(instance)) {
478  ODPH_ERR("term global failed.\n");
479  return -1;
480  }
481 
482  return 0;
483 }
void odp_barrier_init(odp_barrier_t *barr, int count)
Initialize barrier with thread count.
void odp_barrier_wait(odp_barrier_t *barr)
Synchronize thread execution on barrier.
int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
Default CPU mask for worker threads.
void odp_init_param_init(odp_init_t *param)
Initialize the odp_init_t to default values for all fields.
int odp_init_local(odp_instance_t instance, odp_thread_type_t thr_type)
Thread local ODP initialization.
int odp_init_global(odp_instance_t *instance, const odp_init_t *params, const odp_platform_init_t *platform_params)
Global ODP initialization.
int odp_term_local(void)
Thread local ODP termination.
int odp_term_global(odp_instance_t instance)
Global ODP termination.
uint64_t odp_instance_t
ODP instance ID.
void odp_shm_print_all(void)
Print all shared memory blocks.
int odp_shm_free(odp_shm_t shm)
Free a contiguous block of shared memory.
#define ODP_SHM_INVALID
Invalid shared memory block.
int odp_shm_capability(odp_shm_capability_t *capa)
Query shared memory capabilities.
void * odp_shm_addr(odp_shm_t shm)
Shared memory block address.
odp_shm_t odp_shm_reserve(const char *name, uint64_t size, uint64_t align, uint32_t flags)
Reserve a contiguous block of shared memory.
void odp_sys_info_print(void)
Print system info.
#define ODP_THREAD_COUNT_MAX
Maximum number of threads supported in build time.
int odp_thread_id(void)
Get thread identifier.
@ ODP_THREAD_WORKER
Worker thread.
@ ODP_THREAD_CONTROL
Control thread.
odp_time_t odp_time_local(void)
Current local time.
uint64_t odp_time_diff_ns(odp_time_t t2, odp_time_t t1)
Time difference in nanoseconds.
odp_api.h
The OpenDataPlane API.
odp_init_t
Global initialization parameters.
odp_mem_model_t mem_model
Application memory model.
odp_feature_t not_used
Unused features.
odp_shm_capability_t
Shared memory capabilities.
uint32_t max_blocks
Maximum number of shared memory blocks.
uint64_t max_size
Maximum memory block size in bytes.
uint32_t tm
Traffic Manager APIs, e.g., odp_tm_xxx()
uint32_t crypto
Crypto APIs, e.g., odp_crypto_xxx()
uint32_t ipsec
IPsec APIs, e.g., odp_ipsec_xxx()
uint32_t timer
Timer APIs, e.g., odp_timer_xxx(), odp_timeout_xxx()
uint32_t cls
Classifier APIs, e.g., odp_cls_xxx(), odp_cos_xxx()
uint32_t schedule
Scheduler APIs, e.g., odp_schedule_xxx()
struct odp_feature_t::@148 feat
Individual feature bits.
uint32_t compress
Compression APIs, e.g., odp_comp_xxx()