API Reference Manual  1.46.0
odp_ipfragreass_reassemble.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2017-2018 Linaro Limited
3  */
4 
7 #include <odp_api.h>
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <assert.h>
12 
13 #include "odp_ipfragreass_reassemble.h"
14 #include "odp_ipfragreass_helpers.h"
15 
16 #define ROT(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
17 
26 static inline odp_bool_t equal_flow(struct packet *current, struct packet *frag)
27 {
28  odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle);
29  odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle);
30 
31  return (memcmp(&curr_h->src_addr, &frag_h->src_addr,
32  sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr))
33  == 0 &&
34  curr_h->id == frag_h->id &&
35  curr_h->proto == frag_h->proto);
36 }
37 
46 static inline odp_bool_t later_flow(struct packet *current, struct packet *frag)
47 {
48  odph_ipv4hdr_t *curr_h = odp_packet_data(current->handle);
49  odph_ipv4hdr_t *frag_h = odp_packet_data(frag->handle);
50 
51  return (memcmp(&curr_h->src_addr, &frag_h->src_addr,
52  sizeof(curr_h->src_addr) + sizeof(curr_h->dst_addr))
53  > 0 ||
54  curr_h->id > frag_h->id ||
55  curr_h->proto > frag_h->proto);
56 }
57 
67 static inline struct flts earliest(struct flts a, struct flts b,
68  struct flts now)
69 {
70  struct flts result;
71  struct flts elapsed_a;
72  struct flts elapsed_b;
73 
74  now.t += TS_NOW_TOLERANCE;
75  elapsed_a.t = now.t - a.t;
76  elapsed_b.t = now.t - b.t;
77  result.t = now.t - max(elapsed_a.t, elapsed_b.t);
78  return result;
79 }
80 
88 static inline uint32_t hash(odph_ipv4hdr_t *hdr)
89 {
90  uint32_t a = hdr->src_addr;
91  uint32_t b = hdr->dst_addr;
92  uint32_t c = (uint32_t)hdr->id << 16 | hdr->proto;
93 
94  /* A degenerate 3x32-bit Jenkins hash */
95  c ^= b;
96  c -= ROT(b, 14);
97  a ^= c;
98  a -= ROT(c, 11);
99  b ^= a;
100  b -= ROT(a, 25);
101  c ^= b;
102  c -= ROT(b, 16);
103  a ^= c;
104  a -= ROT(c, 4);
105  b ^= a;
106  b -= ROT(a, 14);
107  c ^= b;
108  c -= ROT(b, 24);
109  return c;
110 }
111 
125 static inline odp_bool_t later_fragment(struct packet *a, struct packet *b,
126  struct flts now)
127 {
128  odph_ipv4hdr_t hdr_a = *(odph_ipv4hdr_t *)odp_packet_data(a->handle);
129  odph_ipv4hdr_t hdr_b = *(odph_ipv4hdr_t *)odp_packet_data(b->handle);
130  uint32_t offset_a = ipv4hdr_fragment_offset_oct(hdr_a);
131  uint32_t offset_b = ipv4hdr_fragment_offset_oct(hdr_b);
132  uint32_t payload_len_a = ipv4hdr_payload_len(hdr_a);
133  uint32_t payload_len_b = ipv4hdr_payload_len(hdr_b);
134  uint32_t endpoint_a = OCTS_TO_BYTES(offset_a) + payload_len_a;
135  uint32_t endpoint_b = OCTS_TO_BYTES(offset_b) + payload_len_b;
136 
137  if (later_flow(a, b)) {
138  return 1;
139  } else if (equal_flow(a, b)) {
140  if (endpoint_a > endpoint_b) {
141  return 1;
142  } else if (endpoint_a == endpoint_b) {
143  if (offset_a < offset_b) {
144  return 1;
145  } else if (offset_a == offset_b) {
146  return b->arrival.t == earliest(a->arrival,
147  b->arrival,
148  now).t;
149  }
150  }
151  }
152 
153  return 0;
154 }
155 
168 static struct packet *extract_complete_packet(struct packet *tail,
169  struct packet **remaining_packets,
170  int *made_changes)
171 {
172  /*
173  * Iterate through the flows in this fragment list (until a packet is
174  * reassembled).
175  */
176  while (tail) {
177  /*
178  * Work backwards through the fragments in a single flow,
179  * attempting to glue together a whole packet. Upon finding a
180  * discontinuity, break out of the loop for this flow and try
181  * the next (if there is one).
182  */
183  struct packet *current = tail;
184  odph_ipv4hdr_t tail_hdr;
185  uint16_t final_frag_offset;
186 
187  tail_hdr = *(odph_ipv4hdr_t *)odp_packet_data(tail->handle);
188  final_frag_offset = ipv4hdr_fragment_offset_oct(tail_hdr);
189  while (current) {
190  odph_ipv4hdr_t curr_hdr;
191  uint16_t curr_offset_oct;
192  odp_bool_t final_fragment;
193  struct packet *prev;
194  void *tmp;
195  odph_ipv4hdr_t prev_hdr;
196  uint16_t prev_off_oct;
197  uint16_t prev_oct;
198 
199  tmp = odp_packet_data(current->handle);
200  curr_hdr = *(odph_ipv4hdr_t *)tmp;
201  curr_offset_oct = ipv4hdr_fragment_offset_oct(curr_hdr);
202  final_fragment = (curr_offset_oct == final_frag_offset);
203 
204  /*
205  * If the final fragment in the chain has "More
206  * Fragments" set, it's not the final packet of the
207  * datagram as a whole.
208  */
209  if (final_fragment && ipv4hdr_more_fragments(curr_hdr))
210  break;
211 
212  /*
213  * If this is the first fragment in a chain, we may have
214  * completed the reassembly of a whole packet.
215  */
216  prev = prev_packet(*current);
217  if (prev == NULL || !equal_flow(current, prev)) {
218  if (curr_offset_oct)
219  break;
220 
221  /*
222  * Extract the complete packet from the list of
223  * remaining packets
224  */
225  if (remaining_packets)
226  *remaining_packets = prev;
227  set_prev_packet(current, NULL);
228  if (made_changes)
229  *made_changes = 1;
230  return tail;
231  }
232 
233  /*
234  * Fragments should be consistent with those previously
235  * processed
236  */
237  tmp = odp_packet_data(prev->handle);
238  prev_hdr = *(odph_ipv4hdr_t *)tmp;
239  prev_off_oct = ipv4hdr_fragment_offset_oct(prev_hdr);
240  prev_oct = BYTES_TO_OCTS(ipv4hdr_payload_len(prev_hdr));
241  if (curr_offset_oct != prev_off_oct + prev_oct) {
242  if (prev_off_oct + prev_oct < curr_offset_oct) {
243  /*
244  * If there's no overlap, this is just a
245  * regular discontinuity
246  */
247  break;
248  }
249 
250  /*
251  * Fragment duplication or overlap has occurred!
252  * We don't handle such occurrences in this
253  * simple example application.
254  */
255  assert(0);
256  break;
257  }
258 
259  current = prev;
260  }
261 
262  /*
263  * Since we haven't had any luck finding a whole packet within
264  * this flow, let's try to look at other flows in this fraglist
265  * (if there are any others).
266  */
267  if (!current) {
268  tail = NULL;
269  break;
270  }
271 
272  while (prev_packet(*current) &&
273  equal_flow(current, prev_packet(*current))) {
274  current = prev_packet(*current);
275  }
276  tail = prev_packet(*current);
277  remaining_packets = prev_packet_ptr(current);
278  }
279 
280  return NULL;
281 }
282 
283 /*
284  * Glue together a list of fragments sorted by fragment offset, writing the
285  * result to an output queue
286  *
287  * @param tail The tail pointer to the list of fragments to reassemble
288  * @param out The output queue to write the result to
289  *
290  * @return 0 on success, -1 otherwise
291  */
292 static int send_packet(struct packet *tail, odp_queue_t out)
293 {
294  struct packet result = *tail;
295  struct packet *current = prev_packet(result);
296  odph_ipv4hdr_t *header;
297  uint32_t length;
298 
299  /*
300  * Reassemble the complete packet (working backwards from the last
301  * fragment)
302  */
303  while (current && equal_flow(current, &result)) {
304  struct packet new_result = *current;
305  int concat_success, trunc_success;
306 
307  current = prev_packet(new_result);
308  header = odp_packet_data(result.handle);
309  trunc_success = odp_packet_trunc_head(&result.handle, ipv4hdr_ihl(*header),
310  NULL, NULL);
311  if (trunc_success < 0) {
312  fprintf(stderr, "ERROR: odp_packet_trunc_head\n");
313  return -1;
314  }
315 
316  concat_success = odp_packet_concat(&new_result.handle,
317  result.handle);
318  if (concat_success < 0) {
319  fprintf(stderr, "ERROR: odp_packet_concat\n");
320  return -1;
321  }
322  result = new_result;
323  }
324 
325  /* Fix the header */
326  header = odp_packet_data(result.handle);
327  length = odp_packet_len(result.handle);
328  assert(length >= IP_HDR_LEN_MIN || length <= UINT16_MAX);
329  header->tot_len = odp_cpu_to_be_16(length);
330  ipv4hdr_set_more_fragments(header, 0);
331  ipv4hdr_set_fragment_offset_oct(header, 0);
332  header->chksum = 0;
333  odph_ipv4_csum_update(result.handle);
334 
335  assert(odp_queue_enq(out, odp_packet_to_event(result.handle)) >= 0);
336  return 0;
337 }
338 
345 static void sort_fraglist(union fraglist *fl, struct flts now)
346 {
347  struct packet *to_insert = fl->tail;
348 
349  fl->tail = NULL;
350  while (to_insert) {
351  struct packet *to_insert_next = prev_packet(*to_insert);
352 
353  if (fl->tail == NULL ||
354  later_fragment(to_insert, fl->tail, now)) {
355  set_prev_packet(to_insert, fl->tail);
356  fl->tail = to_insert;
357  } else {
358  struct packet *current = fl->tail;
359 
360  while (prev_packet(*current) &&
361  later_fragment(prev_packet(*current), to_insert,
362  now)) {
363  current = prev_packet(*current);
364  }
365  set_prev_packet(to_insert, prev_packet(*current));
366  set_prev_packet(current, to_insert);
367  }
368  to_insert = to_insert_next;
369  }
370 }
371 
384 static int add_fraglist_to_fraglist(odp_atomic_u128_t *fl, union fraglist frags,
385  struct packet *frags_head, struct flts now,
386  odp_queue_t out, odp_bool_t dont_assemble)
387 {
388  int reassembled = 0;
389 
390  /*
391  * We may need to recursively call this function a number of times,
392  * keeping count of the total number of packets reassembled. Sadly,
393  * tail call optimisation doesn't seem to be working very well, so
394  * we're using good ol' fashioned GOTOs instead.
395  */
396 redo:;
397  union fraglist oldfl;
398  union fraglist newfl;
399  union fraglist nullfl;
400  struct flts oldfl_earliest;
401  struct flts frags_earliest;
402 
403  oldfl.raw = odp_atomic_load_u128(fl);
404 
405  /*
406  * If we're updating a non-empty fraglist, we should always attempt
407  * reassembly!
408  */
409  if (oldfl.tail != NULL)
410  dont_assemble = 0;
411 
412  /* Insert the new fragment(s) to the tail of the fraglist */
413  set_prev_packet(frags_head, oldfl.tail);
414  newfl.tail = frags.tail;
415 
416  /*
417  * Update the fraglist variables (accumulating the length of the
418  * received pieces into "part_len", and updating the perceived 'true'
419  * length of the whole packet along with the timestamp of the earliest
420  * fragment in this list).
421  */
422  oldfl_earliest.t = oldfl.earliest;
423  frags_earliest.t = frags.earliest;
424  newfl.part_len = min(IP_OCTET_MAX, oldfl.part_len + frags.part_len);
425  newfl.whole_len = min(oldfl.whole_len, frags.whole_len);
426  newfl.earliest = (oldfl.tail == NULL ? frags.earliest
427  : earliest(oldfl_earliest,
428  frags_earliest,
429  now).t);
430 
431  /*
432  * Check if it looks like we have all the fragments for a whole packet
433  * yet. If not, just write out our changes and move on.
434  */
435  if (newfl.part_len < newfl.whole_len || dont_assemble) {
436  if (!odp_atomic_cas_rel_u128(fl, &oldfl.raw, newfl.raw)) {
437  /* Failed to add this fragment? Try again. */
438  set_prev_packet(frags_head, NULL);
439  goto redo;
440  }
441  return reassembled;
442  }
443 
444  /*
445  * It /appears/ that we have all the fragments for a packet. Things are
446  * not always as they appear, however, particularly in the case of a
447  * hash map collision where part_len and whole_len may be incorrect
448  * (and, hence, must be verified).
449  *
450  * Take exclusive ownership over this fraglist while we attempt
451  * reassembly. If we're truly done with it, then this releases the slot,
452  * otherwise we'll update the slot with our changes later.
453  */
454  init_fraglist(&nullfl);
455  if (!odp_atomic_cas_acq_u128(fl, &oldfl.raw, nullfl.raw)) {
456  /* Failed to take this fraglist? Try again. */
457  set_prev_packet(frags_head, NULL);
458  goto redo;
459  }
460 
461  /*
462  * Find any complete packets within the fraglist, cut them out of the
463  * list, and send them to the output queue. Note that there may be
464  * several complete packets, as we may have added multiple new fragments
465  * into the fraglist.
466  */
467  struct packet *remaining_packets;
468  struct packet *complete_datagram;
469  int fraglist_changed = 0;
470  int call_changed_fraglist = 0;
471  union fraglist update;
472 
473  sort_fraglist(&newfl, now);
474  remaining_packets = newfl.tail;
475  dont_assemble = 1;
476  while ((complete_datagram =
477  extract_complete_packet(remaining_packets, &remaining_packets,
478  &call_changed_fraglist)) ||
479  call_changed_fraglist) {
480  fraglist_changed = 1;
481  if (complete_datagram) {
482  assert(!send_packet(complete_datagram, out));
483  ++reassembled;
484  dont_assemble = 0;
485  }
486  call_changed_fraglist = 0;
487  }
488 
489  /* No remaining fragments in this fraglist? We're done. */
490  if (!remaining_packets)
491  return reassembled;
492 
493  /*
494  * If there are still fragments in this fraglist, we have changes to
495  * write back.
496  *
497  * Note that we may have to reassemble more packets, as adding the
498  * fragments this thread has exclusive access to into the shared
499  * fraglist may entail new packets being completed. Thus, we have to
500  * repeat this whole add_fraglist_to_fraglist process with the remaining
501  * fragments.
502  */
503  update.tail = remaining_packets;
504  update.part_len = newfl.part_len;
505  update.whole_len = newfl.whole_len;
506  update.earliest = newfl.earliest;
507 
508  /*
509  * We've cut fragments from the fragment list chain, and so should
510  * recalculate the part_len, whole_len, and earliest variables before
511  * writing out our changes.
512  */
513  if (fraglist_changed) {
514  struct packet *current = remaining_packets;
515 
516  update.earliest = now.t;
517  while (current) {
518  odph_ipv4hdr_t *h;
519  uint16_t part_oct;
520  uint16_t whole_oct;
521  struct flts update_earliest;
522 
523  update_earliest.t = update.earliest;
524  h = odp_packet_data(current->handle);
525  part_oct = ipv4hdr_payload_len_oct(*h);
526  whole_oct = ipv4hdr_reass_payload_len_oct(*h);
527  update.part_len = min(IP_OCTET_MAX,
528  update.part_len + part_oct);
529  update.whole_len = min(update.whole_len, whole_oct);
530  update.earliest = earliest(update_earliest,
531  current->arrival, now).t;
532  frags_head = current;
533  current = prev_packet(*current);
534  }
535  frags = update;
536  goto redo;
537  }
538 
539  frags = update;
540  frags_head = frags.tail;
541  while (prev_packet(*frags_head))
542  frags_head = prev_packet(*frags_head);
543  goto redo;
544 }
545 
558 static int add_frag_to_fraglist(odp_atomic_u128_t *fl, struct packet *frag,
559  uint16_t frag_payload_len,
560  uint16_t frag_reass_payload_len,
561  odp_queue_t out)
562 {
563  union fraglist frags;
564 
565  frags.tail = frag;
566  frags.part_len = frag_payload_len;
567  frags.whole_len = frag_reass_payload_len;
568  frags.earliest = frag->arrival.t;
569 
570  return add_fraglist_to_fraglist(fl, frags, frags.tail, frag->arrival,
571  out, 0);
572 }
573 
584 static void remove_stale_flows(odp_atomic_u128_t *fl, union fraglist oldfl,
585  struct flts timestamp_now, odp_queue_t out,
586  odp_bool_t force)
587 {
588  union fraglist newfl = oldfl;
589  struct packet *current;
590  struct packet *current_tail;
591  struct packet *remaining_frags_head;
592  struct flts flow_earliest;
593 
594  /*
595  * Sort the fraglist so we can step through its fragments flow-by-flow
596  */
597  sort_fraglist(&newfl, timestamp_now);
598 
599  /* Remove stale flows from the fraglist */
600  current = newfl.tail;
601  current_tail = newfl.tail;
602  remaining_frags_head = NULL;
603  flow_earliest = current->arrival;
604  newfl.earliest = timestamp_now.t;
605  while (current) {
606  struct packet *prev = prev_packet(*current);
607 
608  /*
609  * If this is the first fragment in a chain, we can make the
610  * decision on whether this flow should be kept or discarded
611  */
612  if (prev == NULL || !equal_flow(current, prev)) {
613  struct flts elapsed;
614  uint64_t elapsed_ns;
615 
616  elapsed.t = timestamp_now.t - flow_earliest.t;
617  elapsed_ns = (elapsed.t * TS_RES_NS);
618  if ((elapsed_ns >= FLOW_TIMEOUT_NS &&
619  elapsed.t + TS_NOW_TOLERANCE >=
620  TS_NOW_TOLERANCE) || force) {
621  struct packet *to_free = current_tail;
622 
623  while (to_free != prev) {
624  struct packet *next;
625 
626  next = prev_packet(*to_free);
627  odp_packet_free(to_free->handle);
628  to_free = next;
629  }
630 
631  if (remaining_frags_head)
632  set_prev_packet(remaining_frags_head,
633  prev);
634  else
635  newfl.tail = prev;
636  } else {
637  odph_ipv4hdr_t *h;
638  uint16_t part_oct;
639  uint16_t whole_oct;
640  struct flts newfl_earliest;
641 
642  newfl_earliest.t = newfl.earliest;
643  remaining_frags_head = current;
644  h = odp_packet_data(current->handle);
645  part_oct = ipv4hdr_payload_len_oct(*h);
646  whole_oct = ipv4hdr_reass_payload_len_oct(*h);
647  newfl.part_len = min(IP_OCTET_MAX,
648  newfl.part_len + part_oct);
649  newfl.whole_len = min(newfl.whole_len,
650  whole_oct);
651  newfl.earliest = earliest(newfl_earliest,
652  flow_earliest,
653  timestamp_now).t;
654  }
655 
656  current_tail = prev;
657  flow_earliest.t = EARLIEST_MAX;
658  } else {
659  flow_earliest = earliest(flow_earliest,
660  current->arrival,
661  timestamp_now);
662  }
663 
664  current = prev;
665  }
666 
667  /*
668  * If there are any remaining fragments, write them back into the
669  * fraglist
670  */
671  if (remaining_frags_head)
672  add_fraglist_to_fraglist(fl, newfl, remaining_frags_head,
673  timestamp_now, out, 0);
674 }
675 
683 static void garbage_collect_fraglist(odp_atomic_u128_t *fl, odp_queue_t out,
684  odp_bool_t force)
685 {
686  uint64_t time_now;
687  struct flts timestamp_now;
688  struct flts elapsed;
689  uint64_t elapsed_ns;
690  union fraglist oldfl;
691  odp_bool_t success = 1;
692 
693  do {
694  time_now = odp_time_to_ns(odp_time_global());
695  timestamp_now.t = time_now / TS_RES_NS;
696 
697  oldfl.raw = odp_atomic_load_u128(fl);
698 
699  elapsed.t = timestamp_now.t - oldfl.earliest;
700 
701  if (oldfl.tail == NULL ||
702  elapsed.t + TS_NOW_TOLERANCE < TS_NOW_TOLERANCE)
703  return;
704 
705  elapsed_ns = (elapsed.t * TS_RES_NS);
706  assert(force || elapsed_ns <= 86400000000000);
707  if (elapsed_ns >= FLOW_TIMEOUT_NS || force) {
708  union fraglist nullfl;
709 
710  init_fraglist(&nullfl);
711  success = odp_atomic_cas_acq_u128(fl, &oldfl.raw,
712  nullfl.raw);
713  if (success)
714  remove_stale_flows(fl, oldfl, timestamp_now,
715  out, force);
716  }
717  } while (!success);
718 }
719 
720 int reassemble_ipv4_packets(odp_atomic_u128_t *fraglists, int num_fraglists,
721  struct packet *fragments, int num_fragments,
722  odp_queue_t out)
723 {
724  int i;
725  int packets_reassembled = 0;
726 
727  for (i = 0; i < num_fragments; ++i) {
728  struct packet frag;
729  odph_ipv4hdr_t *hdr;
730  uint16_t frag_payload_len;
731  uint16_t frag_reass_payload_len;
732  uint32_t key;
733  odp_atomic_u128_t *fl;
734  int status;
735 
736  frag = fragments[i];
737  hdr = odp_packet_data(frag.handle);
738  frag_payload_len = ipv4hdr_payload_len_oct(*hdr);
739  frag_reass_payload_len = ipv4hdr_reass_payload_len_oct(*hdr);
740 
741  /*
742  * Find the appropriate hash map bucket for fragments in this
743  * flow. In the case of collisions, fragments for multiple flows
744  * are simply stored in the same list.
745  */
746  key = hash(hdr);
747  fl = &fraglists[key % num_fraglists];
748 
749  status = add_frag_to_fraglist(fl, &fragments[i],
750  frag_payload_len,
751  frag_reass_payload_len, out);
752  if (status < 0) {
753  fprintf(stderr,
754  "ERROR: failed to add fragment to fraglist\n");
755  return -1;
756  }
757  packets_reassembled += status;
758  }
759 
760  return packets_reassembled;
761 }
762 
763 void garbage_collect_fraglists(odp_atomic_u128_t *fraglists, int num_fraglists,
764  odp_queue_t out, odp_bool_t destroy_all)
765 {
766  int i;
767 
768  for (i = 0; i < num_fraglists; ++i)
769  garbage_collect_fraglist(&fraglists[i], out, destroy_all);
770 }
int odp_atomic_cas_acq_u128(odp_atomic_u128_t *atom, odp_u128_t *old_val, odp_u128_t new_val)
Compare and swap atomic odp_u128_t variable using ACQUIRE memory ordering.
int odp_atomic_cas_rel_u128(odp_atomic_u128_t *atom, odp_u128_t *old_val, odp_u128_t new_val)
Compare and swap atomic odp_u128_t variable using RELEASE memory ordering.
odp_u128_t odp_atomic_load_u128(odp_atomic_u128_t *atom)
Load value of atomic odp_u128_t variable.
odp_u16be_t odp_cpu_to_be_16(uint16_t cpu16)
Convert cpu native uint16_t to 16bit big endian.
odp_event_t odp_packet_to_event(odp_packet_t pkt)
Convert packet handle to event.
int odp_packet_concat(odp_packet_t *dst, odp_packet_t src)
Concatenate two packets.
void * odp_packet_data(odp_packet_t pkt)
Packet data pointer.
uint32_t odp_packet_len(odp_packet_t pkt)
Packet data length.
void odp_packet_free(odp_packet_t pkt)
Free packet.
int odp_packet_trunc_head(odp_packet_t *pkt, uint32_t len, void **data_ptr, uint32_t *seg_len)
Truncate packet head.
int odp_queue_enq(odp_queue_t queue, odp_event_t ev)
Enqueue an event to a queue.
bool odp_bool_t
Boolean type.
uint64_t odp_time_to_ns(odp_time_t time)
Convert time to nanoseconds.
odp_time_t odp_time_global(void)
Current global time.
The OpenDataPlane API.