1 /*        $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $        */
2 /*        $NetBSD: dccp_tcplike.c,v 1.5 2022/05/22 11:27:36 andvar Exp $ */
3 
4 /*
5  * Copyright (c) 2003 Magnus Erixzon
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 /*
32  * TCP-like congestion control for DCCP
33  */
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.5 2022/05/22 11:27:36 andvar Exp $");
37 
38 #ifdef _KERNEL_OPT
39 #include "opt_dccp.h"
40 #endif
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/proc.h>
50 #include <sys/protosw.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/mutex.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #include <netinet/in_pcb.h>
64 #include <netinet/in_var.h>
65 
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/ip_var.h>
69 
70 #include <netinet/dccp.h>
71 #include <netinet/dccp_var.h>
72 #include <netinet/dccp_tcplike.h>
73 
/*
 * Debug logging hooks.  Each macro takes ONE argument: a complete,
 * parenthesised argument list such as (LOG_INFO, "fmt\n", a, b).  The
 * macro pastes that list after the name of the logging function, so
 * TCPLIKE_DEBUG((LOG_INFO, "x\n")) becomes dccp_log (LOG_INFO, "x\n").
 * MALLOC_DEBUG is the only one routed to log rather than dccp_log.
 */
#define TCPLIKE_DEBUG(args) dccp_log args
#define MALLOC_DEBUG(args) log args
#define CWND_DEBUG(args) dccp_log args
#define ACKRATIO_DEBUG(args) dccp_log args
#define LOSS_DEBUG(args) dccp_log args
#define TIMEOUT_DEBUG(args) dccp_log args

/*
 * Unless building for FreeBSD 5.0 or later, the inpcb locking macros
 * used in this file are defined to expand to nothing, so their arguments
 * are never evaluated (or even required to exist).
 */
#if !defined(__FreeBSD__) || __FreeBSD_version < 500000
#define   INP_INFO_LOCK_INIT(x,y)
#define   INP_INFO_WLOCK(x)
#define INP_INFO_WUNLOCK(x)
#define   INP_INFO_RLOCK(x)
#define INP_INFO_RUNLOCK(x)
#define   INP_LOCK(x)
#define INP_UNLOCK(x)
#endif
90 
/* Sender side */

/* RTO callout handler and RTT estimator */
void tcplike_rto_timeout(void *);
void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
/*
 * cwndvector helpers: a bit vector, one bit per sent sequence number,
 * recording which packets are still outstanding.
 */
void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
int _cwndvector_size(struct tcplike_send_ccb *);
u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);

/* Deferred teardown handlers (the sender one is run from a callout) */
void tcplike_send_term(void *);
void tcplike_recv_term(void *);

/* Receiver-side list helpers; NOTE(review): defined outside the part of
 * the file covered by this review. */
void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);

/* extern Ack Vector functions */
extern void dccp_use_ackvector(struct dccpcb *);
extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);

/* Option/feature helpers implemented elsewhere in the DCCP code */
extern int dccp_get_option(char *, int, int, char *, int);
extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
116 
117 /*
118  * RTO timer activated
119  */
120 void
tcplike_rto_timeout(void * ccb)121 tcplike_rto_timeout(void *ccb)
122 {
123           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
124           /*struct inpcb *inp;*/
125           int s;
126 
127           mutex_enter(&(cb->mutex));
128 
129           cb->ssthresh = cb->cwnd >>1;
130           cb->cwnd = 1; /* allowing 1 packet to be sent */
131           cb->outstanding = 0; /* is this correct? */
132           cb->rto_timer_callout = 0;
133           cb->rto = cb->rto << 1;
134           TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
135 
136           cb->sample_rtt = 0;
137 
138           cb->ack_last = 0;
139           cb->ack_miss = 0;
140 
141           cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
142           dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
143           dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
144                                          (char *) &cb->rcvr_ackratio, 1);
145           cb->acked_in_win = 0;
146           cb->acked_windows = 0;
147           cb->oldcwnd_ts = cb->pcb->seq_snd;
148 
149           LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
150               cb->cwnd, cb->outstanding));
151           mutex_exit(&(cb->mutex));
152 
153           /* lock'n run dccp_output */
154           s = splnet();
155           INP_INFO_RLOCK(&dccpbinfo);
156           /*inp = cb->pcb->d_inpcb;*/
157           INP_LOCK(inp);
158           INP_INFO_RUNLOCK(&dccpbinfo);
159 
160           dccp_output(cb->pcb, 1);
161 
162           INP_UNLOCK(inp);
163           splx(s);
164 }
165 
/*
 * Feed one round-trip-time measurement into the sender's estimator and
 * recompute the retransmission timeout.
 * args: cb     - sender ccb; its rtt, rtt_d and rto fields are updated
 *       sample - measured round-trip time, in the same unit as cb->rtt
 *                (the caller in this file passes microseconds / 1000)
 */
void
tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
{
	u_int16_t err;

	if (cb->rtt == 0xffff) {
		/*
		 * 0xffff is the "no estimate yet" marker written by
		 * tcplike_send_init(): adopt the first sample as-is.
		 */
		cb->rtt = sample;
		cb->rto = cb->rtt << 1;
		return;
	}

	/* Move rtt 1/8 of the way towards the sample; err = |sample - rtt| */
	if (sample >= cb->rtt) {
		err = sample - cb->rtt;
		cb->rtt = cb->rtt + (err >> 3);
	} else {
		err = cb->rtt - sample;
		cb->rtt = cb->rtt - (err >> 3);
	}

	/*
	 * Move rtt_d 1/4 of the way towards err.  The two directions are
	 * handled separately so that no negative (or wrapped unsigned)
	 * value is ever right-shifted; the "+ 3" makes the downward step
	 * round the same way an arithmetic shift of the negative
	 * difference would (towards minus infinity).
	 */
	if (err >= cb->rtt_d)
		cb->rtt_d += (err - cb->rtt_d) >> 2;
	else
		cb->rtt_d -= (cb->rtt_d - err + 3) >> 2;

	if (cb->rtt < TCPLIKE_MIN_RTT)
		cb->rtt = TCPLIKE_MIN_RTT;
	cb->rto = cb->rtt + (cb->rtt_d << 2);

	LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
}
201 
202 /* Functions declared in struct dccp_cc_sw */
203 
204 /*
205  * Initialises the sender side
206  * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
207  */
208 void *
tcplike_send_init(struct dccpcb * pcb)209 tcplike_send_init(struct dccpcb* pcb)
210 {
211           struct tcplike_send_ccb *cb;
212 
213           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
214 
215           cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
216           if (cb == 0) {
217                     TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
218                     dccpstat.tcplikes_send_memerr++;
219                     return 0;
220           }
221           memset(cb, 0, sizeof (struct tcplike_send_ccb));
222 
223           /* init sender */
224           cb->pcb = pcb;
225 
226           cb->cwnd = TCPLIKE_INITIAL_CWND;
227           cb->ssthresh = 0xafff; /* lim-> infinity */
228           cb->oldcwnd_ts = 0;
229           cb->outstanding = 0;
230           cb->rcvr_ackratio = 2; /* Ack Ratio */
231           cb->acked_in_win = 0;
232           cb->acked_windows = 0;
233 
234           CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
235                         cb->cwnd, cb->outstanding));
236           cb->rtt = 0xffff;
237           cb->rto = TIMEOUT_UBOUND;
238           callout_init(&cb->rto_timer, 0);
239           callout_init(&cb->free_timer, 0);
240           cb->rto_timer_callout = 0;
241           cb->rtt_d = 0;
242           cb->timestamp = 0;
243 
244           cb->sample_rtt = 1;
245 
246           cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
247           /* 1 bit per entry */
248           cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
249           if (cb->cwndvector == NULL) {
250                     MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
251                     /* What to do now? */
252                     cb->cv_size = 0;
253                     dccpstat.tcplikes_send_memerr++;
254                     return 0;
255           }
256           memset(cb->cwndvector, 0, cb->cv_size / 8);
257           cb->cv_hs = cb->cv_ts = 0;
258           cb->cv_hp = cb->cwndvector;
259 
260           cb->ack_last = 0;
261           cb->ack_miss = 0;
262 
263           mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
264 
265           TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
266           dccpstat.tcplikes_send_conn++;
267           return cb;
268 }
269 
tcplike_send_term(void * ccb)270 void tcplike_send_term(void *ccb)
271 {
272           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
273           if (ccb == 0)
274                     return;
275 
276           mutex_destroy(&(cb->mutex));
277 
278           free(cb, M_PCB);
279           TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
280 }
281 
282 /*
283  * Free the sender side
284  * args: ccb - ccb of sender
285  */
286 void
tcplike_send_free(void * ccb)287 tcplike_send_free(void *ccb)
288 {
289           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
290 
291           LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
292 
293           if (ccb == 0)
294                     return;
295 
296           mutex_enter(&(cb->mutex));
297 
298           free(cb->cwndvector, M_PCB);
299           cb->cv_hs = cb->cv_ts = 0;
300 
301           /* untimeout any active timer */
302           if (cb->rto_timer_callout) {
303                     TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
304                     callout_stop(&cb->rto_timer);
305                     cb->rto_timer_callout = 0;
306           }
307 
308           mutex_exit(&(cb->mutex));
309 
310           callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
311 }
312 
313 /*
314  * Ask TCPlike wheter one can send a packet or not
315  * args: ccb  -  ccb block for current connection
316  * returns: 0 if ok, else <> 0.
317  */
318 int
tcplike_send_packet(void * ccb,long datasize)319 tcplike_send_packet(void *ccb, long datasize)
320 {
321           /* check if one can send here */
322           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
323           long ticks;
324           char feature[1];
325 
326           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
327 
328           if (datasize == 0) {
329                     TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
330                     return 1;
331           }
332 
333           mutex_enter(&(cb->mutex));
334 
335           if (cb->cwnd <= cb->outstanding) {
336                     /* May not send. trigger RTO */
337                     DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
338                     if (!cb->rto_timer_callout) {
339                               LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
340                               ticks = (long)cb->rto;
341                               callout_reset(&cb->rto_timer, ticks,
342                                   tcplike_rto_timeout, (void *)cb);
343                               cb->rto_timer_callout = 1;
344                     }
345                     mutex_exit(&(cb->mutex));
346                     return 0;
347           }
348 
349           /* We're allowed to send */
350 
351           feature[0] = 1;
352           if (cb->pcb->remote_ackvector == 0) {
353                     ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
354                     dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
355                     dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
356           }
357 
358           /* untimeout any active timer */
359           if (cb->rto_timer_callout) {
360                     LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
361                     callout_stop(&cb->rto_timer);
362                     cb->rto_timer_callout = 0;
363           }
364 
365           if (!cb->sample_rtt) {
366                     struct timeval stamp;
367                     microtime(&stamp);
368                     cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
369                     dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
370                     /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
371                     cb->sample_rtt = 1;
372           }
373 
374           mutex_exit(&(cb->mutex));
375           return 1;
376 
377 }
378 
379 /*
380  * Notify sender that a packet has been sent
381  * args: ccb - ccb block for current connection
382  *         moreToSend - if there exists more packets to send
383  */
384 void
tcplike_send_packet_sent(void * ccb,int moreToSend,long datasize)385 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
386 {
387           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
388 
389           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
390 
391           if (datasize == 0) {
392                     TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
393                     return;
394           }
395 
396           mutex_enter(&(cb->mutex));
397 
398           cb->outstanding++;
399           TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
400 
401           /* stash the seqnr in cwndvector */
402           /* Dont do this if we're only sending an ACK ! */
403           _add_to_cwndvector(cb, cb->pcb->seq_snd);
404           CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
405 
406           dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
407           mutex_exit(&(cb->mutex));
408 }
409 
410 /*
411  * Notify that an ack package was received
412  * args: ccb  -  ccb block for current connection
413  */
414 void
tcplike_send_packet_recv(void * ccb,char * options,int optlen)415 tcplike_send_packet_recv(void *ccb, char *options, int optlen)
416 {
417           dccp_seq acknum, lastok;
418           u_int16_t numlostpackets, avsize, i, prev_size;
419           u_int8_t length, state, numokpackets, ackratiocnt;
420           u_char av[10];
421           struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
422 
423           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
424           mutex_enter(&(cb->mutex));
425 
426           if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
427                     u_int32_t echo, elapsed;
428 
429                     TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
430                     bcopy(av, &echo, 4);
431                     bcopy(av + 4, &elapsed, 4);
432 
433                     if (echo == cb->timestamp) {
434                               struct timeval time;
435                               u_int32_t c_stamp;
436                               u_int16_t diff;
437 
438                               microtime(&time);
439                               c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
440 
441                               diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
442                               diff = (u_int16_t)(diff / 1000);
443                               TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
444                                                echo, elapsed, diff));
445                               tcplike_rtt_sample(cb, diff);
446                     }
447           }
448 
449           if (cb->pcb->ack_rcv == 0) {
450                     /* There was no Ack. There is no spoon */
451 
452                     /* We'll clear the missingacks data here, since the other host
453                      * is also sending data.
454                      * I guess we could deal with this, using the NDP field in the
455                      * header. Let's stick a *TODO* mark here for now.
456                      * The missingacks mechanism will activate if other host goes to
457                      * only sending DCCP-Ack packets.
458                      */
459                     cb->ack_last = 0;
460                     cb->ack_miss = 0;
461                     ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
462                     mutex_exit(&(cb->mutex));
463                     return;
464           }
465 
466           cb->sample_rtt = 0;
467 
468           /* check ackVector for lost packets. cmp with cv_list */
469           avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
470           if (avsize == 0)
471                     avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
472 
473           if (avsize > 0)
474                     dccpstat.tcplikes_send_ackrecv++;
475 
476           acknum = cb->pcb->ack_rcv;
477           numlostpackets = 0;
478           numokpackets = 0;
479           lastok = 0;
480           prev_size = _cwndvector_size(cb);
481 
482           TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
483           if (avsize == 0)
484                     _remove_from_cwndvector(cb, acknum);
485 
486           for (i=0; i < avsize; i++) {
487                     state = (av[i] & 0xc0) >> 6;
488                     length = (av[i] & 0x3f) +1;
489                     while (length > 0) {
490                               if (state == 0) {
491                                         CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
492                                         numokpackets++;
493                                         lastok = acknum;
494                                         _remove_from_cwndvector(cb, acknum);
495                               } else {
496                                         if (acknum > cb->oldcwnd_ts) {
497                                                   LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
498                                                   numlostpackets++;
499                                                   dccpstat.tcplikes_send_reploss++;
500                                         }
501                               }
502                               acknum--;
503                               length--;
504                     }
505           }
506           if (lastok)
507                     if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
508                               LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
509                               if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
510                                         numlostpackets++;
511                                         dccpstat.tcplikes_send_assloss++;
512                               }
513                     }
514 
515           lastok = cb->cv_hs;
516           while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
517                     lastok++;
518           if (lastok != cb->cv_hs)
519                     _chop_cwndvector(cb, lastok);
520 
521           cb->outstanding = _cwndvector_size(cb);
522           CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
523           if (prev_size == cb->outstanding) {
524                     /* Nothing dropped from cwndvector  */
525                     mutex_exit(&(cb->mutex));
526                     return;
527           }
528 
529           cb->acked_in_win += numokpackets;
530 
531           if (cb->cwnd < cb->ssthresh) {
532                     /* Slow start */
533 
534                     if (numlostpackets > 0) {
535                               /* Packet loss */
536                               LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
537                               cb->cwnd = cb->cwnd>>1;
538                               if (cb->cwnd < 1)
539                                         cb->cwnd = 1;
540                               cb->ssthresh = cb->cwnd;
541                               cb->acked_in_win = 0;
542                               cb->acked_windows = 0;
543                               cb->oldcwnd_ts = cb->pcb->seq_snd;
544 
545                     } else {
546                               cb->cwnd++;
547                     }
548 
549           } else if (cb->cwnd >= cb->ssthresh) {
550 
551                     if (numlostpackets > 0) {
552                               /* Packet loss */
553                               LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
554                               cb->cwnd = cb->cwnd>>1;
555                               if (cb->cwnd < 1)
556                                         cb->cwnd = 1;
557                               cb->ssthresh = cb->cwnd;
558                               cb->acked_in_win = 0;
559                               cb->acked_windows = 0;
560                               cb->oldcwnd_ts = cb->pcb->seq_snd;
561 
562                     } else if (cb->acked_in_win > cb->cwnd) {
563                               cb->cwnd++;
564                     }
565           }
566 
567           /* Ok let's check if there are missing Ack packets */
568           ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
569                               cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
570 
571           if (cb->ack_last == 0) {
572                     /* First received ack (or first after Data packet). Yey */
573                     cb->ack_last = cb->pcb->seq_rcv;
574                     cb->ack_miss = 0;
575           } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
576                     /* This is correct, non-congestion, in-order behaviour */
577                     cb->ack_last = cb->pcb->seq_rcv;
578 
579           } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
580                     /* Might be an Ack we've been missing */
581                     /* This code has a flaw; If we miss 2 Ack packets, we only care
582                      * about the older one. This means that the next-to-oldest one could
583                      * be lost without any action being taken.
584                      * Time will tell if that is going to be a Giant Problem(r)
585                      */
586                     if (cb->pcb->seq_rcv == cb->ack_miss) {
587                               /* Yea it was. great */
588                               cb->ack_miss = 0;
589                     }
590 
591           } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
592                     /* There is a jump in Ack seqnums.. */
593                     cb->ack_miss = cb->ack_last + 1;
594                     cb->ack_last = cb->pcb->seq_rcv;
595           }
596 
597           if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
598                     /* Alert! Alert! Ack packets are MIA.
599                      * Decrease Ack Ratio
600                      */
601                     cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
602                     if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
603                               /* Constraint 2 */
604                               cb->rcvr_ackratio = cb->cwnd>>1;
605                     }
606                     if (cb->rcvr_ackratio == 0)
607                               cb->rcvr_ackratio = 1;
608                     ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
609                     dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
610                     dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
611                                          (char *) &cb->rcvr_ackratio, 1);
612 
613                     cb->ack_miss = 0;
614                     cb->acked_windows = 0;
615                     cb->acked_in_win = 0;
616                     dccpstat.tcplikes_send_missack++;
617 
618           } else if (cb->acked_in_win > cb->cwnd) {
619                     cb->acked_in_win = 0;
620                     cb->acked_windows++;
621                     if (cb->rcvr_ackratio == 1) {
622                               /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
623                                * heavy congestion so we can increase it
624                                */
625                               cb->acked_windows = 0;
626                     }
627           }
628 
629           if (cb->acked_windows >= 1) {
630                     ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
631                     if (cb->acked_windows >= ackratiocnt) {
632                               if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
633                                         /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
634                                         cb->rcvr_ackratio--;
635                                         ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
636                                         dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
637                                         dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
638                                                              (char *) &cb->rcvr_ackratio, 1);
639                               }
640                               cb->acked_in_win = 0;
641                               cb->acked_windows = 0;
642                     }
643           }
644 
645           CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
646                         cb->cwnd, cb->outstanding));
647 
648           if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
649                 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
650                             cb->cwnd, cb->outstanding));
651                     callout_stop(&cb->rto_timer);
652                     cb->rto_timer_callout = 0;
653 
654                     mutex_exit(&(cb->mutex));
655                 dccp_output(cb->pcb, 1);
656                     return;
657         }
658           mutex_exit(&(cb->mutex));
659 }
660 
661 int
_cwndvector_size(struct tcplike_send_ccb * cb)662 _cwndvector_size(struct tcplike_send_ccb *cb)
663 {
664           u_int64_t gap, offset, seqnr;
665           u_int32_t cnt;
666           u_char *t;
667 
668           TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
669           cnt = 0;
670           for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
671                     gap = seqnr - cb->cv_hs;
672 
673                     offset = gap % 8;
674                     t = cb->cv_hp + (gap/8);
675                     if (t >= (cb->cwndvector + (cb->cv_size/8)))
676                               t -= (cb->cv_size / 8); /* wrapped */
677 
678                     if (((*t & (0x01 << offset)) >> offset) == 0x01)
679                               cnt++;
680           }
681           return cnt;
682 }
683 
684 u_char
_cwndvector_state(struct tcplike_send_ccb * cb,u_int64_t seqnr)685 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
686 {
687           u_int64_t gap, offset;
688           u_char *t;
689 
690           /* Check for wrapping */
691           if (seqnr >= cb->cv_hs) {
692                     /* Not wrapped */
693                     gap = seqnr - cb->cv_hs;
694           } else {
695                     /* Wrapped XXXXX */
696                     gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
697           }
698 
699           if (gap >= cb->cv_size) {
700                     /* gap is bigger than cwndvector size? baaad */
701                     return 0x01;
702           }
703 
704           offset = gap % 8;
705           t = cb->cv_hp + (gap/8);
706           if (t >= (cb->cwndvector + (cb->cv_size/8)))
707                     t -= (cb->cv_size / 8); /* wrapped */
708 
709           return ((*t & (0x01 << offset)) >> offset);
710 }
711 
712 void
_add_to_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)713 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
714 {
715           u_int64_t offset, dc, gap;
716           u_char *t, *n;
717 
718           TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
719 
720           if (cb->cv_hs == cb->cv_ts) {
721                     /* Empty cwndvector */
722                     cb->cv_hs = cb->cv_ts = seqnr;
723           }
724 
725           /* Check for wrapping */
726           if (seqnr >= cb->cv_hs) {
727                     /* Not wrapped */
728                     gap = seqnr - cb->cv_hs;
729           } else {
730                     /* Wrapped */
731                     gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
732           }
733 
734           if (gap >= cb->cv_size) {
735                     /* gap is bigger than cwndvector size? baaad */
736                     /* maybe we should increase the cwndvector here */
737                     CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
738                                   gap, cb->cv_size, seqnr));
739                     dccpstat.tcplikes_send_badseq++;
740                     return;
741           }
742 
743           offset = gap % 8; /* bit to mark */
744           t = cb->cv_hp + (gap/8);
745           if (t >= (cb->cwndvector + (cb->cv_size/8)))
746                     t -= (cb->cv_size / 8); /* cwndvector wrapped */
747 
748           *t = *t | (0x01 << offset); /* turn on bit */
749 
750           cb->cv_ts = seqnr+1;
751           if (cb->cv_ts == 0x1000000000000LL)
752                     cb->cv_ts = 0;
753 
754           if (gap > (cb->cv_size - 128)) {
755                     MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
756                     n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
757                     if (n == NULL) {
758                               MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
759                               dccpstat.tcplikes_send_memerr++;
760                               return;
761                     }
762                     memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
763                     dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
764                     memcpy (n,cb->cv_hp, dc); /* tail to end */
765                     memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
766                     cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
767                     free (cb->cwndvector, M_PCB);
768                     cb->cv_hp = cb->cwndvector = n;
769           }
770 }
771 
772 void
_remove_from_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)773 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
774 {
775           u_int64_t offset;
776           int64_t gap;
777           u_char *t;
778 
779           DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
780 
781           if (cb->cv_hs == cb->cv_ts) {
782                     /* Empty cwndvector */
783                     return;
784           }
785 
786           /* Check for wrapping */
787           if (seqnr >= cb->cv_hs) {
788                     /* Not wrapped */
789                     gap = seqnr - cb->cv_hs;
790           } else {
791                     /* Wrapped */
792                     gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
793           }
794 
795           if (gap >= cb->cv_size) {
796                     /* gap is bigger than cwndvector size. has already been chopped */
797                     return;
798           }
799 
800           offset = gap % 8; /* hi or low 2 bits to mark */
801           t = cb->cv_hp + (gap/8);
802           if (t >= (cb->cwndvector + (cb->cv_size/8)))
803                     t -= (cb->cv_size / 8); /* cwndvector wrapped */
804 
805           *t = *t & (~(0x01 << offset)); /* turn off bits */
806 }
807 
808 int
_chop_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)809 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
810 {
811           int64_t gap, bytegap;
812           u_char *t;
813 
814           CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
815 
816           if (cb->cv_hs == cb->cv_ts)
817                     return 0;
818 
819           if (seqnr > cb->cv_hs) {
820                     gap = seqnr - cb->cv_hs;
821           } else {
822                     /* We received obsolete information */
823                     return 0;
824           }
825 
826           bytegap = gap/8;
827           if (bytegap == 0)
828                     return 0;
829 
830           t = cb->cv_hp + bytegap;
831           if (t >= (cb->cwndvector + (cb->cv_size/8)))
832                     t -= (cb->cv_size / 8); /* ackvector wrapped */
833           cb->cv_hp = t;
834           cb->cv_hs += bytegap*8;
835           return 1;
836 }
837 
838 
839 /* Receiver side */
840 
841 
842 /* Functions declared in struct dccp_cc_sw */
843 
844 /* Initialises the receiver side
845  * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
846  */
847 void *
tcplike_recv_init(struct dccpcb * pcb)848 tcplike_recv_init(struct dccpcb *pcb)
849 {
850           struct tcplike_recv_ccb *ccb;
851 
852           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
853 
854           ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
855           if (ccb == 0) {
856                     TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
857                     dccpstat.tcplikes_recv_memerr++;
858                     return 0;
859           }
860 
861           memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
862 
863           ccb->pcb = pcb;
864           ccb->unacked = 0;
865           ccb->pcb->ack_ratio = 2;
866 
867           ccb->pcb->remote_ackvector = 1;
868           dccp_use_ackvector(ccb->pcb);
869 
870           callout_init(&ccb->free_timer, 0);
871 
872           mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
873 
874           TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
875           dccpstat.tcplikes_recv_conn++;
876           return ccb;
877 }
878 
tcplike_recv_term(void * ccb)879 void tcplike_recv_term(void *ccb)
880 {
881           struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
882           if (ccb == 0)
883                     return;
884 
885           mutex_destroy(&(cb->mutex));
886           free(cb, M_PCB);
887           TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
888 }
889 
890 /* Free the receiver side
891  * args: ccb - ccb of receiver
892  */
893 void
tcplike_recv_free(void * ccb)894 tcplike_recv_free(void *ccb)
895 {
896           struct ack_list *a;
897           struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
898 
899           LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
900 
901           if (ccb == 0)
902                     return;
903 
904           mutex_enter(&(cb->mutex));
905 
906           a = cb->av_list;
907           while (a) {
908                     cb->av_list = a->next;
909                     free(a, M_TEMP);
910                     a = cb->av_list;
911           }
912 
913           cb->pcb->av_size = 0;
914           free(cb->pcb->ackvector, M_PCB);
915 
916           mutex_exit(&(cb->mutex));
917           callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
918 }
919 
920 /*
921  * Tell TCPlike that a packet has been received
922  * args: ccb  -  ccb block for current connection
923  */
924 void
tcplike_recv_packet_recv(void * ccb,char * options,int optlen)925 tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
926 {
927           struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
928           u_char ackvector[16];
929           u_int16_t avsize;
930           u_char av_rcv[10];
931 
932           TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
933 
934           mutex_enter(&(cb->mutex));
935 
936           if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
937               cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
938                     dccpstat.tcplikes_recv_datarecv++;
939 
940           /* Grab Ack Vector 0 or 1 */
941           avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
942           if (avsize == 0)
943                     avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
944 
945           /* We are only interested in acks-on-acks here.
946            * The "real" ack handling is done be the sender */
947           if (avsize == 0 && cb->pcb->ack_rcv) {
948                     u_int64_t ackthru;
949                     /* We got an Ack without an ackvector.
950                      * This would mean it's an ack on an ack.
951                      */
952                     ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
953                     ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
954                     if (ackthru) {
955                               dccp_update_ackvector(cb->pcb, ackthru);
956                               dccpstat.tcplikes_recv_ackack++;
957                     }
958           } else if (avsize > 0 && cb->pcb->ack_rcv) {
959                     /* We received an AckVector */
960                     u_int32_t acknum, ackthru;
961                     int i;
962                     ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
963                     /* gotta loop through the ackvector */
964                     acknum = cb->pcb->ack_rcv;
965                     for (i=0; i<avsize; i++) {
966                               u_int8_t state, len;
967                               state = (av_rcv[i] & 0xc0) >> 6;
968                               len = (av_rcv[i] & 0x2f) + 1;
969                               if (state != 0) {
970                                         /* Drops in ackvector! Will be noted and taken care of by the sender part */
971                                         ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
972                                         continue;
973                               }
974 
975                               while (len>0) {
976                                         ackthru = _avlist_get(cb, acknum);
977                                         ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
978                                         if (ackthru) {
979                                                   dccp_update_ackvector(cb->pcb, ackthru);
980                                                   dccpstat.tcplikes_recv_ackack++;
981                                         }
982                                         acknum--;
983                                         len--;
984                               }
985                     }
986           }
987 
988           ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
989           dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
990           cb->unacked++;
991 
992           if (cb->unacked >= cb->pcb->ack_ratio) {
993                     /* Time to send an Ack */
994 
995                     avsize = dccp_generate_ackvector(cb->pcb, ackvector);
996 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
997                     cb->unacked = 0;
998                     if (avsize > 0) {
999                               dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
1000                               cb->pcb->ack_snd = cb->pcb->seq_rcv;
1001                               _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1002                               ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1003                               dccpstat.tcplikes_recv_acksent++;
1004                               dccp_output(cb->pcb, 1);
1005                     }
1006           }
1007           mutex_exit(&(cb->mutex));
1008 }
1009 
1010 void
_avlist_add(struct tcplike_recv_ccb * cb,u_int64_t localseq,u_int64_t ackthru)1011 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1012 {
1013           struct ack_list *a;
1014           ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1015           /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1016           a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1017           if (a == NULL) {
1018                     MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1019                     dccpstat.tcplikes_recv_memerr++;
1020                     return;
1021           }
1022           memset(a, 0, sizeof(struct ack_list));
1023           a->localseq = localseq;
1024           a->ackthru = ackthru;
1025           a->next = cb->av_list;
1026           cb->av_list = a;
1027 }
1028 
1029 /*
1030  * Searches the av_list. if 'localseq' found, drop it from list and return
1031  * ackthru
1032  */
1033 u_int64_t
_avlist_get(struct tcplike_recv_ccb * cb,u_int64_t localseq)1034 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1035 {
1036           struct ack_list *a, *n, *p;
1037           u_int64_t ackthru;
1038 
1039           ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1040           a = cb->av_list;
1041           p = 0;
1042           while (a) {
1043                     n = a->next;
1044                     if (a->localseq == localseq) {
1045                               if (p)
1046                                         p->next = n;
1047                               else
1048                                         cb->av_list = n;
1049                               ackthru = a->ackthru;
1050                               /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1051                               free(a, M_TEMP);
1052                               return ackthru;
1053                     }
1054                     p = a;
1055                     a = n;
1056           }
1057           /* Not found. return 0 */
1058           return 0;
1059 }
1060 
1061 /*
1062 int tcplike_option_recv(void);
1063 */
1064