
Tail loss probe
https://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01


Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.347
diff -d -u -r1.347 tcp_input.c
--- sys/netinet/tcp_input.c	10 Jun 2016 13:31:44 -0000	1.347
+++ sys/netinet/tcp_input.c	25 Jul 2016 13:13:34 -0000
@@ -1998,6 +1998,11 @@
 			tp->ts_recent_age = tcp_now;
 			tp->ts_recent = opti.ts_val;
 		}
+		/*
+		 * Disable Tail Loss Probe timer
+		 */
+		if (TCP_TIMER_ISARMED(tp, TCPT_TLP))
+			TCP_TIMER_DISARM(tp, TCPT_TLP);
 
 		if (tlen == 0) {
 			/* Ack prediction. */
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_output.c,v
retrieving revision 1.186
diff -d -u -r1.186 tcp_output.c
--- sys/netinet/tcp_output.c	10 Jun 2016 13:27:16 -0000	1.186
+++ sys/netinet/tcp_output.c	25 Jul 2016 13:13:34 -0000
@@ -578,6 +578,11 @@
 	int sigoff = 0;
 #endif
 	uint64_t *tcps;
+	uint64_t flightsize;
+
+	/* but first things first - disarm the TLP timer */
+	if (TCP_TIMER_ISARMED(tp, TCPT_TLP))
+		TCP_TIMER_DISARM(tp, TCPT_TLP);
 
 #ifdef DIAGNOSTIC
 	if (tp->t_inpcb && tp->t_in6pcb)
@@ -676,6 +681,7 @@
 	} else {
 		if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur) {
 			/*
+			 * RFC5681 Section 4.1.
 			 * We have been idle for "a while" and no acks are
 			 * expected to clock out any data we send --
 			 * slow start to get ack "clock" running again.
@@ -1526,6 +1532,46 @@
 			}
 		}
 
+timer:
+		flightsize = (tp->snd_nxt - tp->sack_newdata) +
+		    sack_bytes_rxmt;
+		if (tcp_tlp_enabled != 0 &&
+		    TCPS_HAVEESTABLISHED(tp->t_state) &&
+		    TCP_SACK_ENABLED(tp) &&
+		    len != 0 &&
+		    /* sent everything, or cwnd full (no unsigned wrap) */
+		    (off + len == so->so_snd.sb_cc ||
+		    flightsize + tp->t_segsz > tp->snd_cwnd) &&
+		    tp->snd_tlp_nxt != tp->snd_nxt) {
+			int pto;
+			/*
+			 * Arm the timer for tail loss probe
+			 * Draft dukkipati Section 2.1
+			 * FlightSize > 1: schedule PTO in max(2*SRTT, 10ms)
+			 * tp->t_srtt >> 5 = SRTT expressed in hz ticks
+			 */
+			if (flightsize > tp->t_segsz)
+				pto = max(tp->t_srtt >> 4, mstohz(10));
+			/*
+			 * FlightSize == 1:
+			 * schedule PTO in max(2*SRTT, 1.5*SRTT+WCDelAckT).
+			 */
+			else
+				pto = max(tp->t_srtt >> 4,
+				    3 * (tp->t_srtt >> 6) + mstohz(200));
+			/*
+			 * If the RTO would fire no later than the PTO, the
+			 * probe takes its place: PTO = min(RTO, PTO)
+			 */
+			if (pto >= TCP_REXMTVAL(tp) * hz / PR_SLOWHZ &&
+			    TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
+				TCP_TIMER_DISARM(tp, TCPT_REXMT);
+				pto = TCP_REXMTVAL(tp) * hz / PR_SLOWHZ;
+			}
+			tp->snd_tlp_nxt = tp->snd_nxt;
+			TCP_TIMER_ARM_HZ(tp, TCPT_TLP, pto > 0 ? pto : 1);
+		}
+
 		/*
 		 * Set retransmit timer if not currently set,
 		 * and not doing an ack or a keep-alive probe.
@@ -1534,10 +1580,10 @@
 		 * Initialize shift counter which is used for backoff
 		 * of retransmit time.
 		 */
-timer:
 		if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) {
-			if ((sack_rxmit && tp->snd_nxt != tp->snd_max)
-			    || tp->snd_nxt != tp->snd_una) {
+			if (((sack_rxmit && tp->snd_nxt != tp->snd_max)
+			    || tp->snd_nxt != tp->snd_una) &&
+			    TCP_TIMER_ISARMED(tp, TCPT_TLP) == 0) {
 				if (TCP_TIMER_ISARMED(tp, TCPT_PERSIST)) {
 					TCP_TIMER_DISARM(tp, TCPT_PERSIST);
 					tp->t_rxtshift = 0;
Index: sys/netinet/tcp_seq.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_seq.h,v
retrieving revision 1.17
diff -d -u -r1.17 tcp_seq.h
--- sys/netinet/tcp_seq.h	25 Jul 2014 17:53:59 -0000	1.17
+++ sys/netinet/tcp_seq.h	25 Jul 2016 13:13:34 -0000
@@ -58,7 +58,8 @@
 
 #define	tcp_sendseqinit(tp) \
 	(tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = \
-	    (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = (tp)->iss
+	    (tp)->snd_recover = (tp)->snd_high = (tp)->snd_fack = \
+	    (tp)->snd_tlp_nxt = (tp)->iss
 
 #define TCP_ISS_RANDOM_MASK 0x00ffffff /* bits of randomness in a TCP ISS */
 #define TCP_ISSINCR         0x01000000 /* increment per time and per conn */
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.90
diff -d -u -r1.90 tcp_timer.c
--- sys/netinet/tcp_timer.c	26 Apr 2016 08:44:44 -0000	1.90
+++ sys/netinet/tcp_timer.c	25 Jul 2016 13:13:34 -0000
@@ -149,6 +149,8 @@
 
 int	tcp_maxpersistidle = 0;		/* max idle time in persist */
 
+u_int	tcp_tlp_enabled = 0;		/* tail loss probe */
+
 /*
  * Time to delay the ACK.  This is initialized in tcp_init(), unless
  * its patched.
@@ -159,12 +161,14 @@
 void	tcp_timer_persist(void *);
 void	tcp_timer_keep(void *);
 void	tcp_timer_2msl(void *);
+void	tcp_timer_tlp(void *);
 
 const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
 	tcp_timer_rexmt,
 	tcp_timer_persist,
 	tcp_timer_keep,
 	tcp_timer_2msl,
+	tcp_timer_tlp,
 };
 
 /*
@@ -658,3 +662,61 @@
 	mutex_exit(softnet_lock);
 	KERNEL_UNLOCK_ONE(NULL);
 }
+
+/* TCPT_TLP timer handler; arg is the tcpcb.  Forces a tail loss probe. */
+void
+tcp_timer_tlp(void *arg)
+{
+	struct tcpcb *tp = arg;
+#ifdef INET
+	struct inpcb *inp;
+#endif
+#ifdef INET6
+	struct in6pcb *in6p;
+#endif
+	struct socket *so = NULL;
+
+	mutex_enter(softnet_lock);
+	if ((tp->t_flags & TF_DEAD) != 0 ||
+	    TCPS_HAVEESTABLISHED(tp->t_state) == 0) {
+		mutex_exit(softnet_lock);
+		return;
+	}
+	if (!callout_expired(&tp->t_timer[TCPT_TLP])) {
+		mutex_exit(softnet_lock);
+		return;
+	}
+
+	/* Do a tail loss probe rxmit */
+	KERNEL_LOCK(1, NULL);
+	/* Probe if snd_nxt still equals snd_tlp_nxt or REXMT is not armed */
+	if (tp->snd_tlp_nxt == tp->snd_nxt ||
+	    TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) {
+		TCP_STATINC(TCP_STAT_TLP);
+
+		tcp_free_sackholes(tp);
+		tp->snd_fack = tp->snd_una;
+
+#ifdef INET
+		inp = tp->t_inpcb;
+		if (inp)
+			so = inp->inp_socket;
+#endif
+#ifdef INET6
+		in6p = tp->t_in6pcb;
+		if (in6p)
+			so = in6p->in6p_socket;
+#endif
+		KASSERT(so != NULL);
+		/* No new data queued (span == sb_cc): rewind to snd_una */
+		if (tp->snd_tlp_nxt - tp->snd_una == so->so_snd.sb_cc)
+			tp->snd_nxt = tp->snd_una;
+
+		tp->t_force = 1;
+		(void) tcp_output(tp);
+		tp->t_force = 0;
+	}
+
+	KERNEL_UNLOCK_ONE(NULL);
+	mutex_exit(softnet_lock);
+}
Index: sys/netinet/tcp_timer.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.28
diff -d -u -r1.28 tcp_timer.h
--- sys/netinet/tcp_timer.h	24 May 2011 18:37:52 -0000	1.28
+++ sys/netinet/tcp_timer.h	25 Jul 2016 13:13:34 -0000
@@ -69,12 +69,13 @@
  * Definitions of the TCP timers.  These timers are counted
  * down PR_SLOWHZ times a second.
  */
-#define	TCPT_NTIMERS	4
+#define	TCPT_NTIMERS	5
 
 #define	TCPT_REXMT	0		/* retransmit */
 #define	TCPT_PERSIST	1		/* retransmit persistance */
 #define	TCPT_KEEP	2		/* keep alive */
 #define	TCPT_2MSL	3		/* 2*msl quiet time timer */
+#define	TCPT_TLP	4		/* tail loss probe */
 
 /*
  * The TCPT_REXMT timer is used to force retransmissions.
@@ -110,6 +111,9 @@
  * an ack segment in response from the peer.  If, despite the TCPT_KEEP
  * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE
  * amount of time probing, then we drop the connection.
+ *
+ * The TCPT_TLP timer is used to trigger retransmissions on short streams
+ * that wouldn't have a chance to retransmit a segment until the RTO fires.
  */
 
 /*
@@ -141,7 +145,7 @@
 
 #ifdef	TCPTIMERS
 const char *tcptimers[] =
-    { "REXMT", "PERSIST", "KEEP", "2MSL" };
+    { "REXMT", "PERSIST", "KEEP", "2MSL", "TLP" };
 #endif
 
 /*
@@ -159,6 +163,13 @@
 	callout_schedule(&(tp)->t_timer[(timer)],			\
 	    (nticks) * (hz / PR_SLOWHZ))
 
+/*
+ * Arm the timer to fire after nticks hz ticks (no PR_SLOWHZ conversion)
+ */
+#define TCP_TIMER_ARM_HZ(tp, timer, nticks)				\
+	callout_schedule(&(tp)->t_timer[(timer)],			\
+	    (nticks))
+
 #define	TCP_TIMER_DISARM(tp, timer)					\
 	callout_stop(&(tp)->t_timer[(timer)])
 
@@ -188,6 +199,7 @@
 extern int tcp_maxpersistidle;		/* max idle time in persist */
 extern int tcp_ttl;			/* time to live for TCP segs */
 extern const int tcp_backoff[];
+extern u_int tcp_tlp_enabled;		/* tail loss probe enabled */
 
 void	tcp_timer_init(void);
 #endif
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.212
diff -d -u -r1.212 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c	26 Apr 2016 08:44:45 -0000	1.212
+++ sys/netinet/tcp_usrreq.c	25 Jul 2016 13:13:34 -0000
@@ -2184,6 +2184,13 @@
 		       sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
 		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
 	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+		       CTLTYPE_INT, "tlp",
+		       SYSCTL_DESCR("Enable tail loss probe"),
+		       NULL, 0, &tcp_tlp_enabled, 0,
+		       CTL_NET, pf, IPPROTO_TCP, TCPCTL_TLP, CTL_EOL);
+			
+	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
 		       CTLTYPE_INT, "slowhz",
 		       SYSCTL_DESCR("Keepalive ticks per second"),
Index: sys/netinet/tcp_var.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v
retrieving revision 1.177
diff -d -u -r1.177 tcp_var.h
--- sys/netinet/tcp_var.h	14 Feb 2015 22:09:53 -0000	1.177
+++ sys/netinet/tcp_var.h	25 Jul 2016 13:13:34 -0000
@@ -235,6 +235,7 @@
 	tcp_seq	snd_una;		/* send unacknowledged */
 	tcp_seq	snd_nxt;		/* send next */
 	tcp_seq	snd_up;			/* send urgent pointer */
+	tcp_seq	snd_tlp_nxt;		/* snd_nxt when TLP timer was armed */
 	tcp_seq	snd_wl1;		/* window update seg seq number */
 	tcp_seq	snd_wl2;		/* window update seg ack number */
 	tcp_seq	iss;			/* initial send sequence number */
@@ -710,8 +711,9 @@
 #define	TCP_STAT_ECN_SHS	73	/* # of successful ECN handshakes */
 #define	TCP_STAT_ECN_CE		74	/* # of packets with CE bit */
 #define	TCP_STAT_ECN_ECT	75	/* # of packets with ECT(0) bit */
+#define	TCP_STAT_TLP		76	/* # of tail loss probes sent */
 
-#define	TCP_NSTATS		76
+#define	TCP_NSTATS		77
 
 /*
  * Names for TCP sysctl objects.
@@ -754,7 +756,8 @@
 #define	TCPCTL_DEBX		32	/* # of tcp debug sockets */
 #define	TCPCTL_DROP		33	/* drop tcp connection */
 #define	TCPCTL_MSL		34	/* Max Segment Life */
-#define	TCPCTL_MAXID		35
+#define	TCPCTL_TLP		35	/* Tail Loss Probe */
+#define	TCPCTL_MAXID		36
 
 #define	TCPCTL_NAMES { \
 	{ 0, 0 }, \
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: /cvsroot/src/usr.bin/netstat/inet.c,v
retrieving revision 1.106
diff -d -u -r1.106 inet.c
--- usr.bin/netstat/inet.c	8 Feb 2015 15:09:45 -0000	1.106
+++ usr.bin/netstat/inet.c	25 Jul 2016 13:13:36 -0000
@@ -503,6 +503,7 @@
 	p(TCP_STAT_ECN_SHS, "\t%" PRIu64 " successful ECN handshake%s\n");
 	p(TCP_STAT_ECN_CE, "\t%" PRIu64 " packet%s with ECN CE bit\n");
 	p(TCP_STAT_ECN_ECT, "\t%" PRIu64 " packet%s ECN ECT(0) bit\n");
+	p(TCP_STAT_TLP, "\t%" PRIu64 " tail loss probe%s\n");
 #undef p
 #undef ps
 #undef p2
