Use the new RNG for the networking stack
[akaros.git] / kern / src / net / tcp.c
index 6035661..841919c 100644 (file)
-/**
- * @file
- * Transmission Control Protocol for IP
+/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+ * Portions Copyright © 1997-1999 Vita Nuova Limited
+ * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
+ *                                (www.vitanuova.com)
+ * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
  *
  *
- * This file contains common functions for the TCP implementation, such as functinos
- * for manipulating the data structures and the TCP timer functions. TCP functions
- * related to input and output is found in tcp_in.c and tcp_out.c respectively.
+ * Modified for the Akaros operating system:
+ * Copyright (c) 2013-2014 The Regents of the University of California
+ * Copyright (c) 2013-2015 Google Inc.
  *
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE. */
+
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <cpio.h>
+#include <pmap.h>
+#include <smp.h>
+#include <ip.h>
+
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <cpio.h>
+#include <pmap.h>
+#include <smp.h>
+#include <ip.h>
+
+enum {                                 /* Constants shared by the whole TCP implementation */
+       QMAX = 64 * 1024 - 1,   /* queue limit handed to qopen() in tcpcreate() */
+       IP_TCPPROTO = 6,
+
+       TCP4_IPLEN = 8,
+       TCP4_PHDRSIZE = 12,
+       TCP4_HDRSIZE = 20,
+       TCP4_TCBPHDRSZ = 40,
+       TCP4_PKT = TCP4_IPLEN + TCP4_PHDRSIZE,
+
+       TCP6_IPLEN = 0,
+       TCP6_PHDRSIZE = 40,
+       TCP6_HDRSIZE = 20,
+       TCP6_TCBPHDRSZ = 60,
+       TCP6_PKT = TCP6_IPLEN + TCP6_PHDRSIZE,
+
+       TcptimerOFF = 0,        /* values of Tcptimer.state */
+       TcptimerON = 1,
+       TcptimerDONE = 2,
+       MAX_TIME = (1 << 20),   /* Forever */
+       TCP_ACK = 50,   /* Timed ack sequence in ms */
+       MAXBACKMS = 9 * 60 * 1000,      /* longest backoff time (ms) before hangup */
+
+       URG = 0x20,     /* Data marked urgent */
+       ACK = 0x10,     /* Acknowledge is valid */
+       PSH = 0x08,     /* Whole data pipe is pushed */
+       RST = 0x04,     /* Reset connection */
+       SYN = 0x02,     /* Pkt. is synchronise */
+       FIN = 0x01,     /* Start close down */
+
+       EOLOPT = 0,
+       NOOPOPT = 1,
+       MSSOPT = 2,
+       MSS_LENGTH = 4, /* NOTE(review): presumably the byte length of the MSS (maximum segment size) option -- confirm */
+       WSOPT = 3,
+       WS_LENGTH = 3,  /* NOTE(review): presumably the byte length of the window scale option -- confirm */
+       MSL2 = 10,
+       MSPTICK = 50,   /* Milliseconds per timer tick */
+       DEF_MSS = 1460, /* Default maximum segment size (initial value of tcp_mss) */
+       DEF_MSS6 = 1280,        /* Default maximum segment size (min) for v6 */
+       DEF_RTT = 500,  /* Default round trip (initial value of tcp_irtt) */
+       DEF_KAT = 120000,       /* Default time (ms) between keep alives */
+       TCP_LISTEN = 0, /* Listen connection; mode passed to tcpstart() by tcpannounce() */
+       TCP_CONNECT = 1,        /* Outgoing connection; mode passed to tcpstart() by tcpconnect() */
+       SYNACK_RXTIMER = 250,   /* ms between SYNACK retransmits */
+
+       TCPREXMTTHRESH = 3,     /* dupack threshold for rxt */
+
+       FORCE = 1,      /* bit flags; FORCE is or'ed into Tcpctl.flags by tcpacktimer() */
+       CLONE = 2,
+       RETRAN = 4,
+       ACTIVE = 8,
+       SYNACK = 16,
+       TSO = 32,
+
+       LOGAGAIN = 3,
+       LOGDGAIN = 2,
+
+       Closed = 0,     /* Connection states; also the index into tcpstates[] */
+       Listen,
+       Syn_sent,
+       Syn_received,
+       Established,
+       Finwait1,
+       Finwait2,
+       Close_wait,
+       Closing,
+       Last_ack,
+       Time_wait,
+
+       Maxlimbo = 1000,        /* maximum procs waiting for response to SYN ACK */
+       NLHT = 256,     /* hash table size, must be a power of 2 */
+       LHTMASK = NLHT - 1,
+
+       HaveWS = 1 << 8,
+};
+
+/* Printable names of the connection states.  The array is indexed by the
+ * state value (see tcpstate()), so the order must correspond to the
+ * Closed..Time_wait enumerators above */
+char *tcpstates[] = {
+       "Closed", "Listen", "Syn_sent", "Syn_received",
+       "Established", "Finwait1", "Finwait2", "Close_wait",
+       "Closing", "Last_ack", "Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer;      /* One TCP timer; running timers are chained on tcppriv.timers */
+struct Tcptimer {
+       Tcptimer *next;                 /* links of the doubly linked active list, maintained by timerstate() */
+       Tcptimer *prev;
+       Tcptimer *readynext;
+       int state;                      /* TcptimerOFF, TcptimerON or TcptimerDONE */
+       uint64_t start;
+       uint64_t count;
+       void (*func) (void *);          /* presumably invoked with arg when the timer expires -- confirm */
+       void *arg;
+};
+
+/*
+ *  v4 and v6 pseudo headers used for
+ *  checksumming tcp
+ */
+typedef struct Tcp4hdr Tcp4hdr;        /* IPv4 flavour of the header; every member is a byte or byte
+                                        * array, and conversion to host integers is done by
+                                        * ntohtcp4()/htontcp4() according to the comment on struct Tcp */
+struct Tcp4hdr {
+       uint8_t vihl;                           /* Version and header length */
+       uint8_t tos;                            /* Type of service */
+       uint8_t length[2];                      /* packet length */
+       uint8_t id[2];                          /* Identification */
+       uint8_t frag[2];                        /* Fragment information */
+       uint8_t Unused;
+       uint8_t proto;
+       uint8_t tcplen[2];
+       uint8_t tcpsrc[4];
+       uint8_t tcpdst[4];
+       uint8_t tcpsport[2];
+       uint8_t tcpdport[2];
+       uint8_t tcpseq[4];
+       uint8_t tcpack[4];
+       uint8_t tcpflag[2];
+       uint8_t tcpwin[2];
+       uint8_t tcpcksum[2];
+       uint8_t tcpurg[2];
+       /* Options segment */
+       uint8_t tcpopt[1];                      /* first option byte; presumably more follow in the packet -- confirm */
+};
+
+typedef struct Tcp6hdr Tcp6hdr;        /* IPv6 counterpart of Tcp4hdr; addresses are IPaddrlen bytes */
+struct Tcp6hdr {
+       uint8_t vcf[4];
+       uint8_t ploadlen[2];
+       uint8_t proto;
+       uint8_t ttl;
+       uint8_t tcpsrc[IPaddrlen];
+       uint8_t tcpdst[IPaddrlen];
+       uint8_t tcpsport[2];
+       uint8_t tcpdport[2];
+       uint8_t tcpseq[4];
+       uint8_t tcpack[4];
+       uint8_t tcpflag[2];
+       uint8_t tcpwin[2];
+       uint8_t tcpcksum[2];
+       uint8_t tcpurg[2];
+       /* Options segment */
+       uint8_t tcpopt[1];                      /* first option byte; presumably more follow in the packet -- confirm */
+};
+
+/*
+ *  this represents the control info
+ *  for a single packet.  It is derived from
+ *  a packet in ntohtcp{4,6}() and stuck into
+ *  a packet in htontcp{4,6}().
  */
  */
+typedef struct Tcp Tcp;                        /* Decoded control fields of a single TCP segment */
+struct Tcp {
+       uint16_t source;
+       uint16_t dest;
+       uint32_t seq;
+       uint32_t ack;
+       uint8_t flags;                          /* presumably the URG/ACK/PSH/RST/SYN/FIN bits -- confirm */
+       uint16_t ws;                            /* window scale option (if not zero) */
+       uint32_t wnd;
+       uint16_t urg;
+       uint16_t mss;                           /* max segment size option (if not zero) */
+       uint16_t len;                           /* size of data */
+};
 
 /*
 
 /*
- * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
- * All rights reserved. 
- * 
- * Redistribution and use in source and binary forms, with or without modification, 
- * are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 
- * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
- * OF SUCH DAMAGE.
+ *  this header is malloc'd to thread together fragments
+ *  waiting to be coalesced
+ */
+typedef struct Reseq Reseq;            /* Entry of a Tcpctl's resequencing queue (Tcpctl.reseq) */
+struct Reseq {
+       Reseq *next;                    /* singly linked list */
+       Tcp seg;                        /* control info of the held segment */
+       struct block *bp;               /* presumably the segment's data -- confirm */
+       uint16_t length;
+};
+
+/*
+ *  the qlock in the Conv locks this structure
+ */
+typedef struct Tcpctl Tcpctl;          /* Per-conversation TCP control block, reached through conv->ptcl */
+struct Tcpctl {
+       uint8_t state;                          /* Connection state (Closed..Time_wait), changed via tcpsetstate() */
+       uint8_t type;                           /* Listening or active connection */
+       uint8_t code;                           /* Icmp code */
+       struct {
+               uint32_t una;                   /* Unacked data pointer */
+               uint32_t nxt;                   /* Next sequence expected */
+               uint32_t ptr;                   /* Data pointer */
+               uint32_t wnd;                   /* Tcp send window */
+               uint32_t urg;                   /* Urgent data pointer */
+               uint32_t wl2;
+               int scale;                              /* how much to right shift window in xmitted packets */
+               /* to implement tahoe and reno TCP */
+               uint32_t dupacks;               /* number of duplicate acks rcvd */
+               int recovery;                   /* loss recovery flag */
+               uint32_t rxt;                   /* right window marker for recovery */
+       } snd;
+       struct {
+               uint32_t nxt;                   /* Receive pointer to next uint8_t slot */
+               uint32_t wnd;                   /* Receive window incoming; recomputed by tcprcvwin() */
+               uint32_t urg;                   /* Urgent pointer */
+               int blocked;                    /* set to 1 by tcprcvwin() when the window reaches 0 */
+               int una;                                /* unacked data segs */
+               int scale;                              /* how much to left shift window in rcved packets */
+       } rcv;
+       uint32_t iss;                           /* Initial sequence number */
+       int sawwsopt;                           /* true if we saw a wsopt on the incoming SYN */
+       uint32_t cwind;                         /* Congestion window */
+       int scale;                                      /* desired snd.scale */
+       uint16_t ssthresh;                      /* Slow start threshold */
+       int resent;                                     /* Bytes just resent */
+       int irs;                                        /* Initial received sequence */
+       uint16_t mss;                           /* Maximum segment size */
+       int rerecv;                                     /* Overlap of data re-received */
+       uint32_t window;                        /* Receive window */
+       uint8_t backoff;                        /* Exponential backoff counter */
+       int backedoff;                          /* ms we've backed off for rexmits */
+       uint8_t flags;                          /* State flags (e.g. FORCE, set by tcpacktimer()) */
+       Reseq *reseq;                           /* Resequencing queue */
+       Tcptimer timer;                         /* Activity timer */
+       Tcptimer acktimer;                      /* Acknowledge timer */
+       Tcptimer rtt_timer;                     /* Round trip timer */
+       Tcptimer katimer;                       /* keep alive timer */
+       uint32_t rttseq;                        /* Round trip sequence */
+       int srtt;                                       /* Shortened round trip */
+       int mdev;                                       /* Mean deviation of round trip */
+       int kacounter;                          /* count down for keep alive */
+       uint64_t sndsyntime;            /* time syn sent */
+       uint64_t time;                          /* time Finwait2 or Syn_received was sent */
+       int nochecksum;                         /* non-zero means don't send checksums */
+       int flgcnt;                                     /* number of flags in the sequence (FIN,SEQ); NOTE(review): "SEQ" probably means SYN -- confirm */
+
+       union {
+               Tcp4hdr tcp4hdr;
+               Tcp6hdr tcp6hdr;
+       } protohdr;                                     /* prototype header */
+};
+
+/*
+ *  New calls are put in limbo rather than having a conversation structure
+ *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
+ *  any real Conv structures mucking things up.  Calls in limbo rexmit their
+ *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
  *
  *
- * This file is part of the lwIP TCP/IP stack.
- * 
- * Author: Adam Dunkels <adam@sics.se>
- * Modified by David Zhu <yuzhu@cs.berkeley.edu> to be used for Akaros
+ *  In particular they aren't on a listener's queue so that they don't figure
+ *  in the input queue limit.
  *
  *
+ *  If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ *  of 70000 limbo'd calls.  Not great for a linear list but doable.  Therefore
+ *  there is no hashing of this list.
  */
  */
+typedef struct Limbo Limbo;            /* One incoming call held in limbo, i.e. waiting for the ACK
+                                        * to our SYN ACK; kept in tcppriv.lht[] */
+struct Limbo {
+       Limbo *next;
+
+       uint8_t laddr[IPaddrlen];
+       uint8_t raddr[IPaddrlen];
+       uint16_t lport;
+       uint16_t rport;
+       uint32_t irs;                           /* initial received sequence */
+       uint32_t iss;                           /* initial sent sequence */
+       uint16_t mss;                           /* mss from the other end */
+       uint16_t rcvscale;                      /* how much to scale rcvd windows */
+       uint16_t sndscale;                      /* how much to scale sent windows */
+       uint64_t lastsend;                      /* last time we sent a synack */
+       uint8_t version;                        /* v4 or v6 */
+       uint8_t rexmits;                        /* number of retransmissions */
+};
 
 
-#include <ros/common.h>
-#include <string.h>
-#include <kmalloc.h>
-#include <net.h>
-#include <sys/queue.h>
-#include <atomic.h>
-
-#include <bits/netinet.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/tcp_impl.h>
-#include <slab.h>
-#include <socket.h>
-#include <string.h>
-#include <debug.h>
-
-/* String array used to display different TCP states */
-const char * const tcp_state_str[] = {
-  "CLOSED",      
-  "LISTEN",      
-  "SYN_SENT",    
-  "SYN_RCVD",    
-  "ESTABLISHED", 
-  "FIN_WAIT_1",  
-  "FIN_WAIT_2",  
-  "CLOSE_WAIT",  
-  "CLOSING",     
-  "LAST_ACK",    
-  "TIME_WAIT"   
+int tcp_irtt = DEF_RTT;                        /* Initial guess at round trip time */
+uint16_t tcp_mss = DEF_MSS;            /* Maximum segment size to be sent */
+
+enum {                                 /* Indices into tcppriv.stats[] and statnames[] */
+       /* MIB stats */
+       MaxConn,
+       ActiveOpens,
+       PassiveOpens,
+       EstabResets,
+       CurrEstab,              /* adjusted by tcpsetstate() on entering/leaving Established */
+       InSegs,
+       OutSegs,
+       RetransSegs,
+       RetransTimeouts,
+       InErrs,
+       OutRsts,
+
+       /* non-MIB stats */
+       CsumErrs,
+       HlenErrs,
+       LenErrs,
+       OutOfOrder,
+
+       Nstats                  /* number of statistics; sizes tcppriv.stats[] */
+};
+
+static char *statnames[] = {           /* Printable name of every statistic, indexed by the stats
+                                        * enumerators.  NOTE(review): the "[index] value" form without
+                                        * '=' is the obsolete GNU designator syntax; standard C99
+                                        * spells it "[index] = value" */
+       [MaxConn] "MaxConn",
+       [ActiveOpens] "ActiveOpens",
+       [PassiveOpens] "PassiveOpens",
+       [EstabResets] "EstabResets",
+       [CurrEstab] "CurrEstab",
+       [InSegs] "InSegs",
+       [OutSegs] "OutSegs",
+       [RetransSegs] "RetransSegs",
+       [RetransTimeouts] "RetransTimeouts",
+       [InErrs] "InErrs",
+       [OutRsts] "OutRsts",
+       [CsumErrs] "CsumErrs",
+       [HlenErrs] "HlenErrs",
+       [LenErrs] "LenErrs",
+       [OutOfOrder] "OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv;        /* NOTE(review): this names the tag "Tcppriv", but the struct
+                                        * below is tagged "tcppriv" (lower case) and the rest of the
+                                        * visible code uses "struct tcppriv"; as written the typedef
+                                        * refers to a different, undefined type */
+struct tcppriv {                       /* Protocol-private TCP state, reached through s->p->priv */
+       /* List of active timers */
+       qlock_t tl;
+       Tcptimer *timers;               /* head of the list maintained by timerstate() */
+
+       /* hash table for matching conversations */
+       struct Ipht ht;
+
+       /* calls in limbo waiting for an ACK to our SYN ACK */
+       int nlimbo;
+       Limbo *lht[NLHT];
+
+       /* for keeping track of tcpackproc */
+       qlock_t apl;
+       int ackprocstarted;
+
+       uint32_t stats[Nstats];         /* counters indexed by the stats enumerators (MaxConn..OutOfOrder) */
 };
 
 };
 
-const uint8_t tcp_backoff[13] =
-    { 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7};
- /* Times per slowtmr hits */
-const uint8_t tcp_persist_backoff[7] = { 3, 6, 12, 24, 48, 96, 120 };
-
-struct tcp_pcb *tcp_pcbs;
-
-/** List of all TCP PCBs bound but not yet (connected || listening) */
-struct tcp_pcb *tcp_bound_pcbs;
-/** List of all TCP PCBs in LISTEN state */
-union tcp_listen_pcbs_t tcp_listen_pcbs;
-/** List of all TCP PCBs that are in a state in which
- * they accept or send data. */
-struct tcp_pcb *tcp_active_pcbs;
-/** List of all TCP PCBs in TIME-WAIT state */
-struct tcp_pcb *tcp_tw_pcbs;
-
-#define NUM_TCP_PCB_LISTS               4
-#define NUM_TCP_PCB_LISTS_NO_TIME_WAIT  3
-/** An array with all (non-temporary) PCB lists, mainly used for smaller code size */
-struct tcp_pcb **tcp_pcb_lists[] = {&tcp_listen_pcbs.pcbs, &tcp_bound_pcbs,
-  &tcp_active_pcbs, &tcp_tw_pcbs};
-
-/** Timer counter to handle calling slow-timer from tcp_tmr() */ 
-static uint8_t tcp_timer;
-static uint16_t tcp_new_port(void);
-
-/** Only used for temporary storage. */
-struct tcp_pcb *tcp_tmp_pcb;
-
-/* Incremented every coarse grained timer shot (typically every 500 ms). */
-uint32_t tcp_ticks;
-uint16_t tcp_port_num = SOCKET_PORT_START;
-
-static uint16_t tcp_new_port(void);
-/**
- * Abandons a connection and optionally sends a RST to the remote
- * host.  Deletes the local protocol control block. This is done when
- * a connection is killed because of shortage of memory.
+/*
+ *  Setting tcpporthogdefense to non-zero enables Dong Lin's
+ *  solution to hijacked systems staking out ports as a form
+ *  of DoS attack.
  *
  *
- * @param pcb the tcp_pcb to abort
- * @param reset boolean to indicate whether a reset should be sent
+ *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
  */
  */
-void
-tcp_abandon(struct tcp_pcb *pcb, int reset)
+int tcpporthogdefense = 0;
+
+int addreseq(Tcpctl *, struct tcppriv *, Tcp *, struct block *, uint16_t);     /* Forward declarations: several of
+                                        * these (tcpstart, tcpoutput, localclose, tcprcvwin, ...) are
+                                        * called below before their definitions appear */
+void getreseq(Tcpctl *, Tcp *, struct block **, uint16_t *);
+void localclose(struct conv *, char *unused_char_p_t);
+void procsyn(struct conv *, Tcp *);
+void tcpiput(struct Proto *, struct Ipifc *, struct block *);
+void tcpoutput(struct conv *);
+int tcptrim(Tcpctl *, Tcp *, struct block **, uint16_t *);
+void tcpstart(struct conv *, int);     /* second argument is TCP_LISTEN or TCP_CONNECT at the visible call sites */
+void tcptimeout(void *);
+void tcpsndsyn(struct conv *, Tcpctl *);
+void tcprcvwin(struct conv *);
+void tcpacktimer(void *);
+void tcpkeepalive(void *);
+void tcpsetkacounter(Tcpctl *);
+void tcprxmit(struct conv *);
+void tcpsettimer(Tcpctl *);
+void tcpsynackrtt(struct conv *);
+void tcpsetscale(struct conv *, Tcpctl *, uint16_t, uint16_t);
+
+static void limborexmit(struct Proto *);
+static void limbo(struct conv *, uint8_t * unused_uint8_p_t, uint8_t *, Tcp *,
+                                 int);
+
+void tcpsetstate(struct conv *s, uint8_t newstate)     /* Move conversation s to connection state
+                                        * newstate: keeps the CurrEstab statistic in step, closes or
+                                        * hangs up the conversation's queues as the new state requires,
+                                        * and reports a completed outgoing connect.  Does nothing if the
+                                        * state is unchanged. */
 {
 {
-  uint32_t seqno, ackno;
-  uint16_t remote_port, local_port;
-  ip_addr_t remote_ip, local_ip;
-#if LWIP_CALLBACK_API  
-  tcp_err_fn errf;
-#endif /* LWIP_CALLBACK_API */
-  void *errf_arg;
-
-  /* pcb->state LISTEN not allowed here */
-  LWIP_ASSERT("don't call tcp_abort/tcp_abandon for listen-pcbs",
-    pcb->state != LISTEN);
-  /* Figure out on which TCP PCB list we are, and remove us. If we
-     are in an active state, call the receive function associated with
-     the PCB with a NULL argument, and send an RST to the remote end. */
-  if (pcb->state == TIME_WAIT) {
-    tcp_pcb_remove(&tcp_tw_pcbs, pcb);
-               kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-  } else {
-    seqno = pcb->snd_nxt;
-    ackno = pcb->rcv_nxt;
-    ip_addr_copy(local_ip, pcb->local_ip);
-    ip_addr_copy(remote_ip, pcb->remote_ip);
-    local_port = pcb->local_port;
-    remote_port = pcb->remote_port;
-#if LWIP_CALLBACK_API
-    errf = pcb->errf;
-#endif /* LWIP_CALLBACK_API */
-    errf_arg = pcb->callback_arg;
-    tcp_pcb_remove(&tcp_active_pcbs, pcb);
-    if (pcb->unacked != NULL) {
-      tcp_segs_free(pcb->unacked);
-    }
-    if (pcb->unsent != NULL) {
-      tcp_segs_free(pcb->unsent);
-    }
-#if TCP_QUEUE_OOSEQ    
-    if (pcb->ooseq != NULL) {
-      tcp_segs_free(pcb->ooseq);
-    }
-#endif /* TCP_QUEUE_OOSEQ */
-               kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-    TCP_EVENT_ERR(errf, errf_arg, ECONNABORTED);
-    if (reset) {
-      LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_abandon: sending RST\n"));
-      tcp_rst(seqno, ackno, &local_ip, &remote_ip, local_port, remote_port);
-    }
-  }
+       Tcpctl *tcb;
+       uint8_t oldstate;
+       struct tcppriv *tpriv;
+
+       tpriv = s->p->priv;
+
+       tcb = (Tcpctl *) s->ptcl;
+
+       oldstate = tcb->state;
+       if (oldstate == newstate)
+               return;                 /* no transition, nothing to account for */
+
+       /* CurrEstab counts conversations currently in Established */
+       if (oldstate == Established)
+               tpriv->stats[CurrEstab]--;
+       if (newstate == Established)
+               tpriv->stats[CurrEstab]++;
+
+       /**
+       Disabled debugging aid (stale: it refers to tpriv->tstats, while the
+       counter above lives in tpriv->stats[CurrEstab]):
+       print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
+               tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
+       **/
+
+       switch (newstate) {
+               case Closed:            /* close the read, write and error queues */
+                       qclose(s->rq);
+                       qclose(s->wq);
+                       qclose(s->eq);
+                       break;
+
+               case Close_wait:        /* Remote closes */
+                       qhangup(s->rq, NULL);
+                       break;
+       }
+
+       tcb->state = newstate;
+
+       /* leaving Syn_sent for anything but Closed: tell the Fs layer the
+        * connect finished (NULL presumably means "no error" -- confirm) */
+       if (oldstate == Syn_sent && newstate != Closed)
+               Fsconnected(s, NULL);
 }
 
 }
 
-/**
- * Aborts the connection by sending a RST (reset) segment to the remote
- * host. The pcb is deallocated. This function never fails.
- *
- * ATTENTION: When calling this from one of the TCP callbacks, make
- * sure you always return ECONNABORTED (and never return ECONNABORTED otherwise
- * or you will risk accessing deallocated memory or memory leaks!
- *
- * @param pcb the tcp pcb to abort
- */
-void
-tcp_abort(struct tcp_pcb *pcb)
+static char *tcpconnect(struct conv *c, char **argv, int argc) /* Connect request for conversation c:
+                                        * runs Fsstdconnect() on the arguments and, if that succeeds,
+                                        * starts the connection with tcpstart(c, TCP_CONNECT).  Returns
+                                        * Fsstdconnect()'s error string, or NULL on success. */
 {
 {
-  tcp_abandon(pcb, 1);
+       char *e;
+
+       e = Fsstdconnect(c, argv, argc);
+       if (e != NULL)
+               return e;
+       tcpstart(c, TCP_CONNECT);
+
+       return NULL;
 }
 
 }
 
+/*
+ * Format a one-line status summary of conversation c into state, writing at
+ * most n bytes via snprintf(), and return snprintf()'s result.
+ *
+ * The line holds the state name, the lengths of the read and write queues (0
+ * when a queue does not exist), srtt/mdev, the congestion window, the send
+ * and receive windows with their scale factors, and the start/count values
+ * of the activity and keep-alive timers.
+ *
+ * swin is paired with rcv.scale and rwin with snd.scale on purpose: per the
+ * Tcpctl field comments, rcv.scale applies to windows in received packets
+ * and snd.scale to windows in transmitted packets.
+ *
+ * Tcptimer.start and Tcptimer.count are uint64_t, so all four timer values
+ * are printed with %llu and cast to unsigned long long; the keep-alive pair
+ * used to be printed with %d, which does not match the argument type.
+ */
+static int tcpstate(struct conv *c, char *state, int n)
+{
+       Tcpctl *s;
+
+       s = (Tcpctl *) (c->ptcl);
+
+       return snprintf(state, n,
+                                       "%s qin %d qout %d srtt %d mdev %d cwin %u swin %u>>%d rwin %u>>%d timer.start %llu timer.count %llu rerecv %d katimer.start %llu katimer.count %llu\n",
+                                       tcpstates[s->state],
+                                       c->rq ? qlen(c->rq) : 0,
+                                       c->wq ? qlen(c->wq) : 0,
+                                       s->srtt, s->mdev,
+                                       s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd,
+                                       s->snd.scale,
+                                       (unsigned long long)s->timer.start,
+                                       (unsigned long long)s->timer.count, s->rerecv,
+                                       (unsigned long long)s->katimer.start,
+                                       (unsigned long long)s->katimer.count);
+}
 
 
-/** 
- * Update the state that tracks the available window space to advertise.
- *
- * Returns how much extra window would be advertised if we sent an
- * update now.
- */
-uint32_t tcp_update_rcv_ann_wnd(struct tcp_pcb *pcb)
+static int tcpinuse(struct conv *c)    /* Returns non-zero unless the conversation's TCP state is Closed */
 {
 {
-  uint32_t new_right_edge = pcb->rcv_nxt + pcb->rcv_wnd;
-
-  if (TCP_SEQ_GEQ(new_right_edge, pcb->rcv_ann_right_edge + MIN((TCP_WND / 2), pcb->mss))) {
-    /* we can advertise more window */
-    pcb->rcv_ann_wnd = pcb->rcv_wnd;
-    return new_right_edge - pcb->rcv_ann_right_edge;
-  } else {
-    if (TCP_SEQ_GT(pcb->rcv_nxt, pcb->rcv_ann_right_edge)) {
-      /* Can happen due to other end sending out of advertised window,
-       * but within actual available (but not yet advertised) window */
-      pcb->rcv_ann_wnd = 0;
-    } else {
-      /* keep the right edge of window constant */
-      uint32_t new_rcv_ann_wnd = pcb->rcv_ann_right_edge - pcb->rcv_nxt;
-      pcb->rcv_ann_wnd = (uint16_t)new_rcv_ann_wnd;
-    }
-    return 0;
-  }
+       Tcpctl *s;
+
+       s = (Tcpctl *) (c->ptcl);       /* the conversation's TCP control block */
+       return s->state != Closed;
 }
 
 }
 
-/**
- * Kills the oldest connection that is in TIME_WAIT state.
- * Called from tcp_alloc() if no more connections are available.
- */
-static void
-tcp_kill_timewait(void)
+static char *tcpannounce(struct conv *c, char **argv, int argc)        /* Announce request for conversation c:
+                                        * runs Fsstdannounce() on the arguments and, if that succeeds,
+                                        * starts the conversation with tcpstart(c, TCP_LISTEN) and calls
+                                        * Fsconnected(c, NULL).  Returns Fsstdannounce()'s error string,
+                                        * or NULL on success. */
 {
 {
-  struct tcp_pcb *pcb, *inactive;
-  uint32_t inactivity;
-
-  inactivity = 0;
-  inactive = NULL;
-  /* Go through the list of TIME_WAIT pcbs and get the oldest pcb. */
-  for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
-    if ((uint32_t)(tcp_ticks - pcb->tmr) >= inactivity) {
-      inactivity = tcp_ticks - pcb->tmr;
-      inactive = pcb;
-    }
-  }
-  if (inactive != NULL) {
-    LWIP_DEBUGF(TCP_DEBUG, ("tcp_kill_timewait: killing oldest TIME-WAIT PCB %p (%"S32_F")\n",
-           (void *)inactive, inactivity));
-    tcp_abort(inactive);
-  }
+       char *e;
+
+       e = Fsstdannounce(c, argv, argc);
+       if (e != NULL)
+               return e;
+       tcpstart(c, TCP_LISTEN);
+       Fsconnected(c, NULL);
+
+       return NULL;
 }
 
 }
 
-/**
- * Kills the oldest active connection that has lower priority than prio.
- *
- * @param prio minimum priority
+/*
+ *  tcpclose is always called with the q locked
  */
  */
-static void
-tcp_kill_prio(uint8_t prio)
+static void tcpclose(struct conv *c)   /* Local close of conversation c: hangs up the read, write
+                                        * and error queues, flushes the read queue, then acts on the
+                                        * current connection state.  States not listed in the switch
+                                        * are left untouched. */
 {
 {
-  struct tcp_pcb *pcb, *inactive;
-  uint32_t inactivity;
-  uint8_t mprio;
-
-
-  mprio = TCP_PRIO_MAX;
-  
-  /* We kill the oldest active connection that has lower priority than prio. */
-  inactivity = 0;
-  inactive = NULL;
-  for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
-    if (pcb->prio <= prio &&
-       pcb->prio <= mprio &&
-       (uint32_t)(tcp_ticks - pcb->tmr) >= inactivity) {
-      inactivity = tcp_ticks - pcb->tmr;
-      inactive = pcb;
-      mprio = pcb->prio;
-    }
-  }
-  if (inactive != NULL) {
-    LWIP_DEBUGF(TCP_DEBUG, ("tcp_kill_prio: killing oldest PCB %p (%"S32_F")\n",
-           (void *)inactive, inactivity));
-    tcp_abort(inactive);
-  }
+       Tcpctl *tcb;
+
+       tcb = (Tcpctl *) c->ptcl;
+
+       qhangup(c->rq, NULL);
+       qhangup(c->wq, NULL);
+       qhangup(c->eq, NULL);
+       qflush(c->rq);
+
+       switch (tcb->state) {
+               case Listen:
+                       /*
+                        *  reset any incoming calls to this listener
+                        */
+                       Fsconnected(c, "Hangup");
+
+                       localclose(c, NULL);
+                       break;
+               case Closed:
+               case Syn_sent:
+                       localclose(c, NULL);
+                       break;
+               case Syn_received:
+               case Established:
+                       /* one more flag occupies sequence space (presumably the
+                        * FIN -- confirm), then Finwait1 and transmit */
+                       tcb->flgcnt++;
+                       tcb->snd.nxt++;
+                       tcpsetstate(c, Finwait1);
+                       tcpoutput(c);
+                       break;
+               case Close_wait:
+                       /* same accounting as above, but the next state is Last_ack */
+                       tcb->flgcnt++;
+                       tcb->snd.nxt++;
+                       tcpsetstate(c, Last_ack);
+                       tcpoutput(c);
+                       break;
+       }
 }
 }
-/**
- * This function should be called by the application when it has
- * processed the data. The purpose is to advertise a larger window
- * when the data has been processed.
- *
- * @param pcb the tcp_pcb for which data is read
- * @param len the amount of bytes that have been read by the application
- */
-void
-tcp_recved(struct tcp_pcb *pcb, uint16_t len)
+
+void tcpkick(void *x)  /* Kick routine of the write queue (registered by tcpcreate() with
+                        * qopen(..., Qkick, tcpkick, c)); x is the struct conv.  Under the
+                        * conversation's qlock it pushes data in the states that can carry
+                        * it and otherwise closes the conversation locally with "Hangup". */
 {
 {
-  int wnd_inflation;
+       ERRSTACK(1);
+       struct conv *s = x;
+       Tcpctl *tcb;
+
+       tcb = (Tcpctl *) s->ptcl;
+
+       qlock(&s->qlock);
+       if (waserror()) {
+               /* error raised below: drop the lock before passing it on */
+               qunlock(&s->qlock);
+               nexterror();
+       }
+
+       switch (tcb->state) {
+               case Syn_sent:
+               case Syn_received:
+               case Established:
+               case Close_wait:
+                       /*
+                        * Push data
+                        */
+                       tcprcvwin(s);
+                       tcpoutput(s);
+                       break;
+               default:
+                       localclose(s, "Hangup");
+                       break;
+       }
 
 
-  check(len <= 0xffff - pcb->rcv_wnd);
+       qunlock(&s->qlock);
+       poperror();
+}
 
 
-  pcb->rcv_wnd += len;
-  if (pcb->rcv_wnd > TCP_WND) {
-    pcb->rcv_wnd = TCP_WND;
-  }
+void tcprcvwin(struct conv *s)
+{      /* Call with tcb locked.
+        * Recomputes the receive window tcb->rcv.wnd as tcb->window minus
+        * qlen(s->rq), clamped at 0, and sets tcb->rcv.blocked when the
+        * result is 0 (it is never cleared here). */
+       int w;
+       Tcpctl *tcb;
+
+       tcb = (Tcpctl *) s->ptcl;
+       w = tcb->window - qlen(s->rq);
+       if (w < 0)
+               w = 0;
+       tcb->rcv.wnd = w;
+       if (w == 0)
+               tcb->rcv.blocked = 1;
+}
 
 
-  wnd_inflation = tcp_update_rcv_ann_wnd(pcb);
+void tcpacktimer(void *v)      /* v is the struct conv.  Under the conversation's qlock, and
+                                * unless the connection is Closed, sets FORCE in the control
+                                * block's flags, refreshes the receive window and calls
+                                * tcpoutput().  Also registered by tcpcreate() as the callback
+                                * of the read queue. */
+{
+       ERRSTACK(1);
+       Tcpctl *tcb;
+       struct conv *s;
 
 
-  /* If the change in the right edge of window is significant (default
-   * watermark is TCP_WND/4), then send an explicit update now.
-   * Otherwise wait for a packet to be sent in the normal course of
-   * events (or more window to be available later) */
-  if (wnd_inflation >= TCP_WND_UPDATE_THRESHOLD) {
-    tcp_ack_now(pcb);
-    //XXX: tcp_output(pcb);
-  }
+       s = v;
+       tcb = (Tcpctl *) s->ptcl;
 
 
-  printk("tcp_recved: received %d  bytes, wnd %d (%d).\n",
-         len, pcb->rcv_wnd, TCP_WND - pcb->rcv_wnd);
+       qlock(&s->qlock);
+       if (waserror()) {
+               /* error raised below: drop the lock before passing it on */
+               qunlock(&s->qlock);
+               nexterror();
+       }
+       if (tcb->state != Closed) {
+               tcb->flags |= FORCE;
+               tcprcvwin(s);
+               tcpoutput(s);
+       }
+       qunlock(&s->qlock);
+       poperror();
 }
 
 }
 
-static void wrap_restart_kthread(struct trapframe *tf, uint32_t srcid,
-                                       long a0, long a1, long a2){
-       restart_kthread((struct kthread*) a0);
+static void tcpcreate(struct conv *c)  /* Creates the read and write queues of conversation c;
+                                        * c->eq is not created here */
+{
+       c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);        /* read queue: limit QMAX, callback tcpacktimer */
+       c->wq = qopen(8 * QMAX, Qkick, tcpkick, c);            /* write queue: limit 8 * QMAX, callback tcpkick */
 }
 
 }
 
-/**
- * Default receive callback that is called if the user didn't register
- * a recv callback for the pcb.
- */
-error_t tcp_recv_null(void *arg, struct tcp_pcb *pcb, struct pbuf *p, error_t err) {
-       if (pcb == NULL || pcb->pcbsock == NULL) {
-               pbuf_free(p);
-               return -1;
+static void timerstate(struct tcppriv *priv, Tcptimer * t, int newstate)
+{
+       if (newstate != TcptimerON) {
+               if (t->state == TcptimerON) {
+                       // unchain
+                       if (priv->timers == t) {
+                               priv->timers = t->next;
+                               if (t->prev != NULL)
+                                       panic("timerstate1");
+                       }
+                       if (t->next)
+                               t->next->prev = t->prev;
+                       if (t->prev)
+                               t->prev->next = t->next;
+                       t->next = t->prev = NULL;
+               }
+       } else {
+               if (t->state != TcptimerON) {
+                       // chain
+                       if (t->prev != NULL || t->next != NULL)
+                               panic("timerstate2");
+                       t->prev = NULL;
+                       t->next = priv->timers;
+                       if (t->next)
+                               t->next->prev = t;
+                       priv->timers = t;
+               }
        }
        }
-  if (p != NULL && pcb != NULL) {
-               // notify that we have recved and increase the recv window
-               // attach it to socket
-               struct socket *sock = pcb->pcbsock;
-               // TODO: attach_pbuf needs to return stuff that can not fit in the buffer right now
-               attach_pbuf(p, &sock->recv_buff);
-               struct kthread *kthread;
-               /* First notify any blocking recv calls,
-                * then notify anyone who might be waiting in a select
-                */ 
-               // multiple people might be waiting on the socket here..
-               kthread = __up_sem(&(sock->sem), FALSE);
-               if (kthread) {
-                        send_kernel_message(core_id(), (amr_t)wrap_restart_kthread, (long)kthread, 0, 0,
-                                                                                                 KMSG_ROUTINE);
-               } else {
-                       // wake up all waiters
-                       struct semaphore_entry *sentry, *sentry_tmp;
-                       spin_lock(&sock->waiter_lock);
-                 LIST_FOREACH_SAFE(sentry, &(sock->waiters), link, sentry_tmp){
-                               //should only wake up one waiter
-                               kthread = __up_sem(&sentry->sem, true);
-                               if (kthread){
-                               send_kernel_message(core_id(), (amr_t)wrap_restart_kthread, (long)kthread, 0, 0,
-                                                                                                 KMSG_ROUTINE);
+       t->state = newstate;
+}
+
+void tcpackproc(void *a)
+{
+       ERRSTACK(1);
+       Tcptimer *t, *tp, *timeo;
+       struct Proto *tcp;
+       struct tcppriv *priv;
+       int loop;
+
+       tcp = a;
+       priv = tcp->priv;
+
+       for (;;) {
+               kthread_usleep(MSPTICK * 1000);
+
+               qlock(&priv->tl);
+               timeo = NULL;
+               loop = 0;
+               for (t = priv->timers; t != NULL; t = tp) {
+                       if (loop++ > 10000)
+                               panic("tcpackproc1");
+                       tp = t->next;
+                       if (t->state == TcptimerON) {
+                               t->count--;
+                               if (t->count == 0) {
+                                       timerstate(priv, t, TcptimerDONE);
+                                       t->readynext = timeo;
+                                       timeo = t;
                                }
                                }
-                               LIST_REMOVE(sentry, link);
-                               /* do not need to free since all the sentry are stack-based vars */
                        }
                        }
-                       spin_unlock(&sock->waiter_lock);
                }
                }
+               qunlock(&priv->tl);
+
+               loop = 0;
+               for (t = timeo; t != NULL; t = t->readynext) {
+                       if (loop++ > 10000)
+                               panic("tcpackproc2");
+                       if (t->state == TcptimerDONE && t->func != NULL) {
+                               /* discard error style */
+                               if (!waserror())
+                                       (*t->func) (t->arg);
+                               poperror();
+                       }
+               }
+
+               limborexmit(tcp);
        }
        }
-       printk ("received total length tcp %d\n", p->tot_len);
-       tcp_recved(pcb, p->tot_len);
-       // decref
-       pbuf_free(p);
-  return ESUCCESS;
 }
 
 }
 
+void tcpgo(struct tcppriv *priv, Tcptimer * t)
+{
+       if (t == NULL || t->start == 0)
+               return;
 
 
-/**
- * Creates a new TCP protocol control block but doesn't place it on
- * any of the TCP PCB lists.
- * The pcb is not put on any list until binding using tcp_bind().
- *
- * @internal: Maybe there should be a idle TCP PCB list where these
- * PCBs are put on. Port reservation using tcp_bind() is implemented but
- * allocated pcbs that are not bound can't be killed automatically if wanting
- * to allocate a pcb with higher prio (@see tcp_kill_prio())
- *
- * @return a new tcp_pcb that initially is in state CLOSED
- */
-struct tcp_pcb* tcp_new(void) {
-  return tcp_alloc(TCP_PRIO_NORMAL);
+       qlock(&priv->tl);
+       t->count = t->start;
+       timerstate(priv, t, TcptimerON);
+       qunlock(&priv->tl);
 }
 
 }
 
-/**
- * Calculates a new initial sequence number for new connections.
- * TODO: Consider use a secure pseduo ISN
- *
- * @return uint32_t pseudo random sequence number
- */
-uint32_t tcp_next_iss(void)
+void tcphalt(struct tcppriv *priv, Tcptimer * t)
 {
 {
-  static uint32_t iss = 6510;
-  
-  iss += tcp_ticks;       /* XXX */
-  return iss;
+       if (t == NULL)
+               return;
+
+       qlock(&priv->tl);
+       timerstate(priv, t, TcptimerOFF);
+       qunlock(&priv->tl);
 }
 
 }
 
-/**
- * Allocate a new tcp_pcb structure.
- *
- * @param prio priority for the new pcb
- * @return a new tcp_pcb that initially is in state CLOSED
- */
-struct tcp_pcb* tcp_alloc(uint8_t prio) {
-  struct tcp_pcb *pcb;
-  uint32_t iss;
-  pcb = kmem_cache_alloc(tcp_pcb_kcache, 0);
-  if (pcb == NULL) {
-               /* Try killing oldest connection in TIME-WAIT. */
-               printd("tcp_alloc: killing off oldest TIME-WAIT connection\n");
-               tcp_kill_timewait();
-               /* Try to allocate a tcp_pcb again. */
-               pcb = (struct tcp_pcb *)kmem_cache_alloc(tcp_pcb_kcache, 0);
-               if (pcb == NULL) {
-                       /* Try killing active connections with lower priority than the new one. */
-                       printd("tcp_alloc: killing connection with prio lower than %d\n", prio);
-                       tcp_kill_prio(prio);
-                       /* Try to allocate a tcp_pcb again. */
-                       pcb = (struct tcp_pcb *)kmem_cache_alloc(tcp_pcb_kcache, 0);
+/* Exponential backoff multiplier: returns 2^n.
+ *
+ * n is clamped to [0, 30].  Shifting a signed int by a negative count, or far
+ * enough to reach the sign bit, is undefined behavior in C, so out-of-range
+ * values saturate instead. */
+int backoff(int n)
+{
+       if (n < 0)
+               n = 0;
+       else if (n > 30)
+               n = 30;
+       return 1 << n;
+}
+
+/* Tears down the local end of conversation s.  Called with the tcb locked.
+ *
+ * Removes s from the protocol's hash table, halts the four per-connection
+ * timers, frees every entry on the resequencing queue, hangs up both queues
+ * with 'reason' and moves the connection to Closed.  A connection still in
+ * Syn_sent first gets Fsconnected(s, reason); a conversation whose state is
+ * Announced has its listen rendez woken afterwards. */
+void localclose(struct conv *s, char *reason)
+{
+       struct tcppriv *priv = s->p->priv;
+       Tcpctl *ctl = (Tcpctl *) s->ptcl;
+       Reseq *seg;
+
+       iphtrem(&priv->ht, s);
+
+       tcphalt(priv, &ctl->timer);
+       tcphalt(priv, &ctl->rtt_timer);
+       tcphalt(priv, &ctl->acktimer);
+       tcphalt(priv, &ctl->katimer);
+
+       /* Nothing more can arrive: pop and free each reassembly entry */
+       while ((seg = ctl->reseq) != NULL) {
+               ctl->reseq = seg->next;
+               freeblist(seg->bp);
+               kfree(seg);
+       }
+
+       if (ctl->state == Syn_sent)
+               Fsconnected(s, reason);
+
+       qhangup(s->rq, reason);
+       qhangup(s->wq, reason);
+
+       tcpsetstate(s, Closed);
+
+       /* listener will check the rq state */
+       if (s->state == Announced)
+               rendez_wakeup(&s->listenr);
+}
+
+/* Returns the MSS towards addr: the first-hop interface's maxtu minus the
+ * medium header size and the IP + TCP header sizes.  When findipifc() finds
+ * no interface, DEF_MSS (or DEF_MSS6 for V6) is returned instead.  Any
+ * version other than V6 is treated as V4.
+ *
+ * Side effects: *scale is set to HaveWS or'd with a shift chosen from the
+ * interface's mbps (3 above 100, 1 above 10, else 0; 0 with no interface),
+ * and the TSO bit in *flags is cleared, then set again if the interface
+ * advertises NETF_TSO. */
+int tcpmtu(struct Proto *tcp, uint8_t * addr, int version, int *scale,
+          uint8_t *flags)
+{
+       struct Ipifc *ifc = findipifc(tcp->f, addr, 0);
+       int is_v6 = (version == V6);
+       int mtu = is_v6 ? DEF_MSS6 : DEF_MSS;
+       int shift = 0;
+
+       if (ifc != NULL) {
+               if (is_v6)
+                       mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+               else
+                       mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+       }
+
+       *flags &= ~TSO;
+
+       /* faster links get a larger window scale */
+       if (ifc != NULL) {
+               if (ifc->mbps > 100)
+                       shift = 3;
+               else if (ifc->mbps > 10)
+                       shift = 1;
+       }
+       *scale = HaveWS | shift;
+
+       if (ifc != NULL && (ifc->feat & NETF_TSO))
+               *flags |= TSO;
+
+       return mtu;
+}
+
+void inittcpctl(struct conv *s, int mode)
+{
+       Tcpctl *tcb;
+       Tcp4hdr *h4;
+       Tcp6hdr *h6;
+       int mss;
+
+       tcb = (Tcpctl *) s->ptcl;
+
+       memset(tcb, 0, sizeof(Tcpctl));
+
+       tcb->ssthresh = 65535;
+       tcb->srtt = tcp_irtt << LOGAGAIN;
+       tcb->mdev = 0;
+
+       /* setup timers */
+       tcb->timer.start = tcp_irtt / MSPTICK;
+       tcb->timer.func = tcptimeout;
+       tcb->timer.arg = s;
+       tcb->rtt_timer.start = MAX_TIME;
+       tcb->acktimer.start = TCP_ACK / MSPTICK;
+       tcb->acktimer.func = tcpacktimer;
+       tcb->acktimer.arg = s;
+       tcb->katimer.start = DEF_KAT / MSPTICK;
+       tcb->katimer.func = tcpkeepalive;
+       tcb->katimer.arg = s;
+
+       mss = DEF_MSS;
+
+       /* create a prototype(pseudo) header */
+       if (mode != TCP_LISTEN) {
+               if (ipcmp(s->laddr, IPnoaddr) == 0)
+                       findlocalip(s->p->f, s->laddr, s->raddr);
+
+               switch (s->ipversion) {
+                       case V4:
+                               h4 = &tcb->protohdr.tcp4hdr;
+                               memset(h4, 0, sizeof(*h4));
+                               h4->proto = IP_TCPPROTO;
+                               hnputs(h4->tcpsport, s->lport);
+                               hnputs(h4->tcpdport, s->rport);
+                               v6tov4(h4->tcpsrc, s->laddr);
+                               v6tov4(h4->tcpdst, s->raddr);
+                               break;
+                       case V6:
+                               h6 = &tcb->protohdr.tcp6hdr;
+                               memset(h6, 0, sizeof(*h6));
+                               h6->proto = IP_TCPPROTO;
+                               hnputs(h6->tcpsport, s->lport);
+                               hnputs(h6->tcpdport, s->rport);
+                               ipmove(h6->tcpsrc, s->laddr);
+                               ipmove(h6->tcpdst, s->raddr);
+                               mss = DEF_MSS6;
+                               break;
+                       default:
+                               panic("inittcpctl: version %d", s->ipversion);
                }
        }
                }
        }
-  if (pcb != NULL) {
-    memset(pcb, 0, sizeof(struct tcp_pcb));
-    pcb->prio = prio;
-    pcb->snd_buf = TCP_SND_BUF;
-    pcb->snd_queuelen = 0;
-    pcb->rcv_wnd = TCP_WND;
-    pcb->rcv_ann_wnd = TCP_WND;
-    pcb->tos = 0;
-    pcb->ttl = TCP_TTL;
-    /* As initial send MSS, we use TCP_MSS but limit it to 536.
-       The send MSS is updated when an MSS option is received. */
-    pcb->mss = (TCP_MSS > 536) ? 536 : TCP_MSS;
-    pcb->rto = 3000 / TCP_SLOW_INTERVAL;
-    pcb->sa = 0;
-    pcb->sv = 3000 / TCP_SLOW_INTERVAL;
-    pcb->rtime = -1;
-    pcb->cwnd = 1;
-    iss = tcp_next_iss();
-    pcb->snd_wl2 = iss;
-    pcb->snd_nxt = iss;
-    pcb->lastack = iss;
-    pcb->snd_lbb = iss;   
-    pcb->tmr = tcp_ticks;
-
-    pcb->polltmr = 0;
-
-/* Basically we need to use the callback api because then we can switch
- * handlers based on the state that the pcb is in. 
+
+       tcb->mss = tcb->cwind = mss;
+
+       /* default is no window scaling */
+       tcb->window = QMAX;
+       tcb->rcv.wnd = QMAX;
+       tcb->rcv.scale = 0;
+       tcb->snd.scale = 0;
+       qsetlimit(s->rq, QMAX);
+}
+
+/*
+ *  called with s qlocked
  */
  */
+/* Starts conversation s in the given mode (TCP_LISTEN or TCP_CONNECT).
+ *
+ * The first call for a protocol instance also launches the tcpackproc
+ * kernel task; ackprocstarted is re-checked under tpriv->apl so only one
+ * task is ever created.  The task name buffer is allocated only when the
+ * task is actually started, so later calls do not leak a KNAMELEN buffer.
+ *
+ * After (re)initialising the control block and adding s to the hash table:
+ *  - TCP_LISTEN:  counts a passive open, sets CLONE, enters Listen.
+ *  - TCP_CONNECT: counts an active open, sets ACTIVE, queues a SYN via
+ *                 tcpsndsyn(), enters Syn_sent and calls tcpoutput().
+ * Any other mode only initialises the control block. */
+void tcpstart(struct conv *s, int mode)
+{
+       Tcpctl *tcb;
+       struct tcppriv *tpriv;
+       char *kpname;
+
+       tpriv = s->p->priv;
+
+       if (tpriv->ackprocstarted == 0) {
+               qlock(&tpriv->apl);
+               if (tpriv->ackprocstarted == 0) {
+                       /* tcpackproc needs to free this if it ever exits */
+                       kpname = kmalloc(KNAMELEN, KMALLOC_WAIT);
+                       snprintf(kpname, KNAMELEN, "#I%dtcpack", s->p->f->dev);
+                       ktask(kpname, tcpackproc, s->p);
+                       tpriv->ackprocstarted = 1;
+               }
+               qunlock(&tpriv->apl);
+       }
+
+       tcb = (Tcpctl *) s->ptcl;
+
+       inittcpctl(s, mode);
+
+       iphtadd(&tpriv->ht, s);
+       switch (mode) {
+               case TCP_LISTEN:
+                       tpriv->stats[PassiveOpens]++;
+                       tcb->flags |= CLONE;
+                       tcpsetstate(s, Listen);
+                       break;
+
+               case TCP_CONNECT:
+                       tpriv->stats[ActiveOpens]++;
+                       tcb->flags |= ACTIVE;
+                       tcpsndsyn(s, tcb);
+                       tcpsetstate(s, Syn_sent);
+                       tcpoutput(s);
+                       break;
+       }
+}
 
 
-    pcb->recv = tcp_recv_null;
-    
-    /* Init KEEPALIVE timer */
-    pcb->keep_idle  = TCP_KEEPIDLE_DEFAULT;
-    
-#if LWIP_TCP_KEEPALIVE
-    pcb->keep_intvl = TCP_KEEPINTVL_DEFAULT;
-    pcb->keep_cnt   = TCP_KEEPCNT_DEFAULT;
-#endif /* LWIP_TCP_KEEPALIVE */
-
-    pcb->keep_cnt_sent = 0;
-  }
-  return pcb;
+/* Formats a TCP flags word for logging: the decimal value of flag >> 10
+ * followed by " URG", " ACK", " PSH", " RST", " SYN", " FIN" for each bit
+ * that is set, in that order.
+ *
+ * Returns a pointer to a static buffer, so the result is overwritten by the
+ * next call and the function is not reentrant.
+ *
+ * Each name is appended at the current end of the buffer.  The buffer is
+ * never passed to snprintf as both destination and "%s" source, which is
+ * undefined behavior for overlapping objects. */
+static char *tcpflag(uint16_t flag)
+{
+       const struct {
+               uint16_t bit;
+               const char *name;
+       } names[] = {
+               {URG, " URG"},
+               {ACK, " ACK"},
+               {PSH, " PSH"},
+               {RST, " RST"},
+               {SYN, " SYN"},
+               {FIN, " FIN"},
+       };
+       static char buf[128];
+       int len;
+       unsigned int i;
+
+       len = snprintf(buf, sizeof(buf), "%d", flag >> 10);     /* Head len */
+       for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+               if (!(flag & names[i].bit))
+                       continue;
+               /* stop appending if the buffer is (unexpectedly) full */
+               if (len < 0 || len >= (int)sizeof(buf))
+                       break;
+               len += snprintf(buf + len, sizeof(buf) - len, "%s",
+                               names[i].name);
+       }
+
+       return buf;
 }
 
 }
 
-/**
- * A nastly hack featuring 'goto' statements that allocates a
- * new TCP local port.
- *
- * @return a new (free) local TCP port number
+/* Builds the IPv6 + TCP header for segment tcph in front of data.
+ *
+ * tcph: host-order segment fields (seq, ack, flags, wnd, urg, mss, ws).
+ * data: payload to prepend the header to, or NULL for a header-only segment
+ *       (a fresh block is allocated in that case).
+ * ph:   prototype header; its first TCP6_TCBPHDRSZ bytes are copied in and
+ *       its proto field is reused below.
+ * tcb:  may be NULL; when present it supplies snd.scale for the advertised
+ *       window and the nochecksum switch.
+ *
+ * Returns the block holding header + payload, or NULL if padblock()/allocb()
+ * returned NULL. */
+struct block *htontcp6(Tcp * tcph, struct block *data, Tcp6hdr * ph,
+                                          Tcpctl * tcb)
+{
+       int dlen;
+       Tcp6hdr *h;
+       uint16_t csum;
+       uint16_t hdrlen, optpad = 0;
+       uint8_t *opt;
+
+       /* options are only emitted on SYN segments; pad them to 4 bytes */
+       hdrlen = TCP6_HDRSIZE;
+       if (tcph->flags & SYN) {
+               if (tcph->mss)
+                       hdrlen += MSS_LENGTH;
+               if (tcph->ws)
+                       hdrlen += WS_LENGTH;
+               optpad = hdrlen & 3;
+               if (optpad)
+                       optpad = 4 - optpad;
+               hdrlen += optpad;
+       }
+
+       if (data) {
+               dlen = blocklen(data);
+               data = padblock(data, hdrlen + TCP6_PKT);
+               if (data == NULL)
+                       return NULL;
+       } else {
+               dlen = 0;
+               data = allocb(hdrlen + TCP6_PKT + 64);  /* the 64 pad is to meet mintu's */
+               if (data == NULL)
+                       return NULL;
+               data->wp += hdrlen + TCP6_PKT;
+       }
+
+       /* copy in pseudo ip header plus port numbers */
+       h = (Tcp6hdr *) (data->rp);
+       memmove(h, ph, TCP6_TCBPHDRSZ);
+
+       /* compose pseudo tcp header, do cksum calculation */
+       /* for the checksum, vcf temporarily carries the TCP length and ttl the
+        * protocol number; both are rewritten after the checksum below */
+       hnputl(h->vcf, hdrlen + dlen);
+       h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+       h->ttl = ph->proto;
+
+       /* copy in variable bits */
+       hnputl(h->tcpseq, tcph->seq);
+       hnputl(h->tcpack, tcph->ack);
+       /* with hdrlen a multiple of 4, hdrlen << 10 == (hdrlen / 4) << 12: the
+        * header length in 32-bit words lands in the top four bits */
+       hnputs(h->tcpflag, (hdrlen << 10) | tcph->flags);
+       hnputs(h->tcpwin, tcph->wnd >> (tcb != NULL ? tcb->snd.scale : 0));
+       hnputs(h->tcpurg, tcph->urg);
+
+       if (tcph->flags & SYN) {
+               opt = h->tcpopt;
+               if (tcph->mss != 0) {
+                       *opt++ = MSSOPT;
+                       *opt++ = MSS_LENGTH;
+                       hnputs(opt, tcph->mss);
+                       opt += 2;
+               }
+               if (tcph->ws != 0) {
+                       *opt++ = WSOPT;
+                       *opt++ = WS_LENGTH;
+                       *opt++ = tcph->ws;
+               }
+               /* fill the padding computed above with no-op options */
+               while (optpad-- > 0)
+                       *opt++ = NOOPOPT;
+       }
+
+       if (tcb != NULL && tcb->nochecksum) {
+               h->tcpcksum[0] = h->tcpcksum[1] = 0;
+       } else {
+               csum = ptclcsum(data, TCP6_IPLEN, hdrlen + dlen + TCP6_PHDRSIZE);
+               hnputs(h->tcpcksum, csum);
+       }
+
+       /* move from pseudo header back to normal ip header */
+       memset(h->vcf, 0, 4);
+       h->vcf[0] = IP_VER6;
+       hnputs(h->ploadlen, hdrlen + dlen);
+       h->proto = ph->proto;
+
+       return data;
+}
+
+/* Builds the IPv4 + TCP header for segment tcph in front of data.
+ *
+ * tcph: host-order segment fields (seq, ack, flags, wnd, urg, mss, ws).
+ * data: payload to prepend the header to, or NULL for a header-only segment
+ *       (a fresh block is allocated in that case).
+ * ph:   prototype header; its first TCP4_TCBPHDRSZ bytes are copied in.
+ * tcb:  may be NULL; when present it supplies snd.scale for the advertised
+ *       window and the nochecksum switch.
+ *
+ * Unless checksumming is disabled, only TCP4_PHDRSIZE bytes starting at
+ * TCP4_IPLEN are summed here; the block is tagged with Btcpck plus
+ * checksum_start/checksum_offset.
+ * NOTE(review): presumably a later stage (driver or hardware offload)
+ * completes the checksum over the TCP header and payload -- confirm.
+ *
+ * Returns the block holding header + payload, or NULL if padblock()/allocb()
+ * returned NULL. */
+struct block *htontcp4(Tcp * tcph, struct block *data, Tcp4hdr * ph,
+                                          Tcpctl * tcb)
+{
+       int dlen;
+       Tcp4hdr *h;
+       uint16_t csum;
+       uint16_t hdrlen, optpad = 0;
+       uint8_t *opt;
+
+       /* options are only emitted on SYN segments; pad them to 4 bytes */
+       hdrlen = TCP4_HDRSIZE;
+       if (tcph->flags & SYN) {
+               if (tcph->mss)
+                       hdrlen += MSS_LENGTH;
+               if (tcph->ws)
+                       hdrlen += WS_LENGTH;
+               optpad = hdrlen & 3;
+               if (optpad)
+                       optpad = 4 - optpad;
+               hdrlen += optpad;
+       }
+
+       if (data) {
+               dlen = blocklen(data);
+               data = padblock(data, hdrlen + TCP4_PKT);
+               if (data == NULL)
+                       return NULL;
+       } else {
+               dlen = 0;
+               data = allocb(hdrlen + TCP4_PKT + 64);  /* the 64 pad is to meet mintu's */
+               if (data == NULL)
+                       return NULL;
+               data->wp += hdrlen + TCP4_PKT;
+       }
+
+       /* copy in pseudo ip header plus port numbers */
+       h = (Tcp4hdr *) (data->rp);
+       memmove(h, ph, TCP4_TCBPHDRSZ);
+
+       /* copy in variable bits */
+       hnputs(h->tcplen, hdrlen + dlen);
+       hnputl(h->tcpseq, tcph->seq);
+       hnputl(h->tcpack, tcph->ack);
+       /* with hdrlen a multiple of 4, hdrlen << 10 == (hdrlen / 4) << 12: the
+        * header length in 32-bit words lands in the top four bits */
+       hnputs(h->tcpflag, (hdrlen << 10) | tcph->flags);
+       hnputs(h->tcpwin, tcph->wnd >> (tcb != NULL ? tcb->snd.scale : 0));
+       hnputs(h->tcpurg, tcph->urg);
+
+       if (tcph->flags & SYN) {
+               opt = h->tcpopt;
+               if (tcph->mss != 0) {
+                       *opt++ = MSSOPT;
+                       *opt++ = MSS_LENGTH;
+                       hnputs(opt, tcph->mss);
+                       opt += 2;
+               }
+               if (tcph->ws != 0) {
+                       *opt++ = WSOPT;
+                       *opt++ = WS_LENGTH;
+                       *opt++ = tcph->ws;
+               }
+               /* fill the padding computed above with no-op options */
+               while (optpad-- > 0)
+                       *opt++ = NOOPOPT;
+       }
+
+       if (tcb != NULL && tcb->nochecksum) {
+               h->tcpcksum[0] = h->tcpcksum[1] = 0;
+       } else {
+               /* store the complemented sum of the TCP4_PHDRSIZE-byte span only,
+                * and record where the remaining checksum work starts and where
+                * its result belongs */
+               csum = ~ptclcsum(data, TCP4_IPLEN, TCP4_PHDRSIZE);
+               hnputs(h->tcpcksum, csum);
+               data->checksum_start = TCP4_IPLEN + TCP4_PHDRSIZE;
+               data->checksum_offset = ph->tcpcksum - ph->tcpsport;
+               data->flag |= Btcpck;
+       }
+
+       return data;
+}
+
+/* Parses the TCP header of an IPv6 packet into host-order fields in tcph.
+ *
+ * bpp: in/out block list.  It is replaced by the result of pullupblock(),
+ *      which makes the header (and then the options) contiguous.
+ *
+ * Returns the TCP header length in bytes (including options), or -1 if the
+ * header could not be pulled up or its length field is smaller than
+ * TCP6_HDRSIZE.  On a -1 return *bpp is NULL and must not be used.
+ *
+ * Only the MSS and window-scale options are interpreted; parsing stops at
+ * EOLOPT, at a malformed option length, or when fewer than two bytes remain
+ * for an option's kind + length. */
+int ntohtcp6(Tcp * tcph, struct block **bpp)
+{
+       Tcp6hdr *h;
+       uint8_t *optr;
+       uint16_t hdrlen;
+       uint16_t optlen;
+       int n;
+
+       *bpp = pullupblock(*bpp, TCP6_PKT + TCP6_HDRSIZE);
+       if (*bpp == NULL)
+               return -1;
+
+       h = (Tcp6hdr *) ((*bpp)->rp);
+       tcph->source = nhgets(h->tcpsport);
+       tcph->dest = nhgets(h->tcpdport);
+       tcph->seq = nhgetl(h->tcpseq);
+       tcph->ack = nhgetl(h->tcpack);
+       /* top nibble of tcpflag[0] is the length in 32-bit words; this yields
+        * the length in bytes */
+       hdrlen = (h->tcpflag[0] >> 2) & ~3;
+       if (hdrlen < TCP6_HDRSIZE) {
+               freeblist(*bpp);
+               /* don't leave the caller holding a freed block */
+               *bpp = NULL;
+               return -1;
+       }
+
+       tcph->flags = h->tcpflag[1];
+       tcph->wnd = nhgets(h->tcpwin);
+       tcph->urg = nhgets(h->tcpurg);
+       tcph->mss = 0;
+       tcph->ws = 0;
+       tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+       *bpp = pullupblock(*bpp, hdrlen + TCP6_PKT);
+       if (*bpp == NULL)
+               return -1;
+       /* pullupblock may hand back a different block: re-derive the header
+        * pointer before walking the options */
+       h = (Tcp6hdr *) ((*bpp)->rp);
+
+       optr = h->tcpopt;
+       n = hdrlen - TCP6_HDRSIZE;
+       while (n > 0 && *optr != EOLOPT) {
+               if (*optr == NOOPOPT) {
+                       n--;
+                       optr++;
+                       continue;
+               }
+               /* the length byte must itself lie inside the header */
+               if (n < 2)
+                       break;
+               optlen = optr[1];
+               if (optlen < 2 || optlen > n)
+                       break;
+               switch (*optr) {
+                       case MSSOPT:
+                               if (optlen == MSS_LENGTH)
+                                       tcph->mss = nhgets(optr + 2);
+                               break;
+                       case WSOPT:
+                               if (optlen == WS_LENGTH && *(optr + 2) <= 14)
+                                       tcph->ws = HaveWS | *(optr + 2);
+                               break;
+               }
+               n -= optlen;
+               optr += optlen;
+       }
+       return hdrlen;
+}
+
+/* Parses the TCP header of an IPv4 packet into host-order fields in tcph.
+ *
+ * bpp: in/out block list.  It is replaced by the result of pullupblock(),
+ *      which makes the header (and then the options) contiguous.
+ *
+ * Returns the TCP header length in bytes (including options), or -1 if the
+ * header could not be pulled up or its length field is smaller than
+ * TCP4_HDRSIZE.  On a -1 return *bpp is NULL and must not be used.
+ *
+ * Only the MSS and window-scale options are interpreted; parsing stops at
+ * EOLOPT, at a malformed option length, or when fewer than two bytes remain
+ * for an option's kind + length. */
+int ntohtcp4(Tcp * tcph, struct block **bpp)
+{
+       Tcp4hdr *h;
+       uint8_t *optr;
+       uint16_t hdrlen;
+       uint16_t optlen;
+       int n;
+
+       *bpp = pullupblock(*bpp, TCP4_PKT + TCP4_HDRSIZE);
+       if (*bpp == NULL)
+               return -1;
+
+       h = (Tcp4hdr *) ((*bpp)->rp);
+       tcph->source = nhgets(h->tcpsport);
+       tcph->dest = nhgets(h->tcpdport);
+       tcph->seq = nhgetl(h->tcpseq);
+       tcph->ack = nhgetl(h->tcpack);
+
+       /* top nibble of tcpflag[0] is the length in 32-bit words; this yields
+        * the length in bytes */
+       hdrlen = (h->tcpflag[0] >> 2) & ~3;
+       if (hdrlen < TCP4_HDRSIZE) {
+               freeblist(*bpp);
+               /* don't leave the caller holding a freed block */
+               *bpp = NULL;
+               return -1;
+       }
+
+       tcph->flags = h->tcpflag[1];
+       tcph->wnd = nhgets(h->tcpwin);
+       tcph->urg = nhgets(h->tcpurg);
+       tcph->mss = 0;
+       tcph->ws = 0;
+       tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+       *bpp = pullupblock(*bpp, hdrlen + TCP4_PKT);
+       if (*bpp == NULL)
+               return -1;
+       /* pullupblock may hand back a different block: re-derive the header
+        * pointer before walking the options */
+       h = (Tcp4hdr *) ((*bpp)->rp);
+
+       optr = h->tcpopt;
+       n = hdrlen - TCP4_HDRSIZE;
+       while (n > 0 && *optr != EOLOPT) {
+               if (*optr == NOOPOPT) {
+                       n--;
+                       optr++;
+                       continue;
+               }
+               /* the length byte must itself lie inside the header */
+               if (n < 2)
+                       break;
+               optlen = optr[1];
+               if (optlen < 2 || optlen > n)
+                       break;
+               switch (*optr) {
+                       case MSSOPT:
+                               if (optlen == MSS_LENGTH)
+                                       tcph->mss = nhgets(optr + 2);
+                               break;
+                       case WSOPT:
+                               if (optlen == WS_LENGTH && *(optr + 2) <= 14)
+                                       tcph->ws = HaveWS | *(optr + 2);
+                               break;
+               }
+               n -= optlen;
+               optr += optlen;
+       }
+       return hdrlen;
+}
+
+/*
+ *  For outgoing calls, generate an initial sequence
+ *  number and put a SYN on the send queue
  */
  */
-static uint16_t tcp_new_port(void) {
-  int i;
-  struct tcp_pcb *pcb;
-  static uint16_t port = TCP_LOCAL_PORT_RANGE_START;
-  
- again:
-  if (++port > TCP_LOCAL_PORT_RANGE_END) {
-    port = TCP_LOCAL_PORT_RANGE_START;
-  }
-  /* Check all PCB lists. */
-  for (i = 0; i < NUM_TCP_PCB_LISTS; i++) {  
-    for(pcb = *tcp_pcb_lists[i]; pcb != NULL; pcb = pcb->next) {
-      if (pcb->local_port == port) {
-        goto again;
-      }
-    }
-  }
-  return port;
+/* Prepares tcb for an active open: fills iss from urandom_read(), starts
+ * every send-side sequence variable at that value, accounts for the SYN in
+ * flgcnt, sets FORCE and timestamps the attempt.  The desired mss and window
+ * scale (and the TSO flag bit) are then taken from tcpmtu() for s's local
+ * address. */
+void tcpsndsyn(struct conv *s, Tcpctl * tcb)
+{
+       /* random initial send sequence number */
+       urandom_read(&tcb->iss, sizeof(tcb->iss));
+
+       tcb->rttseq = tcb->iss;
+       tcb->snd.una = tcb->iss;
+       tcb->snd.wl2 = tcb->iss;
+       tcb->snd.nxt = tcb->rttseq;
+       tcb->snd.ptr = tcb->rttseq;
+
+       tcb->sndsyntime = NOW;
+       tcb->flags |= FORCE;
+       tcb->flgcnt += 1;
+
+       /* set desired mss and scale */
+       tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale,
+                         &tcb->flags);
 }
 
 }
 
+/* Answers segment seg with a reset.
+ *
+ * source/dest are the addresses of the offending segment; the reply swaps
+ * them (and the ports).  length is added to the acknowledged sequence number
+ * when the segment carried no ACK.  Nothing is sent in reply to a segment
+ * that itself has RST set.
+ *
+ * seg is rewritten in place into the outgoing reset, so its contents are
+ * clobbered on return.  Panics on a version other than V4 or V6. */
+void
+sndrst(struct Proto *tcp, uint8_t * source, uint8_t * dest,
+          uint16_t length, Tcp * seg, uint8_t version, char *reason)
+{
+       struct tcppriv *priv;
+       struct block *rst;
+       Tcp4hdr hdr4;
+       Tcp6hdr hdr6;
+       uint8_t rflags = RST;
+
+       netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
+
+       priv = tcp->priv;
+
+       /* never answer a reset with a reset */
+       if (seg->flags & RST)
+               return;
+
+       /* make pseudo header, replying from dest back to source */
+       if (version == V4) {
+               memset(&hdr4, 0, sizeof(hdr4));
+               hdr4.vihl = IP_VER4;
+               v6tov4(hdr4.tcpsrc, dest);
+               v6tov4(hdr4.tcpdst, source);
+               hdr4.proto = IP_TCPPROTO;
+               hnputs(hdr4.tcplen, TCP4_HDRSIZE);
+               hnputs(hdr4.tcpsport, seg->dest);
+               hnputs(hdr4.tcpdport, seg->source);
+       } else if (version == V6) {
+               memset(&hdr6, 0, sizeof(hdr6));
+               hdr6.vcf[0] = IP_VER6;
+               ipmove(hdr6.tcpsrc, dest);
+               ipmove(hdr6.tcpdst, source);
+               hdr6.proto = IP_TCPPROTO;
+               hnputs(hdr6.ploadlen, TCP6_HDRSIZE);
+               hnputs(hdr6.tcpsport, seg->dest);
+               hnputs(hdr6.tcpdport, seg->source);
+       } else {
+               panic("sndrst: version %d", version);
+       }
+
+       priv->stats[OutRsts]++;
+
+       /* convince the other end that this reset is in band */
+       if (seg->flags & ACK) {
+               seg->seq = seg->ack;
+               seg->ack = 0;
+       } else {
+               rflags |= ACK;
+               seg->ack = seg->seq;
+               seg->seq = 0;
+               /* SYN and FIN each occupy one sequence number */
+               if (seg->flags & SYN)
+                       seg->ack++;
+               seg->ack += length;
+               if (seg->flags & FIN)
+                       seg->ack++;
+       }
+       seg->flags = rflags;
+       seg->wnd = 0;
+       seg->urg = 0;
+       seg->mss = 0;
+       seg->ws = 0;
+
+       if (version == V4) {
+               rst = htontcp4(seg, NULL, &hdr4, NULL);
+               if (rst != NULL)
+                       ipoput4(tcp->f, rst, 0, MAXTTL, DFLTTOS, NULL);
+       } else if (version == V6) {
+               rst = htontcp6(seg, NULL, &hdr6, NULL);
+               if (rst != NULL)
+                       ipoput6(tcp->f, rst, 0, MAXTTL, DFLTTOS, NULL);
+       } else {
+               panic("sndrst2: version %d", version);
+       }
+}
 
 
-/**
- * Binds the connection to a local portnumber and IP address. If the
- * IP address is not given (i.e., ipaddr == NULL), the IP address of
- * the outgoing network interface is used instead.
- *
- * @param pcb the tcp_pcb to bind (no check is done whether this pcb is
- *        already bound!)
- * @param ipaddr the local ip address to bind to (use IP_ADDR_ANY to bind
- *        to any local address
- * @param port the local port to bind to
- * @return ERR_USE if the port is already in use
- *         ESUCCESS if bound
+/*
+ *  send a reset to the remote side and close the conversation
+ *  called with s qlocked
  */
  */
-error_t tcp_bind(struct tcp_pcb *pcb, const struct in_addr *ipaddr, uint16_t port) {
-  int i;
-  int max_pcb_list = NUM_TCP_PCB_LISTS;
-  struct tcp_pcb *cpcb;
-
-  LWIP_ERROR("tcp_bind: can only bind in state CLOSED", pcb->state == CLOSED, return -EISCONN);
-
-#if SO_REUSE
-  /* Unless the REUSEADDR flag is set,
-     we have to check the pcbs in TIME-WAIT state, also.
-     We do not dump TIME_WAIT pcb's; they can still be matched by incoming
-     packets using both local and remote IP addresses and ports to distinguish.
-   */
-  if ((pcb->so_options & SO_REUSEADDR) != 0) {
-    max_pcb_list = NUM_TCP_PCB_LISTS_NO_TIME_WAIT;
-  }
-#endif /* SO_REUSE */
-
-  if (port == 0) {
-    port = tcp_new_port();
-  }
-
-  /* Check if the address already is in use (on all lists) */
-  for (i = 0; i < max_pcb_list; i++) {
-    for(cpcb = *tcp_pcb_lists[i]; cpcb != NULL; cpcb = cpcb->next) {
-      if (cpcb->local_port == port) {
-#if SO_REUSE
-        /* Omit checking for the same port if both pcbs have REUSEADDR set.
-           For SO_REUSEADDR, the duplicate-check for a 5-tuple is done in
-           tcp_connect. */
-        if (((pcb->so_options & SO_REUSEADDR) == 0) ||
-          ((cpcb->so_options & SO_REUSEADDR) == 0))
-#endif /* SO_REUSE */
-        {
-          if (ip_addr_isany(&(cpcb->local_ip)) ||
-              ip_addr_isany(ipaddr) ||
-              ip_addr_cmp(&(cpcb->local_ip), ipaddr)) {
-            return EADDRINUSE;
-          }
-        }
-      }
-    }
-  }
-
-  if (!ip_addr_isany(ipaddr)) {
-    pcb->local_ip = *ipaddr;
-  }
-  pcb->local_port = port;
-  TCP_REG(&tcp_bound_pcbs, pcb);
-  LWIP_DEBUGF(TCP_DEBUG, ("tcp_bind: bind to port %"U16_F"\n", port));
-  return 0;
+/* Sends a RST|ACK to the remote side (when s has a remote address) and
+ * closes the conversation locally.  Called with s qlocked.
+ *
+ * Errors thrown while building or sending the reset are discarded so that
+ * localclose() always runs.  If the header block cannot be built
+ * (htontcp4/htontcp6 returned NULL) the reset is skipped rather than
+ * handing a NULL block to the IP output routine, as sndrst() also does.
+ *
+ * Returns NULL on success, or commonerror() if an error escapes to the
+ * outer handler. */
+char *tcphangup(struct conv *s)
+{
+       ERRSTACK(2);
+       Tcp seg;
+       Tcpctl *tcb;
+       struct block *hbp;
+
+       tcb = (Tcpctl *) s->ptcl;
+       if (waserror()) {
+               poperror();
+               return commonerror();
+       }
+       if (ipcmp(s->raddr, IPnoaddr)) {
+               /* discard error style, poperror regardless */
+               if (!waserror()) {
+                       seg.flags = RST | ACK;
+                       seg.ack = tcb->rcv.nxt;
+                       tcb->rcv.una = 0;
+                       seg.seq = tcb->snd.ptr;
+                       seg.wnd = 0;
+                       seg.urg = 0;
+                       seg.mss = 0;
+                       seg.ws = 0;
+                       switch (s->ipversion) {
+                               case V4:
+                                       tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+                                       hbp = htontcp4(&seg, NULL, &tcb->protohdr.tcp4hdr, tcb);
+                                       /* no block, no reset: still close locally */
+                                       if (hbp != NULL)
+                                               ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+                                       break;
+                               case V6:
+                                       tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+                                       hbp = htontcp6(&seg, NULL, &tcb->protohdr.tcp6hdr, tcb);
+                                       /* no block, no reset: still close locally */
+                                       if (hbp != NULL)
+                                               ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+                                       break;
+                               default:
+                                       panic("tcphangup: version %d", s->ipversion);
+                       }
+               }
+               poperror();
+       }
+       localclose(s, NULL);
+       poperror();
+       return NULL;
 }
 
 }
 
-/**
- * Is called every TCP_FAST_INTERVAL (250 ms) and process data previously
- * "refused" by upper layer (application) and sends delayed ACKs.
- *
- * Automatically called from tcp_tmr().
+/*
+ *  (re)send a SYN ACK
  */
  */
-void tcp_fasttmr(void) {
-  struct tcp_pcb *pcb = tcp_active_pcbs;
-
-  while(pcb != NULL) {
-    struct tcp_pcb *next = pcb->next;
-    /* If there is data which was previously "refused" by upper layer */
-    if (pcb->refused_data != NULL) {
-      /* Notify again application with data previously received. */
-      error_t err;
-      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_fasttmr: notify kept packet\n"));
-      TCP_EVENT_RECV(pcb, pcb->refused_data, ESUCCESS, err);
-      if (err == ESUCCESS) {
-        pcb->refused_data = NULL;
-      } else if (err == ECONNABORTED) {
-        /* if err == ECONNABORTED, 'pcb' is already deallocated */
-        pcb = NULL;
-      }
-    }
-
-    /* send delayed ACKs */
-    if (pcb && (pcb->flags & TF_ACK_DELAY)) {
-      printd("tcp_fasttmr: delayed ACK\n");
-      tcp_ack_now(pcb);
-      // XXX: tcp_output(pcb);
-      pcb->flags &= ~(TF_ACK_DELAY | TF_ACK_NOW);
-    }
-
-    pcb = next;
-  }
+/*
+ *  Build and transmit a SYN|ACK for the half-open call lp.
+ *
+ *  tcp: protocol instance; tcp->f is what ipoput4()/ipoput6() is called with.
+ *  lp:  limbo entry supplying addresses, ports, iss and irs.  Its sndscale
+ *       and lastsend fields are updated here.
+ *
+ *  A window scale is only offered when the peer offered one
+ *  (lp->rcvscale != 0).
+ *
+ *  Returns 0 once the segment has been handed to ipoput4()/ipoput6(), or -1
+ *  when htontcp4()/htontcp6() returns NULL.
+ */
+int sndsynack(struct Proto *tcp, Limbo * lp)
+{
+       struct block *hbp;
+       Tcp4hdr ph4;
+       Tcp6hdr ph6;
+       Tcp seg;
+       int scale;
+       uint8_t flag = 0;
+
+       /* make pseudo header */
+       switch (lp->version) {
+               case V4:
+                       memset(&ph4, 0, sizeof(ph4));
+                       ph4.vihl = IP_VER4;
+                       v6tov4(ph4.tcpsrc, lp->laddr);
+                       v6tov4(ph4.tcpdst, lp->raddr);
+                       ph4.proto = IP_TCPPROTO;
+                       hnputs(ph4.tcplen, TCP4_HDRSIZE);
+                       hnputs(ph4.tcpsport, lp->lport);
+                       hnputs(ph4.tcpdport, lp->rport);
+                       break;
+               case V6:
+                       memset(&ph6, 0, sizeof(ph6));
+                       ph6.vcf[0] = IP_VER6;
+                       ipmove(ph6.tcpsrc, lp->laddr);
+                       ipmove(ph6.tcpdst, lp->raddr);
+                       ph6.proto = IP_TCPPROTO;
+                       hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+                       hnputs(ph6.tcpsport, lp->lport);
+                       hnputs(ph6.tcpdport, lp->rport);
+                       break;
+               default:
+                       /* message used to say "sndrst", which pointed at the wrong
+                        * function */
+                       panic("sndsynack: version %d", lp->version);
+       }
+
+       seg.seq = lp->iss;
+       seg.ack = lp->irs + 1;
+       seg.flags = SYN | ACK;
+       seg.urg = 0;
+       seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale, &flag);
+       seg.wnd = QMAX;
+
+       /* if the other side set scale, we should too */
+       if (lp->rcvscale) {
+               seg.ws = scale;
+               lp->sndscale = scale;
+       } else {
+               seg.ws = 0;
+               lp->sndscale = 0;
+       }
+
+       switch (lp->version) {
+               case V4:
+                       hbp = htontcp4(&seg, NULL, &ph4, NULL);
+                       if (hbp == NULL)
+                               return -1;
+                       ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, NULL);
+                       break;
+               case V6:
+                       hbp = htontcp6(&seg, NULL, &ph6, NULL);
+                       if (hbp == NULL)
+                               return -1;
+                       ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, NULL);
+                       break;
+               default:
+                       /* message used to be misspelled "sndsnack" */
+                       panic("sndsynack: version %d", lp->version);
+       }
+       /* remembered so the retransmit and RTT code can time this SYN ACK */
+       lp->lastsend = NOW;
+       return 0;
 }
 
 }
 
-/**
- * Called periodically to dispatch TCP timers.
+/* limbo hash: last two address bytes plus the port, masked by LHTMASK.
+ * p is parenthesized so any expression can be passed safely. */
+#define hashipa(a, p) (((a)[IPaddrlen - 2] + (a)[IPaddrlen - 1] + (p)) & LHTMASK)
+
+/*
+ *  put a call into limbo and respond with a SYN ACK
  *
  *
+ *  called with proto locked
  */
  */
-void tcp_tmr(void) {
-       /* Call tcp_fasttmr() every 250 ms */
-  tcp_fasttmr();
-
-  if (++tcp_timer & 1) {
-    /* Call tcp_tmr() every 500 ms, i.e., every other timer
-       tcp_tmr() is called. */
-    tcp_slowtmr();
-  }
+/*
+ *  s:       conversation the SYN arrived on; s->p is the protocol instance
+ *           whose limbo table is used
+ *  source:  remote address of the SYN
+ *  dest:    local address of the SYN
+ *  seg:     parsed segment (ports, seq, mss and ws are read)
+ *  version: IP version stored in the entry and matched on lookup
+ *
+ *  An existing entry for the same 4-tuple and version is reused.  Otherwise
+ *  a new one is allocated, or, once nlimbo has reached Maxlimbo, the head of
+ *  this hash bucket is recycled.  The entry is dropped again if sndsynack()
+ *  fails.
+ */
+static void
+limbo(struct conv *s, uint8_t * source, uint8_t * dest, Tcp * seg, int version)
+{
+       Limbo *lp, **l;
+       struct tcppriv *tpriv;
+       int h;
+
+       tpriv = s->p->priv;
+       h = hashipa(source, seg->source);
+
+       /* on a miss, l is left pointing at the tail link of the bucket */
+       for (l = &tpriv->lht[h]; *l != NULL; l = &lp->next) {
+               lp = *l;
+               if (lp->lport != seg->dest || lp->rport != seg->source
+                       || lp->version != version)
+                       continue;
+               if (ipcmp(lp->raddr, source) != 0)
+                       continue;
+               if (ipcmp(lp->laddr, dest) != 0)
+                       continue;
+
+               /* each new SYN restarts the retransmits */
+               lp->irs = seg->seq;
+               break;
+       }
+       lp = *l;
+       if (lp == NULL) {
+               if (tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]) {
+                       /* table full: recycle the head of this bucket (entries are
+                        * appended at the tail, so the head is the oldest here) */
+                       lp = tpriv->lht[h];
+                       tpriv->lht[h] = lp->next;
+                       /* If the head was the only entry, l still points at its own
+                        * next field.  Linking through it would make the entry
+                        * point at itself and leave the bucket empty. */
+                       if (l == &lp->next)
+                               l = &tpriv->lht[h];
+                       lp->next = NULL;
+                       /* the recycled entry is a new call: restart its count */
+                       lp->rexmits = 0;
+               } else {
+                       lp = kzmalloc(sizeof(*lp), 0);
+                       if (lp == NULL)
+                               return;
+                       tpriv->nlimbo++;
+               }
+               *l = lp;
+               lp->version = version;
+               ipmove(lp->laddr, dest);
+               ipmove(lp->raddr, source);
+               lp->lport = seg->dest;
+               lp->rport = seg->source;
+               lp->mss = seg->mss;
+               lp->rcvscale = seg->ws;
+               lp->irs = seg->seq;
+               /* random initial send sequence number */
+               urandom_read(&lp->iss, sizeof(lp->iss));
+       }
+
+       if (sndsynack(s->p, lp) < 0) {
+               /* could not answer: forget the call again */
+               *l = lp->next;
+               tpriv->nlimbo--;
+               kfree(lp);
+       }
 }
 
 }
 
-/**
- * Closes the TX side of a connection held by the PCB.
- * For tcp_close(), a RST is sent if the application didn't receive all data
- * (tcp_recved() not called for all data passed to recv callback).
- *
- * Listening pcbs are freed and may not be referenced any more.
- * Connection pcbs are freed if not yet connected and may not be referenced
- * any more. If a connection is established (at least SYN received or in
- * a closing state), the connection is closed, and put in a closing state.
- * The pcb is then automatically freed in tcp_slowtmr(). It is therefore
- * unsafe to reference it.
- *
- * @param pcb the tcp_pcb to close
- * @return ESUCCESS if connection has been closed
- *         another error_t if closing failed and pcb is not freed
+/*
+ *  resend SYN ACK's once every SYNACK_RXTIMER ms.
  */
  */
-static error_t
-tcp_close_shutdown(struct tcp_pcb *pcb, uint8_t rst_on_unacked_data)
+/*
+ *  Walk the limbo table of tcp once.  Entries whose retransmit time has not
+ *  come are skipped, entries that have used up their retransmits or whose
+ *  SYN ACK cannot be sent are freed, and the rest get another SYN ACK
+ *  (unless more than 100 calls are in limbo).
+ *
+ *  Does nothing if canqlock() cannot take tcp->qlock.
+ */
+static void limborexmit(struct Proto *tcp)
 {
 {
-  error_t err;
-
-  if (rst_on_unacked_data && (pcb->state != LISTEN)) {
-    if ((pcb->refused_data != NULL) || (pcb->rcv_wnd != TCP_WND)) {
-      /* Not all data received by application, send RST to tell the remote
-         side about this. */
-      LWIP_ASSERT("pcb->flags & TF_RXCLOSED", pcb->flags & TF_RXCLOSED);
-
-      /* don't call tcp_abort here: we must not deallocate the pcb since
-         that might not be expected when calling tcp_close */
-      tcp_rst(pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip,
-        pcb->local_port, pcb->remote_port);
-
-      tcp_pcb_purge(pcb);
-
-      /* TODO: to which state do we move now? */
-
-      /* move to TIME_WAIT since we close actively */
-      TCP_RMV(&tcp_active_pcbs, pcb);
-      pcb->state = TIME_WAIT;
-      TCP_REG(&tcp_tw_pcbs, pcb);
-
-      return ESUCCESS;
-    }
-  }
-
-  switch (pcb->state) {
-  case CLOSED:
-    /* Closing a pcb in the CLOSED state might seem erroneous,
-     * however, it is in this state once allocated and as yet unused
-     * and the user needs some way to free it should the need arise.
-     * Calling tcp_close() with a pcb that has already been closed, (i.e. twice)
-     * or for a pcb that has been used and then entered the CLOSED state 
-     * is erroneous, but this should never happen as the pcb has in those cases
-     * been freed, and so any remaining handles are bogus. */
-    err = ESUCCESS;
-    TCP_RMV(&tcp_bound_pcbs, pcb);
-               kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-    pcb = NULL;
-    break;
-  case LISTEN:
-    err = ESUCCESS;
-    tcp_pcb_remove(&tcp_listen_pcbs.pcbs, pcb);
-               kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-    pcb = NULL;
-    break;
-  case SYN_SENT:
-    err = ESUCCESS;
-    tcp_pcb_remove(&tcp_active_pcbs, pcb);
-               kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-    pcb = NULL;
-    break;
-  case SYN_RCVD:
-    err = tcp_send_fin(pcb);
-    if (err == ESUCCESS) {
-      pcb->state = FIN_WAIT_1;
-    }
-    break;
-  case ESTABLISHED:
-    err = tcp_send_fin(pcb);
-    if (err == ESUCCESS) {
-      pcb->state = FIN_WAIT_1;
-    }
-    break;
-  case CLOSE_WAIT:
-    err = tcp_send_fin(pcb);
-    if (err == ESUCCESS) {
-      pcb->state = LAST_ACK;
-    }
-    break;
-  default:
-    /* Has already been closed, do nothing. */
-    err = ESUCCESS;
-    pcb = NULL;
-    break;
-  }
-
-  if (pcb != NULL && err == ESUCCESS) {
-    /* To ensure all data has been sent when tcp_close returns, we have
-       to make sure tcp_output doesn't fail.
-       Since we don't really have to ensure all data has been sent when tcp_close
-       returns (unsent data is sent from tcp timer functions, also), we don't care
-       for the return value of tcp_output for now. */
-    /* @todo: When implementing SO_LINGER, this must be changed somehow:
-       If SOF_LINGER is set, the data should be sent and acked before close returns.
-       This can only be valid for sequential APIs, not for the raw API. */
-    tcp_output(pcb);
-  }
-  return err;
+       struct tcppriv *tpriv;
+       Limbo **l, *lp;
+       int h;
+       int seen;
+       uint64_t now;
+
+       tpriv = tcp->priv;
+
+       /* give up for this pass if canqlock() does not get the proto lock */
+       if (!canqlock(&tcp->qlock))
+               return;
+       /* seen counts surviving entries walked past, so the scan can stop as
+        * soon as every entry still in limbo has been visited */
+       seen = 0;
+       now = NOW;
+       for (h = 0; h < NLHT && seen < tpriv->nlimbo; h++) {
+               for (l = &tpriv->lht[h]; *l != NULL && seen < tpriv->nlimbo;) {
+                       lp = *l;
+
+                       /* not due yet: keep the entry and step past it.  Without
+                        * advancing l the same entry would be visited again. */
+                       if (now - lp->lastsend < (lp->rexmits + 1) * SYNACK_RXTIMER) {
+                               seen++;
+                               l = &lp->next;
+                               continue;
+                       }
+
+                       /* time it out after 1 second */
+                       if (++(lp->rexmits) > 5) {
+                               tpriv->nlimbo--;
+                               *l = lp->next;
+                               kfree(lp);
+                               continue;
+                       }
+
+                       /* if we're being attacked, don't bother resending SYN ACK's */
+                       if (tpriv->nlimbo > 100) {
+                               seen++;
+                               l = &lp->next;
+                               continue;
+                       }
+
+                       if (sndsynack(tcp, lp) < 0) {
+                               tpriv->nlimbo--;
+                               *l = lp->next;
+                               kfree(lp);
+                               continue;
+                       }
+
+                       seen++;
+                       l = &lp->next;
+               }
+       }
+       qunlock(&tcp->qlock);
 }
 
 }
 
-/**
- * Closes the connection held by the PCB.
- *
- * Listening pcbs are freed and may not be referenced any more.
- * Connection pcbs are freed if not yet connected and may not be referenced
- * any more. If a connection is established (at least SYN received or in
- * a closing state), the connection is closed, and put in a closing state.
- * The pcb is then automatically freed in tcp_slowtmr(). It is therefore
- * unsafe to reference it (unless an error is returned).
+/*
+ *  lookup call in limbo.  if found, throw it out.
  *
  *
- * @param pcb the tcp_pcb to close
- * @return ESUCCESS if connection has been closed
- *         another error_t if closing failed and pcb is not freed
+ *  called with proto locked
  */
  */
-error_t
-tcp_close(struct tcp_pcb *pcb)
+/*
+ *  Search the limbo bucket for the call matching segp's ports, the given
+ *  addresses and version.  The first match ends the search; it is unlinked
+ *  and freed only when segp->seq is exactly irs + 1.
+ */
+static void
+limborst(struct conv *s, Tcp * segp, uint8_t * src, uint8_t * dst,
+                uint8_t version)
 {
 {
-#if TCP_DEBUG
-  LWIP_DEBUGF(TCP_DEBUG, ("tcp_close: closing in "));
-  tcp_debug_print_state(pcb->state);
-#endif /* TCP_DEBUG */
-
-  if (pcb->state != LISTEN) {
-    /* Set a flag not to receive any more data... */
-    pcb->flags |= TF_RXCLOSED;
-  }
-  /* ... and close */
-  return tcp_close_shutdown(pcb, 1);
+       struct tcppriv *priv = s->p->priv;
+       Limbo **link = &priv->lht[hashipa(src, segp->source)];
+       Limbo *ent;
+
+       /* find a call in limbo */
+       while ((ent = *link) != NULL) {
+               if (ent->lport == segp->dest && ent->rport == segp->source
+                       && ent->version == version
+                       && ipcmp(ent->laddr, dst) == 0
+                       && ipcmp(ent->raddr, src) == 0) {
+                       /* RST can only follow the SYN */
+                       if (segp->seq == ent->irs + 1) {
+                               priv->nlimbo--;
+                               *link = ent->next;
+                               kfree(ent);
+                       }
+                       return;
+               }
+               link = &ent->next;
+       }
 }
 
 }
 
-/**
- * Causes all or part of a full-duplex connection of this PCB to be shut down.
- * This doesn't deallocate the PCB!
+/*
+ *  come here when we finally get an ACK to our SYN-ACK.
+ *  lookup call in limbo.  if found, create a new conversation
  *
  *
- * @param pcb PCB to shutdown
- * @param shut_rx shut down receive side if this is != 0
- * @param shut_tx shut down send side if this is != 0
- * @return ESUCCESS if shutdown succeeded (or the PCB has already been shut down)
- *         another error_t on error.
+ *  called with proto locked
  */
  */
-error_t
-tcp_shutdown(struct tcp_pcb *pcb, int shut_rx, int shut_tx)
+/*
+ *  s:       listening conversation; its Tcpctl is copied into the new one
+ *  segp:    parsed incoming segment
+ *  src/dst: remote and local address of the segment
+ *  version: IP version of the segment (V4 or V6)
+ *
+ *  Returns the new conversation, put into the Established state and added
+ *  to tpriv->ht, or NULL when the segment is not a pure ACK, no limbo entry
+ *  matches, the sequence numbers do not fit, or Fsnewcall() fails.
+ */
+static struct conv *tcpincoming(struct conv *s, Tcp * segp, uint8_t * src,
+                                                               uint8_t * dst, uint8_t version)
 {
 {
-  if (pcb->state == LISTEN) {
-    return ENOTCONN;
-  }
-  if (shut_rx) {
-    /* shut down the receive side: free buffered data... */
-    if (pcb->refused_data != NULL) {
-      pbuf_free(pcb->refused_data);
-      pcb->refused_data = NULL;
-    }
-    /* ... and set a flag not to receive any more data */
-    pcb->flags |= TF_RXCLOSED;
-  }
-  if (shut_tx) {
-    /* This can't happen twice since if it succeeds, the pcb's state is changed.
-       Only close in these states as the others directly deallocate the PCB */
-    switch (pcb->state) {
-  case SYN_RCVD:
-  case ESTABLISHED:
-  case CLOSE_WAIT:
-    return tcp_close_shutdown(pcb, 0);
-  default:
-    /* don't shut down other states */
-    break;
-    }
-  }
-  /* @todo: return another error_t if not in correct state or already shut? */
-  return ESUCCESS;
+       struct conv *new;
+       Tcpctl *tcb;
+       struct tcppriv *tpriv;
+       Tcp4hdr *h4;
+       Tcp6hdr *h6;
+       Limbo *lp, **l;
+       int h;
+
+       /* unless it's just an ack, it can't be someone coming out of limbo */
+       if ((segp->flags & SYN) || (segp->flags & ACK) == 0)
+               return NULL;
+
+       tpriv = s->p->priv;
+
+       /* find a call in limbo */
+       h = hashipa(src, segp->source);
+       for (l = &tpriv->lht[h]; (lp = *l) != NULL; l = &lp->next) {
+               netlog(s->p->f, Logtcp,
+                          "tcpincoming s %I!%d/%I!%d d %I!%d/%I!%d v %d/%d\n", src,
+                          segp->source, lp->raddr, lp->rport, dst, segp->dest, lp->laddr,
+                          lp->lport, version, lp->version);
+
+               if (lp->lport != segp->dest || lp->rport != segp->source
+                       || lp->version != version)
+                       continue;
+               if (ipcmp(lp->laddr, dst) != 0)
+                       continue;
+               if (ipcmp(lp->raddr, src) != 0)
+                       continue;
+
+               /* we're assuming no data with the initial SYN */
+               if (segp->seq != lp->irs + 1 || segp->ack != lp->iss + 1) {
+                       netlog(s->p->f, Logtcp, "tcpincoming s 0x%lx/0x%lx a 0x%lx 0x%lx\n",
+                                  segp->seq, lp->irs + 1, segp->ack, lp->iss + 1);
+                       /* wrong numbers: the entry stays in limbo */
+                       lp = NULL;
+               } else {
+                       /* take the entry off the list; this function owns it now */
+                       tpriv->nlimbo--;
+                       *l = lp->next;
+               }
+               break;
+       }
+       if (lp == NULL)
+               return NULL;
+
+       new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+       if (new == NULL) {
+               /* lp is already off the limbo list; free it or it leaks */
+               kfree(lp);
+               return NULL;
+       }
+
+       /* start from the listener's control block, then rebind the timers */
+       memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+       tcb = (Tcpctl *) new->ptcl;
+       tcb->flags &= ~CLONE;
+       tcb->timer.arg = new;
+       tcb->timer.state = TcptimerOFF;
+       tcb->acktimer.arg = new;
+       tcb->acktimer.state = TcptimerOFF;
+       tcb->katimer.arg = new;
+       tcb->katimer.state = TcptimerOFF;
+       tcb->rtt_timer.arg = new;
+       tcb->rtt_timer.state = TcptimerOFF;
+
+       tcb->irs = lp->irs;
+       tcb->rcv.nxt = tcb->irs + 1;
+       tcb->rcv.urg = tcb->rcv.nxt;
+
+       tcb->iss = lp->iss;
+       tcb->rttseq = tcb->iss;
+       tcb->snd.wl2 = tcb->iss;
+       tcb->snd.una = tcb->iss + 1;
+       tcb->snd.ptr = tcb->iss + 1;
+       tcb->snd.nxt = tcb->iss + 1;
+       tcb->flgcnt = 0;
+       tcb->flags |= SYNACK;
+
+       /* our sending max segment size cannot be bigger than what he asked for */
+       if (lp->mss != 0 && lp->mss < tcb->mss)
+               tcb->mss = lp->mss;
+
+       /* window scaling */
+       tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+       /* the congestion window always starts out as a single segment */
+       tcb->snd.wnd = segp->wnd;
+       tcb->cwind = tcb->mss;
+
+       /* set initial round trip time */
+       tcb->sndsyntime = lp->lastsend + lp->rexmits * SYNACK_RXTIMER;
+       tcpsynackrtt(new);
+
+       kfree(lp);
+
+       /* set up proto header */
+       switch (version) {
+               case V4:
+                       h4 = &tcb->protohdr.tcp4hdr;
+                       memset(h4, 0, sizeof(*h4));
+                       h4->proto = IP_TCPPROTO;
+                       hnputs(h4->tcpsport, new->lport);
+                       hnputs(h4->tcpdport, new->rport);
+                       v6tov4(h4->tcpsrc, dst);
+                       v6tov4(h4->tcpdst, src);
+                       break;
+               case V6:
+                       h6 = &tcb->protohdr.tcp6hdr;
+                       memset(h6, 0, sizeof(*h6));
+                       h6->proto = IP_TCPPROTO;
+                       hnputs(h6->tcpsport, new->lport);
+                       hnputs(h6->tcpdport, new->rport);
+                       ipmove(h6->tcpsrc, dst);
+                       ipmove(h6->tcpdst, src);
+                       break;
+               default:
+                       /* report the value that was actually switched on */
+                       panic("tcpincoming: version %d", version);
+       }
+
+       tcpsetstate(new, Established);
+
+       iphtadd(&tpriv->ht, new);
+
+       return new;
 }
 
 }
 
-/**
- * Default accept callback if no accept callback is specified by the user.
- */
-static error_t
-tcp_accept_null(void *arg, struct tcp_pcb *pcb, error_t err)
+/*
+ *  Return 1 when x lies in the closed range [low, high], 0 otherwise.
+ *  When low > high the range is taken to wrap around the top of the
+ *  32-bit space.
+ */
+int seq_within(uint32_t x, uint32_t low, uint32_t high)
 {
 {
-       //XXX: IMPLEMENT ACCEPT
+       /* plain interval when it does not wrap */
+       if (low <= high)
+               return low <= x && x <= high;
+
+       /* wrapped interval: everything from low up, or from 0 up to high */
+       return x >= low || x <= high;
+}
 
 
-  return ECONNABORTED;
+/* Nonzero when x precedes y, judged by the sign of the 32-bit difference. */
+int seq_lt(uint32_t x, uint32_t y)
+{
+       int delta = (int)(x - y);
+
+       return delta < 0;
 }
 
 }
 
-/**
- * Set the state of the connection to be LISTEN, which means that it
- * is able to accept incoming connections. The protocol control block
- * is reallocated in order to consume less memory. Setting the
- * connection to LISTEN is an irreversible process.
- *
- * @param pcb the original tcp_pcb
- * @param backlog the incoming connections queue limit
- * @return tcp_pcb used for listening, consumes less memory.
- *
- * @note The original tcp_pcb is freed. This function therefore has to be
- *       called like this:
- *             tpcb = tcp_listen(tpcb);
- */
-struct tcp_pcb *
-tcp_listen_with_backlog(struct tcp_pcb *pcb, uint8_t backlog)
+/* Nonzero when x precedes or equals y, judged by the sign of the 32-bit
+ * difference. */
+int seq_le(uint32_t x, uint32_t y)
 {
 {
-  struct tcp_pcb_listen *lpcb;
-
-  LWIP_ERROR("tcp_listen: pcb already connected", pcb->state == CLOSED, return NULL);
-
-  /* already listening? */
-  if (pcb->state == LISTEN) {
-    return pcb;
-  }
-#if SO_REUSE
-  if ((pcb->so_options & SO_REUSEADDR) != 0) {
-    /* Since SO_REUSEADDR allows reusing a local address before the pcb's usage
-       is declared (listen-/connection-pcb), we have to make sure now that
-       this port is only used once for every local IP. */
-    for(lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
-      if (lpcb->local_port == pcb->local_port) {
-        if (ip_addr_cmp(&lpcb->local_ip, &pcb->local_ip)) {
-          /* this address/port is already used */
-          return NULL;
-        }
-      }
-    }
-  }
-#endif /* SO_REUSE */
-       lpcb = kmem_cache_alloc(tcp_pcb_listen_kcache, 0);
-  if (lpcb == NULL) {
-    return NULL;
-  }
-  lpcb->callback_arg = pcb->callback_arg;
-  lpcb->local_port = pcb->local_port;
-  lpcb->state = LISTEN;
-  lpcb->prio = pcb->prio;
-  lpcb->so_options = pcb->so_options;
-  lpcb->so_options |= SO_ACCEPTCONN;
-  lpcb->ttl = pcb->ttl;
-  lpcb->tos = pcb->tos;
-  ip_addr_copy(lpcb->local_ip, pcb->local_ip);
-  TCP_RMV(&tcp_bound_pcbs, pcb);
-       kmem_cache_free(tcp_pcb_kcache, (void*)pcb);
-#if LWIP_CALLBACK_API
-  lpcb->accept = tcp_accept_null;
-#endif /* LWIP_CALLBACK_API */
-#if TCP_LISTEN_BACKLOG
-  lpcb->accepts_pending = 0;
-  lpcb->backlog = (backlog ? backlog : 1);
-#endif /* TCP_LISTEN_BACKLOG */
-  TCP_REG(&tcp_listen_pcbs.pcbs, (struct tcp_pcb *)lpcb);
-  return (struct tcp_pcb *)lpcb;
+       int delta = (int)(x - y);
+
+       return delta <= 0;
 }
 
 }
 
+/* Nonzero when x follows y, judged by the sign of the 32-bit difference. */
+int seq_gt(uint32_t x, uint32_t y)
+{
+       int delta = (int)(x - y);
+
+       return delta > 0;
+}
 
 
-/**
- * Connects to another host. The function given as the "connected"
- * argument will be called when the connection has been established.
- *
- * @param pcb the tcp_pcb used to establish the connection
- * @param ipaddr the remote ip address to connect to
- * @param port the remote tcp port to connect to
- * @param connected callback function to call when connected (or on error)
- * @return ERR_VAL if invalid arguments are given
- *         ESUCCESS if connect request has been sent
- *         other error_t values if connect request couldn't be sent
- */
-error_t
-tcp_connect(struct tcp_pcb *pcb, ip_addr_t *ipaddr, uint16_t port,
-      tcp_connected_fn connected)
+/* Nonzero when x follows or equals y, judged by the sign of the 32-bit
+ * difference. */
+int seq_ge(uint32_t x, uint32_t y)
 {
 {
-  error_t ret;
-  uint32_t iss;
-
-  LWIP_ERROR("tcp_connect: can only connected from state CLOSED", pcb->state == CLOSED, return EISCONN);
-
-  LWIP_DEBUGF(TCP_DEBUG, ("tcp_connect to port %"U16_F"\n", port));
-  if (ipaddr != NULL) {
-    pcb->remote_ip = *ipaddr;
-  } else {
-    return ENETUNREACH;
-  }
-  pcb->remote_port = port;
-
-  /* check if we have a route to the remote host */
-  if (ip_addr_isany(&(pcb->local_ip))) {
-               // assume we have a route anywhere..
-
-    /* no local IP address set, yet. */
-    // struct netif *netif = ip_route(&(pcb->remote_ip));
-    /* Use the netif's IP address as local address. */
-               pcb->local_ip = LOCAL_IP_ADDR;
-  }
-
-  if (pcb->local_port == 0) {
-    pcb->local_port = tcp_new_port();
-  }
-#if SO_REUSE
-  if ((pcb->so_options & SO_REUSEADDR) != 0) {
-    /* Since SO_REUSEADDR allows reusing a local address, we have to make sure
-       now that the 5-tuple is unique. */
-    struct tcp_pcb *cpcb;
-    int i;
-    /* Don't check listen- and bound-PCBs, check active- and TIME-WAIT PCBs. */
-    for (i = 2; i < NUM_TCP_PCB_LISTS; i++) {
-      for(cpcb = *tcp_pcb_lists[i]; cpcb != NULL; cpcb = cpcb->next) {
-        if ((cpcb->local_port == pcb->local_port) &&
-            (cpcb->remote_port == port) &&
-            ip_addr_cmp(&cpcb->local_ip, &pcb->local_ip) &&
-            ip_addr_cmp(&cpcb->remote_ip, ipaddr)) {
-          /* linux returns EISCONN here, but ERR_USE should be OK for us */
-          return ERR_USE;
-        }
-      }
-    }
-  }
-#endif /* SO_REUSE */
-  iss = tcp_next_iss();
-  pcb->rcv_nxt = 0;
-  pcb->snd_nxt = iss;
-  pcb->lastack = iss - 1;
-  pcb->snd_lbb = iss - 1;
-  pcb->rcv_wnd = TCP_WND;
-  pcb->rcv_ann_wnd = TCP_WND;
-  pcb->rcv_ann_right_edge = pcb->rcv_nxt;
-  pcb->snd_wnd = TCP_WND;
-  /* As initial send MSS, we use TCP_MSS but limit it to 536.
-     The send MSS is updated when an MSS option is received. */
-  pcb->mss = (TCP_MSS > 536) ? 536 : TCP_MSS;
-#if TCP_CALCULATE_EFF_SEND_MSS 
-  pcb->mss = tcp_eff_send_mss(pcb->mss, ipaddr);
-#endif /* TCP_CALCULATE_EFF_SEND_MSS */
-  pcb->cwnd = 1;
-  pcb->ssthresh = pcb->mss * 10;
-#if LWIP_CALLBACK_API
-  pcb->connected = connected;
-#else /* LWIP_CALLBACK_API */  
-#endif /* LWIP_CALLBACK_API */
-
-  /* Send a SYN together with the MSS option. */
-  ret = tcp_enqueue_flags(pcb, TCP_SYN);
-  if (ret == ESUCCESS) {
-    /* SYN segment was enqueued, changed the pcbs state now */
-    pcb->state = SYN_SENT;
-    TCP_RMV(&tcp_bound_pcbs, pcb);
-    TCP_REG(&tcp_active_pcbs, pcb);
-    //snmp_inc_tcpactiveopens();
-
-    tcp_output(pcb);
-  }
-  return ret;
+       int delta = (int)(x - y);
+
+       return delta >= 0;
 }
 
 }
 
-/**
- * Called every 500 ms and implements the retransmission timer and the timer that
- * removes PCBs that have been in TIME-WAIT for enough time. It also increments
- * various timers such as the inactivity timer in each PCB.
- *
- * Automatically called from tcp_tmr().
+/*
+ *  use the time between the first SYN and its ack as the
+ *  initial round trip time
  */
  */
-void
-tcp_slowtmr(void)
+void tcpsynackrtt(struct conv *s)
 {
 {
-  struct tcp_pcb *pcb, *prev;
-  uint16_t eff_wnd;
-  uint8_t pcb_remove;      /* flag if a PCB should be removed */
-  uint8_t pcb_reset;       /* flag if a RST should be sent when removing */
-  error_t err;
-
-  err = ESUCCESS;
-
-  ++tcp_ticks;
-
-  /* Steps through all of the active PCBs. */
-  prev = NULL;
-  pcb = tcp_active_pcbs;
-  if (pcb == NULL) {
-    LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: no active pcbs\n"));
-  }
-  while (pcb != NULL) {
-    LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: processing active pcb\n"));
-    LWIP_ASSERT("tcp_slowtmr: active pcb->state != CLOSED\n", pcb->state != CLOSED);
-    LWIP_ASSERT("tcp_slowtmr: active pcb->state != LISTEN\n", pcb->state != LISTEN);
-    LWIP_ASSERT("tcp_slowtmr: active pcb->state != TIME-WAIT\n", pcb->state != TIME_WAIT);
-
-    pcb_remove = 0;
-    pcb_reset = 0;
-
-    if (pcb->state == SYN_SENT && pcb->nrtx == TCP_SYNMAXRTX) {
-      ++pcb_remove;
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: max SYN retries reached\n"));
-    }
-    else if (pcb->nrtx == TCP_MAXRTX) {
-      ++pcb_remove;
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: max DATA retries reached\n"));
-    } else {
-      if (pcb->persist_backoff > 0) {
-        /* If snd_wnd is zero, use persist timer to send 1 byte probes
-         * instead of using the standard retransmission mechanism. */
-        pcb->persist_cnt++;
-        if (pcb->persist_cnt >= tcp_persist_backoff[pcb->persist_backoff-1]) {
-          pcb->persist_cnt = 0;
-          if (pcb->persist_backoff < sizeof(tcp_persist_backoff)) {
-            pcb->persist_backoff++;
-          }
-          tcp_zero_window_probe(pcb);
-        }
-      } else {
-        /* Increase the retransmission timer if it is running */
-        if(pcb->rtime >= 0)
-          ++pcb->rtime;
-
-        if (pcb->unacked != NULL && pcb->rtime >= pcb->rto) {
-          /* Time for a retransmission. */
-          LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_slowtmr: rtime %"S16_F
-                                      " pcb->rto %"S16_F"\n",
-                                      pcb->rtime, pcb->rto));
-
-          /* Double retransmission time-out unless we are trying to
-           * connect to somebody (i.e., we are in SYN_SENT). */
-          if (pcb->state != SYN_SENT) {
-            pcb->rto = ((pcb->sa >> 3) + pcb->sv) << tcp_backoff[pcb->nrtx];
-          }
-
-          /* Reset the retransmission timer. */
-          pcb->rtime = 0;
-
-          /* Reduce congestion window and ssthresh. */
-          eff_wnd = MIN(pcb->cwnd, pcb->snd_wnd);
-          pcb->ssthresh = eff_wnd >> 1;
-          if (pcb->ssthresh < (pcb->mss << 1)) {
-            pcb->ssthresh = (pcb->mss << 1);
-          }
-          pcb->cwnd = pcb->mss;
-          LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: cwnd %"U16_F
-                                       " ssthresh %"U16_F"\n",
-                                       pcb->cwnd, pcb->ssthresh));
-          /* The following needs to be called AFTER cwnd is set to one
-             mss - STJ */
-          tcp_rexmit_rto(pcb);
-        }
-      }
-    }
-    /* Check if this PCB has stayed too long in FIN-WAIT-2 */
-    if (pcb->state == FIN_WAIT_2) {
-      if ((uint32_t)(tcp_ticks - pcb->tmr) >
-          TCP_FIN_WAIT_TIMEOUT / TCP_SLOW_INTERVAL) {
-        ++pcb_remove;
-        LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in FIN-WAIT-2\n"));
-      }
-    }
-
-    /* Check if KEEPALIVE should be sent */
-    if((pcb->so_options & SO_KEEPALIVE) &&
-       ((pcb->state == ESTABLISHED) ||
-        (pcb->state == CLOSE_WAIT))) {
-#if LWIP_TCP_KEEPALIVE
-      if((uint32_t)(tcp_ticks - pcb->tmr) >
-         (pcb->keep_idle + (pcb->keep_cnt*pcb->keep_intvl))
-         / TCP_SLOW_INTERVAL)
-#else      
-      if((uint32_t)(tcp_ticks - pcb->tmr) >
-         (pcb->keep_idle + TCP_MAXIDLE) / TCP_SLOW_INTERVAL)
-#endif /* LWIP_TCP_KEEPALIVE */
-      {
-        LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: KEEPALIVE timeout. Aborting connection to %"U16_F".%"U16_F".%"U16_F".%"U16_F".\n",
-                                ip4_addr1_16(&pcb->remote_ip), ip4_addr2_16(&pcb->remote_ip),
-                                ip4_addr3_16(&pcb->remote_ip), ip4_addr4_16(&pcb->remote_ip)));
-        
-        ++pcb_remove;
-        ++pcb_reset;
-      }
-#if LWIP_TCP_KEEPALIVE
-      else if((uint32_t)(tcp_ticks - pcb->tmr) > 
-              (pcb->keep_idle + pcb->keep_cnt_sent * pcb->keep_intvl)
-              / TCP_SLOW_INTERVAL)
-#else
-      else if((uint32_t)(tcp_ticks - pcb->tmr) > 
-              (pcb->keep_idle + pcb->keep_cnt_sent * TCP_KEEPINTVL_DEFAULT) 
-              / TCP_SLOW_INTERVAL)
-#endif /* LWIP_TCP_KEEPALIVE */
-      {
-        tcp_keepalive(pcb);
-        pcb->keep_cnt_sent++;
-      }
-    }
-
-    /* If this PCB has queued out of sequence data, but has been
-       inactive for too long, will drop the data (it will eventually
-       be retransmitted). */
-#if TCP_QUEUE_OOSEQ
-    if (pcb->ooseq != NULL &&
-        (uint32_t)tcp_ticks - pcb->tmr >= pcb->rto * TCP_OOSEQ_TIMEOUT) {
-      tcp_segs_free(pcb->ooseq);
-      pcb->ooseq = NULL;
-      LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: dropping OOSEQ queued data\n"));
-    }
-#endif /* TCP_QUEUE_OOSEQ */
-
-    /* Check if this PCB has stayed too long in SYN-RCVD */
-    if (pcb->state == SYN_RCVD) {
-      if ((uint32_t)(tcp_ticks - pcb->tmr) >
-          TCP_SYN_RCVD_TIMEOUT / TCP_SLOW_INTERVAL) {
-        ++pcb_remove;
-        LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in SYN-RCVD\n"));
-      }
-    }
-
-    /* Check if this PCB has stayed too long in LAST-ACK */
-    if (pcb->state == LAST_ACK) {
-      if ((uint32_t)(tcp_ticks - pcb->tmr) > 2 * TCP_MSL / TCP_SLOW_INTERVAL) {
-        ++pcb_remove;
-        LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in LAST-ACK\n"));
-      }
-    }
-
-    /* If the PCB should be removed, do it. */
-    if (pcb_remove) {
-      struct tcp_pcb *pcb2;
-      tcp_pcb_purge(pcb);
-      /* Remove PCB from tcp_active_pcbs list. */
-      if (prev != NULL) {
-        LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_active_pcbs", pcb != tcp_active_pcbs);
-        prev->next = pcb->next;
-      } else {
-        /* This PCB was the first. */
-        LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_active_pcbs", tcp_active_pcbs == pcb);
-        tcp_active_pcbs = pcb->next;
-      }
-
-      TCP_EVENT_ERR(pcb->errf, pcb->callback_arg, ECONNABORTED);
-      if (pcb_reset) {
-        tcp_rst(pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip,
-          pcb->local_port, pcb->remote_port);
-      }
-
-      pcb2 = pcb;
-      pcb = pcb->next;
-                       kmem_cache_free(tcp_pcb_kcache, (void*)pcb2);
-    } else {
-      /* get the 'next' element now and work with 'prev' below (in case of abort) */
-      prev = pcb;
-      pcb = pcb->next;
-
-      /* We check if we should poll the connection. */
-      ++prev->polltmr;
-      if (prev->polltmr >= prev->pollinterval) {
-        prev->polltmr = 0;
-        LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: polling application\n"));
-        TCP_EVENT_POLL(prev, err);
-        /* if err == ECONNABORTED, 'prev' is already deallocated */
-        if (err == ESUCCESS) {
-          tcp_output(prev);
-        }
-      }
-    }
-  }
-
-  
-  /* Steps through all of the TIME-WAIT PCBs. */
-  prev = NULL;
-  pcb = tcp_tw_pcbs;
-  while (pcb != NULL) {
-    LWIP_ASSERT("tcp_slowtmr: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
-    pcb_remove = 0;
-
-    /* Check if this PCB has stayed long enough in TIME-WAIT */
-    if ((uint32_t)(tcp_ticks - pcb->tmr) > 2 * TCP_MSL / TCP_SLOW_INTERVAL) {
-      ++pcb_remove;
-    }
-    
-
-
-    /* If the PCB should be removed, do it. */
-    if (pcb_remove) {
-      struct tcp_pcb *pcb2;
-      tcp_pcb_purge(pcb);
-      /* Remove PCB from tcp_tw_pcbs list. */
-      if (prev != NULL) {
-        LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_tw_pcbs", pcb != tcp_tw_pcbs);
-        prev->next = pcb->next;
-      } else {
-        /* This PCB was the first. */
-        LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_tw_pcbs", tcp_tw_pcbs == pcb);
-        tcp_tw_pcbs = pcb->next;
-      }
-      pcb2 = pcb;
-      pcb = pcb->next;
-                       kmem_cache_free(tcp_pcb_kcache, (void*)pcb2);
-    } else {
-      prev = pcb;
-      pcb = pcb->next;
-    }
-  }
+       Tcpctl *tcb;
+       uint64_t delta;
+       struct tcppriv *tpriv;
+
+       tcb = (Tcpctl *) s->ptcl;
+       tpriv = s->p->priv;
+
+       delta = NOW - tcb->sndsyntime;
+       tcb->srtt = delta << LOGAGAIN;
+       tcb->mdev = delta << LOGDGAIN;
+
+       /* halt round trip timer */
+       tcphalt(tpriv, &tcb->rtt_timer);
 }
 
 }
 
+void update(struct conv *s, Tcp * seg)
+{      /* Apply the ack number and window carried by incoming segment seg
+        * to the send side of conversation s: count duplicate acks (and
+        * start a fast retransmit at TCPREXMTTHRESH), update snd.wnd, grow
+        * cwind, refresh srtt/mdev, discard the acked bytes from s->wq and
+        * restart or halt tcb->timer.  tcpiput calls this with s->qlock
+        * held. */
+       int rtt, delta;
+       Tcpctl *tcb;
+       uint32_t acked;
+       uint32_t expand;
+       struct tcppriv *tpriv;
+
+       tpriv = s->p->priv;
+       tcb = (Tcpctl *) s->ptcl;
+
+       /* if everything has been acked, force output(?)
+        * NOTE(review): the test below fires when seg->ack is seq_gt()
+        * snd.nxt, i.e. presumably an ack for sequence space we have not
+        * sent yet; the segment's ack is then ignored apart from setting
+        * FORCE. */
+       if (seq_gt(seg->ack, tcb->snd.nxt)) {
+               tcb->flags |= FORCE;
+               return;
+       }
 
 
-/**
- * Deallocates a list of TCP segments (tcp_seg structures).
- *
- * @param seg tcp_seg list of TCP segments to free
+       /* added by Dong Lin for fast retransmission: a segment that repeats
+        * snd.una while data is still outstanding, carries no data and
+        * leaves the window unchanged is counted as a duplicate ack */
+       if (seg->ack == tcb->snd.una
+               && tcb->snd.una != tcb->snd.nxt
+               && seg->len == 0 && seg->wnd == tcb->snd.wnd) {
+
+               /* this is a pure ack w/o window update */
+               netlog(s->p->f, Logtcprxmt, "dupack %lu ack %lu sndwnd %d advwin %d\n",
+                          tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+               if (++tcb->snd.dupacks == TCPREXMTTHRESH) {
+                       /*
+                        *  tahoe tcp rxt the packet, half sshthresh,
+                        *  and set cwnd to one packet
+                        *  (presumably done inside tcprxmit(); not visible
+                        *  here).  snd.rxt remembers snd.nxt: recovery is
+                        *  only cleared below once an ack reaches it.
+                        */
+                       tcb->snd.recovery = 1;
+                       tcb->snd.rxt = tcb->snd.nxt;
+                       netlog(s->p->f, Logtcprxmt, "fast rxt %lu, nxt %lu\n", tcb->snd.una,
+                                  tcb->snd.nxt);
+                       tcprxmit(s);
+               } else {
+                       /* do reno tcp here. */
+               }
+       }
+
+       /*
+        *  update window: accept the advertised window when this ack is
+        *  newer than the one that last updated it (snd.wl2), or when it
+        *  is the same ack but advertises a larger window
+        */
+       if (seq_gt(seg->ack, tcb->snd.wl2)
+               || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)) {
+               tcb->snd.wnd = seg->wnd;
+               tcb->snd.wl2 = seg->ack;
+       }
+
+       if (!seq_gt(seg->ack, tcb->snd.una)) {
+               /*
+                *  nothing new was acked, so there is nothing more to do
+                *  except:
+                *  don't let us hangup if sending into a closed window and
+                *  we're still getting acks
+                */
+               if ((tcb->flags & RETRAN) && tcb->snd.wnd == 0) {
+                       tcb->backedoff = MAXBACKMS / 4;
+               }
+               return;
+       }
+
+       /*
+        *  any positive ack turns off fast rxt,
+        *  (should we do new-reno on partial acks?)
+        *  While in recovery, only an ack at or beyond snd.rxt clears the
+        *  dupack count and the recovery flag; earlier acks are just logged.
+        */
+       if (!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+               tcb->snd.dupacks = 0;
+               tcb->snd.recovery = 0;
+       } else
+               netlog(s->p->f, Logtcp, "rxt next %lu, cwin %u\n", seg->ack,
+                          tcb->cwind);
+
+       /* Compute the new send window size: acked is the amount of sequence
+        * space newly covered by this ack; it bounds the slow-start growth
+        * and is what gets discarded from s->wq at done: */
+       acked = seg->ack - tcb->snd.una;
+
+       /* avoid slow start and timers for SYN acks.  SYNACK is not yet set,
+        * so this is the first new ack on the conversation; one unit of
+        * acked and one flgcnt are dropped, presumably for the SYN itself,
+        * which has no byte in s->wq (TODO confirm) */
+       if ((tcb->flags & SYNACK) == 0) {
+               tcb->flags |= SYNACK;
+               acked--;
+               tcb->flgcnt--;
+               goto done;
+       }
+
+       /* slow start as long as we're not recovering from lost packets.
+        * Below ssthresh cwind grows by min(acked, mss); at or above it the
+        * growth is mss*mss/cwind.  The two checks that follow clamp the
+        * growth (including on uint32_t wraparound) so that cwind never
+        * exceeds snd.wnd. */
+       if (tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+               if (tcb->cwind < tcb->ssthresh) {
+                       expand = tcb->mss;
+                       if (acked < expand)
+                               expand = acked;
+               } else
+                       expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+               if (tcb->cwind + expand < tcb->cwind)
+                       expand = tcb->snd.wnd - tcb->cwind;
+               if (tcb->cwind + expand > tcb->snd.wnd)
+                       expand = tcb->snd.wnd - tcb->cwind;
+               tcb->cwind += expand;
+       }
+
+       /* Adjust the timers according to the round trip time.  A sample is
+        * taken only when the rtt timer is running and the ack covers
+        * rttseq, and it is ignored when RETRAN is set.  The sample is
+        * start - count timer ticks scaled by MSPTICK; srtt is kept shifted
+        * left by LOGAGAIN and mdev by LOGDGAIN, and both are kept >= 1. */
+       if (tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+               tcphalt(tpriv, &tcb->rtt_timer);
+               if ((tcb->flags & RETRAN) == 0) {
+                       tcb->backoff = 0;
+                       tcb->backedoff = 0;
+                       rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+                       if (rtt == 0)
+                               rtt = 1;        /* otherwise all close systems will rexmit in 0 time */
+                       rtt *= MSPTICK;
+                       if (tcb->srtt == 0) {
+                               tcb->srtt = rtt << LOGAGAIN;
+                               tcb->mdev = rtt << LOGDGAIN;
+                       } else {
+                               delta = rtt - (tcb->srtt >> LOGAGAIN);
+                               tcb->srtt += delta;
+                               if (tcb->srtt <= 0)
+                                       tcb->srtt = 1;
+
+                               delta = abs(delta) - (tcb->mdev >> LOGDGAIN);
+                               tcb->mdev += delta;
+                               if (tcb->mdev <= 0)
+                                       tcb->mdev = 1;
+                       }
+                       tcpsettimer(tcb);
+               }
+       }
+
+done:  /* Drop the acked bytes from the write queue.  If the queue held
+        * fewer than acked bytes, one more flgcnt is released; presumably
+        * the extra sequence number belonged to a flag such as FIN (TODO
+        * confirm). */
+       if (qdiscard(s->wq, acked) < acked)
+               tcb->flgcnt--;
+
+       tcb->snd.una = seg->ack;
+       if (seq_gt(seg->ack, tcb->snd.urg))
+               tcb->snd.urg = seg->ack;
+
+       if (tcb->snd.una != tcb->snd.nxt)       /* still unacked data: keep tcb->timer going */
+               tcpgo(tpriv, &tcb->timer);
+       else
+               tcphalt(tpriv, &tcb->timer);
+
+       if (seq_lt(tcb->snd.ptr, tcb->snd.una)) /* never leave snd.ptr behind snd.una */
+               tcb->snd.ptr = tcb->snd.una;
+
+       tcb->flags &= ~RETRAN;  /* a new ack resets the retransmit/backoff state */
+       tcb->backoff = 0;
+       tcb->backedoff = 0;
+}
+
+/*
+ *  tcpiput: process one received TCP segment held in bp for protocol
+ *  instance tcp.  The Ipifc argument is not used.
+ *
+ *  The packet is checksummed and its header converted (ntohtcp4 or
+ *  ntohtcp6, chosen by the IP version nibble), then trimmed to the
+ *  length claimed by the datagram.  With tcp->qlock held the matching
+ *  conversation is looked up; a segment with no conversation is
+ *  answered with a reset.  Listeners either park a new SYN in limbo or
+ *  pick up a conversation from tcpincoming().  The remainder runs the
+ *  TCP input state machine with s->qlock held and normally finishes by
+ *  calling tcpoutput().
+ *
+ *  On every path visible here bp is either freed, handed to s->rq or
+ *  passed to addreseq(); on header and trim failures this function
+ *  returns without freeing it, so the failing helper has presumably
+ *  already released it (TODO confirm).
+ */
+void tcpiput(struct Proto *tcp, struct Ipifc *unused, struct block *bp)
+{
+       ERRSTACK(1);
+       Tcp seg;
+       Tcp4hdr *h4;
+       Tcp6hdr *h6;
+       int hdrlen;
+       Tcpctl *tcb;
+       uint16_t length;
+       uint8_t source[IPaddrlen], dest[IPaddrlen];
+       struct conv *s;
+       struct Fs *f;
+       struct tcppriv *tpriv;
+       uint8_t version;
+
+       f = tcp->f;
+       tpriv = tcp->priv;
+
+       tpriv->stats[InSegs]++;
+
+       /* the same bytes are viewed as either header until we know which */
+       h4 = (Tcp4hdr *) (bp->rp);
+       h6 = (Tcp6hdr *) (bp->rp);
+
+       if ((h4->vihl & 0xF0) == IP_VER4) {
+               version = V4;
+               length = nhgets(h4->length);
+               v4tov6(dest, h4->tcpdst);
+               v4tov6(source, h4->tcpsrc);
+
+               /* the checksum is skipped when the block is flagged Btcpck or
+                * when the checksum field is zero */
+               h4->Unused = 0;
+               hnputs(h4->tcplen, length - TCP4_PKT);
+               if (!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+                       ptclcsum(bp, TCP4_IPLEN, length - TCP4_IPLEN)) {
+                       tpriv->stats[CsumErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "bad tcp proto cksum\n");
+                       freeblist(bp);
+                       return;
+               }
+
+               hdrlen = ntohtcp4(&seg, &bp);
+               if (hdrlen < 0) {
+                       tpriv->stats[HlenErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "bad tcp hdr len\n");
+                       return;
+               }
+
+               /* trim the packet to the size claimed by the datagram */
+               length -= hdrlen + TCP4_PKT;
+               bp = trimblock(bp, hdrlen + TCP4_PKT, length);
+               if (bp == NULL) {
+                       tpriv->stats[LenErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "tcp len < 0 after trim\n");
+                       return;
+               }
+       } else {
+               int ttl = h6->ttl;
+               int proto = h6->proto;
+
+               version = V6;
+               length = nhgets(h6->ploadlen);
+               ipmove(dest, h6->tcpdst);
+               ipmove(source, h6->tcpsrc);
+
+               /* temporarily rewrite header fields for the checksum; ttl,
+                * proto and ploadlen are put back afterwards */
+               h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
+               h6->ttl = proto;
+               hnputl(h6->vcf, length);
+               if ((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+                       ptclcsum(bp, TCP6_IPLEN, length + TCP6_PHDRSIZE)) {
+                       tpriv->stats[CsumErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "bad tcp proto cksum\n");
+                       freeblist(bp);
+                       return;
+               }
+               h6->ttl = ttl;
+               h6->proto = proto;
+               hnputs(h6->ploadlen, length);
+
+               hdrlen = ntohtcp6(&seg, &bp);
+               if (hdrlen < 0) {
+                       tpriv->stats[HlenErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "bad tcp hdr len\n");
+                       return;
+               }
+
+               /* trim the packet to the size claimed by the datagram */
+               length -= hdrlen;
+               bp = trimblock(bp, hdrlen + TCP6_PKT, length);
+               if (bp == NULL) {
+                       tpriv->stats[LenErrs]++;
+                       tpriv->stats[InErrs]++;
+                       netlog(f, Logtcp, "tcp len < 0 after trim\n");
+                       return;
+               }
+       }
+
+       /* lock protocol while searching for a conversation */
+       qlock(&tcp->qlock);
+
+       /* Look for a matching conversation */
+       s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+       if (s == NULL) {
+               netlog(f, Logtcp, "iphtlook failed\n");
+reset:
+               /* also reached from the listener path below, with tcp->qlock held */
+               qunlock(&tcp->qlock);
+               sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+               freeblist(bp);
+               return;
+       }
+
+       /* if it's a listener, look for the right flags and get a new conv */
+       tcb = (Tcpctl *) s->ptcl;
+       if (tcb->state == Listen) {
+               if (seg.flags & RST) {
+                       limborst(s, &seg, source, dest, version);
+                       qunlock(&tcp->qlock);
+                       freeblist(bp);
+                       return;
+               }
+
+               /* if this is a new SYN, put the call into limbo */
+               if ((seg.flags & SYN) && (seg.flags & ACK) == 0) {
+                       limbo(s, source, dest, &seg, version);
+                       qunlock(&tcp->qlock);
+                       freeblist(bp);
+                       return;
+               }
+
+               /*
+                *  if there's a matching call in limbo, tcpincoming will
+                *  return it in state Syn_received
+                */
+               s = tcpincoming(s, &seg, source, dest, version);
+               if (s == NULL)
+                       goto reset;
+       }
+
+       /* The rest of the input state machine is run with the control block
+        * locked and implements the state machine directly out of the RFC.
+        * Out-of-band data is ignored - it was always a bad idea.
+        */
+       tcb = (Tcpctl *) s->ptcl;
+       /* if anything below raises an error, release s->qlock and re-raise */
+       if (waserror()) {
+               qunlock(&s->qlock);
+               nexterror();
+       }
+       /* take the conversation lock before letting go of the protocol lock */
+       qlock(&s->qlock);
+       qunlock(&tcp->qlock);
+
+       /* fix up window */
+       seg.wnd <<= tcb->rcv.scale;
+
+       /* every input packet in puts off the keep alive time out */
+       tcpsetkacounter(tcb);
+
+       switch (tcb->state) {
+               case Closed:
+                       sndrst(tcp, source, dest, length, &seg, version,
+                                  "sending to Closed");
+                       goto raise;
+               case Syn_sent:
+                       if (seg.flags & ACK) {
+                               if (!seq_within(seg.ack, tcb->iss + 1, tcb->snd.nxt)) {
+                                       sndrst(tcp, source, dest, length, &seg, version,
+                                                  "bad seq in Syn_sent");
+                                       goto raise;
+                               }
+                       }
+                       if (seg.flags & RST) {
+                               if (seg.flags & ACK)
+                                       localclose(s, errno_to_string(ECONNREFUSED));
+                               goto raise;
+                       }
+
+                       if (seg.flags & SYN) {
+                               procsyn(s, &seg);
+                               if (seg.flags & ACK) {
+                                       update(s, &seg);
+                                       tcpsynackrtt(s);
+                                       tcpsetstate(s, Established);
+                                       tcpsetscale(s, tcb, seg.ws, tcb->scale);
+                               } else {
+                                       tcb->time = NOW;
+                                       tcpsetstate(s, Syn_received);   /* DLP - shouldn't this be a reset? */
+                               }
+
+                               /* data or a FIN came with the SYN: keep processing below */
+                               if (length != 0 || (seg.flags & FIN))
+                                       break;
+
+                               freeblist(bp);
+                               goto output;
+                       } else
+                               freeblist(bp);
+
+                       qunlock(&s->qlock);
+                       poperror();
+                       return;
+               case Syn_received:
+                       /* doesn't matter if it's the correct ack, we're just trying to set timing */
+                       if (seg.flags & ACK)
+                               tcpsynackrtt(s);
+                       break;
+       }
+
+       /*
+        *  One DOS attack is to open connections to us and then forget about them,
+        *  thereby tying up a conv at no long term cost to the attacker.
+        *  This is an attempt to defeat these stateless DOS attacks.  See
+        *  corresponding code in tcpsendka().
+        *
+        *  The window offsets are written as unsigned constants: 1 << 31
+        *  does not fit in a signed int, so shifting a plain int there is
+        *  undefined behavior.
+        */
+       if (tcb->state != Syn_received && (seg.flags & RST) == 0) {
+               if (tcpporthogdefense
+                       && seq_within(seg.ack, tcb->snd.una - (1U << 31),
+                                                 tcb->snd.una - (1U << 29))) {
+                       printd("stateless hog %I.%d->%I.%d f 0x%x 0x%lx - 0x%lx - 0x%lx\n",
+                                  source, seg.source, dest, seg.dest, seg.flags,
+                                  tcb->snd.una - (1U << 31), seg.ack, tcb->snd.una - (1U << 29));
+                       localclose(s, "stateless hog");
+               }
+       }
+
+       /* Cut the data to fit the receive window */
+       if (tcptrim(tcb, &seg, &bp, &length) == -1) {
+               netlog(f, Logtcp, "tcp len < 0, %lu %d\n", seg.seq, length);
+               /* nothing usable in the segment, but its ack is still applied */
+               update(s, &seg);
+               if (qlen(s->wq) + tcb->flgcnt == 0 && tcb->state == Closing) {
+                       tcphalt(tpriv, &tcb->rtt_timer);
+                       tcphalt(tpriv, &tcb->acktimer);
+                       tcphalt(tpriv, &tcb->katimer);
+                       tcpsetstate(s, Time_wait);
+                       tcb->timer.start = MSL2 * (1000 / MSPTICK);
+                       tcpgo(tpriv, &tcb->timer);
+               }
+               if (!(seg.flags & RST)) {
+                       tcb->flags |= FORCE;
+                       goto output;
+               }
+               qunlock(&s->qlock);
+               poperror();
+               return;
+       }
+
+       /* Cannot accept so answer with a rst */
+       if (length && tcb->state == Closed) {
+               sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+               goto raise;
+       }
+
+       /* The segment is beyond the current receive pointer so
+        * queue the data in the resequence queue
+        */
+       if (seg.seq != tcb->rcv.nxt)
+               if (length != 0 || (seg.flags & (SYN | FIN))) {
+                       update(s, &seg);
+                       if (addreseq(tcb, tpriv, &seg, bp, length) < 0)
+                               printd("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr,
+                                          s->lport);
+                       tcb->flags |= FORCE;
+                       goto output;
+               }
+
+       /*
+        *  keep looping till we've processed this packet plus any
+        *  adjacent packets in the resequence queue
+        */
+       for (;;) {
+               if (seg.flags & RST) {
+                       if (tcb->state == Established) {
+                               tpriv->stats[EstabResets]++;
+                               if (tcb->rcv.nxt != seg.seq)
+                                       printd
+                                               ("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt 0x%lx seq 0x%lx\n",
+                                                s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt,
+                                                seg.seq);
+                       }
+                       localclose(s, errno_to_string(ECONNREFUSED));
+                       goto raise;
+               }
+
+               /* from here on a segment without an ACK is dropped */
+               if ((seg.flags & ACK) == 0)
+                       goto raise;
+
+               switch (tcb->state) {
+                       case Syn_received:
+                               if (!seq_within(seg.ack, tcb->snd.una + 1, tcb->snd.nxt)) {
+                                       sndrst(tcp, source, dest, length, &seg, version,
+                                                  "bad seq in Syn_received");
+                                       goto raise;
+                               }
+                               update(s, &seg);
+                               tcpsetstate(s, Established);
+                               /* fallthrough: update() runs once more as Established */
+                       case Established:
+                       case Close_wait:
+                               update(s, &seg);
+                               break;
+                       case Finwait1:
+                               update(s, &seg);
+                               /* everything we sent, flags included, is now acked */
+                               if (qlen(s->wq) + tcb->flgcnt == 0) {
+                                       tcphalt(tpriv, &tcb->rtt_timer);
+                                       tcphalt(tpriv, &tcb->acktimer);
+                                       tcpsetkacounter(tcb);
+                                       tcb->time = NOW;
+                                       tcpsetstate(s, Finwait2);
+                                       tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+                                       tcpgo(tpriv, &tcb->katimer);
+                               }
+                               break;
+                       case Finwait2:
+                               update(s, &seg);
+                               break;
+                       case Closing:
+                               update(s, &seg);
+                               if (qlen(s->wq) + tcb->flgcnt == 0) {
+                                       tcphalt(tpriv, &tcb->rtt_timer);
+                                       tcphalt(tpriv, &tcb->acktimer);
+                                       tcphalt(tpriv, &tcb->katimer);
+                                       tcpsetstate(s, Time_wait);
+                                       tcb->timer.start = MSL2 * (1000 / MSPTICK);
+                                       tcpgo(tpriv, &tcb->timer);
+                               }
+                               break;
+                       case Last_ack:
+                               update(s, &seg);
+                               if (qlen(s->wq) + tcb->flgcnt == 0) {
+                                       localclose(s, NULL);
+                                       goto raise;
+                               }
+                               /* fallthrough */
+                       case Time_wait:
+                               tcb->flags |= FORCE;
+                               if (tcb->timer.state != TcptimerON)
+                                       tcpgo(tpriv, &tcb->timer);
+               }
+
+               if ((seg.flags & URG) && seg.urg) {
+                       if (seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+                               tcb->rcv.urg = seg.urg + seg.seq;
+                               pullblock(&bp, seg.urg);
+                       }
+               } else if (seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+                       tcb->rcv.urg = tcb->rcv.nxt;
+
+               if (length == 0) {
+                       if (bp != NULL)
+                               freeblist(bp);
+               } else {
+                       switch (tcb->state) {
+                               default:
+                                       /* Ignore segment text */
+                                       if (bp != NULL)
+                                               freeblist(bp);
+                                       break;
+
+                               case Syn_received:
+                               case Established:
+                               case Finwait1:
+                                       /* If we still have some data place on
+                                        * receive queue
+                                        */
+                                       if (bp) {
+                                               bp = packblock(bp);
+                                               if (bp == NULL)
+                                                       panic("tcp packblock");
+                                               qpassnolim(s->rq, bp);
+                                               bp = NULL;
+
+                                               /*
+                                                *  Force an ack every 2 data messages.  This is
+                                                *  a hack for rob to make his home system run
+                                                *  faster.
+                                                *
+                                                *  this also keeps the standard TCP congestion
+                                                *  control working since it needs an ack every
+                                                *  2 max segs worth.  This is not quite that,
+                                                *  but under a real stream is equivalent since
+                                                *  every packet has a max seg in it.
+                                                */
+                                               if (++(tcb->rcv.una) >= 2)
+                                                       tcb->flags |= FORCE;
+                                       }
+                                       tcb->rcv.nxt += length;
+
+                                       /*
+                                        *  update our rcv window
+                                        */
+                                       tcprcvwin(s);
+
+                                       /*
+                                        *  turn on the acktimer if there's something
+                                        *  to ack
+                                        */
+                                       if (tcb->acktimer.state != TcptimerON)
+                                               tcpgo(tpriv, &tcb->acktimer);
+
+                                       break;
+                               case Finwait2:
+                                       /* no process to read the data, send a reset */
+                                       if (bp != NULL)
+                                               freeblist(bp);
+                                       sndrst(tcp, source, dest, length, &seg, version,
+                                                  "send to Finwait2");
+                                       qunlock(&s->qlock);
+                                       poperror();
+                                       return;
+                       }
+               }
+
+               if (seg.flags & FIN) {
+                       tcb->flags |= FORCE;
+
+                       /* in the states that accept it, the FIN advances rcv.nxt by one */
+                       switch (tcb->state) {
+                               case Syn_received:
+                               case Established:
+                                       tcb->rcv.nxt++;
+                                       tcpsetstate(s, Close_wait);
+                                       break;
+                               case Finwait1:
+                                       tcb->rcv.nxt++;
+                                       if (qlen(s->wq) + tcb->flgcnt == 0) {
+                                               tcphalt(tpriv, &tcb->rtt_timer);
+                                               tcphalt(tpriv, &tcb->acktimer);
+                                               tcphalt(tpriv, &tcb->katimer);
+                                               tcpsetstate(s, Time_wait);
+                                               tcb->timer.start = MSL2 * (1000 / MSPTICK);
+                                               tcpgo(tpriv, &tcb->timer);
+                                       } else
+                                               tcpsetstate(s, Closing);
+                                       break;
+                               case Finwait2:
+                                       tcb->rcv.nxt++;
+                                       tcphalt(tpriv, &tcb->rtt_timer);
+                                       tcphalt(tpriv, &tcb->acktimer);
+                                       tcphalt(tpriv, &tcb->katimer);
+                                       tcpsetstate(s, Time_wait);
+                                       tcb->timer.start = MSL2 * (1000 / MSPTICK);
+                                       tcpgo(tpriv, &tcb->timer);
+                                       break;
+                               case Close_wait:
+                               case Closing:
+                               case Last_ack:
+                                       break;
+                               case Time_wait:
+                                       tcpgo(tpriv, &tcb->timer);
+                                       break;
+                       }
+               }
+
+               /*
+                *  get next adjacent segment from the resequence queue.
+                *  dump/trim any overlapping segments
+                */
+               for (;;) {
+                       if (tcb->reseq == NULL)
+                               goto output;
+
+                       if (seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+                               goto output;
+
+                       getreseq(tcb, &seg, &bp, &length);
+
+                       if (tcptrim(tcb, &seg, &bp, &length) == 0)
+                               break;
+               }
+       }
+output:
+       /* normal exit: send whatever is due, then unlock */
+       tcpoutput(s);
+       qunlock(&s->qlock);
+       poperror();
+       return;
+raise:
+       /* drop the segment: unlock, free it and kick the conversation */
+       qunlock(&s->qlock);
+       poperror();
+       freeblist(bp);
+       tcpkick(s);
+}
+
+/*
+ *  Always enters and exits with s locked (s->qlock held).  The
+ *  lock is dropped while ipoput sends each packet, so callers
+ *  must take care: the conversation can change during the call.
  */
  */
-void
-tcp_segs_free(struct tcp_seg *seg)
+void tcpoutput(struct conv *s)
+{
+       Tcp seg;
+       int msgs;
+       Tcpctl *tcb;
+       struct block *hbp, *bp;
+       int sndcnt, n;
+       uint32_t ssize, dsize, usable, sent;
+       struct Fs *f;
+       struct tcppriv *tpriv;
+       uint8_t version;
+
+       f = s->p->f;
+       tpriv = s->p->priv;
+       version = s->ipversion;
+
+       for (msgs = 0; msgs < 100; msgs++) {    /* msgs bounds the work done per call; TSO bumps it further below */
+               tcb = (Tcpctl *) s->ptcl;
+
+               switch (tcb->state) {
+                       case Listen:
+                       case Closed:
+                       case Finwait2:
+                               return; /* nothing is transmitted in these states */
+               }
+
+               /* force an ack when a window has opened up */
+               if (tcb->rcv.blocked && tcb->rcv.wnd > 0) {
+                       tcb->rcv.blocked = 0;
+                       tcb->flags |= FORCE;
+               }
+
+               sndcnt = qlen(s->wq) + tcb->flgcnt;     /* bytes on the write queue plus tcb->flgcnt */
+               sent = tcb->snd.ptr - tcb->snd.una;     /* distance from snd.una to the send pointer */
+
+               /* Don't send anything else until our SYN has been acked */
+               if (tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+                       break;
+
+               /* Compute usable segment based on offered window and limit
+                * window probes to one
+                */
+               if (tcb->snd.wnd == 0) {
+                       if (sent != 0) {
+                               if ((tcb->flags & FORCE) == 0)
+                                       break;
+//              tcb->snd.ptr = tcb->snd.una;
+                       }
+                       usable = 1;
+               } else {
+                       usable = tcb->cwind;    /* usable = min(cwind, snd.wnd) - sent */
+                       if (tcb->snd.wnd < usable)
+                               usable = tcb->snd.wnd;
+                       usable -= sent;
+               }
+               ssize = sndcnt - sent;  /* what is still waiting behind the send pointer */
+               if (ssize && usable < 2)
+                       netlog(s->p->f, Logtcp, "throttled snd.wnd %lu cwind %lu\n",
+                                  tcb->snd.wnd, tcb->cwind);
+               if (usable < ssize)
+                       ssize = usable;
+               if (ssize > tcb->mss) {
+                       if ((tcb->flags & TSO) == 0) {
+                               ssize = tcb->mss;
+                       } else {
+                               int segs, window;
+
+                               /*  Don't send too much.  32K is arbitrary..
+                                */
+                               if (ssize > 32 * 1024)
+                                       ssize = 32 * 1024;
+
+                               /* Clamp xmit to an integral MSS to
+                                * avoid ragged tail segments causing
+                                * poor link utilization.  Also
+                                * account for each segment sent in
+                                * msg heuristic, and round up to the
+                                * next multiple of 4, to ensure we
+                                * still yield.
+                                */
+                               segs = ssize / tcb->mss;
+                               ssize = segs * tcb->mss;
+                               msgs += segs;
+                               if (segs > 3)
+                                       msgs = (msgs + 4) & ~3;
+                       }
+               }
+
+               dsize = ssize;  /* dsize is what gets copied off the queue; ssize is what snd.ptr advances by */
+               seg.urg = 0;
+
+               if (ssize == 0)
+                       if ((tcb->flags & FORCE) == 0)
+                               break;  /* nothing to send and no forced segment pending */
+
+               tcb->flags &= ~FORCE;
+               tcprcvwin(s);
+
+               /* By default we will generate an ack */
+               tcphalt(tpriv, &tcb->acktimer);
+               tcb->rcv.una = 0;
+               seg.source = s->lport;
+               seg.dest = s->rport;
+               seg.flags = ACK;
+               seg.mss = 0;
+               seg.ws = 0;
+               switch (tcb->state) {
+                       case Syn_sent:
+                               seg.flags = 0;  /* the initial SYN carries no ACK */
+                               if (tcb->snd.ptr == tcb->iss) {
+                                       seg.flags |= SYN;
+                                       dsize--;
+                                       seg.mss = tcb->mss;
+                                       seg.ws = tcb->scale;
+                               }
+                               break;
+                       case Syn_received:
+                               /*
+                                *  don't send any data with a SYN/ACK packet
+                                *  because Linux rejects the packet in its
+                                *  attempt to solve the SYN attack problem
+                                */
+                               if (tcb->snd.ptr == tcb->iss) {
+                                       seg.flags |= SYN;
+                                       dsize = 0;
+                                       ssize = 1;
+                                       seg.mss = tcb->mss;
+                                       seg.ws = tcb->scale;
+                               }
+                               break;
+               }
+               seg.seq = tcb->snd.ptr;
+               seg.ack = tcb->rcv.nxt;
+               seg.wnd = tcb->rcv.wnd;
+
+               /* Pull out data to send */
+               bp = NULL;
+               if (dsize != 0) {
+                       bp = qcopy(s->wq, dsize, sent);
+                       if (BLEN(bp) != dsize) {        /* qcopy returned a different length than asked for: flag FIN, count one less data byte */
+                               seg.flags |= FIN;
+                               dsize--;
+                       }
+                       if (BLEN(bp) > tcb->mss) {      /* more than one mss in the block: mark it with Btso and record the mss */
+                               bp->flag |= Btso;
+                               bp->mss = tcb->mss;
+                       }
+               }
+
+               if (sent + dsize == sndcnt)     /* this segment reaches the end of everything counted in sndcnt */
+                       seg.flags |= PSH;
+
+               /* keep track of balance of resent data */
+               if (seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+                       n = tcb->snd.nxt - tcb->snd.ptr;
+                       if (ssize < n)
+                               n = ssize;
+                       tcb->resent += n;
+                       netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr 0x%lx nxt 0x%lx\n",
+                                  s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr,
+                                  tcb->snd.nxt);
+                       tpriv->stats[RetransSegs]++;
+               }
+
+               tcb->snd.ptr += ssize;
+
+               /* Pull up the send pointer so we can accept acks
+                * for this window
+                */
+               if (seq_gt(tcb->snd.ptr, tcb->snd.nxt))
+                       tcb->snd.nxt = tcb->snd.ptr;
+
+               /* Build header, link data and compute cksum */
+               switch (version) {
+                       case V4:
+                               tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+                               hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+                               if (hbp == NULL) {
+                                       freeblist(bp);
+                                       return; /* no header block: give up; snd.ptr has already been advanced above */
+                               }
+                               break;
+                       case V6:
+                               tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+                               hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+                               if (hbp == NULL) {
+                                       freeblist(bp);
+                                       return; /* same bail-out as the V4 case */
+                               }
+                               break;
+                       default:
+                               hbp = NULL;     /* to suppress a warning */
+                               panic("tcpoutput: version %d", version);
+               }
+
+               /* Start the transmission timers if there is new data and we
+                * expect acknowledges
+                */
+               if (ssize != 0) {
+                       if (tcb->timer.state != TcptimerON)
+                               tcpgo(tpriv, &tcb->timer);
+
+                       /*  If round trip timer isn't running, start it.
+                        *  measure the longest packet only in case the
+                        *  transmission time dominates RTT
+                        */
+                       if (tcb->rtt_timer.state != TcptimerON)
+                               if (ssize == tcb->mss) {
+                                       tcpgo(tpriv, &tcb->rtt_timer);
+                                       tcb->rttseq = tcb->snd.ptr;
+                               }
+               }
+
+               tpriv->stats[OutSegs]++;
+
+               /* put off the next keep alive */
+               tcpgo(tpriv, &tcb->katimer);
+
+               switch (version) {
+                       case V4:
+                               if (ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0) {
+                                       /* a negative return means no route */
+                                       localclose(s, "no route");
+                               }
+                               break;
+                       case V6:
+                               if (ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0) {
+                                       /* a negative return means no route */
+                                       localclose(s, "no route");
+                               }
+                               break;
+                       default:
+                               panic("tcpoutput2: version %d", version);
+               }
+               if ((msgs % 4) == 1) {  /* every so often release s->qlock and yield; tcb is re-read from s at the top of the loop */
+                       qunlock(&s->qlock);
+                       kthread_yield();
+                       qlock(&s->qlock);
+               }
+       }
+}
+
+/*
+ *  the BSD convention (hack?) for keep alives.  resend last byte acked.
+ */
+void tcpsendka(struct conv *s)
 {
 {
-  while (seg != NULL) {
-    struct tcp_seg *next = seg->next;
-    tcp_seg_free(seg);
-    seg = next;
-  }
+       /*
+        *  Build and transmit one keepalive segment for conversation s.
+        *  The segment is ACK|PSH with sequence number snd.una - 1 (or a
+        *  random one when tcpporthogdefense is set).  In Finwait2 it
+        *  carries FIN and no data; otherwise it carries a single payload
+        *  byte whose contents are not written here.  The result of
+        *  ipoput4/ipoput6 is ignored.
+        */
+       Tcpctl *tcb = (Tcpctl *) s->ptcl;
+       struct block *payload = NULL;
+       struct block *pkt;
+       Tcp seg;
+
+       seg.urg = 0;
+       seg.source = s->lport;
+       seg.dest = s->rport;
+       seg.flags = ACK | PSH;
+       seg.mss = 0;
+       seg.ws = 0;
+       if (!tcpporthogdefense)
+               seg.seq = tcb->snd.una - 1;
+       else
+               urandom_read(&seg.seq, sizeof(seg.seq));
+       seg.ack = tcb->rcv.nxt;
+       tcb->rcv.una = 0;
+       seg.wnd = tcb->rcv.wnd;
+
+       if (tcb->state != Finwait2) {
+               /* one byte of data rides along with the probe */
+               payload = allocb(1);
+               payload->wp++;
+       } else {
+               seg.flags |= FIN;
+       }
+
+       /* Build header, link data and compute cksum, then hand it to IP */
+       if (isv4(s->raddr)) {
+               tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+               pkt = htontcp4(&seg, payload, &tcb->protohdr.tcp4hdr, tcb);
+               if (pkt != NULL) {
+                       ipoput4(s->p->f, pkt, 0, s->ttl, s->tos, s);
+                       return;
+               }
+       } else {
+               tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+               pkt = htontcp6(&seg, payload, &tcb->protohdr.tcp6hdr, tcb);
+               if (pkt != NULL) {
+                       ipoput6(s->p->f, pkt, 0, s->ttl, s->tos, s);
+                       return;
+               }
+       }
+
+       /* no header block could be built: release the payload */
+       freeblist(payload);
 }
 
 }
 
-/**
- * Frees a TCP segment (tcp_seg structure).
- *
- * @param seg single tcp_seg to free
+/*
+ *  set connection to time out after 12 minutes
  */
  */
-void
-tcp_seg_free(struct tcp_seg *seg)
+void tcpsetkacounter(Tcpctl * tcb)
 {
 {
-  if (seg != NULL) {
-    if (seg->p != NULL) {
-      pbuf_free(seg->p);
-#if TCP_DEBUG
-      seg->p = NULL;
-#endif /* TCP_DEBUG */
-    }
-               kmem_cache_free(tcp_segment_kcache, seg);
-  }
+       /*
+        *  kacounter is the number of keepalive timer periods that fit in
+        *  twelve minutes (katimer.start ticks of MSPTICK ms each), but
+        *  never fewer than three.
+        */
+       enum {
+               KA_LIFETIME_MS = 12 * 60 * 1000,
+               KA_MIN_COUNT = 3,
+       };
+
+       tcb->kacounter = KA_LIFETIME_MS / (tcb->katimer.start * MSPTICK);
+       if (tcb->kacounter < KA_MIN_COUNT)
+               tcb->kacounter = KA_MIN_COUNT;
 }
 
 }
 
-/**
- * Sets the priority of a connection.
- *
- * @param pcb the tcp_pcb to manipulate
- * @param prio new priority
+/*
+ *  if we've timed out, close the connection
+ *  otherwise, send a keepalive and restart the timer
  */
  */
-void
-tcp_setprio(struct tcp_pcb *pcb, uint8_t prio)
+void tcpkeepalive(void *v)
 {
 {
-  pcb->prio = prio;
+       ERRSTACK(1);
+       Tcpctl *tcb;
+       struct conv *s;
+
+       s = v;  /* v is the conversation (struct conv *) this keepalive belongs to */
+       tcb = (Tcpctl *) s->ptcl;
+       qlock(&s->qlock);
+       if (waserror()) {       /* error path: release the conv lock before passing the error on */
+               qunlock(&s->qlock);
+               nexterror();
+       }
+       if (tcb->state != Closed) {
+               if (--(tcb->kacounter) <= 0) {  /* probe budget used up: close with ETIMEDOUT */
+                       localclose(s, errno_to_string(ETIMEDOUT));
+               } else {
+                       tcpsendka(s);   /* send another probe and re-arm the keepalive timer */
+                       tcpgo(s->p->priv, &tcb->katimer);
+               }
+       }
+       qunlock(&s->qlock);
+       poperror();
 }
 
 }
 
-#if TCP_QUEUE_OOSEQ
-/**
- * Returns a copy of the given TCP segment.
- * The pbuf and data are not copied, only the pointers
- *
- * @param seg the old tcp_seg
- * @return a copy of seg
- */ 
-struct tcp_seg *
-tcp_seg_copy(struct tcp_seg *seg)
+/*
+ *  start keepalive timer
+ */
+char *tcpstartka(struct conv *s, char **f, int n)
 {
 {
-  struct tcp_seg *cseg;
-
-  cseg = (struct tcp_seg *)kmem_cache_alloc(tcp_segment_kcache, 0);
-  if (cseg == NULL) {
-    return NULL;
-  }
-  memcpy((uint8_t *)cseg, (const uint8_t *)seg, sizeof(struct tcp_seg)); 
-  pbuf_ref(cseg->p);
-  return cseg;
+       /*
+        *  Handler for the "keepalive" control request.
+        *    s  conversation; must be in the Established state
+        *    f  request fields; f[1], when present, is the interval in ms
+        *    n  number of fields in f
+        *  Returns NULL on success or an error string.  An interval
+        *  smaller than MSPTICK leaves the current timer period unchanged.
+        */
+       Tcpctl *tcb;
+       int x;
+
+       tcb = (Tcpctl *) s->ptcl;
+       if (tcb->state != Established)
+               return "connection must be in Established state";
+       if (n > 1) {
+               x = atoi(f[1]);
+               /* the timer counts ticks, so convert from milliseconds */
+               if (x >= MSPTICK)
+                       tcb->katimer.start = x / MSPTICK;
+       }
+       tcpsetkacounter(tcb);
+       tcpgo(s->p->priv, &tcb->katimer);
+
+       return NULL;
 }
 }
-#endif /* TCP_QUEUE_OOSEQ */
 
 
+/*
+ *  turn checksums on/off
+ *
+ *  f[1] is parsed with atoi: a non-zero value clears tcb->nochecksum,
+ *  zero sets it.  n is the number of fields in f; a request without
+ *  f[1] is rejected with an error string.  Returns NULL on success.
+ */
+char *tcpsetchecksum(struct conv *s, char **f, int n)
+{
+       Tcpctl *tcb;
 
 
+       /* f[1] must exist before it is handed to atoi */
+       if (n < 2)
+               return "checksum requires an argument";
+       tcb = (Tcpctl *) s->ptcl;
+       tcb->nochecksum = !atoi(f[1]);
 
 
-/**
- * Used to specify the argument that should be passed callback
- * functions.
- *
- * @param pcb tcp_pcb to set the callback argument
- * @param arg void pointer argument to pass to callback functions
- */ 
-void
-tcp_arg(struct tcp_pcb *pcb, void *arg)
-{  
-  pcb->callback_arg = arg;
+       return NULL;
 }
 }
-#if LWIP_CALLBACK_API
 
 
-/**
- * Used to specify the function that should be called when a TCP
- * connection receives data.
- *
- * @param pcb tcp_pcb to set the recv callback
- * @param recv callback function to call for this pcb when data is received
- */ 
-void
-tcp_recv(struct tcp_pcb *pcb, tcp_recv_fn recv)
+void tcprxmit(struct conv *s)
 {
 {
-  pcb->recv = recv;
+       /*
+        *  Retransmit from the oldest unacknowledged data: rewind the
+        *  send pointer to snd.una, flag the tcb RETRAN|FORCE, collapse
+        *  the congestion state and run tcpoutput.
+        */
+       Tcpctl *ctl = (Tcpctl *) s->ptcl;
+
+       ctl->snd.ptr = ctl->snd.una;
+       ctl->flags |= RETRAN | FORCE;
+
+       /*
+        *  The congestion window drops to a single packet.  The slow
+        *  start threshold ought to be halved (with one mss as the
+        *  floor), but pinning it at one mss seems to work well enough.
+        */
+       ctl->cwind = ctl->mss;
+       ctl->ssthresh = ctl->mss;
+
+       tcpoutput(s);
 }
 
 }
 
-/**
- * Used to specify the function that should be called when TCP data
- * has been successfully delivered to the remote host.
- *
- * @param pcb tcp_pcb to set the sent callback
- * @param sent callback function to call for this pcb when data is successfully sent
- */ 
-void
-tcp_sent(struct tcp_pcb *pcb, tcp_sent_fn sent)
+void tcptimeout(void *arg)
 {
 {
-  pcb->sent = sent;
+       ERRSTACK(1);
+       struct conv *s;
+       Tcpctl *tcb;
+       int maxback;
+       struct tcppriv *tpriv;
+
+       s = (struct conv *)arg; /* arg is the conversation whose timer expired */
+       tpriv = s->p->priv;
+       tcb = (Tcpctl *) s->ptcl;
+
+       qlock(&s->qlock);
+       if (waserror()) {       /* error path: release the conv lock before passing the error on */
+               qunlock(&s->qlock);
+               nexterror();
+       }
+       switch (tcb->state) {
+               default:        /* every state other than Time_wait and Closed: back off and retransmit */
+                       tcb->backoff++;
+                       if (tcb->state == Syn_sent)
+                               maxback = MAXBACKMS / 2;        /* connection attempts get half the budget */
+                       else
+                               maxback = MAXBACKMS;
+                       tcb->backedoff += tcb->timer.start * MSPTICK;   /* total ms spent waiting so far */
+                       if (tcb->backedoff >= maxback) {
+                               localclose(s, errno_to_string(ETIMEDOUT));
+                               break;
+                       }
+                       netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lx %llu/%llu\n",
+                                  tcb->snd.una, tcb->timer.start, NOW);
+                       tcpsettimer(tcb);
+                       tcprxmit(s);
+                       tpriv->stats[RetransTimeouts]++;
+                       tcb->snd.dupacks = 0;
+                       break;
+               case Time_wait: /* the wait is over: close without an error message */
+                       localclose(s, NULL);
+                       break;
+               case Closed:
+                       break;
+       }
+       qunlock(&s->qlock);
+       poperror();
 }
 
 }
 
-/**
- * Used to specify the function that should be called when a fatal error
- * has occured on the connection.
- *
- * @param pcb tcp_pcb to set the err callback
- * @param err callback function to call for this pcb when a fatal error
- *        has occured on the connection
- */ 
-void
-tcp_err(struct tcp_pcb *pcb, tcp_err_fn err)
+int inwindow(Tcpctl * tcb, int seq)
 {
 {
-  pcb->errf = err;
+       return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd - 1);  /* tests seq against rcv.nxt .. rcv.nxt + rcv.wnd - 1; the result is whatever seq_within reports */
 }
 
 }
 
-/**
- * Used for specifying the function that should be called when a
- * LISTENing connection has been connected to another host.
- *
- * @param pcb tcp_pcb to set the accept callback
- * @param accept callback function to call for this pcb when LISTENing
- *        connection has been connected to another host
- */ 
-void
-tcp_accept(struct tcp_pcb *pcb, tcp_accept_fn accept)
+/*
+ *  set up state for a received SYN (or SYN ACK) packet
+ */
+void procsyn(struct conv *s, Tcp * seg)
 {
 {
-  pcb->accept = accept;
+       Tcpctl *ctl = (Tcpctl *) s->ptcl;
+
+       /* FORCE makes the next tcpoutput pass send a segment even with no data */
+       ctl->flags |= FORCE;
+
+       /* remember the peer's initial sequence number; we expect the one after it */
+       ctl->irs = seg->seq;
+       ctl->rcv.nxt = seg->seq + 1;
+       ctl->rcv.urg = ctl->rcv.nxt;
+
+       /* an mss offered by the peer caps ours; 0 means none was offered */
+       if (seg->mss != 0 && seg->mss < ctl->mss)
+               ctl->mss = seg->mss;
+
+       /* take the peer's window; the congestion window starts at one segment */
+       ctl->snd.wnd = seg->wnd;
+       ctl->cwind = ctl->mss;
 }
 }
-#endif /* LWIP_CALLBACK_API */
 
 
+/*
+ *  Queue an out-of-order segment on tcb->reseq, kept sorted by starting
+ *  sequence number.
+ *    seg     header of the segment (copied into the queue entry)
+ *    bp      its data; always consumed, even on allocation failure
+ *    length  data length recorded with the entry
+ *  Returns 0 normally.  Returns -1 after discarding the whole queue
+ *  (the new entry included) when the lengths summed while searching for
+ *  the insertion point exceed QMAX << rcv.scale.  Inserting at the head
+ *  returns before that check is made.
+ */
+int
+addreseq(Tcpctl * tcb, struct tcppriv *tpriv, Tcp * seg,
+                struct block *bp, uint16_t length)
+{
+       Reseq *rp, *rp1;
+       int i, rqlen, qmax;
 
 
-/**
- * Used to specify the function that should be called periodically
- * from TCP. The interval is specified in terms of the TCP coarse
- * timer interval, which is called twice a second.
- *
- */ 
-void
-tcp_poll(struct tcp_pcb *pcb, tcp_poll_fn poll, uint8_t interval)
+       rp = kzmalloc(sizeof(*rp), 0);
+       if (rp == NULL) {
+               freeblist(bp);  /* bp always consumed by addreseq */
+               return 0;
+       }
+
+       rp->seg = *seg;
+       rp->bp = bp;
+       rp->length = length;
+
+       /* Place on reassembly list sorting by starting seq number */
+       rp1 = tcb->reseq;
+       if (rp1 == NULL || seq_lt(seg->seq, rp1->seg.seq)) {
+               rp->next = rp1;
+               tcb->reseq = rp;
+               if (rp->next != NULL)
+                       tpriv->stats[OutOfOrder]++;
+               return 0;
+       }
+
+       /* walk to the insertion point, adding up the lengths passed over */
+       rqlen = 0;
+       for (;;) {
+               rqlen += rp1->length;
+               if (rp1->next == NULL || seq_lt(seg->seq, rp1->next->seg.seq)) {
+                       rp->next = rp1->next;
+                       rp1->next = rp;
+                       if (rp->next != NULL)
+                               tpriv->stats[OutOfOrder]++;
+                       break;
+               }
+               rp1 = rp1->next;
+       }
+       qmax = QMAX << tcb->rcv.scale;
+       if (rqlen > qmax) {
+               printd("resequence queue > window: %d > %d\n", rqlen, qmax);
+               /* dump only the first dozen entries */
+               i = 0;
+               for (rp1 = tcb->reseq; rp1 != NULL; rp1 = rp1->next) {
+                       printd("0x%lx 0x%lx 0x%x\n", rp1->seg.seq,
+                                  rp1->seg.ack, rp1->seg.flags);
+                       if (i++ > 10) {
+                               printd("...\n");
+                               break;
+                       }
+               }
+
+               // delete entire reassembly queue; wait for retransmit.
+               // - should we be smarter and only delete the tail?
+               for (rp = tcb->reseq; rp != NULL; rp = rp1) {
+                       rp1 = rp->next;
+                       freeblist(rp->bp);
+                       kfree(rp);
+               }
+               tcb->reseq = NULL;
+
+               return -1;
+       }
+       return 0;
+}
+
+void getreseq(Tcpctl * tcb, Tcp * seg, struct block **bp, uint16_t * length)
 {
 {
-#if LWIP_CALLBACK_API
-  pcb->poll = poll;
-#else /* LWIP_CALLBACK_API */  
-  LWIP_UNUSED_ARG(poll);
-#endif /* LWIP_CALLBACK_API */  
-  pcb->pollinterval = interval;
+       /*
+        *  Pop the first entry off tcb->reseq and hand its header, data
+        *  block and length back through seg, bp and length.  With an
+        *  empty queue the three out-parameters are left untouched.
+        */
+       Reseq *head = tcb->reseq;
+
+       if (head != NULL) {
+               tcb->reseq = head->next;
+
+               *seg = head->seg;
+               *bp = head->bp;
+               *length = head->length;
+
+               /* only the queue entry is freed; the block now belongs to the caller */
+               kfree(head);
+       }
 }
 
 }
 
-/**
- * Purges a TCP PCB. Removes any buffered data and frees the buffer memory
- * (pcb->ooseq, pcb->unsent and pcb->unacked are freed).
- *
- * @param pcb tcp_pcb to purge. The pcb itself is not deallocated!
- */
-void
-tcp_pcb_purge(struct tcp_pcb *pcb)
+int tcptrim(Tcpctl * tcb, Tcp * seg, struct block **bp, uint16_t * length)
 {
 {
-  if (pcb->state != CLOSED &&
-     pcb->state != TIME_WAIT &&
-     pcb->state != LISTEN) {
-
-    printd("tcp_pcb_purge\n");
-
-#if TCP_LISTEN_BACKLOG
-    if (pcb->state == SYN_RCVD) {
-      /* Need to find the corresponding listen_pcb and decrease its accepts_pending */
-      struct tcp_pcb_listen *lpcb;
-      LWIP_ASSERT("tcp_pcb_purge: pcb->state == SYN_RCVD but tcp_listen_pcbs is NULL",
-        tcp_listen_pcbs.listen_pcbs != NULL);
-      for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
-        if ((lpcb->local_port == pcb->local_port) &&
-            (ip_addr_isany(&lpcb->local_ip) ||
-             ip_addr_cmp(&pcb->local_ip, &lpcb->local_ip))) {
-            /* port and address of the listen pcb match the timed-out pcb */
-            LWIP_ASSERT("tcp_pcb_purge: listen pcb does not have accepts pending",
-              lpcb->accepts_pending > 0);
-            lpcb->accepts_pending--;
-            break;
-          }
-      }
-    }
-#endif /* TCP_LISTEN_BACKLOG */
-
-
-    if (pcb->refused_data != NULL) {
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_pcb_purge: data left on ->refused_data\n"));
-      pbuf_free(pcb->refused_data);
-      pcb->refused_data = NULL;
-    }
-    if (pcb->unsent != NULL) {
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_pcb_purge: not all data sent\n"));
-    }
-    if (pcb->unacked != NULL) {
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_pcb_purge: data left on ->unacked\n"));
-    }
-#if TCP_QUEUE_OOSEQ
-    if (pcb->ooseq != NULL) {
-      LWIP_DEBUGF(TCP_DEBUG, ("tcp_pcb_purge: data left on ->ooseq\n"));
-    }
-    tcp_segs_free(pcb->ooseq);
-    pcb->ooseq = NULL;
-#endif /* TCP_QUEUE_OOSEQ */
-
-    /* Stop the retransmission timer as it will expect data on unacked
-       queue if it fires */
-    pcb->rtime = -1;
-
-    tcp_segs_free(pcb->unsent);
-    tcp_segs_free(pcb->unacked);
-    pcb->unacked = pcb->unsent = NULL;
-#if TCP_OVERSIZE
-    pcb->unsent_oversize = 0;
-#endif /* TCP_OVERSIZE */
-  }
+       uint16_t len;
+       uint8_t accept;
+       int dupcnt, excess;
+
+       accept = 0;
+       len = *length;  /* len = data length, plus one each for SYN and FIN (added just below) */
+       if (seg->flags & SYN)
+               len++;
+       if (seg->flags & FIN)
+               len++;
+
+       if (tcb->rcv.wnd == 0) {
+               if (len == 0 && seg->seq == tcb->rcv.nxt)
+                       return 0;       /* closed window: only an empty segment sitting exactly at rcv.nxt is let through */
+       } else {
+               /* Some part of the segment should be in the window */
+               if (inwindow(tcb, seg->seq))
+                       accept++;
+               else if (len != 0) {
+                       if (inwindow(tcb, seg->seq + len - 1) ||
+                               seq_within(tcb->rcv.nxt, seg->seq, seg->seq + len - 1))
+                               accept++;
+               }
+       }
+       if (!accept) {  /* rejected: the data is freed here and -1 tells the caller so */
+               freeblist(*bp);
+               return -1;
+       }
+       dupcnt = tcb->rcv.nxt - seg->seq;       /* how far the segment starts before rcv.nxt */
+       if (dupcnt > 0) {
+               tcb->rerecv += dupcnt;
+               if (seg->flags & SYN) { /* a SYN in the duplicate part is dropped first and uses up one of dupcnt */
+                       seg->flags &= ~SYN;
+                       seg->seq++;
+
+                       if (seg->urg > 1)
+                               seg->urg--;
+                       else
+                               seg->flags &= ~URG;
+                       dupcnt--;
+               }
+               if (dupcnt > 0) {       /* strip the remaining duplicate bytes from the front of the data */
+                       pullblock(bp, (uint16_t) dupcnt);
+                       seg->seq += dupcnt;
+                       *length -= dupcnt;
+
+                       if (seg->urg > dupcnt)
+                               seg->urg -= dupcnt;
+                       else {
+                               seg->flags &= ~URG;
+                               seg->urg = 0;
+                       }
+               }
+       }
+       excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);    /* how far the data runs past rcv.nxt + rcv.wnd */
+       if (excess > 0) {
+               tcb->rerecv += excess;
+               *length -= excess;
+               *bp = trimblock(*bp, 0, *length);
+               if (*bp == NULL)
+                       panic("presotto is a boofhead");
+               seg->flags &= ~FIN;     /* the tail was cut off, so any FIN is cleared as well */
+       }
+       return 0;
 }
 
 }
 
-/**
- * Purges the PCB and removes it from a PCB list. Any delayed ACKs are sent first.
- *
- * @param pcblist PCB list to purge.
- * @param pcb tcp_pcb to purge. The pcb itself is NOT deallocated!
- */
-void
-tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb)
+void tcpadvise(struct Proto *tcp, struct block *bp, char *msg)
 {
 {
-  TCP_RMV(pcblist, pcb);
-
-  tcp_pcb_purge(pcb);
-  
-  /* if there is an outstanding delayed ACKs, send it */
-  if (pcb->state != TIME_WAIT &&
-     pcb->state != LISTEN &&
-     pcb->flags & TF_ACK_DELAY) {
-    pcb->flags |= TF_ACK_NOW;
-    tcp_output(pcb);
-  }
-
-  if (pcb->state != LISTEN) {
-    LWIP_ASSERT("unsent segments leaking", pcb->unsent == NULL);
-    LWIP_ASSERT("unacked segments leaking", pcb->unacked == NULL);
-#if TCP_QUEUE_OOSEQ
-    LWIP_ASSERT("ooseq segments leaking", pcb->ooseq == NULL);
-#endif /* TCP_QUEUE_OOSEQ */
-  }
-
-  pcb->state = CLOSED;
-
-  LWIP_ASSERT("tcp_pcb_remove: tcp_pcbs_sane()", tcp_pcbs_sane());
+       /*
+        *  Find the conversation that the TCP/IP header at bp->rp belongs
+        *  to and, if it is still in Syn_sent, close it with msg.  The
+        *  header's source is matched against the conversation's local
+        *  side and its destination against the remote side (presumably
+        *  because it is a copy of a packet this host sent -- confirm
+        *  with the caller).  bp is freed on every path.
+        */
+       Tcp4hdr *h4 = (Tcp4hdr *) (bp->rp);
+       Tcp6hdr *h6 = (Tcp6hdr *) (bp->rp);
+       uint8_t source[IPaddrlen], dest[IPaddrlen];
+       uint16_t psource, pdest;
+       struct conv **cp, *c;
+       Tcpctl *tcb;
+
+       /* pull addresses and ports out of whichever header version this is */
+       if ((h4->vihl & 0xF0) != IP_VER4) {
+               ipmove(dest, h6->tcpdst);
+               ipmove(source, h6->tcpsrc);
+               psource = nhgets(h6->tcpsport);
+               pdest = nhgets(h6->tcpdport);
+       } else {
+               v4tov6(dest, h4->tcpdst);
+               v4tov6(source, h4->tcpsrc);
+               psource = nhgets(h4->tcpsport);
+               pdest = nhgets(h4->tcpdport);
+       }
+
+       /* Look for a connection */
+       qlock(&tcp->qlock);
+       for (cp = tcp->conv; *cp; cp++) {
+               c = *cp;
+               tcb = (Tcpctl *) c->ptcl;
+               if (c->rport != pdest || c->lport != psource)
+                       continue;
+               if (tcb->state == Closed)
+                       continue;
+               if (ipcmp(c->raddr, dest) != 0 || ipcmp(c->laddr, source) != 0)
+                       continue;
+
+               /* found it: take the conv lock before letting go of the proto lock */
+               qlock(&c->qlock);
+               qunlock(&tcp->qlock);
+               if (tcb->state == Syn_sent)
+                       localclose(c, msg);
+               qunlock(&c->qlock);
+               freeblist(bp);
+               return;
+       }
+       qunlock(&tcp->qlock);
+       freeblist(bp);
 }
 
 }
 
-#if TCP_CALCULATE_EFF_SEND_MSS
-/**
- * Calcluates the effective send mss that can be used for a specific IP address
- * by using ip_route to determin the netif used to send to the address and
- * calculating the minimum of TCP_MSS and that netif's mtu (if set).
- */
-uint16_t
-tcp_eff_send_mss(uint16_t sendmss, ip_addr_t *addr)
+static char *tcpporthogdefensectl(char *val)
 {
 {
-  uint16_t mss_s;
-  struct netif *outif;
-
-  //outif = ip_route(addr);
-    mss_s = DEFAULT_MTU - IP_HDR_SZ - TCP_HLEN;
-    /* RFC 1122, chap 4.2.2.6:
-     * Eff.snd.MSS = min(SendMSS+20, MMS_S) - TCPhdrsize - IPoptionsize
-     * We correct for TCP options in tcp_write(), and don't support IP options.
-     */
-    sendmss = MIN(sendmss, mss_s);
-  return sendmss;
+       /*
+        *  Set the tcpporthogdefense switch from the text in val: "on"
+        *  stores 1, "off" stores 0.  Returns NULL on success and an
+        *  error string for any other value.
+        */
+       if (strcmp(val, "on") == 0) {
+               tcpporthogdefense = 1;
+               return NULL;
+       }
+       if (strcmp(val, "off") == 0) {
+               tcpporthogdefense = 0;
+               return NULL;
+       }
+       return "unknown value for tcpporthogdefense";
 }
 }
-#endif /* TCP_CALCULATE_EFF_SEND_MSS */
 
 
-const char*
-tcp_debug_state_str(enum tcp_state s)
+/* called with c qlocked
+ *
+ *  Dispatch a control request on conversation c.  f holds the request's
+ *  n fields, f[0] being the verb.  Returns NULL on success or an error
+ *  string.  Verbs that read f[1] unconditionally ("checksum",
+ *  "tcpporthogdefense") are refused unless that field was supplied.
+ */
+char *tcpctl(struct conv *c, char **f, int n)
 {
 {
-  return tcp_state_str[s];
+       if (n == 1 && strcmp(f[0], "hangup") == 0)
+               return tcphangup(c);
+       /* the keepalive interval in f[1] is optional; tcpstartka checks n */
+       if (n >= 1 && strcmp(f[0], "keepalive") == 0)
+               return tcpstartka(c, f, n);
+       if (n >= 1 && strcmp(f[0], "checksum") == 0) {
+               if (n < 2)
+                       return "checksum requires an argument";
+               return tcpsetchecksum(c, f, n);
+       }
+       if (n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0) {
+               if (n < 2)
+                       return "tcpporthogdefense requires an argument";
+               return tcpporthogdefensectl(f[1]);
+       }
+       return "unknown control request";
 }
 
 }
 
-#if TCP_DEBUG || TCP_INPUT_DEBUG || TCP_OUTPUT_DEBUG
-/**
- * Print a tcp header for debugging purposes.
- *
- * @param tcphdr pointer to a struct tcp_hdr
- */
-void
-tcp_debug_print(struct tcp_hdr *tcphdr)
+int tcpstats(struct Proto *tcp, char *buf, int len)
 {
 {
-  LWIP_DEBUGF(TCP_DEBUG, ("TCP header:\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("|    %5"U16_F"      |    %5"U16_F"      | (src port, dest port)\n",
-         ntohs(tcphdr->src), ntohs(tcphdr->dest)));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("|           %010"U32_F"          | (seq no)\n",
-          ntohl(tcphdr->seqno)));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("|           %010"U32_F"          | (ack no)\n",
-         ntohl(tcphdr->ackno)));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("| %2"U16_F" |   |%"U16_F"%"U16_F"%"U16_F"%"U16_F"%"U16_F"%"U16_F"|     %5"U16_F"     | (hdrlen, flags (",
-       TCPH_HDRLEN(tcphdr),
-         TCPH_FLAGS(tcphdr) >> 5 & 1,
-         TCPH_FLAGS(tcphdr) >> 4 & 1,
-         TCPH_FLAGS(tcphdr) >> 3 & 1,
-         TCPH_FLAGS(tcphdr) >> 2 & 1,
-         TCPH_FLAGS(tcphdr) >> 1 & 1,
-         TCPH_FLAGS(tcphdr) & 1,
-         ntohs(tcphdr->wnd)));
-  tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
-  LWIP_DEBUGF(TCP_DEBUG, ("), win)\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
-  LWIP_DEBUGF(TCP_DEBUG, ("|    0x%04"X16_F"     |     %5"U16_F"     | (chksum, urgp)\n",
-         ntohs(tcphdr->chksum), ntohs(tcphdr->urgp)));
-  LWIP_DEBUGF(TCP_DEBUG, ("+-------------------------------+\n"));
+       /*
+        *  Format every protocol counter as a "name: value" line into
+        *  buf (len bytes available) and return the distance from buf
+        *  to where seprintf stopped.
+        */
+       struct tcppriv *priv = tcp->priv;
+       char *end = buf + len;
+       char *cur = buf;
+       int i = 0;
+
+       while (i < Nstats) {
+               cur = seprintf(cur, end, "%s: %u\n", statnames[i], priv->stats[i]);
+               i++;
+       }
+       return cur - buf;
 }
 
 }
 
-/**
- * Print a tcp state for debugging purposes.
+/*
+ *  garbage collect any stale conversations:
+ *     - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
+ *     - Finwait2 after 5 minutes
  *
  *
- * @param s enum tcp_state to print
+ *  this is called whenever we run out of channels.  Both checks are
+ *  of questionable validity so we try to use them only when we're
+ *  up against the wall.
  */
  */
-void
-tcp_debug_print_state(enum tcp_state s)
+int tcpgc(struct Proto *tcp)
 {
 {
-  LWIP_DEBUGF(TCP_DEBUG, ("State: %s\n", tcp_state_str[s]));
+       /*
+        *  Walk the conversation table (stopping at the first empty
+        *  slot) and close conversations that have sat too long in
+        *  Syn_received (more than 5000 ms) or Finwait2 (more than five
+        *  minutes), measured as NOW - tcb->time.  Conversations whose
+        *  lock cannot be taken immediately are skipped.  Returns how
+        *  many were closed.
+        */
+       struct conv **slot, **end, *c;
+       Tcpctl *tcb;
+       int closed = 0;
+
+       end = &tcp->conv[tcp->nc];
+       for (slot = tcp->conv; slot < end && *slot != NULL; slot++) {
+               c = *slot;
+               if (!canqlock(&c->qlock))
+                       continue;
+               tcb = (Tcpctl *) c->ptcl;
+               if (tcb->state == Syn_received) {
+                       if (NOW - tcb->time > 5000) {
+                               localclose(c, "timed out");
+                               closed++;
+                       }
+               } else if (tcb->state == Finwait2) {
+                       if (NOW - tcb->time > 5 * 60 * 1000) {
+                               localclose(c, "timed out");
+                               closed++;
+                       }
+               }
+               qunlock(&c->qlock);
+       }
+       return closed;
 }
 
 }
 
-/**
- * Print tcp flags for debugging purposes.
- *
- * @param flags tcp flags, all active flags are printed
- */
-void
-tcp_debug_print_flags(uint8_t flags)
+/*
+ * Set tcb->timer.start from the connection's backoff, mdev and srtt
+ * fields.  The value is expressed in units of MSPTICK (presumably
+ * milliseconds per timer tick -- confirm) and is clamped to the range
+ * [500 / MSPTICK, 64000 / MSPTICK].
+ */
+void tcpsettimer(Tcpctl * tcb)
 {
 {
-  if (flags & TCP_FIN) {
-    LWIP_DEBUGF(TCP_DEBUG, ("FIN "));
-  }
-  if (flags & TCP_SYN) {
-    LWIP_DEBUGF(TCP_DEBUG, ("SYN "));
-  }
-  if (flags & TCP_RST) {
-    LWIP_DEBUGF(TCP_DEBUG, ("RST "));
-  }
-  if (flags & TCP_PSH) {
-    LWIP_DEBUGF(TCP_DEBUG, ("PSH "));
-  }
-  if (flags & TCP_ACK) {
-    LWIP_DEBUGF(TCP_DEBUG, ("ACK "));
-  }
-  if (flags & TCP_URG) {
-    LWIP_DEBUGF(TCP_DEBUG, ("URG "));
-  }
-  if (flags & TCP_ECE) {
-    LWIP_DEBUGF(TCP_DEBUG, ("ECE "));
-  }
-  if (flags & TCP_CWR) {
-    LWIP_DEBUGF(TCP_DEBUG, ("CWR "));
-  }
-  LWIP_DEBUGF(TCP_DEBUG, ("\n"));
+       int x;
+
+       /* round trip dependency */
+       /* '*' and '/' associate left to right, so the backoff factor is
+        * multiplied in before the integer divide by MSPTICK */
+       x = backoff(tcb->backoff) *
+               (tcb->mdev + (tcb->srtt >> LOGAGAIN) + MSPTICK) / MSPTICK;
+
+       /* bounded twixt 1/2 and 64 seconds */
+       if (x < 500 / MSPTICK)
+               x = 500 / MSPTICK;
+       else if (x > (64000 / MSPTICK))
+               x = 64000 / MSPTICK;
+       tcb->timer.start = x;
 }
 
 }
 
-/**
- * Print all tcp_pcbs in every list for debugging purposes.
- */
-void
-tcp_debug_print_pcbs(void)
+/*
+ * Build the TCP protocol descriptor and hand it to Fsproto() for fs.
+ * Allocates a struct Proto and its struct tcppriv, initializes the two
+ * qlocks in the private state, and fills in the protocol's callbacks
+ * and parameters.
+ */
+void tcpinit(struct Fs *fs)
 {
 {
-  struct tcp_pcb *pcb;
-  LWIP_DEBUGF(TCP_DEBUG, ("Active PCB states:\n"));
-  for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
-    LWIP_DEBUGF(TCP_DEBUG, ("Local port %"U16_F", foreign port %"U16_F" snd_nxt %"U32_F" rcv_nxt %"U32_F" ",
-                       pcb->local_port, pcb->remote_port,
-                       pcb->snd_nxt, pcb->rcv_nxt));
-    tcp_debug_print_state(pcb->state);
-  }    
-  LWIP_DEBUGF(TCP_DEBUG, ("Listen PCB states:\n"));
-  for(pcb = (struct tcp_pcb *)tcp_listen_pcbs.pcbs; pcb != NULL; pcb = pcb->next) {
-    LWIP_DEBUGF(TCP_DEBUG, ("Local port %"U16_F", foreign port %"U16_F" snd_nxt %"U32_F" rcv_nxt %"U32_F" ",
-                       pcb->local_port, pcb->remote_port,
-                       pcb->snd_nxt, pcb->rcv_nxt));
-    tcp_debug_print_state(pcb->state);
-  }    
-  LWIP_DEBUGF(TCP_DEBUG, ("TIME-WAIT PCB states:\n"));
-  for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
-    LWIP_DEBUGF(TCP_DEBUG, ("Local port %"U16_F", foreign port %"U16_F" snd_nxt %"U32_F" rcv_nxt %"U32_F" ",
-                       pcb->local_port, pcb->remote_port,
-                       pcb->snd_nxt, pcb->rcv_nxt));
-    tcp_debug_print_state(pcb->state);
-  }    
+       struct Proto *tcp;
+       struct tcppriv *tpriv;
+
+       /* NOTE(review): neither kzmalloc() result is checked before it is
+        * dereferenced -- confirm a flags value of 0 cannot yield NULL */
+       tcp = kzmalloc(sizeof(struct Proto), 0);
+       tpriv = tcp->priv = kzmalloc(sizeof(struct tcppriv), 0);
+       qlock_init(&tpriv->tl);
+       qlock_init(&tpriv->apl);
+       /* protocol name and callback table */
+       tcp->name = "tcp";
+       tcp->connect = tcpconnect;
+       tcp->announce = tcpannounce;
+       tcp->ctl = tcpctl;
+       tcp->state = tcpstate;
+       tcp->create = tcpcreate;
+       tcp->close = tcpclose;
+       tcp->rcv = tcpiput;
+       tcp->advise = tcpadvise;
+       tcp->stats = tcpstats;
+       tcp->inuse = tcpinuse;
+       tcp->gc = tcpgc;
+       tcp->ipproto = IP_TCPPROTO;
+       tcp->nc = scalednconv();
+       tcp->ptclsize = sizeof(Tcpctl);
+       /* publish tcp->nc as the MaxConn statistic */
+       tpriv->stats[MaxConn] = tcp->nc;
+
+       Fsproto(fs, tcp);
 }
 
 }
 
-/**
- * Check state consistency of the tcp_pcb lists.
- */
-s16_t tcp_pcbs_sane(void)
+/*
+ * Record the scale values for a connection and size its window.
+ * If rcvscale is non-zero, the low 8 bits of rcvscale and sndscale are
+ * stored and the window becomes QMAX << snd.scale; otherwise both
+ * scales are cleared (sndscale is ignored) and the window is QMAX.
+ * Either way the limit of s->rq is set to the new window.
+ */
+void
+tcpsetscale(struct conv *s, Tcpctl * tcb, uint16_t rcvscale, uint16_t sndscale)
 {
 {
-  struct tcp_pcb *pcb;
-  for(pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
-    LWIP_ASSERT("tcp_pcbs_sane: active pcb->state != CLOSED", pcb->state != CLOSED);
-    LWIP_ASSERT("tcp_pcbs_sane: active pcb->state != LISTEN", pcb->state != LISTEN);
-    LWIP_ASSERT("tcp_pcbs_sane: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
-  }
-  for(pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
-    LWIP_ASSERT("tcp_pcbs_sane: tw pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);
-  }
-  return 1;
+       if (rcvscale) {
+               tcb->rcv.scale = rcvscale & 0xff;
+               tcb->snd.scale = sndscale & 0xff;
+               /* NOTE(review): the window and the s->rq limit are shifted
+                * by snd.scale, not rcv.scale -- confirm this is intended */
+               tcb->window = QMAX << tcb->snd.scale;
+               qsetlimit(s->rq, tcb->window);
+       } else {
+               /* rcvscale == 0: no scaling in either direction */
+               tcb->rcv.scale = 0;
+               tcb->snd.scale = 0;
+               tcb->window = QMAX;
+               qsetlimit(s->rq, tcb->window);
+       }
 }
 }
-#endif /* TCP_DEBUG */