RFC 1144 Compressing TCP/IP Headers February 1990
A Sample Implementation
The following is a sample implementation of the protocol described in
this document.
Since many people who might have the deal with this code are familiar
with the Berkeley Unix kernel and its coding style (affectionately known
as kernel normal form), this code was done in that style. It uses the
Berkeley `subroutines' (actually, macros and/or inline assembler
expansions) for converting to/from network byte order and
copying/comparing strings of bytes. These routines are briefly
described in sec. A.5 for anyone not familiar with them.
This code has been run on all the machines listed in the table on page
24. Thus, the author hopes there are no byte order or alignment
problems (although there are embedded assumptions about alignment that
are valid for Berkeley Unix but may not be true for other IP
implementations --- see the comments mentioning alignment in
sl_compress_tcp and sl_decompress_tcp).
There was some attempt to make this code efficient. Unfortunately, that
may have made portions of it incomprehensible. The author apologizes
for any frustration this engenders. (In honesty, my C style is known to
be obscure and claims of `efficiency' are simply a convenient excuse.)
This sample code and a complete Berkeley Unix implementation is
available in machine readable form via anonymous ftp from Internet host
ftp.ee.lbl.gov (128.3.254.68), file cslip.tar.Z. This is a compressed
Unix tar file. It must be ftped in binary mode.
All of the code in this appendix is covered by the following copyright:
/*
* Copyright (c) 1989 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms are
* permitted provided that the above copyright notice and this
* paragraph are duplicated in all such forms and that any
* documentation, advertising materials, and other materials
* related to such distribution and use acknowledge that the
* software was developed by the University of California,
* Berkeley. The name of the University may not be used to
* endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
* IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A
* PARTICULAR PURPOSE.
*/
Jacobson [Page 27]
RFC 1144 Compressing TCP/IP Headers February 1990
A.1 Definitions and State Data
#define MAX_STATES 16 /* must be >2 and <255 */
#define MAX_HDR 128 /* max TCP+IP hdr length (by protocol def) */
/* packet types */
#define TYPE_IP 0x40
#define TYPE_UNCOMPRESSED_TCP 0x70
#define TYPE_COMPRESSED_TCP 0x80
#define TYPE_ERROR 0x00 /* this is not a type that ever appears on
* the wire. The receive framer uses it to
* tell the decompressor there was a packet
* transmission error. */
/*
* Bits in first octet of compressed packet
*/
/* flag bits for what changed in a packet */
#define NEW_C 0x40
#define NEW_I 0x20
#define TCP_PUSH_BIT 0x10
#define NEW_S 0x08
#define NEW_A 0x04
#define NEW_W 0x02
#define NEW_U 0x01
/* reserved, special-case values of above */
#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */
#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */
#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
/*
* "state" data for each active tcp conversation on the wire. This is
* basically a copy of the entire IP/TCP header from the last packet together
* with a small identifier the transmit & receive ends of the line use to
* locate saved header.
*/
struct cstate {
struct cstate *cs_next; /* next most recently used cstate (xmit only) */
u_short cs_hlen; /* size of hdr (receive only) */
u_char cs_id; /* connection # associated with this state */
u_char cs_filler;
union {
char hdr[MAX_HDR];
struct ip csu_ip; /* ip/tcp hdr from most recent packet */
} slcs_u;
};
#define cs_ip slcs_u.csu_ip
Jacobson [Page 28]
RFC 1144 Compressing TCP/IP Headers February 1990
#define cs_hdr slcs_u.csu_hdr
/*
* all the state data for one serial line (we need one of these per line).
*/
struct slcompress {
struct cstate *last_cs; /* most recently used tstate */
u_char last_recv; /* last rcvd conn. id */
u_char last_xmit; /* last sent conn. id */
u_short flags;
struct cstate tstate[MAX_STATES]; /* xmit connection states */
struct cstate rstate[MAX_STATES]; /* receive connection states */
};
/* flag values */
#define SLF_TOSS 1 /* tossing rcvd frames because of input err */
/*
* The following macros are used to encode and decode numbers. They all
* assume that `cp' points to a buffer where the next byte encoded (decoded)
* is to be stored (retrieved). Since the decode routines do arithmetic,
* they have to convert from and to network byte order.
*/
/*
* ENCODE encodes a number that is known to be non-zero. ENCODEZ checks for
* zero (zero has to be encoded in the long, 3 byte form).
*/
#define ENCODE(n) { \
if ((u_short)(n) >= 256) { \
*cp++ = 0; \
cp[1] = (n); \
cp[0] = (n) >> 8; \
cp += 2; \
} else { \
*cp++ = (n); \
} \
}
#define ENCODEZ(n) { \
if ((u_short)(n) >= 256 || (u_short)(n) == 0) { \
*cp++ = 0; \
cp[1] = (n); \
cp[0] = (n) >> 8; \
cp += 2; \
} else { \
*cp++ = (n); \
} \
}
/*
* DECODEL takes the (compressed) change at byte cp and adds it to the
Jacobson [Page 29]
RFC 1144 Compressing TCP/IP Headers February 1990
* current value of packet field 'f' (which must be a 4-byte (long) integer
* in network byte order). DECODES does the same for a 2-byte (short) field.
* DECODEU takes the change at cp and stuffs it into the (short) field f.
* 'cp' is updated to point to the next field in the compressed header.
*/
#define DECODEL(f) { \
if (*cp == 0) {\
(f) = htonl(ntohl(f) + ((cp[1] << 8) | cp[2])); \
cp += 3; \
} else { \
(f) = htonl(ntohl(f) + (u_long)*cp++); \
} \
}
#define DECODES(f) { \
if (*cp == 0) {\
(f) = htons(ntohs(f) + ((cp[1] << 8) | cp[2])); \
cp += 3; \
} else { \
(f) = htons(ntohs(f) + (u_long)*cp++); \
} \
}
#define DECODEU(f) { \
if (*cp == 0) {\
(f) = htons((cp[1] << 8) | cp[2]); \
cp += 3; \
} else { \
(f) = htons((u_long)*cp++); \
} \
}
Jacobson [Page 30]
RFC 1144 Compressing TCP/IP Headers February 1990
A.2 Compression
This routine looks daunting but isn't really. The code splits into four
approximately equal sized sections: The first quarter manages a
circularly linked, least-recently-used list of `active' TCP
connections./47/ The second figures out the sequence/ack/window/urg
changes and builds the bulk of the compressed packet. The third handles
the special-case encodings. The last quarter does packet ID and
connection ID encoding and replaces the original packet header with the
compressed header.
The arguments to this routine are a pointer to a packet to be
compressed, a pointer to the compression state data for the serial line,
and a flag which enables or disables connection id (C bit) compression.
Compression is done `in-place' so, if a compressed packet is created,
both the start address and length of the incoming packet (the off and
len fields of m) will be updated to reflect the removal of the original
header and its replacement by the compressed header. If either a
compressed or uncompressed packet is created, the compression state is
updated. This routines returns the packet type for the transmit framer
(TYPE_IP, TYPE_UNCOMPRESSED_TCP or TYPE_COMPRESSED_TCP).
Because 16 and 32 bit arithmetic is done on various header fields, the
incoming IP packet must be aligned appropriately (e.g., on a SPARC, the
IP header is aligned on a 32-bit boundary). Substantial changes would
have to be made to the code below if this were not true (and it would
probably be cheaper to byte copy the incoming header to somewhere
correctly aligned than to make those changes).
Note that the outgoing packet will be aligned arbitrarily (e.g., it
could easily start on an odd-byte boundary).
u_char
sl_compress_tcp(m, comp, compress_cid)
struct mbuf *m;
struct slcompress *comp;
int compress_cid;
{
register struct cstate *cs = comp->last_cs->cs_next;
register struct ip *ip = mtod(m, struct ip *);
register u_int hlen = ip->ip_hl;
register struct tcphdr *oth; /* last TCP header */
register struct tcphdr *th; /* current TCP header */
----------------------------
47. The two most common operations on the connection list are a `find'
that terminates at the first entry (a new packet for the most recently
used connection) and moving the last entry on the list to the head of
the list (the first packet from a new connection). A circular list
efficiently handles these two operations.
Jacobson [Page 31]
RFC 1144 Compressing TCP/IP Headers February 1990
register u_int deltaS, deltaA; /* general purpose temporaries */
register u_int changes = 0; /* change mask */
u_char new_seq[16]; /* changes from last to current */
register u_char *cp = new_seq;
/*
* Bail if this is an IP fragment or if the TCP packet isn't
* `compressible' (i.e., ACK isn't set or some other control bit is
* set). (We assume that the caller has already made sure the packet
* is IP proto TCP).
*/
if ((ip->ip_off & htons(0x3fff)) || m->m_len < 40)
return (TYPE_IP);
th = (struct tcphdr *) & ((int *) ip)[hlen];
if ((th->th_flags & (TH_SYN | TH_FIN | TH_RST | TH_ACK)) != TH_ACK)
return (TYPE_IP);
/*
* Packet is compressible -- we're going to send either a
* COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way we need to
* locate (or create) the connection state. Special case the most
* recently used connection since it's most likely to be used again &
* we don't have to do any reordering if it's used.
*/
if (ip->ip_src.s_addr != cs->cs_ip.ip_src.s_addr ||
ip->ip_dst.s_addr != cs->cs_ip.ip_dst.s_addr ||
*(int *) th != ((int *) &cs->cs_ip)[cs->cs_ip.ip_hl]) {
/*
* Wasn't the first -- search for it.
*
* States are kept in a circularly linked list with last_cs
* pointing to the end of the list. The list is kept in lru
* order by moving a state to the head of the list whenever
* it is referenced. Since the list is short and,
* empirically, the connection we want is almost always near
* the front, we locate states via linear search. If we
* don't find a state for the datagram, the oldest state is
* (re-)used.
*/
register struct cstate *lcs;
register struct cstate *lastcs = comp->last_cs;
do {
lcs = cs;
cs = cs->cs_next;
if (ip->ip_src.s_addr == cs->cs_ip.ip_src.s_addr
&& ip->ip_dst.s_addr == cs->cs_ip.ip_dst.s_addr
&& *(int *) th == ((int *) &cs->cs_ip)[cs->cs_ip.ip_hl])
goto found;
Jacobson [Page 32]
RFC 1144 Compressing TCP/IP Headers February 1990
} while (cs != lastcs);
/*
* Didn't find it -- re-use oldest cstate. Send an
* uncompressed packet that tells the other side what
* connection number we're using for this conversation. Note
* that since the state list is circular, the oldest state
* points to the newest and we only need to set last_cs to
* update the lru linkage.
*/
comp->last_cs = lcs;
hlen += th->th_off;
hlen <<= 2;
goto uncompressed;
found:
/* Found it -- move to the front on the connection list. */
if (lastcs == cs)
comp->last_cs = lcs;
else {
lcs->cs_next = cs->cs_next;
cs->cs_next = lastcs->cs_next;
lastcs->cs_next = cs;
}
}
/*
* Make sure that only what we expect to change changed. The first
* line of the `if' checks the IP protocol version, header length &
* type of service. The 2nd line checks the "Don't fragment" bit.
* The 3rd line checks the time-to-live and protocol (the protocol
* check is unnecessary but costless). The 4th line checks the TCP
* header length. The 5th line checks IP options, if any. The 6th
* line checks TCP options, if any. If any of these things are
* different between the previous & current datagram, we send the
* current datagram `uncompressed'.
*/
oth = (struct tcphdr *) & ((int *) &cs->cs_ip)[hlen];
deltaS = hlen;
hlen += th->th_off;
hlen <<= 2;
if (((u_short *) ip)[0] != ((u_short *) &cs->cs_ip)[0] ||
((u_short *) ip)[3] != ((u_short *) &cs->cs_ip)[3] ||
((u_short *) ip)[4] != ((u_short *) &cs->cs_ip)[4] ||
th->th_off != oth->th_off ||
(deltaS > 5 && BCMP(ip + 1, &cs->cs_ip + 1, (deltaS - 5) << 2)) ||
(th->th_off > 5 && BCMP(th + 1, oth + 1, (th->th_off - 5) << 2)))
goto uncompressed;
/*
* Figure out which of the changing fields changed. The receiver
Jacobson [Page 33]
RFC 1144 Compressing TCP/IP Headers February 1990
* expects changes in the order: urgent, window, ack, seq.
*/
if (th->th_flags & TH_URG) {
deltaS = ntohs(th->th_urp);
ENCODEZ(deltaS);
changes |= NEW_U;
} else if (th->th_urp != oth->th_urp)
/*
* argh! URG not set but urp changed -- a sensible
* implementation should never do this but RFC793 doesn't
* prohibit the change so we have to deal with it.
*/
goto uncompressed;
if (deltaS = (u_short) (ntohs(th->th_win) - ntohs(oth->th_win))) {
ENCODE(deltaS);
changes |= NEW_W;
}
if (deltaA = ntohl(th->th_ack) - ntohl(oth->th_ack)) {
if (deltaA > 0xffff)
goto uncompressed;
ENCODE(deltaA);
changes |= NEW_A;
}
if (deltaS = ntohl(th->th_seq) - ntohl(oth->th_seq)) {
if (deltaS > 0xffff)
goto uncompressed;
ENCODE(deltaS);
changes |= NEW_S;
}
/*
* Look for the special-case encodings.
*/
switch (changes) {
case 0:
/*
* Nothing changed. If this packet contains data and the last
* one didn't, this is probably a data packet following an
* ack (normal on an interactive connection) and we send it
* compressed. Otherwise it's probably a retransmit,
* retransmitted ack or window probe. Send it uncompressed
* in case the other side missed the compressed version.
*/
if (ip->ip_len != cs->cs_ip.ip_len &&
ntohs(cs->cs_ip.ip_len) == hlen)
break;
/* (fall through) */
case SPECIAL_I:
Jacobson [Page 34]
RFC 1144 Compressing TCP/IP Headers February 1990
case SPECIAL_D:
/*
* Actual changes match one of our special case encodings --
* send packet uncompressed.
*/
goto uncompressed;
case NEW_S | NEW_A:
if (deltaS == deltaA &&
deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
/* special case for echoed terminal traffic */
changes = SPECIAL_I;
cp = new_seq;
}
break;
case NEW_S:
if (deltaS == ntohs(cs->cs_ip.ip_len) - hlen) {
/* special case for data xfer */
changes = SPECIAL_D;
cp = new_seq;
}
break;
}
deltaS = ntohs(ip->ip_id) - ntohs(cs->cs_ip.ip_id);
if (deltaS != 1) {
ENCODEZ(deltaS);
changes |= NEW_I;
}
if (th->th_flags & TH_PUSH)
changes |= TCP_PUSH_BIT;
/*
* Grab the cksum before we overwrite it below. Then update our
* state with this packet's header.
*/
deltaA = ntohs(th->th_sum);
BCOPY(ip, &cs->cs_ip, hlen);
/*
* We want to use the original packet as our compressed packet. (cp -
* new_seq) is the number of bytes we need for compressed sequence
* numbers. In addition we need one byte for the change mask, one
* for the connection id and two for the tcp checksum. So, (cp -
* new_seq) + 4 bytes of header are needed. hlen is how many bytes
* of the original packet to toss so subtract the two to get the new
* packet size.
*/
deltaS = cp - new_seq;
cp = (u_char *) ip;
if (compress_cid == 0 || comp->last_xmit != cs->cs_id) {
comp->last_xmit = cs->cs_id;
Jacobson [Page 35]
RFC 1144 Compressing TCP/IP Headers February 1990
hlen -= deltaS + 4;
cp += hlen;
*cp++ = changes | NEW_C;
*cp++ = cs->cs_id;
} else {
hlen -= deltaS + 3;
cp += hlen;
*cp++ = changes;
}
m->m_len -= hlen;
m->m_off += hlen;
*cp++ = deltaA >> 8;
*cp++ = deltaA;
BCOPY(new_seq, cp, deltaS);
return (TYPE_COMPRESSED_TCP);
uncompressed:
/*
* Update connection state cs & send uncompressed packet
* ('uncompressed' means a regular ip/tcp packet but with the
* 'conversation id' we hope to use on future compressed packets in
* the protocol field).
*/
BCOPY(ip, &cs->cs_ip, hlen);
ip->ip_p = cs->cs_id;
comp->last_xmit = cs->cs_id;
return (TYPE_UNCOMPRESSED_TCP);
}
Jacobson [Page 36]
RFC 1144 Compressing TCP/IP Headers February 1990
A.3 Decompression
This routine decompresses a received packet. It is called with a
pointer to the packet, the packet length and type, and a pointer to the
compression state structure for the incoming serial line. It returns a
pointer to the resulting packet or zero if there were errors in the
incoming packet. If the packet is COMPRESSED_TCP or UNCOMPRESSED_TCP,
the compression state will be updated.
The new packet will be constructed in-place. That means that there must
be 128 bytes of free space in front of bufp to allow room for the
reconstructed IP and TCP headers. The reconstructed packet will be
aligned on a 32-bit boundary.
u_char *
sl_uncompress_tcp(bufp, len, type, comp)
u_char *bufp;
int len;
u_int type;
struct slcompress *comp;
{
register u_char *cp;
register u_int hlen, changes;
register struct tcphdr *th;
register struct cstate *cs;
register struct ip *ip;
switch (type) {
case TYPE_ERROR:
default:
goto bad;
case TYPE_IP:
return (bufp);
case TYPE_UNCOMPRESSED_TCP:
/*
* Locate the saved state for this connection. If the state
* index is legal, clear the 'discard' flag.
*/
ip = (struct ip *) bufp;
if (ip->ip_p >= MAX_STATES)
goto bad;
cs = &comp->rstate[comp->last_recv = ip->ip_p];
comp->flags &= ~SLF_TOSS;
/*
* Restore the IP protocol field then save a copy of this
* packet header. (The checksum is zeroed in the copy so we
* don't have to zero it each time we process a compressed
Jacobson [Page 37]