RFC 1144 Compressing TCP/IP Headers February 1990
* packet.
*/
ip->ip_p = IPPROTO_TCP;
hlen = ip->ip_hl;
hlen += ((struct tcphdr *) & ((int *) ip)[hlen])->th_off;
hlen <<= 2;
BCOPY(ip, &cs->cs_ip, hlen);
cs->cs_ip.ip_sum = 0;
cs->cs_hlen = hlen;
return (bufp);
case TYPE_COMPRESSED_TCP:
break;
}
/* We've got a compressed packet. */
cp = bufp;
changes = *cp++;
if (changes & NEW_C) {
/*
* Make sure the state index is in range, then grab the
* state. If we have a good state index, clear the 'discard'
* flag.
*/
if (*cp >= MAX_STATES)
goto bad;
comp->flags &= ~SLF_TOSS;
comp->last_recv = *cp++;
} else {
/*
* This packet has an implicit state index. If we've had a
* line error since the last time we got an explicit state
* index, we have to toss the packet.
*/
if (comp->flags & SLF_TOSS)
return ((u_char *) 0);
}
/*
* Find the state then fill in the TCP checksum and PUSH bit.
*/
cs = &comp->rstate[comp->last_recv];
hlen = cs->cs_ip.ip_hl << 2;
th = (struct tcphdr *) & ((u_char *) &cs->cs_ip)[hlen];
th->th_sum = htons((*cp << 8) | cp[1]);
cp += 2;
if (changes & TCP_PUSH_BIT)
th->th_flags |= TH_PUSH;
else
th->th_flags &= ~TH_PUSH;
/*
Jacobson [Page 38]
RFC 1144 Compressing TCP/IP Headers February 1990
* Fix up the state's ack, seq, urg and win fields based on the
* changemask.
*/
switch (changes & SPECIALS_MASK) {
case SPECIAL_I:
{
register u_int i = ntohs(cs->cs_ip.ip_len) - cs->cs_hlen;
th->th_ack = htonl(ntohl(th->th_ack) + i);
th->th_seq = htonl(ntohl(th->th_seq) + i);
}
break;
case SPECIAL_D:
th->th_seq = htonl(ntohl(th->th_seq) + ntohs(cs->cs_ip.ip_len)
- cs->cs_hlen);
break;
default:
if (changes & NEW_U) {
th->th_flags |= TH_URG;
DECODEU(th->th_urp)
} else
th->th_flags &= ~TH_URG;
if (changes & NEW_W)
DECODES(th->th_win)
if (changes & NEW_A)
DECODEL(th->th_ack)
if (changes & NEW_S)
DECODEL(th->th_seq)
break;
}
/* Update the IP ID */
if (changes & NEW_I)
DECODES(cs->cs_ip.ip_id)
else
cs->cs_ip.ip_id = htons(ntohs(cs->cs_ip.ip_id) + 1);
/*
* At this point, cp points to the first byte of data in the packet.
* If we're not aligned on a 4-byte boundary, copy the data down so
* the IP & TCP headers will be aligned. Then back up cp by the
* TCP/IP header length to make room for the reconstructed header (we
* assume the packet we were handed has enough space to prepend 128
* bytes of header). Adjust the length to account for the new header
* & fill in the IP total length.
*/
len -= (cp - bufp);
if (len < 0)
/*
* we must have dropped some characters (crc should detect
* this but the old slip framing won't)
Jacobson [Page 39]
RFC 1144 Compressing TCP/IP Headers February 1990
*/
goto bad;
if ((int) cp & 3) {
if (len > 0)
OVBCOPY(cp, (int) cp & ~3, len);
cp = (u_char *) ((int) cp & ~3);
}
cp -= cs->cs_hlen;
len += cs->cs_hlen;
cs->cs_ip.ip_len = htons(len);
BCOPY(&cs->cs_ip, cp, cs->cs_hlen);
/* recompute the ip header checksum */
{
register u_short *bp = (u_short *) cp;
for (changes = 0; hlen > 0; hlen -= 2)
changes += *bp++;
changes = (changes & 0xffff) + (changes >> 16);
changes = (changes & 0xffff) + (changes >> 16);
((struct ip *) cp)->ip_sum = ~changes;
}
return (cp);
bad:
comp->flags |= SLF_TOSS;
return ((u_char *) 0);
}
Jacobson [Page 40]
RFC 1144 Compressing TCP/IP Headers February 1990
A.4 Initialization
This routine initializes the state structure for both the transmit and
receive halves of some serial line. It must be called each time the
line is brought up.
void
sl_compress_init(comp)
struct slcompress *comp;
{
register u_int i;
register struct cstate *tstate = comp->tstate;
/*
* Clean out any junk left from the last time line was used.
*/
bzero((char *) comp, sizeof(*comp));
/*
* Link the transmit states into a circular list.
*/
for (i = MAX_STATES - 1; i > 0; --i) {
tstate[i].cs_id = i;
tstate[i].cs_next = &tstate[i - 1];
}
tstate[0].cs_next = &tstate[MAX_STATES - 1];
tstate[0].cs_id = 0;
comp->last_cs = &tstate[0];
/*
* Make sure we don't accidentally do CID compression
* (assumes MAX_STATES < 255).
*/
comp->last_recv = 255;
comp->last_xmit = 255;
}
A.5 Berkeley Unix dependencies
Note: The following is of interest only if you are trying to bring the
sample code up on a system that is not derived from 4BSD (Berkeley
Unix).
The code uses the normal Berkeley Unix header files (from
/usr/include/netinet) for definitions of the structure of IP and TCP
headers. The structure tags tend to follow the protocol RFCs closely
and should be obvious even if you do not have access to a 4BSD
system./48/
----------------------------
48. In the event they are not obvious, the header files (and all the
Berkeley networking code) can be anonymous ftp'd from host
Jacobson [Page 41]
RFC 1144 Compressing TCP/IP Headers February 1990
The macro BCOPY(src, dst, amt) is invoked to copy amt bytes from src to
dst. In BSD, it translates into a call to bcopy. If you have the
misfortune to be running System-V Unix, it can be translated into a call
to memcpy. The macro OVBCOPY(src, dst, amt) is used to copy when src
and dst overlap (i.e., when doing the 4-byte alignment copy). In the
BSD kernel, it translates into a call to ovbcopy. Since AT&T botched
the definition of memcpy, this should probably translate into a copy
loop under System-V.
The macro BCMP(src, dst, amt) is invoked to compare amt bytes of src and
dst for equality. In BSD, it translates into a call to bcmp. In
System-V, it can be translated into a call to memcmp or you can write a
routine to do the compare. The routine should return zero if all bytes
of src and dst are equal and non-zero otherwise.
The routine ntohl(dat) converts (4 byte) long dat from network byte
order to host byte order. On a reasonable cpu this can be the no-op
macro:
/* No-op form: correct only where host byte order already equals network byte order. */
#define ntohl(dat) (dat)
On a Vax or IBM PC (or anything with Intel byte order), you will have to
define a macro or routine to rearrange bytes.
The routine ntohs(dat) is like ntohl but converts (2 byte) shorts
instead of longs. The routines htonl(dat) and htons(dat) do the inverse
transform (host to network byte order) for longs and shorts.
A struct mbuf is used in the call to sl_compress_tcp because that
routine needs to modify both the start address and length if the
incoming packet is compressed. In BSD, an mbuf is the kernel's buffer
management structure. In other systems, the following definition should
be sufficient:
/*
 * Minimal stand-in for the BSD mbuf on systems that lack one: it only
 * records where a packet's data starts and how long it is.
 */
struct mbuf {
u_char *m_off; /* pointer to start of data */
int m_len; /* length of data */
};
/*
 * mtod - yield the mbuf's data pointer cast to type t.
 * The mbuf argument is parenthesized so that any pointer-valued
 * expression (e.g. `mp + 1`) can be passed, not just a plain identifier.
 */
#define mtod(m, t) ((t)((m)->m_off))
----------------------------
ucbarpa.berkeley.edu, files pub/4.3/tcp.tar and pub/4.3/inet.tar.
Jacobson [Page 42]
RFC 1144 Compressing TCP/IP Headers February 1990
B Compatibility with past mistakes
When combined with the modern PPP serial line protocol[9], the use of
header compression is automatic and invisible to the user.
Unfortunately, many sites have existing users of the SLIP described in
[12] which doesn't allow for different protocol types to distinguish
header compressed packets from IP packets or for version numbers or an
option exchange that could be used to automatically negotiate header
compression.
The author has used the following tricks to allow header compressed SLIP
to interoperate with the existing servers and clients. Note that these
are hacks for compatibility with past mistakes and should be offensive
to any right thinking person. They are offered solely to ease the pain
of running SLIP while users wait patiently for vendors to release PPP.
B.1 Living without a framing `type' byte
The bizarre packet type numbers in sec. A.1 were chosen to allow a
`packet type' to be sent on lines where it is undesirable or impossible
to add an explicit type byte. Note that the first byte of an IP packet
always contains `4' (the IP protocol version) in the top four bits. And
that the most significant bit of the first byte of the compressed header
is ignored. Using the packet types in sec. A.1, the type can be encoded
in the most significant bits of the outgoing packet using the code
/* OR the packet type returned by sl_compress_tcp into the top bits of the first byte. */
p->dat[0] |= sl_compress_tcp(p, comp);
and decoded on the receive side by
/* Recover the packet type from the high bits of the first byte. */
if (p->dat[0] & 0x80) /* top bit set: compressed header */
type = TYPE_COMPRESSED_TCP;
else if (p->dat[0] >= 0x70) { /* 0x7x: uncompressed TCP */
type = TYPE_UNCOMPRESSED_TCP;
p->dat[0] &=~ 0x30; /* 0x7x -> 0x4x: restore the IP version nibble (4) */
} else
type = TYPE_IP; /* anything else: ordinary IP packet, first byte left as is */
status = sl_uncompress_tcp(p, type, comp);
B.2 Backwards compatible SLIP servers
The SLIP described in [12] doesn't include any mechanism that could be
used to automatically negotiate header compression. It would be nice to
Jacobson [Page 43]
RFC 1144 Compressing TCP/IP Headers February 1990
allow users of this SLIP to use header compression but, when users of
the two SLIP variants share a common server, it would be annoying and
difficult to manually configure both ends of each connection to enable
compression. The following procedure can be used to avoid manual
configuration.
Since there are two types of dial-in clients (those that implement
compression and those that don't) but one server for both types, it's
clear that the server will be reconfiguring for each new client session
but clients change configuration seldom if ever. If manual
configuration has to be done, it should be done on the side that changes
infrequently --- the client. This suggests that the server should
somehow learn from the client whether to use header compression.
Assuming symmetry (i.e., if compression is used at all it should be used
in both directions) the server can use the receipt of a compressed packet
from some client to indicate that it can send compressed packets to that
client. This leads to the following algorithm:
There are two bits per line to control header compression: allowed and
on. If on is set, compressed packets are sent, otherwise not. If
allowed is set, compressed packets can be received and, if an
UNCOMPRESSED_TCP packet arrives when on is clear, on will be set./49/
If a compressed packet arrives when allowed is clear, it will be
ignored.
Clients are configured with both bits set (allowed is always set if on
is set) and the server starts each session with allowed set and on
clear. The first compressed packet from the client (which must be an
UNCOMPRESSED_TCP packet) turns on compression for the server.
----------------------------
49. Since [12] framing doesn't include error detection, one should be
careful not to `false trigger' compression on the server. The
UNCOMPRESSED_TCP packet should be checked for consistency (e.g., IP
checksum correctness) before compression is enabled. Arrival of
COMPRESSED_TCP packets should not be used to enable compression.
Jacobson [Page 44]
RFC 1144 Compressing TCP/IP Headers February 1990
C More aggressive compression
As noted in sec. 3.2.2, easily detected patterns exist in the stream of
compressed headers, indicating that more compression could be done.
Would this be worthwhile?
The average compressed datagram has only seven bits of header./50/ The
framing must be at least one bit (to encode the `type') and will
probably be more like two to three bytes. In most interesting cases
there will be at least one byte of data. Finally, the end-to-end
check---the TCP checksum---must be passed through unmodified./51/
The framing, data and checksum will remain even if the header is
completely compressed out so the change in average packet size is, at
best, four bytes down to three bytes and one bit --- roughly a 25%
improvement in delay./52/ While this may seem significant, on a 2400
bps line it means that typing echo response takes 25 rather than 29 ms.
At the present stage of human evolution, this difference is not
detectable.
However, the author sheepishly admits to perverting this compression
scheme for a very special case data-acquisition problem: We had an
instrument and control package floating at 200KV, communicating with
ground level via a telemetry system. For many reasons (multiplexed
communication, pipelining, error recovery, availability of well tested
implementations, etc.), it was convenient to talk to the package using
TCP/IP. However, since the primary use of the telemetry link was data
acquisition, it was designed with an uplink channel capacity <0.5% the
downlink's. To meet application delay budgets, data packets were 100
bytes and, since TCP acks every other packet, the relative uplink
bandwidth for acks is a/200 where `a' is the total size of ack packets.
Using the scheme in this paper, the smallest ack is four bytes which
would imply an uplink bandwidth 2% of the downlink. This wasn't
----------------------------
50. Tests run with several million packets from a mixed traffic load
(i.e., statistics kept on a year's traffic from my home to work) show
that 80% of packets use one of the two special encodings and, thus, the
only header is the change mask.
51. If someone tries to sell you a scheme that compresses the TCP
checksum `Just say no'. Some poor fool has yet to have the sad
experience that reveals the end-to-end argument is gospel truth. Worse,
since the fool is subverting your end-to-end error check, you may pay
the price for this education and they will be none the wiser. What does
it profit a man to gain two byte times of delay and lose peace of mind?
52. Note again that we must be concerned about interactive delay to be
making this argument: Bulk data transfer performance will be dominated
by the time to send the data and the difference between three and four
byte headers on a datagram containing tens or hundreds of data bytes is,
practically, no difference.
Jacobson [Page 45]
RFC 1144 Compressing TCP/IP Headers February 1990
possible so we used the scheme described in footnote 15: If the first
bit of the frame was one, it meant `same compressed header as last
time'. Otherwise the next two bits gave one of the types described in
sec. 3.2. Since the link had excellent forward error correction and
traffic made only a single hop, the TCP checksum was compressed out
(blush!) of the `same header' packet types/53/ so the total header size
for these packets was one bit. Over several months of operation, more
than 99% of the 40 byte TCP/IP headers were compressed down to one
bit./54/
D Security Considerations
Security considerations are not addressed in this memo.
E Author's address
Address: Van Jacobson
Real Time Systems Group
Mail Stop 46A
Lawrence Berkeley Laboratory
Berkeley, CA 94720
Phone: Use email (author ignores his phone)
EMail: van@helios.ee.lbl.gov
----------------------------
53. The checksum was re-generated in the decompressor and, of course,
the `toss' logic was made considerably more aggressive to prevent error
propagation.
54. We have heard the suggestion that `real-time' needs require
abandoning TCP/IP in favor of a `light-weight' protocol with smaller
headers. It is difficult to envision a protocol that averages less than
one header bit per packet.
Jacobson [Page 46]