Huge thanks to our Platinum Members Endace and LiveAction,
and our Silver Member Veeam, for supporting the Wireshark Foundation and project.

Ethereal-dev: [Ethereal-dev] HTTP chunked encoding patch

Note: This archive is from the project's previous web site, ethereal.com. This list is no longer active.

From: Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
Date: Sun, 25 Apr 2004 21:39:51 -0700
Howdy List!

Here's a patch to add support for decoding chunked transfer-encoded HTTP
entities.  I also updated my email address.

I had originally started by using composite tvbuffs, but backed out when
I found out that they're not really ready for prime time.  I left the
code in, commented out, but I can resubmit without it (or with any
other changes.)

-- 
GPG public key:
http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x9D5B8762
Index: AUTHORS
===================================================================
RCS file: /cvsroot/ethereal/AUTHORS,v
retrieving revision 1.999
diff -u -r1.999 AUTHORS
--- AUTHORS	22 Apr 2004 08:22:07 -0000	1.999
+++ AUTHORS	26 Apr 2004 04:01:20 -0000
@@ -285,7 +285,7 @@
 	updates to BGP (Border Gateway Protocol) support
 }
 
-Jerry Talkington <jerryt[AT]netapp.com> {
+Jerry Talkington <jtalkington[AT]users.sourceforge.net> {
 	updates to HTTP support
 	Filter selection/editing GUI improvements
 	WCCP 1.0 support
Index: packet-http.c
===================================================================
RCS file: /cvsroot/ethereal/packet-http.c,v
retrieving revision 1.96
diff -u -r1.96 packet-http.c
--- packet-http.c	12 Apr 2004 22:14:37 -0000	1.96
+++ packet-http.c	26 Apr 2004 04:01:21 -0000
@@ -73,6 +73,9 @@
 static gint ett_http = -1;
 static gint ett_http_ntlmssp = -1;
 static gint ett_http_request = -1;
+static gint ett_http_chunked_response = -1;
+static gint ett_http_chunk_data = -1;
+static gint ett_http_encoded_entity = -1;
 
 static dissector_handle_t data_handle;
 static dissector_handle_t http_handle;
@@ -125,6 +128,8 @@
 
 static int is_http_request_or_reply(const gchar *data, int linelen, http_type_t *type,
 		RequestDissector *req_dissector, int *req_strlen);
+static int chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo,
+		proto_tree *tree, int offset);
 static void process_header(tvbuff_t *tvb, int offset, int next_offset,
     const guchar *line, int linelen, int colon_offset, packet_info *pinfo,
     proto_tree *tree, headers_t *eh_ptr);
@@ -589,6 +594,7 @@
 		 */
 		tvbuff_t *next_tvb;
 		void *save_private_data = NULL;
+		gint chunks_decoded = 0;
 
 		/*
 		 * Create a tvbuff for the payload.
@@ -608,6 +614,29 @@
 		    reported_datalen);
 
 		/*
+		 * Handle transfer encodings other than "identity".
+		 */
+		if (headers.transfer_encoding != NULL &&
+		    strcasecmp(headers.transfer_encoding, "identity") != 0) {
+			if (strcasecmp(headers.transfer_encoding, "chunked")
+			    == 0) {
+
+				chunks_decoded = chunked_encoding_dissector(
+				    &next_tvb, pinfo, tree, 0);
+
+			} else {
+				/*
+				 * We currently can't handle, for example, "gzip",
+				 * "compress", or "deflate"; just handle them
+				 * as data for now.
+				 */
+				call_dissector(data_handle, next_tvb, pinfo,
+				    http_tree);
+				goto body_dissected;
+			}
+		}
+
+		/*
 		 * Handle content encodings other than "identity" (which
 		 * shouldn't appear in a Content-Encoding header, but
 		 * we handle it in any case).
@@ -619,23 +648,28 @@
 			 * "compress", or "deflate"; just handle them as
 			 * data for now.
 			 */
-			call_dissector(data_handle, next_tvb, pinfo,
-			    http_tree);
-			goto body_dissected;
-		}
+			if (chunks_decoded != 0) {
+				/*
+				 * There is a chunked response tree, so put
+				 * the entity body below it.
+				 */
+				proto_item *e_ti = NULL;
+				proto_tree *e_tree = NULL;
 
-		/*
-		 * Handle transfer encodings other than "identity".
-		 */
-		if (headers.transfer_encoding != NULL &&
-		    strcasecmp(headers.transfer_encoding, "identity") != 0) {
-			/*
-			 * We currently can't handle, for example, "chunked",
-			 * "gzip", "compress", or "deflate"; just handle them
-			 * as data for now.
-			 */
-			call_dissector(data_handle, next_tvb, pinfo,
-			    http_tree);
+				e_ti = proto_tree_add_text(tree, next_tvb,
+				    0, tvb_length(next_tvb),
+				    "Encoded entity-body (%s)",
+				    headers.content_encoding);
+
+				e_tree = proto_item_add_subtree(e_ti,
+				    ett_http_encoded_entity);
+
+				call_dissector(data_handle, next_tvb, pinfo,
+				    e_tree);
+			} else {
+				call_dissector(data_handle, next_tvb, pinfo,
+				    http_tree);
+			}
 			goto body_dissected;
 		}
 
@@ -751,6 +785,182 @@
 }
 
 /*
+ * Dissect the http data chunks and add them to the tree.
+ */
+static int
+chunked_encoding_dissector(tvbuff_t **tvb_ptr, packet_info *pinfo,
+    proto_tree *tree, int offset)
+{
+	guint8 *chunk_string = NULL;
+	gint chunk_size = 0;
+	gint chunk_offset = 0;
+	gint datalen = 0;
+	gint linelen = 0;
+	gint chunks_decoded = 0;
+	tvbuff_t *tvb = NULL;
+	tvbuff_t *new_tvb = NULL;
+	gint chunked_data_size = 0;
+	proto_tree *subtree = NULL;
+	proto_item *ti = NULL;
+	
+	if (tvb_ptr == NULL || *tvb_ptr == NULL) {
+		return 0;
+	}
+
+	tvb = *tvb_ptr;
+
+	datalen = tvb_reported_length_remaining(tvb, offset);
+
+	if (tree) {
+		ti = proto_tree_add_text(tree, tvb, offset, datalen,
+		    "HTTP chunked response");
+		subtree = proto_item_add_subtree(ti, ett_http_chunked_response);
+	}
+
+
+	while (datalen != 0) {
+		proto_item *chunk_ti = NULL;
+		proto_tree *chunk_subtree = NULL;
+		tvbuff_t *data_tvb = NULL;
+		gchar *c = NULL;
+
+		linelen = tvb_find_line_end(tvb, offset, -1, &chunk_offset, TRUE);
+
+		if (linelen <= 0) {
+			/* Can't get the chunk size line */
+			return 0;
+		}
+
+		chunk_string = tvb_get_string(tvb, offset, linelen);
+
+		if (chunk_string == NULL) {
+			/* Can't get the chunk size line */
+			return 0;
+		}
+		
+		c = chunk_string;
+
+		/*
+		 * We don't care about the extensions.
+		 */
+		if ((c = strchr(c, ';'))) {
+			*c = '\0';
+		}
+
+		if (sscanf(chunk_string, "%x", &chunk_size) != 1) {
+			g_free(chunk_string);
+			return 0;
+		}
+
+		g_free(chunk_string);
+
+
+		if (chunk_size > datalen) {
+			/*
+			 * The chunk size is more than what's in the tvbuff,
+			 * so either the user hasn't enabled decoding, or all
+			 * of the segments weren't captured.
+			 */
+			chunk_size = datalen;
+		}/* else if (new_tvb == NULL) {
+			new_tvb = tvb_new_composite();
+		}
+
+
+
+		if (new_tvb != NULL && chunk_size != 0) {
+			tvbuff_t *chunk_tvb = NULL;
+			
+			chunk_tvb = tvb_new_subset(tvb, chunk_offset,
+			    chunk_size, datalen);
+
+			tvb_composite_append(new_tvb, chunk_tvb);
+
+		}
+		*/
+		
+		chunked_data_size += chunk_size;
+
+		if (chunk_size != 0) {
+			guint8 *raw_data = g_malloc(chunked_data_size);
+			gint raw_len = 0;
+
+			if (new_tvb != NULL) {
+				raw_len = tvb_length_remaining(new_tvb, 0);
+				tvb_memcpy(new_tvb, raw_data, 0, raw_len);
+
+				tvb_free(new_tvb);
+			}
+
+			tvb_memcpy(tvb, (guint8 *)(raw_data + raw_len),
+			    chunk_offset, chunk_size);
+
+			new_tvb = tvb_new_real_data(raw_data,
+			    chunked_data_size, chunked_data_size);
+
+		}
+
+
+
+		if (subtree) {
+			if (chunk_size == 0) {
+				chunk_ti = proto_tree_add_text(subtree, tvb,
+				    offset,
+				    chunk_offset - offset + chunk_size + 2,
+				    "Data chunk (last chunk)");
+			} else {
+				chunk_ti = proto_tree_add_text(subtree, tvb,
+				    offset,
+				    chunk_offset - offset + chunk_size + 2,
+				    "Data chunk (%u octets)", chunk_size);
+			}
+
+			chunk_subtree = proto_item_add_subtree(chunk_ti,
+			    ett_http_chunk_data);
+
+			proto_tree_add_text(chunk_subtree, tvb, offset,
+			    chunk_offset - offset, "Chunk size: %u octets",
+			    chunk_size);
+
+			data_tvb = tvb_new_subset(tvb, chunk_offset, chunk_size,
+			    datalen);
+
+		
+			if (chunk_size > 0) { 
+				call_dissector(data_handle, data_tvb, pinfo,
+				    chunk_subtree);
+			}
+
+			proto_tree_add_text(chunk_subtree, tvb, chunk_offset +
+			    chunk_size, 2, "Chunk boundry");
+		}
+
+		chunks_decoded++;
+		offset = chunk_offset + chunk_size + 2;
+		datalen = tvb_reported_length_remaining(tvb, offset);
+	}
+
+	if (new_tvb != NULL) {
+
+		/*
+		tvb_composite_finalize(new_tvb);
+		//tvb_set_reported_length(new_tvb, chunked_data_size);
+		*/
+
+		tvb_set_child_real_data_tvbuff(tvb, new_tvb);
+		add_new_data_source(pinfo, new_tvb, "De-chunked entity body");
+
+		tvb_free(*tvb_ptr);
+		*tvb_ptr = new_tvb;
+		
+	}
+
+	return chunks_decoded;
+
+}
+
+
+/*
  * XXX - this won't handle HTTP 0.9 replies, but they're all data
  * anyway.
  */
@@ -1271,6 +1481,9 @@
 		&ett_http,
 		&ett_http_ntlmssp,
 		&ett_http_request,
+		&ett_http_chunked_response,
+		&ett_http_chunk_data,
+		&ett_http_encoded_entity,
 	};
 	module_t *http_module;
 
Index: packet-wccp.c
===================================================================
RCS file: /cvsroot/ethereal/packet-wccp.c,v
retrieving revision 1.33
diff -u -r1.33 packet-wccp.c
--- packet-wccp.c	28 Aug 2002 21:00:37 -0000	1.33
+++ packet-wccp.c	26 Apr 2004 04:01:22 -0000
@@ -1,6 +1,6 @@
 /* packet-wccp.c
  * Routines for Web Cache Coordination Protocol dissection
- * Jerry Talkington <jerryt@xxxxxxxxxx>
+ * Jerry Talkington <jtalkington@xxxxxxxxxxxxxxxxxxxxx>
  *
  * $Id: packet-wccp.c,v 1.33 2002/08/28 21:00:37 jmayer Exp $
  *
Index: req_resp_hdrs.c
===================================================================
RCS file: /cvsroot/ethereal/req_resp_hdrs.c,v
retrieving revision 1.3
diff -u -r1.3 req_resp_hdrs.c
--- req_resp_hdrs.c	29 Dec 2003 22:33:18 -0000	1.3
+++ req_resp_hdrs.c	26 Apr 2004 04:01:22 -0000
@@ -30,6 +30,7 @@
 #include <glib.h>
 #include <epan/packet.h>
 #include <epan/strutil.h>
+#include <string.h>
 
 #include "req_resp_hdrs.h"
 
@@ -47,6 +48,7 @@
 	int		linelen;
 	long int	content_length;
 	gboolean	content_length_found = FALSE;
+	gboolean	chunked_encoding = FALSE;
 
 	/*
 	 * Do header desegmentation if we've been told to.
@@ -131,8 +133,8 @@
 			}
 
 			/*
-			 * Is this a Content-Length header?
-			 * If not, it either means that we are in
+			 * Is this a Content-Length or Transfer-Encoding
+			 * header?  If not, it either means that we are in
 			 * a different header line, or that we are
 			 * at the end of the headers, or that there
 			 * isn't enough data; the two latter cases
@@ -151,6 +153,44 @@
 					    "%li", &content_length)
 					    == 1)
 						content_length_found = TRUE;
+				} else if (tvb_strncaseeql(tvb,
+					    next_offset_sav,
+					    "Transfer-Encoding:", 18) == 0) {
+					gchar *chunk_type = tvb_get_string(tvb,
+					    next_offset_sav + 18, linelen - 18);
+					/*
+					 * Find out if this Transfer-Encoding is
+					 * chunked.  It should be, since there
+					 * really aren't any other types, but
+					 * RFC 2616 allows for them.
+					 */
+
+					if (chunk_type != NULL) {
+						gchar *c = chunk_type;
+						gint len = strlen(chunk_type);
+
+						
+						/* start after any white-space */
+						while (c != NULL && c <
+							    chunk_type + len &&
+							    (*c == ' ' ||
+							     *c == 0x09)) {
+							c++;
+						}
+
+						if (c <= chunk_type + len ) {
+							if (strncasecmp(c, "chunked", 7)
+							    == 0) {
+								/*
+								 * Don't bother looking for extensions;
+								 * since we don't understand them,
+								 * they should be ignored.
+								 */
+								chunked_encoding = TRUE;
+							}
+						}
+						g_free(chunk_type);
+					}
 				}
 			}
 		}
@@ -158,30 +198,139 @@
 
 	/*
 	 * The above loop ends when we reached the end of the headers, so
-	 * there should be content_length byte after the 4 terminating bytes
+	 * there should be content_length bytes after the 4 terminating bytes
 	 * and next_offset points to after the end of the headers.
 	 */
-	if (desegment_body && content_length_found) {
-		/* next_offset has been set because content-length was found */
-		if (!tvb_bytes_exist(tvb, next_offset, content_length)) {
-			length_remaining = tvb_length_remaining(tvb,
-			    next_offset);
-			reported_length_remaining =
-			    tvb_reported_length_remaining(tvb, next_offset);
-			if (length_remaining < reported_length_remaining) {
+	if (desegment_body) {
+		if (content_length_found) {
+			/* next_offset has been set to the end of the headers */
+			if (!tvb_bytes_exist(tvb, next_offset, content_length)) {
+				length_remaining = tvb_length_remaining(tvb,
+				    next_offset);
+				reported_length_remaining =
+				    tvb_reported_length_remaining(tvb, next_offset);
+				if (length_remaining < reported_length_remaining) {
+					/*
+					 * It's a waste of time asking for more
+					 * data, because that data wasn't captured.
+					 */
+					return TRUE;
+				}
+				if (length_remaining == -1)
+					length_remaining = 0;
+				pinfo->desegment_offset = offset;
+				pinfo->desegment_len =
+				    content_length - length_remaining;
+				return FALSE;
+			}
+		} else if (chunked_encoding) {
+			/*
+			 * This data is chunked, so we need to keep pulling
+			 * data until we reach the end of the stream, or a
+			 * zero sized chunk.
+			 *
+			 * XXX
+			 * This doesn't bother with trailing headers; I don't
+			 * think they are really used, and we'd have to use
+			 * is_http_request_or_reply() to determine if it was
+			 * a trailing header, or the start of a new response.
+			 */
+			gboolean done_chunking = FALSE;
+
+			while (!done_chunking) {
+				gint chunk_size = 0;
+				gint chunk_offset = 0;
+				gchar *chunk_string = NULL;
+				gchar *c = NULL;
+
+				length_remaining = tvb_length_remaining(tvb,
+				    next_offset);
+				reported_length_remaining =
+				    tvb_reported_length_remaining(tvb,
+				    next_offset);
+
+				if (reported_length_remaining < 1) {
+					pinfo->desegment_offset = offset;
+					pinfo->desegment_len = 1;
+					return FALSE;
+				}
+
+				linelen = tvb_find_line_end(tvb, next_offset,
+						-1, &chunk_offset, TRUE);
+
+				if (linelen == -1 &&
+				    length_remaining >=
+				    reported_length_remaining) {
+					 pinfo->desegment_offset = offset;
+					 pinfo->desegment_len = 2;
+					 return FALSE;
+				}
+				
+				/* We have a line with the chunk size in it.*/
+				chunk_string = tvb_get_string(tvb, next_offset,
+				    linelen);
+				c = chunk_string;
+
 				/*
-				 * It's a waste of time asking for more
-				 * data, because that data wasn't captured.
+				 * We don't care about the extensions.
 				 */
-				return TRUE;
+				if ((c = strchr(c, ';'))) {
+					*c = '\0';
+				}
+
+				if ((sscanf(chunk_string, "%x",
+				    &chunk_size) < 0) || chunk_size < 0) {
+					/* We couldn't get the chunk size,
+					 * so stop trying.
+					 */
+					return TRUE;
+				}
+
+				if (chunk_size == 0) {
+					/*
+					 * This is the last chunk.  Let's pull in the
+					 * trailing CRLF.
+					 */
+					linelen = tvb_find_line_end(tvb,
+					    chunk_offset, -1, &chunk_offset, TRUE);
+						
+					if (linelen == -1 &&
+					    length_remaining >=
+					    reported_length_remaining) {
+						pinfo->desegment_offset = offset;
+						pinfo->desegment_len = 1;
+						return FALSE;
+					}
+
+					pinfo->desegment_offset = chunk_offset;
+					pinfo->desegment_len = 0;
+					done_chunking = TRUE;
+				} else {
+					/* 
+					 * Skip to the next chunk if we
+					 * already have it 
+					 */
+					if (reported_length_remaining >
+					        chunk_size) {
+						
+						next_offset = chunk_offset 
+						    + chunk_size + 2;
+					} else {
+						/* 
+						 * Fetch this chunk, plus the
+						 * trailing CRLF.
+						 */ 
+						pinfo->desegment_offset = offset;
+						pinfo->desegment_len =
+						    chunk_size + 1 -
+						    reported_length_remaining;
+						return FALSE;
+					}
+				}
+
 			}
-			if (length_remaining == -1)
-				length_remaining = 0;
-			pinfo->desegment_offset = offset;
-			pinfo->desegment_len =
-			    content_length - length_remaining;
-			return FALSE;
 		}
+
 	}
 
 	/*
Index: doc/ethereal.pod
===================================================================
RCS file: /cvsroot/ethereal/doc/ethereal.pod,v
retrieving revision 1.109
diff -u -r1.109 ethereal.pod
--- doc/ethereal.pod	23 Apr 2004 19:53:36 -0000	1.109
+++ doc/ethereal.pod	26 Apr 2004 04:01:23 -0000
@@ -1893,7 +1893,7 @@
   Warren Young             <tangent[AT]mail.com>
   Heikki Vatiainen         <hessu[AT]cs.tut.fi>
   Greg Hankins             <gregh[AT]twoguys.org>
-  Jerry Talkington         <jerryt[AT]netapp.com>
+  Jerry Talkington         <jtalkington[AT]users.sourceforge.net>
   Dave Chapeskie           <dchapes[AT]ddm.on.ca>
   James Coe                <jammer[AT]cin.net>
   Bert Driehuis            <driehuis[AT]playbeing.org>