-rw-r--r--   .gitignore         10
-rw-r--r--   .python-version     1
-rw-r--r--   README.md          72
-rw-r--r--   httpdump.1         12
-rw-r--r--   httpdump.py       170
-rw-r--r--   pyproject.toml     23
-rw-r--r--   uv.lock            56
7 files changed, 344 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..505a3b1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..db49664
--- /dev/null
+++ b/README.md
@@ -0,0 +1,72 @@
+# httpdump
+
+Parse HTTP requests and responses into JSON.
+
+## Installation
+
+```bash
+uv sync
+```
+
+## Usage
+
+```bash
+# Parse HTTP request from stdin
+nc -lp 8080 | uv run httpdump
+
+# Parse HTTP response
+curl -si https://example.com | uv run httpdump --response
+
+# Parse from file
+uv run httpdump request.txt
+uv run httpdump --response response.txt
+```
+
+## Output Format
+
+### Request
+```json
+{
+  "method": "POST",
+  "url": "/api/users?page=1",
+  "path": "/api/users",
+  "query_params": {"page": "1"},
+  "headers": {
+    "raw_base64": "...",
+    "parsed": {"Content-Type": "application/json", "Host": "example.com"}
+  },
+  "body": {
+    "raw_base64": "...",
+    "json": {"name": "John"}
+  }
+}
+```
+
+### Response
+```json
+{
+  "status_code": 200,
+  "status_text": "OK",
+  "headers": {
+    "raw_base64": "...",
+    "parsed": {"Content-Type": "application/json"}
+  },
+  "body": {
+    "raw_base64": "...",
+    "json": {"id": 1}
+  }
+}
+```
+
+## Body Parsing
+
+Body is parsed based on Content-Type:
+- `application/json` → `body.json`
+- `application/x-www-form-urlencoded` → `body.form`
+- `multipart/form-data` → `body.multipart`
+
+Raw body is always available as base64 in `body.raw_base64`.
+
+## Options
+
+- `-r, --response` - Parse as HTTP response instead of request
diff --git a/httpdump.1 b/httpdump.1
new file mode 100644
index 0000000..d9e00bf
--- /dev/null
+++ b/httpdump.1
@@ -0,0 +1,12 @@
+.TH "HTTPDUMP" "1" "2026-01-23" "0.1.0" "httpdump Manual"
+.SH NAME
+httpdump \- Parse HTTP requests and responses into JSON.
+.SH SYNOPSIS
+.B httpdump
+[OPTIONS] [FILE]
+.SH DESCRIPTION
+Parse HTTP requests and responses into JSON.
+.SH OPTIONS
+.TP
+\fB\-r,\fP \-\-response
+Parse as HTTP response instead of request
diff --git a/httpdump.py b/httpdump.py
new file mode 100644
index 0000000..68f6a26
--- /dev/null
+++ b/httpdump.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+import base64
+import json
+import os
+import select
+import sys
+from urllib.parse import parse_qs, urlparse
+
+import click
+
+
+def decode(b: bytes, encoding: str = "ascii") -> str:
+    try:
+        return b.decode(encoding)
+    except (UnicodeDecodeError, AttributeError):
+        return ""
+
+
+def parse_multipart(body: bytes, boundary: bytes) -> dict:
+    parts = {}
+    for part in body.split(b"--" + boundary):
+        part = part.strip()
+        if not part or part == b"--":
+            continue
+        if b"\r\n\r\n" in part:
+            headers_raw, content = part.split(b"\r\n\r\n", 1)
+        elif b"\n\n" in part:
+            headers_raw, content = part.split(b"\n\n", 1)
+        else:
+            continue
+
+        headers = {}
+        for line in headers_raw.split(b"\n"):
+            if b": " in line:
+                k, v = line.strip().split(b": ", 1)
+                headers[decode(k).lower()] = decode(v)
+
+        disposition = headers.get("content-disposition", "")
+        name = None
+        filename = None
+        for item in disposition.split(";"):
+            item = item.strip()
+            if item.startswith("name="):
+                name = item[5:].strip('"')
+            elif item.startswith("filename="):
+                filename = item[9:].strip('"')
+
+        if name:
+            content = content.rstrip(b"\r\n-")
+            if filename:
+                parts[name] = {"filename": filename, "content": decode(content, "utf-8")}
+            else:
+                parts[name] = decode(content, "utf-8")
+    return parts
+
+
+def parse_body(body: bytes, content_type: str | None) -> dict:
+    result = {"raw_base64": base64.b64encode(body).decode("ascii")}
+    if body and content_type:
+        if "application/json" in content_type:
+            try:
+                result["json"] = json.loads(body)
+            except (json.JSONDecodeError, UnicodeDecodeError):
+                pass
+        elif "application/x-www-form-urlencoded" in content_type:
+            try:
+                result["form"] = {k: v[0] if len(v) == 1 else v for k, v in parse_qs(decode(body, "utf-8")).items()}
+            except:
+                pass
+        elif "multipart/form-data" in content_type:
+            for part in content_type.split(";"):
+                if "boundary=" in part:
+                    boundary = part.split("boundary=", 1)[1].strip().encode()
+                    result["multipart"] = parse_multipart(body, boundary)
+                    break
+    return result
+
+
+def read_http_message(fd: int) -> tuple[bytes, bytes]:
+    buf = b""
+    timeout = 30.0
+    while b"\r\n\r\n" not in buf:
+        if not select.select([fd], [], [], timeout)[0]:
+            break
+        chunk = os.read(fd, 4096)
+        if not chunk:
+            break
+        buf += chunk
+        timeout = 0.1
+
+    if b"\r\n\r\n" not in buf:
+        return buf, b""
+
+    header_part, _, body = buf.partition(b"\r\n\r\n")
+    headers = {}
+    for line in header_part.split(b"\r\n")[1:]:
+        if b": " in line:
+            k, v = line.split(b": ", 1)
+            headers[decode(k).lower()] = decode(v)
+
+    remaining = int(headers.get("content-length", 0)) - len(body)
+    while remaining > 0:
+        if not select.select([fd], [], [], 0.1)[0]:
+            break
+        chunk = os.read(fd, remaining)
+        if not chunk:
+            break
+        body += chunk
+        remaining -= len(chunk)
+
+    return header_part, body
+
+
+def parse_message(header_part: bytes, body: bytes, is_response: bool) -> dict:
+    lines = header_part.split(b"\r\n")
+    first_line = decode(lines[0]).split(" ", 2)
+
+    parsed = {}
+    for line in lines[1:]:
+        if b": " in line:
+            k, v = line.split(b": ", 1)
+            parsed[decode(k)] = decode(v)
+    headers = {
+        "raw_base64": base64.b64encode(b"\r\n".join(lines[1:])).decode("ascii"),
+        "parsed": parsed,
+    }
+
+    content_type = parsed.get("Content-Type")
+
+    if is_response:
+        return {
+            "status_code": int(first_line[1]) if len(first_line) > 1 else 0,
+            "status_text": first_line[2] if len(first_line) > 2 else "",
+            "headers": headers,
+            "body": parse_body(body, content_type),
+        }
+
+    path = first_line[1] if len(first_line) > 1 else "/"
+    parsed_url = urlparse(path)
+    return {
+        "method": first_line[0],
+        "url": path,
+        "path": parsed_url.path,
+        "query_params": {k: v[0] if len(v) == 1 else v for k, v in parse_qs(parsed_url.query).items()},
+        "headers": headers,
+        "body": parse_body(body, content_type),
+    }
+
+
+@click.command()
+@click.option("--response", "-r", is_flag=True, help="Parse as HTTP response instead of request")
+@click.argument("file", required=False, type=click.Path(exists=True))
+def main(response: bool, file: str | None):
+    """Parse HTTP requests and responses into JSON."""
+    if file:
+        with open(file, "rb") as f:
+            data = f.read()
+        if b"\r\n\r\n" in data:
+            header_part, body = data.split(b"\r\n\r\n", 1)
+        else:
+            header_part, body = data, b""
+    else:
+        header_part, body = read_http_message(sys.stdin.fileno())
+
+    print(json.dumps(parse_message(header_part, body, response), indent=2), flush=True)
+    os._exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..182b9d7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[project]
+name = "httpdump"
+version = "0.1.0"
+description = "Parse HTTP requests and responses into JSON"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = ["click"]
+
+[project.optional-dependencies]
+dev = ["click-man"]
+
+[project.scripts]
+httpdump = "httpdump:main"
+
+[tool.uv]
+package = true
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+py-modules = ["httpdump"]
diff --git a/uv.lock b/uv.lock
new file mode 100644
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,56 @@
+version = 1
+revision = 3
+requires-python = ">=3.12"
+
+[[package]]
+name = "click"
+version = "8.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
+]
+
+[[package]]
+name = "click-man"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8b/1c/686c42d3a07d25d1cde107d0b92a2cf6234b92e569e61f8a294ffe6c9357/click_man-0.5.1.tar.gz", hash = "sha256:2db2163ef51a1b746d6d7781f78856430a2bcf0f10df428fe5986ecc0ef9809c", size = 21345, upload-time = "2025-04-08T14:41:11.092Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e1/37/34e03579eb583a587edba458599af6d82715a617e685dbe2ff30e4238930/click_man-0.5.1-py3-none-any.whl", hash = "sha256:ed63caf6d6bf04f2b1fb198a1a764daea9785ad29f303b2962418a417541a6ce", size = 8825, upload-time = "2025-04-08T14:41:09.345Z" },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+
+[[package]]
+name = "httpdump"
+version = "0.1.0"
+source = { editable = "." }
+dependencies = [
+    { name = "click" },
+]
+
+[package.optional-dependencies]
+dev = [
+    { name = "click-man" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "click" },
+    { name = "click-man", marker = "extra == 'dev'" },
+]
+provides-extras = ["dev"]
