From 771b9b0ac044edc3fac7dd1171da5e93f2f64206 Mon Sep 17 00:00:00 2001
From: Eric Curtin
Date: Fri, 27 Feb 2026 16:07:39 +0000
Subject: [PATCH] set ContentLength when forwarding requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HTTP/2 clients (e.g. Java HttpClient with HTTP_2 version) often omit the
Content-Length header since HTTP/2 uses DATA frames for body framing.
When DMR's reverse proxy forwards such requests to the backend via
HTTP/1.1, it uses Transfer-Encoding: chunked (ContentLength == -1),
which vLLM's Python/uvicorn server fails to parse — resulting in an
empty body and a 422 Unprocessable Entity response.

Fix by explicitly setting ContentLength = len(body) on the upstream
request after replacing the body with the already-buffered bytes. This
ensures a Content-Length header is always sent, consistent with how the
Ollama and Anthropic handlers already handle this.

llama.cpp was unaffected because its C/C++ HTTP server handles chunked
encoding gracefully.

Signed-off-by: Eric Curtin
---
 pkg/inference/scheduling/http_handler.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pkg/inference/scheduling/http_handler.go b/pkg/inference/scheduling/http_handler.go
index a9f3077b..d018ad0b 100644
--- a/pkg/inference/scheduling/http_handler.go
+++ b/pkg/inference/scheduling/http_handler.go
@@ -271,8 +271,14 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
 	}()

 	// Create a request with the body replaced for forwarding upstream.
+	// Set ContentLength explicitly so the backend always receives a Content-Length
+	// header. Without this, HTTP/2 requests (where clients may omit Content-Length)
+	// are forwarded with Transfer-Encoding: chunked, which some backends (e.g.
+	// vLLM's Python/uvicorn server) fail to parse, resulting in an empty body and
+	// a 422 response.
 	upstreamRequest := r.Clone(r.Context())
 	upstreamRequest.Body = io.NopCloser(bytes.NewReader(body))
+	upstreamRequest.ContentLength = int64(len(body))

 	// Perform the request.
 	runner.ServeHTTP(w, upstreamRequest)