From 5df4faf0ec906b91d11299b9a91ca23c8863751b Mon Sep 17 00:00:00 2001
From: cerealbox <476487+cerealbox@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:40:58 -0400
Subject: [PATCH 1/2] Update llama_types.py

allow "json_schema" in response_format.
---
 llama_cpp/llama_types.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama_types.py b/llama_cpp/llama_types.py
index bbb58afc3..eb6d593e6 100644
--- a/llama_cpp/llama_types.py
+++ b/llama_cpp/llama_types.py
@@ -156,10 +156,13 @@ class ChatCompletionFunctionCallOption(TypedDict):
 
 
 class ChatCompletionRequestResponseFormat(TypedDict):
-    type: Literal["text", "json_object"]
+    type: Literal["text", "json_object", "json_schema"]
     schema: NotRequired[
         JsonType
     ]  # https://docs.endpoints.anyscale.com/guides/json_mode/
+    json_schema: NotRequired[
+        JsonType
+    ]  # OpenAI-style wrapper object; its "schema" member holds the JSON Schema
 
 
 class ChatCompletionRequestMessageContentPartText(TypedDict):

From 9e8ba962b9e84ad6c1bcc487a7366f66535a377a Mon Sep 17 00:00:00 2001
From: cerealbox <476487+cerealbox@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:41:53 -0400
Subject: [PATCH 2/2] Update llama_chat_format.py

Accept the OpenAI-style 'json_schema' response_format by normalizing it to
the internal 'json_object' form inside _grammar_for_response_format. The
normalization works on a copy so the caller's dict is never mutated, and it
tolerates a missing 'json_schema'/'schema' key instead of raising KeyError.
The call-site checks are widened to include 'json_schema' rather than
dropped entirely, so a {"type": "text"} response_format no longer clobbers
a grammar that was selected earlier in the handler.
---
 llama_cpp/llama_chat_format.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index ea8d07feb..af4a078d6 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -582,7 +582,7 @@ def chat_completion_handler(
         if result.stopping_criteria is not None:
             stopping_criteria = result.stopping_criteria
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
             grammar = _grammar_for_response_format(
                 response_format, verbose=llama.verbose
             )
@@ -928,6 +928,16 @@ def _grammar_for_response_format(
     response_format: llama_types.ChatCompletionRequestResponseFormat,
     verbose: bool = False,
 ):
+
+    # Normalize an OpenAI-style 'json_schema' response format to the internal
+    # 'json_object' form on a copy, so the caller's dict is never mutated.
+    if response_format["type"] == "json_schema":
+        json_schema = response_format.get("json_schema") or {}
+        normalized = {"type": "json_object"}
+        if "schema" in json_schema:
+            normalized["schema"] = json_schema["schema"]
+        response_format = normalized
+
     if response_format["type"] != "json_object":
         return None
 
@@ -2830,7 +2840,7 @@ def embed_image_bytes(image_bytes: bytes):
        # Get prompt tokens to avoid a cache miss
        prompt = llama.input_ids[: llama.n_tokens].tolist()
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
            grammar = _grammar_for_response_format(response_format)
 
        # Convert legacy functions to tools
@@ -3442,7 +3452,7 @@ def chatml_function_calling(
            add_generation_prompt=True,
        )
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
            grammar = _grammar_for_response_format(response_format)
 
        return _convert_completion_to_chat(