From 5df4faf0ec906b91d11299b9a91ca23c8863751b Mon Sep 17 00:00:00 2001
From: cerealbox <476487+cerealbox@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:40:58 -0400
Subject: [PATCH 1/2] Update llama_types.py

allow "json_schema" in response_format.
---
 llama_cpp/llama_types.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama_types.py b/llama_cpp/llama_types.py
index bbb58afc3..eb6d593e6 100644
--- a/llama_cpp/llama_types.py
+++ b/llama_cpp/llama_types.py
@@ -156,10 +156,13 @@ class ChatCompletionFunctionCallOption(TypedDict):
 
 
 class ChatCompletionRequestResponseFormat(TypedDict):
-    type: Literal["text", "json_object"]
+    type: Literal["text", "json_object", "json_schema"]
     schema: NotRequired[
         JsonType
     ]  # https://docs.endpoints.anyscale.com/guides/json_mode/
+    json_schema: NotRequired[
+        JsonType
+    ]  # OpenAI-style wrapper object; its "schema" member holds the JSON Schema
 
 
 class ChatCompletionRequestMessageContentPartText(TypedDict):

From 9e8ba962b9e84ad6c1bcc487a7366f66535a377a Mon Sep 17 00:00:00 2001
From: cerealbox <476487+cerealbox@users.noreply.github.com>
Date: Tue, 13 Aug 2024 03:41:53 -0400
Subject: [PATCH 2/2] Update llama_chat_format.py

Accept the OpenAI-style 'json_schema' response_format by normalizing it to
the internal 'json_object' form inside _grammar_for_response_format. The
normalization works on a copy so the caller's dict is never mutated, and it
tolerates a missing 'json_schema'/'schema' key instead of raising KeyError.
The call-site checks are widened to include 'json_schema' rather than
dropped entirely, so a {"type": "text"} response_format no longer clobbers
a grammar that was selected earlier in the handler.
---
 llama_cpp/llama_chat_format.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index ea8d07feb..af4a078d6 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -582,7 +582,7 @@ def chat_completion_handler(
         if result.stopping_criteria is not None:
             stopping_criteria = result.stopping_criteria
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
             grammar = _grammar_for_response_format(
                 response_format, verbose=llama.verbose
             )
@@ -928,6 +928,16 @@ def _grammar_for_response_format(
     response_format: llama_types.ChatCompletionRequestResponseFormat,
     verbose: bool = False,
 ):
+
+    # Normalize an OpenAI-style 'json_schema' response format to the internal
+    # 'json_object' form on a copy, so the caller's dict is never mutated.
+    if response_format["type"] == "json_schema":
+        json_schema = response_format.get("json_schema") or {}
+        normalized = {"type": "json_object"}
+        if "schema" in json_schema:
+            normalized["schema"] = json_schema["schema"]
+        response_format = normalized
+
     if response_format["type"] != "json_object":
         return None
 
@@ -2830,7 +2840,7 @@ def embed_image_bytes(image_bytes: bytes):
        # Get prompt tokens to avoid a cache miss
        prompt = llama.input_ids[: llama.n_tokens].tolist()
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
            grammar = _grammar_for_response_format(response_format)
 
        # Convert legacy functions to tools
@@ -3442,7 +3452,7 @@ def chatml_function_calling(
            add_generation_prompt=True,
        )
 
-        if response_format is not None and response_format["type"] == "json_object":
+        if response_format is not None and response_format["type"] in ("json_object", "json_schema"):
            grammar = _grammar_for_response_format(response_format)
 
        return _convert_completion_to_chat(