Skip to content

Commit 22b48c6

Browse files
LinkW77DynamesC
authored and committed
feat: add a new provider siliconcloud
1 parent bac2e7f commit 22b48c6

File tree

8 files changed

+306
-0
lines changed

8 files changed

+306
-0
lines changed

inference/providers/siliconcloud/__init__.py

Whitespace-only changes.
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
from typing import Tuple, Dict
2+
3+
from app.models import ModelSchema
4+
from provider_dependency.chat_completion import *
5+
from .utils import *
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
async def _build_siliconcloud_message(message: ChatCompletionMessage):
    """Convert a ChatCompletionMessage into SiliconCloud's (OpenAI-compatible) wire format."""
    # Plain user/system messages and plain-text assistant replies keep role/content as-is.
    if message.role in (ChatCompletionRole.user, ChatCompletionRole.system) or is_assistant_text_message(message):
        return {"role": message.role.name, "content": message.content}

    # A function result goes back as a "tool" message referencing the originating call id.
    if message.role == ChatCompletionRole.function:
        message: ChatCompletionFunctionMessage
        return {"role": "tool", "content": message.content, "tool_call_id": message.id}

    # An assistant message requesting function calls carries tool_calls and no content.
    if is_assistant_function_calls_message(message):
        message: ChatCompletionAssistantMessage
        tool_calls = []
        for call in message.function_calls:
            args = call.arguments
            if isinstance(args, dict):
                # The API expects arguments as a JSON-encoded string.
                args = json.dumps(args)
            tool_calls.append(
                {
                    "id": call.id,
                    "type": "function",
                    "function": {"name": call.name, "arguments": args},
                }
            )
        return {
            "role": ChatCompletionRole.assistant.name,
            "tool_calls": tool_calls,
            "content": None,
        }
42+
43+
44+
async def _build_siliconcloud_chat_completion_payload(
    messages: List[ChatCompletionMessage],
    stream: bool,
    provider_model_id: str,
    configs: ChatCompletionModelConfiguration,
    function_call: Optional[str],
    functions: Optional[List[ChatCompletionFunction]],
):
    """Build the JSON payload for SiliconCloud's OpenAI-compatible chat completion endpoint.

    :param messages: conversation history to send.
    :param stream: whether to request a streaming response.
    :param provider_model_id: the provider-side model identifier.
    :param configs: model configuration; every non-None field is copied into the payload.
    :param function_call: "none", "auto", or the name of a specific function to force.
    :param functions: function/tool definitions exposed to the model.
    :return: dict ready to be JSON-encoded as the request body.
    """
    # Convert ChatCompletionMessages to the required format.
    formatted_messages = [await _build_siliconcloud_message(msg) for msg in messages]
    logger.debug("formatted_messages: %s", formatted_messages)
    payload = {
        "model": provider_model_id,
        "messages": formatted_messages,
        "stream": stream,
    }
    # Copy every explicitly-set configuration value into the payload.
    for key, value in configs.model_dump().items():
        if value is not None:
            payload[key] = value

    if configs.response_format:
        # The API expects response_format as an object; this overwrites the raw
        # string copied from the config dump above.
        payload["response_format"] = {"type": configs.response_format}

        if configs.response_format == "json_object":
            # JSON mode requires an explicit instruction in the system prompt.
            if payload["messages"][0]["role"] == "system":
                payload["messages"][0][
                    "content"
                ] = f"{payload['messages'][0]['content']} You are designed to output JSON."
            else:
                payload["messages"].insert(0, {"role": "system", "content": "You are designed to output JSON."})

    if function_call:
        if function_call in ("none", "auto"):
            payload["tool_choice"] = function_call
        else:
            # Fix: forcing a named tool must use the OpenAI-compatible object form;
            # the original sent a bare {"name": ...} dict, which the API does not accept.
            payload["tool_choice"] = {"type": "function", "function": {"name": function_call}}
    if functions:
        payload["tools"] = [{"type": "function", "function": f.model_dump()} for f in functions]
    return payload
85+
86+
87+
class SiliconcloudChatCompletionModel(BaseChatCompletionModel):
    """Chat completion model for SiliconCloud's OpenAI-compatible API."""

    def __init__(self):
        super().__init__()

    # ------------------- prepare request data -------------------

    async def prepare_request(
        self,
        stream: bool,
        provider_model_id: str,
        messages: List[ChatCompletionMessage],
        credentials: ProviderCredentials,
        configs: ChatCompletionModelConfiguration,
        function_call: Optional[str] = None,
        functions: Optional[List[ChatCompletionFunction]] = None,
        model_schema: ModelSchema = None,
    ) -> Tuple[str, Dict, Dict]:
        """Return (api_url, headers, payload) for a SiliconCloud chat completion request."""
        api_url = "https://api.siliconflow.cn/v1/chat/completions"
        headers = build_siliconcloud_header(credentials)
        payload = await _build_siliconcloud_chat_completion_payload(
            messages, stream, provider_model_id, configs, function_call, functions
        )
        return api_url, headers, payload

    # ------------------- handle non-stream chat completion response -------------------

    def extract_core_data(self, response_data: Dict, **kwargs) -> Optional[Dict]:
        """Return the first choice of the response, or None when there are no choices."""
        if not response_data.get("choices"):
            return None
        return response_data["choices"][0]

    def extract_usage_data(self, response_data: Dict, **kwargs) -> Tuple[Optional[int], Optional[int]]:
        """Return (prompt_tokens, completion_tokens); None for fields the API omitted."""
        # Fix: the original called .get() on usage even when response_data was truthy
        # but "usage" was missing/None, raising AttributeError.
        usage = (response_data or {}).get("usage") or {}
        return usage.get("prompt_tokens"), usage.get("completion_tokens")

    def extract_text_content(self, data: Dict, **kwargs) -> Optional[str]:
        """Return the assistant message text from a choice, if present."""
        message_data = data.get("message") if data else None
        if message_data and message_data.get("content"):
            return message_data.get("content")
        return None

    def extract_function_calls(self, data: Dict, **kwargs) -> Optional[List[ChatCompletionFunctionCall]]:
        """Return the choice's tool calls as ChatCompletionFunctionCall objects, if any."""
        message_data = data.get("message") if data else None
        # Fix: guard against message_data being None before calling .get() on it.
        if not message_data or not message_data.get("tool_calls"):
            return None
        function_calls = []
        for call in message_data["tool_calls"]:
            function_calls.append(
                build_function_call(
                    name=call["function"]["name"],
                    arguments_str=call["function"]["arguments"],
                )
            )
        return function_calls

    def extract_finish_reason(self, data: Dict, **kwargs) -> Optional[ChatCompletionFinishReason]:
        """Map the provider's finish_reason string onto ChatCompletionFinishReason."""
        if not data:
            return ChatCompletionFinishReason.unknown
        finish_reason = data.get("finish_reason", "stop")
        # Fix: the original rebound finish_reason to an enum member and then looked
        # that member up in __members__ (a str-keyed mapping), so every mapped
        # provider-specific value fell through to unknown. Return members directly.
        if finish_reason == "tool_calls":
            return ChatCompletionFinishReason.function_calls
        if finish_reason == "eos":
            return ChatCompletionFinishReason.stop
        if finish_reason == "eos_token":
            # NOTE(review): "eos_token" -> length mirrors the original mapping — confirm intended.
            return ChatCompletionFinishReason.length
        return ChatCompletionFinishReason.__members__.get(finish_reason, ChatCompletionFinishReason.unknown)

    # ------------------- handle stream chat completion response -------------------

    def stream_check_error(self, sse_data: Dict, **kwargs):
        """Raise a provider API error if the SSE chunk carries an error payload."""
        if sse_data.get("error"):
            raise_provider_api_error(sse_data["error"])

    def stream_extract_chunk_data(self, sse_data: Dict, **kwargs) -> Optional[Dict]:
        """Return the first choice of an SSE chunk, or None when there are no choices."""
        if not sse_data.get("choices"):
            return None
        return sse_data["choices"][0]

    def stream_extract_usage_data(self, sse_data: Dict, input_tokens, output_tokens, **kwargs) -> Tuple[int, int]:
        """Fold a chunk's token usage into the running totals, keeping the maximum seen."""
        usage = sse_data.get("usage") if sse_data else None
        if usage is not None:
            input_tokens = max(input_tokens or 0, usage.get("prompt_tokens", 0))
            output_tokens = max(output_tokens or 0, usage.get("completion_tokens", 0))
        return input_tokens, output_tokens

    def stream_extract_chunk(
        self, index: int, chunk_data: Dict, text_content: str, **kwargs
    ) -> Tuple[int, Optional[ChatCompletionChunk]]:
        """Turn a streamed delta into a ChatCompletionChunk; the index advances only on content."""
        content = chunk_data.get("delta", {}).get("content") if chunk_data else None
        if content:
            return index + 1, ChatCompletionChunk(
                created_timestamp=get_current_timestamp_int(),
                index=index,
                delta=content,
            )
        return index, None

    def stream_extract_finish_reason(self, chunk_data: Dict, **kwargs) -> Optional[ChatCompletionFinishReason]:
        """Map a streamed finish_reason string onto ChatCompletionFinishReason."""
        reason = chunk_data.get("finish_reason", "unknown")
        if reason == "tool_calls":
            return ChatCompletionFinishReason.function_calls
        if reason == "eos":
            return ChatCompletionFinishReason.stop
        return ChatCompletionFinishReason.__members__.get(reason, ChatCompletionFinishReason.unknown)

    def stream_handle_function_calls(
        self, chunk_data: Dict, function_calls_content: ChatCompletionFunctionCallsContent, **kwargs
    ) -> Optional[ChatCompletionFunctionCallsContent]:
        """Accumulate streamed tool-call deltas into function_calls_content.

        Returns the updated accumulator when the chunk carries tool-call deltas,
        otherwise None.
        """
        delta = chunk_data.get("delta", {})
        if delta and delta.get("tool_calls"):
            tool_call = delta["tool_calls"][0]
            tool_call_index = tool_call["index"]  # renamed from the original's "toll_call_index" typo
            tool_call_function = tool_call["function"]

            if tool_call_index == function_calls_content.index:
                # Continuation of the current call: append to its argument string.
                function_calls_content.arguments_strs[function_calls_content.index] += tool_call_function["arguments"]
            elif tool_call_index > function_calls_content.index:
                # A new function call starts: open a fresh name/argument slot.
                function_calls_content.arguments_strs.append(tool_call_function["arguments"] or "")
                function_calls_content.names.append(tool_call_function["name"])
                function_calls_content.index = tool_call_index
            return function_calls_content

        return None
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
siliconcloud_name: "SiliconCloud"
2+
siliconcloud_description: "SiliconCloud is a one-stop cloud service platform launched by SiliconFlow. It focuses on providing developers with a suite of services integrating mainstream open-source large models. The platform aims to accelerate the development and adoption of generative AI (GenAI) applications by offering comprehensive, fast, and cost-effective model APIs."
3+
4+
siliconcloud_api_key_description: "Your SiliconCloud API key for authentication."
5+
6+
wildcard_name: "Wildcard"
7+
wildcard_description: "Wildcard allows you to use any model from SiliconCloud with custom configurations."
Lines changed: 11 additions & 0 deletions
Loading
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
model_schema_id: siliconcloud/wildcard
2+
provider_model_id:
3+
type: wildcard
4+
name: "i18n:wildcard_name"
5+
description: "i18n:wildcard_description"
6+
7+
properties:
8+
function_call: false
9+
streaming: true
10+
input_token_limit: 4096
11+
output_token_limit: 4096
12+
config_schemas:
13+
- config_id: temperature
14+
- config_id: top_p
15+
- config_id: max_tokens
16+
type: int
17+
default: 4096
18+
min: 1
19+
max: 4096
20+
step: 1
21+
- config_id: stop
22+
- config_id: top_k
23+
- config_id: frequency_penalty
24+
25+
pricing:
26+
input_token: 0
27+
output_token: 0
28+
unit: 0
29+
currency: CNY
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
provider_id: siliconcloud
2+
name: "i18n:siliconcloud_name"
3+
description: "i18n:siliconcloud_description"
4+
updated_timestamp: 1719373812000
5+
6+
return_token_usage: true
7+
return_stream_token_usage: true
8+
9+
credentials_schema:
10+
type: object
11+
properties:
12+
SILICONCLOUD_API_KEY:
13+
type: string
14+
description: "i18n:siliconcloud_api_key_description"
15+
secret: true
16+
required:
17+
- SILICONCLOUD_API_KEY
18+
19+
resources:
20+
taskingai_documentation_url: "https://docs.tasking.ai/docs/integration/models/language_models/siliconcloud"
21+
official_site_url: "https://siliconflow.cn/zh-cn/siliconcloud"
22+
official_pricing_url: "https://siliconflow.cn/zh-cn/pricing"
23+
official_credentials_url: "https://cloud.siliconflow.cn/account/ak"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from provider_dependency.chat_completion import *
2+
3+
__all__ = [
    "build_siliconcloud_header",
]


def build_siliconcloud_header(credentials: ProviderCredentials):
    """Build the HTTP headers for a SiliconCloud API request.

    Uses the credentials' API key as a bearer token and requests/sends JSON.
    """
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    headers["Authorization"] = f"Bearer {credentials.SILICONCLOUD_API_KEY}"
    return headers

inference/test/utils/wildcard_test_cases.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,11 @@ wildcard_test_cases:
5151
model_type: "chat_completion"
5252
streaming: True
5353
function_call: True
54+
55+
- provider_id: "siliconcloud"
56+
cases:
57+
- model_schema_id: "siliconcloud/wildcard"
58+
provider_model_id: "01-ai/Yi-1.5-34B-Chat-16K"
59+
model_type: "chat_completion"
60+
streaming: True
61+
function_call: False

0 commit comments

Comments
 (0)