单模型 

llama-server.exe -m "G:\AI-AI\LLM\stablediffusionv2.gguf" --port   8081

多模型

llama-server.exe --config_file <config_file>

{
    "host": "0.0.0.0",
    "port": 8080,
    "models": [
        {
            "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
            "model_alias": "gpt-3.5-turbo",
            "chat_format": "chatml",
            "n_gpu_layers": -1,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 2048
        },
        {
            "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf",
            "model_alias": "gpt-4",
            "chat_format": "chatml",
            "n_gpu_layers": -1,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 2048
        },
        {
            "model": "models/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf",
            "model_alias": "gpt-4-vision-preview",
            "chat_format": "llava-1-5",
            "clip_model_path": "models/ggml_llava-v1.5-7b/mmproj-model-f16.gguf",
            "n_gpu_layers": -1,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 2048
        },
        {
            "model": "models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf",
            "model_alias": "text-davinci-003",
            "n_gpu_layers": -1,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 512,
            "n_ctx": 2048
        },
        {
            "model": "models/replit-code-v1_5-3b-GGUF/replit-code-v1_5-3b.Q4_0.gguf",
            "model_alias": "copilot-codex",
            "n_gpu_layers": -1,
            "offload_kqv": true,
            "n_threads": 12,
            "n_batch": 1024,
            "n_ctx": 9216
        }
    ]
}

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐