litellm-gateway/config.yaml

72 lines
2.3 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# ============================================
# LiteLLM Gateway — config.yaml
# ============================================
# MiniMax model routing configuration
# Full documentation: https://docs.litellm.ai/docs/proxy/configs
# Model groups exposed by the gateway. Each entry maps a public `model_name`
# to one MiniMax deployment; all three share the same API key and endpoint.
model_list:
  # ── MiniMax M2.7 (primary) ─────────────────────
  - model_name: MiniMax-M2.7
    litellm_params:
      model: minimax/MiniMax-M2.7
      # "os.environ/<NAME>" makes LiteLLM read the value from that env var,
      # so the key itself never lives in this file.
      api_key: os.environ/MINIMAX_API_KEY
      api_base: 'https://api.minimax.io/v1'
      # Requests-per-minute budget for this deployment.
      rpm: 60
    model_info:
      mode: chat
      supports_function_calling: true

  # ── MiniMax M2.5 (backup) ──────────────────────
  - model_name: MiniMax-M2.5
    litellm_params:
      model: minimax/MiniMax-M2.5
      api_key: os.environ/MINIMAX_API_KEY
      api_base: 'https://api.minimax.io/v1'
      rpm: 60
    model_info:
      mode: chat
      supports_function_calling: true

  # ── MiniMax M2.5-Lightning (last resort) ───────
  - model_name: MiniMax-M2.5-Lightning
    litellm_params:
      model: minimax/MiniMax-M2.5-Lightning
      api_key: os.environ/MINIMAX_API_KEY
      api_base: 'https://api.minimax.io/v1'
      # Higher budget than the two entries above (120 vs 60).
      rpm: 120
    model_info:
      # NOTE(review): unlike the two entries above, no
      # supports_function_calling flag is set here — confirm this is intended.
      mode: chat
# ── Routing settings ────────────────────────────
router_settings:
  # Ordered fallback chain. LiteLLM expects `fallbacks` to be a list of
  # single-key mappings: {model group: [groups to try next, in order]}.
  # When a group still fails after its retries, the request is re-sent to
  # the listed groups from top to bottom.
  fallbacks:
    - MiniMax-M2.7:
        - MiniMax-M2.5
        - MiniMax-M2.5-Lightning
    # Requests addressed directly to M2.5 continue down the same chain.
    - MiniMax-M2.5:
        - MiniMax-M2.5-Lightning
  # Retry attempts per model group before falling back.
  num_retries: 3
  # Minimum number of seconds to wait before retrying a failed request.
  retry_after: 5
  # Per-request timeout, in seconds.
  timeout: 30
  # Selects the lowest-latency deployment *within* a model group. Every group
  # in this file has exactly one deployment, so model preference is decided
  # by the `fallbacks` chain above, not by this strategy.
  routing_strategy: latency-based-routing
# ── Global settings ─────────────────────────────
litellm_settings:
  # Silently drop request parameters the target provider does not support
  # instead of rejecting the request.
  drop_params: true
  # Keep LiteLLM's verbose debug output off.
  set_verbose: false
  # Per-request logging to the database needs no callback entry: the proxy
  # writes its spend logs on its own once `general_settings.database_url`
  # points at a working database. The previous `success_callback:
  # ["database"]` was removed because "database" is not a callback name
  # LiteLLM provides; add real integrations (e.g. "langfuse") here if needed.
# ── Server settings ─────────────────────────────
general_settings:
  # Admin master key (must start with "sk-"), read from the environment.
  master_key: os.environ/LITELLM_MASTER_KEY
  # Database connection string, read from the environment. The LiteLLM proxy
  # persists data through Prisma against PostgreSQL, so a SQLite URL such as
  # "sqlite:///./litellm.db" does not work. Set DATABASE_URL to something
  # like "postgresql://<user>:<password>@<host>:<port>/<dbname>".
  database_url: os.environ/DATABASE_URL
  # Access mode for the admin UI.
  # NOTE(review): confirm the accepted value for the deployed LiteLLM version;
  # the UI docs may spell the admin-only mode as "admin_only".
  ui_access_mode: "admin"