pip install langchain
pip install langchain-openai
- 正常 Hugging Face 启动
python llm_fastapi.py
- vLLM加速启动
python vllm_fastapi.py
python client.py
curl https://localhost:7000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen2-4b-chat",
"messages": [{"role": "user", "content": "你好,你是谁?"}]
}'