Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert removing the unused imports #385

Merged
merged 2 commits on Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/sglang/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
import re
from typing import Callable, List, Optional, Union

from sglang.backend.anthropic import Anthropic
from sglang.backend.base_backend import BaseBackend
from sglang.backend.openai import OpenAI
from sglang.backend.runtime_endpoint import RuntimeEndpoint
from sglang.backend.vertexai import VertexAI
from sglang.global_config import global_config
from sglang.lang.ir import (
SglExpr,
Expand Down
4 changes: 4 additions & 0 deletions python/sglang/backend/anthropic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from typing import List, Optional, Union

import numpy as np

from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/backend/base_backend.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import Callable, List, Optional, Union

from sglang.lang.chat_template import get_chat_template
from sglang.lang.interpreter import StreamExecutor
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/backend/openai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import time
from typing import List, Optional
from typing import Callable, List, Optional, Union

import numpy as np

Expand Down
7 changes: 4 additions & 3 deletions python/sglang/backend/runtime_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import json
from typing import List, Optional
from typing import Callable, List, Optional, Union

import numpy as np
import requests

from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams
from sglang.utils import find_printable_text, http_request
from sglang.lang.ir import SglArgument, SglSamplingParams
from sglang.utils import encode_image_base64, find_printable_text, http_request


class RuntimeEndpoint(BaseBackend):
Expand Down
3 changes: 3 additions & 0 deletions python/sglang/backend/vertexai.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import os
import warnings
from typing import List, Optional, Union

import numpy as np

from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
Expand Down
4 changes: 2 additions & 2 deletions python/sglang/lang/chat_template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Callable, Dict, List, Tuple
from typing import Callable, Dict, List, Optional, Tuple


class ChatTemplateStyle(Enum):
Expand Down
8 changes: 7 additions & 1 deletion python/sglang/lang/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@

from sglang.global_config import global_config
from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
from sglang.lang.ir import (
SglArgument,
SglConstantText,
SglExpr,
SglSamplingParams,
SglVariable,
)


def compile_func(function, backend):
Expand Down
3 changes: 2 additions & 1 deletion python/sglang/lang/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import uuid
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
from typing import Any, Callable, Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, Union

import tqdm

Expand All @@ -18,6 +18,7 @@
SglConstantText,
SglExpr,
SglExprList,
SglFunction,
SglGen,
SglImage,
SglRoleBegin,
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/lang/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,4 +472,4 @@ def __init__(self):
super().__init__()

def __repr__(self):
return "CommitLazy()"
return f"CommitLazy()"
6 changes: 5 additions & 1 deletion python/sglang/lang/tracer.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
"""Tracing a program."""

import uuid
from typing import Any, Dict, List, Optional
from typing import Any, Callable, Dict, List, Optional, Union

from sglang.backend.base_backend import BaseBackend
from sglang.global_config import global_config
from sglang.lang.interpreter import ProgramState, ProgramStateGroup
from sglang.lang.ir import (
SglArgument,
SglCommitLazy,
SglConcateAndAppend,
SglConstantText,
SglExpr,
SglExprList,
SglFork,
SglFunction,
SglGen,
SglGetForkItem,
SglRoleBegin,
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/hf_transformers_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import os
import warnings
from typing import Optional, Union
from typing import List, Optional, Tuple, Union

from huggingface_hub import snapshot_download
from transformers import (
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/managers/detokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def start_detokenizer_process(
):
try:
manager = DetokenizerManager(server_args, port_args)
except Exception:
except Exception as e:
pipe_writer.send(get_exception_traceback())
raise
pipe_writer.send("init ok")
Expand Down
2 changes: 2 additions & 0 deletions python/sglang/srt/managers/router/radix_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import heapq
import time
from collections import defaultdict
from dataclasses import dataclass
from typing import Tuple

import torch

Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/commandr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

# This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model."""
from typing import Optional, Tuple
from typing import List, Optional, Tuple

import torch
import torch.utils.checkpoint
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/llama2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adapted from
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
"""Inference-only LLaMA model compatible with HuggingFace weights."""
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import torch
from torch import nn
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import torch
from torch import nn
from transformers import CLIPVisionModel, LlavaConfig
from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
from vllm.model_executor.layers.linear import LinearMethodBase
from vllm.model_executor.weight_utils import (
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/mixtral.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adapted from
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
"""Inference-only Mixtral model."""
from typing import Optional
from typing import List, Optional, Tuple

import numpy as np
import torch
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/qwen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional, Tuple

import torch
from torch import nn
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/models/qwen2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Adapted from llama2.py
# Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import torch
from torch import nn
Expand Down
4 changes: 3 additions & 1 deletion python/sglang/srt/models/yivl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Inference-only Yi-VL model."""

from typing import Optional
import os
from typing import List, Optional

import torch
import torch.nn as nn
Expand All @@ -12,6 +13,7 @@

from sglang.srt.models.llava import (
LlavaLlamaForCausalLM,
clip_vision_embed_forward,
monkey_path_clip_vision_embed_forward,
)

Expand Down
8 changes: 4 additions & 4 deletions python/sglang/srt/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import time
from typing import List, Optional, Union

# Fix a Python bug
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)

import aiohttp
import psutil
import pydantic
Expand Down Expand Up @@ -55,9 +58,6 @@
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import enable_show_time_cost, handle_port_init

# Fix a Python bug
setattr(threading, "_register_atexit", lambda *args, **kwargs: None)

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

API_KEY_HEADER_NAME = "X-API-Key"
Expand Down Expand Up @@ -619,7 +619,7 @@ def _wait_and_warmup():
try:
requests.get(url + "/get_model_info", timeout=5, headers=headers)
break
except requests.exceptions.RequestException:
except requests.exceptions.RequestException as e:
pass
else:
if pipe_finish_writer is not None:
Expand Down
1 change: 1 addition & 0 deletions python/sglang/srt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def get_exception_traceback():


def get_int_token_logit_bias(tokenizer, vocab_size):
from transformers import LlamaTokenizer, LlamaTokenizerFast

# a bug when model's vocab size > tokenizer.vocab_size
vocab_size = tokenizer.vocab_size
Expand Down
1 change: 1 addition & 0 deletions test/lang/run_all.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import glob
import multiprocessing
import os
import time
import unittest

Expand Down
1 change: 1 addition & 0 deletions test/lang/test_anthropic_backend.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import unittest

from sglang import Anthropic, set_default_backend
Expand Down
3 changes: 3 additions & 0 deletions test/lang/test_srt_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
"""

import json
import unittest

import sglang as sgl
Expand All @@ -12,6 +13,8 @@
test_few_shot_qa,
test_mt_bench,
test_parallel_decoding,
test_parallel_encoding,
test_react,
test_regex,
test_select,
test_stream,
Expand Down
2 changes: 1 addition & 1 deletion test/lang/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def tip_suggestion(s):
forks = s.fork(3)
for i in range(3):
forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
forks[i] += sgl.gen("detailed_tip")
forks[i] += sgl.gen(f"detailed_tip")

s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
Expand Down
1 change: 1 addition & 0 deletions test/srt/model/reference_hf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
Expand Down
4 changes: 4 additions & 0 deletions test/srt/model/test_llama_extend.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import multiprocessing
import os
import time

import numpy as np
import torch
import torch.distributed as dist
import transformers

from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
Expand Down
5 changes: 4 additions & 1 deletion test/srt/model/test_llava_low_api.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import multiprocessing
import time

import numpy as np
import torch
import torch.distributed as dist

from sglang.srt.hf_transformers_utils import get_processor
from sglang.srt.managers.router.model_runner import ModelRunner
from sglang.srt.managers.router.infer_batch import ForwardMode
from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner
from sglang.srt.model_config import ModelConfig
from sglang.srt.utils import load_image

Expand Down
3 changes: 3 additions & 0 deletions test/srt/test_httpserver_concurrent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@

import argparse
import asyncio
import json
import time

import aiohttp
import requests


async def send_request(url, data, delay=0):
Expand Down
1 change: 1 addition & 0 deletions test/srt/test_httpserver_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import argparse
import asyncio
import json
import time

import aiohttp
import requests
Expand Down
1 change: 1 addition & 0 deletions test/srt/test_httpserver_reuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import argparse
import time

import requests

Expand Down