Skip to content

Commit

Permalink
fix format
Browse files — browse the repository at this point in the history
  • Loading branch information
Edwardf0t1 committed Feb 1, 2025
1 parent a8babf8 commit 12f49e1
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions python/sglang/srt/layers/quantization/modelopt_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
requantize_with_max_scale,
)

from sglang.srt.layers.attention import AttentionBackend
from sglang.srt.layers.linear import LinearBase, LinearMethodBase
from sglang.srt.layers.parameter import ModelWeightParameter, PerTensorScaleParameter
from sglang.srt.layers.quantization.base_config import (
Expand Down Expand Up @@ -71,12 +72,12 @@ def from_config(cls, config: Dict[str, Any]) -> "ModelOptFp8Config":
def get_quant_method(
self, layer: torch.nn.Module, prefix: str
) -> Optional["QuantizeMethodBase"]:
from vllm.attention.layer import Attention

if isinstance(layer, LinearBase):
return ModelOptFp8LinearMethod(self)
elif isinstance(layer, Attention):
if isinstance(layer, AttentionBackend):
return ModelOptFp8KVCacheMethod(self)

return None

def get_scaled_act_names(self) -> List[str]:
Expand Down Expand Up @@ -182,7 +183,7 @@ def apply(

class ModelOptFp8KVCacheMethod(BaseKVCacheMethod):
"""
Supports loading kv-cache scaling factors from FP8 checkpoints.
Handles loading FP8 kv-cache scaling factors from modelopt quantized checkpoints.
"""

def __init__(self, quant_config: ModelOptFp8Config):
Expand Down

0 comments on commit 12f49e1

Please sign in to comment.