Commit c02e313: Fix block wise fp8 torch compile (#3232)

Author: ispobock
Date: Jan 31, 2025
Parent: 734daed

Showing 1 changed file with 7 additions and 0 deletions.
python/sglang/srt/layers/quantization/fp8.py (7 additions, 0 deletions)

```diff
@@ -290,6 +290,13 @@ def process_weights_after_loading(self, layer: Module) -> None:
                     weight_scale, requires_grad=False
                 )
                 layer.input_scale = None
+            else:
+                layer.weight = torch.nn.Parameter(
+                    layer.weight.data, requires_grad=False
+                )
+                layer.weight_scale_inv = torch.nn.Parameter(
+                    layer.weight_scale_inv.data, requires_grad=False
+                )
             return
         layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)
         # If checkpoint not serialized fp8, quantize the weights.
```
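The added branch re-wraps the block-quantized weight and its inverse scale as frozen `torch.nn.Parameter` objects instead of leaving them as plain tensor attributes, which is the pattern `torch.compile` expects for module weights. Below is a minimal, hypothetical sketch of that pattern on a simplified module; the class `BlockFp8Linear` and its shapes are illustrative stand-ins, not code from sglang.

```python
import torch
from torch import nn


class BlockFp8Linear(nn.Module):
    # Hypothetical stand-in for a block-wise fp8 linear layer: it holds a
    # quantized weight plus a per-block inverse scale, initially as plain
    # tensor attributes (as they would be right after checkpoint loading).
    def __init__(self, out_features: int, in_features: int):
        super().__init__()
        self.weight = torch.randn(out_features, in_features)
        self.weight_scale_inv = torch.ones(out_features, 1)

    def process_weights_after_loading(self) -> None:
        # Mirror the commit's added else-branch: re-wrap the raw tensors as
        # non-trainable Parameters so torch.compile traces them as stable
        # module parameters rather than arbitrary tensor attributes.
        self.weight = nn.Parameter(self.weight.data, requires_grad=False)
        self.weight_scale_inv = nn.Parameter(
            self.weight_scale_inv.data, requires_grad=False
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Illustrative dequantize-and-matmul; the real kernel differs.
        return x @ (self.weight * self.weight_scale_inv).t()


layer = BlockFp8Linear(4, 8)
layer.process_weights_after_loading()
print(isinstance(layer.weight, nn.Parameter))          # now a Parameter
print(layer.weight.requires_grad)                      # frozen for inference
print(layer(torch.randn(2, 8)).shape)
```

Note that assigning an `nn.Parameter` to an attribute name that previously held a plain tensor is safe: `nn.Module.__setattr__` removes the old entry and registers the new value as a parameter.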
