vllm.config.kernel ¶

KernelConfig ¶

Configuration for kernel selection and warmup behavior.

Source code in vllm/config/kernel.py

@config
class KernelConfig:
    """Configuration for kernel selection and warmup behavior."""

    enable_flashinfer_autotune: bool = None  # type: ignore[assignment]
    """If True, run FlashInfer autotuning during kernel warmup."""

    moe_backend: MoEBackend = "auto"
    """Backend for MoE expert computation kernels. Available options:

    - "auto": Automatically select the best backend based on model and hardware
    - "triton": Use Triton-based fused MoE kernels
    - "deep_gemm": Use DeepGEMM kernels (FP8 block-quantized only)
    - "cutlass": Use vLLM CUTLASS kernels
    - "flashinfer_trtllm": Use FlashInfer with TRTLLM-GEN kernels
    - "flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
    - "flashinfer_cutedsl": Use FlashInfer with CuteDSL kernels (FP4 only)
    - "marlin": Use Marlin kernels (weight-only quantization)
    - "aiter": Use AMD AITer kernels (ROCm only)"""

    @field_validator("moe_backend", mode="before")
    @classmethod
    def _normalize_moe_backend(cls, value: Any) -> Any:
        if isinstance(value, str):
            return value.lower().replace("-", "_")
        return value

    def compute_hash(self) -> str:
        """
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        """
        # no factors to consider.
        # this config will not affect the computation graph.
        factors: list[Any] = []
        hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
        return hash_str

    @field_validator("enable_flashinfer_autotune", mode="wrap")
    @classmethod
    def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
        """Skip validation if the value is `None` when initialization is delayed."""
        if value is None:
            return value
        return handler(value)

enable_flashinfer_autotune `class-attribute` `instance-attribute` ¶

enable_flashinfer_autotune: bool = None

If True, run FlashInfer autotuning during kernel warmup.

moe_backend `class-attribute` `instance-attribute` ¶

moe_backend: MoEBackend = 'auto'

Backend for MoE expert computation kernels. Available options:

"auto": Automatically select the best backend based on model and hardware
"triton": Use Triton-based fused MoE kernels
"deep_gemm": Use DeepGEMM kernels (FP8 block-quantized only)
"cutlass": Use vLLM CUTLASS kernels
"flashinfer_trtllm": Use FlashInfer with TRTLLM-GEN kernels
"flashinfer_cutlass": Use FlashInfer with CUTLASS kernels
"flashinfer_cutedsl": Use FlashInfer with CuteDSL kernels (FP4 only)
"marlin": Use Marlin kernels (weight-only quantization)
"aiter": Use AMD AITer kernels (ROCm only)

_skip_none_validation `classmethod` ¶

_skip_none_validation(value: Any, handler: Callable) -> Any

Skip validation if the value is None when initialization is delayed.

Source code in vllm/config/kernel.py

@field_validator("enable_flashinfer_autotune", mode="wrap")
@classmethod
def _skip_none_validation(cls, value: Any, handler: Callable) -> Any:
    """Skip validation if the value is `None` when initialization is delayed."""
    if value is None:
        return value
    return handler(value)

compute_hash ¶

compute_hash() -> str

WARNING: Whenever a new field is added to this config, ensure that it is included in the factors list if it affects the computation graph.

Provide a hash that uniquely identifies all the configs that affect the structure of the computation graph from input ids/embeddings to the final hidden states, excluding anything before input ids/embeddings and after the final hidden states.

Source code in vllm/config/kernel.py

def compute_hash(self) -> str:
    """
    WARNING: Whenever a new field is added to this config,
    ensure that it is included in the factors list if
    it affects the computation graph.

    Provide a hash that uniquely identifies all the configs
    that affect the structure of the computation
    graph from input ids/embeddings to the final hidden states,
    excluding anything before input ids/embeddings and after
    the final hidden states.
    """
    # no factors to consider.
    # this config will not affect the computation graph.
    factors: list[Any] = []
    hash_str = safe_hash(str(factors).encode(), usedforsecurity=False).hexdigest()
    return hash_str

vllm.config.kernel ¶

KernelConfig ¶

enable_flashinfer_autotune class-attribute instance-attribute ¶

moe_backend class-attribute instance-attribute ¶

_skip_none_validation classmethod ¶

compute_hash ¶

enable_flashinfer_autotune `class-attribute` `instance-attribute` ¶

moe_backend `class-attribute` `instance-attribute` ¶

_skip_none_validation `classmethod` ¶