Source code for shmpipeline.kernels.gpu.scale_offset

"""GPU scale-and-offset kernel."""

from __future__ import annotations

from typing import Any, Mapping

import torch

from shmpipeline.config import KernelConfig, SharedMemoryConfig
from shmpipeline.errors import ConfigValidationError
from shmpipeline.kernels.gpu._common import (
    require_numeric_parameter,
    validate_same_dtype,
)
from shmpipeline.kernels.gpu.base import GpuKernel, as_gpu_tensor


class ScaleOffsetGpuKernel(GpuKernel):
    """Elementwise affine kernel computing ``output = gain * input - offset``.

    ``gain`` is a numeric kernel parameter read once at construction time.
    ``offset`` is supplied through the kernel's first auxiliary stream.
    """

    kind = "gpu.scale_offset"
    # Exactly one auxiliary stream is used by this kernel: the offset.
    auxiliary_arity = 1

    @classmethod
    def validate_config(
        cls,
        config: KernelConfig,
        shared_memory: Mapping[str, SharedMemoryConfig],
    ) -> None:
        """Check the ``gain`` parameter and the shape/dtype of all streams.

        Runs the base-class validation first, then requires a numeric
        ``gain`` parameter, identical shapes for the input, offset, and
        output streams, and finally delegates the dtype comparison of those
        three streams to ``validate_same_dtype``.

        Raises:
            ConfigValidationError: If the input, offset, and output shapes
                are not all equal.
        """
        super().validate_config(config, shared_memory)
        require_numeric_parameter(config, name="gain")

        offset_name = config.auxiliary_names[0]
        input_spec = shared_memory[config.input]
        offset_spec = shared_memory[offset_name]
        output_spec = shared_memory[config.output]

        # The input shape is the reference; offset is compared before output.
        shape_mismatch = any(
            input_spec.shape != other.shape
            for other in (offset_spec, output_spec)
        )
        if shape_mismatch:
            raise ConfigValidationError(
                f"kernel {config.name!r} requires matching shapes for input, offset, and output"
            )

        validate_same_dtype(
            config,
            shared_memory,
            names=(config.input, offset_name, config.output),
            description="scale-offset streams",
        )

    def __init__(self, context) -> None:
        """Initialise the base kernel and store the configured ``gain``."""
        super().__init__(context)
        self.gain = require_numeric_parameter(context.config, name="gain")

    def compute_into(
        self,
        trigger_input: Any,
        output: Any,
        auxiliary_inputs: Mapping[str, Any],
    ) -> None:
        """Write ``gain * trigger_input - offset`` into ``output`` in place.

        The offset is looked up in ``auxiliary_inputs`` under the first
        configured auxiliary alias. Both operands are passed through
        ``as_gpu_tensor`` for ``self.device`` before the arithmetic runs.
        """
        offset_key = self.context.config.auxiliary_aliases[0]
        offset_tensor = as_gpu_tensor(
            auxiliary_inputs[offset_key],
            device=self.device,
        )
        scaled_source = as_gpu_tensor(trigger_input, device=self.device)

        # Two steps, both writing straight into ``output``: scale, then
        # subtract the offset from the scaled result.
        torch.mul(scaled_source, self.gain, out=output)
        torch.sub(output, offset_tensor, out=output)

        # Wait for the queued work on the output's device before returning.
        torch.cuda.synchronize(output.device)