Source code for shmpipeline.kernels.gpu.affine_transform

"""GPU affine transformation kernel."""

from __future__ import annotations

from typing import Any, Mapping

import torch

from shmpipeline.config import KernelConfig, SharedMemoryConfig
from shmpipeline.errors import ConfigValidationError
from shmpipeline.kernels.gpu.base import GpuKernel, as_gpu_tensor


class AffineTransformGpuKernel(GpuKernel):
    """Apply an affine transform y = A x + b to a vector input.

    The trigger input supplies ``x``. The first auxiliary buffer supplies
    the matrix ``A`` and the second auxiliary buffer supplies the offset
    ``b``; the result ``y`` is written into the output buffer.
    """

    kind = "gpu.affine_transform"
    # Two auxiliary buffers are read by this kernel: the matrix, then the
    # offset (see the index-0 / index-1 lookups in the methods below).
    auxiliary_arity = 2

    @classmethod
    def validate_config(
        cls,
        config: KernelConfig,
        shared_memory: Mapping[str, SharedMemoryConfig],
    ) -> None:
        """Check that the configured buffers can form ``y = A x + b``.

        Runs the parent class validation first, then verifies, in order:
        the rank of each buffer, the agreement of their lengths, and that
        all four buffers declare the same dtype.

        Args:
            config: Kernel configuration naming the input, the two
                auxiliary buffers and the output.
            shared_memory: Buffer specifications keyed by buffer name.

        Raises:
            ConfigValidationError: On the first rank, length or dtype rule
                that is violated.
        """
        super().validate_config(config, shared_memory)

        x_spec = shared_memory[config.input]
        a_spec = shared_memory[config.auxiliary_names[0]]
        b_spec = shared_memory[config.auxiliary_names[1]]
        y_spec = shared_memory[config.output]

        # (buffer spec, required number of dimensions, failure message),
        # listed in the order the checks are reported.
        rank_rules = (
            (
                x_spec,
                1,
                f"kernel {config.name!r} requires a 1D input vector",
            ),
            (
                a_spec,
                2,
                f"kernel {config.name!r} requires a 2D transform matrix",
            ),
            (
                b_spec,
                1,
                f"kernel {config.name!r} requires a 1D offset vector",
            ),
            (
                y_spec,
                1,
                f"kernel {config.name!r} requires a 1D output vector",
            ),
        )
        for spec, rank, message in rank_rules:
            if len(spec.shape) != rank:
                raise ConfigValidationError(message)

        # Ranks are known to be correct from here on, so the matrix shape
        # can be split into its two extents.
        rows, cols = a_spec.shape

        if cols != x_spec.shape[0]:
            raise ConfigValidationError(
                f"kernel {config.name!r} requires matrix columns to match input vector length"
            )
        if rows != b_spec.shape[0]:
            raise ConfigValidationError(
                f"kernel {config.name!r} requires matrix rows to match offset vector length"
            )
        if y_spec.shape[0] != rows:
            raise ConfigValidationError(
                f"kernel {config.name!r} requires output vector length to match matrix rows"
            )

        # A single distinct dtype means every buffer agrees.
        distinct_dtypes = {
            spec.dtype for spec in (x_spec, a_spec, b_spec, y_spec)
        }
        if len(distinct_dtypes) != 1:
            raise ConfigValidationError(
                f"kernel {config.name!r} requires matching dtypes across all affine inputs and outputs"
            )

    def compute_into(
        self,
        trigger_input: Any,
        output: Any,
        auxiliary_inputs: Mapping[str, Any],
    ) -> None:
        """Write ``A @ x + b`` into ``output`` in place.

        Args:
            trigger_input: The vector ``x``; passed through
                ``as_gpu_tensor`` for ``self.device``.
            output: Destination passed as ``out=`` to the torch calls.
                NOTE(review): presumably already a tensor on
                ``self.device`` -- confirm against the caller.
            auxiliary_inputs: Auxiliary buffers keyed by alias; the first
                configured alias is used as the matrix and the second as
                the offset.
        """
        vector = as_gpu_tensor(trigger_input, device=self.device)

        aliases = self.context.config.auxiliary_aliases
        matrix = as_gpu_tensor(
            auxiliary_inputs[aliases[0]], device=self.device
        )
        offset = as_gpu_tensor(
            auxiliary_inputs[aliases[1]], device=self.device
        )

        # Product first, then the offset is accumulated onto the same
        # buffer, so both steps write directly into ``output``.
        torch.matmul(matrix, vector, out=output)
        torch.add(output, offset, out=output)

        # Wait for the queued CUDA work on the output's device to finish
        # before returning.
        torch.cuda.synchronize(output.device)