Skip to content

sdf_pose_network

Parametrized networks for pose and shape estimation.

SDFPoseHead

Bases: Module

Parametrized head to estimate pose and shape from feature vector.

Source code in sdfest/initialization/sdf_pose_network.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
class SDFPoseHead(nn.Module):
    """Parametrized head to estimate pose and shape from feature vector.

    The head is a small MLP (linear -> optional batchnorm -> ReLU per layer)
    followed by one final linear layer whose output columns are split into
    shape, pose, scale and orientation.
    """

    def __init__(
        self,
        in_size: int,
        mlp_out_sizes: List,
        shape_dimension: int,
        batchnorm: bool,
        orientation_repr: Optional[str] = "quaternion",
        orientation_grid_resolution: Optional[int] = None,
    ):
        """Initialize the SDFPoseHead.

        Args:
            in_size:            number of input features
            mlp_out_sizes:
                Output sizes of each linear layer. May be empty, in which case the
                final layer is applied directly to the input features.
            shape_dimension:    dimension of shape description
            batchnorm:          whether to use batchnorm or not
            orientation_repr:
                The orientation representation. One of "quaternion"|"discretized".
            orientation_grid_resolution:
                The resolution of the SO3 grid.
                Only used when orientation_repr == "discretized".

        Raises:
            NotImplementedError: If orientation_repr is not supported.
        """
        super().__init__()

        self._in_size = in_size
        self._mlp_out_sizes = mlp_out_sizes
        self._batchnorm = batchnorm
        self._shape_dimension = shape_dimension
        self._orientation_repr = orientation_repr

        # Every hidden layer consumes the previous layer's output; the first one
        # consumes the raw input features.
        out_sizes = list(mlp_out_sizes)
        layer_in_sizes = [in_size, *out_sizes[:-1]]
        self._linear_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(layer_in_sizes, out_sizes)]
        )

        # One batchnorm per hidden layer; left empty when batchnorm is disabled.
        self._bn_layers = nn.ModuleList(
            [nn.BatchNorm1d(n_out) for n_out in out_sizes] if self._batchnorm else []
        )

        # Without hidden layers the final layer reads the input features directly.
        last_size = out_sizes[-1] if len(out_sizes) > 0 else in_size

        if orientation_repr == "quaternion":
            self._grid = None
            # Output columns: shape, 3 for pose, 1 for scale, 4 for the quaternion.
            self._final_layer = nn.Linear(last_size, self._shape_dimension + 8)
        elif orientation_repr == "discretized":
            self._grid = SO3Grid(orientation_grid_resolution)
            # Output columns: shape, 3 for pose, 1 for scale, one per grid cell.
            self._final_layer = nn.Linear(
                last_size, self._shape_dimension + 4 + self._grid.num_cells()
            )
        else:
            raise NotImplementedError(
                f"orientation_repr {orientation_repr} is not supported."
            )

    def forward(self, x):
        """Forward pass of the module.

        Input represents set of input features used to compute pose.

        Args:
            x: batch of input vectors
        Returns:
            Tuple with the following entries (N is the batch size of a 2-D input):
                The predicted shape vector, shape (N, shape_dimension).
                The predicted pose, shape (N, 3).
                The predicted scale, shape (N,).
                The predicted orientation in the specified orientation representation.
                    For "quaternion" this will be of shape (N,4) with each quaternion
                    having the order (x, y, z, w), i.e., scalar-last, and normalized.
                    For "discretized" this will be of shape (N,M) based on the grid
                    resolution. No activation function is applied. I.e., softmax has
                    to be used to get probabilities, and cross_entropy_loss should be
                    used during training.
        Raises:
            NotImplementedError: If the stored orientation representation is not
                supported.
        """
        # Fail fast, before any computation, if the representation is unknown.
        if self._orientation_repr not in ("quaternion", "discretized"):
            raise NotImplementedError(
                f"orientation_repr {self._orientation_repr} is not supported."
            )

        out = x
        for i, linear_layer in enumerate(self._linear_layers):
            out = linear_layer(out)
            if self._batchnorm:
                out = self._bn_layers[i](out)
            out = nn.functional.relu(out)

        out = self._final_layer(out)

        # Column layout: [shape | pose (3) | scale (1) | orientation (rest)].
        pose_start = self._shape_dimension
        scale_index = pose_start + 3
        orientation_start = scale_index + 1

        orientation = out[:, orientation_start:]
        if self._orientation_repr == "quaternion":
            # Normalize to unit length.
            # NOTE: an exactly all-zero row would divide by zero here.
            orientation = orientation / torch.sqrt(
                torch.sum(orientation ** 2, 1, keepdim=True)
            )

        return (
            out[:, 0:pose_start],
            out[:, pose_start:scale_index],
            out[:, scale_index],
            orientation,
        )

__init__(in_size, mlp_out_sizes, shape_dimension, batchnorm, orientation_repr='quaternion', orientation_grid_resolution=None)

Initialize the SDFPoseHead.

Parameters:

Name Type Description Default
in_size int

number of input features

required
mlp_out_sizes List

output sizes of each linear layer

required
shape_dimension int

dimension of shape description

required
batchnorm bool

whether to use batchnorm or not

required
orientation_repr Optional[str]

The orientation representation. One of "quaternion"|"discretized".

'quaternion'
orientation_grid_resolution Optional[int]

The resolution of the SO3 grid. Only used when orientation_repr == "discretized".

None
Source code in sdfest/initialization/sdf_pose_network.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    in_size: int,
    mlp_out_sizes: List,
    shape_dimension: int,
    batchnorm: bool,
    orientation_repr: Optional[str] = "quaternion",
    orientation_grid_resolution: Optional[int] = None,
):
    """Initialize the SDFPoseHead.

    Args:
        in_size:            number of input features
        mlp_out_sizes:
            Output sizes of each linear layer. May be empty, in which case the
            final layer is applied directly to the input features.
        shape_dimension:    dimension of shape description
        batchnorm:          whether to use batchnorm or not
        orientation_repr:
            The orientation representation. One of "quaternion"|"discretized".
        orientation_grid_resolution:
            The resolution of the SO3 grid.
            Only used when orientation_repr == "discretized".

    Raises:
        NotImplementedError: If orientation_repr is not supported.
    """
    super().__init__()

    self._in_size = in_size
    self._mlp_out_sizes = mlp_out_sizes
    self._batchnorm = batchnorm
    self._shape_dimension = shape_dimension
    self._orientation_repr = orientation_repr

    # Every hidden layer consumes the previous layer's output; the first one
    # consumes the raw input features.
    out_sizes = list(mlp_out_sizes)
    layer_in_sizes = [in_size, *out_sizes[:-1]]
    self._linear_layers = nn.ModuleList(
        [nn.Linear(n_in, n_out) for n_in, n_out in zip(layer_in_sizes, out_sizes)]
    )

    # One batchnorm per hidden layer; left empty when batchnorm is disabled.
    self._bn_layers = nn.ModuleList(
        [nn.BatchNorm1d(n_out) for n_out in out_sizes] if self._batchnorm else []
    )

    # Without hidden layers the final layer reads the input features directly.
    last_size = out_sizes[-1] if len(out_sizes) > 0 else in_size

    if orientation_repr == "quaternion":
        self._grid = None
        # Output columns: shape, 3 for pose, 1 for scale, 4 for the quaternion.
        self._final_layer = nn.Linear(last_size, self._shape_dimension + 8)
    elif orientation_repr == "discretized":
        self._grid = SO3Grid(orientation_grid_resolution)
        # Output columns: shape, 3 for pose, 1 for scale, one per grid cell.
        self._final_layer = nn.Linear(
            last_size, self._shape_dimension + 4 + self._grid.num_cells()
        )
    else:
        raise NotImplementedError(
            f"orientation_repr {orientation_repr} is not supported."
        )

forward(x)

Forward pass of the module.

Input represents set of input features used to compute pose.

Parameters:

Name Type Description Default
x

batch of input vectors

required

Returns: Tuple with the following entries: The predicted shape vector. The predicted pose. The predicted scale. The predicted orientation in the specified orientation representation. For "quaternion" this will be of shape (N,4) with each quaternion having the order (x, y, z, w), i.e., scalar-last, and normalized. For "discretized" this will be of shape (N,M) based on the grid resolution. No activation function is applied. I.e., softmax has to be used to get probabilities, and cross_entropy_loss should be used during training.

Source code in sdfest/initialization/sdf_pose_network.py
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def forward(self, x):
    """Forward pass of the module.

    Input represents set of input features used to compute pose.

    Args:
        x: batch of input vectors
    Returns:
        Tuple with the following entries (N is the batch size of a 2-D input):
            The predicted shape vector, shape (N, shape_dimension).
            The predicted pose, shape (N, 3).
            The predicted scale, shape (N,).
            The predicted orientation in the specified orientation representation.
                For "quaternion" this will be of shape (N,4) with each quaternion
                having the order (x, y, z, w), i.e., scalar-last, and normalized.
                For "discretized" this will be of shape (N,M) based on the grid
                resolution. No activation function is applied. I.e., softmax has
                to be used to get probabilities, and cross_entropy_loss should be
                used during training.
    Raises:
        NotImplementedError: If the stored orientation representation is not
            supported.
    """
    # Fail fast, before any computation, if the representation is unknown.
    if self._orientation_repr not in ("quaternion", "discretized"):
        raise NotImplementedError(
            f"orientation_repr {self._orientation_repr} is not supported."
        )

    out = x
    for i, linear_layer in enumerate(self._linear_layers):
        out = linear_layer(out)
        if self._batchnorm:
            out = self._bn_layers[i](out)
        out = nn.functional.relu(out)

    out = self._final_layer(out)

    # Column layout: [shape | pose (3) | scale (1) | orientation (rest)].
    pose_start = self._shape_dimension
    scale_index = pose_start + 3
    orientation_start = scale_index + 1

    orientation = out[:, orientation_start:]
    if self._orientation_repr == "quaternion":
        # Normalize to unit length.
        # NOTE: an exactly all-zero row would divide by zero here.
        orientation = orientation / torch.sqrt(
            torch.sum(orientation ** 2, 1, keepdim=True)
        )

    return (
        out[:, 0:pose_start],
        out[:, pose_start:scale_index],
        out[:, scale_index],
        orientation,
    )

SDFPoseNet

Bases: Module

Pose and shape estimation from sensor data.

Composed of feature extraction backbone and shape/pose head.

Source code in sdfest/initialization/sdf_pose_network.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class SDFPoseNet(nn.Module):
    """Network estimating pose and shape from sensor data.

    Chains a feature extraction backbone with a shape/pose head.
    """

    def __init__(self, backbone: nn.Module, head: nn.Module):
        """Store the two stages of the network.

        Args:
            backbone:   module applied to the network input
            head:       module applied to the output of the backbone
        """
        super().__init__()
        # Tuple assignment binds left to right, so the backbone is registered first.
        self._backbone, self._head = backbone, head

    def forward(self, x):
        """Run the backbone and feed its output to the head.

        Args:
            x: input compatible with backbone.
        Returns:
            output from head
        """
        return self._head(self._backbone(x))

__init__(backbone, head)

Construct SDF pose and shape network.

Parameters:

Name Type Description Default
backbone Module

function or class representing the backbone

required
backbone_dict

parameters passed to backbone on construction

required
head Module

function or class representing the head

required
head_dict

parameters passed to head on construction

required
Source code in sdfest/initialization/sdf_pose_network.py
124
125
126
127
128
129
130
131
132
133
134
135
def __init__(self, backbone: nn.Module, head: nn.Module):
    """Store the two stages of the network.

    Args:
        backbone:   module applied to the network input
        head:       module applied to the output of the backbone
    """
    super().__init__()
    # Tuple assignment binds left to right, so the backbone is registered first.
    self._backbone, self._head = backbone, head

forward(x)

Forward pass.

Parameters:

Name Type Description Default
x

input compatible with backbone.

required

Returns: output from head

Source code in sdfest/initialization/sdf_pose_network.py
137
138
139
140
141
142
143
144
145
146
147
def forward(self, x):
    """Run the backbone and feed its output to the head.

    Args:
        x: input compatible with backbone.
    Returns:
        output from head
    """
    return self._head(self._backbone(x))