Using Upsample instead of ConvTranspose2d causes a CUDA out-of-memory error at the gradient calculation step

Video card: GTX 1070 Ti (8 GB), batch size 64, input image size 128×128.
I had the following U-Net with ResNet-152 as the encoder, which worked fine:

class UNetResNet(nn.Module):
    """U-Net style network with a torchvision ResNet as the encoder.

    The ResNet stem plus ``layer1``..``layer4`` form the encoder. The decoder
    is a ``DecoderCenter`` followed by a chain of ``DecoderBlockV`` modules;
    the outputs of ``layer1``..``layer4`` are concatenated into the decoder
    as skip connections.

    Args:
        encoder_depth: ResNet variant to use; one of 34, 101 or 152.
        num_classes: Number of output channels of the final 1x1 convolution.
        num_filters: Base decoder width; decoder channel counts are multiples
            of this value.
        dropout_2d: Drop probability passed to ``F.dropout2d`` right before
            the final convolution.
        pretrained: Forwarded to the torchvision ResNet constructor.
        is_deconv: Forwarded to every ``DecoderBlockV`` (``dec5``..``dec1``).
            The center block is always built with ``is_deconv=False``.

    Raises:
        NotImplementedError: If ``encoder_depth`` is not 34, 101 or 152.
    """

    def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                 pretrained=False, is_deconv=False):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_2d = dropout_2d

        # bottom_channel_nr is the channel count of encoder.layer4's output.
        if encoder_depth == 34:
            self.encoder = torchvision.models.resnet34(pretrained=pretrained)
            bottom_channel_nr = 512
        elif encoder_depth == 101:
            self.encoder = torchvision.models.resnet101(pretrained=pretrained)
            bottom_channel_nr = 2048
        elif encoder_depth == 152:
            self.encoder = torchvision.models.resnet152(pretrained=pretrained)
            bottom_channel_nr = 2048
        else:
            raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU(inplace=True)

        # Encoder stem. NOTE (original author): would like to get rid of this
        # pool layer.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)

        # The four residual stages are reused as-is.
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

        # Each decoder input width = previous decoder output + matching skip.
        self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8,
                                  num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2,
                                  num_filters * 2 * 2, num_filters * 2 * 2, is_deconv)
        # dec1 takes no skip connection.
        self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections.

        Args:
            x: Input batch fed to the ResNet stem.

        Returns:
            Output of the final 1x1 convolution (``num_classes`` channels).
        """
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        center = self.center(conv5)

        # Concatenate along the channel dimension (dim 1).
        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        # F.dropout2d defaults to training=True; pass the module flag so that
        # dropout is disabled after model.eval().
        return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))

# blocks

class DecoderBlockV(nn.Module):
    """Decoder stage that upsamples its input by a factor of 2.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, upsample with ``ConvTranspose2d`` (kernel 4,
            stride 2, padding 1) followed by BatchNorm and ReLU. If False,
            upsample first with bilinear ``nn.Upsample`` and then apply two
            ``ConvRelu`` layers.

    NOTE(review): in the ``is_deconv=False`` branch both ``ConvRelu`` layers
    run on the already-upsampled tensor (4x the pixels, at the full
    ``in_channels`` width), while in the ``is_deconv=True`` branch the
    ``ConvRelu`` runs before upsampling. This is the likely reason the
    Upsample variant needs more activation memory -- confirm by profiling.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        # The original called super(DecoderBlockV2, self), an undefined name.
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            self.block = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        """Apply the configured upsampling block to ``x``."""
        return self.block(x)







class DecoderCenter(nn.Module):
    """Bottleneck block placed between the encoder and the first decoder stage.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, ``ConvRelu`` is followed by ``ConvTranspose2d``
            (kernel 4, stride 2, padding 1), BatchNorm and ReLU. If False,
            the block is two ``ConvRelu`` layers with no upsampling layer.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            # Parameters for the deconvolution were chosen to avoid
            # artifacts, following
            # https://distill.pub/2016/deconv-checkerboard/
            layers = [
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels,
                                   kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        else:
            layers = [
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the configured block to ``x``."""
        return self.block(x)

Then I wanted to use Upsample instead of ConvTranspose2d, just by setting is_deconv = False in the decoder blocks. That caused a CUDA out-of-memory error at the gradient calculation step. Why? I could only fix it by reducing the batch size from 64 to 40.

~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)

    603             dict: outputs

    604         """

--> 605         self.fit(*args, **kwargs)

    606         return self.transform(*args, **kwargs)

    607 



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)

     76             for batch_id, data in enumerate(batch_gen):

     77                 self.callbacks.on_batch_begin()

---> 78                 metrics = self._fit_loop(data)

     79                 self.callbacks.on_batch_end(metrics=metrics)

     80                 if batch_id == steps:



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)

    113             batch_loss = sum(partial_batch_losses.values())

    114         partial_batch_losses['sum'] = batch_loss

--> 115         batch_loss.backward()

    116         self.optimizer.step()

    117 



~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)

    165                 Variable.

    166         """

--> 167         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)

    168 

    169     def register_hook(self, hook):



~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)

     97 

     98     Variable._execution_engine.run_backward(

---> 99         variables, grad_variables, retain_graph)

    100 

    101 



RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58

asked Sep 26 '18 at 18:39

Rocketq

1235

add a comment |

Video card: GTX 1070 Ti (8 GB), batch size 64, input image size 128×128.
I had the following U-Net with ResNet-152 as the encoder, which worked fine:

class UNetResNet(nn.Module):
    """U-Net style network with a torchvision ResNet as the encoder.

    The ResNet stem plus ``layer1``..``layer4`` form the encoder. The decoder
    is a ``DecoderCenter`` followed by a chain of ``DecoderBlockV`` modules;
    the outputs of ``layer1``..``layer4`` are concatenated into the decoder
    as skip connections.

    Args:
        encoder_depth: ResNet variant to use; one of 34, 101 or 152.
        num_classes: Number of output channels of the final 1x1 convolution.
        num_filters: Base decoder width; decoder channel counts are multiples
            of this value.
        dropout_2d: Drop probability passed to ``F.dropout2d`` right before
            the final convolution.
        pretrained: Forwarded to the torchvision ResNet constructor.
        is_deconv: Forwarded to every ``DecoderBlockV`` (``dec5``..``dec1``).
            The center block is always built with ``is_deconv=False``.

    Raises:
        NotImplementedError: If ``encoder_depth`` is not 34, 101 or 152.
    """

    def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                 pretrained=False, is_deconv=False):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_2d = dropout_2d

        # bottom_channel_nr is the channel count of encoder.layer4's output.
        if encoder_depth == 34:
            self.encoder = torchvision.models.resnet34(pretrained=pretrained)
            bottom_channel_nr = 512
        elif encoder_depth == 101:
            self.encoder = torchvision.models.resnet101(pretrained=pretrained)
            bottom_channel_nr = 2048
        elif encoder_depth == 152:
            self.encoder = torchvision.models.resnet152(pretrained=pretrained)
            bottom_channel_nr = 2048
        else:
            raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU(inplace=True)

        # Encoder stem. NOTE (original author): would like to get rid of this
        # pool layer.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)

        # The four residual stages are reused as-is.
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

        # Each decoder input width = previous decoder output + matching skip.
        self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8,
                                  num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2,
                                  num_filters * 2 * 2, num_filters * 2 * 2, is_deconv)
        # dec1 takes no skip connection.
        self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections.

        Args:
            x: Input batch fed to the ResNet stem.

        Returns:
            Output of the final 1x1 convolution (``num_classes`` channels).
        """
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        center = self.center(conv5)

        # Concatenate along the channel dimension (dim 1).
        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        # F.dropout2d defaults to training=True; pass the module flag so that
        # dropout is disabled after model.eval().
        return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))

# blocks

class DecoderBlockV(nn.Module):
    """Decoder stage that upsamples its input by a factor of 2.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, upsample with ``ConvTranspose2d`` (kernel 4,
            stride 2, padding 1) followed by BatchNorm and ReLU. If False,
            upsample first with bilinear ``nn.Upsample`` and then apply two
            ``ConvRelu`` layers.

    NOTE(review): in the ``is_deconv=False`` branch both ``ConvRelu`` layers
    run on the already-upsampled tensor (4x the pixels, at the full
    ``in_channels`` width), while in the ``is_deconv=True`` branch the
    ``ConvRelu`` runs before upsampling. This is the likely reason the
    Upsample variant needs more activation memory -- confirm by profiling.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        # The original called super(DecoderBlockV2, self), an undefined name.
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            self.block = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        """Apply the configured upsampling block to ``x``."""
        return self.block(x)







class DecoderCenter(nn.Module):
    """Bottleneck block placed between the encoder and the first decoder stage.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, ``ConvRelu`` is followed by ``ConvTranspose2d``
            (kernel 4, stride 2, padding 1), BatchNorm and ReLU. If False,
            the block is two ``ConvRelu`` layers with no upsampling layer.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            # Parameters for the deconvolution were chosen to avoid
            # artifacts, following
            # https://distill.pub/2016/deconv-checkerboard/
            layers = [
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels,
                                   kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        else:
            layers = [
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the configured block to ``x``."""
        return self.block(x)

~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)

    603             dict: outputs

    604         """

--> 605         self.fit(*args, **kwargs)

    606         return self.transform(*args, **kwargs)

    607 



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)

     76             for batch_id, data in enumerate(batch_gen):

     77                 self.callbacks.on_batch_begin()

---> 78                 metrics = self._fit_loop(data)

     79                 self.callbacks.on_batch_end(metrics=metrics)

     80                 if batch_id == steps:



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)

    113             batch_loss = sum(partial_batch_losses.values())

    114         partial_batch_losses['sum'] = batch_loss

--> 115         batch_loss.backward()

    116         self.optimizer.step()

    117 



~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)

    165                 Variable.

    166         """

--> 167         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)

    168 

    169     def register_hook(self, hook):



~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)

     97 

     98     Variable._execution_engine.run_backward(

---> 99         variables, grad_variables, retain_graph)

    100 

    101 



RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58

asked Sep 26 '18 at 18:39

Rocketq

1235

add a comment |

Video card: GTX 1070 Ti (8 GB), batch size 64, input image size 128×128.
I had the following U-Net with ResNet-152 as the encoder, which worked fine:

class UNetResNet(nn.Module):
    """U-Net style network with a torchvision ResNet as the encoder.

    The ResNet stem plus ``layer1``..``layer4`` form the encoder. The decoder
    is a ``DecoderCenter`` followed by a chain of ``DecoderBlockV`` modules;
    the outputs of ``layer1``..``layer4`` are concatenated into the decoder
    as skip connections.

    Args:
        encoder_depth: ResNet variant to use; one of 34, 101 or 152.
        num_classes: Number of output channels of the final 1x1 convolution.
        num_filters: Base decoder width; decoder channel counts are multiples
            of this value.
        dropout_2d: Drop probability passed to ``F.dropout2d`` right before
            the final convolution.
        pretrained: Forwarded to the torchvision ResNet constructor.
        is_deconv: Forwarded to every ``DecoderBlockV`` (``dec5``..``dec1``).
            The center block is always built with ``is_deconv=False``.

    Raises:
        NotImplementedError: If ``encoder_depth`` is not 34, 101 or 152.
    """

    def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                 pretrained=False, is_deconv=False):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_2d = dropout_2d

        # bottom_channel_nr is the channel count of encoder.layer4's output.
        if encoder_depth == 34:
            self.encoder = torchvision.models.resnet34(pretrained=pretrained)
            bottom_channel_nr = 512
        elif encoder_depth == 101:
            self.encoder = torchvision.models.resnet101(pretrained=pretrained)
            bottom_channel_nr = 2048
        elif encoder_depth == 152:
            self.encoder = torchvision.models.resnet152(pretrained=pretrained)
            bottom_channel_nr = 2048
        else:
            raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU(inplace=True)

        # Encoder stem. NOTE (original author): would like to get rid of this
        # pool layer.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)

        # The four residual stages are reused as-is.
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

        # Each decoder input width = previous decoder output + matching skip.
        self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8,
                                  num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2,
                                  num_filters * 2 * 2, num_filters * 2 * 2, is_deconv)
        # dec1 takes no skip connection.
        self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections.

        Args:
            x: Input batch fed to the ResNet stem.

        Returns:
            Output of the final 1x1 convolution (``num_classes`` channels).
        """
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        center = self.center(conv5)

        # Concatenate along the channel dimension (dim 1).
        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        # F.dropout2d defaults to training=True; pass the module flag so that
        # dropout is disabled after model.eval().
        return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))

# blocks

class DecoderBlockV(nn.Module):
    """Decoder stage that upsamples its input by a factor of 2.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, upsample with ``ConvTranspose2d`` (kernel 4,
            stride 2, padding 1) followed by BatchNorm and ReLU. If False,
            upsample first with bilinear ``nn.Upsample`` and then apply two
            ``ConvRelu`` layers.

    NOTE(review): in the ``is_deconv=False`` branch both ``ConvRelu`` layers
    run on the already-upsampled tensor (4x the pixels, at the full
    ``in_channels`` width), while in the ``is_deconv=True`` branch the
    ``ConvRelu`` runs before upsampling. This is the likely reason the
    Upsample variant needs more activation memory -- confirm by profiling.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        # The original called super(DecoderBlockV2, self), an undefined name.
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            self.block = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        """Apply the configured upsampling block to ``x``."""
        return self.block(x)







class DecoderCenter(nn.Module):
    """Bottleneck block placed between the encoder and the first decoder stage.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, ``ConvRelu`` is followed by ``ConvTranspose2d``
            (kernel 4, stride 2, padding 1), BatchNorm and ReLU. If False,
            the block is two ``ConvRelu`` layers with no upsampling layer.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            # Parameters for the deconvolution were chosen to avoid
            # artifacts, following
            # https://distill.pub/2016/deconv-checkerboard/
            layers = [
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels,
                                   kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        else:
            layers = [
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the configured block to ``x``."""
        return self.block(x)

~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)

    603             dict: outputs

    604         """

--> 605         self.fit(*args, **kwargs)

    606         return self.transform(*args, **kwargs)

    607 



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)

     76             for batch_id, data in enumerate(batch_gen):

     77                 self.callbacks.on_batch_begin()

---> 78                 metrics = self._fit_loop(data)

     79                 self.callbacks.on_batch_end(metrics=metrics)

     80                 if batch_id == steps:



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)

    113             batch_loss = sum(partial_batch_losses.values())

    114         partial_batch_losses['sum'] = batch_loss

--> 115         batch_loss.backward()

    116         self.optimizer.step()

    117 



~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)

    165                 Variable.

    166         """

--> 167         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)

    168 

    169     def register_hook(self, hook):



~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)

     97 

     98     Variable._execution_engine.run_backward(

---> 99         variables, grad_variables, retain_graph)

    100 

    101 



RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58

asked Sep 26 '18 at 18:39

Rocketq

1235

Video card: GTX 1070 Ti (8 GB), batch size 64, input image size 128×128.
I had the following U-Net with ResNet-152 as the encoder, which worked fine:

class UNetResNet(nn.Module):
    """U-Net style network with a torchvision ResNet as the encoder.

    The ResNet stem plus ``layer1``..``layer4`` form the encoder. The decoder
    is a ``DecoderCenter`` followed by a chain of ``DecoderBlockV`` modules;
    the outputs of ``layer1``..``layer4`` are concatenated into the decoder
    as skip connections.

    Args:
        encoder_depth: ResNet variant to use; one of 34, 101 or 152.
        num_classes: Number of output channels of the final 1x1 convolution.
        num_filters: Base decoder width; decoder channel counts are multiples
            of this value.
        dropout_2d: Drop probability passed to ``F.dropout2d`` right before
            the final convolution.
        pretrained: Forwarded to the torchvision ResNet constructor.
        is_deconv: Forwarded to every ``DecoderBlockV`` (``dec5``..``dec1``).
            The center block is always built with ``is_deconv=False``.

    Raises:
        NotImplementedError: If ``encoder_depth`` is not 34, 101 or 152.
    """

    def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                 pretrained=False, is_deconv=False):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_2d = dropout_2d

        # bottom_channel_nr is the channel count of encoder.layer4's output.
        if encoder_depth == 34:
            self.encoder = torchvision.models.resnet34(pretrained=pretrained)
            bottom_channel_nr = 512
        elif encoder_depth == 101:
            self.encoder = torchvision.models.resnet101(pretrained=pretrained)
            bottom_channel_nr = 2048
        elif encoder_depth == 152:
            self.encoder = torchvision.models.resnet152(pretrained=pretrained)
            bottom_channel_nr = 2048
        else:
            raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU(inplace=True)

        # Encoder stem. NOTE (original author): would like to get rid of this
        # pool layer.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)

        # The four residual stages are reused as-is.
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

        # Each decoder input width = previous decoder output + matching skip.
        self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8,
                                  num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2,
                                  num_filters * 2 * 2, num_filters * 2 * 2, is_deconv)
        # dec1 takes no skip connection.
        self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder, then the decoder with skip connections.

        Args:
            x: Input batch fed to the ResNet stem.

        Returns:
            Output of the final 1x1 convolution (``num_classes`` channels).
        """
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)

        center = self.center(conv5)

        # Concatenate along the channel dimension (dim 1).
        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        # F.dropout2d defaults to training=True; pass the module flag so that
        # dropout is disabled after model.eval().
        return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))

# blocks

class DecoderBlockV(nn.Module):
    """Decoder stage that upsamples its input by a factor of 2.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, upsample with ``ConvTranspose2d`` (kernel 4,
            stride 2, padding 1) followed by BatchNorm and ReLU. If False,
            upsample first with bilinear ``nn.Upsample`` and then apply two
            ``ConvRelu`` layers.

    NOTE(review): in the ``is_deconv=False`` branch both ``ConvRelu`` layers
    run on the already-upsampled tensor (4x the pixels, at the full
    ``in_channels`` width), while in the ``is_deconv=True`` branch the
    ``ConvRelu`` runs before upsampling. This is the likely reason the
    Upsample variant needs more activation memory -- confirm by profiling.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        # The original called super(DecoderBlockV2, self), an undefined name.
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            self.block = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        """Apply the configured upsampling block to ``x``."""
        return self.block(x)







class DecoderCenter(nn.Module):
    """Bottleneck block placed between the encoder and the first decoder stage.

    Args:
        in_channels: Channels of the input tensor.
        middle_channels: Channels produced by the first ``ConvRelu``.
        out_channels: Channels of the block output.
        is_deconv: If True, ``ConvRelu`` is followed by ``ConvTranspose2d``
            (kernel 4, stride 2, padding 1), BatchNorm and ReLU. If False,
            the block is two ``ConvRelu`` layers with no upsampling layer.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            # Parameters for the deconvolution were chosen to avoid
            # artifacts, following
            # https://distill.pub/2016/deconv-checkerboard/
            layers = [
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels,
                                   kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        else:
            layers = [
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            ]

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the configured block to ``x``."""
        return self.block(x)

~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)

    603             dict: outputs

    604         """

--> 605         self.fit(*args, **kwargs)

    606         return self.transform(*args, **kwargs)

    607 



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)

     76             for batch_id, data in enumerate(batch_gen):

     77                 self.callbacks.on_batch_begin()

---> 78                 metrics = self._fit_loop(data)

     79                 self.callbacks.on_batch_end(metrics=metrics)

     80                 if batch_id == steps:



~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)

    113             batch_loss = sum(partial_batch_losses.values())

    114         partial_batch_losses['sum'] = batch_loss

--> 115         batch_loss.backward()

    116         self.optimizer.step()

    117 



~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)

    165                 Variable.

    166         """

--> 167         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)

    168 

    169     def register_hook(self, hook):



~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)

     97 

     98     Variable._execution_engine.run_backward(

---> 99         variables, grad_variables, retain_graph)

    100 

    101 



RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58

python pytorch

asked Sep 26 '18 at 18:39

Rocketq

1235

asked Sep 26 '18 at 18:39

Rocketq

1235

asked Sep 26 '18 at 18:39

Rocketq

1235

asked Sep 26 '18 at 18:39

Rocketq

1235

asked Sep 26 '18 at 18:39

Rocketq

1235

add a comment |

1 Answer
1

active

oldest

votes

I am having the same problem. For some reason, Upsample uses more CUDA memory than ConvTranspose2d.

answered 2 mins ago

Johnny Riskas

New contributor

add a comment |

Your Answer

// When the "editor" module is in use, load "mathjaxEditing" and register a
// creation callback that prepares each new Markdown editor for MathJax,
// passing the delimiter pairs given in the array below.
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["$", "$"], ["\$","\$"]]);
});
});
}, "mathjax-editing");

// Page-ready hook: sets up the tag renderer and then creates the answer
// editor, after the "snippets" module when snippets are enabled.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "557"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Calls StackExchange.prepareEditor with the answer-form options.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                // The scraped strings had lost the backslashes of their
                // \u003c / \u003e escapes and of the inner quotes, which made
                // them a syntax error; both are restored here.
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer",
            immediatelyShowMarkdownHelp: true
        });
    }
});

draft saved

draft discarded

Sign up or log in

// On page ready, hand the '#login-link' selector to the draft-save helper
// (presumably so the draft is saved when the login link is clicked).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

// On page ready, initialise the post-login flow for elements matching
// '.new-post-login'; the second argument is the URL-encoded address of this
// question's "new answer" anchor.
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fdatascience.stackexchange.com%2fquestions%2f38835%2fusing-upsample-instead-of-convtranspose2d-causes-cuda-out-of-memory-on-gradient%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

1 Answer
1

active

oldest

votes

1 Answer
1

active

oldest

votes

I am having the same problem. For some reason, Upsample uses more CUDA memory than ConvTranspose2d.

answered 2 mins ago

Johnny Riskas

New contributor

add a comment |

I am having the same problem. For some reason, Upsample uses more CUDA memory than ConvTranspose2d.

answered 2 mins ago

Johnny Riskas

New contributor

add a comment |

I am having the same problem. For some reason, Upsample uses more CUDA memory than ConvTranspose2d.

answered 2 mins ago

Johnny Riskas

New contributor

I am having the same problem. For some reason, Upsample uses more CUDA memory than ConvTranspose2d.

answered 2 mins ago

Johnny Riskas

New contributor

answered 2 mins ago

Johnny Riskas

New contributor

answered 2 mins ago

Johnny Riskas

answered 2 mins ago

Johnny Riskas

New contributor

Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Data Science Stack Exchange!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

Use MathJax to format equations. MathJax reference.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

// On page ready, hand the '#login-link' selector to the draft-save helper
// (presumably so the draft is saved when the login link is clicked).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

// On page ready, hand the '#login-link' selector to the draft-save helper
// (presumably so the draft is saved when the login link is clicked).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

// On page ready, hand the '#login-link' selector to the draft-save helper
// (presumably so the draft is saved when the login link is clicked).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

// On page ready, hand the '#login-link' selector to the draft-save helper
// (presumably so the draft is saved when the login link is clicked).
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is for reference only. If you need detailed information, please check here.

搜尋此網誌

Gfyuki