Using Upsample instead of ConvTranspose2d causes cuda out of memory on gradient calculating step












0












$begingroup$


Video card: GTX 1070 Ti 8 GB, batch size 64, input image size 128x128.
I had this U-Net with a ResNet-152 encoder, which worked fine:



class UNetResNet(nn.Module):
    """U-Net style segmentation network built on a torchvision ResNet encoder.

    The encoder stages (stem + pool, layer1..layer4) feed a centre block and a
    chain of decoder blocks; dec5..dec2 each receive the previous decoder
    output concatenated with the matching encoder feature map.

    Args:
        encoder_depth: ResNet variant used as encoder; one of 34, 101, 152.
        num_classes: number of output channels of the final 1x1 convolution.
        num_filters: base width of the decoder blocks.
        dropout_2d: probability passed to ``F.dropout2d`` before the final
            convolution.
        pretrained: forwarded to the torchvision ResNet constructor.
        is_deconv: forwarded to every ``DecoderBlockV``.

    Raises:
        NotImplementedError: if ``encoder_depth`` is not 34, 101 or 152.
    """

    def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                 pretrained=False, is_deconv=False):
        super().__init__()
        self.num_classes = num_classes
        self.dropout_2d = dropout_2d

        # bottom_channel_nr is the channel count used for the deepest encoder
        # stage (layer4); the decoder sizes below use //2, //4 and //8 of it
        # for layer3, layer2 and layer1 respectively.
        if encoder_depth == 34:
            self.encoder = torchvision.models.resnet34(pretrained=pretrained)
            bottom_channel_nr = 512
        elif encoder_depth == 101:
            self.encoder = torchvision.models.resnet101(pretrained=pretrained)
            bottom_channel_nr = 2048
        elif encoder_depth == 152:
            self.encoder = torchvision.models.resnet152(pretrained=pretrained)
            bottom_channel_nr = 2048
        else:
            raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

        self.pool = nn.MaxPool2d(2, 2)

        self.relu = nn.ReLU(inplace=True)

        # Encoder stem followed by the 2x2 max-pool.
        self.conv1 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)  # author's note: would like to get rid of this pool layer

        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4
        # The centre block is always built with is_deconv=False.
        self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

        # Input widths of dec5..dec2 are skip-connection channels plus the
        # output channels of the previous decoder stage.
        self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
        self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2, num_filters * 2 * 2, num_filters * 2 * 2,
                                  is_deconv)
        self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
        self.dec0 = ConvRelu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

    def forward(self, x):
        """Run the encoder/decoder and return the ``num_classes``-channel map."""
        conv1 = self.conv1(x)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)
        conv5 = self.conv5(conv4)
        center = self.center(conv5)
        # Skip connections: concatenate along the channel dimension (dim 1).
        dec5 = self.dec5(torch.cat([center, conv5], 1))
        dec4 = self.dec4(torch.cat([dec5, conv4], 1))
        dec3 = self.dec3(torch.cat([dec4, conv3], 1))
        dec2 = self.dec2(torch.cat([dec3, conv2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        # Pass the module's mode explicitly: the functional API does not know
        # about train()/eval(), so relying on its default makes dropout ignore
        # the current mode.
        return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))
# blocks
class DecoderBlockV(nn.Module):
    """Decoder stage that upsamples its input by a factor of two.

    Args:
        in_channels: channels of the incoming tensor.
        middle_channels: channels after the first ``ConvRelu``.
        out_channels: channels produced by the block.
        is_deconv: if True, apply ``ConvRelu`` and then a stride-2
            ``ConvTranspose2d`` with batch norm and ReLU; if False, apply a
            bilinear ``nn.Upsample`` first and then two ``ConvRelu`` layers.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        # The original called super(DecoderBlockV2, self); no DecoderBlockV2
        # is defined in the visible code, so construction raised NameError.
        super().__init__()
        self.in_channels = in_channels

        if is_deconv:
            self.block = nn.Sequential(
                ConvRelu(in_channels, middle_channels),
                nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                   padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )
        else:
            # Upsampling comes first, so both ConvRelu layers run on the
            # 2x-upsampled tensor, whereas the deconv branch applies its
            # ConvRelu before upsampling.
            self.block = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='bilinear'),
                ConvRelu(in_channels, middle_channels),
                ConvRelu(middle_channels, out_channels),
            )

    def forward(self, x):
        """Run ``x`` through the configured block and return the result."""
        return self.block(x)



class DecoderCenter(nn.Module):
    """Centre block placed between the encoder output and the first decoder.

    Args:
        in_channels: channels of the incoming tensor.
        middle_channels: channels after the first ``ConvRelu``.
        out_channels: channels produced by the block.
        is_deconv: if True, follow the first ``ConvRelu`` with a stride-2
            ``ConvTranspose2d``, batch norm and ReLU; otherwise with a second
            ``ConvRelu``.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
        super().__init__()
        self.in_channels = in_channels

        # Both variants start with the same ConvRelu layer.
        layers = [ConvRelu(in_channels, middle_channels)]
        if is_deconv:
            # Parameters for deconvolution were chosen to avoid artifacts,
            # following https://distill.pub/2016/deconv-checkerboard/
            layers.append(nn.ConvTranspose2d(middle_channels, out_channels,
                                             kernel_size=4, stride=2, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(inplace=True))
        else:
            layers.append(ConvRelu(middle_channels, out_channels))

        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the centre block to ``x``."""
        return self.block(x)


Then I wanted to use Upsample instead of ConvTranspose2d, just by setting is_deconv = False in DecoderBlockV. That caused a CUDA out-of-memory error on the gradient calculation step. Why? I could only fix it by reducing the batch size from 64 to 40.



~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)
603 dict: outputs
604 """
--> 605 self.fit(*args, **kwargs)
606 return self.transform(*args, **kwargs)
607

~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)
76 for batch_id, data in enumerate(batch_gen):
77 self.callbacks.on_batch_begin()
---> 78 metrics = self._fit_loop(data)
79 self.callbacks.on_batch_end(metrics=metrics)
80 if batch_id == steps:

~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)
113 batch_loss = sum(partial_batch_losses.values())
114 partial_batch_losses['sum'] = batch_loss
--> 115 batch_loss.backward()
116 self.optimizer.step()
117

~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
165 Variable.
166 """
--> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
168
169 def register_hook(self, hook):

~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
97
98 Variable._execution_engine.run_backward(
---> 99 variables, grad_variables, retain_graph)
100
101

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58









share|improve this question









$endgroup$

















    0












    $begingroup$


    Video card: GTX 1070 Ti 8 GB, batch size 64, input image size 128x128.
    I had this U-Net with a ResNet-152 encoder, which worked fine:



    class UNetResNet(nn.Module):
        """U-Net style segmentation network built on a torchvision ResNet encoder.

        The encoder stages (stem + pool, layer1..layer4) feed a centre block and a
        chain of decoder blocks; dec5..dec2 each receive the previous decoder
        output concatenated with the matching encoder feature map.

        Args:
            encoder_depth: ResNet variant used as encoder; one of 34, 101, 152.
            num_classes: number of output channels of the final 1x1 convolution.
            num_filters: base width of the decoder blocks.
            dropout_2d: probability passed to ``F.dropout2d`` before the final
                convolution.
            pretrained: forwarded to the torchvision ResNet constructor.
            is_deconv: forwarded to every ``DecoderBlockV``.

        Raises:
            NotImplementedError: if ``encoder_depth`` is not 34, 101 or 152.
        """

        def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                     pretrained=False, is_deconv=False):
            super().__init__()
            self.num_classes = num_classes
            self.dropout_2d = dropout_2d

            # bottom_channel_nr is the channel count used for the deepest encoder
            # stage (layer4); the decoder sizes below use //2, //4 and //8 of it
            # for layer3, layer2 and layer1 respectively.
            if encoder_depth == 34:
                self.encoder = torchvision.models.resnet34(pretrained=pretrained)
                bottom_channel_nr = 512
            elif encoder_depth == 101:
                self.encoder = torchvision.models.resnet101(pretrained=pretrained)
                bottom_channel_nr = 2048
            elif encoder_depth == 152:
                self.encoder = torchvision.models.resnet152(pretrained=pretrained)
                bottom_channel_nr = 2048
            else:
                raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

            self.pool = nn.MaxPool2d(2, 2)

            self.relu = nn.ReLU(inplace=True)

            # Encoder stem followed by the 2x2 max-pool.
            self.conv1 = nn.Sequential(self.encoder.conv1,
                                       self.encoder.bn1,
                                       self.encoder.relu,
                                       self.pool)  # author's note: would like to get rid of this pool layer

            self.conv2 = self.encoder.layer1
            self.conv3 = self.encoder.layer2
            self.conv4 = self.encoder.layer3
            self.conv5 = self.encoder.layer4
            # The centre block is always built with is_deconv=False.
            self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

            # Input widths of dec5..dec2 are skip-connection channels plus the
            # output channels of the previous decoder stage.
            self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
            self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
            self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
            self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2, num_filters * 2 * 2, num_filters * 2 * 2,
                                      is_deconv)
            self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
            self.dec0 = ConvRelu(num_filters, num_filters)
            self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

        def forward(self, x):
            """Run the encoder/decoder and return the ``num_classes``-channel map."""
            conv1 = self.conv1(x)
            conv2 = self.conv2(conv1)
            conv3 = self.conv3(conv2)
            conv4 = self.conv4(conv3)
            conv5 = self.conv5(conv4)
            center = self.center(conv5)
            # Skip connections: concatenate along the channel dimension (dim 1).
            dec5 = self.dec5(torch.cat([center, conv5], 1))
            dec4 = self.dec4(torch.cat([dec5, conv4], 1))
            dec3 = self.dec3(torch.cat([dec4, conv3], 1))
            dec2 = self.dec2(torch.cat([dec3, conv2], 1))
            dec1 = self.dec1(dec2)
            dec0 = self.dec0(dec1)

            # Pass the module's mode explicitly: the functional API does not know
            # about train()/eval(), so relying on its default makes dropout ignore
            # the current mode.
            return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))
    # blocks
    class DecoderBlockV(nn.Module):
        """Decoder stage that upsamples its input by a factor of two.

        Args:
            in_channels: channels of the incoming tensor.
            middle_channels: channels after the first ``ConvRelu``.
            out_channels: channels produced by the block.
            is_deconv: if True, apply ``ConvRelu`` and then a stride-2
                ``ConvTranspose2d`` with batch norm and ReLU; if False, apply a
                bilinear ``nn.Upsample`` first and then two ``ConvRelu`` layers.
        """

        def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
            # The original called super(DecoderBlockV2, self); no DecoderBlockV2
            # is defined in the visible code, so construction raised NameError.
            super().__init__()
            self.in_channels = in_channels

            if is_deconv:
                self.block = nn.Sequential(
                    ConvRelu(in_channels, middle_channels),
                    nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                       padding=1),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(inplace=True),
                )
            else:
                # Upsampling comes first, so both ConvRelu layers run on the
                # 2x-upsampled tensor, whereas the deconv branch applies its
                # ConvRelu before upsampling.
                self.block = nn.Sequential(
                    nn.Upsample(scale_factor=2, mode='bilinear'),
                    ConvRelu(in_channels, middle_channels),
                    ConvRelu(middle_channels, out_channels),
                )

        def forward(self, x):
            """Run ``x`` through the configured block and return the result."""
            return self.block(x)



    class DecoderCenter(nn.Module):
        """Centre block placed between the encoder output and the first decoder.

        Args:
            in_channels: channels of the incoming tensor.
            middle_channels: channels after the first ``ConvRelu``.
            out_channels: channels produced by the block.
            is_deconv: if True, follow the first ``ConvRelu`` with a stride-2
                ``ConvTranspose2d``, batch norm and ReLU; otherwise with a second
                ``ConvRelu``.
        """

        def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
            super().__init__()
            self.in_channels = in_channels

            # Both variants start with the same ConvRelu layer.
            layers = [ConvRelu(in_channels, middle_channels)]
            if is_deconv:
                # Parameters for deconvolution were chosen to avoid artifacts,
                # following https://distill.pub/2016/deconv-checkerboard/
                layers.append(nn.ConvTranspose2d(middle_channels, out_channels,
                                                 kernel_size=4, stride=2, padding=1))
                layers.append(nn.BatchNorm2d(out_channels))
                layers.append(nn.ReLU(inplace=True))
            else:
                layers.append(ConvRelu(middle_channels, out_channels))

            self.block = nn.Sequential(*layers)

        def forward(self, x):
            """Apply the centre block to ``x``."""
            return self.block(x)


    Then I wanted to use Upsample instead of ConvTranspose2d, just by setting is_deconv = False in DecoderBlockV. That caused a CUDA out-of-memory error on the gradient calculation step. Why? I could only fix it by reducing the batch size from 64 to 40.



    ~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)
    603 dict: outputs
    604 """
    --> 605 self.fit(*args, **kwargs)
    606 return self.transform(*args, **kwargs)
    607

    ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)
    76 for batch_id, data in enumerate(batch_gen):
    77 self.callbacks.on_batch_begin()
    ---> 78 metrics = self._fit_loop(data)
    79 self.callbacks.on_batch_end(metrics=metrics)
    80 if batch_id == steps:

    ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)
    113 batch_loss = sum(partial_batch_losses.values())
    114 partial_batch_losses['sum'] = batch_loss
    --> 115 batch_loss.backward()
    116 self.optimizer.step()
    117

    ~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
    165 Variable.
    166 """
    --> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
    168
    169 def register_hook(self, hook):

    ~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
    97
    98 Variable._execution_engine.run_backward(
    ---> 99 variables, grad_variables, retain_graph)
    100
    101

    RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58









    share|improve this question









    $endgroup$















      0












      0








      0





      $begingroup$


      Video card: GTX 1070 Ti 8 GB, batch size 64, input image size 128x128.
      I had this U-Net with a ResNet-152 encoder, which worked fine:



      class UNetResNet(nn.Module):
          """U-Net style segmentation network built on a torchvision ResNet encoder.

          The encoder stages (stem + pool, layer1..layer4) feed a centre block and a
          chain of decoder blocks; dec5..dec2 each receive the previous decoder
          output concatenated with the matching encoder feature map.

          Args:
              encoder_depth: ResNet variant used as encoder; one of 34, 101, 152.
              num_classes: number of output channels of the final 1x1 convolution.
              num_filters: base width of the decoder blocks.
              dropout_2d: probability passed to ``F.dropout2d`` before the final
                  convolution.
              pretrained: forwarded to the torchvision ResNet constructor.
              is_deconv: forwarded to every ``DecoderBlockV``.

          Raises:
              NotImplementedError: if ``encoder_depth`` is not 34, 101 or 152.
          """

          def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                       pretrained=False, is_deconv=False):
              super().__init__()
              self.num_classes = num_classes
              self.dropout_2d = dropout_2d

              # bottom_channel_nr is the channel count used for the deepest encoder
              # stage (layer4); the decoder sizes below use //2, //4 and //8 of it
              # for layer3, layer2 and layer1 respectively.
              if encoder_depth == 34:
                  self.encoder = torchvision.models.resnet34(pretrained=pretrained)
                  bottom_channel_nr = 512
              elif encoder_depth == 101:
                  self.encoder = torchvision.models.resnet101(pretrained=pretrained)
                  bottom_channel_nr = 2048
              elif encoder_depth == 152:
                  self.encoder = torchvision.models.resnet152(pretrained=pretrained)
                  bottom_channel_nr = 2048
              else:
                  raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

              self.pool = nn.MaxPool2d(2, 2)

              self.relu = nn.ReLU(inplace=True)

              # Encoder stem followed by the 2x2 max-pool.
              self.conv1 = nn.Sequential(self.encoder.conv1,
                                         self.encoder.bn1,
                                         self.encoder.relu,
                                         self.pool)  # author's note: would like to get rid of this pool layer

              self.conv2 = self.encoder.layer1
              self.conv3 = self.encoder.layer2
              self.conv4 = self.encoder.layer3
              self.conv5 = self.encoder.layer4
              # The centre block is always built with is_deconv=False.
              self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

              # Input widths of dec5..dec2 are skip-connection channels plus the
              # output channels of the previous decoder stage.
              self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
              self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
              self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
              self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2, num_filters * 2 * 2, num_filters * 2 * 2,
                                        is_deconv)
              self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
              self.dec0 = ConvRelu(num_filters, num_filters)
              self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

          def forward(self, x):
              """Run the encoder/decoder and return the ``num_classes``-channel map."""
              conv1 = self.conv1(x)
              conv2 = self.conv2(conv1)
              conv3 = self.conv3(conv2)
              conv4 = self.conv4(conv3)
              conv5 = self.conv5(conv4)
              center = self.center(conv5)
              # Skip connections: concatenate along the channel dimension (dim 1).
              dec5 = self.dec5(torch.cat([center, conv5], 1))
              dec4 = self.dec4(torch.cat([dec5, conv4], 1))
              dec3 = self.dec3(torch.cat([dec4, conv3], 1))
              dec2 = self.dec2(torch.cat([dec3, conv2], 1))
              dec1 = self.dec1(dec2)
              dec0 = self.dec0(dec1)

              # Pass the module's mode explicitly: the functional API does not know
              # about train()/eval(), so relying on its default makes dropout ignore
              # the current mode.
              return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))
      # blocks
      class DecoderBlockV(nn.Module):
          """Decoder stage that upsamples its input by a factor of two.

          Args:
              in_channels: channels of the incoming tensor.
              middle_channels: channels after the first ``ConvRelu``.
              out_channels: channels produced by the block.
              is_deconv: if True, apply ``ConvRelu`` and then a stride-2
                  ``ConvTranspose2d`` with batch norm and ReLU; if False, apply a
                  bilinear ``nn.Upsample`` first and then two ``ConvRelu`` layers.
          """

          def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
              # The original called super(DecoderBlockV2, self); no DecoderBlockV2
              # is defined in the visible code, so construction raised NameError.
              super().__init__()
              self.in_channels = in_channels

              if is_deconv:
                  self.block = nn.Sequential(
                      ConvRelu(in_channels, middle_channels),
                      nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                         padding=1),
                      nn.BatchNorm2d(out_channels),
                      nn.ReLU(inplace=True),
                  )
              else:
                  # Upsampling comes first, so both ConvRelu layers run on the
                  # 2x-upsampled tensor, whereas the deconv branch applies its
                  # ConvRelu before upsampling.
                  self.block = nn.Sequential(
                      nn.Upsample(scale_factor=2, mode='bilinear'),
                      ConvRelu(in_channels, middle_channels),
                      ConvRelu(middle_channels, out_channels),
                  )

          def forward(self, x):
              """Run ``x`` through the configured block and return the result."""
              return self.block(x)



      class DecoderCenter(nn.Module):
          """Centre block placed between the encoder output and the first decoder.

          Args:
              in_channels: channels of the incoming tensor.
              middle_channels: channels after the first ``ConvRelu``.
              out_channels: channels produced by the block.
              is_deconv: if True, follow the first ``ConvRelu`` with a stride-2
                  ``ConvTranspose2d``, batch norm and ReLU; otherwise with a second
                  ``ConvRelu``.
          """

          def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
              super().__init__()
              self.in_channels = in_channels

              # Both variants start with the same ConvRelu layer.
              layers = [ConvRelu(in_channels, middle_channels)]
              if is_deconv:
                  # Parameters for deconvolution were chosen to avoid artifacts,
                  # following https://distill.pub/2016/deconv-checkerboard/
                  layers.append(nn.ConvTranspose2d(middle_channels, out_channels,
                                                   kernel_size=4, stride=2, padding=1))
                  layers.append(nn.BatchNorm2d(out_channels))
                  layers.append(nn.ReLU(inplace=True))
              else:
                  layers.append(ConvRelu(middle_channels, out_channels))

              self.block = nn.Sequential(*layers)

          def forward(self, x):
              """Apply the centre block to ``x``."""
              return self.block(x)


      Then I wanted to use Upsample instead of ConvTranspose2d, just by setting is_deconv = False in DecoderBlockV. That caused a CUDA out-of-memory error on the gradient calculation step. Why? I could only fix it by reducing the batch size from 64 to 40.



      ~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)
      603 dict: outputs
      604 """
      --> 605 self.fit(*args, **kwargs)
      606 return self.transform(*args, **kwargs)
      607

      ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)
      76 for batch_id, data in enumerate(batch_gen):
      77 self.callbacks.on_batch_begin()
      ---> 78 metrics = self._fit_loop(data)
      79 self.callbacks.on_batch_end(metrics=metrics)
      80 if batch_id == steps:

      ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)
      113 batch_loss = sum(partial_batch_losses.values())
      114 partial_batch_losses['sum'] = batch_loss
      --> 115 batch_loss.backward()
      116 self.optimizer.step()
      117

      ~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
      165 Variable.
      166 """
      --> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
      168
      169 def register_hook(self, hook):

      ~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
      97
      98 Variable._execution_engine.run_backward(
      ---> 99 variables, grad_variables, retain_graph)
      100
      101

      RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58









      share|improve this question









      $endgroup$




      Video card: GTX 1070 Ti 8 GB, batch size 64, input image size 128x128.
      I had this U-Net with a ResNet-152 encoder, which worked fine:



      class UNetResNet(nn.Module):
          """U-Net style segmentation network built on a torchvision ResNet encoder.

          The encoder stages (stem + pool, layer1..layer4) feed a centre block and a
          chain of decoder blocks; dec5..dec2 each receive the previous decoder
          output concatenated with the matching encoder feature map.

          Args:
              encoder_depth: ResNet variant used as encoder; one of 34, 101, 152.
              num_classes: number of output channels of the final 1x1 convolution.
              num_filters: base width of the decoder blocks.
              dropout_2d: probability passed to ``F.dropout2d`` before the final
                  convolution.
              pretrained: forwarded to the torchvision ResNet constructor.
              is_deconv: forwarded to every ``DecoderBlockV``.

          Raises:
              NotImplementedError: if ``encoder_depth`` is not 34, 101 or 152.
          """

          def __init__(self, encoder_depth, num_classes, num_filters=32, dropout_2d=0.2,
                       pretrained=False, is_deconv=False):
              super().__init__()
              self.num_classes = num_classes
              self.dropout_2d = dropout_2d

              # bottom_channel_nr is the channel count used for the deepest encoder
              # stage (layer4); the decoder sizes below use //2, //4 and //8 of it
              # for layer3, layer2 and layer1 respectively.
              if encoder_depth == 34:
                  self.encoder = torchvision.models.resnet34(pretrained=pretrained)
                  bottom_channel_nr = 512
              elif encoder_depth == 101:
                  self.encoder = torchvision.models.resnet101(pretrained=pretrained)
                  bottom_channel_nr = 2048
              elif encoder_depth == 152:
                  self.encoder = torchvision.models.resnet152(pretrained=pretrained)
                  bottom_channel_nr = 2048
              else:
                  raise NotImplementedError('only 34, 101, 152 version of Resnet are implemented')

              self.pool = nn.MaxPool2d(2, 2)

              self.relu = nn.ReLU(inplace=True)

              # Encoder stem followed by the 2x2 max-pool.
              self.conv1 = nn.Sequential(self.encoder.conv1,
                                         self.encoder.bn1,
                                         self.encoder.relu,
                                         self.pool)  # author's note: would like to get rid of this pool layer

              self.conv2 = self.encoder.layer1
              self.conv3 = self.encoder.layer2
              self.conv4 = self.encoder.layer3
              self.conv5 = self.encoder.layer4
              # The centre block is always built with is_deconv=False.
              self.center = DecoderCenter(bottom_channel_nr, num_filters * 8 * 2, num_filters * 8, False)

              # Input widths of dec5..dec2 are skip-connection channels plus the
              # output channels of the previous decoder stage.
              self.dec5 = DecoderBlockV(bottom_channel_nr + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
              self.dec4 = DecoderBlockV(bottom_channel_nr // 2 + num_filters * 8, num_filters * 8 * 2, num_filters * 8, is_deconv)
              self.dec3 = DecoderBlockV(bottom_channel_nr // 4 + num_filters * 8, num_filters * 4 * 2, num_filters * 2, is_deconv)
              self.dec2 = DecoderBlockV(bottom_channel_nr // 8 + num_filters * 2, num_filters * 2 * 2, num_filters * 2 * 2,
                                        is_deconv)
              self.dec1 = DecoderBlockV(num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconv)
              self.dec0 = ConvRelu(num_filters, num_filters)
              self.final = nn.Conv2d(num_filters, num_classes, kernel_size=1)

          def forward(self, x):
              """Run the encoder/decoder and return the ``num_classes``-channel map."""
              conv1 = self.conv1(x)
              conv2 = self.conv2(conv1)
              conv3 = self.conv3(conv2)
              conv4 = self.conv4(conv3)
              conv5 = self.conv5(conv4)
              center = self.center(conv5)
              # Skip connections: concatenate along the channel dimension (dim 1).
              dec5 = self.dec5(torch.cat([center, conv5], 1))
              dec4 = self.dec4(torch.cat([dec5, conv4], 1))
              dec3 = self.dec3(torch.cat([dec4, conv3], 1))
              dec2 = self.dec2(torch.cat([dec3, conv2], 1))
              dec1 = self.dec1(dec2)
              dec0 = self.dec0(dec1)

              # Pass the module's mode explicitly: the functional API does not know
              # about train()/eval(), so relying on its default makes dropout ignore
              # the current mode.
              return self.final(F.dropout2d(dec0, p=self.dropout_2d, training=self.training))
      # blocks
      class DecoderBlockV(nn.Module):
          """Decoder stage that upsamples its input by a factor of two.

          Args:
              in_channels: channels of the incoming tensor.
              middle_channels: channels after the first ``ConvRelu``.
              out_channels: channels produced by the block.
              is_deconv: if True, apply ``ConvRelu`` and then a stride-2
                  ``ConvTranspose2d`` with batch norm and ReLU; if False, apply a
                  bilinear ``nn.Upsample`` first and then two ``ConvRelu`` layers.
          """

          def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
              # The original called super(DecoderBlockV2, self); no DecoderBlockV2
              # is defined in the visible code, so construction raised NameError.
              super().__init__()
              self.in_channels = in_channels

              if is_deconv:
                  self.block = nn.Sequential(
                      ConvRelu(in_channels, middle_channels),
                      nn.ConvTranspose2d(middle_channels, out_channels, kernel_size=4, stride=2,
                                         padding=1),
                      nn.BatchNorm2d(out_channels),
                      nn.ReLU(inplace=True),
                  )
              else:
                  # Upsampling comes first, so both ConvRelu layers run on the
                  # 2x-upsampled tensor, whereas the deconv branch applies its
                  # ConvRelu before upsampling.
                  self.block = nn.Sequential(
                      nn.Upsample(scale_factor=2, mode='bilinear'),
                      ConvRelu(in_channels, middle_channels),
                      ConvRelu(middle_channels, out_channels),
                  )

          def forward(self, x):
              """Run ``x`` through the configured block and return the result."""
              return self.block(x)



      class DecoderCenter(nn.Module):
          """Centre block placed between the encoder output and the first decoder.

          Args:
              in_channels: channels of the incoming tensor.
              middle_channels: channels after the first ``ConvRelu``.
              out_channels: channels produced by the block.
              is_deconv: if True, follow the first ``ConvRelu`` with a stride-2
                  ``ConvTranspose2d``, batch norm and ReLU; otherwise with a second
                  ``ConvRelu``.
          """

          def __init__(self, in_channels, middle_channels, out_channels, is_deconv=True):
              super().__init__()
              self.in_channels = in_channels

              # Both variants start with the same ConvRelu layer.
              layers = [ConvRelu(in_channels, middle_channels)]
              if is_deconv:
                  # Parameters for deconvolution were chosen to avoid artifacts,
                  # following https://distill.pub/2016/deconv-checkerboard/
                  layers.append(nn.ConvTranspose2d(middle_channels, out_channels,
                                                   kernel_size=4, stride=2, padding=1))
                  layers.append(nn.BatchNorm2d(out_channels))
                  layers.append(nn.ReLU(inplace=True))
              else:
                  layers.append(ConvRelu(middle_channels, out_channels))

              self.block = nn.Sequential(*layers)

          def forward(self, x):
              """Apply the centre block to ``x``."""
              return self.block(x)


      Then I wanted to use Upsample instead of ConvTranspose2d, just by setting is_deconv = False in DecoderBlockV. That caused a CUDA out-of-memory error on the gradient calculation step. Why? I could only fix it by reducing the batch size from 64 to 40.



      ~/anaconda3/lib/python3.6/site-packages/steppy/base.py in fit_transform(self, *args, **kwargs)
      603 dict: outputs
      604 """
      --> 605 self.fit(*args, **kwargs)
      606 return self.transform(*args, **kwargs)
      607

      ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in fit(self, datagen, validation_datagen, meta_valid)
      76 for batch_id, data in enumerate(batch_gen):
      77 self.callbacks.on_batch_begin()
      ---> 78 metrics = self._fit_loop(data)
      79 self.callbacks.on_batch_end(metrics=metrics)
      80 if batch_id == steps:

      ~/Desktop/ml/salt/open-solution-salt-identification-master/common_blocks/models.py in _fit_loop(self, data)
      113 batch_loss = sum(partial_batch_losses.values())
      114 partial_batch_losses['sum'] = batch_loss
      --> 115 batch_loss.backward()
      116 self.optimizer.step()
      117

      ~/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
      165 Variable.
      166 """
      --> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
      168
      169 def register_hook(self, hook):

      ~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
      97
      98 Variable._execution_engine.run_backward(
      ---> 99 variables, grad_variables, retain_graph)
      100
      101

      RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1518244421288/work/torch/lib/THC/generic/THCStorage.cu:58






      python pytorch






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Sep 26 '18 at 18:39









      RocketqRocketq

      1235




      1235






















          1 Answer
          1






          active

          oldest

          votes


















          0












          $begingroup$

          I am having the same problem. For some reason Upsample uses more CUDA memory than ConvTranspose2d





          share








          New contributor




          Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
          Check out our Code of Conduct.






          $endgroup$













            Your Answer





            // When the "editor" module is in use, register a creation callback
            // that prepares each new Markdown editor for MathJax.
            StackExchange.ifUsing("editor", function () {
                var onEditorCreated = function (editor, postfix) {
                    StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["$", "$"], ["\\(","\\)"]]);
                };
                var registerCallback = function () {
                    StackExchange.MarkdownEditor.creationCallbacks.add(onEditorCreated);
                };
                return StackExchange.using("mathjaxEditing", registerCallback);
            }, "mathjax-editing");

            // Once the page is ready, set up the tag renderer and the answer editor.
            StackExchange.ready(function() {
                var channelOptions = {
                    tags: "".split(" "),
                    id: "557"
                };
                initTagRenderer("".split(" "), "".split(" "), channelOptions);

                StackExchange.using("externalEditor", function() {
                    // Have to fire editor after snippets, if snippets enabled
                    if (StackExchange.settings.snippets.snippetsEnabled) {
                        StackExchange.using("snippets", function() {
                            createEditor();
                        });
                    }
                    else {
                        createEditor();
                    }
                });

                // Configures the answer editor via StackExchange.prepareEditor.
                function createEditor() {
                    StackExchange.prepareEditor({
                        heartbeatType: 'answer',
                        autoActivateHeartbeat: false,
                        convertImagesToLinks: false,
                        noModals: true,
                        showLowRepImageUploadWarning: true,
                        reputationToPostImages: null,
                        bindNavPrevention: true,
                        postfix: "",
                        imageUploader: {
                            // The \u003c / \u003e escapes and the \" quotes in the two
                            // HTML strings had lost their backslashes, which left the
                            // string literals syntactically invalid; restored here.
                            brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                            contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                            allowUrls: true
                        },
                        onDemand: true,
                        discardSelector: ".discard-answer"
                        ,immediatelyShowMarkdownHelp:true
                    });
                }
            });














            draft saved

            draft discarded


















            // Once the page is ready, initialise the post-login flow for the
            // new-answer form.
            StackExchange.ready(function () {
                var returnUrl = 'https%3a%2f%2fdatascience.stackexchange.com%2fquestions%2f38835%2fusing-upsample-instead-of-convtranspose2d-causes-cuda-out-of-memory-on-gradient%23new-answer';
                StackExchange.openid.initPostLogin('.new-post-login', returnUrl, 'question_page');
            });

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            0












            $begingroup$

            I am having the same problem. For some reason Upsample uses more CUDA memory than ConvTranspose2d





            share








            New contributor




            Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
            Check out our Code of Conduct.






            $endgroup$


















              0












              $begingroup$

              I am having the same problem. For some reason Upsample uses more CUDA memory than ConvTranspose2d





              share








              New contributor




              Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
              Check out our Code of Conduct.






              $endgroup$
















                0












                0








                0





                $begingroup$

                I am having the same problem. For some reason Upsample uses more CUDA memory than ConvTranspose2d





                share








                New contributor




                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.






                $endgroup$



                I am having the same problem. For some reason Upsample uses more CUDA memory than ConvTranspose2d






                share








                New contributor




                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.








                share


                share






                New contributor




                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.









                answered 2 mins ago









                Johnny RiskasJohnny Riskas

                1




                1




                New contributor




                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.





                New contributor





                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.






                Johnny Riskas is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
                Check out our Code of Conduct.






























                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Data Science Stack Exchange!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    Use MathJax to format equations. MathJax reference.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    // Once the page is ready, initialise the post-login flow for the
                    // new-answer form.
                    StackExchange.ready(function () {
                        var returnUrl = 'https%3a%2f%2fdatascience.stackexchange.com%2fquestions%2f38835%2fusing-upsample-instead-of-convtranspose2d-causes-cuda-out-of-memory-on-gradient%23new-answer';
                        StackExchange.openid.initPostLogin('.new-post-login', returnUrl, 'question_page');
                    });

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Aikido

                    Tivadar Csontváry Kosztka

                    Metroo de Marsejlo