**Help needed with a recurring CUDA error**
- I keep getting the same error, and I don't understand what is causing it:
- RuntimeError Traceback (most recent call last)
Cell In[61], line 2
1 # Use a helper function to perform a sanity check on the train_epoch implementation
----> 2 helper_utils.verify_training_process(SimpleCNN, train_loader, loss_function, train_epoch, device)
File /tf/helper_utils.py:276, in verify_training_process(model_class, train_loader, loss_function, train_epoch_fn, device)
273 NUM_VERIFY_BATCHES = 10
275 # Instantiate the model and move it to the specified device
→ 276 verify_model = model_class(15).to(device)
277 # Initialize the Adam optimizer with a learning rate
278 verify_optimizer = optim.Adam(verify_model.parameters(), lr=0.0005)
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1343, in Module.to(self, *args, **kwargs)
1340 else:
1341 raise
→ 1343 return self._apply(convert)
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:903, in Module._apply(self, fn, recurse)
901 if recurse:
902 for module in self.children():
→ 903 module._apply(fn)
905 def compute_should_use_set_data(tensor, tensor_applied):
906 if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
907 # If the new tensor has compatible tensor type as the existing tensor,
908 # the current behavior is to change the tensor in-place using .data =,
(…) 913 # global flag to let the user control whether they want the future
914 # behavior of overwriting the existing tensor or not.
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:903, in Module._apply(self, fn, recurse)
901 if recurse:
902 for module in self.children():
→ 903 module._apply(fn)
905 def compute_should_use_set_data(tensor, tensor_applied):
906 if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
907 # If the new tensor has compatible tensor type as the existing tensor,
908 # the current behavior is to change the tensor in-place using .data =,
(…) 913 # global flag to let the user control whether they want the future
914 # behavior of overwriting the existing tensor or not.
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:903, in Module._apply(self, fn, recurse)
901 if recurse:
902 for module in self.children():
→ 903 module._apply(fn)
905 def compute_should_use_set_data(tensor, tensor_applied):
906 if torch._has_compatible_shallow_copy_type(tensor, tensor_applied):
907 # If the new tensor has compatible tensor type as the existing tensor,
908 # the current behavior is to change the tensor in-place using .data =,
(…) 913 # global flag to let the user control whether they want the future
914 # behavior of overwriting the existing tensor or not.
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:930, in Module._apply(self, fn, recurse)
926 # Tensors stored in modules are graph leaves, and we don't want to
927 # track autograd history of param_applied, so we have to use
928 # with torch.no_grad():
929 with torch.no_grad():
→ 930 param_applied = fn(param)
931 p_should_use_set_data = compute_should_use_set_data(param, param_applied)
933 # subclasses may have multiple child tensors so we need to use swap_tensors
File /usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1329, in Module.to.<locals>.convert(t)
1322 if convert_to_format is not None and t.dim() in (4, 5):
1323 return t.to(
1324 device,
1325 dtype if t.is_floating_point() or t.is_complex() else None,
1326 non_blocking,
1327 memory_format=convert_to_format,
1328 )
→ 1329 return t.to(
1330 device,
1331 dtype if t.is_floating_point() or t.is_complex() else None,
1332 non_blocking,
1333 )
1334 except NotImplementedError as e:
1335 if str(e) == "Cannot copy out of meta tensor; no data!":
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.

Could you please guide me through debugging this problem?