I am running PyTorch 2.0.0 CPU. My Python version is 3.10.10 64-bit. Here is my code:
import torch
import torch.nn as nn
import string

# define the vocabulary of characters
vocab = string.ascii_letters + " ."

# define the size of the vocabulary and the hidden state
vocab_size = len(vocab)
hidden_size = 16

# define a mapping from characters to indices and vice versa
char_to_index = {c: i for i, c in enumerate(vocab)}
index_to_char = {i: c for i, c in enumerate(vocab)}

class RNN(nn.Module):
    def __init__(self, n):
        # initialize the parent class
        super(RNN, self).__init__()
        self.n = n
        # define the embedding layer that maps indices to vectors
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        # define the recurrent layer that updates the hidden state
        self.recurrent = nn.Linear(hidden_size, hidden_size)
        # define the output layer that maps hidden state to logits
        self.output = nn.Linear(hidden_size, vocab_size)
        torch.autograd.set_detect_anomaly(True)

    def forward(self, x, h):
        # x is a tensor of shape (self.n) containing indices
        # h is a tensor of shape (1, hidden_size) containing a hidden state
        # embed x into a vector of shape (1, hidden_size)
        x = self.embedding(x)
        # update h with x using a tanh activation function and non-inplace addition
        h_new = torch.tanh(self.recurrent(x).add(h))
        # compute logits from h_new using a linear layer
        logits = self.output(h_new)
        return logits, h_new

    def update(self, text):
        # text is a string containing user input
        # initialize an optimizer and a loss function
        optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
        criterion = nn.CrossEntropyLoss()
        # loop through each character in text except the last self.n ones
        for i in range(len(text) - self.n):
            # get the current and next characters as indices
            current_chars = [char_to_index[c] for c in text[i:i+self.n]]
            next_char = char_to_index[text[i+self.n]]
            # convert them to tensors of shape (self.n) and (1) respectively
            current_chars = torch.tensor(current_chars)
            next_char = torch.tensor([next_char])
            # zero out the gradients from previous step
            optimizer.zero_grad()
            # forward pass through the model and get logits and new hidden state
            logits, self.h = self.forward(current_chars, self.h)
            # compute loss between logits and next_char
            loss = criterion(logits.view(1, -1), next_char.view(1))
            print(f"Loss: {loss.item():.4f}")
            # backward pass to compute gradients
            loss.backward()
            # update parameters with gradient descent
            optimizer.step()

    def generate(self, start):
        # start is a string of length self.n to start with
        # get the indices of the start characters
        start_indices = [char_to_index[c] for c in start]
        # convert them to a tensor of shape (self.n)
        start_indices = torch.tensor(start_indices)
        # initialize the output with the start characters
        output = [c for c in start]
        # loop until reaching a period or a maximum length
        while output[-1] != "." and len(output) < 100:
            # forward pass through the model and get logits and new hidden state
            logits, self.h = self.forward(start_indices, self.h)
            # apply softmax to get probabilities
            probs = torch.softmax(logits.view(-1), dim=0)
            # sample a next index from the probabilities
            next_index = torch.multinomial(probs, 1).item()
            # get the next character from the index
            next_char = index_to_char[next_index]
            # append it to the output
            output.append(next_char)
            # update the start indices with the next index
            start_indices[:-1] = start_indices[1:]
            start_indices[-1] = next_index
        # join and return the output as a string
        return "".join(output)

if __name__ == '__main__':
    # create a new RNN model with context size
    model = RNN(1)
    # initialize a random hidden state of shape (1, hidden_size)
    model.h = torch.randn(1, hidden_size)
    # update the model with some user input
    model.update("hello world.")
    # generate some text starting with "he"
    print(model.generate("he"))
And here is the output when I run it:
Loss: 4.5443
Loss: 4.4064
C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py:200: UserWarning: Error detected in TanhBackward0. Traceback of forward call that caused the error:
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
    model.update("hello world.") #open('english.txt', 'r').read())
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 57, in update
    logits, self.h = self.forward(current_chars, self.h)
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 34, in forward
    h_new = torch.tanh(self.recurrent(x).add(h))
 (Triggered internally at ..\torch\csrc\autograd\python_anomaly_mode.cpp:119.)
  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 100, in <module>
    model.update("hello world.") #open('english.txt', 'r').read())
  File "c:\Users\pythonic\Documents\python\bing\markov\v4.py", line 62, in update
    loss.backward()
  File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "C:\Users\pythonic\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\autograd\__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
I am not super experienced with Python or PyTorch, so please forgive me if this is a stupid question. All the research I did indicated that I was modifying the graph variables (or something like that) in the wrong place.
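If I understand the error right, the problem is that self.h keeps the computation graph of the previous iteration attached to it, so the next loss.backward() tries to walk back into a graph whose buffers were already freed by the previous backward(). A stripped-down sketch of that pattern (not my real code, just my guess at a minimal reproduction):

import torch
import torch.nn as nn

lin = nn.Linear(4, 4)
h = torch.zeros(1, 4)

for step in range(2):
    # h still carries the graph built in the previous iteration
    h = torch.tanh(lin(torch.randn(1, 4)) + h)
    loss = h.sum()
    # on the second pass this raises "Trying to backward through the graph a second time"
    loss.backward()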
I have tried loss.backward(retain_graph=True):
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [16, 16]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
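As far as I can tell, retain_graph=True on its own doesn't help because optimizer.step() updates the layer weights in place, while the graph retained from the previous step saved the weights as they were before the update and refuses to run backward once they have been modified. Again a stripped-down sketch, not my real code:

import torch
import torch.nn as nn

lin = nn.Linear(4, 4)
opt = torch.optim.SGD(lin.parameters(), lr=0.1)
h = torch.zeros(1, 4)

for step in range(2):
    opt.zero_grad()
    h = torch.tanh(lin(torch.randn(1, 4)) + h)  # the old graph is still attached to h
    loss = h.sum()
    # on the second pass this raises the "modified by an inplace operation" error,
    # because backward reaches the retained graph, which saved lin.weight
    # before opt.step() changed it in place
    loss.backward(retain_graph=True)
    opt.step()  # updates lin.weight and lin.bias in place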
That "Good luck!" slightly worries me xD
Fixed it. Here are the modified forward and update functions.
def forward(self, x, h):
    x = self.embedding(x)
    # use a clone of h so the original tensor is not overwritten
    h_new = torch.tanh(self.recurrent(x) + h.clone())
    return self.output(h_new), h_new

def update(self, text):
    optimizer = torch.optim.SGD(self.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    for i in range(len(text) - self.n):
        optimizer.zero_grad()
        current_chars = [char_to_index[c] for c in text[i:i+self.n]]
        next_char = char_to_index[text[i+self.n]]
        current_chars = torch.tensor(current_chars)
        next_char = torch.tensor([next_char])
        # pass in a detached hidden state so backward() stops at this step
        logits, h_new = self.forward(current_chars, self.h.detach())
        loss = criterion(logits.view(1, -1), next_char.view(1))
        loss.backward(retain_graph=True)
        optimizer.step()
        # keep only the values of the new hidden state, not its graph
        self.h = h_new.detach()
        print(f"Loss: {loss.item():.4f}")
It turned out I needed to .detach() and also .clone() the h variable so it doesn't get overwritten. I also needed retain_graph=True to make it work. This probably uses a lot of memory, but I haven't had an issue with it yet.
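For reference, here is a stripped-down sketch of the general "detach the hidden state every step" pattern (truncated backpropagation through time) that this fix boils down to. The names lin, opt and h are placeholders, not parts of my model, and in this simplified version each step builds a fresh graph, so retain_graph=True shouldn't strictly be necessary; I'm keeping it in my code above anyway since it works for me.

import torch
import torch.nn as nn

lin = nn.Linear(4, 4)
opt = torch.optim.SGD(lin.parameters(), lr=0.1)
h = torch.zeros(1, 4)

for step in range(5):
    opt.zero_grad()
    # detach() cuts the link to earlier steps, so backward() stays inside this step
    h_new = torch.tanh(lin(torch.randn(1, 4)) + h.detach())
    loss = h_new.sum()  # dummy loss just for the sketch
    loss.backward()
    opt.step()
    # carry forward only the values of the hidden state, not its history
    h = h_new.detach()

The way I think about it now: detaching decides how far back the gradients flow, while retain_graph only controls whether the graph's saved buffers are kept around after backward().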