import torch
import torch.nn as nn
import torchtext.data as ttd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
data = {
    "label": [0, 1, 1],
    "data": [
        "I like eggs and ham.",
        "Eggs I like!",
        "Ham and eggs or just ham?"
    ]
}
df = pd.DataFrame(data)
df.head()
Recall that for images, a PyTorch CNN expects input of shape N x C x H x W
("feature first"),
whereas in TensorFlow / OpenCV / others it's N x H x W x C
("feature last").
The torchvision data loaders hide this detail.
In NLP, the output of an embedding layer is N x T x D ("feature last"),
but nn.Conv1d() expects N x D x T as input ("feature first").
So we must reshape before and after the convolutions.
Text Classification with CNNs
The output of the embedding layer is always (N, T, D),
but Conv1d expects (N, D, T),
so we call out.permute(0, 2, 1) before the convolutions
and change it back afterwards with another out.permute(0, 2, 1).
CNNs also give good results on this task.
import torch.nn.functional as F  # needed for F.relu below

class CNN(nn.Module):
    def __init__(self, n_vocab, embed_dim, n_outputs):
        super(CNN, self).__init__()
        self.V = n_vocab
        self.D = embed_dim
        self.K = n_outputs

        self.embed = nn.Embedding(self.V, self.D)
        self.conv1 = nn.Conv1d(self.D, 32, 3, padding = 1)
        self.pool1 = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(32, 64, 3, padding = 1)
        self.pool2 = nn.MaxPool1d(2)
        self.conv3 = nn.Conv1d(64, 128, 3, padding = 1)
        self.fc = nn.Linear(128, self.K)

    def forward(self, X):
        out = self.embed(X)            # (N, T, D)
        out = out.permute(0, 2, 1)     # (N, D, T) for Conv1d
        out = self.conv1(out)
        out = F.relu(out)
        out = self.pool1(out)
        out = self.conv2(out)
        out = F.relu(out)
        out = self.pool2(out)
        out = self.conv3(out)
        out = F.relu(out)
        out = out.permute(0, 2, 1)     # back to (N, T', 128)
        out, _ = torch.max(out, 1)     # global max pool over time
        out = self.fc(out)
        return out
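A quick shape check for the class above. This is a minimal sketch with made-up sizes (vocabulary of 1000, batch of 8, sequences of 50 tokens), not values from the course:

model = CNN(n_vocab = 1000, embed_dim = 20, n_outputs = 1)
dummy_tokens = torch.randint(0, 1000, (8, 50))   # a fake batch of token indices
print(model(dummy_tokens).shape)                 # torch.Size([8, 1])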
Making predictions with the trained NLP model
single_sentence = 'Our dating service has been asked 2 contast U by someone shy!'
toks = TEXT.preprocess(single_sentence)   # tokenize with the torchtext Field
sent_idx = TEXT.numericalize([toks])      # map tokens to vocabulary indices
model(sent_idx.to(device))                # raw output from the trained model
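To turn the raw output into a spam / not-spam decision, one option is to apply a sigmoid and threshold it. This is a sketch that assumes the model produces a single logit per sentence (binary classification):

with torch.no_grad():
    logit = model(sent_idx.to(device))
    prob = torch.sigmoid(logit)          # probability of the positive (spam) class
    print(prob.item(), (prob > 0.5).item())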
The notes below are a summary of the Udemy course PyTorch: Deep Learning and Artificial Intelligence.
GRU and LSTM
Modern RNN Units
LSTM
A GRU is like a simplified version of an LSTM (fewer parameters, and thus more efficient).
The reason a simple RNN is not enough is the vanishing gradient.
Using ReLU can help with the vanishing gradient, but GRU and LSTM turned out to be more effective solutions.
A simple RNN is described by a single equation: h(t) = tanh(W_xh x(t) + b_xh + W_hh h(t-1) + b_hh).
GRU (Gated Recurrent Unit)
Simple RNNs have no choice but to eventually forget, due to the vanishing gradient,
so they have trouble learning long-term dependencies.
The GRU uses binary classifiers (logistic regression neurons) as its gates:
the hidden state becomes a weighted sum of the previous hidden state and the new value (allowing it to remember the old state),
and the weights of that sum are controlled by "gates", which act like binary classifiers / logistic regressions / neurons
(a hand-written GRU step is sketched below).
The GRU has fewer parameters than the LSTM, and is therefore more efficient.
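A single GRU step written out by hand, to make the "gates are logistic regressions" idea concrete. This is a minimal sketch of one common GRU formulation; the function and weight names below are my own, not from the course:

import numpy as np

def sigmoid(a):
    return 1 / (1 + np.exp(-a))

def gru_step(x, h_prev, Wxr, Whr, br, Wxz, Whz, bz, Wxh, Whh, bh):
    r = sigmoid(x @ Wxr + h_prev @ Whr + br)              # reset gate: a logistic regression
    z = sigmoid(x @ Wxz + h_prev @ Whz + bz)              # update gate: a logistic regression
    h_hat = np.tanh(x @ Wxh + (r * h_prev) @ Whh + bh)    # candidate new hidden state
    return z * h_prev + (1 - z) * h_hat                   # weighted sum of old state and new value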
LSTM (Long Short-Term Memory)
Simple RNN GRU LSTM code
nn.RNN(
input_size = self.D,
hidden_size = self.M,
num_layers = self.L,
nonlinearity = 'relu',
batch_first = True
)
GRU
nn.GRU(
input_size = self.D,
hidden_size = self.M,
num_layers = self.L,
batch_first = True
)
LSTM
nn.LSTM(
input_size = self.D,
hidden_size = self.M,
num_layers = self.L,
batch_first = True
)
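All three recurrent layers can be dropped into the same module; only the constructor call changes (nn.LSTM also returns a cell state, which the underscore below absorbs). A minimal sketch, assuming batch-first N x T x D input; the class name RNNModel and the unit argument are my own:

class RNNModel(nn.Module):
    def __init__(self, n_inputs, n_hidden, n_layers, n_outputs, unit = 'lstm'):
        super(RNNModel, self).__init__()
        self.D, self.M, self.L, self.K = n_inputs, n_hidden, n_layers, n_outputs
        rnn_class = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}[unit]
        self.rnn = rnn_class(
            input_size = self.D,
            hidden_size = self.M,
            num_layers = self.L,
            batch_first = True
        )
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        out, _ = self.rnn(X)           # hidden (and cell) states default to zeros
        return self.fc(out[:, -1, :])  # keep only the last time step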
A more challenging Sequence
pytorch nonlinear sequence Linear code
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
series = np.sin((0.1 * np.arange(400)) ** 2)
Create the data
T = 10
D = 1
X = []
Y = []
for t in range(len(series) - T) :
x = series[t:t+T]
X.append(x)
y = series[t+T]
Y.append(y)
X = np.array(X).reshape(-1, T)
Y = np.array(Y).reshape(-1, 1)
N = len(X)
print(X.shape, " " , Y.shape)
model = nn.Linear(T, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.1)
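The notes skip the tensor conversion and training loop here; a minimal sketch of the missing piece (full-batch gradient descent, with the train/test split done by halves; it also defines the X_test used just below):

X_t = torch.from_numpy(X.astype(np.float32))
Y_t = torch.from_numpy(Y.astype(np.float32))
X_train, Y_train = X_t[:-N//2], Y_t[:-N//2]   # first half for training
X_test, Y_test = X_t[-N//2:], Y_t[-N//2:]     # second half for validation

for epoch in range(200):
    optimizer.zero_grad()
    loss = criterion(model(X_train), Y_train)
    loss.backward()
    optimizer.step()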
validation_target = Y[-N//2:]
with torch.no_grad():
validation_predictions = model(X_test).numpy()
The linear model gives terrible results on this nonlinear series.
pytorch nonlinear sequence SimpleRNN code
T = 10
D = 1
X = []
Y = []
for t in range(len(series) - T) :
x = series[t:t+T]
X.append(x)
y = series[t+T]
Y.append(y)
X = np.array(X).reshape(-1, T, 1)  # make it N x T x D (D = 1)
Y = np.array(Y).reshape(-1, 1)
N = len(X)
print(X.shape, " " , Y.shape)
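The rest of this section reuses the SimpleRNN model (its class is written out further down in these notes); the only new point is that the input now keeps the extra feature dimension. A short sketch (the hidden size here is an arbitrary choice of mine):

X_t = torch.from_numpy(X.astype(np.float32))   # (N, T, 1): the RNN expects N x T x D
Y_t = torch.from_numpy(Y.astype(np.float32))   # (N, 1)
model = SimpleRNN(n_inputs = 1, n_hidden = 15, n_rnnlayers = 1, n_outputs = 1)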
The notes below are a summary of the Udemy course PyTorch: Deep Learning and Artificial Intelligence.
44. Sequence Data
Sequence data
time series
airline passengers
speech / audio
text
bag of words example
email -> spam vs. not spam
sequence ?
1-D series signal
linear regression
Shape of a sequence: N x T x D
N = # samples
D = # features
T = # time steps in the sequence
e.g., GPS data from people's cars (a toy array with this shape is sketched below):
N: one sample would be one person's single trip to work
D = 2: the GPS records (latitude, longitude) pairs
T: the number of (lat, lng) measurements taken from start to finish of a single trip
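A minimal sketch of an array with this shape (made-up values, just to make N x T x D concrete):

import numpy as np
N, T, D = 3, 5, 2                  # 3 trips, 5 GPS readings per trip, (lat, lng) per reading
gps = np.random.randn(N, T, D)     # stand-in for real coordinates
print(gps.shape)                   # (3, 5, 2)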
What about variable-length sequences (where T differs per sample)?
Some conventions put the features first instead: N x D x T (this is what PyTorch's Conv1d expects).
image data: N x H x W x C (TensorFlow / OpenCV / others)
N x C x H x W (PyTorch, Theano)
In PyTorch, N comes first and the feature maps (C) come right after.
45. Forecasting
RNNs
Linear Regression
We have to make predictions in a loop, feeding each prediction back in as the next input (pseudocode below; a PyTorch version is sketched after the model definitions).
x = last values of train set
predictions = []
for i in range(length_of_forecast):
x_next = model.predict(x)
predictions.append(x_next)
x = concat(x[1:], x_next)
model = nn.Linear(1,1)
model = nn.Sequential(nn.Linear(1, 10), nn.ReLU(), nn.Linear(10, 1))
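A PyTorch version of the forecast loop above. This is a sketch that assumes a trained autoregressive model such as nn.Linear(T, 1) and the X / N arrays from the data-construction code elsewhere in these notes:

validation_predictions = []
last_x = torch.from_numpy(X[-N//2].astype(np.float32))   # first input window of the test period, shape (T,)

with torch.no_grad():
    while len(validation_predictions) < N//2:
        p = model(last_x.reshape(1, -1))[0, 0].item()     # one-step-ahead prediction
        validation_predictions.append(p)
        # drop the oldest value and append the prediction, so forecasts feed on forecasts
        last_x = torch.cat((last_x[1:], torch.tensor([p], dtype = torch.float32)))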
46. Autoregressive Linear Model for Time Series
Create the model
model = nn.Linear(T, 1)
pytorch RNN series data
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
Create the data
N = 1000
series = np.sin(0.1 * np.arange(N))
T = 10
X = []
Y = []
for t in range(len(series) - T):
x = series[t:t+T]
X.append(x)
y = series[t+T]
Y.append(y)
X = np.array(X).reshape(-1, T)
Y = np.array(Y).reshape(-1, 1)
N = len(X)
print("X.shape" , X.shape, "Y.shape" , Y.shape)
build the model
model = nn.Linear(T, 1)
loss and optimizer
Since this is regression, we use mean squared error (MSE) loss.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.1)
out, _ = self.rnn(X, h0)       # out holds the hidden state of each time step
out = self.fc(out[:, -1, :])   # use only the last time step
return out
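The full class these fragments belong to isn't written out in the notes; a minimal sketch consistent with the constructor call below (the attribute names follow the course's D/M/L/K convention; the rest is my reconstruction):

class SimpleRNN(nn.Module):
    def __init__(self, n_inputs, n_hidden, n_rnnlayers, n_outputs):
        super(SimpleRNN, self).__init__()
        self.D = n_inputs
        self.M = n_hidden
        self.L = n_rnnlayers
        self.K = n_outputs
        self.rnn = nn.RNN(
            input_size = self.D,
            hidden_size = self.M,
            num_layers = self.L,
            nonlinearity = 'relu',
            batch_first = True
        )
        self.fc = nn.Linear(self.M, self.K)

    def forward(self, X):
        h0 = torch.zeros(self.L, X.size(0), self.M).to(X.device)
        out, _ = self.rnn(X, h0)        # hidden state at every time step
        out = self.fc(out[:, -1, :])    # keep only the last time step
        return out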
train the model
Initialize the model: model = SimpleRNN(n_inputs = 1, n_hidden = 5, n_rnnlayers = 1, n_outputs = 1)
model.to(device)
loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.1)
make inputs and targets
evaluate the model
make predictions
N x T x D -> N x K (output)
input_ = X_test[i].reshape(1, T, 1)   # one window, shaped (1, T, D)
p = model(input_)[0, 0].item()        # scalar one-step-ahead prediction
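For a true multi-step forecast with the RNN, the same shifting-window idea from the forecasting pseudocode applies, only with the N x T x 1 shape. A sketch, assuming X_test is a float32 tensor of shape (N_test, T, 1):

validation_predictions = []
last_x = X_test[0].clone()                        # first test window, shape (T, 1)
with torch.no_grad():
    while len(validation_predictions) < len(X_test):
        p = model(last_x.reshape(1, T, 1))[0, 0].item()
        validation_predictions.append(p)
        # drop the oldest value, append the new prediction
        last_x = torch.cat((last_x[1:], torch.tensor([[p]], dtype = last_x.dtype)))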
50. RNN for Time Series Prediction
simple rnn sine
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
create data
N = 1000
series = np.sin(0.1 * np.arange(N))
plt.plot(series)
plt.show()
create dataset
T = 10
X = []
Y = []
for t in range(len(series) - T):
x = series[t:t+T]
X.append(x)
y = series[t+T]
Y.append(y)
X = np.array(X).reshape(-1, T, 1)
Y = np.array(Y).reshape(-1, 1)
N = len(X)
print("X.shape" , X.shape, "Y.shape" , Y.shape)
Use CUDA if available
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print(device)
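The notes jump straight to pulling weights out of a trained model here; a sketch of the step that's missing, assuming a trained SimpleRNN-style model whose layers are model.rnn and model.fc (the names W_xh, W_hh, b_xh, b_hh, wo, bo match the manual forward pass below):

W_xh, W_hh, b_xh, b_hh = model.rnn.parameters()   # input-to-hidden / hidden-to-hidden weights and biases
wo, bo = model.fc.parameters()                    # output layer weight and bias

W_xh = W_xh.data.numpy()
W_hh = W_hh.data.numpy()
b_xh = b_xh.data.numpy()
b_hh = b_hh.data.numpy()
M = W_hh.shape[0]   # hidden size
K = wo.shape[0]     # number of outputs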
wo = wo.data.numpy()
bo = bo.data.numpy()
wo.shape, bo.shape

h_last = np.zeros(M)       # initial hidden state
x = X[0]                   # one input window, shape (T, 1)
yhats = np.zeros((T, K))   # manual output at each time step

for t in range(T):
    h = np.tanh(x[t].dot(W_xh.T) + b_xh + h_last.dot(W_hh.T) + b_hh)   # the RNN recurrence
    y = h.dot(wo.T) + bo                                               # the output layer
    yhats[t] = y
    h_last = h

print(yhats)
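As a check, the last row of yhats can be compared with the model's own output for the same window (a sketch; small differences from float precision are expected):

with torch.no_grad():
    model_out = model(torch.from_numpy(x.reshape(1, T, 1).astype(np.float32)))
print(yhats[-1], model_out.numpy())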
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime