1. 텐서
- Tensor는 다차원 배열을 다루기 위한 자료구조로, NumPy의 배열(ndarray)과 비슷하다.
- 여러 데이터 타입(dtype)이 있으며, 아래와 같다. 입력 데이터는 보통 float 타입(torch.float32)을 사용하고, 분류(Classification) 문제의 라벨에는 정수형인 torch.long(LongTensor)을 사용한다.
- 부동소수점 타입:
- torch.float16 또는 torch.half: 반정밀도 부동소수점
- torch.float32 또는 torch.float: 단정밀도 부동소수점 (기본값)
- torch.float64 또는 torch.double: 배정밀도 부동소수점
- 정수 타입:
- torch.int8: 8-bit 부호 있는 정수
- torch.uint8: 8-bit 부호 없는 정수
- torch.int16 또는 torch.short: 16-bit 정수
- torch.int32 또는 torch.int: 32-bit 정수
- torch.int64 또는 torch.long: 64-bit 정수
2. 텐서 shape 확인 & 변경
import torch

# Create a 1-D tensor holding the integers 0..11.
sample = torch.arange(12)
# tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

# Check the tensor shape.
sample.shape
# torch.Size([12])
# or, equivalently
sample.size()
# torch.Size([12])

# Change the tensor shape. reshape() returns the reshaped tensor;
# the shape of `sample` itself is left untouched.
sample.reshape(3, 4)
# tensor([[ 0,  1,  2,  3],
#         [ 4,  5,  6,  7],
#         [ 8,  9, 10, 11]])
sample.reshape(2, 3, 2)
# tensor([[[ 0,  1],
#          [ 2,  3],
#          [ 4,  5]],
#         [[ 6,  7],
#          [ 8,  9],
#          [10, 11]]])
3. GPU 연산을 위한 Tensor 생성
# A tensor can be built directly from a (nested) Python list.
torch.tensor([[1, 2, 3], [4, 5, 6]])
# tensor([[1, 2, 3],
#         [4, 5, 6]])

# For GPU computation the target device can be given at creation time.
# Guarded so that the snippet also runs on machines without CUDA.
if torch.cuda.is_available():
    torch.tensor([[1, 2, 3], [4, 5, 6]], device="cuda")
    # tensor([[1, 2, 3],
    #         [4, 5, 6]], device='cuda:0')

# Alternatively, pick the device first and move the tensor with .to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.tensor([[1, 2, 3], [4, 5, 6]]).to(device)
4. 모델 구성
import torch.nn as nn
import torch
class ResBlock(nn.Module):
    """1-D residual block: two Conv1d + BatchNorm1d stages plus a skip connection.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
        kernel_size: Convolution kernel size. Padding is ``kernel_size // 2``,
            which keeps the sequence length unchanged only for odd kernel
            sizes; with an even size the main path comes out one step longer
            than the shortcut and the residual addition fails.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3):
        super().__init__()
        padding = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, padding=padding)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        # An empty Sequential returns its input unchanged, i.e. an identity shortcut.
        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            # A 1x1 convolution brings the shortcut to the main path's channel count.
            # NOTE(review): the original listing was cut off after this layer;
            # if a BatchNorm1d followed it there, add it back here.
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1),
            )

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + residual
        return self.relu(out)
class CRNN(nn.Module):
    """Convolutional-recurrent classifier.

    Pipeline: three ``ResBlock`` stages (kernel size 5) -> dropout ->
    2-layer bidirectional LSTM -> LayerNorm -> last time step ->
    Linear -> dropout -> Linear.

    Args:
        in_channels: Channel count of the input passed to the first ResBlock.
        num_filters: Channel count produced by every ResBlock; also the LSTM
            input size.
        num_classes: Size of the final output layer.
        hidden_size: LSTM hidden size per direction.
        dropout_rate: Probability used by both dropout layers.
    """

    def __init__(self, in_channels, num_filters, num_classes, hidden_size=64, dropout_rate=0.5):
        super().__init__()
        # A bidirectional LSTM concatenates both directions on the feature axis.
        lstm_out_dim = hidden_size * 2

        # Convolutional feature extractor
        self.resblock1 = ResBlock(in_channels, num_filters, kernel_size=5)
        self.resblock2 = ResBlock(num_filters, num_filters, kernel_size=5)
        self.resblock3 = ResBlock(num_filters, num_filters, kernel_size=5)
        self.dropout_cnn = nn.Dropout(dropout_rate)

        # Recurrent part
        self.bi_lstm = nn.LSTM(
            num_filters,
            hidden_size,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )
        self.layer_norm = nn.LayerNorm(lstm_out_dim)

        # Classification head
        self.fc = nn.Linear(lstm_out_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.dropout_fc = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a batch of sequences to per-class scores of size ``num_classes``."""
        # Convolutional stage
        features = x
        for block in (self.resblock1, self.resblock2, self.resblock3):
            features = block(features)
        features = self.dropout_cnn(features)

        # Recurrent stage: swap the last two axes so the feature axis is last,
        # as the batch_first LSTM expects.
        sequence = features.permute(0, 2, 1)
        lstm_out, _ = self.bi_lstm(sequence)
        normed = self.layer_norm(lstm_out)
        # Keep only the final time step of the normalised LSTM output.
        last_step = normed[:, -1, :]

        # Classification head
        hidden = self.dropout_fc(self.fc(last_step))
        return self.fc2(hidden)
5. Train code
# train code
def _forward_logits(model, inputs, mn):
    """Run ``model`` on ``inputs`` and return only the tensor used as class scores."""
    if mn == 'crnn':
        return model(inputs)
    if mn == 'atlstm':
        # This variant returns a pair; only its first item is used here.
        outputs, _ = model(inputs)
        return outputs
    raise ValueError(f"unknown model name: {mn!r} (expected 'crnn' or 'atlstm')")


def _run_epoch(model, dataloader, criterion, DEVICE, mn, optimizer=None):
    """Do one pass over ``dataloader``; update weights only if ``optimizer`` is given.

    Returns:
        ``(mean_batch_loss, accuracy)`` where accuracy is the number of correct
        predictions divided by ``len(dataloader.dataset)``.
    """
    running_loss = 0.0
    corrects = 0
    num_batches = 0
    for inputs, labels in dataloader:
        inputs, labels = inputs.to(DEVICE), labels.squeeze(-1).to(DEVICE)
        if optimizer is not None:
            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()
        outputs = _forward_logits(model, inputs, mn)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        corrects += torch.sum(torch.argmax(outputs, dim=1) == labels).item()
        running_loss += loss.item()
        num_batches += 1

    num_samples = len(dataloader.dataset)
    # Guard the divisions so an empty loader yields 0.0 instead of raising.
    mean_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = corrects / num_samples if num_samples else 0.0
    return mean_loss, accuracy


def train(args, model, train_dataloader, val_dataloader, criterion, optimizer, lr_scheduler, DEVICE, mn='crnn'):
    """Train ``model`` for ``args.EPOCHS`` epochs, validating after each epoch.

    Args:
        args: Object exposing ``EPOCHS`` (number of epochs).
        model: Module to train.
        train_dataloader: Loader yielding ``(inputs, labels)`` training batches.
        val_dataloader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer used to update the model parameters.
        lr_scheduler: Accepted for interface compatibility; it is not stepped
            here (see the note at the end of the epoch loop).
        DEVICE: Device the batches are moved to.
        mn: Model kind, ``'crnn'`` (model returns the scores) or ``'atlstm'``
            (model returns a pair whose first item is the scores).

    Returns:
        ``(train_losses, val_losses, train_acc, val_acc)``, each a list with
        one entry per epoch.

    Raises:
        ValueError: If ``mn`` is neither ``'crnn'`` nor ``'atlstm'``.
    """
    best_accuracy = 0
    train_acc = []
    val_acc = []
    train_losses = []
    val_losses = []

    for epoch in range(args.EPOCHS):
        model.train()  # TRAIN MODE
        epoch_loss, accuracy = _run_epoch(
            model, train_dataloader, criterion, DEVICE, mn, optimizer=optimizer
        )
        train_losses.append(epoch_loss)
        train_acc.append(accuracy)

        model.eval()  # EVAL MODE
        with torch.no_grad():  # no gradients are needed for validation
            epoch_loss_val, accuracy = _run_epoch(
                model, val_dataloader, criterion, DEVICE, mn
            )
        val_losses.append(epoch_loss_val)
        val_acc.append(accuracy)

        if accuracy > best_accuracy:
            best_accuracy = accuracy

        print(f"Epoch [{epoch + 1}/{args.EPOCHS}], \tTrain Loss: {epoch_loss:.4f}, \tValid Loss: {epoch_loss_val:.4f}, \tTrain Acc: {train_acc[-1]:.4f}, \tValid Acc: {val_acc[-1]:.4f}")

        # NOTE: scheduler stepping is intentionally left disabled, as in the
        # original. Enable the call that matches the scheduler in use:
        #   lr_scheduler.step(epoch_loss_val)  # for ReduceLROnPlateau
        #   lr_scheduler.step()                # for epoch-based schedulers

    print(f'BEST ACCURACY : {best_accuracy:.4f}')
    return train_losses, val_losses, train_acc, val_acc
6. Test code (+Confusion Matrix)
from sklearn.metrics import confusion_matrix, classification_report
#confusion matrix function
def plot_cm(args, model, dl, categories, normalize='true', mn='crnn'):
    """Plot a confusion-matrix heatmap and print a classification report.

    Runs ``model`` over every batch of ``dl`` in eval mode without gradient
    tracking, collects true and predicted class indices, then draws the
    confusion matrix and prints the per-class report.

    Relies on ``confusion_matrix``, ``classification_report`` and ``sns``
    being available in the enclosing module. NOTE(review): no import binding
    ``sns`` is visible in this file; it is presumably seaborn - confirm.

    Args:
        args: Object exposing ``DEVICE`` (device the inputs are moved to).
        model: Trained module to evaluate.
        dl: Loader yielding ``(inputs, labels)`` batches.
        categories: Mapping whose values are the class display names, in
            class-index order.
        normalize: Forwarded to ``confusion_matrix``.
        mn: Model kind, ``'crnn'`` (model returns the scores) or ``'atlstm'``
            (model returns a pair whose first item is the scores).

    Raises:
        ValueError: If ``mn`` is neither ``'crnn'`` nor ``'atlstm'``.
    """
    if mn not in ('crnn', 'atlstm'):
        raise ValueError(f"unknown model name: {mn!r} (expected 'crnn' or 'atlstm')")

    y_pred = []
    y_true = []

    # Evaluate with dropout/batch-norm in inference mode, then restore the
    # caller's mode so this function has no lasting effect on the model.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in dl:
                x = x.to(args.DEVICE)
                if mn == 'crnn':
                    output = model(x)
                else:
                    output, _ = model(x)
                y_pred.extend(torch.argmax(output, dim=1).cpu().tolist())
                # Flatten so labels of shape (batch, 1) and (batch,) both work.
                y_true.extend(y.reshape(-1).cpu().tolist())
    finally:
        model.train(was_training)

    # Use the `categories` argument (not a module-level global) for all labels.
    label_names = list(categories.values())
    cm = confusion_matrix(y_true, y_pred, normalize=normalize)
    sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues', xticklabels=label_names, yticklabels=label_names)
    print(classification_report(y_true, y_pred, target_names=label_names))
# Draw the confusion matrix. `val_dl` and `class_names` are not defined in the
# visible code - presumably the validation DataLoader and the class-name
# mapping (TODO confirm).
plot_cm(args, model, val_dl, class_names)
