Python Code

이미지 데이터 딥러닝 학습 코드

Kimhj 2023. 10. 25. 09:12
  • 이미지 샘플 데이터 확인
import os, cv2
import matplotlib.pyplot as plt
from random import shuffle  # BUG FIX: shuffle() was called below without being imported

DATA_PATH = './'

# Preview four random sample images from the data directory.
# NOTE(review): os.listdir also returns non-image entries; assumes the
# directory holds only readable images — confirm against the dataset layout.
fig = plt.figure(figsize=(16, 5))
fig.suptitle("Positive", size=22)
img_paths = os.listdir(DATA_PATH)
shuffle(img_paths)	# shuffle images

for i, image in enumerate(img_paths[:4]):
    img = cv2.imread(os.path.join(DATA_PATH, image))
    # BUG FIX: OpenCV decodes to BGR channel order; convert to RGB so
    # matplotlib displays the true colors.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.subplot(1, 4, i+1, frameon=False)
    plt.imshow(img)
fig.show()

 

  • Image Transform
# ImageNet channel-wise statistics used to normalize inputs for the
# pretrained backbone.
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

data_transforms = {
    # Training: resize plus light augmentation (rotation, horizontal flip).
    'train': transforms.Compose([
        transforms.Resize((150, 150)),
        transforms.RandomRotation(10),
        transforms.RandomHorizontalFlip(p=0.4),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_nums, std=std_nums),
    ]),
    # Validation: deterministic preprocessing only.
    # BUG FIX: this pipeline was wrapped in a second, redundant
    # transforms.Compose([...]) — harmless at runtime but inconsistent
    # with the 'train' entry; flattened to a single Compose.
    'val': transforms.Compose([
        transforms.Resize((150, 150)),
        transforms.CenterCrop(150),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_nums, std=std_nums),
    ]),
}

 

  • Data Split & DataLoader (Train & Validation)
from torch.utils.data.sampler import SubsetRandomSampler

# Data Split

def load_split_train_test(datadir, valid_size=0.2, batch_size=8):
    """Split a single ImageFolder directory into train/val DataLoaders.

    The directory is loaded twice (once with the 'train' transforms, once
    with 'val') and a random index split selects which samples each loader
    draws via SubsetRandomSampler.

    Args:
        datadir: root directory in torchvision ImageFolder layout.
        valid_size: fraction of samples reserved for validation.
        batch_size: mini-batch size for both loaders (was hard-coded to 8;
            now a parameter with the same default, so existing calls behave
            identically).

    Returns:
        (train_loader, test_loader, dataset_size) where dataset_size maps
        'train'/'val' to the number of indices in each split.
    """
    train_data = datasets.ImageFolder(datadir,
                                      transform=data_transforms['train'])
    test_data = datasets.ImageFolder(datadir,
                                     transform=data_transforms['val'])

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    # Shuffle once, then carve off the first `split` indices for validation.
    np.random.shuffle(indices)
    train_idx, test_idx = indices[split:], indices[:split]
    dataset_size = {'train': len(train_idx), 'val': len(test_idx)}

    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               sampler=train_sampler,
                                               batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              sampler=test_sampler,
                                              batch_size=batch_size)

    return train_loader, test_loader, dataset_size

# Build the train/val loaders and collect bookkeeping the training loop needs.
train_loader, val_loader, dataset_size = load_split_train_test(DATA_PATH, 0.2)

data_loaders = {'train': train_loader, 'val': val_loader}
# Split sizes come from each loader's sampler (the subset it actually draws).
data_sizes = {phase: len(data_loaders[phase].sampler)
              for phase in ('train', 'val')}

# Class labels discovered by ImageFolder from the directory names.
class_names = train_loader.dataset.classes
print(class_names)

 

  • DataLoader (Test)
TEST_DATA_PATH = './test'

# Held-out test preprocessing: deterministic only — no augmentation,
# same resize and normalization as the validation pipeline.
test_transforms = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean_nums, std=std_nums),
])

test_image = datasets.ImageFolder(TEST_DATA_PATH, transform=test_transforms)
# One image per batch so predictions map 1:1 onto files.
test_loader = torch.utils.data.DataLoader(test_image, batch_size=1)

 

  • Pre-trained model Load
# Model
import torch.nn as nn

def CNN_Model(pretrained=True):
    """Build a DenseNet-121 whose classifier head matches len(class_names).

    Args:
        pretrained: load ImageNet weights for the backbone.

    Returns:
        The model, moved onto the global `device`.
    """
    net = models.densenet121(pretrained=pretrained)
    head_in = net.classifier.in_features
    # Swap the 1000-class ImageNet head for one sized to this dataset.
    net.classifier = nn.Linear(head_in, len(class_names))
    return net.to(device)

model = CNN_Model(pretrained=True)

# Loss: multi-class cross entropy over the classifier logits.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over every model parameter.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# LR schedule: multiply the learning rate by 0.1 every 7 epochs.
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

 

  • Training Code
# Training
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train `model` and return it loaded with the best-validation-loss weights.

    Runs a train phase and a val phase per epoch over the global
    `data_loaders`/`data_sizes`, tracks loss, accuracy and (on val) the
    Cohen kappa score, and checkpoints weights whenever validation loss
    improves.

    Args:
        model: the network to optimize (already on `device`).
        criterion: loss function (e.g. CrossEntropyLoss).
        optimizer: optimizer stepping `model`'s parameters.
        scheduler: epoch-based LR scheduler (e.g. StepLR).
        num_epochs: number of epochs to run.

    Returns:
        The model with the lowest-val-loss state_dict restored.
    """
    since = time.time()  # start time

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.inf

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 20)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            current_loss = 0.0
            current_acc = 0
            val_kappa = list()

            for inputs, labels in tqdm.tqdm(data_loaders[phase], desc=phase, leave=False):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are tracked only during the train phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate sum-of-loss (weighted by batch size) and correct counts.
                current_loss += loss.item() * inputs.size(0)
                current_acc += torch.sum(preds == labels.data)

                val_kappa.append(cohen_kappa_score(preds.cpu().numpy(), labels.data.cpu().numpy()))

            # BUG FIX: scheduler.step() used to run once per *batch*, but
            # StepLR(step_size=7) counts epochs — the LR was decayed after
            # only 7 batches. Step once per epoch, after the train phase.
            if phase == 'train':
                scheduler.step()

            epoch_loss = current_loss / data_sizes[phase]
            epoch_acc = current_acc.double() / data_sizes[phase]

            if phase == 'val':
                epoch_kappa = np.mean(val_kappa)
                # BUG FIX: a duplicated epoch_acc argument used to occupy the
                # kappa placeholder, so the printed "Kappa Score" was actually
                # the accuracy and epoch_kappa was silently ignored.
                print('{} Loss: {:.4f} | {} Accuracy: {:.4f} | Kappa Score: {:.4f}'.format(
                    phase, epoch_loss, phase, epoch_acc, epoch_kappa
                ))
            else:
                print('{} Loss: {:.4f} | {} Accuracy: {:.4f}'.format(
                    phase, epoch_loss, phase, epoch_acc
                ))

            # Checkpoint on improvement ("early stopping"-style best-weights save).
            if phase == 'val' and epoch_loss < best_loss:
                print('Val loss Decreased from {:.4f} to {:.4f} \n Saving Weights...'.format(
                    best_loss, epoch_loss
                ))
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_since = time.time() - since        # End time
    print('Training Complete in {:.0f}m {:.0f}s'.format(
        time_since // 60, time_since % 60
    ))
    print('Best val loss: {:.4f}'.format(best_loss))

    # Restore the best checkpoint before handing the model back.
    model.load_state_dict(best_model_wts)
    return model

 

  • Validation Code
def visualize_model(model, num_images=6):
    """Plot actual vs. predicted labels for `num_images` validation images.

    Temporarily switches the model to eval mode and restores its previous
    training flag before returning.
    """
    was_training = model.training
    model.eval()
    images_handled = 0
    # BUG FIX: the figure handle was stored in `ax` and then immediately
    # shadowed by plt.subplot below; the figure reference was never used.
    plt.figure()

    with torch.no_grad():
        for inputs, labels in data_loaders['val']:
            inputs = inputs.to(device)
            # BUG FIX: the device-moved labels were bound to an unused
            # variable `label`, leaving `labels` on the CPU.
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size(0)):
                images_handled += 1
                ax = plt.subplot(num_images // 2, 2, images_handled)
                ax.axis('off')
                ax.set_title('Actual: {} , Predicted: {}'.format(
                    class_names[labels[j].item()], class_names[preds[j]]
                ))
                # NOTE(review): `imshow` is a helper defined elsewhere in the
                # post (not plt.imshow — it takes a figsize tuple); confirm.
                imshow(inputs.cpu().data[j], (5,5))

                if images_handled == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)
 

  • Test Code
# Collect per-sample predictions and ground truth over the test loader.
# NOTE(review): `best_model` is defined elsewhere in the post (the trained
# model returned by train_model) — confirm it is in eval mode here.
y_pred_list = []
y_true_list = []

with torch.no_grad():
    for _, (inputs, labels) in tqdm.tqdm(enumerate(test_loader), leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        logits = best_model(inputs)
        # log_softmax is monotonic, so argmax over it equals argmax over logits.
        log_probs = torch.log_softmax(logits, dim=1)
        _, predictions = torch.max(log_probs, dim=1)
        y_pred_list.append(predictions.cpu().numpy())
        y_true_list.append(labels.cpu().numpy())