from fastai import *
from fastai.vision import *
import torch.nn.functional as F
!pip install git+https://github.com/netbrainml/nbml.git
from nbml.workshops.dog_breed.utils import *
The load_data function is carried over from the previous transfer-learning notebook.
base_path = "../input/dog-breed-identification/"
tdl,vdl = load_data(top=20, path=base_path, nc=20)
We wrap the DataLoaders in a fastai ImageDataBunch and normalize with the ImageNet statistics, since our backbone was pretrained on ImageNet.
tdl.c, vdl.c = 20, 20  # fastai expects the number of classes to be exposed as .c
data = ImageDataBunch(tdl, vdl).normalize(imagenet_stats)
Then, we create a Learner, which wraps the data, model, and optimizer.
We have to set the loss function ourselves: as in the previous notebook, the loss function fastai infers does not work with our custom DataLoaders.
Also, note that we are using mixed-precision training, which casts the input data and most of the forward and backward computation to half precision (16 bits) to save memory and speed up training; the slight extra noise may even help generalization. However, a 32-bit master copy of the weights is kept and the weight updates are applied in 32-bit floating point, the standard precision, since half precision is too coarse for small gradient updates.
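To see why the 32-bit master weights matter, here is a quick plain-PyTorch illustration (independent of fastai): an update that survives in fp32 rounds away entirely in fp16.

import torch

# Near 1.0, fp16 values are spaced ~0.001 apart, so a small update vanishes;
# fp32 applies it correctly.
w16 = torch.tensor(1.0, dtype=torch.float16)
w32 = torch.tensor(1.0, dtype=torch.float32)
print(w16 + 1e-4)  # tensor(1., dtype=torch.float16) -- the update is lost
print(w32 + 1e-4)  # tensor(1.0001) -- the update is preserved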
fastai handles mixed-precision training for us: calling to_fp16() on the Learner is all it takes.
learn = cnn_learner(data, models.resnet34,
                    metrics=[accuracy, error_rate], model_dir="/tmp/model/").to_fp16()
learn.loss_func = F.cross_entropy
We first fine-tune the frozen model with the one-cycle policy, which ramps the learning rate up and then back down over the cycle while moving the momentum in the opposite direction (momentum falls while the learning rate rises, and vice versa).
learn.freeze()
learn.fit_one_cycle(5)
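For intuition, here is a self-contained sketch of the schedule fit_one_cycle follows. fastai v1 anneals both phases with cosine curves; the constants below approximate its defaults and are assumptions for illustration, not values read from the library.

import math

def one_cycle(step, total, lr_max=1e-3, div=25.0, pct_start=0.3,
              mom_max=0.95, mom_min=0.85):
    # Cosine interpolation from start to end as frac goes 0 -> 1
    def cos_anneal(start, end, frac):
        return end + (start - end) * (1 + math.cos(math.pi * frac)) / 2
    up = int(total * pct_start)
    if step < up:  # phase 1: learning rate rises, momentum falls
        frac = step / max(up, 1)
        return cos_anneal(lr_max / div, lr_max, frac), cos_anneal(mom_max, mom_min, frac)
    frac = (step - up) / max(total - up, 1)  # phase 2: lr falls, momentum rises
    return cos_anneal(lr_max, lr_max / (div * 1e4), frac), cos_anneal(mom_min, mom_max, frac)

for s in range(0, 101, 25):
    lr, mom = one_cycle(s, 100)
    print(f"step {s:3d}: lr={lr:.2e} mom={mom:.2f}")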
Next, we unfreeze the model and train with discriminative learning rates, so earlier layers get smaller updates than the head.
learn.unfreeze()
learn.fit_one_cycle(5, max_lr=slice(1e-5,3e-3))
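The slice(1e-5, 3e-3) spreads learning rates across fastai's layer groups: the earliest group trains at 1e-5, the head at 3e-3, and anything in between gets geometrically spaced values. A resnet34 learner in fastai v1 has three layer groups (two halves of the backbone plus the head), so the spread looks roughly like this sketch:

import numpy as np

# Geometric spacing across three layer groups: early layers move slowly,
# the freshly initialized head moves fastest.
lrs = np.geomspace(1e-5, 3e-3, num=3)
print(lrs)  # [1.0e-05  1.7e-04  3.0e-03]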
Let's convert our model back to full precision and train it further, to see if it improves.
learn = learn.to_fp32()
print(list(learn.model[0][0].parameters())[0][0].dtype)  # confirm the weights are torch.float32 again
Let's repeat the same training schedule.
learn.freeze()
learn.fit_one_cycle(3,1e-4)
learn.unfreeze()
learn.fit_one_cycle(3, max_lr=slice(1e-5,1e-4))
Let's now reload the data at a higher resolution (448x448) and train further; this kind of progressive resizing often buys extra accuracy.
tdl,vdl = load_data(top=20, path=base_path, dim=448, nc=20, bs=40)
tdl.c, vdl.c = 20,20
data = ImageDataBunch(tdl, vdl).normalize(imagenet_stats)
learn.data = data
learn.freeze()
learn.fit_one_cycle(3)
learn.unfreeze()
learn.fit_one_cycle(3, max_lr=slice(1e-5,1e-4))
import gc
del data, tdl, vdl  # drop our references; the Learner keeps its own
gc.collect()
def acc(pred, Y):
    return (pred == Y.long()).float().mean()
# get_preds returns (predictions, targets); argmax gives the predicted classes
val_preds, val_ys = learn.get_preds(ds_type=DatasetType.Valid)
trn_preds, trn_ys = learn.get_preds(ds_type=DatasetType.Train)
print(acc(val_preds.argmax(dim=-1), val_ys), acc(trn_preds.argmax(dim=-1), trn_ys))
del learn.data
gc.collect()
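Finally, let's scale up to the top 50 breeds. The classifier head was built for 20 classes, so its final linear layer has to be swapped for a 50-way one. Printing the head shows which module to replace; with fastai v1's default head for resnet34, the last layer is Linear(512, 20):

print(learn.model[-1])  # the head; its last module is Linear(in_features=512, out_features=20)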
num_classes = 50
tdl,vdl = load_data(top=num_classes, path=base_path, nc=num_classes, bs=40)
tdl.c, vdl.c = num_classes,num_classes
data = ImageDataBunch(tdl, vdl).normalize(imagenet_stats)
gc.collect()
learn.data = data
learn.model[-1][-1] = nn.Linear(512, num_classes, bias=True).cuda()  # replace the 20-way output layer with a fresh 50-way one
learn.freeze()
learn.fit_one_cycle(5)
learn.unfreeze()
learn.fit_one_cycle(5, max_lr=slice(1e-5,1e-4))