Day 6: Transfer learning - impact of the number of hidden layers (2)

I modified my code for the training and testing process as shown below:

import time

import torch

# model, criterion, optimizer, device, trainloader and testloader
# are defined as in the previous posts
epochs = 1
steps = 0
running_loss = 0
print_every = 10

# Start timer
start = time.time()

for epoch in range(epochs):
    for inputs, labels in trainloader:
        if steps >= 200:  # stop after 200 batches to shorten the test
            break
            
        steps += 1
        
        # Move input and label tensors to the default device
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            model.eval()  # switch to evaluation mode (disables dropout)
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    logps = model(inputs)
                    batch_loss = criterion(logps, labels)
                    
                    test_loss += batch_loss.item()
                    
                    # Calculate accuracy: the model outputs log-probabilities,
                    # so exp() recovers the class probabilities
                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
                    
            # Columns: epoch/epochs, step, time per batch in seconds (this
            # interval also includes the validation pass above), training
            # loss, test loss, accuracy
            print(f"{epoch+1}/{epochs} "
                  f"{steps} "
                  f"{(time.time() - start)/print_every:.3f} "
                  f"{running_loss/print_every:.3f} "
                  f"{test_loss/len(testloader):.3f} "
                  f"{accuracy/len(testloader):.3f}")
            
            running_loss = 0
            model.train()  # back to training mode
            
            # Restart timer
            start = time.time()
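
For reference, the classifiers compared below differ only in the number of fully connected hidden layers stacked on top of the frozen pretrained features. A minimal sketch of how such a head can be built (the input size of 1024, the 256-unit hidden layers, the dropout rate, the two output classes, and a densenet-style model.classifier attribute are illustrative assumptions, not necessarily my exact setup):

from torch import nn

def build_classifier(n_hidden, in_features=1024, hidden_size=256, n_classes=2):
    # Stack n_hidden fully connected hidden layers, then the output layer
    layers = []
    size = in_features
    for _ in range(n_hidden):
        layers += [nn.Linear(size, hidden_size), nn.ReLU(), nn.Dropout(0.2)]
        size = hidden_size
    # LogSoftmax output matches the torch.exp(logps) in the loop above
    layers += [nn.Linear(size, n_classes), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*layers)

# e.g. model.classifier = build_classifier(n_hidden=3)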

And the results for one to five hidden layers are:

epoch/epochs    steps    processing_time_per_batch(s)    training_loss    test_loss    accuracy

# one hidden layer
1/1 10 5.128 1.432 0.271 0.873
1/1 20 5.285 0.263 0.149 0.945
1/1 30 5.290 0.216 0.071 0.974
1/1 40 5.305 0.191 0.079 0.974
1/1 50 5.139 0.190 0.201 0.924
1/1 60 5.306 0.187 0.100 0.961
1/1 70 5.296 0.178 0.058 0.982
1/1 80 5.291 0.151 0.059 0.977
1/1 90 5.130 0.204 0.065 0.976
1/1 100 5.305 0.177 0.052 0.982
1/1 110 5.303 0.216 0.070 0.977
1/1 120 5.310 0.183 0.053 0.981
1/1 130 5.136 0.125 0.201 0.928
1/1 140 5.292 0.279 0.071 0.974
1/1 150 5.293 0.162 0.053 0.982
1/1 160 5.297 0.165 0.061 0.978
1/1 170 5.129 0.178 0.058 0.980
1/1 180 5.295 0.150 0.065 0.977
1/1 190 5.292 0.252 0.062 0.978
1/1 200 5.293 0.266 0.048 0.986

# two hidden layers
1/1 10 5.185 1.384 0.209 0.975
1/1 20 5.210 0.280 0.147 0.948
1/1 30 5.215 0.462 0.148 0.955
1/1 40 5.205 0.280 0.127 0.964
1/1 50 5.227 0.160 0.057 0.976
1/1 60 5.203 0.287 0.107 0.962
1/1 70 5.196 0.223 0.066 0.979
1/1 80 5.206 0.163 0.045 0.985
1/1 90 5.199 0.150 0.050 0.983
1/1 100 5.221 0.191 0.064 0.977
1/1 110 5.191 0.210 0.056 0.982
1/1 120 5.198 0.151 0.051 0.981
1/1 130 5.208 0.260 0.167 0.917
1/1 140 5.206 0.192 0.044 0.986
1/1 150 5.207 0.192 0.041 0.985
1/1 160 5.200 0.155 0.041 0.984
1/1 170 5.206 0.186 0.048 0.984
1/1 180 5.206 0.132 0.058 0.979
1/1 190 5.212 0.189 0.067 0.973
1/1 200 5.203 0.209 0.049 0.986

# three hidden layers
1/1 10 5.098 1.598 0.590 0.685
1/1 20 5.183 0.473 0.105 0.975
1/1 30 5.121 0.241 0.144 0.946
1/1 40 5.184 0.204 0.065 0.978
1/1 50 5.118 0.205 0.078 0.979
1/1 60 5.203 0.134 0.111 0.958
1/1 70 5.105 0.334 0.142 0.947
1/1 80 5.184 0.270 0.058 0.980
1/1 90 5.102 0.164 0.084 0.971
1/1 100 5.187 0.205 0.088 0.966
1/1 110 5.097 0.171 0.149 0.958
1/1 120 5.192 0.179 0.046 0.984
1/1 130 5.120 0.164 0.052 0.981
1/1 140 5.189 0.133 0.040 0.985
1/1 150 5.099 0.263 0.077 0.985
1/1 160 5.196 0.191 0.067 0.973
1/1 170 5.095 0.181 0.051 0.980
1/1 180 5.185 0.185 0.064 0.975
1/1 190 5.112 0.159 0.047 0.984
1/1 200 5.190 0.141 0.046 0.985

# four hidden layers
1/1 10 5.257 0.806 0.673 0.506
1/1 20 5.277 0.622 0.320 0.927
1/1 30 5.265 0.255 0.107 0.960
1/1 40 5.280 0.247 0.167 0.941
1/1 50 5.256 0.288 0.088 0.980
1/1 60 5.275 0.191 0.060 0.975
1/1 70 5.263 0.194 0.059 0.980
1/1 80 5.266 0.211 0.059 0.980
1/1 90 5.258 0.176 0.100 0.965
1/1 100 5.273 0.224 0.068 0.979
1/1 110 5.270 0.170 0.074 0.974
1/1 120 5.264 0.134 0.046 0.981
1/1 130 5.263 0.176 0.088 0.964
1/1 140 5.276 0.179 0.102 0.966
1/1 150 5.264 0.165 0.042 0.981
1/1 160 5.270 0.133 0.041 0.981
1/1 170 5.271 0.227 0.082 0.981
1/1 180 5.277 0.143 0.041 0.983
1/1 190 5.265 0.259 0.090 0.982
1/1 200 5.275 0.208 0.045 0.983

# five hidden layers
1/1 10 5.564 0.739 0.617 0.525
1/1 20 5.384 0.594 0.285 0.957
1/1 30 5.221 0.404 0.096 0.964
1/1 40 5.164 0.287 0.195 0.907
1/1 50 5.160 0.231 0.059 0.977
1/1 60 5.394 0.255 0.111 0.965
1/1 70 5.190 0.189 0.096 0.971
1/1 80 5.207 0.405 0.152 0.979
1/1 90 5.206 0.247 0.061 0.975
1/1 100 5.386 0.225 0.096 0.970
1/1 110 5.208 0.151 0.071 0.967
1/1 120 5.203 0.242 0.062 0.983
1/1 130 5.206 0.171 0.053 0.977
1/1 140 5.380 0.136 0.056 0.978
1/1 150 5.218 0.156 0.096 0.961
1/1 160 5.235 0.110 0.086 0.971
1/1 170 5.222 0.179 0.050 0.984
1/1 180 5.386 0.126 0.066 0.975
1/1 190 5.212 0.168 0.066 0.971
1/1 200 5.215 0.231 0.096 0.978

I capped the iterations at 200 to shorten the testing time. I found some interesting things:

1. The processing times for one to five hidden layers are quite similar; there are no significant differences. Tomorrow I would like to test with 10 hidden layers, and also try the pretrained resnet101 model (see the sketch after this list).

2. The interesting part is the accuracy. Going from one to two hidden layers makes the accuracy rise faster, but adding a third, fourth, or fifth hidden layer lowers the accuracy at the same iteration count.

3. I only used one epoch; tomorrow I would like to test with several epochs and different batch sizes to see how the results change.
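
Here is a rough sketch of how resnet101 could be swapped in (the 256-unit hidden layer, two output classes, and learning rate are illustrative assumptions; note that resnet exposes its classifier as model.fc, whereas densenet uses model.classifier):

import torch
from torch import nn
from torchvision import models

# Load pretrained resnet101 and freeze the feature weights
model = models.resnet101(pretrained=True)
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer (resnet101 outputs 2048 features)
model.fc = nn.Sequential(nn.Linear(2048, 256),
                         nn.ReLU(),
                         nn.Dropout(0.2),
                         nn.Linear(256, 2),
                         nn.LogSoftmax(dim=1))

criterion = nn.NLLLoss()
# Train only the new classifier's parameters
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.003)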

That's all for today.
