Vanilla Neural Networks


torch.softmax() / torch.nn.Softmax()

softmax

A = torch.Tensor([[-2, -0.5, 2.0, 1.5],
                  [-2, -0.5, 7.0, 1.5]])

p = torch.softmax(A, dim=1)    # along dim=1: each row sums to 1
print(p)
# tensor([[1.0730e-02, 4.8089e-02, 5.8585e-01, 3.5533e-01], 
#         [1.2282e-04, 5.5046e-04, 9.9526e-01, 4.0674e-03]])


p = torch.softmax(A, dim=0)    # along dim=0: each column sums to 1
print(p)

# tensor([[0.5000, 0.5000, 0.0067, 0.5000],
#         [0.5000, 0.5000, 0.9933, 0.5000]])

# torch.nn.LogSoftmax() returns the logarithm of the softmax output, i.e. log(softmax(x_i))
# torch.nn.Softmax() is the module version of torch.softmax()
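A quick sketch of the module versions (assuming torch and torch.nn as nn are imported, as elsewhere in these notes), reusing the tensor A from above:

softmax_mod = nn.Softmax(dim=1)          # module version of torch.softmax
logsoftmax_mod = nn.LogSoftmax(dim=1)    # log of the softmax, computed in a numerically stable way

print(softmax_mod(A))        # same values as torch.softmax(A, dim=1)
print(logsoftmax_mod(A))     # same values as torch.log(torch.softmax(A, dim=1))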

A 1-D tensor has only one dimension, so the softmax must be taken along dim=0, even though the tensor is printed on a single line in the console.

x=torch.Tensor([ -2 , -0.5 , 2.0 , 1.5 ])
p=torch.softmax(x, dim=0)

print(p)
print(p.size())

nn.Linear()

# input of size  5 and output of size 3
mod = nn.Linear(5,3,bias = True)         # bias = True by default
print(mod)

# Linear(in_features=5, out_features=3, bias=True)

print(mod.weight)
print(mod.weight.size())

# Parameter containing:
# tensor([[-0.0636,  0.1377, -0.1297,  0.4385,  0.1840],
#         [-0.4137,  0.2118,  0.2093, -0.0728, -0.2257],
#         [ 0.4318, -0.1557,  0.1055,  0.3528,  0.2025]], requires_grad=True)
# torch.Size([3, 5])

print(mod.bias)
# Parameter containing:
# tensor([ 0.1466, 0.2684, -0.0493], requires_grad=True)
 
# change the weight of mod
with torch.no_grad():
    mod.weight[0,0] = 0
    mod.weight[0,1] = 1 
    mod.weight[0,2] = 2
print(mod.weight)
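Applying the layer is just a matter of calling the module on an input batch; a small sketch (the untouched weights are random, so the exact output varies):

x = torch.rand(4, 5)     # a batch of 4 input vectors of size 5
y = mod(x)               # computes x @ mod.weight.T + mod.bias
print(y.size())          # torch.Size([4, 3])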

vanilla nn

class two_layer_net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(two_layer_net, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size, bias=True)
        self.layer2 = nn.Linear(hidden_size, output_size, bias=True)

    def forward(self, x):
        x = self.layer1(x)
        x = torch.relu(x)              # ReLU activation
        x = self.layer2(x)
        p = torch.softmax(x, dim=0)    # dim=0 assumes a single 1-D input; adapt as needed
        return p

net = two_layer_net(2,5,3)
# All the parameters of the network can be accessed with net.parameters().


list_of_param = list( net.parameters() )
print(list_of_param)

# [Parameter containing:
# tensor([[10.0000, 20.0000],
#         [ 0.0500, -0.5119],
#         [-0.1930, -0.1993],
#         [-0.0208, -0.0490],
#         [ 0.2011, -0.2519]], requires_grad=True),
# Parameter containing:
# tensor([ 0.1292, -0.3313, -0.3548, -0.5247,  0.1753], requires_grad=True),
# Parameter containing:
# tensor([[ 0.3178, -0.1838, -0.1930, -0.3816,  0.1850],
#         [ 0.2342, -0.2743,  0.2424, -0.3598,  0.3090],
#         [ 0.0876, -0.3785,  0.2032, -0.2937,  0.0382]], requires_grad=True),
# Parameter containing:
# tensor([ 0.2120, -0.2751,  0.2351], requires_grad=True)]
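Forwarding one input through this network; a small sketch (the exact probabilities depend on the random initialization):

x = torch.rand(2)        # one input vector of size 2
p = net(x)               # Linear -> ReLU -> Linear -> softmax
print(p)                 # 3 probabilities that sum to 1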

nn.NLLLoss()

Negative log-likelihood loss (NLL).

a = torch.Tensor([[1,2,3]])

nll = nn.NLLLoss()
target1 = torch.Tensor([0]).long()
target2 = torch.Tensor([1]).long()
target3 = torch.Tensor([2]).long()

n1 = nll(a,target1)
# tensor(-1.)

n2 = nll(a,target2)
# tensor(-2.)

n3 = nll(a,target3)
# tensor(-3.)

nn.NLLLoss() picks out the entry of a at the position given by the target and negates it: for example, with a target of 0 it takes the value at index 0 of a and returns its negative as the loss. Note that NLLLoss expects log-probabilities (the output of LogSoftmax); raw scores are used here only to make the indexing behaviour visible.
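Combined with a log-softmax, the same call produces an actual negative log-likelihood; a small sketch reusing a and target1 from above:

log_p = torch.log_softmax(a, dim=1)   # log-probabilities
loss = nll(log_p, target1)            # -log p(class 0)
print(loss)                           # tensor(2.4076)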

nn.CrossEntropyLoss()

Cross Entropy Loss

mycrit=nn.CrossEntropyLoss()
labels=torch.LongTensor([2,3])
scores=torch.Tensor([[-1.2,  0.5,  5.0, -0.5],
                     [ 1.4, -1.7, -1.3,  5.0]])
average_loss = mycrit(scores,labels)

print('loss = ', average_loss.item() )
# loss = 0.023508397862315178

CrossEntropyLoss = Softmax + Log + NLL

# reuse the scores and labels from the snippet above
input = scores
y_target = labels

softmax_func = nn.Softmax(dim=1)
soft_output = softmax_func(input)
log_output = torch.log(soft_output)
nllloss_func = nn.NLLLoss(reduction='none')
nllloss_output = nllloss_func(log_output, y_target)
print(nllloss_output)                 # per-sample losses: tensor([0.0171, 0.0299])
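Averaging these per-sample values recovers the CrossEntropyLoss result from above:

print(nllloss_output.mean().item())   # ~0.0235, the same as average_loss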

epoch

for epoch in range(15):     # do 15 passes through the training set
    shuffled_indices = torch.randperm(60000)
    for count in range(0, 60000, bs):
        optimizer.zero_grad()

        # indices is a tensor; the lines below use it to slice the data
        indices = shuffled_indices[count:count+bs]
        minibatch_data  = train_data[indices]
        minibatch_label = train_label[indices]
        # pay attention to how a tensor is sliced/indexed here
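Indexing a tensor with a tensor of indices selects whole rows at once; a tiny self-contained sketch of the pattern used above:

data = torch.arange(12).view(6, 2)   # 6 "data points" of size 2
perm = torch.randperm(6)             # a random permutation of 0..5
batch = data[perm[0:3]]              # the first 3 shuffled data points
print(batch.size())                  # torch.Size([3, 2])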

epoch + monitoring loss + time + lr update

start = time.time()
lr = 0.05   # initial learning rate

for epoch in range(200):

    # learning rate strategy: divide the learning rate by 1.5 every 10 epochs
    if epoch%10==0 and epoch>10:
        lr = lr / 1.5

    # create a new optimizer at the beginning of each epoch: give it the current learning rate
    optimizer=torch.optim.SGD( net.parameters() , lr=lr )

    running_loss=0
    running_error=0
    num_batches=0

    shuffled_indices=torch.randperm(60000)

    for count in range(0,60000,bs):

        # set the gradients to zero
        optimizer.zero_grad()

        # create a minibatch
        indices=shuffled_indices[count:count+bs]
        minibatch_data = train_data[indices]
        minibatch_label= train_label[indices]

        # send them to the gpu
        device = torch.device("cuda")
        net = net.to(device)
        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)

        # reshape the minibatch
        inputs = minibatch_data.view(bs,784)

        # tell PyTorch to start tracking all operations that will be done on "inputs"
        inputs.requires_grad_()

        # forward the minibatch through the net
        scores=net( inputs )

        # compute the average of the losses of the data points in the minibatch
        loss = criterion( scores , minibatch_label)

        # backward pass to compute dL/dU, dL/dV and dL/dW
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dV), ...
        optimizer.step()

        # START COMPUTING STATS
        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()

        # compute the error made on this batch and add it to the running error
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()

        num_batches+=1


    # once the epoch is finished we divide the "running quantities"
    # by the number of batches
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed_time = time.time() - start

    # every 10 epochs we display the stats
    # and compute the error rate on the test set
    if epoch % 10 == 0:
        print('epoch=',epoch, ' time=', elapsed_time,
              ' loss=', total_loss , ' error=', total_error*100 ,'percent lr=', lr)
        eval_on_test_set()
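utils.get_error comes from the helper file that accompanies these notes and is not shown here; presumably it returns the fraction of misclassified samples in a minibatch. A possible sketch (the actual implementation may differ):

def get_error(scores, labels):
    # predicted class = index of the largest score; error = fraction of wrong predictions
    predicted_labels = scores.argmax(dim=1)
    num_wrong = (predicted_labels != labels).sum()
    return num_wrong.float() / labels.size(0)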

Evaluate on test set

def eval_on_test_set():

    running_error=0
    num_batches=0

    for i in range(0,10000,bs):

        # extract a minibatch from the test set and send it to the same device as the net
        minibatch_data = test_data[i:i+bs]
        minibatch_label= test_label[i:i+bs]
        minibatch_data = minibatch_data.to(device)
        minibatch_label= minibatch_label.to(device)

        # reshape, forward, and compute the error on this minibatch
        inputs = minibatch_data.view(bs,784)
        scores=net( inputs )

        error = utils.get_error( scores , minibatch_label)
        running_error += error.item()
        num_batches+=1

    total_error = running_error/num_batches
    print( 'test error = ', total_error*100 ,'percent')
