Python PyTorch basics

# --- Reproducibility ---
torch.manual_seed(seed)  # Seed PyTorch's random number generator so random tensors are reproducible

# --- Creating tensors ---
# NOTE: the `*_like` functions take a tensor as input; `some_array` must already be a tensor.
torch.rand_like(some_array, dtype=torch.float)  # Random tensor with the same shape as `some_array`, stored as float
torch.ones_like(some_array, dtype=torch.float)  # Tensor of 1s with the same shape as `some_array`, stored as float
shape = (2,3,)
rand_tensor = torch.rand(shape)  # Random tensor (uniform on [0, 1)) with the specified shape
ones_tensor = torch.ones(shape)  # Tensor of 1s with the specified shape
zeros_tensor = torch.zeros(shape)  # Tensor of 0s with the specified shape

# --- Inspecting a tensor ---
tensor.shape  # Shape of the tensor
tensor.dtype  # Data type of the tensor's elements
tensor.device  # Device the tensor is stored on

# --- Indexing ---
some_tensor = torch.arange(12).reshape(3, 4).float()  # Values 0..11 arranged as 3 rows x 4 columns, converted to float
print('First row: ',some_tensor[0])
print('First column: ', some_tensor[:, 0])
print('Last column:', some_tensor[:, -1])

# --- Joining and arithmetic ---
t1 = torch.cat([tensor1, tensor2, tensor3], dim=1)  # Concatenate along dim 1: for 2-D tensors the inputs are joined side by side, so the number of columns grows
matrix_mul = tensor @ tensor.T  # Matrix product of a tensor with its transpose (operator form)
matrix_mul = tensor.matmul(tensor.T)  # Same matrix product, method form
matrix_mul = torch.mm(mat.t(), mat)  # Matrix product of the TRANSPOSE of `mat` with `mat` (operand order is reversed compared with the two lines above)
normal_mul = tensor1 * tensor2  # Element-wise multiplication (operator form)
normal_mul = tensor1.mul(tensor2)  # Element-wise multiplication (method form)

# --- Aggregation ---
agg = tensor.sum()  # Sum of all elements, returned as a tensor
agg_item = agg.item()  # Extract the value of a single-element tensor as a plain Python number

# --- Autograd with a scalar output ---
x = torch.arange(4.0)  # x = (0., 1., 2., 3.)
x.requires_grad_(True)  # Same as `x = torch.arange(4.0, requires_grad=True)`: operations on x are now tracked so we can differentiate with respect to it
y = 2 * torch.dot(x, x)  # y = 2 * (x . x) = 2 * sum(x_i^2) = 2 * (0 + 1 + 4 + 9) = 2 * 14 = 28 (a scalar)
y.backward()  # Backpropagate from y: dy/dx = 4 * x = 4 * (0, 1, 2, 3) = (0, 4, 8, 12)
x.grad  # The gradient dy/dx that backward() stored on x

# --- Autograd with a non-scalar output ---
x = np.linspace(-np.pi, np.pi, 100)  # 100 evenly spaced points in [-pi, pi] as a NumPy array
x = torch.tensor(x, requires_grad=True)  # Copy into a tensor whose operations are tracked by autograd
y = torch.sin(x)  # y has the same shape as x, so it is NOT a scalar
y.backward(torch.ones_like(x))  # A non-scalar output needs an explicit initial gradient; all ones is equivalent to differentiating y.sum()
# NOTE: because y is not a scalar, a bare `y.backward()` raises
# "RuntimeError: grad can be implicitly created only for scalar outputs".
# Any non-scalar output needs a gradient argument with the same shape as that output.
x.grad  # The resulting gradient: cos(x) at each of the 100 points

def sigmoid(x):
    """Squash each element of ``x`` into the range 0 to 1.

    Evaluates the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Input tensor.

    Returns:
        Tensor with the same shape as ``x``.
    """
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

def softmax(X):
    """Turn each row of ``X`` into probabilities that sum to 1.

    Every ``X[i]`` is normalised independently as
    ``exp(X[i] - max(X[i])) / sum(exp(X[i] - max(X[i])))``, taking the maximum
    and the sum over all elements of ``X[i]``.

    Args:
        X: Tensor whose first dimension indexes the rows (typically 2-D:
            samples x classes). Integer and boolean inputs are converted to
            the default floating-point dtype first.

    Returns:
        Floating-point tensor with the same shape as ``X``.
    """
    # Probabilities are fractional: storing them in an integer tensor would
    # truncate every value to 0 (or 1), so promote non-float inputs up front.
    if not (X.is_floating_point() or X.is_complex()):
        X = X.to(torch.get_default_dtype())

    n_rows = X.shape[0]
    # Flatten each X[i] into one row so a single vectorised pass normalises
    # every row, whatever the rank of X.
    row_len = X.numel() // n_rows if n_rows else 1
    rows = X.reshape(n_rows, row_len)

    # Subtracting the row maximum makes the largest exponent 0, which keeps
    # exp() from overflowing without changing the result.
    shifted = rows - rows.max(dim=1, keepdim=True).values
    exp_rows = torch.exp(shifted)
    probabilities = exp_rows / exp_rows.sum(dim=1, keepdim=True)
    return probabilities.reshape(X.shape)

def linear(X, W, b):
    """Apply the linear model ``y = X @ W + b``.

    This is the matrix form of ``y = m*x + c``: ``W`` holds the weights
    (the slope ``m``) and ``b`` the bias (the intercept ``c``).

    Args:
        X: Input features.
        W: Weights.
        b: Bias added to the matrix product.

    Returns:
        The model predictions ``X @ W + b``.
    """
    projected = X @ W
    return projected + b

def squared_loss(y_hat, y):
    """Mean of the halved squared errors; a loss function for regression.

    Args:
        y_hat: Predicted values.
        y: Target values; reshaped to the shape of ``y_hat`` before comparing.

    Returns:
        The mean of ``(y_hat - y) ** 2 / 2`` over all elements.
    """
    targets = y.reshape(y_hat.shape)
    residual = y_hat - targets
    halved_squares = residual ** 2 / 2
    return halved_squares.mean()

def cross_entropy(y_hat, y):
    """Cross-entropy loss for a multi-class problem, averaged over the samples.

    Computes ``-sum(y * log(y_hat)) / n`` where ``n`` is the size of the first
    dimension of ``y``.

    Args:
        y_hat: Predicted probabilities.
        y: Target probabilities (e.g. one-hot rows), same shape as ``y_hat``.

    Returns:
        The summed cross-entropy divided by the number of samples.
    """
    n = y.shape[0]  # Number of examples/samples
    # xlogy(y, y_hat) equals y * log(y_hat) but is defined as 0 wherever y == 0.
    # The plain product yields 0 * log(0) = 0 * -inf = NaN as soon as a
    # non-target class is predicted with probability exactly 0.
    return -torch.sum(torch.xlogy(y, y_hat)) / n

def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent step.

    Updates every parameter in place with ``param -= lr * param.grad`` and then
    resets its gradient to zero.

    Args:
        params: Iterable of tensors to update (e.g. weights and bias).
        lr: Learning rate (step size).
        batch_size: Accepted for interface compatibility but not used: the
            step is not divided by the batch size.
    """
    # The update itself must not be recorded by autograd, so gradient
    # tracking is switched off while the parameters are modified.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient was computed for this parameter (it did not
                # take part in the last backward pass); leave it untouched.
                continue
            param -= lr * param.grad  # Manually update the parameter
            # Reset the gradient: backward() accumulates into .grad, so it
            # must be cleared before the next backward pass.
            param.grad.zero_()

### Training
# Minibatch training loop for the linear model.
# `num_epochs`, `data_iter`, `batch_size`, `features`, `labels`, `w`, `b` and `lr`
# are not defined in this section and must be set up beforehand.
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        y_pred = linear(X, w, b)  # Forward pass on the minibatch
        loss = squared_loss(y_pred , y)  # Minibatch loss for `X` and `y`
        loss.backward()  # Backpropagation: compute the gradients of the loss for this batch
        sgd([w, b], lr, batch_size)  # Update `w` and `b` from their gradients, then reset the gradients
    with torch.no_grad():  # After each epoch, measure the loss on the full data set; gradient tracking is not needed for this evaluation
        train_loss = squared_loss(linear(features, w, b), labels)
Python相关代码片段