# PyTorch cheat sheet: tensor basics, autograd, and a from-scratch linear
# regression trained with minibatch SGD.
#
# NOTE: the snippets are illustrative. `torch`, `np`, `seed`, `some_array`,
# `tensor`, `tensor1`..`tensor3`, `mat`, `data_iter`, `features`, `labels`,
# `w`, `b`, `lr`, `batch_size` and `num_epochs` are not defined in this section
# and have to come from the surrounding code.

### Creating tensors
torch.manual_seed(seed)  # Seed the random number generator

torch.rand_like(some_array, dtype=torch.float)  # Random tensor with the same shape as `some_array`
torch.ones_like(some_array, dtype=torch.float)  # Tensor of 1s with the same shape as `some_array`

shape = (2, 3)
rand_tensor = torch.rand(shape)  # Random numbers with the specified shape
ones_tensor = torch.ones(shape)  # 1s with the specified shape
zeros_tensor = torch.zeros(shape)  # 0s with the specified shape

### Tensor attributes
tensor.shape  # Shape of the tensor
tensor.dtype  # Data type of the tensor
tensor.device  # Device the tensor is stored on

### Indexing and slicing
some_tensor = torch.arange(12).reshape(3, 4).float()  # Create, reshape, and cast to float
print('First row: ', some_tensor[0])
print('First column: ', some_tensor[:, 0])
print('Last column:', some_tensor[:, -1])

### Joining tensors
t1 = torch.cat([tensor1, tensor2, tensor3], dim=1)  # Concatenate along dim 1 (adds columns/features)

### Arithmetic
# Three ways to write a matrix product involving a transpose.
matrix_mul = tensor @ tensor.T  # tensor times its transpose
matrix_mul = tensor.matmul(tensor.T)  # same product, method form
matrix_mul = torch.mm(mat.t(), mat)  # transpose of `mat` times `mat`

# Two ways to write the element-wise product.
normal_mul = tensor1 * tensor2
normal_mul = tensor1.mul(tensor2)

agg = tensor.sum()  # Sum of all elements (a one-element tensor)
agg_item = agg.item()  # Extract the Python number from a one-element tensor

### Autograd: scalar output
x = torch.arange(4.0)
x.requires_grad_(True)  # Same as `x = torch.arange(4.0, requires_grad=True)`; lets us differentiate w.r.t. x
y = 2 * torch.dot(x, x)  # y = 2 * sum(x_i^2) = 2 * (0 + 1 + 4 + 9) = 28
y.backward()  # dy/dx = 4 * x = (0, 4, 8, 12)
x.grad  # Show the computed gradient

### Autograd: non-scalar output
x = np.linspace(-np.pi, np.pi, 100)
x = torch.tensor(x, requires_grad=True)
y = torch.sin(x)
# NOTE: y is not a scalar here, so a bare `y.backward()` raises a RuntimeError.
# A non-scalar output needs an explicit upstream gradient of the same shape;
# passing ones yields the gradient of y.sum().
y.backward(torch.ones_like(x))
x.grad  # Show the computed gradient


### Building blocks
def sigmoid(x):
    """Squash each element of `x` into the open interval (0, 1)."""
    return 1 / (1 + torch.exp(-x))


def softmax(X):
    """Turn each row of a 2-D tensor `X` into probabilities that sum to 1.

    The row maximum is subtracted before exponentiating so the largest
    exponent is 0; this avoids overflow and does not change the result.
    """
    row_max = torch.max(X, dim=1, keepdim=True).values  # Maximum of every row
    exp_rows = torch.exp(X - row_max)  # Shifted so each row's largest entry is exp(0) = 1
    return exp_rows / exp_rows.sum(dim=1, keepdim=True)  # Normalise every row by its sum


def linear(X, W, b):
    """Linear model `X @ W + b` with weights `W` and bias `b`."""
    return X @ W + b


def squared_loss(y_hat, y):
    """Regression loss: mean over all elements of `(y_hat - y)^2 / 2`.

    `y` is reshaped to the shape of `y_hat` before subtracting.
    """
    return ((y_hat - y.reshape(y_hat.shape)) ** 2 / 2).mean()


def cross_entropy(y_hat, y):
    """Multi-class loss: `-sum(y * log(y_hat))` divided by the number of samples.

    `y` is expected to be broadcast-compatible with `y_hat` (e.g. one-hot
    targets) -- confirm against the caller.
    """
    n = y.shape[0]  # Number of examples/samples
    # xlogy(y, y_hat) is y * log(y_hat), but defined as 0 where y == 0. This
    # keeps a predicted probability of exactly 0 on a non-target class from
    # producing 0 * -inf = NaN.
    return -torch.sum(torch.xlogy(y, y_hat)) / n


def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent.

    Updates every tensor in `params` in place by `-lr * grad` and then zeroes
    its gradient. `batch_size` is accepted for interface compatibility but is
    not used: `squared_loss` already averages with `.mean()`, so dividing the
    step by the batch size as well would normalise twice.
    """
    # The update itself must not be recorded by autograd, hence no_grad().
    with torch.no_grad():
        for param in params:
            if param.grad is None:  # No gradient has been computed for this parameter
                continue
            param -= lr * param.grad  # Manually update the parameter (weights and bias)
            # Reset the gradient; backward() accumulates, so it has to start
            # from zero for the next minibatch.
            param.grad.zero_()


### Training
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        y_pred = linear(X, w, b)
        loss = squared_loss(y_pred, y)  # Minibatch loss on `X` and `y`
        loss.backward()  # Backpropagate to get the gradients for this batch
        sgd([w, b], lr, batch_size)  # Update the parameters using those gradients
    with torch.no_grad():  # Evaluation only: no gradient tracking needed
        # Loss over the full data set after this epoch
        train_loss = squared_loss(linear(features, w, b), labels)