Python 梯度下降：降维后的特征集的运行时间比原始特征集长

发布于04月07日

这个问题将在3天后返回，它的内容可能会被滥用。我在此保存一份原件。

推荐答案

看起来代码的运行时间主要花在簿记和循环操作上，而不是实际的梯度下降计算上。我修改了你的代码，去掉了外层的 for 循环，并预先分配一个 NumPy 数组直接写入，而不是依赖较慢的列表：

import time
import numpy as np
import matplotlib.pyplot as plt

def h_theta(X1, theta1):
    """Evaluate the linear hypothesis: the dot product of ``X1`` and ``theta1``."""
    predictions = np.dot(X1, theta1)
    return predictions

def j_theta(X1, y1, theta1):
    """Return the halved mean squared error of the linear hypothesis.

    The squared residuals ``h_theta(X1, theta1) - y1`` are summed over the
    sample axis (axis 0) and divided by ``2 * m``, where ``m = X1.shape[0]``
    is the number of rows of ``X1``.

    Args:
        X1: Design matrix with one row per sample.
        y1: Targets, broadcastable against ``h_theta(X1, theta1)``.
        theta1: Parameters; may hold several parameter vectors side by side
            as columns.

    Returns:
        A scalar when the residual is 1-D; otherwise one cost per residual
        column, so costs of independent parameter columns are not mixed.
    """
    residual = h_theta(X1, theta1) - y1
    # Normalise by the sample count, not X1.size (samples * features), so the
    # cost uses the same 1/m scaling as the gradient.
    num_samples = X1.shape[0]
    return np.sum(residual ** 2, axis=0) / (2 * num_samples)

def grad(X1, y1, theta):
    """Return the gradient of the squared-error cost with respect to ``theta``.

    Computes ``X1.T . (h_theta(X1, theta) - y1)`` divided by the number of
    rows in ``y1``.
    """
    residual = h_theta(X1, theta) - y1
    n_rows = y1.shape[0]
    return np.dot(X1.T, residual) / n_rows

def gradient_descent(X1, y1, learning_rates=(0.1, 0.01, 0.001), num_iterations=1000):
    """Run batch gradient descent for several learning rates at once.

    Every learning rate owns one column of ``theta``; all columns are updated
    together in a single vectorised step per iteration, so there is no Python
    loop over learning rates.

    Args:
        X1: Design matrix of shape ``(m, n)``.
        y1: Targets that broadcast against ``X1 @ theta``, i.e. shape
            ``(m, 1)`` or ``(m, len(learning_rates))``.
        learning_rates: Step sizes to try, one ``theta`` column each.
        num_iterations: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_iterations)``: ``theta`` has shape
        ``(n, len(learning_rates))`` and ``cost_iterations`` has shape
        ``(len(learning_rates), num_iterations)``. Plot ``cost_iterations[k]``
        against the iteration index to inspect learning rate ``k``.

    Side effects:
        Prints the elapsed time of the optimisation loop.
    """
    # Row vector so it broadcasts across the columns of the gradient.
    rates = np.asarray(learning_rates, dtype=float).reshape(1, -1)
    num_learning_rates = rates.shape[1]

    theta = np.zeros((X1.shape[1], num_learning_rates))
    cost_iterations = np.zeros((num_learning_rates, num_iterations))

    # perf_counter is monotonic and high resolution, unlike the wall clock.
    start = time.perf_counter()
    for i in range(num_iterations):
        theta = theta - rates * grad(X1, y1, theta)
        # Stores whatever j_theta returns for this step; a scalar result is
        # broadcast to every learning-rate row.
        cost_iterations[:, i] = j_theta(X1, y1, theta)
    end = time.perf_counter()
    print(f"Time taken: {end - start} seconds")

    return theta, cost_iterations

# Synthetic stand-in data so the timing comparison can run end to end.
NUM_SAMPLES = 3072
NUM_FEATURES = 9
NUM_REDUCED_FEATURES = 3

X_normalized = np.random.randn(NUM_SAMPLES, NUM_FEATURES)
# One random target column duplicated into three identical columns, matching
# the three learning rates gradient_descent tries.
y_normalized = np.random.randn(NUM_SAMPLES, 1).repeat(3, axis=1)
# An intercept (bias) column is constant ones, not random noise.
intercept_column = np.ones((NUM_SAMPLES, 1))


# Reduce X to NUM_REDUCED_FEATURES columns using SVD.
# Only the right singular vectors are needed; full_matrices=False avoids
# building the NUM_SAMPLES x NUM_SAMPLES left singular matrix.
_, _, Vt = np.linalg.svd(X_normalized, full_matrices=False)
# Project X onto the leading right singular vectors.
X_reduced = np.dot(X_normalized, Vt[:NUM_REDUCED_FEATURES].T)

# Print the first 5 rows of X_reduced (before normalization).
print("First 5 rows of X_reduced:")
print(X_reduced[:5])
# Normalize X_reduced column-wise to zero mean and unit standard deviation.
X_reduced = (X_reduced - np.mean(X_reduced, axis=0)) / np.std(X_reduced, axis=0)

print("the means and stds of X after being reduced and normalized:\n" ,X_reduced.mean(axis=0), X_reduced.std(axis=0))
# Print the shape of the reduced X to confirm the reduced feature count.
print("Shape of X_reduced:", X_reduced.shape)

# Prepend the intercept column to both feature matrices.
X_reduced_with_intercept = np.hstack((intercept_column, X_reduced))
X_normalized_with_intercept = np.hstack((intercept_column, X_normalized))

# Example usage
# X_normalized_with_intercept and y_normalized represent the original dataset
# X_reduced_with_intercept and y_normalized represent the reduced dataset

# Performing gradient descent on the original dataset
gradient_descent(X_normalized_with_intercept, y_normalized)

# Performing gradient descent on the reduced dataset
gradient_descent(X_reduced_with_intercept, y_normalized)

现在，代码在降维后的数据集上运行得更快。我补全了你没有给出的部分，以便能够运行你的代码。希望我没有把梯度下降代码改坏，因为我只是依靠让各数组的维度对得上，而没有用纸笔仔细推导。