看起来代码的运行时间主要耗费在簿记和循环操作上,而不是实际的梯度下降计算上。我修改了你的代码:去掉了外层的 for 循环,并预先分配一个 NumPy 数组来写入结果,而不是依赖较慢的列表:
import time
import numpy as np
import matplotlib.pyplot as plt
def h_theta(X1, theta1):
    """Evaluate the linear hypothesis for the given inputs.

    Args:
        X1: Left operand of the product (the design matrix in this script).
        theta1: Right operand of the product (the model parameters).

    Returns:
        The result of ``np.dot(X1, theta1)``.
    """
    predictions = np.dot(X1, theta1)
    return predictions
def j_theta(X1, y1, theta1):
    """Compute the halved mean squared error cost of the linear model.

    The squared residuals ``h_theta(X1, theta1) - y1`` are summed over every
    entry and divided by ``2 * m``, where ``m = X1.shape[0]`` is the number
    of rows of ``X1``. Dividing by the row count (rather than by
    ``X1.size``, which also multiplies in the number of columns) keeps the
    cost on the same ``1 / m`` scale that ``grad`` uses for the gradient.

    Args:
        X1: Array of inputs; its first axis is taken as the sample axis.
        y1: Targets, broadcastable against ``h_theta(X1, theta1)``.
        theta1: Model parameters passed through to ``h_theta``.

    Returns:
        A scalar: the sum of squared residuals divided by ``2 * m``.
    """
    residual = h_theta(X1, theta1) - y1
    num_samples = X1.shape[0]
    return np.sum(residual ** 2) / (2 * num_samples)
def grad(X1, y1, theta):
    """Return the gradient used by the descent update.

    Args:
        X1: Array of inputs; its transpose is multiplied with the residual.
        y1: Targets; ``y1.shape[0]`` is used as the normalising count.
        theta: Current model parameters, passed through to ``h_theta``.

    Returns:
        ``np.dot(X1.T, h_theta(X1, theta) - y1)`` divided by ``y1.shape[0]``.
    """
    residual = h_theta(X1, theta) - y1
    return np.dot(X1.T, residual) / y1.shape[0]
def gradient_descent(X1, y1, learning_rates=(0.1, 0.01, 0.001),
                     num_iterations=1000):
    """Run batch gradient descent for several learning rates at once.

    Every learning rate gets its own column of ``theta``; all columns are
    updated together in one vectorised step per iteration, so there is no
    outer Python loop over the learning rates for the parameter update.

    Args:
        X1: 2-D array of inputs, one row per sample.
        y1: Targets. May be 1-D, a single column, or one column per
            learning rate; it is broadcast to one column per rate.
        learning_rates: Sequence of step sizes to try. Defaults to the
            three rates the function originally hard-coded.
        num_iterations: Number of descent steps. Defaults to 1000.

    Returns:
        tuple: ``(theta, cost_iterations)`` where ``theta`` has one column
        of final parameters per learning rate and ``cost_iterations`` has
        shape ``(len(learning_rates), num_iterations)`` holding the cost of
        each rate after each step.
    """
    # Row vector so that ``rates * gradient`` scales each theta column by
    # its own learning rate.
    rates = np.asarray(learning_rates, dtype=float).reshape(1, -1)
    num_rates = rates.shape[1]

    targets = np.asarray(y1)
    if targets.ndim == 1:
        targets = targets.reshape(-1, 1)
    # One target column per learning rate; a single column is shared.
    targets = np.broadcast_to(targets, (X1.shape[0], num_rates))

    theta = np.zeros((X1.shape[1], num_rates))  # Initialize theta with zeros
    cost_iterations = np.zeros((num_rates, num_iterations))

    start = time.time()
    for i in range(num_iterations):
        theta = theta - rates * grad(X1, targets, theta)
        # j_theta returns a single scalar summed over everything it is
        # given, so evaluate it column by column to get one cost per rate
        # instead of writing the same combined value to every row.
        for k in range(num_rates):
            cost_iterations[k, i] = j_theta(
                X1, targets[:, k:k + 1], theta[:, k:k + 1]
            )
    end = time.time()
    print(f"Time taken: {end - start} seconds")

    # Optional plotting of one cost curve per learning rate (left disabled):
    # fig, axs = plt.subplots(num_rates, figsize=(8, 15))
    # for idx, alpha in enumerate(rates.ravel()):
    #     axs[idx].plot(range(num_iterations), cost_iterations[idx], label=f'alpha = {alpha}')
    #     axs[idx].set_title(f'Learning Rate: {alpha}')
    #     axs[idx].set_ylabel('Cost J')
    #     axs[idx].set_xlabel('Number of Iterations')
    #     axs[idx].legend()
    # plt.tight_layout()
    # plt.show()
    return theta, cost_iterations
# Synthetic stand-in data so that the script can be run end to end.
NUM_SAMPLES = 3072
NUM_FEATURES = 9
NUM_REDUCED_FEATURES = 3
X_normalized = np.random.randn(NUM_SAMPLES, NUM_FEATURES)
# The same random target repeated into three columns, one per learning rate
# that gradient_descent tries by default.
y_normalized = np.random.randn(NUM_SAMPLES, 1).repeat(3, axis=1)
# The intercept (bias) feature is a constant column of ones, not noise.
intercept_column = np.ones((NUM_SAMPLES, 1))
# code to reduce X to 3 features (columns) using SVD:
# Perform Singular Value decomposition on X and reduce it to 3 columns.
# Only Vt is used below; the reduced SVD yields the same Vt here (there are
# fewer features than samples) without building a square U of size
# NUM_SAMPLES x NUM_SAMPLES.
U, S, Vt = np.linalg.svd(X_normalized, full_matrices=False)
# Reduce X to 3 columns by projecting onto the leading right singular vectors
X_reduced = np.dot(X_normalized, Vt[:NUM_REDUCED_FEATURES].T)
# print the first 5 rows of X_reduced
print("First 5 rows of X_reduced:")
print(X_reduced[:5])
# Normalize X_reduced
X_reduced = (X_reduced - np.mean(X_reduced, axis=0)) / np.std(X_reduced, axis=0)
print("the means and stds of X after being reduced and normalized:\n" ,X_reduced.mean(axis=0), X_reduced.std(axis=0))
# Print the shape of the reduced X to confirm it has only 3 features
print("Shape of X_reduced:", X_reduced.shape)
# Adding the intercept column to X_reduced
X_reduced_with_intercept = np.hstack((intercept_column, X_reduced))
X_normalized_with_intercept = np.hstack((intercept_column, X_normalized))
# Example usage
# X_normalized_with_intercept and y_normalized represent the original dataset
# X_reduced_with_intercept and y_normalized represent the reduced dataset
# Performing gradient descent on the original dataset
gradient_descent(X_normalized_with_intercept, y_normalized)
# Performing gradient descent on the reduced dataset
gradient_descent(X_reduced_with_intercept, y_normalized)
现在,代码在降维后的数据集上运行得更快了。我补全了你没有给出的部分,以便能够运行你的代码。希望我没有把梯度下降的代码改坏,因为我是靠让各个维度相互匹配来推导的,而不是用纸笔把问题完整推演出来。