If speed is concern you can consider using numba:
from numba import njit
@njit
def cumprod_triangular_numba(arr):
out = np.zeros((arr.size, arr.size), dtype=np.int64)
for col in range(arr.size):
p = 1
for row in range(col, arr.size):
p *= arr[row]
out[row, col] = p
return out
基准:
import numpy as np
import perfplot
from numba import njit, prange
from numpy.lib.stride_tricks import sliding_window_view
def cumprod_triangular_orig(arr):
n = len(arr)
matrix = np.zeros((n, n))
for i in range(n):
matrix[i, : i + 1] = np.flip(np.cumprod(np.flip(arr[: i + 1])))
return matrix
def cumprod_triangular_james(arr):
return sum(
np.diagflat(sliding_window_view(arr, i + 1).prod(axis=1), -i)
for i in range(len(arr))
)
def cumprod_triangular_onyambu_1(arr):
u = arr.cumprod()
return u[:, None] / np.r_[1, u[:-1]] * np.tri(arr.size, dtype=int)
def cumprod_triangular_onyambu_2(arr):
a = np.triu(arr).T
i1 = a == 0
a[i1] = 1
return np.where(i1, 0, a.cumprod(0))
def cumprod_triangular_onyambu_3(arr):
a = np.triu(arr).T
return np.where(a, a, 1).cumprod(0) * np.tri(arr.size, dtype=int)
@njit
def cumprod_triangular_numba(arr):
out = np.zeros((arr.size, arr.size), dtype=np.int64)
for col in prange(arr.size):
p = 1
for row in range(col, arr.size):
p *= arr[row]
out[row, col] = p
return out
@njit(parallel=True)
def cumprod_triangular_numba_parallel(arr):
out = np.zeros((arr.size, arr.size), dtype=np.int64)
for col in prange(arr.size):
p = 1
for row in range(col, arr.size):
p *= arr[row]
out[row, col] = p
return out
arr = np.array([2, 3, 5, 7])
assert np.allclose(cumprod_triangular_numba(arr), cumprod_triangular_orig(arr))
assert np.allclose(cumprod_triangular_numba_parallel(arr), cumprod_triangular_orig(arr))
assert np.allclose(cumprod_triangular_james(arr), cumprod_triangular_orig(arr))
assert np.allclose(cumprod_triangular_onyambu_1(arr), cumprod_triangular_orig(arr))
assert np.allclose(cumprod_triangular_onyambu_2(arr), cumprod_triangular_orig(arr))
assert np.allclose(cumprod_triangular_onyambu_3(arr), cumprod_triangular_orig(arr))
np.random.seed(0)
perfplot.show(
setup=lambda n: np.random.randint(1, 2, size=n, dtype=np.int64),
kernels=[
cumprod_triangular_orig,
cumprod_triangular_james,
cumprod_triangular_numba,
cumprod_triangular_numba_parallel,
cumprod_triangular_onyambu_1,
cumprod_triangular_onyambu_2,
cumprod_triangular_onyambu_3,
],
labels=["orig", "james", "numba", "numba_parallel", "o_1", "o_2", "o_3"],
n_range=[3, 5, 10, 15, 20, 25, 50, 75, 100, 200, 500, 1000],
xlabel="N",
logx=True,
logy=True,
equality_check=np.allclose,
)
在我的计算机(AMD 5700x)上创建此图: