# NumPy correlation: computing the Pearson correlation coefficient (PCC) in Python
# Ways to compute the Pearson Correlation Coefficient (PCC) between two
# columns. Each section below is an independent alternative.
#
# NOTE: `df` is not created here; the pandas/SciPy/pingouin/researchpy/NumPy
# sections assume it is an existing pandas DataFrame with numeric columns
# 'colA' and 'colB'.

# --- PCC using pandas ---
import pandas as pd

# Keep only the two columns of interest and drop rows with missing values so
# every method below sees the same, complete pairs.
df = df[['colA', 'colB']].dropna()
df.corr()  # returns a matrix with each column's correlation to all others

# --- PCC and p-value (significance) using SciPy ---
from scipy.stats import pearsonr

pearsonr(df['colA'], df['colB'])  # result unpacks as (PCC, p-value)

# --- PCC, p-value, confidence interval, etc. using pingouin ---
from pingouin import corr

corr(df['colA'], df['colB'])

# --- PCC using researchpy ---
from researchpy.correlation import corr_case

corr_case(df[['colA', 'colB']])

# --- PCC using NumPy ---
import numpy as np

arrayOne = np.array(df['colA'])
arrayTwo = np.array(df['colB'])
# Returns the 2x2 correlation matrix; the PCC is the off-diagonal entry [0, 1].
np.corrcoef(arrayOne, arrayTwo)

# --- PCC using PySpark ---
# NOTE: this import rebinds the name `corr` (previously pingouin's `corr`),
# and `df` here must be a Spark DataFrame -- a pandas DataFrame has no
# `.select` method.
from pyspark.sql.functions import corr

df.select(corr('colA', 'colB')).show()