我创建了一个函数，用于查找数据帧中缺失的值。
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
# Build a sample dataset: the iris feature matrix with the numeric target
# appended as one extra column.
iris = load_iris()
feature_matrix = np.column_stack((iris['data'], iris['target']))
column_names = [*iris['feature_names'], 'target']
df = pd.DataFrame(data=feature_matrix, columns=column_names)
# Human-readable species label derived from the target codes.
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
# Swap every 'setosa' cell for the placeholder string 'missing_value' so the
# frame contains something for the flagging function to detect.
# NOTE(review): applymap is deprecated in newer pandas in favour of
# DataFrame.map -- confirm against the installed pandas version.
df = df.applymap(lambda x: 'missing_value' if x == 'setosa' else x)
# Here we want to create a flag for the missing values
def add_missing_value_flags(mydf, column, placeholders=None):
    """Add boolean ``missing_<column>`` columns flagging placeholder values.

    A cell is flagged ``True`` when it is a string that either equals one of
    the placeholder markers or is exactly one character long. Non-string
    cells (numbers, ``None``, ``NaN``) are never flagged, so the function can
    be applied safely to columns of any dtype.

    Args:
        mydf: DataFrame to annotate. It is modified in place.
        column: A single column name, or an iterable of column names
            (for example ``mydf.columns[mydf.columns.str.contains('a|b')]``).
        placeholders: Optional iterable of strings that stand in for a
            missing value. Defaults to the built-in marker list below.

    Returns:
        The same DataFrame object, with one ``missing_<name>`` boolean
        column added (or overwritten) per requested column.
    """
    if placeholders is None:
        # r'N\A' is written as a raw string because '\A' is not a valid
        # escape sequence in a normal string literal.
        placeholders = ('missing_value', '', 'N/A', r'N\A', 'NA', 'N.A.',
                        'NONE', '.', '..')
    elif isinstance(placeholders, str):
        # A bare string would otherwise be split into single characters.
        placeholders = (placeholders,)
    placeholders = list(placeholders)

    # Materialise the names up front so that adding flag columns below
    # cannot interfere with iterating over a live column index.
    columns = [column] if isinstance(column, str) else list(column)

    for name in columns:
        # Work on plain Python objects so categorical, numeric and string
        # columns are all handled the same way.
        values = mydf[name].astype(object)
        is_placeholder = values.isin(placeholders).to_numpy(dtype=bool)
        # isinstance check instead of the .str accessor, which raises on
        # columns that do not hold strings.
        is_single_char = values.map(
            lambda v: isinstance(v, str) and len(v) == 1
        ).to_numpy(dtype=bool)
        mydf["missing_" + name] = is_placeholder | is_single_char
    return mydf
# Flag placeholder entries in 'species': this adds a 'missing_species' column
# to df and returns the frame (shown below when run interactively).
add_missing_value_flags(df, 'species')
sepal length (cm) sepal width (cm) ... species missing_species
0 5.1 3.5 ... missing_value True
1 4.9 3.0 ... missing_value True
2 4.7 3.2 ... missing_value True
3 4.6 3.1 ... missing_value True
4 5.0 3.6 ... missing_value True
.. ... ... ... ... ...
145 6.7 3.0 ... virginica False
146 6.3 2.5 ... virginica False
147 6.5 3.0 ... virginica False
148 6.2 3.4 ... virginica False
149 5.9 3.0 ... virginica False
Is there a way in Python to apply my function to the rest of my columns, similar to: mydf[mydf.columns[mydf.columns.str.contains('species|plant|earth')]].apply...