我正在研究数据帧字典——总共五个,我试图获取每个数据帧的第一行.我的代码似乎适用于某些数据帧,但不适用于其他数据帧.
a0 = {'dataset': {'0.05, 1.0, 175': 'CS:1', '0.05, 1.0, 150': 'CS:1'},
'mean_score': {'0.05, 1.0, 175': -0.2820450154520415,
'0.05, 1.0, 150': -0.28204501545204186},
'rank_score': {'0.05, 1.0, 175': 1, '0.05, 1.0, 150': 2},
'std_score': {'0.05, 1.0, 175': 0.11499605607995111,
'0.05, 1.0, 150': 0.11499605607995127},
'contamination': {'0.05, 1.0, 175': 0.05, '0.05, 1.0, 150': 0.05},
'max_samples': {'0.05, 1.0, 175': 1.0, '0.05, 1.0, 150': 1.0},
'n_estimators': {'0.05, 1.0, 175': 175, '0.05, 1.0, 150': 150}}
a1 = {'dataset': {'0.05, 1.0, 200': 'CK:-1', '0.05, 1.0, 175': 'CK:-1'},
'mean_score': {'0.05, 1.0, 200': -0.20618057780261195,
'0.05, 1.0, 175': -0.20618057780261267},
'rank_score': {'0.05, 1.0, 200': 1, '0.05, 1.0, 175': 2},
'std_score': {'0.05, 1.0, 200': 0.13109535628226052,
'0.05, 1.0, 175': 0.13109535628226013},
'contamination': {'0.05, 1.0, 200': 0.05, '0.05, 1.0, 175': 0.05},
'max_samples': {'0.05, 1.0, 200': 1.0, '0.05, 1.0, 175': 1.0},
'n_estimators': {'0.05, 1.0, 200': 200, '0.05, 1.0, 175': 175}}
a2 = {'dataset': {'0.05, 0.7, 125': 'PH:1',
'0.05, 0.7749999999999999, 200': 'PH:1'},
'mean_score': {'0.05, 0.7, 125': -0.22096885360666768,
'0.05, 0.7749999999999999, 200': -0.22416479620828117},
'rank_score': {'0.05, 0.7, 125': 1, '0.05, 0.7749999999999999, 200': 2},
'std_score': {'0.05, 0.7, 125': 0.05228492731122392,
'0.05, 0.7749999999999999, 200': 0.061897704957581456},
'contamination': {'0.05, 0.7, 125': 0.05,
'0.05, 0.7749999999999999, 200': 0.05},
'max_samples': {'0.05, 0.7, 125': 0.7,
'0.05, 0.7749999999999999, 200': 0.7749999999999999},
'n_estimators': {'0.05, 0.7, 125': 125, '0.05, 0.7749999999999999, 200': 200}}
a3 = {'dataset': {'0.05, 0.85, 125': 'PRT:-1',
'0.05, 0.85, 100': 'PRT:-1'},
'mean_score': {'0.05, 0.85, 125': -0.12896828405478034,
'0.05, 0.85, 100': -0.13141635454748085},
'rank_score': {'0.05, 0.85, 125': 1, '0.05, 0.85, 100': 2},
'std_score': {'0.05, 0.85, 125': 0.016228240324984843,
'0.05, 0.85, 100': 0.013178168219693726},
'contamination': {'0.05, 0.85, 125': 0.05, '0.05, 0.85, 100': 0.05},
'max_samples': {'0.05, 0.85, 125': 0.85, '0.05, 0.85, 100': 0.85},
'n_estimators': {'0.05, 0.85, 125': 125, '0.05, 0.85, 100': 100}}
a4 = {'dataset': {'0.05, 1.0, 200': 'PRT:1',
'0.05, 1.0, 175': 'PRT:1'},
'mean_score': {'0.05, 1.0, 200': -0.1694053747115974,
'0.05, 1.0, 175': -0.1694053747115976},
'rank_score': {'0.05, 1.0, 200': 1, '0.05, 1.0, 175': 2},
'std_score': {'0.05, 1.0, 200': 0.006550547930259526,
'0.05, 1.0, 175': 0.006550547930259387},
'contamination': {'0.05, 1.0, 200': 0.05, '0.05, 1.0, 175': 0.05},
'max_samples': {'0.05, 1.0, 200': 1.0, '0.05, 1.0, 175': 1.0},
'n_estimators': {'0.05, 1.0, 200': 200, '0.05, 1.0, 175': 175}}
下面还显示了基于列 Select 行值的代码.对于数据帧a0 through a2
,这些代码确实可以正常工作,但对于数据帧a3 - a4
,我得到了索引位置错误消息.
subsets = ['CS:1', 'CK:-1', 'PH:1', 'PRT:-1', 'PRT:1'] # rows to select
#the code to retrieve the first rows for a0-a2 work as expected.
(a0.loc[a0['dataset'].isin([subsets[0]])].iloc[0][['contamination', 'max_samples', 'n_estimators']])
(a1.loc[a1['dataset'].isin([subsets[1]])].iloc[0][['contamination', 'max_samples', 'n_estimators']])
(a2.loc[a2['dataset'].isin([subsets[2]])].iloc[0][['contamination', 'max_samples', 'n_estimators']])
# the codes to retrieve the first frow for a3 - a4 give the single positional indexer is out-of-bounds error.
(a3.loc[a3['dataset'].isin([subsets[3]])].iloc[0][['contamination', 'max_samples', 'n_estimators']])
(a4.loc[a4['dataset'].isin([subsets[4]])].iloc[0][['contamination', 'max_samples', 'n_estimators']])
不知道为什么,因为我没有使用超出界限的索引.