• 入门教程
• 分类教程
• 回归教程
• 聚类教程
• KNN教程
• 关注我们

# 机器学习 - 数据处理

## 缩放比例

from pandas import read_csv
from numpy import set_printoptions
from sklearn import preprocessing

# Location of the Pima Indians diabetes CSV and the column names to assign
# to it (assumes the file has no header row -- confirm against the data file).
path = r'C:\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

# Load the CSV so that `dataframe` exists before its values are extracted.
dataframe = read_csv(path, names=names)
array = dataframe.values

# Rescale every column (the 'class' column included) into the range [0, 1].
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_rescaled = data_scaler.fit_transform(array)

# Print the first 10 rescaled rows with one decimal place.
set_printoptions(precision=1)
print("\nScaled data:\n", data_rescaled[0:10])

Scaled data:
 [[0.4 0.7 0.6 0.4 0.  0.5 0.2 0.5 1. ]
 [0.1 0.4 0.5 0.3 0.  0.4 0.1 0.2 0. ]
 [0.5 0.9 0.5 0.  0.  0.3 0.3 0.2 1. ]
 [0.1 0.4 0.5 0.2 0.1 0.4 0.  0.  0. ]
 [0.  0.7 0.3 0.4 0.2 0.6 0.9 0.2 1. ]
 [0.3 0.6 0.6 0.  0.  0.4 0.1 0.2 0. ]
 [0.2 0.4 0.4 0.3 0.1 0.5 0.1 0.1 1. ]
 [0.6 0.6 0.  0.  0.  0.5 0.  0.1 0. ]
 [0.1 1.  0.6 0.5 0.6 0.5 0.  0.5 1. ]
 [0.5 0.6 0.8 0.  0.  0.  0.1 0.6 1. ]]

## 二值化

from pandas import read_csv
from sklearn.preprocessing import Binarizer

# Location of the Pima Indians diabetes CSV and the column names to assign
# to it (assumes the file has no header row -- confirm against the data file).
path = r'C:\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

# Load the CSV so that `dataframe` exists before its values are extracted.
dataframe = read_csv(path, names=names)
array = dataframe.values

# Binarize every column with a threshold of 0.5.
binarizer = Binarizer(threshold=0.5).fit(array)
Data_binarized = binarizer.transform(array)

# Print the first 5 binarized rows.
print("\nBinary data:\n", Data_binarized[0:5])

Binary data:
 [[1. 1. 1. 1. 0. 1. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 0. 1. 0.]
 [1. 1. 1. 0. 0. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 0. 1. 0.]
 [0. 1. 1. 1. 1. 1. 1. 1. 1.]]

## 标准化

from pandas import read_csv
from sklearn.preprocessing import StandardScaler
from numpy import set_printoptions

# Location of the Pima Indians diabetes CSV and the column names to assign
# to it (assumes the file has no header row -- confirm against the data file).
path = r'C:\pima-indians-diabetes.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']

# Load the CSV so that `dataframe` exists before its values are extracted.
dataframe = read_csv(path, names=names)
array = dataframe.values

# Fit the scaler on the full array, then standardize every column with it.
data_scaler = StandardScaler().fit(array)
data_rescaled = data_scaler.transform(array)

# Print the first 5 standardized rows with two decimal places.
set_printoptions(precision=2)
print("\nRescaled data:\n", data_rescaled[0:5])

Rescaled data:
[[ 0.64  0.85  0.15  0.91 -0.69  0.2   0.47  1.43  1.37]
[-0.84 -1.12 -0.16  0.53 -0.69 -0.68 -0.37 -0.19 -0.73]
[ 1.23  1.94 -0.26 -1.29 -0.69 -1.1   0.6  -0.11  1.37]
[-0.84 -1.   -0.16  0.15  0.12 -0.49 -0.92 -1.04 -0.73]
[-1.14  0.5  -1.5   0.91  0.77  1.41  5.48 -0.02  1.37]]

## 标签编码

import numpy as np
from sklearn import preprocessing

# Sample labels used to fit the encoder; duplicates are intentional.
input_labels = ['red', 'black', 'red', 'green', 'black', 'yellow', 'white']

# Fit a label encoder so each distinct label is mapped to an integer code.
encoder = preprocessing.LabelEncoder()
encoder.fit(input_labels)

# Encode a few labels. `.tolist()` converts the NumPy result to native
# Python ints so the printed list reads [1, 2, 0] on every NumPy version
# (list() would keep NumPy scalars, which NumPy 2.x prints as np.int64(1)).
test_labels = ['green', 'red', 'black']
encoded_values = encoder.transform(test_labels)
print("\nLabels =", test_labels)
print("Encoded values =", encoded_values.tolist())

# Decode integer codes back to labels; `.tolist()` again yields plain
# Python strings rather than NumPy string scalars.
encoded_values = [3, 0, 4, 1]
decoded_list = encoder.inverse_transform(encoded_values)

print("\nEncoded values =", encoded_values)
print("\nDecoded labels =", decoded_list.tolist())

Labels = ['green', 'red', 'black']
Encoded values = [1, 2, 0]
Encoded values = [3, 0, 4, 1]
Decoded labels = ['white', 'black', 'yellow', 'green']

Vue开发实战 -〔唐金州〕

Linux内核技术实战课 -〔邵亚方〕