# 11.13. 缺失值处理 (Missing-value handling)

# -*- coding: utf-8 -*-
"""
Created on Mon Jun 29 10:13:47 2020

@author: two
"""


import numpy as np
from toolkit import H
from sklearn.impute import SimpleImputer, MissingIndicator, KNNImputer

# --- SimpleImputer ----------------------------------------------------------
# Fill every missing entry with a statistic of its column (here: the mean).

# `H` is a project helper from `toolkit`; presumably get_param prints the
# constructor parameters of the class it receives -- TODO confirm.
# NOTE(review): this inspects MissingIndicator although the section is about
# SimpleImputer -- confirm that this is intended.
H.get_param(MissingIndicator)

X = np.array([
    [7, 2, 3],
    [4, np.nan, 6],
    [10, 5, 9],
])

imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
# fit() returns the fitted estimator, so fitting and transforming can be
# chained; the NaN in the second column is replaced by that column's mean.
print(imp_mean.fit(X).transform(X))
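# Output of H.get_param(MissingIndicator):
# (missing_values=nan, features='missing-only', sparse='auto',
# error_on_new=True)
#
# Imputed matrix (the NaN is replaced by the column mean 3.5):
# [[ 7.   2.   3. ]
#  [ 4.   3.5  6. ]
#  [10.   5.   9. ]]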

# --- MissingIndicator -------------------------------------------------------
# Build a boolean mask that marks where values are missing.

# Data used to fit the indicator.
X1 = np.array([[np.nan, 1, 3],
               [4, 0, np.nan],
               [8, 1, 0]])

# Data whose missing entries are to be flagged.
X2 = np.array([[5, 1, np.nan],
               [np.nan, 2, 3],
               [2, 4, 0]])

# features="all" keeps one mask column per input feature, including features
# that had no missing values in the data passed to fit().
indicator = MissingIndicator(features="all")
indicator.fit(X1)

X2_tr = indicator.transform(X2)
# A bare `X2_tr` expression only displays something when it is the last
# expression of an interactive cell; print the repr explicitly so the mask is
# shown however the file is run.
print(repr(X2_tr))
# Value of X2_tr:
# array([[False, False,  True],
#        [ True, False, False],
#        [False, False, False]])

# --- KNNImputer -------------------------------------------------------------
# Fill each missing entry from the nearest rows of the data set.

# NOTE: this rebinds the module-level name X to a new 4x3 array.
X = np.array([[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]])

# `H` is a project helper from `toolkit`; presumably get_param prints the
# constructor parameters of the class it receives -- TODO confirm.
H.get_param(KNNImputer)

# Use the 2 nearest neighbours instead of the library default.
imputer = KNNImputer(n_neighbors=2)
# A bare `imputer.fit_transform(X)` discards its result unless it is the last
# expression of an interactive cell; print the repr explicitly so the imputed
# matrix is shown however the file is run.
print(repr(imputer.fit_transform(X)))
# Output of H.get_param(KNNImputer):
# (missing_values=nan, n_neighbors=5, weights='uniform',
# metric='nan_euclidean', copy=True, add_indicator=False)
#
# Result of imputer.fit_transform(X):
# array([[1. , 2. , 4. ],
#        [3. , 4. , 3. ],
#        [5.5, 6. , 5. ],
#        [8. , 8. , 7. ]])