特徵工程 特徵選擇 reliefF演算法
return X[:, self.top_features[self.n_features_to_keep]]
return X[:, :self.top_features[self.n_features_to_keep]]
- 匯入環境
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
# Report the name and version of each imported library.
for module in mpl, np, pd, sklearn:
    print(module.__name__, module.__version__)
sys.version_info(major=3, minor=7, micro=7, releaselevel='final', serial=0)
matplotlib 3.3.1
numpy 1.19.1
pandas 1.1.1
sklearn 0.23.2
- 資料集
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
# Number of features each selector below is asked to retain.
n_features_to_keep = 10

# Synthetic dataset: 5000 samples with 100 features, seeded for repeatability.
X, y = make_friedman1(
    n_samples=5000,
    n_features=100,
    random_state=0,
)
- reliefF
from sklearn.neighbors import KDTree
class ReliefF(object):
    """Feature selection using data-mined expert knowledge.

    Based on the ReliefF algorithm as introduced in:

    Kononenko, Igor et al. Overcoming the myopia of inductive learning
    algorithms with RELIEFF (1997), Applied Intelligence, 7(1), p39-55
    """

    # The default for n_features_to_keep is the literal 10 rather than a
    # module-level variable, so the class can be defined and imported without
    # any script-level state existing first.
    def __init__(self, n_neighbors=100, n_features_to_keep=10):
        """Set up ReliefF to perform feature selection.

        Parameters
        ----------
        n_neighbors: int (default: 100)
            The number of neighbors to consider when assigning feature
            importance scores. More neighbors results in more accurate
            scores, but takes longer.
        n_features_to_keep: int (default: 10)
            The number of top-ranked features that `transform` retains.
        """
        self.feature_scores = None
        self.top_features = None
        self.tree = None
        self.n_neighbors = n_neighbors
        self.n_features_to_keep = n_features_to_keep

    def fit(self, X, y):
        """Compute the feature importance scores from the training data.

        Parameters
        ----------
        X: array-like {n_samples, n_features}
            Training instances to compute the feature importance scores from
        y: array-like {n_samples}
            Training labels

        Returns
        -------
        None
            Results are stored on the instance: `feature_scores` holds one
            score per column of X and `top_features` holds the column indices
            sorted from highest to lowest score.
        """
        self.feature_scores = np.zeros(X.shape[1])
        # KDTree is expected to be in scope from the file's
        # `from sklearn.neighbors import KDTree` import.
        self.tree = KDTree(X)

        for source_index in range(X.shape[0]):
            # Ask for one extra neighbor because the query point is itself
            # part of the tree.
            _, indices = self.tree.query(
                X[source_index].reshape(1, -1), k=self.n_neighbors + 1)

            # First match is self, so ignore it
            for neighbor_index in indices[0][1:]:
                similar_features = X[source_index] == X[neighbor_index]
                label_match = y[source_index] == y[neighbor_index]

                # If the labels match, then increment features that match and
                # decrement features that do not match.
                # Do the opposite if the labels do not match.
                if label_match:
                    self.feature_scores[similar_features] += 1.
                    self.feature_scores[~similar_features] -= 1.
                else:
                    self.feature_scores[~similar_features] += 1.
                    self.feature_scores[similar_features] -= 1.

        # Descending order: best-scoring feature index first.
        self.top_features = np.argsort(self.feature_scores)[::-1]

    def transform(self, X):
        """Reduce the feature set down to the top `n_features_to_keep` features.

        Parameters
        ----------
        X: array-like {n_samples, n_features}
            Feature matrix to perform feature selection on

        Returns
        -------
        X_reduced: array-like {n_samples, n_features_to_keep}
            Reduced feature matrix
        """
        return X[:, self.top_features[:self.n_features_to_keep]]
# Build the selector with its default constructor arguments.
rel = ReliefF()
# Score every column of X against y; fit stores the per-feature scores in
# rel.feature_scores and the score-ranked column indices in rel.top_features.
# NOTE(review): X and y come from make_friedman1, so the exact `==`
# comparisons inside fit presumably almost never match on this data; confirm
# that ReliefF is meaningful here.
rel.fit(X, y)
[99 36 26 27 28 29 30 31 32 33 34 35 37 98 38 39 40 41 42 43 44 45 46 47
25 24 23 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21 48 49 50 75 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
96 97 76 74 51 73 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
70 71 72 0]
# Recursive feature elimination wrapped around a linear-kernel SVR, asked to
# keep n_features_to_keep features with step=5.
estimator = SVR(kernel="linear")
rfe = RFE(
    estimator,
    n_features_to_select=n_features_to_keep,
    step=5,
).fit(X, y)
[ 1 1 19 1 1 13 12 4 14 7 1 10 8 18 7 19 14 5 10 4 13 14 7 17
3 16 18 8 3 8 6 6 13 18 2 5 12 17 12 2 17 10 9 11 7 15 9 16
9 2 8 4 18 5 15 4 2 6 3 9 10 2 1 1 4 16 12 11 13 11 8 11
5 17 6 1 1 16 19 13 19 19 15 3 11 1 14 12 10 6 17 7 14 18 3 15
16 9 15 5]
- 特徵工程之特徵選擇特徵工程
- 專欄 | 基於 Jupyter 的特徵工程手冊:特徵選擇(五)特徵工程
- 專欄 | 基於 Jupyter 的特徵工程手冊:特徵選擇(四)特徵工程
- 專欄 | 基於 Jupyter 的特徵工程手冊:特徵選擇(一)特徵工程
- 專欄 | 基於 Jupyter 的特徵工程手冊:特徵選擇(二)特徵工程
- 專欄 | 基於 Jupyter 的特徵工程手冊:特徵選擇(三)特徵工程
- xgboost 特徵選擇,篩選特徵的重要性特徵
- 特徵選擇和特徵生成問題初探特徵
- 用遺傳演算法進行特徵選擇演算法特徵
- Relief 特徵選擇演算法簡單介紹特徵演算法
- 【特徵工程】(資料)使用Xgboost篩選特徵重要性特徵工程
- 特徵工程之特徵表達特徵工程
- 特徵工程特徵工程
- RF、GBDT、XGboost特徵選擇方法特徵
- 特徵選擇技術總結特徵
- 機器學習中,有哪些特徵選擇的工程方法?機器學習特徵
- xgboost 特徵重要性選擇 / 看所有特徵哪個重要特徵
- 08 特徵工程 - 特徵降維 - LDA特徵工程LDA
- 特徵工程之特徵預處理特徵工程
- 特徵工程:互動特徵與多項式特徵理解特徵工程
- 特徵工程思路特徵工程
- [特徵工程] encoding特徵工程Encoding
- 特徵工程梗概特徵工程
- 決策樹模型(2)特徵選擇模型特徵
- 基於條件熵的特徵選擇熵特徵
- 特徵工程系列:(三)特徵對齊與表徵特徵工程
- 最核心的特徵工程方法-分箱演算法特徵工程演算法
- ch11 特徵選擇與稀疏學習特徵
- 【原始碼】MATLAB特徵選擇函式庫version 6.2.2018.1原始碼Matlab特徵函式
- 決策樹中結點的特徵選擇方法特徵
- XGBoost 輸出特徵重要性以及篩選特徵特徵
- 機器學習 | 特徵工程機器學習特徵工程
- 機器學習——特徵工程機器學習特徵工程
- 機器學習特徵工程機器學習特徵工程
- 資料分析特徵工程方法特徵工程
- 用Dask並行化特徵工程!並行特徵工程
- 一文讀懂特徵工程特徵工程
- 機器學習之特徵工程機器學習特徵工程