• 为了保证你在浏览本网站时有着更好的体验，建议使用类似Chrome、Firefox之类的浏览器~~
• 如果你喜欢本站的内容何不Ctrl+D收藏一下呢，与大家一起分享各种编程知识~
• 本网站研究机器学习、计算机视觉、模式识别~当然不局限于此，生命在于折腾，何不年轻时多折腾一下

# 特征选择（6）-嵌入式选择(embedded)

10个月前 (08-21) 1049次浏览

sklearn 相关介绍

Linear models penalized with the L1 norm have sparse solutions: many of their estimated coefficients are zero. When the goal is to reduce the dimensionality of the data to use with another classifier, they can be used along with feature_selection.SelectFromModel to select the non-zero coefficients. In particular, sparse estimators useful for this purpose are the linear_model.Lasso for regression, and of linear_model.LogisticRegression and svm.LinearSVC for classification:

svc 剖析

# Embedded feature selection: keep the features with non-zero coefficients
# of an L1-penalized linear SVC.
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC

# Load the iris data so the snippet is self-contained and runnable.
irisdata = load_iris()

# The L1 penalty drives many coefficients to exactly zero; dual=False is
# required by liblinear when penalty='l1'.
mod = LinearSVC(C=0.01, penalty="l1", dual=False).fit(irisdata.data, irisdata.target)
# prefit=True wraps the already-fitted estimator instead of refitting it.
selectmod = SelectFromModel(mod, prefit=True)
selectmod.transform(irisdata.data)


array([[ 5.1,  3.5,  1.4],
[ 4.9,  3. ,  1.4],
[ 4.7,  3.2,  1.3],
[ 4.6,  3.1,  1.5],
[ 5. ,  3.6,  1.4],
[ 5.4,  3.9,  1.7],
[ 4.6,  3.4,  1.4],
[ 5. ,  3.4,  1.5],
[ 4.4,  2.9,  1.4],


lasso 剖析

# Embedded feature selection driven by Lasso (L1) regression coefficients.
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV

# Load the iris data so the snippet is self-contained and runnable.
irisdata = load_iris()

# LassoCV picks the regularization strength by cross-validation.
lassomodel = LassoCV()
# Keep only features whose absolute coefficient exceeds 0.1.
selectmod1 = SelectFromModel(lassomodel, threshold=0.1)
selectmod1.fit(irisdata.data, irisdata.target)
selectmod1.transform(irisdata.data)


array([[ 1.4,  0.2],
[ 1.4,  0.2],
[ 1.3,  0.2],
[ 1.5,  0.2],
[ 1.4,  0.2],
[ 1.7,  0.4],
[ 1.4,  0.3],
[ 1.5,  0.2],


lr 剖析

# Embedded feature selection via L1-penalized logistic regression.
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegressionCV

# Load the iris data so the snippet is self-contained and runnable.
irisdata = load_iris()

# penalty='l1' requires the liblinear solver.
lrmodel = LogisticRegressionCV(penalty='l1', solver='liblinear')
# Keep only features whose absolute coefficient exceeds 10.
selectmod2 = SelectFromModel(lrmodel, threshold=10)
selectmod2.fit(irisdata.data, irisdata.target)
selectmod2.transform(irisdata.data)


array([[ 1.4,  0.2],
[ 1.4,  0.2],
[ 1.3,  0.2],
[ 1.5,  0.2],
[ 1.4,  0.2],
[ 1.7,  0.4],
[ 1.4,  0.3],
[ 1.5,  0.2],
[ 1.4,  0.2],
[ 1.5,  0.1],
[ 1.5,  0.2],


# -*- coding: utf-8 -*-
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel

class LR(LogisticRegression):
    """L1-penalized logistic regression with group-aware coefficients.

    Fits this (L1) model together with a companion L2 model that uses the
    same hyper-parameters.  After fitting, any feature that the L1 penalty
    zeroed out but whose L2 weight is within ``threshold`` of a retained
    feature's L2 weight is grouped with that feature, and the retained
    weight is spread evenly over the whole group.  Used with
    SelectFromModel, this keeps groups of similar features instead of an
    arbitrary single representative.
    """

    def __init__(self, threshold=0.01, dual=False, tol=1e-4, C=1.0,
                 fit_intercept=True, intercept_scaling=1, class_weight=None,
                 random_state=None, solver='liblinear', max_iter=100,
                 multi_class='ovr', verbose=0, warm_start=False, n_jobs=1):
        # Two L2 coefficients closer than this are considered "similar".
        self.threshold = threshold
        # This object itself is the L1-penalized model.
        LogisticRegression.__init__(
            self, penalty='l1', dual=dual, tol=tol, C=C,
            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling,
            class_weight=class_weight, random_state=random_state,
            solver=solver, max_iter=max_iter, multi_class=multi_class,
            verbose=verbose, warm_start=warm_start, n_jobs=n_jobs)
        # Companion L2 model created with the same hyper-parameters.
        self.l2 = LogisticRegression(
            penalty='l2', dual=dual, tol=tol, C=C,
            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling,
            class_weight=class_weight, random_state=random_state,
            solver=solver, max_iter=max_iter, multi_class=multi_class,
            verbose=verbose, warm_start=warm_start, n_jobs=n_jobs)

    def fit(self, X, y, sample_weight=None):
        """Fit both models, then redistribute the L1 weights.

        Returns self, like a standard scikit-learn estimator.
        """
        # Train the L1 model (this object) and keep a pristine copy of its
        # coefficients for later inspection.
        super(LR, self).fit(X, y, sample_weight=sample_weight)
        self.coef_old_ = self.coef_.copy()
        # Train the companion L2 model on the same data.
        self.l2.fit(X, y, sample_weight=sample_weight)

        cntOfRow, cntOfCol = self.coef_.shape
        # One row of coefficients per target class (one-vs-rest).
        for i in range(cntOfRow):
            for j in range(cntOfCol):
                coef = self.coef_[i][j]
                # Only features the L1 model retained can seed a group.
                if coef != 0:
                    idx = [j]
                    # L2 weight of the retained feature.
                    coef1 = self.l2.coef_[i][j]
                    for k in range(cntOfCol):
                        coef2 = self.l2.coef_[i][k]
                        # Group features whose L2 weights differ by less than
                        # the threshold but were zeroed out by the L1 penalty.
                        if abs(coef1 - coef2) < self.threshold and j != k and self.coef_[i][k] == 0:
                            idx.append(k)
                    # Spread the retained weight evenly across the group.
                    mean = coef / len(idx)
                    self.coef_[i][idx] = mean
        return self

def main():
    """Demo: group-aware L1 feature selection on the iris data set."""
    # Local import keeps the demo self-contained; `iris` was previously
    # undefined in this snippet.
    from sklearn.datasets import load_iris
    iris = load_iris()
    # threshold=0.5 controls how aggressively similar features are grouped;
    # SelectFromModel keeps features whose redistributed weight exceeds 1.
    # (Was a Python 2 `print` statement; now a Python 3 function call.)
    print(SelectFromModel(LR(threshold=0.5, C=0.1), threshold=1)
          .fit_transform(iris.data, iris.target))


if __name__ == '__main__':
    main()


Deeplearn, 版权所有丨如未注明 , 均为原创丨本网站采用BY-NC-SA协议进行授权 , 转载请注明特征选择（6）-嵌入式选择(embedded)

• 版权声明

本站的文章和资源来自互联网或者站长的原创，按照 CC BY-NC-SA 3.0 CN 协议发布和共享，转载或引用本站文章应遵循相同协议。如果有侵犯版权的资源请尽快联系站长，我们会在 24h 内删除有争议的资源。
• 网站驱动

• 友情链接

• 支持主题

邮箱：service@deeplearn.me