当前位置 博文首页 > LY的博客:sklearn学习笔记之决策树分类和线性回归
decision tree:
# -*- coding: utf-8 -*-
# Decision-tree classification on the iris dataset: train one tree with the
# Gini criterion and one with the entropy criterion, then print the test
# accuracy of each.
import sklearn
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
import numpy


def getData_1():
    """Load the iris dataset into a DataFrame.

    Returns:
        A pandas DataFrame holding the four feature columns
        (SepalLengthCm, SepalWidthCm, PetalLengthCm, PetalWidthCm) and a
        'target' column with the class label of each sample.
    """
    iris = datasets.load_iris()
    X = iris.data  # feature matrix, 150x4: one sample per row, 4 features each
    y = iris.target  # class labels: length-150 vector, one label per sample
    df1 = pd.DataFrame(
        X,
        columns=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
    )
    df1['target'] = y
    return df1


df = getData_1()

# NOTE(review): iloc[:, 0:3] selects only the first three feature columns, so
# PetalWidthCm is not used for training. Use 0:4 if all four features are
# intended -- confirm with the author.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 0:3], df['target'], test_size=0.3, random_state=42)
print(X_train, X_test, y_train, y_test)

# Tree split on Gini impurity (labelled "CART tree" by the author).
model = tree.DecisionTreeClassifier(criterion='gini')
model.fit(X_train, y_train)

# Tree split on information gain (labelled "C4.5 tree" by the author).
# Only the split criterion differs from `model`; the estimator class is the same.
model2 = tree.DecisionTreeClassifier(criterion='entropy')
model2.fit(X_train, y_train)

# Result: the test-set accuracy of each tree is printed.
print('cart树:{:.3f}'.format(model.score(X_test, y_test)))
print('c4.5树::{:.3f}'.format(model2.score(X_test, y_test)))
# -*- coding: utf-8 -*-
# Linear regression: fit LinearRegression on a synthetic two-class dataset
# produced by make_classification and print its score on the train and test
# splits.
import sklearn
# make_classification is exported from sklearn.datasets; the original import
# path sklearn.datasets.samples_generator is not available in current releases.
from sklearn.datasets import make_classification
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=2400,
    n_features=5,
    n_informative=2,
    n_redundant=2,
    n_classes=2,
    n_clusters_per_class=2,
    scale=1.0,
    random_state=20,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

# The original also passed normalize=False. That was the default, and the
# parameter is no longer accepted by current scikit-learn, so it is omitted.
model = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=1)
model.fit(X_train, y_train)
print('FINISH')

# score() is R^2 for linear regression (for classifiers it would be accuracy).
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

print(X_train, y_train)
# The original referenced the undefined name `y_testcs` here (NameError).
print(X_test, y_test)