���Ľ�������л�����Ķ�
基本绘图
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Plot the logistic, tanh and ReLU curves on one figure and save it to 1.png.

# Dense, evenly spaced grid over [-3, 3]. The builtin ``float`` replaces the
# ``np.float`` alias, which NumPy 1.24 removed.
x = np.linspace(start=-3, stop=3, num=1001, dtype=float)

# ReLU: element-wise max(x, 0).
y_relu = np.maximum(x, 0.0)
# Logistic sigmoid: 1 / (1 + e^-x).
y_logit = 1 / (1 + np.exp(-x))
# Hyperbolic tangent; np.tanh matches the explicit exponential formula.
y_tanh = np.tanh(x)

plt.figure(figsize=(8, 6))
# The y-axis is limited to [-1, 1], so the ReLU curve is clipped above 1.
plt.ylim((-1, 1))
# NOTE(review): the labels and title say "Loss", but the curves plotted are
# the logistic, tanh and ReLU functions themselves - confirm the wording.
plt.plot(x, y_logit, 'r-', label='LogisticLoss', linewidth=2)
plt.plot(x, y_tanh, 'g-', label='TanhLoss', linewidth=2)
plt.plot(x, y_relu, 'b-', label='ReluLoss', linewidth=2)
plt.title("Lossfunction", fontsize=18)
plt.grid()
# 'upper right' (with a space) is the valid location string.
plt.legend(loc='upper right')
# Save before show(): once the window is closed the current figure is gone
# and savefig would write an empty image.
plt.savefig('1.png')
plt.show()
直方图、箱图
import seaborn
import pandas as pd

# Histogram of column G1 and a box plot of G1 grouped by column Medu.

# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is
# needed; ``data`` is kept as an alias for any code that still refers to it.
data = pd.read_csv('student-por.csv', delimiter=";")
df = data

# Distribution of G1 across 19 bins.
plt.hist(df.loc[:, "G1"], bins=19)
plt.xlabel('Performance', fontsize=18)
plt.ylabel('Num of Students', fontsize=18)
plt.title('Histogram of {0}'.format('G1'), fontsize=18)
plt.show()

# One box of G1 values per distinct value of Medu.
df.boxplot(column=["G1"], by="Medu")
plt.show()
条形图、核密度估计图
# Horizontal bar chart: number of rows per value of the 'sex' column,
# with the bars ordered by index label.
sex_column = pd.Series(df.loc[:, 'sex'])
s = sex_column.value_counts().sort_index(axis=0)
s.plot(kind='barh')
plt.ylabel('SEX')
plt.show()

# Kernel density estimate of the three columns G1, G2 and G3 on one axes.
grade_columns = ['G1', 'G2', 'G3']
grades = df.loc[:, grade_columns]
grades.plot(kind='kde')
plt.show()
#kind= line 线图
#      pie 饼图
#      bar 垂直条形图
#      barh 水平条形图
#      kde 核密度估计
#      hist 直方图
#      box 箱图
scikit-learn: 预处理
http://scikit-learn.org/stable/
import sklearn
# scikit-learn preprocessing cheat sheet.
# NOTE(review): X is not defined in this snippet; presumably it is a 2-D
# numeric feature array supplied by the surrounding code - confirm.
import numpy as np
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import PolynomialFeatures

# Normalization / binarization
preprocessing.normalize(X, norm='l2')
preprocessing.Binarizer(copy=True, threshold=0.0).fit(X)
preprocessing.OneHotEncoder()

# Missing-value handling.
# sklearn.preprocessing.Imputer was removed in scikit-learn 0.22.
# SimpleImputer always imputes column-wise (the old axis=0) and takes the
# missing-value marker as np.nan rather than the string 'NaN'.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit(X)

# from (X1, X2) to (1, X1, X2, X1^2, X1X2, X2^2)
poly = PolynomialFeatures(2)
poly.fit_transform(X)
http://scikit-learn.org/stable/
# Split the data, fit a logistic-regression classifier and predict.
# NOTE(review): X and y are not defined in this snippet; presumably they are
# the feature matrix and label vector from the surrounding code - confirm.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Split the dataset: test_size=0.4 holds out 40% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

clf = LogisticRegression()
clf.fit(X_train, y_train)

# Predict: class labels for the training set, class probabilities for the
# test set. The results are not stored, as in the original snippet.
clf.predict(X_train)
clf.predict_proba(X_test)
# Assorted classifiers, each built with the example hyper-parameters below.
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# The instances are kept in a list so they can be used afterwards instead of
# being constructed and discarded.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    MLPClassifier(solver='lbfgs', activation='relu', alpha=1e-5,
                  hidden_layer_sizes=(5, 2)),
]