"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

January 13, 2020

Day #317 - Ensemble Methods

Summary of ensemble techniques, with bagging and boosting code snippets

# https://scikit-learn.org/stable/modules/ensemble.html#forest
# Ensemble methods combine the predictions of several base estimators:
#   - Averaging / bagging: e.g. RandomForest
#   - Boosting: sequential build combining weak learners: AdaBoost, Gradient Boosting
#   - Voting classifier: majority (hard) or probability-averaged (soft) vote
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# Toy dataset: 4 samples, 2 features each, binary labels.
x_data = [[0, 0], [1, 1], [1, 2], [0, 4]]
y_data = [0, 1, 1, 0]

# Bagging: random forest of 10 trees.
# random_state pinned so the demo output is reproducible across runs
# (consistent with the other estimators below).
rfit = RandomForestClassifier(n_estimators=10, random_state=0)
rfit = rfit.fit(x_data, y_data)
scores = cross_val_score(rfit, x_data, y_data, cv=2)  # 2-fold CV: 2 samples per class
print(scores.mean())

# Single decision tree baseline for comparison.
# min_samples_split: the minimum number of samples required to split an internal node.
dtree = DecisionTreeClassifier(max_depth=None, min_samples_split=2, random_state=0)
dtree = dtree.fit(x_data, y_data)
scores = cross_val_score(dtree, x_data, y_data, cv=2)
print(scores.mean())
print(rfit.predict([[3, 3]]))
print(dtree.predict([[3, 3]]))

# Boosting: AdaBoost fits weak learners sequentially, reweighting misclassified samples.
from sklearn.ensemble import AdaBoostClassifier
adaboostmodel = AdaBoostClassifier(n_estimators=10, random_state=0)
adaboostmodel = adaboostmodel.fit(x_data, y_data)
print(adaboostmodel.predict([[3, 3]]))

# Gradient boosting: each stage fits the errors of the previous stage.
from sklearn.ensemble import GradientBoostingClassifier
gbmodel = GradientBoostingClassifier(n_estimators=10, learning_rate=1.0,
                                     max_depth=1, random_state=0)
gbmodel = gbmodel.fit(x_data, y_data)
print(gbmodel.predict([[3, 3]]))

# VotingClassifier:
#   voting='hard' -> majority rule over the predicted class labels
#   voting='soft' -> argmax of the sums of the predicted class probabilities
from sklearn.ensemble import VotingClassifier
estimators = [('model1', rfit), ('model2', dtree),
              ('model3', adaboostmodel), ('model4', gbmodel)]
votinghardmodel = VotingClassifier(estimators=estimators, voting='hard')
votinghardmodel = votinghardmodel.fit(x_data, y_data)
print(votinghardmodel.predict([[3, 3]]))
votingsoftmodel = VotingClassifier(estimators=estimators, voting='soft')
votingsoftmodel = votingsoftmodel.fit(x_data, y_data)
print(votingsoftmodel.predict([[3, 3]]))
Happy Learning!!!

No comments: