- Regression to find optimal values of the 'X' variables
- Add a constraint to make it an optimization problem
- Optimization with minimum expense for each track
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data source:
#   https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv
# Reference notebook:
#   https://github.com/georgeblu1/Data-Projects/blob/master/Budget%20Optimization.ipynb
import pandas as pd

# Read the advertising data set straight from its URL into a DataFrame.
ADVERTISING_CSV_URL = r'https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv'
data = pd.read_csv(ADVERTISING_CSV_URL)

# Preview the first rows (rendered only when this is the last expression
# of a notebook cell).
data.head()
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import math | |
import statsmodels.api as sm | |
import statsmodels.formula.api as smf | |
from statsmodels.tools.eval_measures import rmse | |
from sklearn import metrics | |
from sklearn.linear_model import LinearRegression | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.model_selection import train_test_split | |
# --- Visual checks of the linear-regression assumptions ---------------------

# Linearity: there should be a linear relationship between the target and
# each feature; scatter plots of Sales against every channel let us check.
# `height` replaces the old `size` keyword, which seaborn renamed.
sns.pairplot(
    data,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    height=4,
    aspect=1,
)

# Little or no multicollinearity between features.
sns.pairplot(data[['TV', 'Radio', 'Newspaper']])

# Homoscedasticity: OLS assumes all residuals drawn from the population have
# constant variance.
# residplot draws on the current axes; open a fresh figure so it does not
# land on the last subplot of the pair grid created just above.
plt.figure()
sns.residplot(x=data['TV'], y=data["Sales"])
# Fit a linear regression on the complete data set (no train/test split).
feature_cols = ['TV', 'Radio', 'Newspaper']
X = data[feature_cols]
# Double brackets keep the target as a one-column DataFrame.
y = data[["Sales"]]

# Instantiate the estimator, then fit it; the value returned by fit() is
# kept under its own name.
SkLearn_model = LinearRegression()
SkLearn_result = SkLearn_model.fit(X, y)

# Print the fitted intercept first, then the coefficients.
for fitted_param in (SkLearn_result.intercept_, SkLearn_result.coef_):
    print(fitted_param)
# Hold-out evaluation of the model with all three channels
# (Newspaper included).
X = data[['TV', 'Radio', 'Newspaper']]
y = data.Sales

# Split into train and test partitions; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Instantiate and fit on the training partition, then predict the test one.
lm2 = LinearRegression()
lm2.fit(X_train, y_train)
y_pred = lm2.predict(X_test)

# Report RMSE followed by R^2 on the test partition.
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(np.sqrt(test_mse))
print(metrics.r2_score(y_test, y_pred))

# Keep the fitted parameters under short names; the bare expressions render
# them only when they are the last expression of a notebook cell.
coefficient = lm2.coef_
coefficient
inter = lm2.intercept_
inter
# --- Budget allocation as a linear program ----------------------------------
# PuLP must be installed first. In a notebook run `!pip install pulp`; the
# shell-escape form is not valid syntax in a plain .py file, so it is kept
# here only as a comment.
from pulp import LpMaximize, LpProblem, LpStatus, LpVariable, value

# Objective coefficients (one per channel) and the constant term.
# NOTE(review): these are hard-coded rather than read from the regression
# fitted earlier in this file (`coefficient`, `inter`) — confirm they are the
# values you intend to optimize against.
TV_COEF = 0.0548
RADIO_COEF = 0.1022
NEWSPAPER_COEF = 0.0007878
INTERCEPT = 4.6338

# PuLP does not permit spaces in problem names (it warns and converts them
# to underscores), so the name is written with underscores directly.
prob = LpProblem("Ads_Sales_Problem", LpMaximize)

# Decision variables, each with a (lower, upper) spend bound.
# LP names: x - tv, y - radio, z - newspaper.
# The Python names are descriptive so the regression target `y` defined
# earlier in this file is not overwritten.
tv = LpVariable("x", 0, 200)
radio = LpVariable("y", 0, 500)
newspaper = LpVariable("z", 0, 500)

# Constraint: the total spend must not exceed the budget of 1000.
prob += tv + radio + newspaper <= 1000

# Objective: predicted sales. The newspaper coefficient multiplies the
# newspaper variable (z); it was previously applied to the radio variable.
prob += TV_COEF * tv + RADIO_COEF * radio + NEWSPAPER_COEF * newspaper + INTERCEPT

status = prob.solve()
print(LpStatus[status])

print(prob)
for v in prob.variables():
    print(v.name, "=", v.varValue)

# Evaluate the objective at the solved variable values instead of re-typing
# the formula with hand-copied numbers.
calculation = value(prob.objective)
print(calculation)
# --- Same allocation problem, with a minimum spend of 50 on every channel ---
from pulp import LpMaximize, LpProblem, LpStatus, LpVariable, value

# Objective coefficients (one per channel) and the constant term.
# NOTE(review): these are hard-coded rather than read from the regression
# fitted earlier in this file (`coefficient`, `inter`) — confirm they are the
# values you intend to optimize against.
TV_COEF = 0.0548
RADIO_COEF = 0.1022
NEWSPAPER_COEF = 0.0007878
INTERCEPT = 4.6338

# PuLP does not permit spaces in problem names (it warns and converts them
# to underscores), so the name is written with underscores directly.
prob = LpProblem("Ads_Sales_Problem_with_minium_in_each_track", LpMaximize)

# Decision variables; the lower bound of 50 enforces the per-channel minimum.
# LP names: x - tv, y - radio, z - newspaper.
tv = LpVariable("x", 50, 200)
radio = LpVariable("y", 50, 500)
newspaper = LpVariable("z", 50, 500)

# Constraint: the total spend must not exceed the budget of 1000.
prob += tv + radio + newspaper <= 1000

# Objective: predicted sales. The newspaper coefficient multiplies the
# newspaper variable (z); it was previously applied to the radio variable.
prob += TV_COEF * tv + RADIO_COEF * radio + NEWSPAPER_COEF * newspaper + INTERCEPT

status = prob.solve()
print(LpStatus[status])

print(prob)
for v in prob.variables():
    print(v.name, "=", v.varValue)

# Evaluate the objective at the solved variable values instead of re-typing
# the formula with hand-copied numbers.
calculation = value(prob.objective)
print(calculation)
Keep Exploring!!!
No comments:
Post a Comment