"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

June 04, 2023

Forecast + Optimization

  • Regression to estimate how each 'X' variable (ad spend per channel) affects sales
  • Add a constraint to make it an optimization problem
  • Optimization with minimum expense for each track

# Data source: https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv
# Reference notebook: https://github.com/georgeblu1/Data-Projects/blob/master/Budget%20Optimization.ipynb
import pandas as pd
# Download the Advertising dataset directly from GitHub into a DataFrame.
# The columns used later in this script are TV, Radio, Newspaper and Sales.
data = pd.read_csv(
    filepath_or_buffer=r'https://raw.githubusercontent.com/justmarkham/scikit-learn-videos/master/data/Advertising.csv',
)
# Preview the first rows (only rendered when run as a notebook cell).
data.head()
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.tools.eval_measures import rmse
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Visual checks of the linear-regression assumptions.

# 1. Linearity: there should be a linear relationship between the target
#    (Sales) and each feature. One scatterplot per feature lets us eyeball it.
#    `height` replaces the old `size` keyword, which seaborn renamed.
sns.pairplot(
    data,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    height=4,
    aspect=1,
)

# 2. Little or no multicollinearity between features: pairwise scatterplots
#    of the features against each other.
sns.pairplot(data[['TV', 'Radio', 'Newspaper']])

# 3. Homoscedasticity: OLS assumes all residuals are drawn from a population
#    with constant variance. Plot the residuals of Sales regressed on TV.
sns.residplot(x=data['TV'], y=data["Sales"])
# Fit an ordinary least-squares model on the full dataset (no hold-out split)
# to inspect the intercept and the per-channel coefficients.
feature_cols = ['TV', 'Radio', 'Newspaper']
X = data.loc[:, feature_cols]
y = data.loc[:, ["Sales"]]  # kept two-dimensional (DataFrame), as a column

# `fit` returns the estimator itself, so both names refer to the same object.
SkLearn_result = LinearRegression().fit(X, y)
SkLearn_model = SkLearn_result

# Report the intercept first, then the coefficients, one per line.
print(SkLearn_result.intercept_, SkLearn_result.coef_, sep="\n")
# Second model: same three features (Newspaper included), but evaluated on a
# held-out test split.
X = data.loc[:, ['TV', 'Radio', 'Newspaper']]
y = data['Sales']

# Reproducible train/test split (default split proportions).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

# Instantiate and fit in one step; `fit` returns the fitted estimator.
lm2 = LinearRegression().fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = lm2.predict(X_test)

# Hold-out metrics: RMSE first, then R^2.
print(np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)))
print(metrics.r2_score(y_true=y_test, y_pred=y_pred))

# Keep the fitted parameters around; the bare names display them when this
# runs as notebook cells.
coefficient = lm2.coef_
coefficient
inter = lm2.intercept_
inter
# PuLP is a third-party package. Install it once from a shell with
#     pip install pulp
# (inside a notebook cell: `!pip install pulp`). The shell-magic form is not
# valid Python, so it is kept here only as a comment.
from pulp import LpMaximize, LpProblem, LpStatus, LpVariable, value

# Maximize predicted sales subject to a total advertising budget of at most
# 1000. The name uses underscores because PuLP does not permit spaces in
# problem names (it would warn and convert them to underscores itself).
prob = LpProblem("Ads_Sales_Problem", LpMaximize)

# Decision variables: spend per channel, each with (lower, upper) bounds.
x = LpVariable("x", 0, 200)  # TV
y = LpVariable("y", 0, 500)  # radio
z = LpVariable("z", 0, 500)  # newspaper

# Constraint: total spend across the three channels must not exceed 1000.
prob += x + y + z <= 1000

# Objective: linear sales model. The newspaper coefficient must multiply `z`
# (it was previously applied to `y`, leaving `z` out of the objective).
# NOTE(review): the coefficients are hard-coded; presumably they come from a
# fitted regression - confirm they match the model you intend to optimize.
prob += 0.0548*x + 0.1022*y + 0.0007878*z + 4.6338

status = prob.solve()
print(LpStatus[status])
print(prob)

# Optimal spend per channel.
for v in prob.variables():
    print(v.name, "=", v.varValue)

# Predicted sales at the optimum, read from the solved objective instead of
# re-typing the formula with assumed variable values.
calculation = value(prob.objective)
calculation
# Same budget problem, but every channel must receive a minimum spend of 50.
# The name uses underscores because PuLP does not permit spaces in problem
# names (it would warn and convert them to underscores itself).
prob = LpProblem("Ads_Sales_Problem_with_minium_in_each_track", LpMaximize)

# Decision variables: spend per channel, each with (lower, upper) bounds.
x = LpVariable("x", 50, 200)  # TV
y = LpVariable("y", 50, 500)  # radio
z = LpVariable("z", 50, 500)  # newspaper

# Constraint: total spend across the three channels must not exceed 1000.
prob += x + y + z <= 1000

# Objective: linear sales model. The newspaper coefficient must multiply `z`
# (it was previously applied to `y`, leaving `z` out of the objective).
# NOTE(review): the coefficients are hard-coded; presumably they come from a
# fitted regression - confirm they match the model you intend to optimize.
prob += 0.0548*x + 0.1022*y + 0.0007878*z + 4.6338

status = prob.solve()
print(LpStatus[status])
print(prob)

# Optimal spend per channel.
for v in prob.variables():
    print(v.name, "=", v.varValue)

# Predicted sales at the optimum, read from the solved objective instead of
# re-typing the formula with assumed variable values.
calculation = value(prob.objective)
calculation

Keep Exploring!!!

No comments: