#Input is a User-Items file as listed below
#Sample Execution Command
#Happy Learning!!!
#Deep Learning - Machine Learning - Data(base), NLP, Video - SQL Learnings - Startups - (Learn - Code - Coach - Teach - Innovate) - Retail - Supply Chain
#Python Exercises
#Exercise #1
#Save it to File test1.txt
#Input
#1 0 1 0 1
#0 0 0 1 1
#1 1 1 1 0
#1 0 0 0 1
#0 0 0 0 1
#Output
#1 1
#1 3
#1 5
#2 4
#2 5
#3 1
#3 2
#3 3
#3 4
#4 1
#4 5
#5 5
import pandas as pd
import numpy as np

#Exercise 1: read the tab-separated 0/1 matrix in test1.txt and write one
#"row<TAB>column" line (both 1-based) to test1_output.txt for every cell
#that holds a 1.
#Both files are opened in a with-block so they are closed even on error.
with open('test1.txt', 'r') as file, open('test1_output.txt', 'w') as filewrite:
    for a, line in enumerate(file, start=1):
        #Strip the line terminator first: otherwise the last field is
        #'1\n' and a 1 in the final column is never matched.
        values = line.rstrip('\r\n').split('\t')
        for b, value in enumerate(values, start=1):
            if value.strip() == '1':
                filewrite.write(str(a) + '\t' + str(b) + '\n')
#Exercise 2
#Parse test1.txt and do matrix manipulation
import pandas as pd
import numpy as np

#Load the tab-separated file (no header row) and view it as a matrix
data = pd.read_csv('test1.txt', header=None, sep="\t")
data_matrix = np.matrix(data)
#shape is the pair (number of rows, number of columns)
row_count, column_count = data_matrix.shape
print(data_matrix.shape)
print(row_count)
print(column_count)
#Exercise 3
#Compute row and column sum
import pandas as pd
import numpy as np

#Load the tab-separated file (no header row) and view it as a matrix
data = pd.read_csv('test1.txt', header=None, sep="\t")
data_matrix = np.matrix(data)
print(data_matrix.shape)
print(data_matrix.shape[0])
print(data_matrix.shape[1])

#Work on a plain 2-D array so the axis sums come back as 1-D sequences
cell_values = np.asarray(data_matrix)

#Sum of each row (axis=1 adds across the columns of a row)
for row_number, row_total in enumerate(cell_values.sum(axis=1), start=1):
    print('row -', row_number, '-sum ', row_total)

#Sum of each column (axis=0 adds down the rows of a column)
for column_number, column_total in enumerate(cell_values.sum(axis=0), start=1):
    print('column-', column_number, '-sum ', column_total)
import pandas as pd
import numpy as np

#Small 3x3 frame of floats used by all the matrix tips below
df = pd.DataFrame({'A':[1.1,2.2,3.1],'B':[5.2,6.1,7.1],'C':[5.4,6.6,7.4]})
print(df)

#Tip #1
#Create matrix of float values
data_intermediate = df.astype(float)
data_matrix = np.matrix(data_intermediate)
print('matrix')
print(data_matrix)

#Tip #2 Compute Transpose
print('Transpose matrix')
print(np.transpose(data_matrix))

#Tip #3 - Matrix Inverse
print('Inverse matrix')
print(data_matrix.I)

#Tip #4 - Identity Matrix
#Size the identity from the data instead of hard-coding 3
identity_matrix = np.identity(data_matrix.shape[0])
print('Identity Matrix')
print(identity_matrix)
print('Identity Matrix X Data ')
#With an np.matrix operand, * is the matrix product (not element-wise)
print(identity_matrix*data_matrix)

#Tip #5 - Eigen Values
print('Eigen Values')
print(np.linalg.eigvals(data_matrix))

#Tip #6 - Eigen Vectors
#w holds the eigenvalues, the columns of v the matching eigenvectors
w, v = np.linalg.eig(data_matrix)
print('Eigen Vector')
print(v)

#Tip #7 - svd
print('SVD')
print(np.linalg.svd(data_matrix))
import numpy as np
import pandas as pd

#Tip #1
#Data frame to matrix
df = pd.DataFrame({'A':[1,2,3,4],'B':[5,6,7,8],'C':[5,6,7,8]})
print(df)

#Tip #2
#Standardize value in columns
#ddof=0 is the population standard deviation, i.e. NumPy's np.std default
df["A"] = (df["A"] - df["A"].mean()) / df["A"].std(ddof=0)

#Tip #3
#Dynamically standardize except last column
for col in df.columns[:-1]:
    df[col] = (df[col] - df[col].mean()) / df[col].std(ddof=0)
print(df)
features = list(df.columns[:-1])

#Tip #4 - Replace na values
df = df.fillna(-9999)

#Tip #5
#Dynamically add columns
#For each adjacent pair of feature columns add their product as a new
#column named '5', '6', ...; the last feature has no right-hand neighbour
#and is skipped.
for i in range(len(features)):
    colname1 = str(5+i)
    col1 = i
    col2 = i+1
    print('colname', colname1)
    print('col1', col1)
    print('col2', col2)
    if col2 < len(features):
        df[colname1] = df[features[col1]]*df[features[col2]]
print('newly added columns')
print(df)
import math


def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e**-v) of every value in *x*.

    x: iterable of real numbers (a list or a 1-D NumPy array both work).
    Returns a new list of floats, one per input value, in input order.

    The two branches are algebraically identical; choosing by sign keeps
    the argument of math.exp non-positive, so very large magnitudes
    saturate to 0.0 or 1.0 instead of raising OverflowError.
    """
    a = []
    for item in x:
        if item >= 0:
            a.append(1 / (1 + math.exp(-item)))
        else:
            scaled = math.exp(item)
            a.append(scaled / (1 + scaled))
    return a
import matplotlib.pyplot as plt
import numpy as np

#Plot sigmoid(x) for x sampled from [-70, 70) at three step sizes, one
#figure per step size. The figure titles reuse the step value.
#NOTE(review): with step 100 np.arange yields the single sample x = -70,
#so the last figure has only one point. The "Weight" wording in the titles
#may have meant scaling x rather than changing the sampling step - confirm.
for step in (1, 5, 100):
    x = np.arange(-70, 70, step)
    sig = sigmoid(x)
    plt.plot(x, sig)
    plt.title('Sigmoid Weight ' + str(step))
    plt.show()
#3 class classifier
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets

iris = datasets.load_iris()
#only the first two feature columns are taken
X = iris.data[:, :2]
Y = iris.target

#step size in mesh
h = 0.02

#create instance of classifier and fit data
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(X, Y)

#plot decision boundary and assign color for it
#the grid covers the data range padded by 0.5 on every side
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
#predict a class for every grid point (one row per point, two columns)
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

#put the result to color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

#plot also training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
#empty tuples remove the tick marks from both axes
plt.xticks(())
plt.yticks(())
plt.show()
import random

import pandas as pd

#Create Data Frame with few columns and list values
df = pd.DataFrame({'A':[1,2,3,4],'B':[5,6,7,8]})
print(df)

#Drop a Column
#The columns= keyword is used because pandas 2.x no longer accepts the
#axis as a second positional argument (df.drop('A', 1) raises TypeError).
df = df.drop(columns='A')
print(df)

#Create column with int column names
df = pd.DataFrame({1:[1,2,3,4],2:[1,2,3,4],10:[1,2,3,4]})
print(df)
df = df.drop(columns=1)
print(df)

#Sample two rows from data frame
print(df.sample(n=2))

#Create list with the 55 values 1..55 (the end of range is exclusive)
a = list(range(1, 56))

#Sample 15 random entries
feature2 = random.sample(a, 15)
#NOTE(review): the label says 'feature' but the frame is printed and
#feature2 is never used afterwards - confirm which was intended.
print('feature')
print(df)

#Find maximum occurring values row wise
print(df.mode(axis=1))
#For questions/feedback/career opportunities/training/consulting assignments/mentoring, please drop a note to sivaram2k10(at)gmail(dot)com
#Coach / Code / Innovate