"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

January 09, 2018

Day #95 - Work Hacks - Machine Learning Labelling - Quick Work Hacks (Rows comma delimited text to columns)

#Task - Convert row into columns and create table
# Turn one comma-delimited header row into T-SQL column definitions that can
# be pasted straight into the body of a CREATE TABLE statement.
strdata = 'a,b,c,d,e,f,g,h,'

# A trailing (or doubled) comma makes split(',') yield empty strings; skip
# them so no column definition is emitted without a name.
column_names = [name.strip() for name in strdata.split(',') if name.strip()]

column_defs = [name + ' VARCHAR(100) NULL' for name in column_names]

# Join with ",\n" so every line except the last ends in a comma, which is
# exactly what a CREATE TABLE column list needs.
print(',\n'.join(column_defs))
#a VARCHAR(100) NULL,
#b VARCHAR(100) NULL,
#c VARCHAR(100) NULL,
#Make The TSQL Statement
-- Table built from the generated column definitions: one nullable
-- VARCHAR(100) column per field of the comma-delimited row.
CREATE TABLE ATTRIBUTES
(
    a VARCHAR(100) NULL,
    b VARCHAR(100) NULL,
    c VARCHAR(100) NULL
)
#Task - Label encoding (integer id per city; a precursor to one-hot encoding)
#Step 1 - Load Data in Temp Table
#Step 2 - Create table to store unique values
-- Lookup table: one row per city name; cityid is auto-numbered
-- starting at 1 in steps of 1.
CREATE TABLE cities
(
    cityid INT IDENTITY(1, 1),
    name   VARCHAR(50) NULL
)
#Step 3 - Push all unique values into the cities table
-- Seed the lookup with one row per distinct, non-NULL city.
-- The column list is explicit so the insert does not depend on column order;
-- cityid is an identity column and is generated by the server.
-- NULL cities are skipped: a NULL name could never match the equality join
-- used later to write the ids back.
INSERT INTO cities (name)
SELECT DISTINCT city
FROM decision_tree_featuredata
WHERE city IS NOT NULL
#Step 4 - Create new column to update the label values
-- Add a nullable integer column to hold the numeric id of each row's city.
ALTER TABLE decision_tree_featuredata
    ADD city_id INT NULL
#Step 5 - Update all the cities with numbers
-- Copy each city's numeric id from the cities lookup onto the feature rows.
-- The target is decision_tree_featuredata: it is the table the city values
-- were read from and the one that received the city_id column.
-- Rows whose city is NULL or has no match in cities keep city_id = NULL,
-- because the inner join leaves them out of the update.
UPDATE dt
SET dt.city_id = ct.cityid
FROM decision_tree_featuredata AS dt
INNER JOIN cities AS ct
    ON dt.city = ct.name
view raw workhack.py hosted with ❤ by GitHub

Happy Learning!!!

No comments: