"No one is harder on a talented person than the person themselves" - Linda Wilkinson ; "Trust your guts and don't follow the herd" ; "Validate direction not destination" ;

February 19, 2016

R Kaggle Exercise - Baby Names

# Most popular baby boy name per state in 2014, drawn as a tile plot.
#
# Data: download StateNames.csv from https://www.kaggle.com/kaggle/us-baby-names
# Adapted from the script at
# https://www.kaggle.com/jagelves/d/kaggle/us-baby-names/2014-popular-baby-names-by-state/files
#
# Run this script from the directory that contains StateNames.csv
# (i.e. set the working directory before sourcing it).

library(ggplot2)

# Load the data.
Names <- read.csv("StateNames.csv")

# Keep only baby boy names registered in 2014.
Names2014M <- Names[Names$Year == 2014 & Names$Gender == "M", ]

# Show the first few rows as a sanity check.
head(Names2014M)

# Highest single-name count within each state.
df.agg <- aggregate(Count ~ State, Names2014M, max)

# Recover the name(s) that achieved that count by joining the per-state
# maximum back onto the filtered rows. The original merged against
# `Names2014`, which is never defined; the filtered frame is `Names2014M`.
Names2014Max <- merge(df.agg, Names2014M, by = c("State", "Count"))

# Order states A..Z on the axis. `unique()` matters: when two names tie for
# the top count in a state, that state appears more than once, and factor()
# rejects duplicated levels.
Names2014Max$State <- factor(
  Names2014Max$State,
  levels = sort(unique(as.character(Names2014Max$State)))
)

# Plot the data:
#   Names2014Max - data frame with one row per (state, top name)
#   aes          - maps State to x and Name to y
#   geom_tile    - draws one equally sized tile per (State, Name) pair
#   fill         - tile interior colour, driven by Count
# To change the colours, pass different colour names or hex values to
# scale_fill_gradient().
ggplot(Names2014Max, aes(State, Name)) +
  geom_tile(aes(fill = Count), colour = "black") +
  scale_fill_gradient(low = "white", high = "blue")
view raw BabyNames.R hosted with ❤ by GitHub
Happy Learning!!!

No comments: