# Working-directory setup.
# NOTE: the absolute path below is machine-specific; change `data_dir` to the
# folder that holds the input files on your machine.
data_dir <- "C:/Mywork/MyLearning/MyStuddocs_UrbanPro/Data"

# Show the working directory R is currently using.
getwd()
# Point the session at the data folder so files can be read by relative name.
setwd(data_dir)
getwd()
# List the files in the working directory that was just set.
dir()
# Create the "Nov26" sub-folder inside the data folder. The existence check
# keeps the script re-runnable: dir.create() warns and returns FALSE when the
# folder is already there.
nov26_dir <- file.path(data_dir, "Nov26")
if (!dir.exists(nov26_dir)) {
  dir.create(nov26_dir)
}
#install.packages("car")
#install.packages("Hmisc")
#install.packages("reshape")
#install.packages('pastecs')
#install.packages('gtools')
#install.packages('gmodels')
#install.packages('caret')
#install.packages('MASS')
##-----------------------------------------------------------
## Load required libraries
##-----------------------------------------------------------
# Packages must be loaded with library() in every new R session
# before the functions they provide can be used.
library(foreign)
library(MASS) # for stepAIC()
library(Hmisc) # for describe()
library(boot)
library(pastecs) # for stat.desc()
library(gmodels)
library(gtools)
library(lattice)
library(ggplot2)
library(caret)
library(car)
library(foreign)
library(reshape)
library(Hmisc)
# Print the R version details for the current session.
version
# World data set.
# NOTE(review): no statement in this part of the script creates `world`; the
# line below only prints an existing object -- confirm where it is loaded.
world
dim(world)   # number of rows and columns
View(world)  # open the data frame in the data viewer
# Read the transaction and customer master files (paths are relative to the
# working directory).
trans <- read.csv(file = "TransactionMaster.csv")
View(trans)
cust <- read.csv(file = "CustomerMaster.csv")
View(cust)
dim(cust)
str(cust)    # structure / metadata of the data frame
# Make a full copy of the customer data: blank row and column indices select
# every row and every column.
cust_copy <- cust[, ]
# Save the copy to disk as a serialized R object.
saveRDS(
  cust_copy,
  "C:/Mywork/MyLearning/MyStuddocs_UrbanPro/Data/customerdata"
)
# Take a sample file: 1:100 picks the first 100 rows, and the blank after the
# comma keeps all columns.
cust_sample <- cust[1:100, ]
dim(cust_sample)
# Take all the rows and specific columns from the source data frame `cust`.
# NOTE(review): the assignment for this step is missing from the script and
# the intended columns are not shown; as written this line only prints
# `samplefile` (and fails if it does not exist yet) -- restore the original
# selection.
samplefile
# Take all rows and the columns at positions 1, 8 and 9. The script had only
# the bare name here, so the selection described above was never performed.
samplefile <- cust[, c(1, 8, 9)]
# Frequency distribution of the City variable.
table(cust$City)
# Cross-tabulated frequency distribution: State in rows, City in columns.
table(cust$State, cust$City)
table(world$deathCat, world$birthCat)
# Average of energy_use_percapita in the world data, ignoring missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mean(world$energy_use_percapita, na.rm = TRUE)
# Median (50th percentile) of gni_per_capita. na.rm = TRUE matches the mean
# above; without it a single missing value makes the result NA.
median(world$gni_per_capita, na.rm = TRUE)
# Inspect the class of each loaded object.
class(world)
class(cust)
class(trans)
# Type predicates: each call returns a single TRUE/FALSE.
is.vector(world)
is.factor(world)
is.data.frame(world)
is.matrix(cust)
# For a data frame length() is the number of columns; it is mostly used on
# vectors.
length(world)
# Peek at the start and end of the transaction data.
head(trans, n = 6L)  # first 6 rows (the default)
head(trans, n = 2L)  # first 2 rows
tail(trans, n = 6L)  # last 6 rows (the default)
tail(trans, n = 1L)  # last row only
# NOTE(review): `firstfewrows` is not assigned anywhere in this part of the
# script; the line below only prints an existing object -- confirm which
# head() call was meant to create it.
firstfewrows
View(firstfewrows)
# Convert the country names to lower case, overwriting the column in place.
world$country_name <- tolower(world$country_name)
# Build a new data frame without the first column (a negative index drops it).
world_1 <- world[, -1]
# Filter out the Atlanta customers. The script had only the bare name here,
# so the object was never created; the filter mirrors the one used below.
atlantaCustomers <- cust[which(cust$City == "ATLANTA"), ]
# Filter out Atlanta or Hollywood customers: | is the element-wise OR
# operator and & is the element-wise AND operator.
atlantaHollyCustomers <- cust[
  which(cust$City == "ATLANTA" | cust$City == "HOLLYWOOD"),
]
## Selecting specific columns
# NOTE(review): the assignment for this step is missing from the script and
# the intended columns are not shown; as written this line only prints
# `atlantaCustomers1` (and fails if it does not exist) -- restore the
# original column selection.
atlantaCustomers1
# Filter with multiple conditions: sales between 100 and 150 inclusive.
# which() turns the logical mask into row positions, so rows where the
# comparison is NA are dropped instead of appearing as all-NA rows.
highSales_mod <- trans[
  which(trans$Sales_Amount >= 100 & trans$Sales_Amount <= 150),
]
max(highSales_mod$Sales_Amount)
min(highSales_mod$Sales_Amount)
###------------------------------------------------------------
### Basic Date functions in R
###------------------------------------------------------------
Sys.Date()  # current date
# Store the current date. The script had only the bare name here, so
# class(today) below failed with "object 'today' not found".
today <- Sys.Date()
class(today)
Sys.time()  # current date and time with time zone
# NOTE: the variable name `time` is kept from the original script; it shares
# its name with the stats::time() function.
time <- Sys.time()
class(time)