Sei sulla pagina 1di 8

=================Introduction to R==================

========= R Console====================
# Simple assignment and arithmetic
1 + 2
a <- 2 * 2
a
# Two commands separated by a semicolon
15.3 * 5; 3 * (4 + 5)
#If your cursor is next to the prompt sign,
#you can use the up and down arrows to go back to
#previous commands.
#While typing commands, use the horizontal arrows
#to move within the line.
#Comments are introduced by the hash sign (#). Everything on
#a line that is preceded by # is not interpreted by R.
# Create a sequence with increments of 0.25, starting at -2 and ending at 1
seq(from = -2, to = 1, by = 0.25)
========= Creating Vectors and Matrices====================
# Create a vector a with the specific values 3, 4, and 5, then access elements
a <- c(3, 4, 5)  # create vector with three elements
a                # display the entire vector
a[2]             # display the second element

# Create a vector a from a sequence
a <- seq(-2, 1, by = 0.25)
a
# Create a vector of ten 1's
b <- rep(1, 10)
b
# Create a vector of the numbers 1 to 20 (1:20 is already a vector, so no
# c() wrapper is needed)
b <- 1:20
b
# Create a 3 by 2 matrix B and access elements
B <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2)
B
B[1, 2]  # display element in first row, second column
B[, 1]   # display first column
===========Vector operations===========================
a <- c(1, 2, 3)
a
b <- c(2, 4, 6)
b
a + b      # element-wise addition
a * b      # element-wise multiplication
a / b      # element-wise division
a * 5      # multiply all elements by 5
length(a)  # number of elements
sum(a)     # sum of all elements
mean(a)    # mean of elements
c <- c(1, 5, 9)
sd(c)      # standard deviation of elements

===========vectorized calculations=======================
# The apply command is a common way to do vectorized calculations over the
# rows or columns of a matrix.
# apply() needs an object with dimensions: calling it on a plain vector such
# as c(1, 5, 9) stops with "dim(X) must have a positive length", so the
# examples below run on a small 3 x 2 matrix instead.
m <- cbind(a = c(1, 2, 3), b = c(2, 4, 6))
apply(m, 2, mean)  # here 2 says column-wise
apply(m, 1, mean)  # here 1 says row-wise
apply(m, 1, sd)    # row-wise standard deviation

=================Concantenation=================================================
=====
# Combine vectors column-wise with cbind() or row-wise with rbind()
a <- c(3, 4, 5)
b <- c(10, 11, 12)
c <- cbind(a, b)  # a and b become the columns
c
c <- rbind(a, b)  # a and b become the rows
c
# Obtain the dimensions of a matrix
dim(c)

==============Sampling=========================
# Draw a single value from a normal distribution with mean 10 and sd 1
a <- rnorm(1, mean = 10, sd = 1)
a
# Create a 10 by 10 matrix with elements from a normal distribution
b <- matrix(rnorm(100, mean = 10, sd = 2), nrow = 10, ncol = 10)
# If you want to sample from a given object use the command sample(). Suppose
# we want a sample of ten numbers from b, our 10 by 10 matrix of random numbers
sample(b, 10)

========================Data Frames=============================================
#A data frame is used for storing data tables. It is a list of vectors of equal
#length.
#For example, the following variable df is a data frame containing three vectors
#n, s, b
n <- c(2, 3, 5)
s <- c("aa", "bb", "cc")
b <- c(TRUE, FALSE, TRUE)
df <- data.frame(n, s, b)
df
nrow(df)
ncol(df)
colnames(df)
df$n      # access the column named n
df[1, 2]  # index by row and column, just like a matrix
names(df) <- c("Numbers", "Strings", "Boolean")  # set the column names
df
==========================Reading and writing Data=============================
================
# Reading in data
# The path is assembled with file.path() so that the string literal is not
# split across lines (a line break inside the quotes would become part of the
# file name and the file would not be found).
hmnrghts <- read.table(
  file.path(
    "C:/Users/IITH/Desktop/Dec2014/Data sets/1-Revision of R",
    "hmnrghts.txt"
  ),
  header = TRUE, na.strings = "NA"
)
# Remember you will have to specify the location of the file depending on where
# you have it saved.
# After reading in a data set, R will treat your data as a data frame.

colnames(hmnrghts)
# 1. country,
# 2. democ, a scale rating the level of democracy in a country
# 3. sdnew, U.S. State Dept. scale of Political Terror
# 4. military, a dummy variable for a military regime
# 5. gnpcats, level of gnp in four categories
# 6. lpop, log of population
# 7. civ.war, a dummy variable for whether a country was involved in a civil war
# 8. int.war, a dummy variable for whether a country was involved in an
#    international war.

head(hmnrghts)
#Usage: read.table(file, header = FALSE, sep = "", skip, stringsAsFactors=TRUE)
#file             : The name of the file to import
#header           : Logical, does the first row contain column labels
#sep              : Field separator character
#                   sep=" "   space (default)
#                   sep="\t"  tab-delimited
#                   sep=","   comma-separated
#skip             : Number of lines to skip before reading data
#stringsAsFactors : Logical, should character vectors be converted to factors
#By default R converts character string variables to factor variables
#(this was the default before R 4.0.0; from 4.0.0 on, strings stay character
#unless stringsAsFactors=TRUE is given).
#Use stringsAsFactors to change the default
# Read a CSV file. The path is assembled with file.path() so that the string
# literal is not split across lines (a line break inside the quotes would
# become part of the file name).
dataset <- read.csv(
  file.path(
    "C:/Users/IITH/Desktop/Dec2014/Data sets/1-Revision of R",
    "Diamonds.csv"
  )
)
# Changing working directory
setwd("C:/Users/IITH/Desktop/Dec2014/Data sets/1-Revision of R/")
getwd()

# Writing csv file (relative path, so it is written to the working directory)
write.csv(hmnrghts, "hmnrights.csv")
===============================Logical Statements===============================
==========
# <    Less Than
# <=   Less Than or Equal To
# >    Greater Than
# >=   Greater Than or Equal To
# ==   Equal To
# !=   Not Equal To
# &    And
# |    Or
# Compare every element with 2 and generate a logical vector
res <- hmnrghts$sdnew > 2
table(res)

# Let's say instead of having the population of each country we wanted a
# 3 category ordinal level variable.
# Start from a copy of lpop and overwrite each range with its category code.
pop.3 <- hmnrghts$lpop
pop.3[hmnrghts$lpop < 15.18] <- 1
pop.3[hmnrghts$lpop >= 15.18 & hmnrghts$lpop < 17.11] <- 2
pop.3[hmnrghts$lpop >= 17.11] <- 3
pop.3
==============================Recoding==========================================
========
# exp() is applied to every element of the column.
# Any other mathematical function could be substituted for exp.
pop <- exp(hmnrghts$lpop)

==================================Factors=======================================
=============
#Categorical variables in R can be given a special designation as factors.
#If you designate a categorical variable as a factor, R will treat it as such in
#statistical operations and create dummy variables for each level when it is
#used in a regression.
#If you import a variable with no numeric coding, R will automatically treat the
#variable as a factor (this is the default only before R 4.0.0; in later
#versions pass stringsAsFactors=TRUE or convert the column with factor()).
#For example country in the data set is automatically treated as a factor.
# factor() makes these calls work whether country was read in as a factor or
# as a character vector (the read.table default since R 4.0.0); levels() of a
# character vector is NULL and relevel() only accepts factors.
levels(factor(hmnrghts$country))
# To change which level is the first level (i.e. to change which category R will
# use as the reference category in a regression) use the relevel command.
# NOTE: attach() puts the columns of hmnrghts on the search path so they can be
# referred to by bare name, as this tutorial does; it is generally discouraged
# in scripts.
attach(hmnrghts)
levels(factor(country))
# This creates a releveled copy called country in the workspace; the column
# inside hmnrghts is not modified.
country <- relevel(factor(country), ref = "united states")
levels(country)

==================================For Loop======================================
==============
# General form: for (i in 1:10) {COMMANDS}
# Preallocate the result vector, then store the mean of i standard-normal
# draws in position i.
n_draws <- 1000
store <- rep(1, n_draws)

for (i in seq_len(n_draws)) {
  a <- rnorm(n = i)
  store[i] <- mean(a)
}
plot(store, type = "o")
# Print the multiplication table for 12 (rows 1 to 20)
multiplier <- 12
for (i in seq_len(20)) {
  # the space before the newline matches what cat() emits between arguments
  cat(sprintf("%d * %d = %d \n", i, multiplier, i * multiplier))
}
# Print the 10th Fibonacci number
# previous/current hold two consecutive terms, starting from the 1st and 2nd.
previous <- 1
current <- 1
for (i in 3:10) {
  next_value <- previous + current
  previous <- current
  current <- next_value
}
print(current)

===================================while loop===================================
==============
# Fill store[j] with the mean of j standard-normal draws for j = 1, ..., 1000.
# store must already exist, because the loop assigns into it by index.
j <- 1
while (j <= 1000) {
  a <- rnorm(n = j)
  store[j] <- mean(a)
  j <- j + 1  # advance the counter, otherwise the loop never ends
}

===================================list=========================================
================
# A list can hold elements of different types; [[i]] extracts the i-th element.
foo <- list(str = "R", vec = c(1, 2, 3), bool = TRUE)
foo[[2]]  # return the second element in the list, which is a vector
foo[[3]]  # return the third element in the list, which is a boolean

===================================Functions====================================
==============
#Functions in R
#myfunction <- function(arg1, arg2, ... ){statements,return(object)}

# Add x to itself element-wise and return the result.
mysum <- function(x) {
  x + x
}


# Compute a rounded net price for a number of hours.
#   hours : number of hours (a single number)
#   pph   : price per hour, 40 unless given
# The gross price hours * pph is multiplied by 0.9 when hours > 100 and by 0.8
# otherwise, then rounded to a whole number.
# NOTE(review): the smaller factor applies to the smaller orders - confirm that
# this is the intended discount direction.
priceCalculator <- function(hours, pph = 40) {
  discount_factor <- if (hours > 100) 0.9 else 0.8
  round(hours * pph * discount_factor)
}

# Load function definitions from a file ("filename.r" is presumably a
# placeholder for your own script)
source(file = "filename.r")
# Calculate the k-th Fibonacci number (1, 1, 2, 3, 5, ...).
#   k : position in the sequence; any k below 3 returns 1
fibk <- function(k) {
  if (k < 3) {
    return(1)
  }
  # pair holds two consecutive terms: (previous, current)
  pair <- c(1, 1)
  for (i in 3:k) {
    pair <- c(pair[2], pair[1] + pair[2])
  }
  pair[2]
}
===================================Saving and Loading R Data===================
==============
# Write store to disk, remove it from the workspace, list what is left, then
# bring store back from the file
save(list = "store", file = "dumData.Rdata")
rm(list = "store")
ls()
load(file = "dumData.Rdata")
=====================================Import/Export via ODBC=====================
==============
library(RODBC)
# Open a connection to the data source, run one query, then close the connection
connection <- odbcConnect(dsn = "servername", uid = "userid", pwd = "******")
query <- "SELECT * FROM lib.table WHERE ..."
myData <- sqlQuery(connection, query, errors = TRUE)
odbcClose(connection)
=====================================Graphics===================================
===============
#Most graphing in R is done using the plot() command. plot() is a generic
#function, which means that it changes its behaviour depending on the arguments
#you specify.

# The path is assembled with file.path() so that the string literal is not
# split across lines (a line break inside the quotes would become part of the
# file name).
hmnrghts <- read.table(
  file.path(
    "C:/Users/IITH/Desktop/Dec2014/Data sets/1-Revision of R",
    "hmnrghts.txt"
  ),
  header = TRUE, na.strings = "NA"
)
# with() looks the columns up inside hmnrghts, so the plots do not depend on an
# earlier attach() call. jitter() adds a little random noise so that
# overlapping points become visible.
with(hmnrghts, plot(lpop, sdnew))
with(hmnrghts, plot(lpop, jitter(sdnew)))
with(hmnrghts, plot(lpop, jitter(sdnew),
  xlab = "Log of Population", ylab = "Human Rights Violations",
  main = "Human Rights Violations by Population"
))
# We use text() to write text at a given position in the plotting region
plot(5, 4)
text(5, 4, "Your Name")

# Let's say we wanted multiple graphs on a single page in order to make
# comparisons
old_par <- par(mfrow = c(2, 1))  # two rows, one column; keep the old setting
with(hmnrghts, plot(lpop, sdnew))
with(hmnrghts, plot(lpop, jitter(sdnew)))
par(old_par)  # restore the previous layout for later plots
============================Save Charts into Files==============================
===============
#If there are many graphs produced in data exploration, a good practice is to
#save them into files.
#R provides a variety of functions for that purpose. Below are examples of
#saving charts into PDF and PS files respectively with pdf() and postscript().
#Picture files of BMP, JPEG, PNG and TIFF formats can be generated respectively
#with bmp(), jpeg(), png() and tiff().
#Note that the files (or graphics devices) need to be closed with
#graphics.off() or dev.off() after plotting.
# Plot log(x) into a PDF file, then close the device
pdf(file = "myPlot.pdf")
x <- 1:50
plot(x, log(x))
graphics.off()
# Plot x^2 into a PostScript file, then close the device
postscript(file = "myPlot2.ps")
x <- -20:20
plot(x, x^2)
graphics.off()

Potrebbero piacerti anche