# Toy example: a function that adds two numbers and returns the sum
add_two_numbers <- function(number1, number2) {
  return(number1 + number2)
}

add_two_numbers(number1 = 2, number2 = 9)
#> [1] 11

# We can call the function without argument names, but it is not good practice, e.g.
add_two_numbers(2, 9)
#> [1] 11
Premise for the inverse CDF method: you want random samples from a given distribution.
Here we use examples from distributions that are easy to sample from so we can compare our results with existing functions. In reality this method would be used for uncommon distributions.
First, let’s recall/introduce how we can write our own functions in R.
Toy example:
# Toy example: a function that adds two numbers and returns the sum
add_two_numbers <- function(number1, number2) {
  return(number1 + number2)
}

add_two_numbers(number1 = 2, number2 = 9)
#> [1] 11

# We can call the function without argument names, but it is not good practice, e.g.
add_two_numbers(2, 9)
#> [1] 11
Before we start generating random numbers, let’s set the seed to make our experiments reproducible
set.seed(345454)
What is the inverse CDF of an exponential distribution? Uh oh. How can we calculate this? Fortunately, R has functions that compute quantiles, and quantiles are exactly the values returned by inverse CDFs.
For example, if \(X \sim Exp(0.1)\), then number \(c\) such that \(Pr(X < c) = 0.6\) is \(F^{-1}(0.6)\) and can be obtained by the following R code
# F^{-1}(0.6) for X ~ Exp(rate = 0.1), computed with the quantile function
qexp(0.6, rate = 0.1)
#> [1] 9.162907
Equipped with this observation, let’s compute
# Draw num_draws realizations of an Exponential(rate) random variable
# using the inverse-CDF method: plug Uniform(0, 1) draws into the
# quantile function (inverse CDF) of the target distribution.
rexp_isi_buds <- function(num_draws, rate = 1) {
  runif_draws <- runif(num_draws) # Random draws from the uniform distribution
  return(qexp(runif_draws, rate = rate)) # Plug draws into the inverse CDF
}

my_exp_sample <- rexp_isi_buds(100000, rate = 0.1)
We can plot our observations to visually assess that we did it correctly. Note the difference in default bin sizes for the two plotting methods.
# base R plot for reference (its default binning differs from ggplot's)
hist(my_exp_sample)
# ggplot version; library(tidyverse) attaches ggplot2, dplyr, and friends
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.2 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ ggplot2 3.4.2 ✔ tibble 3.2.1
✔ lubridate 1.9.2 ✔ tidyr 1.3.0
✔ purrr 1.0.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data.frame("my_exp_sample" = my_exp_sample) %>% # Need data frame for ggplot()
  ggplot(aes(x = my_exp_sample)) +
  geom_histogram() +
  theme_bw() # Try changing the number of bins by setting bins = 10
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Let’s compare our simulations with simulations produced by the R function rexp()
# Reference sample drawn with R's built-in exponential sampler
my_exp_sample2 <- rexp(100000, 0.1)

# QQ-plot: points lying near the y = x line indicate the two samples
# come from the same distribution
qqplot(my_exp_sample, my_exp_sample2)
abline(0, 1)
We can also compare empirical and theoretical means and standard deviations
# Parentheses around an assignment tell R to print the value being stored
(emp_exp_mean <- mean(my_exp_sample))
#> [1] 9.99156
(theor_exp_mean <- 1 / 0.1) # mean of Exp(rate) is 1 / rate
#> [1] 10
(emp_exp_sd <- sd(my_exp_sample))
#> [1] 10.07315
(theor_exp_sd <- sqrt(1 / 0.1^2)) # sd of Exp(rate) is also 1 / rate
#> [1] 10
Write a function to generate realizations of a normal random variable with a user specified mean and variance. Compare results of your simulations and simulation produced using rnorm().
# Draw num_draws realizations of a Normal(mean, sd) random variable
# using the inverse-CDF method, mirroring rexp_isi_buds() above.
rnorm_isi_buds <- function(num_draws, mean = 0, sd = 1) {
  runif_draws <- runif(num_draws) # Uniform(0, 1) draws
  return(qnorm(runif_draws, mean = mean, sd = sd)) # Plug into the inverse CDF
}

my_norm_sample <- rnorm_isi_buds(1000, mean = 2.3, sd = 1.2)
hist(my_norm_sample)
Let’s compare our simulations with simulations produced by the R function rnorm()
# Reference sample drawn with R's built-in normal sampler
my_norm_sample2 <- rnorm(1000, mean = 2.3, sd = 1.2)

# QQ-plot: points lying near the y = x line indicate the two samples
# come from the same distribution
qqplot(my_norm_sample, my_norm_sample2)
abline(0, 1)
We can also compare empirical and theoretical means and standard deviations
# Compare empirical moments of our sample with the theoretical ones
(emp_norm_mean <- mean(my_norm_sample))
#> [1] 2.283325
(theor_norm_mean <- 2.3)
#> [1] 2.3
(emp_norm_sd <- sd(my_norm_sample))
#> [1] 1.221574
(theor_norm_sd <- 1.2) # renamed from theor_sd for consistency with the names above
#> [1] 1.2
# Simulate one draw from a Bernoulli(success_prob) random variable.
# Inverse-CDF idea: a Uniform(0, 1) draw falls below success_prob
# with probability exactly success_prob.
rbernoulli <- function(success_prob) {
  return_value <- 0

  if (runif(1) < success_prob) {
    return_value <- 1 # else statement is not necessary, because return_value starts at 0
  }

  return(return_value)
}
Let’s test our function
# Test rbernoulli() empirically: the mean of many draws should be
# close to the success probability.
num_sim <- 1000
my_coins <- numeric(num_sim)
my_prob <- 0.24

for (i in seq_len(num_sim)) { # seq_len() is safe even if num_sim were 0
  my_coins[i] <- rbernoulli(my_prob)
}

mean(my_coins)
#> [1] 0.245
Modify the rbernoulli()
function so it has 2 arguments: the number of realizations to draw and success probability
First, using a for loop
# Simulate num_draws Bernoulli(success_prob) realizations using a for loop.
rbernoulli_for_loop <- function(num_draws, success_prob) {
  # Creates a double-precision vector of the specified length with each element equal to 0
  return_vector <- numeric(num_draws)

  my_unif <- runif(num_draws)

  # seq_len() handles num_draws == 0 correctly (1:0 would iterate c(1, 0))
  for (i in seq_len(num_draws)) {
    if (my_unif[i] < success_prob) {
      return_vector[i] <- 1
    }
  }

  return(return_vector)
}
Now using vectorized functions
# Simulate num_draws Bernoulli(success_prob) realizations without a loop,
# using the vectorized ifelse().
rbernoulli_no_for_loop <- function(num_draws, success_prob) {
  my_unif <- runif(num_draws)

  return_vector <- ifelse(my_unif < success_prob, yes = 1, no = 0)

  return(return_vector)
}
# Both implementations should give a proportion of 1s close to 0.44
test_coins1 <- rbernoulli_for_loop(10000, 0.44)
test_coins2 <- rbernoulli_no_for_loop(10000, 0.44)

mean(test_coins1)
#> [1] 0.4391
mean(test_coins2)
#> [1] 0.4415
Both function definitions work, but R is notoriously slow with loops.
Write a function to simulate from a discrete random variable taking values 1, 2, 3 with probabilities p1, p2, p3, supplied by the user of the function. You could do it with a sample()
function, but we don’t want you to use it.
# Simulate one draw from a discrete random variable taking values
# 1, 2, 3 with probabilities p1, p2, p3 (inverse-CDF method).
# Stops with an error if the probabilities do not sum to 1.
rdiscr3 <- function(p1, p2, p3) {
  if (abs(1 - p1 - p2 - p3) > 10^-7) { # 10^-7 tolerance approximates 0 for floating point
    stop("probabilities must add to 1")
  }

  return_value <- 3 # default: the uniform draw landed in [p1 + p2, 1)

  my_unif <- runif(1)

  if (my_unif < p1) {
    return_value <- 1
  } else if (my_unif < p1 + p2) {
    return_value <- 2
  }

  return(return_value)
}
The code below should help you test your function. Name your function rdiscr3
# Draw many realizations and compare empirical proportions
# against the requested probabilities (0.1, 0.2, 0.7).
test_vector <- numeric(num_sim) # num_sim was set to 1000 above

for (i in seq_len(num_sim)) {
  test_vector[i] <- rdiscr3(0.1, 0.2, 0.7)
}

# Calculate empirical proportions; the mean of a logical vector is the
# proportion of TRUEs, so no ifelse() is needed
mean(test_vector == 1)
#> [1] 0.105
mean(test_vector == 2)
#> [1] 0.213
mean(test_vector == 3)
#> [1] 0.682