## The objective of this simulation is to demonstrate the following hypothesis:
## In a regression model, when the predictors have different standard deviations, the standardized coefficients will no longer be reliable (predictors with equal true effects get unequal standardized coefficients)
# Repeat the simulation N times; each run returns the difference between
# the absolute standardized coefficients of x2 and x1.
N <- 1000
results <- replicate(N, {
  # Two predictors, both mean 0; the only difference is that x2 has a
  # 20x larger standard deviation than x1.
  x1 <- rnorm(n = 30, mean = 0, sd = 1)
  x2 <- rnorm(n = 30, mean = 0, sd = 20)
  # Create y such that the true coefficients of x1 and x2 are both 1,
  # adding a small error term to avoid a perfect model fit.
  y <- x1 + x2 + 0.01 * rnorm(30, 0, 20)
  # Standardize the variables. scale() returns a 1-column matrix, so
  # coerce back to a plain vector to keep lm()'s coefficient names simple.
  x1 <- as.numeric(scale(x1))
  x2 <- as.numeric(scale(x2))
  y <- as.numeric(scale(y))
  # Fit the linear regression on the standardized variables.
  model <- lm(y ~ x1 + x2)
  # Extract coefficients by name rather than by position, so the code
  # survives a reordering of the formula terms.
  coefs <- coef(model)
  abs(coefs[["x2"]]) - abs(coefs[["x1"]])
})
# Mean difference between the standardized coefficients of x2 and x1
# across all simulated runs.
mean(results)
## HOW TO INTERPRET THE OUTCOME
## the output is close to 1, which means that the standardized coefficient
## of x2 is much larger than that of x1 even though their true effects are
## identical — which supports the above hypothesis
## NOTE:
## try re-running this simulation without standardizing the variables
## the output will be very close to 0
## i.e. the unstandardized coefficients of x1 and x2 will both be
## approximately equal to their true value of 1