viernes, 10 de diciembre de 2010

Análisis geoestadístico con R. Paquete geoR.

Ordenes básicas del paquete geoR para el análisis de datos geoestadísiticos, recopilados del material "geoR: PAckage for Geostatistical Data Analysis. An illustrative session" por Paulo J. Ribeiro Jr. & Peter J. Diggle, 2006.
Pronto subiré también análisis completos con un conjunto de datos de R así como contenido teórico de este tema. Espero les guste...

################################################################################
### geoR : Package for Geostatistical Data Analysis An illustrative session#####
### Paulo J. Ribeiro Jr. & Peter J. Diggle. 2006 ###############################
### geoRintro_package%examples.pdf #######
##########################################
 
library(geoR)
data(s100)
 
# 3) herramientas exploratorias
#1. gráficos
summary(s100)
plot(s100) #para objetos de clase "geodata" produce 4 gráficos
 par(mfrow = c(2, 2))
points(s100, xlab = "Coord X", ylab = "Coord Y")
points(s100, xlab = "Coord X", ylab = "Coord Y", pt.divide = "rank.prop")
points(s100, xlab = "Coord X", ylab = "Coord Y", cex.max = 1.7, col = gray(seq(1, 0.1, l = 100)), pt.divide = "equal")
points(s100, pt.divide = "quintile", xlab = "Coord X", ylab = "Coord Y")


#2. variograma empírico
cloud1 <- variog(s100, option = "cloud", max.dist = 1)
cloud2 <- variog(s100, option = "cloud", estimator.type = "modulus", max.dist = 1)
bin1 <- variog(s100, uvec = seq(0, 1, l = 11))
bin2 <- variog(s100, uvec = seq(0, 1, l = 11), estimator.type = "modulus")
par(mfrow = c(2, 2))
plot(cloud1, main = "classical estimator"); plot(cloud2, main = "modulus estimator")
plot(bin1, main = "classical estimator"); plot(bin2, main = "modulus estimator")
names(bin1) #los tres primeros resultados indican las distancias, semivarianza estimada y el número de pares para cada bin

 
bin1 <- variog(s100, uvec = seq(0, 1, l = 11), bin.cloud = T) #los puntos del variograma pueden agruparse en clases de distancias y graficarse en box-plot para cada una
bin2 <- variog(s100, uvec = seq(0, 1, l = 11), estimator.type = "modulus", bin.cloud = T)
par(mfrow = c(1, 2))
plot(bin1, bin.cloud = T, main = "classical estimator"); plot(bin2, bin.cloud = T, main = "modulus estimator")

 
vario60 <- variog(s100, max.dist = 1, direction = pi/3) #podemos calcular los variogramas direccionales utilizando el argumento "direction" y "tolerance", por ejemplo calculamos el variograma para la direcciónd e 60 grados con el ángulo de tolerancia por default (22.5 grados).
vario.4 <- variog4(s100, max.dist = 1) #cálculo en 4 direcciones
par(mfrow = c(1, 2), mar = c(3, 3, 1.5, 0.5))
plot(vario60); title(main = expression(paste("directional, angle = ", 60 * degree))); plot(vario.4, lwd = 2)

 
# 4) estimación de parámetros
#comparamos variogramas teóricos y empíricos visualmente
bin1 <- variog(s100, uvec = seq(0, 1, l = 11))
plot(bin1)
lines.variomodel(cov.model = "exp", cov.pars = c(1, 0.3), nugget = 0, max.dist = 1, lwd = 3)
smooth <- variog(s100, option = "smooth", max.dist = 1, n.points = 100, kernel = "normal", band = 0.2)
lines(smooth, type = "l", lty = 2)
legend(0.4, 0.3, c("empirical", "exponential model", "smoothed"), lty = c(1, 1, 2), lwd = c(1, 3, 1))


 #pero en la práctica no conocemos los valores reales y debemos estimarlos: 1) a ojo, o 2) por mínimos cuadrados ajustados al variogramas empíricos (OLS  y WLS), 3) métodos basados en verosimilitud (ML o REML), o 4) métodos bayesianos
#1) a ojo
plot(variog(s100, max.dist = 1))
lines.variomodel(cov.model = "exp", cov.pars = c(1, 0.3), nug = 0, max.dist = 1)
lines.variomodel(cov.model = "mat", cov.pars = c(0.85, 0.2), nug = 0.1, kappa = 1, max.dist = 1, lty = 2)
lines.variomodel(cov.model = "sph", cov.pars = c(0.8, 0.8), nug = 0.1, max.dist = 1, lwd = 2)



#función de estimación de parámetro

ml <- likfit(s100, ini = c(1, 0.5)) ; ml ; summary(ml)
# Fitting models with nugget fixed to zero
ml <- likfit(s100, ini = c(1, 0.5), fix.nugget = T)
reml <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, method = "RML")
ols <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, weights = "equal")
wls <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T)
# Fitting models with a fixed value for the nugget
ml.fn <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15)
reml.fn <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15, method = "RML")
ols.fn <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15, weights = "equal")
wls.fn <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15)
# Fitting models estimated nugget
ml.n <- likfit(s100, ini = c(1, 0.5), nug = 0.5)
reml.n <- likfit(s100, ini = c(1, 0.5), nug = 0.5, method = "RML")
ols.n <- variofit(bin1, ini = c(1, 0.5), nugget = 0.5, weights = "equal")
wls.n <- variofit(bin1, ini = c(1, 0.5), nugget = 0.5)
#graficamos los modelos ajustados sobre los variogramas empírivos
par(mfrow = c(1, 3))
plot(bin1, main = expression(paste("fixed ", tau^2 == 0)))
lines(ml, max.dist = 1)
lines(reml, lwd = 2, max.dist = 1)
lines(ols, lty = 2, max.dist = 1)
lines(wls, lty = 2, lwd = 2, max.dist = 1)
legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7)
plot(bin1, main = expression(paste("fixed ", tau^2 == 0.15)))
lines(ml.fn, max.dist = 1)
lines(reml.fn, lwd = 2, max.dist = 1)
lines(ols.fn, lty = 2, max.dist = 1)
lines(wls.fn, lty = 2, lwd = 2, max.dist = 1)
legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7)
plot(bin1, main = expression(paste("estimated ", tau^2)))
lines(ml.n, max.dist = 1)
lines(reml.n, lwd = 2, max.dist = 1)
lines(ols.n, lty = 2, max.dist = 1)
lines(wls.n, lty = 2, lwd = 2, max.dist = 1)
legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7)

 


#Two kinds of variogram envelopes computed by simulation are illustrated in the figure below.
#The plot on the left-hand side shows an envelope based on permutations of the data values across the locations, i.e. envelopes built under the assumption of no spatial correlation.
#The envelopes shown on the right-hand side are based on simulations from a given set of model parameters, in this example the parameter estimates from the WLS variogram fit. This envelope shows the variability of the empirical variogram.
env.mc <- variog.mc.env(s100, obj.var = bin1)
env.model <- variog.model.env(s100, obj.var = bin1, model = wls)
#Profile likelihoods (1-D and 2-D) are computed by the function proflik. Here we show the profile likelihoods for the covariance parameters of the model without nugget effect
par(mfrow = c(1, 2))
plot(bin1, envelope = env.mc)
plot(bin1, envelope = env.model)

 

#5) validación cruzada
#cross-validation using the leaving-one-out: data points are removed one by one and predicted by kriging using the remaining data.
#The commands below illustrates cross-validation for the models fitted by maximum likelihood and weighted least squares.
xv.ml <- xvalid(s100, model = ml)
xv.wls <- xvalid(s100, model = wls)
#Graphical results are shown for the cross-validation results where the leaving-one-out strategy combined with the wls estimates for the parameters was used. Cross-validation
#residuals are obtained subtracting the observed data minus the predicted value. Standardised residuals are obtained dividing by the square root of the prediction variance (‘kriging
#variance’). By default the 10 plots shown in the Figure 10 are produced but the user can restrict the choice using the function arguments.
prof <- proflik(ml, geodata = s100, sill.val = seq(0.48, 2, l = 11), range.val = seq(0.1, 0.52, l = 11), uni.only = FALSE)
par(mfrow = c(1, 3)); plot(prof, nlevels = 16)

par(mfcol = c(5, 2), mar = c(3, 3, 1, 0.5), mgp = c(1.5, 0.7, 0))
plot(xv.wls)


#A variation of this method is illustrated by the next two calls where the model parameters are re-estimated each time a point is removed from the data-set.

 xvR.ml <- xvalid(s100, model = ml, reest = TRUE)
xvR.wls <- xvalid(s100, model = wls, reest = TRUE, variog.obj = bin1)
 
#6) interpolación espacial. Kriging con las siguientes opciones:
#1. Simple kriging
#2. Ordinary kriging
#3. Trend (universal) kriging
#4. External trend kriging
#First example consider the prediction at four locations labeled 1, 2, 3, 4 and indicated in the figure below. The command to perform ordinary kriging using the parameters estimated by weighted least squares with nugget fixed to zero would be:
kc4 <- krige.conv(s100, locations = loci, krige = krige.control(obj.m = wls))
#Second example: The goal is to perform prediction on a grid covering the area and to display the results. Again, we use ordinary kriging.
plot(s100$coords, xlim = c(0, 1.2), ylim = c(0, 1.2), xlab = "Coord X", ylab = "Coord Y")
loci <- matrix(c(0.2, 0.6, 0.2, 1.1, 0.2, 0.3, 1, 1.1), ncol = 2)
text(loci, as.character(1:4), col = "red")
polygon(x = c(0, 1, 1, 0), y = c(0, 0, 1, 1), lty = 2)
pred.grid <- expand.grid(seq(0, 1, l = 51), seq(0, 1, l = 51))
kc <- krige.conv(s100, loc = pred.grid, krige = krige.control(obj.m = ml))
image(kc, loc = pred.grid, col = gray(seq(1, 0.1, l = 30)), xlab = "Coord X", ylab = "Coord Y")

 

# 7) análisis bayesiano
#consider a model without nugget and including uncertainty in the mean, sill and range parameters. Prediction at the four locations indicated above is performed by typing a command like:
#predictive distributions at the four selected locations. Dashed lines show Gaussian distributions with mean and variance given by results of ordinary
#kriging obtained in Section 4. The full lines correspond to the Bayesian prediction. The plot shows results of density estimation using samples from the predictive distributions.
bsp4 <- krige.bayes(s100, loc = loci, prior = prior.control(phi.discrete = seq(+ 5, l = 101), phi.prior = "rec"), output = output.control(n.post = 5000))
par(mfrow = c(2, 2))
for (i in 1:4) {
kpx <- seq(kc4$pred[i] - 3 * sqrt(kc4$krige.var[i]), kc4$pred[i] + 3 * sqrt(kc4$krige.var[i]), l = 100)
kpy <- dnorm(kpx, mean = kc4$pred[i], sd = sqrt(kc4$krige.var[i]))
bp <- density(bsp4$predic$simul[i, ])
rx <- range(c(kpx, bp$x))
ry <- range(c(kpy, bp$y))
par(mfrow = c(1, 3), mar = c(3, 3, 1, 0.5), mgp = c(2, 1, 0))
hist(bsp4$posterior$sample$beta, main = "", xlab = expression(beta), prob = T)
hist(bsp4$posterior$sample$sigmasq, main = "", xlab = expression(sigma^2), prob = T)
hist(bsp4$posterior$sample$phi, main = "", xlab = expression(phi), prob = T)
plot(cbind(rx, ry), type = "n", xlab = paste("Location", i), ylab = "density", xlim = c(-4, 4), ylim = c(0, 1.1))
lines(kpx, kpy, lty = 2); lines(bp)
}
 
#Consider now, under the same model assumptions, obtaining simulations from the predictive distributions on a grid of points covering the area. The commands to define the grid and perform Bayesian prediction are:
pred.grid <- expand.grid(seq(0, 1, l = 31), seq(0, 1, l = 31))
bsp <- krige.bayes(s100, loc = pred.grid, prior = prior.control(phi.discrete + 5, l = 51)), output = output.control(n.predictive = 2))
plot(bin1, ylim = c(0, 1.5))
lines(bsp4, max.dist = 1.2, summ = mean)
lines(bsp4, max.dist = 1.2, summ = median, lty = 2)
lines(bsp4, max.dist = 1.2, summ = "mode", post = "par", lwd = 2, lty = 2)
legend(0.25, 0.4, legend = c("variogram posterior mean", "variogram posterior median", "parameters posterior mode"), lty = c(1, 2, 2), lwd = c(1, 1, 2), cex = 0.8)
 
# 8) simulaciones de campos aleatorios gaussianos: (mejor usar paquete RandomFields)
#The function grf generates simulations of Gaussian random fields on regular or irregular sets of locations. It relies on the decomposition of the covariance matrix and therfore
#won’t work for large number of locations in which case we suggest the usage of the package RandomFields. Some of its functionality is illustrated by the next commands.
#mapas obtenidos de la distribución predictiva
par(mfrow = c(2, 2), mar = c(3, 3, 1, 0.5), mgp = c(1.5, 0.7, 0))
image(bsp, loc = pred.grid, main = "predicted", col = gray(seq(1, 0.1, l = 30)))
image(bsp, val = "variance", loc = pred.grid, main = "prediction variance", col = gray(seq(1, 0.1, l = 30)))
image(bsp, val = "simulation", number.col = 1, loc = pred.grid, main = "a simulation from\nthe predictive distribution", col = gray(seq(1, 0.1, l = 30)))
image(bsp, val = "simulation", number.col = 2, loc = pred.grid, main = "another simulation from \n the predictive distribution", col = gray(seq(1, 0.1, l = 30)))
#resultados de simulación producidos con la función "grf"
par(mfrow = c(1, 2))
sim1 <- grf(100, cov.pars = c(1, 0.25))
points.geodata(sim1, main = "simulated locations and values")
plot(sim1, max.dist = 1, main = "true and empirical variograms")
#simulaciones con diferentes resoluciones
par(mfrow = c(1, 2))
sim2 <- grf(441, grid = "reg", cov.pars = c(1, 0.25))
image(sim2, main = "a small-ish simulation", col = gray(seq(1, 0.1, l = 30)))
 
 
######################################################################