Quantitative locational model for archaeological sites in the North of Santa Cruz (Argentina)

2022-10-18


Carga de paquetes y datos

library(psych)
library(MuMIn)
library(sjPlot)
library(ggplot2)
library(ggpubr)
library(multcomp)
library(ROCR)

arqueo <- read.delim("C:/RD/datos_lucia_magnin.txt")
arqueo$Geomorfo_LP <- as.factor(arqueo$Geomorfo_LP)
arqueo$sin.aspect <- sin(arqueo$aspect)
arqueo$cos.aspect <- cos(arqueo$aspect)

Función para obtener pseudo-R2

R2logit <- function(y, model){
    R2 <- 1-(model$deviance/model$null.deviance)
    return(R2)
    }

Filtrar los datos en base al tipo de sitio

A <- subset(arqueo, FUN == "A" | FUN == "NS")
AC <- subset(arqueo, FUN == "AC" | FUN == "NS")
C <- subset(arqueo, FUN == "C" | FUN == "NS")
CC <- subset(arqueo, FUN == "CC" | FUN == "NS")
Ch <- subset(arqueo, FUN == "Ch" | FUN == "NS")
CT <- subset(arqueo, FUN == "CT" | FUN == "NS")
LAL <- subset(arqueo, FUN == "LAL" | FUN == "NS")
LAM <- subset(arqueo, FUN == "LAM" | FUN == "NS")
T <- subset(arqueo, FUN == "T" | FUN == "NS")
TO <- subset(arqueo, FUN == "TO" | FUN == "NS")


Gráficos circulares

a<-ggplot(A, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

ac<-ggplot(AC, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

c<-ggplot(C, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

cc<-ggplot(CC, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

ch<-ggplot(Ch, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

ct<-ggplot(CT, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

lal<-ggplot(LAL, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

lam<-ggplot(LAM, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

t<-ggplot(T, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

to<-ggplot(TO, aes(x = aspect, fill = FUN)) + coord_polar(start = 300, direction = -1) + 
      geom_histogram(aes(y = stat(count) / sum(count)*100), bins = 20) + 
      scale_x_continuous("", limits = c(0, 360), 
      breaks = seq(0, 360, by = 45), labels = seq(0, 360, by = 45)) +
      theme_minimal() + ylab("Frecuencia relativa")

ggarrange(a, ac, nrow = 2, ncol = 2)

ggarrange(c, cc, nrow = 2, ncol = 2)

ggarrange(ch, ct, nrow = 2, ncol = 2)

ggarrange(lal, lam, nrow = 2, ncol = 2)

ggarrange(t, to, nrow = 2, ncol = 2)


Selección de modelos


A

A <- subset(arqueo, FUN == "A" | FUN == "NS")
A$sitio <- ifelse(A$FUN == "A", 1, 0)
m1A <- glm(sitio ~ slope + sin.aspect + cos.aspect,
           family = binomial, data = A)
m2A <- glm(sitio ~ slope + sin.aspect + cos.aspect + cst_dst_ag,
           family = binomial, data = A)
m3A <- glm(sitio ~ slope + sin.aspect + cos.aspect + cst_dst_ag + cst_dst_cam.ppal,
           family = binomial, data = A)
m4A  <- glm(sitio ~ slope + sin.aspect + cos.aspect + cst_dst_ag + cst_dst_cam.ppal + accesibilidad,
           family = binomial, data = A)
m5A  <- glm(sitio ~ slope + sin.aspect + cos.aspect + cst_dst_ag + cst_dst_cam.ppal + accesibilidad + Cuenca_Visual,
           family = binomial, data = A)
m6A <- glm(sitio ~ 1, family = binomial, data = A)

modelosA <- list(m1A, m2A, m3A, m4A, m5A, m6A)

aic.tableA <- model.sel(modelosA, rank = "AICc")

aic.tableA
## Model selection table 
##    (Int)   cos.asp sin.asp    slp cst_dst_ag
## 4  4.127 -0.013700 -0.1670 0.2639  -0.003772
## 5  3.828 -0.071710 -0.1699 0.2648  -0.003960
## 2 -3.671  0.151400 -0.4793 0.1734  -0.005591
## 3 -3.569  0.186900 -0.5044 0.1767  -0.004304
## 1 -4.162 -0.005114 -0.6843 0.1269           
## 6 -2.503                                    
##   cst_dst_cam.ppl        acc   Cnc_Vsl df  logLik AICc
## 4       2.563e-04 -0.0003368            7 -19.257 53.5
## 5      -1.256e-05 -0.0003317 1.075e-05  8 -19.148 55.6
## 2                                       5 -26.007 62.5
## 3      -3.357e-03                       6 -25.532 63.8
## 1                                       4 -27.924 64.2
## 6                                       1 -31.888 65.8
##   delta weight
## 4  0.00  0.726
## 5  2.08  0.257
## 2  9.02  0.008
## 3 10.29  0.004
## 1 10.68  0.003
## 6 12.29  0.002
## Models ranked by AICc(x)
R2logit(A$sitio, m4A)
## [1] 0.3960891
m4zA  <- glm(sitio ~ scale(slope) + scale(sin.aspect) + scale(cos.aspect) + 
scale(cst_dst_ag) + scale(cst_dst_cam.ppal) + scale(accesibilidad) + scale(Cuenca_Visual),
           family = binomial, data = A)
summary(m4zA)
## 
## Call:
## glm(formula = sitio ~ scale(slope) + scale(sin.aspect) + scale(cos.aspect) + 
##     scale(cst_dst_ag) + scale(cst_dst_cam.ppal) + scale(accesibilidad) + 
##     scale(Cuenca_Visual), family = binomial, data = A)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9463  -0.3292  -0.1314  -0.0551   3.0641  
## 
## Coefficients:
##                          Estimate Std. Error z value
## (Intercept)             -4.328938   0.917422  -4.719
## scale(slope)             1.792540   0.603378   2.971
## scale(sin.aspect)       -0.122575   0.468937  -0.261
## scale(cos.aspect)       -0.049821   0.524776  -0.095
## scale(cst_dst_ag)       -0.677969   0.748967  -0.905
## scale(cst_dst_cam.ppal) -0.001994   0.643813  -0.003
## scale(accesibilidad)    -2.029849   0.700193  -2.899
## scale(Cuenca_Visual)     0.202507   0.414094   0.489
##                         Pr(>|z|)    
## (Intercept)             2.37e-06 ***
## scale(slope)             0.00297 ** 
## scale(sin.aspect)        0.79379    
## scale(cos.aspect)        0.92436    
## scale(cst_dst_ag)        0.36536    
## scale(cst_dst_cam.ppal)  0.99753    
## scale(accesibilidad)     0.00374 ** 
## scale(Cuenca_Visual)     0.62482    
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 63.776  on 118  degrees of freedom
## Residual deviance: 38.295  on 111  degrees of freedom
## AIC: 54.295
## 
## Number of Fisher Scoring iterations: 7
plot_model(m4zA, type="std")

 

Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  slope = 0, cst_dst_ag = 0, cst_dst_cam.ppal = 0,
                  accesibilidad = 0, Cuenca_Visual = 0)
data.frame(orientacion = new$ori, prediccion = predict(m4zA, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.9757484
## 2           S  0.9784387
## 3           E  0.9775046
## 4           O  0.9838928
## 5          NE  0.9737554
## 6          NO  0.9808824
## 7          SE  0.9829159
## 8          SO  0.9740577


Porcentajes de predicciones correctas

pred <- prediction(predictions = predict(m4zA, type = "response"),
               labels = A$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 77.77778
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 7.147292
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


AC

AC <- subset(arqueo, FUN == "AC" | FUN == "NS")
AC <- subset(arqueo, Geomorfo_LP != "5")
AC$sitio <- ifelse(AC$FUN == "AC", 1, 0)
m1zAC <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = AC)
m2zAC <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno,
           family = binomial, data = AC)
m3zAC <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + Cuenca_Visual,
           family = binomial, data = AC)
m4zAC  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + Cuenca_Visual + accesibilidad,
           family = binomial, data = AC)
m5zAC  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + Cuenca_Visual + accesibilidad + cst_dst_afl,
           family = binomial, data = AC)
m6zAC <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + Cuenca_Visual + accesibilidad + cst_dst_ag + cst_dst_cam.ppal + 
                    cst_dst_CC + cst_dst_CT + cst_dst_LAM, family = binomial, data = AC)
m7zAC <- glm(sitio ~ 1, family = binomial, data = AC)

modelosAC <- list(m1zAC, m2zAC, m3zAC, m4zAC, m5zAC, m6zAC, m7zAC)

R2logit(AC$sitio, m1zAC)
## [1] 0.2940222
aic.tableAC <- model.sel(modelosAC, rank = "AICc")
aic.tableAC
## Model selection table 
##     (Int)  cos.asp Gmr_LP sin.asp     slp  srt_w84 hll_inv
## 1 -10.470 -0.08371      +  0.1795 -0.1276  0.01111        
## 2 -12.630 -0.05143      +  0.2260 -0.1206  0.01172 0.01569
## 3 -12.900  0.04325      +  0.2316 -0.1382  0.01179 0.01409
## 4 -14.430  0.07131      +  0.2295 -0.1303  0.01767 0.01287
## 6  -3.262  0.12660      +  0.1372 -0.1633 -0.02471 0.01479
## 5 -13.760  0.07872      +  0.2042 -0.1430  0.01641 0.01495
## 7  -3.401                                                 
##     Cnc_Vsl        acc cst_dst_afl cst_dst_ag
## 1                                            
## 2                                            
## 3 1.445e-05                                  
## 4 1.562e-05 -8.128e-05                       
## 6 1.527e-05  5.334e-04              -0.005526
## 5 1.433e-05 -8.109e-05   0.0009712           
## 7                                            
##   cst_dst_cam.ppl cst_dst_CC cst_dst_CT cst_dst_LAM df
## 1                                                    7
## 2                                                    8
## 3                                                    9
## 4                                                   10
## 6        -0.02067  0.0004619  -0.001039     0.01062 15
## 5                                                   11
## 7                                                    1
##    logLik  AICc delta weight
## 1 -49.901 114.0  0.00  0.381
## 2 -49.340 115.0  0.95  0.238
## 3 -48.431 115.2  1.20  0.209
## 4 -48.298 117.0  3.02  0.084
## 6 -43.451 117.9  3.87  0.055
## 5 -48.201 118.9  4.92  0.033
## 7 -70.683 143.4 29.34  0.000
## Models ranked by AICc(x)
m1zAC <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP,
           family = binomial, data = AC)
summary(m1zAC)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = AC)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0663  -0.2349  -0.0877  -0.0394   4.1045  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.74184    0.53847  -6.949 3.68e-12 ***
## scale(srtm_wgs84)  1.26677    0.36456   3.475 0.000511 ***
## scale(slope)      -0.72264    0.40623  -1.779 0.075259 .  
## scale(sin.aspect)  0.12771    0.28850   0.443 0.658009    
## scale(cos.aspect) -0.05878    0.28518  -0.206 0.836710    
## Geomorfo_LP3       0.38251    0.64105   0.597 0.550705    
## Geomorfo_LP4      -2.33130    1.07588  -2.167 0.030244 *  
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 141.366  on 495  degrees of freedom
## Residual deviance:  99.801  on 489  degrees of freedom
## AIC: 113.8
## 
## Number of Fisher Scoring iterations: 8
plot_model(m1zAC, type="std")

 

Comparaciones múltiples

multAC <- glht(m1zAC, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multAC)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = AC)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)  
## 3 - 1 == 0   0.3825     0.6410   0.597   0.8159  
## 4 - 1 == 0  -2.3313     1.0759  -2.167   0.0725 .
## 4 - 3 == 0  -2.7138     1.1616  -2.336   0.0480 *
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1))
data.frame(orientacion = new$ori, prediccion = predict(m1zAC, newdata = new, type = "response"))
##   orientacion   prediccion
## 1           N 3.737140e-05
## 2           S 2.441239e-05
## 3           E 2.547356e-05
## 4           O 2.480857e-05
## 5          NE 3.323240e-05
## 6          NO 3.240615e-05
## 7          SE 2.160786e-05
## 8          SO 3.186266e-05


Porcentajes de predicciones correctas

pred <- prediction(predictions = predict(m1zAC, type = "response"),
               labels = AC$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 87.5
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 6.193442
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


C

C <- subset(arqueo, FUN == "C" | FUN == "NS")
C <- subset(C, Geomorfo_LP != "3")
C$sitio <- ifelse(C$FUN == "C", 1, 0)
m1C <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = C)
m2C <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno,
           family = binomial, data = C)
m3C <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + cst_dst_cam.ppal,
           family = binomial, data = C)
m4C  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + cst_dst_cam.ppal + accesibilidad,
           family = binomial, data = C)
m5C  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_invierno + cst_dst_cam.ppal + accesibilidad + Cuenca_Visual,
           family = binomial, data = C)
m6C <- glm(sitio ~ 1, family = binomial, data = C)

modelosC <- list(m1C, m2C, m3C, m4C, m5C, m6C)

aic.tableC <- model.sel(modelosC, rank = "AICc")
aic.tableC
## Model selection table 
##     (Int) cos.asp Gmr_LP  sin.asp     slp   srt_w84
## 1  0.6889 -0.5827      + -0.21330 0.04234 -0.005825
## 6 -2.3320                                          
## 2  0.4528 -0.5889      + -0.21750 0.04475 -0.005799
## 3  0.6353 -0.5626      + -0.09887 0.05053 -0.006024
## 4  0.6876 -0.5729      + -0.08998 0.04979 -0.007031
## 5 -2.2440 -0.6070      + -0.14220 0.05000 -0.003614
##    hll_inv cst_dst_cam.ppl        acc   Cnc_Vsl df  logLik
## 1                                                7 -26.346
## 6                                                1 -33.792
## 2 0.001871                                       8 -26.334
## 3 0.002594       -0.002326                       9 -26.110
## 4 0.002869       -0.002648  2.073e-05           10 -26.106
## 5 0.001828       -0.003256 -5.900e-06 3.467e-05 11 -25.057
##   AICc delta weight
## 1 67.8  0.00  0.526
## 6 69.6  1.86  0.207
## 2 70.1  2.29  0.167
## 3 72.0  4.21  0.064
## 4 74.4  6.61  0.019
## 5 74.7  6.97  0.016
## Models ranked by AICc(x)
R2logit(C$sitio, m1C)
## [1] 0.2203601
m1zC <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP,
           family = binomial, data = C)

summary(m1zC)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = C)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5952  -0.3959  -0.3051  -0.2208   2.7362  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)   
## (Intercept)        -2.8265     1.0614  -2.663  0.00775 **
## scale(srtm_wgs84)  -0.6440     0.4620  -1.394  0.16332   
## scale(slope)        0.2706     0.4162   0.650  0.51561   
## scale(sin.aspect)  -0.1554     0.3882  -0.400  0.68896   
## scale(cos.aspect)  -0.3988     0.4395  -0.907  0.36422   
## Geomorfo_LP4       -0.1599     1.1676  -0.137  0.89105   
## Geomorfo_LP5        2.3645     1.5366   1.539  0.12386   
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 67.584  on 112  degrees of freedom
## Residual deviance: 52.691  on 106  degrees of freedom
## AIC: 66.691
## 
## Number of Fisher Scoring iterations: 6
plot_model(m1zC, type="std")

Comparaciones múltiples

multC <- glht(m1zC, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multC)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = C)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)  
## 4 - 1 == 0  -0.1599     1.1676  -0.137   0.9893  
## 5 - 1 == 0   2.3645     1.5366   1.539   0.2642  
## 5 - 4 == 0   2.5244     1.1171   2.260   0.0588 .
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1))
data.frame(orientacion = new$ori, prediccion = predict(m1zC, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.7173307
## 2           S  0.5398182
## 3           E  0.5237603
## 4           O  0.8252469
## 5          NE  0.5504596
## 6          NO  0.8305432
## 7          SE  0.7016211
## 8          SO  0.5315213


Porcentajes de predicciones correctas

pred <- prediction(predictions = predict(m1zC, type = "response"),
               labels = C$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 70
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 7.367712
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


CC

CC <- subset(arqueo, FUN == "CC" | FUN == "NS")
CC$sitio <- ifelse(CC$FUN == "CC", 1, 0)
m1CC <- glm(sitio ~ slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = CC)
m2CC <- glm(sitio ~ slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_afl,
           family = binomial, data = CC)
m3CC <- glm(sitio ~ slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_afl + cst_dst_AC + cst_dst_LAM,
           family = binomial, data = CC)
m4CC  <- glm(sitio ~ slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_afl + cst_dst_AC + cst_dst_LAM + Cuenca_Visual,
           family = binomial, data = CC)
m5CC <- glm(sitio ~ 1, family = binomial, data = CC)

modelosCC <- list(m1CC, m2CC, m3CC, m4CC, m5CC)
aic.tableCC <- model.sel(modelosCC, rank = "AICc")
aic.tableCC
## Model selection table 
##     (Int) cos.asp Gmr_LP sin.asp      slp cst_dst_afl
## 5 -2.3980                                            
## 3 -1.0700  -1.825      +  0.6143 -0.08491    0.001111
## 1 -1.5000  -1.136      +  0.2341 -0.05298            
## 4 -0.6339  -1.757      +  0.6224 -0.08151    0.001752
## 2 -1.3430  -1.170      +  0.2477 -0.04673   -0.001439
##   cst_dst_AC cst_dst_LAM    Cnc_Vsl df  logLik AICc delta
## 5                                    1 -34.420 70.9  0.00
## 3  -0.006930   -0.002883            10 -26.266 74.6  3.68
## 1                                    7 -29.932 74.9  3.99
## 4  -0.007151   -0.001707 -2.882e-05 11 -25.822 76.1  5.21
## 2                                    8 -29.811 76.9  6.05
##   weight
## 5  0.705
## 3  0.112
## 1  0.096
## 4  0.052
## 2  0.034
## Models ranked by AICc(x)


Ch

Ch <- subset(arqueo, FUN == "Ch" | FUN == "NS")
Ch <- subset(Ch, Geomorfo_LP != "5")
Ch$sitio <- ifelse(Ch$FUN == "Ch", 1, 0)
m1Ch <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = Ch)
m2Ch <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + Cuenca_Visual,
           family = binomial, data = Ch)
m3Ch <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + Cuenca_Visual + cst_dst_LAM,
           family = binomial, data = Ch)
m4Ch  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + Cuenca_Visual + cst_dst_LAM + accesibilidad,
           family = binomial, data = Ch)
m5Ch <- glm(sitio ~ 1, family = binomial, data = Ch)

modelosCh <- list(m1Ch, m2Ch, m3Ch, m4Ch, m5Ch)
aic.tableCh <- model.sel(modelosCh, rank = "AICc")
aic.tableCh
## Model selection table 
##     (Int) cos.asp Gmr_LP sin.asp      slp srt_w84   Cnc_Vsl
## 2 -23.240  0.5968      + -0.0393 -0.08478 0.02780 4.602e-05
## 3 -17.530  0.7002      + -0.3732 -0.18580 0.01999 4.953e-05
## 4 -17.030  0.6927      + -0.3618 -0.17840 0.01725 4.934e-05
## 1 -21.630  0.2585      +  0.5314 -0.06552 0.02714          
## 5  -2.034                                                  
##   cst_dst_LAM       acc df  logLik AICc delta weight
## 2                        8 -19.586 56.5  0.00  0.512
## 3    0.003971            9 -18.821 57.3  0.81  0.342
## 4    0.003476 4.514e-05 10 -18.800 59.6  3.14  0.106
## 1                        7 -23.296 61.6  5.13  0.039
## 5                        1 -43.351 88.7 32.28  0.000
## Models ranked by AICc(x)
R2logit(Ch$sitio, m2Ch)
## [1] 0.5482087
m2zCh <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP + scale(Cuenca_Visual),
           family = binomial, data = Ch)

summary(m2zCh)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP + scale(Cuenca_Visual), family = binomial, 
##     data = Ch)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.69491  -0.27028  -0.08945  -0.01968   2.74973  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)          -3.51073    0.96474  -3.639 0.000274
## scale(srtm_wgs84)     2.93391    0.78832   3.722 0.000198
## scale(slope)         -0.53029    0.53312  -0.995 0.319892
## scale(sin.aspect)    -0.02850    0.55840  -0.051 0.959290
## scale(cos.aspect)     0.41072    0.42652   0.963 0.335572
## Geomorfo_LP3          0.07864    1.22925   0.064 0.948991
## Geomorfo_LP4         -1.42091    1.04091  -1.365 0.172232
## scale(Cuenca_Visual)  1.01703    0.39494   2.575 0.010020
##                         
## (Intercept)          ***
## scale(srtm_wgs84)    ***
## scale(slope)            
## scale(sin.aspect)       
## scale(cos.aspect)       
## Geomorfo_LP3            
## Geomorfo_LP4            
## scale(Cuenca_Visual) *  
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 86.702  on 120  degrees of freedom
## Residual deviance: 39.171  on 113  degrees of freedom
## AIC: 55.171
## 
## Number of Fisher Scoring iterations: 7
plot_model(m2zCh, type="std")


Comparaciones múltiples

multCh <- glht(m2zCh, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multCh)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP + scale(Cuenca_Visual), family = binomial, 
##     data = Ch)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)
## 3 - 1 == 0  0.07864    1.22925   0.064    0.998
## 4 - 1 == 0 -1.42091    1.04091  -1.365    0.357
## 4 - 3 == 0 -1.49955    1.29868  -1.155    0.478
## (Adjusted p values reported -- single-step method)


Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1), Cuenca_Visual = 0)
data.frame(orientacion = new$ori, prediccion = predict(m2zCh, newdata = new, type = "response"))
##   orientacion   prediccion
## 1           N 4.709023e-11
## 2           S 1.527636e-10
## 3           E 1.531456e-10
## 4           O 4.597128e-11
## 5          NE 1.008968e-10
## 6          NO 3.209601e-11
## 7          SE 9.837938e-11
## 8          SO 1.133435e-10


Porcentajes de predicciones correctas

pred <- prediction(predictions = predict(m2zCh, type = "response"),
               labels = Ch$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 85.71429
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 9.661575
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


CT

CT <- subset(arqueo, FUN == "CT" | FUN == "NS")
CT$sitio <- ifelse(CT$FUN == "CT", 1, 0)
m1CT <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = CT)
m2CT <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano,
           family = binomial, data = CT)
m3CT <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_afl + cst_dst_cant2,
           family = binomial, data = CT)
m4CT  <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_afl + cst_dst_cant2 + Cuenca_Visual,
           family = binomial, data = CT)
m5CT <- glm(sitio ~ 1, family = binomial, data = CT)

modelosCT <- list(m1CT, m2CT, m3CT, m4CT, m5CT)
aic.tableCT <- model.sel(modelosCT, rank = "AICc")
aic.tableCT
## Model selection table 
##     (Int) cos.asp Gmr_LP sin.asp      slp   srt_w84 hll_vrn
## 1 -0.9716 -0.2219      + -0.2543 -0.09303  0.001943        
## 2 -4.4470 -0.2497      + -0.2685 -0.08675  0.002420 0.01549
## 3 -2.6560 -0.3133      + -0.2493 -0.12330 -0.002038 0.02130
## 4 -2.8740 -0.2775      + -0.3403 -0.13160 -0.001108 0.01787
## 5 -1.5220                                                  
##   cst_dst_afl cst_dst_ag cst_dst_cn2   Cnc_Vsl df  logLik
## 1                                               8 -53.704
## 2                                               9 -53.227
## 3    0.002571  -0.003249    0.002587           12 -49.851
## 4    0.002188  -0.003189    0.002391 1.356e-05 13 -49.247
## 5                                               1 -62.984
##    AICc delta weight
## 1 124.6  0.00  0.427
## 2 125.9  1.35  0.218
## 3 126.3  1.72  0.181
## 4 127.5  2.97  0.097
## 5 128.0  3.44  0.077
## Models ranked by AICc(x)
R2logit(CT$sitio, m1CT)
## [1] 0.1473399
m1zCT <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP,
           family = binomial, data = CT)

summary(m1zCT)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = CT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4113  -0.5822  -0.4545  -0.2663   2.2889  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -0.4447     0.4073  -1.092 0.274920    
## scale(srtm_wgs84)   0.2092     0.2822   0.741 0.458561    
## scale(slope)       -0.5648     0.3052  -1.851 0.064215 .  
## scale(sin.aspect)  -0.1830     0.2548  -0.718 0.472611    
## scale(cos.aspect)  -0.1541     0.2492  -0.618 0.536493    
## Geomorfo_LP3       -0.9114     0.9187  -0.992 0.321189    
## Geomorfo_LP4       -1.8683     0.5454  -3.426 0.000614 ***
## Geomorfo_LP5       -1.0255     1.3362  -0.767 0.442809    
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 125.97  on 133  degrees of freedom
## Residual deviance: 107.41  on 126  degrees of freedom
## AIC: 123.41
## 
## Number of Fisher Scoring iterations: 5
plot_model(m1zCT, type="std")

Comparaciones múltiples

multCT <- glht(m1CT, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multCT)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + 
##     Geomorfo_LP, family = binomial, data = CT)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)   
## 3 - 1 == 0  -0.9114     0.9187  -0.992  0.73547   
## 4 - 1 == 0  -1.8683     0.5454  -3.426  0.00298 **
## 5 - 1 == 0  -1.0255     1.3362  -0.767  0.85770   
## 4 - 3 == 0  -0.9570     0.9219  -1.038  0.70731   
## 5 - 3 == 0  -0.1141     1.5598  -0.073  0.99984   
## 5 - 4 == 0   0.8429     1.2874   0.655  0.90595   
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)


Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1))
data.frame(orientacion = new$ori, prediccion = predict(m1zCT, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.2543195
## 2           S  0.2478414
## 3           E  0.2355218
## 4           O  0.3945260
## 5          NE  0.2070590
## 6          NO  0.3475586
## 7          SE  0.3401604
## 8          SO  0.2065005
pred <- prediction(predictions = predict(m1zCT, type = "response"),
               labels = CT$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 62.5
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 14.3261
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


LAL

LAL <- subset(arqueo, FUN == "LAL" | FUN == "NS")
LAL$sitio <- ifelse(LAL$FUN == "LAL", 1, 0)
m1LAL <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = LAL)
m2LAL <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_ag + cst_dst_afl + cst_dst_cant2,
           family = binomial, data = LAL)
m3LAL <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_ag + cst_dst_afl + cst_dst_cant2 + cst_dst_AC + cst_dst_CC,
           family = binomial, data = LAL)
m4LAL <- glm(sitio ~ 1, family = binomial, data = LAL)

modelosLAL <- list(m1LAL, m2LAL, m3LAL, m4LAL)
aic.tableLAL <- model.sel(modelosLAL, rank = "AICc")
aic.tableLAL
## Model selection table 
##    (Int) cos.asp Gmr_LP sin.asp     slp    srt_w84
## 1 3.0030  0.1732      +  0.2073 -0.1419 -0.0013690
## 2 2.7880  0.1834      +  0.2229 -0.1278 -0.0008426
## 3 1.6900  0.1739      +  0.2433 -0.1326  0.0007035
## 4 0.1974                                          
##   cst_dst_afl cst_dst_ag cst_dst_cn2 cst_dst_AC cst_dst_CC
## 1                                                         
## 2   -0.001988  0.0002354   3.847e-05                      
## 3   -0.001440  0.0011300  -4.758e-05  -0.002106   0.000255
## 4                                                         
##   df   logLik  AICc delta weight
## 1  8 -145.072 306.8  0.00  0.863
## 2 11 -144.102 311.3  4.58  0.087
## 3 13 -142.441 312.5  5.71  0.050
## 4  1 -167.946 337.9 31.15  0.000
## Models ranked by AICc(x)
R2logit(LAL$sitio, m1LAL)
## [1] 0.1361973
m1zLAL <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP,
           family = binomial, data = LAL)

summary(m1zLAL)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = LAL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0717  -1.0886   0.6026   0.9165   1.9623  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         1.0586     0.3093   3.423 0.000619 ***
## scale(srtm_wgs84)  -0.1602     0.1580  -1.014 0.310437    
## scale(slope)       -0.7770     0.1729  -4.493 7.01e-06 ***
## scale(sin.aspect)   0.1489     0.1409   1.057 0.290309    
## scale(cos.aspect)   0.1200     0.1447   0.830 0.406698    
## Geomorfo_LP3       -0.5716     0.6473  -0.883 0.377216    
## Geomorfo_LP4       -1.2073     0.3618  -3.337 0.000847 ***
## Geomorfo_LP5       -1.1762     0.8216  -1.432 0.152283    
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 335.89  on 243  degrees of freedom
## Residual deviance: 290.14  on 236  degrees of freedom
## AIC: 306.14
## 
## Number of Fisher Scoring iterations: 4
plot_model(m1zLAL, type="std")


Comparaciones múltiples

multLAL <- glht(m1zLAL, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multLAL)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = LAL)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)   
## 3 - 1 == 0 -0.57158    0.64729  -0.883  0.79794   
## 4 - 1 == 0 -1.20733    0.36181  -3.337  0.00414 **
## 5 - 1 == 0 -1.17618    0.82163  -1.432  0.45371   
## 4 - 3 == 0 -0.63575    0.61091  -1.041  0.70509   
## 5 - 3 == 0 -0.60460    0.96171  -0.629  0.91527   
## 5 - 4 == 0  0.03115    0.74840   0.042  0.99997   
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)


Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1))
data.frame(orientacion = new$ori, prediccion = predict(m1zLAL, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.9566764
## 2           S  0.9572371
## 3           E  0.9594197
## 4           O  0.9287990
## 5          NE  0.9645207
## 6          NO  0.9391571
## 7          SE  0.9398932
## 8          SO  0.9645668
pred <- prediction(predictions = predict(m1zLAL, type = "response"),
               labels = LAL$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 64.92537
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 57.47858
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


LAM

LAM <- subset(arqueo, FUN == "LAM" | FUN == "NS")
LAM <- subset(LAM, Geomorfo_LP!="5")
LAM$sitio <- ifelse(LAM$FUN == "LAM", 1, 0)
m1LAM <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = LAM)
m2LAM <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano,
           family = binomial, data = LAM)
m3LAM <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_cant2,
           family = binomial, data = LAM)
m4LAM <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_cant2 + Cuenca_Visual,
           family = binomial, data = LAM)
m5LAM <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_cant2 + Cuenca_Visual + cst_dst_CC + cst_dst_Chenque,
           family = binomial, data = LAM)
m6LAM <- glm(sitio ~ 1, family = binomial, data = LAM)

modelosLAM <- list(m1LAM, m2LAM, m3LAM, m4LAM, m5LAM, m6LAM)
aic.tableLAM <- model.sel(modelosLAM, rank = "AICc")
aic.tableLAM
## Model selection table 
##     (Int) cos.asp Gmr_LP sin.asp      slp   srt_w84
## 3 -0.7329 -0.8193      +  0.5040 -0.02958 -0.001317
## 4 -0.7598 -0.8180      +  0.5121 -0.02839 -0.001368
## 6 -1.7820                                          
## 5 -0.3153 -0.8301      +  0.5233 -0.03343 -0.001830
## 1  0.9237 -0.4329      +  0.2559 -0.11330 -0.002112
## 2  0.7901 -0.4345      +  0.2555 -0.11280 -0.002104
##     hll_vrn cst_dst_ag cst_dst_cn2    Cnc_Vsl cst_dst_CC
## 3 0.0091090   -0.01627   0.0001630                      
## 4 0.0096440   -0.01639   0.0001738 -1.806e-06           
## 6                                                       
## 5 0.0096170   -0.01644   0.0002927 -6.506e-07 -0.0004001
## 1                                                       
## 2 0.0006245                                             
##   cst_dst_Chn df  logLik  AICc delta weight
## 3             10 -39.161 100.3  0.00  0.701
## 4             11 -39.155 102.6  2.40  0.212
## 6              1 -51.520 105.1  4.82  0.063
## 5  -0.0003187 13 -39.123 107.5  7.27  0.018
## 1              7 -47.767 110.5 10.24  0.004
## 2              8 -47.766 112.8 12.52  0.001
## Models ranked by AICc(x)
R2logit(LAM$sitio, m3LAM)
## [1] 0.2398921
m3zLAM <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
scale(cos.aspect) + Geomorfo_LP + scale(hill_verano) + scale(cst_dst_ag) + 
scale(cst_dst_cant2),
           family = binomial, data = LAM)
summary(m3zLAM)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP + scale(hill_verano) + scale(cst_dst_ag) + 
##     scale(cst_dst_cant2), family = binomial, data = LAM)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.39959  -0.53617  -0.27201  -0.05527   2.58782  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -3.0257     0.8453  -3.579 0.000344
## scale(srtm_wgs84)     -0.1362     0.3498  -0.389 0.696961
## scale(slope)          -0.1855     0.4409  -0.421 0.674002
## scale(sin.aspect)      0.3685     0.2846   1.295 0.195393
## scale(cos.aspect)     -0.5595     0.3804  -1.471 0.141316
## Geomorfo_LP3           0.6030     1.5524   0.388 0.697668
## Geomorfo_LP4          -0.1511     0.7456  -0.203 0.839453
## scale(hill_verano)     0.1914     0.4290   0.446 0.655504
## scale(cst_dst_ag)     -2.7795     0.8575  -3.242 0.001189
## scale(cst_dst_cant2)   0.0378     0.4411   0.086 0.931718
##                         
## (Intercept)          ***
## scale(srtm_wgs84)       
## scale(slope)            
## scale(sin.aspect)       
## scale(cos.aspect)       
## Geomorfo_LP3            
## Geomorfo_LP4            
## scale(hill_verano)      
## scale(cst_dst_ag)    ** 
## scale(cst_dst_cant2)    
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 103.040  on 124  degrees of freedom
## Residual deviance:  78.321  on 115  degrees of freedom
## AIC: 98.321
## 
## Number of Fisher Scoring iterations: 7
plot_model(m3LAM, type="std")

Comparaciones múltiples

multLAM <- glht(m3LAM, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multLAM)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + 
##     Geomorfo_LP + hill_verano + cst_dst_ag + cst_dst_cant2, family = binomial, 
##     data = LAM)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)
## 3 - 1 == 0   0.6030     1.5524   0.388    0.917
## 4 - 1 == 0  -0.1511     0.7456  -0.203    0.977
## 4 - 3 == 0  -0.7541     1.4736  -0.512    0.860
## (Adjusted p values reported -- single-step method)


Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1),
                  hill_verano = 0, cst_dst_ag = 0, cst_dst_cant2 = 0)
data.frame(orientacion = new$ori, prediccion = predict(m3zLAM, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.6271657
## 2           S  0.1455427
## 3           E  0.1594339
## 4           O  0.3738977
## 5          NE  0.3651247
## 6          NO  0.6314496
## 7          SE  0.1622957
## 8          SO  0.3144156


pred <- prediction(predictions = predict(m3zLAM, type = "response"),
               labels = LAM$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 77.77778
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 19.13566
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


T

T <- subset(arqueo, FUN == "T" | FUN == "NS")
T <- subset(T, Geomorfo_LP != "3")
T <- subset(T, Geomorfo_LP != "5")
T$sitio <- ifelse(T$FUN == "T", 1, 0)
m1T <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect,
           family = binomial, data = T)
m2T <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + hill_verano,
           family = binomial, data = T)
m3T <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + hill_verano + cst_dst_cant2,
           family = binomial, data = T)
m4T <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + hill_verano + cst_dst_cant2 + Cuenca_Visual,
           family = binomial, data = T)
m5T <- glm(sitio ~ 1, family = binomial, data = T)

modelosT <- list(m1T, m2T, m3T, m4T, m5T)
aic.tableT <- model.sel(modelosT, rank = "AICc")
aic.tableT
## Model selection table 
##      (Int) cos.asp  sin.asp     slp srt_w84 hll_vrn
## 4 -114.700  10.990 -13.9300 -1.4240 0.05939  0.2987
## 2  -39.770   3.432  -1.3050 -0.3867 0.01243  0.1418
## 1  -11.070   2.683  -0.7931 -0.1692 0.01127        
## 3  -41.510   3.532  -1.3660 -0.4050 0.01194  0.1512
## 5   -2.996                                         
##   cst_dst_cn2   Cnc_Vsl df  logLik AICc delta weight
## 4  -0.0108700 0.0004247  8  -4.146 25.8  0.00  0.969
## 2                        6 -10.570 34.0  8.20  0.016
## 1                        5 -12.206 35.0  9.23  0.010
## 3   0.0008766            7 -10.542 36.2 10.45  0.005
## 5                        1 -20.102 42.2 16.45  0.000
## Models ranked by AICc(x)
m4zT <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(cos.aspect) +
scale(hill_verano) + scale(cst_dst_cant2) + scale(Cuenca_Visual),
           family = binomial, data = T)

R2logit(T$sitio, m4zT)
## [1] 0.5762492
summary(m4zT)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(cos.aspect) + 
##     scale(hill_verano) + scale(cst_dst_cant2) + scale(Cuenca_Visual), 
##     family = binomial, data = T)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0981  -0.1209  -0.0302  -0.0028   2.9695  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)           -8.1931     3.0177  -2.715  0.00663
## scale(srtm_wgs84)      1.2263     0.8080   1.518  0.12907
## scale(slope)          -3.0939     2.0388  -1.518  0.12914
## scale(cos.aspect)      2.7621     1.4780   1.869  0.06164
## scale(hill_verano)     2.0402     2.2685   0.899  0.36846
## scale(cst_dst_cant2)  -0.1048     1.1031  -0.095  0.92430
## scale(Cuenca_Visual)   1.3541     0.6784   1.996  0.04593
##                        
## (Intercept)          **
## scale(srtm_wgs84)      
## scale(slope)           
## scale(cos.aspect)    . 
## scale(hill_verano)     
## scale(cst_dst_cant2)   
## scale(Cuenca_Visual) * 
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40.203  on 104  degrees of freedom
## Residual deviance: 17.036  on  98  degrees of freedom
## AIC: 31.036
## 
## Number of Fisher Scoring iterations: 9
plot_model(m4zT, type="std")


Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0,
                  hill_verano = 0, Cuenca_Visual = 0, cst_dst_cant2 = 0)
data.frame(orientacion = new$ori, prediccion = predict(m4zT, newdata = new, type = "link"))
##   orientacion prediccion
## 1           N  -34.09114
## 2           S  -26.61105
## 3           E  -26.52950
## 4           O  -34.87644
## 5          NE  -29.00820
## 6          NO  -36.95280
## 7          SE  -29.83327
## 8          SO  -28.26882


pred <- prediction(predictions = predict(m4zT, type = "response"),
               labels = T$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 80
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 1.237864
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")


TO

TO <- subset(arqueo, FUN == "TO" | FUN == "NS")
TO$sitio <- ifelse(TO$FUN == "TO", 1, 0)
m1TO <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP,
           family = binomial, data = TO)
m2TO <- glm(sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + Geomorfo_LP + cst_dst_afl,
           family = binomial, data = TO)
m3TO <- glm(sitio ~ 1, family = binomial, data = TO)

modelosTO <- list(m1TO, m2TO, m3TO)
aic.tableTO <- model.sel(modelosTO, rank = "AICc")
aic.tableTO
## Model selection table 
##    (Int) cos.asp Gmr_LP  sin.asp      slp    srt_w84
## 1 2.5120  0.3349      + -0.02340 -0.04248 -0.0016420
## 2 2.1480  0.3146      + -0.03183 -0.02648 -0.0009758
## 3 0.4412                                            
##   cst_dst_afl df   logLik  AICc delta weight
## 1              8 -178.487 373.5  0.00  0.484
## 2   -0.001611  9 -177.450 373.6  0.06  0.470
## 3              1 -188.100 378.2  4.71  0.046
## Models ranked by AICc(x)
R2logit(TO$sitio, m1TO)
## [1] 0.05110843
m1zTO <- glm(sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + scale(cos.aspect) + Geomorfo_LP,
           family = binomial, data = TO)

summary(m1zTO)
## 
## Call:
## glm(formula = sitio ~ scale(srtm_wgs84) + scale(slope) + scale(sin.aspect) + 
##     scale(cos.aspect) + Geomorfo_LP, family = binomial, data = TO)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7756  -1.1761   0.7445   1.0000   1.6684  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        1.05868    0.29446   3.595 0.000324 ***
## scale(srtm_wgs84) -0.16541    0.13847  -1.195 0.232251    
## scale(slope)      -0.26363    0.13404  -1.967 0.049198 *  
## scale(sin.aspect) -0.01654    0.12809  -0.129 0.897268    
## scale(cos.aspect)  0.23744    0.12844   1.849 0.064517 .  
## Geomorfo_LP3      -0.13582    0.54839  -0.248 0.804387    
## Geomorfo_LP4      -0.86081    0.33373  -2.579 0.009899 ** 
## Geomorfo_LP5      -0.26096    0.75315  -0.346 0.728971    
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 376.20  on 280  degrees of freedom
## Residual deviance: 356.97  on 273  degrees of freedom
## AIC: 372.97
## 
## Number of Fisher Scoring iterations: 4
plot_model(m1zTO, type="std")


Comparaciones múltiples

multTO <- glht(m1TO, linfct = mcp(Geomorfo_LP = "Tukey"))
summary(multTO)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = sitio ~ srtm_wgs84 + slope + sin.aspect + cos.aspect + 
##     Geomorfo_LP, family = binomial, data = TO)
## 
## Linear Hypotheses:
##            Estimate Std. Error z value Pr(>|z|)  
## 3 - 1 == 0  -0.1358     0.5484  -0.248   0.9941  
## 4 - 1 == 0  -0.8608     0.3337  -2.579   0.0438 *
## 5 - 1 == 0  -0.2610     0.7532  -0.346   0.9843  
## 4 - 3 == 0  -0.7250     0.4992  -1.452   0.4430  
## 5 - 3 == 0  -0.1251     0.8451  -0.148   0.9987  
## 5 - 4 == 0   0.5998     0.6999   0.857   0.8134  
## ---
## Signif. codes:  
## 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

 

Predicciones para las orientaciones

new <- data.frame(ori = c("N","S","E","O","NE","NO","SE","SO"), 
                  sin.aspect = scale(sin(c(90,270,0,180,45,135,225,315))),
                  cos.aspect = scale(cos(c(90,270,0,180,45,135,225,315))),
                  srtm_wgs84 = 0, slope = 0, Geomorfo_LP = as.factor(1))
data.frame(orientacion = new$ori, prediccion = predict(m1zTO, newdata = new, type = "response"))
##   orientacion prediccion
## 1           N  0.9008640
## 2           S  0.9463025
## 3           E  0.9463568
## 4           O  0.8999406
## 5          NE  0.9330503
## 6          NO  0.8800911
## 7          SE  0.9323714
## 8          SO  0.9370259


pred <- prediction(predictions = predict(m1zTO, type = "response"),
               labels = TO$sitio)
sens <- data.frame(x = 100*unlist(performance(pred, "sens")@x.values), 
                   y = 100*unlist(performance(pred, "sens")@y.values),
                   tipo = "Sitio")
spec <- data.frame(x = 100*unlist(performance(pred, "spec")@x.values), 
                   y = 100*unlist(performance(pred, "spec")@y.values),
                   tipo = "No sitio")
sens_spec <- rbind(sens,spec)
diff_sens_spec <- abs(sens$y - spec$y)
opt_perc_position <- which.min(diff_sens_spec)
opt_perc <- sens[opt_perc_position, 2] # Sensibilidad-especificidad óptimos
opt_perc
## [1] 57.30994
opt_cutoff <- sens[opt_perc_position, 1] # Probabilidad de corte óptima
opt_cutoff
## [1] 61.3088
ggplot() + 
  geom_line(data = sens_spec, aes(x, y, col = tipo), size = 1.5) + 
  labs(x = "Probabilidad de sitio predicha", y = "Porcentaje correcto de predicción") +
  geom_hline(yintercept = opt_perc, linetype = "dashed") +
  geom_vline(xintercept = opt_cutoff, linetype = "dashed")