Load libraries

library(sf)
library(tmap)
library(spdep)
library(rgdal)
library(tidyverse)
library(tigris)
library(mapview)
library(GWmodel)    
library(regclass)
library(viridis)
library(grid)
library(RColorBrewer)
library(rgeoda)
library(sjPlot)
library(jtools)
library(dlookr)
library(terra)
library(ggpubr)
library(rstatix)

Set directory

# Show the working directory the session starts in.
getwd()

## [1] "C:/Users/ntybl/OneDrive/Documents/Semestre 8"

# Switch the working directory to the data folder and confirm the change.
# NOTE(review): this is an absolute, machine-specific path, and the file reads
# further down also use absolute paths, so the script will not run unchanged
# on another machine.
setwd("C:/Users/ntybl/OneDrive/Documents/Semestre 8/Datos")
getwd()

## [1] "C:/Users/ntybl/OneDrive/Documents/Semestre 8/Datos"

Importing non-spatial data and geospatial data (shapefile). Unir las bases de datos para poder crear los mapas.

# Location of the municipality polygons (ESRI shapefile).
shp_path <- "C:\\Users\\ntybl\\Downloads\\spda_covid19 (3)\\spda_covid19\\shp_mx_mpios\\mx_mpios.shp"
# Read the polygons into an sp object.
# NOTE(review): rgdal was retired from CRAN in 2023; consider migrating this
# read to sf::st_read() when the rest of the pipeline is updated.
covid_shpf <- readOGR(dsn = shp_path)

## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\ntybl\Downloads\spda_covid19 (3)\spda_covid19\shp_mx_mpios\mx_mpios.shp", layer: "mx_mpios"
## with 2456 features
## It has 3 fields
## Integer64 fields read as strings:  CODELAG

# Attribute table with the COVID-19 counts and socio-economic indicators.
covid_df <- read.csv(
  file = "C:\\Users\\ntybl\\OneDrive\\Documents\\Semestre 8\\Datos\\datosFinal.csv"
)
# Attach the attributes to the polygons, keeping only IDUNICO values present
# in both inputs.
# NOTE(review): the join runs before covid_df is cleaned, so covid_j receives
# the raw columns and has to be cleaned separately further down.
covid_j <- geo_join(
  spatial_data = covid_shpf,
  data_frame = covid_df,
  by_sp = "IDUNICO",
  by_df = "IDUNICO",
  how = "inner"
)

Limpieza de datos

# Inspect the extent, CRS and attribute columns of the polygon layer.
summary(object = covid_shpf)

## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x -118.4076 -86.71041
## y   14.5321  32.71865
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Data attributes:
##    CODELAG             CVE_ENT         IDUNICO     
##  Length:2456        Min.   : 1.00   Min.   : 1001  
##  Class :character   1st Qu.:14.00   1st Qu.:14084  
##  Mode  :character   Median :20.00   Median :20231  
##                     Mean   :19.26   Mean   :19367  
##                     3rd Qu.:24.00   3rd Qu.:24030  
##                     Max.   :32.00   Max.   :32058

# Inspect column types, ranges and NA counts of the attribute table.
summary(object = covid_df)

##     IDUNICO            n               mpio           poblacion_2022   
##  Min.   : 1001   Min.   :   1.00   Length:2460        Min.   :     95  
##  1st Qu.:14083   1st Qu.:   3.00   Class :character   1st Qu.:   4470  
##  Median :20231   Median :  11.00   Mode  :character   Median :  14137  
##  Mean   :19360   Mean   :  87.96                      Mean   :  52012  
##  3rd Qu.:24029   3rd Qu.:  41.00                      3rd Qu.:  37099  
##  Max.   :32058   Max.   :4993.00                      Max.   :1815551  
##                  NA's   :68                           NA's   :3        
##   hogrem2015        hogremjefmuj2015   popnoafmed2015       gini2015        
##  Length:2460        Length:2460        Length:2460        Length:2460       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   popden2020         crimen_2018      crimen_2019     inclusion_fin_2019
##  Length:2460        Min.   :  0.00   Min.   :  0.00   Min.   : 0.0000   
##  Class :character   1st Qu.:  0.00   1st Qu.:  0.00   1st Qu.: 0.0000   
##  Mode  :character   Median :  9.75   Median : 11.50   Median : 0.0000   
##                     Mean   : 19.02   Mean   : 20.30   Mean   : 0.4876   
##                     3rd Qu.: 25.71   3rd Qu.: 26.95   3rd Qu.: 0.8500   
##                     Max.   :719.42   Max.   :551.82   Max.   :10.6800   
##                     NA's   :3        NA's   :3        NA's   :3         
##  porcentaje_pob_pobreza porcentaje_pob_pobreza_ext
##  Length:2460            Length:2460               
##  Class :character       Class :character          
##  Mode  :character       Mode  :character          
##                                                   
##                                                   
##                                                   
##                                                   
##  porcentaje_pob_servicios_salud porcentaje_pob_acceso_ss pob_6.14_no_edu 
##  Length:2460                    Length:2460              Min.   : 0.000  
##  Class :character               Class :character         1st Qu.: 4.170  
##  Mode  :character               Mode  :character         Median : 5.720  
##                                                          Mean   : 6.304  
##                                                          3rd Qu.: 7.810  
##                                                          Max.   :38.560  
##                                                          NA's   :3       
##  rezago_social         grado_rs            feb_2020          march_2020    
##  Min.   :-1.550000   Length:2460        Min.   :0.000000   Min.   :  0.00  
##  1st Qu.:-0.760000   Class :character   1st Qu.:0.000000   1st Qu.:  0.00  
##  Median :-0.220000   Mode  :character   Median :0.000000   Median :  0.00  
##  Mean   :-0.001486                      Mean   :0.003256   Mean   :  1.27  
##  3rd Qu.: 0.470000                      3rd Qu.:0.000000   3rd Qu.:  0.00  
##  Max.   : 6.830000                      Max.   :1.000000   Max.   :139.00  
##  NA's   :3                              NA's   :3          NA's   :3       
##    april_2020         may_2020         june_2020         july_2020      
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   1.00  
##  Median :   0.00   Median :   1.00   Median :   4.00   Median :   5.00  
##  Mean   :  12.07   Mean   :  37.62   Mean   :  64.66   Mean   :  86.39  
##  3rd Qu.:   2.00   3rd Qu.:   8.00   3rd Qu.:  19.00   3rd Qu.:  27.00  
##  Max.   :1902.00   Max.   :4543.00   Max.   :5872.00   Max.   :6079.00  
##  NA's   :3         NA's   :3         NA's   :3         NA's   :3        
##   august_2020        sept_2020          oct_2020          nov_2020      
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:   1.00   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :   4.00   Median :   3.00   Median :   2.00   Median :   2.00  
##  Mean   :  69.52   Mean   :  60.02   Mean   :  70.06   Mean   :  83.85  
##  3rd Qu.:  22.00   3rd Qu.:  17.00   3rd Qu.:  14.00   3rd Qu.:  15.00  
##  Max.   :4295.00   Max.   :3877.00   Max.   :8984.00   Max.   :6798.00  
##  NA's   :3         NA's   :3         NA's   :3         NA's   :3        
##     dic_2020          jan_2021          feb_2021           mar_2021     
##  Min.   :    0.0   Min.   :    0.0   Min.   :    0.00   Min.   :   0.0  
##  1st Qu.:    0.0   1st Qu.:    1.0   1st Qu.:    1.00   1st Qu.:   0.0  
##  Median :    3.0   Median :    7.0   Median :    3.00   Median :   2.0  
##  Mean   :  134.3   Mean   :  172.7   Mean   :   79.46   Mean   :  58.3  
##  3rd Qu.:   20.0   3rd Qu.:   35.0   3rd Qu.:   17.00   3rd Qu.:  12.0  
##  Max.   :19628.0   Max.   :24992.0   Max.   :11834.00   Max.   :9918.0  
##  NA's   :3         NA's   :3         NA's   :3          NA's   :3       
##    mar_2021.1       april_2021         may_2021         june_2021     
##  Min.   :   0.0   Min.   :   0.00   Min.   :   0.00   Min.   :   0.0  
##  1st Qu.:   0.0   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.0  
##  Median :   2.0   Median :   1.00   Median :   1.00   Median :   1.0  
##  Mean   :  58.3   Mean   :  38.11   Mean   :  26.81   Mean   :  43.6  
##  3rd Qu.:  12.0   3rd Qu.:   9.00   3rd Qu.:   5.00   3rd Qu.:   5.0  
##  Max.   :9918.0   Max.   :5065.00   Max.   :3917.00   Max.   :6640.0  
##  NA's   :3        NA's   :3         NA's   :3         NA's   :3       
##    july_2021        august_2021        sept_2021         oct_2021      
##  Min.   :    0.0   Min.   :    0.0   Min.   :   0.0   Min.   :   0.00  
##  1st Qu.:    1.0   1st Qu.:    2.0   1st Qu.:   2.0   1st Qu.:   0.00  
##  Median :    5.0   Median :   13.0   Median :   8.0   Median :   3.00  
##  Mean   :  155.3   Mean   :  212.8   Mean   : 109.9   Mean   :  49.88  
##  3rd Qu.:   31.0   3rd Qu.:   73.0   3rd Qu.:  43.0   3rd Qu.:  16.00  
##  Max.   :20315.0   Max.   :17590.0   Max.   :7742.0   Max.   :3556.00  
##  NA's   :3         NA's   :3         NA's   :3        NA's   :3        
##     nov_2021          dic_2021          REGION            total_2021       
##  Min.   :   0.00   Min.   :   0.00   Length:2460        Min.   :     0.00  
##  1st Qu.:   0.00   1st Qu.:   0.00   Class :character   1st Qu.:    14.75  
##  Median :   1.00   Median :   1.00   Mode  :character   Median :    63.00  
##  Mean   :  33.14   Mean   :  48.76                      Mean   :  1085.73  
##  3rd Qu.:   6.00   3rd Qu.:   6.00                      3rd Qu.:   301.25  
##  Max.   :4854.00   Max.   :4708.00                      Max.   :115547.00  
##  NA's   :3         NA's   :3                                               
##   tasa_covid         total_casos      
##  Length:2460        Min.   :     0.0  
##  Class :character   1st Qu.:    22.0  
##  Mode  :character   Median :   102.5  
##                     Mean   :  1704.7  
##                     3rd Qu.:   454.5  
##                     Max.   :168500.0  
##

Convertir a numeric

# Columns that were imported as character but are used as numeric measures.
char_to_num <- c(
  "porcentaje_pob_acceso_ss",
  "porcentaje_pob_pobreza",
  "porcentaje_pob_servicios_salud",
  "hogremjefmuj2015",
  "tasa_covid",
  "hogrem2015",
  "popnoafmed2015",
  "gini2015",
  "popden2020",
  "porcentaje_pob_pobreza_ext"
)
# Coerce each of them to numeric; entries that cannot be parsed become NA.
covid_df[char_to_num] <- lapply(covid_df[char_to_num], as.numeric)

Convertir a factor

# Treat REGION as a categorical variable.
covid_df[["REGION"]] <- as.factor(covid_df[["REGION"]])

Convertir NA a la mediana

# Fill the NAs of every numeric column with that column's median.
covid_df <- covid_df %>%
  mutate(
    across(
      where(is.numeric),
      function(col) replace_na(col, median(col, na.rm = TRUE))
    )
  )

Cambiar 0 a 0.01

# Swap exact zeros for 0.01 so both variables can be log-transformed later.
covid_df$tasa_covid[covid_df$tasa_covid == 0] <- 0.01
covid_df$crimen_2019[covid_df$crimen_2019 == 0] <- 0.01

# Inspect the joined spatial object (geometry plus attribute columns).
summary(object = covid_j)

## Object of class SpatialPolygonsDataFrame
## Coordinates:
##         min       max
## x -118.4076 -86.71041
## y   14.5321  32.71865
## Is projected: FALSE 
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Data attributes:
##    CODELAG             CVE_ENT         IDUNICO        IDUNICO.1    
##  Length:2456        Min.   : 1.00   Min.   : 1001   Min.   : 1001  
##  Class :character   1st Qu.:14.00   1st Qu.:14084   1st Qu.:14084  
##  Mode  :character   Median :20.00   Median :20231   Median :20231  
##                     Mean   :19.26   Mean   :19367   Mean   :19367  
##                     3rd Qu.:24.00   3rd Qu.:24030   3rd Qu.:24030  
##                     Max.   :32.00   Max.   :32058   Max.   :32058  
##                                                                    
##        n               mpio           poblacion_2022     hogrem2015       
##  Min.   :   1.00   Length:2456        Min.   :     95   Length:2456       
##  1st Qu.:   3.00   Class :character   1st Qu.:   4468   Class :character  
##  Median :  11.00   Mode  :character   Median :  14129   Mode  :character  
##  Mean   :  88.07                      Mean   :  52014                     
##  3rd Qu.:  41.00                      3rd Qu.:  37095                     
##  Max.   :4993.00                      Max.   :1815551                     
##  NA's   :68                                                               
##  hogremjefmuj2015   popnoafmed2015       gini2015          popden2020       
##  Length:2456        Length:2456        Length:2456        Length:2456       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   crimen_2018       crimen_2019     inclusion_fin_2019 porcentaje_pob_pobreza
##  Min.   :  0.000   Min.   :  0.00   Min.   : 0.0000    Length:2456           
##  1st Qu.:  0.000   1st Qu.:  0.00   1st Qu.: 0.0000    Class :character      
##  Median :  9.745   Median : 11.49   Median : 0.0000    Mode  :character      
##  Mean   : 19.006   Mean   : 20.29   Mean   : 0.4869                          
##  3rd Qu.: 25.695   3rd Qu.: 26.91   3rd Qu.: 0.8425                          
##  Max.   :719.420   Max.   :551.82   Max.   :10.6800                          
##                                                                              
##  porcentaje_pob_pobreza_ext porcentaje_pob_servicios_salud
##  Length:2456                Length:2456                   
##  Class :character           Class :character              
##  Mode  :character           Mode  :character              
##                                                           
##                                                           
##                                                           
##                                                           
##  porcentaje_pob_acceso_ss pob_6.14_no_edu  rezago_social      
##  Length:2456              Min.   : 0.000   Min.   :-1.550000  
##  Class :character         1st Qu.: 4.170   1st Qu.:-0.760000  
##  Mode  :character         Median : 5.715   Median :-0.220000  
##                           Mean   : 6.304   Mean   :-0.001437  
##                           3rd Qu.: 7.810   3rd Qu.: 0.470000  
##                           Max.   :38.560   Max.   : 6.830000  
##                                                               
##    grado_rs            feb_2020          march_2020       april_2020     
##  Length:2456        Min.   :0.000000   Min.   :  0.00   Min.   :   0.00  
##  Class :character   1st Qu.:0.000000   1st Qu.:  0.00   1st Qu.:   0.00  
##  Mode  :character   Median :0.000000   Median :  0.00   Median :   0.00  
##                     Mean   :0.003257   Mean   :  1.27   Mean   :  12.07  
##                     3rd Qu.:0.000000   3rd Qu.:  0.00   3rd Qu.:   2.00  
##                     Max.   :1.000000   Max.   :139.00   Max.   :1902.00  
##                                                                          
##     may_2020         june_2020         july_2020        august_2020     
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   1.00   1st Qu.:   1.00  
##  Median :   1.00   Median :   4.00   Median :   5.00   Median :   4.00  
##  Mean   :  37.63   Mean   :  64.68   Mean   :  86.39   Mean   :  69.51  
##  3rd Qu.:   8.00   3rd Qu.:  19.00   3rd Qu.:  27.00   3rd Qu.:  22.00  
##  Max.   :4543.00   Max.   :5872.00   Max.   :6079.00   Max.   :4295.00  
##                                                                         
##    sept_2020          oct_2020          nov_2020          dic_2020      
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   Min.   :    0.0  
##  1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:    0.0  
##  Median :   3.00   Median :   2.00   Median :   2.00   Median :    3.0  
##  Mean   :  60.02   Mean   :  70.09   Mean   :  83.88   Mean   :  134.3  
##  3rd Qu.:  17.00   3rd Qu.:  14.00   3rd Qu.:  15.00   3rd Qu.:   20.0  
##  Max.   :3877.00   Max.   :8984.00   Max.   :6798.00   Max.   :19628.0  
##                                                                         
##     jan_2021          feb_2021           mar_2021         mar_2021.1     
##  Min.   :    0.0   Min.   :    0.00   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:    1.0   1st Qu.:    1.00   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :    7.0   Median :    3.00   Median :   2.00   Median :   2.00  
##  Mean   :  172.8   Mean   :   79.48   Mean   :  58.32   Mean   :  58.32  
##  3rd Qu.:   35.0   3rd Qu.:   17.00   3rd Qu.:  12.00   3rd Qu.:  12.00  
##  Max.   :24992.0   Max.   :11834.00   Max.   :9918.00   Max.   :9918.00  
##                                                                          
##    april_2021         may_2021         june_2021        july_2021      
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.0   Min.   :    0.0  
##  1st Qu.:   0.00   1st Qu.:   0.00   1st Qu.:   0.0   1st Qu.:    1.0  
##  Median :   1.00   Median :   1.00   Median :   1.0   Median :    5.0  
##  Mean   :  38.12   Mean   :  26.81   Mean   :  43.6   Mean   :  155.4  
##  3rd Qu.:   9.00   3rd Qu.:   5.00   3rd Qu.:   5.0   3rd Qu.:   31.0  
##  Max.   :5065.00   Max.   :3917.00   Max.   :6640.0   Max.   :20315.0  
##                                                                        
##   august_2021        sept_2021         oct_2021          nov_2021      
##  Min.   :    0.0   Min.   :   0.0   Min.   :   0.00   Min.   :   0.00  
##  1st Qu.:    2.0   1st Qu.:   2.0   1st Qu.:   0.00   1st Qu.:   0.00  
##  Median :   13.0   Median :   8.0   Median :   3.00   Median :   1.00  
##  Mean   :  212.9   Mean   : 109.9   Mean   :  49.89   Mean   :  33.16  
##  3rd Qu.:   73.0   3rd Qu.:  43.0   3rd Qu.:  16.00   3rd Qu.:   6.00  
##  Max.   :17590.0   Max.   :7742.0   Max.   :3556.00   Max.   :4854.00  
##                                                                        
##     dic_2021          REGION            total_2021        tasa_covid       
##  Min.   :   0.00   Length:2456        Min.   :     0.0   Length:2456       
##  1st Qu.:   0.00   Class :character   1st Qu.:    15.0   Class :character  
##  Median :   1.00   Mode  :character   Median :    63.5   Mode  :character  
##  Mean   :  48.77                      Mean   :  1087.4                     
##  3rd Qu.:   6.00                      3rd Qu.:   301.2                     
##  Max.   :4708.00                      Max.   :115547.0                     
##                                                                            
##   total_casos      
##  Min.   :     0.0  
##  1st Qu.:    22.0  
##  Median :   103.0  
##  Mean   :  1707.3  
##  3rd Qu.:   454.5  
##  Max.   :168500.0  
##

Convertir a numeric

# Coerce the character-typed measures of the joined object to numeric, one
# column at a time; entries that cannot be parsed become NA.
for (col_name in c(
  "hogrem2015",
  "hogremjefmuj2015",
  "popnoafmed2015",
  "gini2015",
  "popden2020",
  "porcentaje_pob_pobreza",
  "porcentaje_pob_pobreza_ext",
  "porcentaje_pob_servicios_salud",
  "porcentaje_pob_acceso_ss",
  "tasa_covid"
)) {
  covid_j[[col_name]] <- as.numeric(covid_j[[col_name]])
}

Cambiar 0 por 0.01

# Swap exact zeros for 0.01 in three numeric columns of the joined object.
covid_j$crimen_2019 <- replace(
  covid_j$crimen_2019, covid_j$crimen_2019 == 0, 0.01
)
covid_j$tasa_covid <- replace(
  covid_j$tasa_covid, covid_j$tasa_covid == 0, 0.01
)
covid_j$hogremjefmuj2015 <- replace(
  covid_j$hogremjefmuj2015, covid_j$hogremjefmuj2015 == 0, 0.01
)
# Treat REGION as a categorical variable.
covid_j[["REGION"]] <- as.factor(covid_j[["REGION"]])

Cambiar NAs por la mediana

# Replace the NAs of a numeric vector with that vector's own median.
# Computing the median from the data (instead of typing it in as a literal)
# keeps the imputation correct if the input file is updated.
#   x: numeric vector, possibly containing NA.
#   returns: x with every NA replaced by median(x, na.rm = TRUE).
impute_median <- function(x) {
  x[is.na(x)] <- median(x, na.rm = TRUE)
  x
}

# Same five columns that were previously imputed with hard-coded values.
covid_j$n <- impute_median(covid_j$n)
covid_j$porcentaje_pob_servicios_salud <-
  impute_median(covid_j$porcentaje_pob_servicios_salud)
covid_j$porcentaje_pob_acceso_ss <-
  impute_median(covid_j$porcentaje_pob_acceso_ss)
covid_j$porcentaje_pob_pobreza <-
  impute_median(covid_j$porcentaje_pob_pobreza)
covid_j$porcentaje_pob_pobreza_ext <-
  impute_median(covid_j$porcentaje_pob_pobreza_ext)

Objetivos

Predecir los factores asociados con el incremento / disminución de casos confirmados de COVID-19
Visualizar y analizar clusters en nuestras variables de interés
Estimación regresión espacial global y local
Comparar los resultados de las diferentes regresiones que se realizaran

Visualización de las variables

visualización de casos confirmados de covid 19 y factores socioeconómicos

# Quick choropleth of the COVID-19 rate by municipality.
qtm(shp = covid_j, fill = "tasa_covid")

En el mapa anterior podemos visualizar los casos confirmados de COVID-19 expresados como una tasa de cada 10 por el total de casos. Podemos notar concentraciones en áreas específicas: Baja California Sur, Tabasco y Nuevo León, además de una alta concentración en el Estado de México.

# Quick choropleth of the social-lag index by municipality.
qtm(shp = covid_j, fill = "rezago_social")

## Variable(s) "rezago_social" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#mapview(covid_j, zcol="rezago_social")

En el mapa anterior se pueden visualizar clusters: hay un alto grado de rezago social en la región norte del país, a lo largo de los estados de Sonora, Chihuahua, Sinaloa, Durango y Nayarit.

En cuanto al centro y sur del país podemos visualizar concentraciones de rezago social a lo largo de estas regiones.

H0: El rezago social se distribuye aleatoriamente en el espacio entre los municipios (no hay autocorrelación espacial). H1: El rezago social no se distribuye aleatoriamente: hay conglomerados de valores similares (autocorrelación positiva) o municipios vecinos con valores disímiles (autocorrelación negativa).

Modelado de vecinos espaciales y matrices de conectividad espacial

# Queen contiguity: polygons sharing at least one boundary point are neighbours.
swm_queen2 <- poly2nb(pl = covid_shpf, queen = TRUE)
summary(object = swm_queen2)

## Neighbour list object:
## Number of regions: 2456 
## Number of nonzero links: 14392 
## Percentage nonzero weights: 0.2385967 
## Average number of links: 5.859935 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19  20  22 
##   8  63 202 393 515 467 317 233 143  52  27  15   5   4   3   3   1   1   1   3 
## 8 least connected regions:
## 299 354 667 858 930 1243 1461 2234 with 1 link
## 3 most connected regions:
## 966 1058 1173 with 22 links

Resumen de la matriz de pesos espaciales: hay 2,456 municipios en México; la unidad de área más conectada tiene 22 vecinos y la unidad de área menos conectada tiene 1 vecino.

# Rook contiguity: only polygons sharing a boundary segment are neighbours.
swm_rook2 <- poly2nb(pl = covid_shpf, queen = FALSE)
summary(object = swm_rook2)

## Neighbour list object:
## Number of regions: 2456 
## Number of nonzero links: 14176 
## Percentage nonzero weights: 0.2350158 
## Average number of links: 5.771987 
## Link number distribution:
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19  20  21 
##   8  68 217 416 507 461 321 230 119  52  23  13   6   4   4   1   1   1   2   1 
##  22 
##   1 
## 8 least connected regions:
## 299 354 667 858 930 1243 1461 2234 with 1 link
## 1 most connected region:
## 1058 with 22 links

Trazado de mapas de vecinos basados en la contigüidad de queen

# Draw the municipality outlines in light grey as a base layer.
# The polygon plot method takes `border` (singular); the previous `borders`
# was not a recognised argument, so the outline colour was never applied.
plot(covid_shpf, border = "lightgrey")
# Overlay the queen-contiguity links, drawn between the polygon coordinates.
plot(swm_queen2, coordinates(covid_shpf), pch = 19, cex = 0.6, add = TRUE, col = "red")
title(main = "Contigüidad Queen", cex.main = 0.9)

Trazado de mapas de vecinos basados en la contigüidad de rook

# Draw the municipality outlines in light grey as a base layer.
# The polygon plot method takes `border` (singular); the previous `borders`
# was not a recognised argument, so the outline colour was never applied.
plot(covid_shpf, border = "lightgrey")
# Overlay the rook-contiguity links, drawn between the polygon coordinates.
plot(swm_rook2, coordinates(covid_shpf), pch = 19, cex = 0.6, add = TRUE, col = "red")
title(main = "Contigüidad Rook", cex.main = 0.9)

Cálculo de vecinos basados en distancia

# Two-column coordinate matrix (longitude, latitude), one row per polygon.
coords2 <- coordinates(obj = covid_shpf)
head(x = coords2)

##        [,1]     [,2]
## 0 -99.20622 19.19834
## 1 -99.09036 19.24515
## 2 -99.26841 19.26898
## 3 -99.18211 19.48533
## 4 -99.16113 19.38064
## 5 -99.14906 19.43137

# Nearest neighbour (k = 1) of every municipality.
# coords2 holds longitude/latitude in decimal degrees, so longlat = TRUE is
# required for the neighbour search to use great-circle distance; without it
# the search uses Euclidean distance on degrees, which is inconsistent with
# the kilometre distances computed by nbdists() below.
knn2 <- knn2nb(knearneigh(coords2, k = 1, longlat = TRUE))
# Distance (km) from each municipality to its nearest neighbour.
knn1_dist2 <- unlist(nbdists(knn2, coords2, longlat = TRUE))
summary(knn1_dist2)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.558   7.387  11.413  15.811  19.110 229.364

Con un umbral de distancia de 98 km, cada municipio tiene en promedio alrededor de 122 vecinos (véase el número promedio de enlaces en el resultado siguiente).

# Distance-band neighbours: every pair of municipalities between 0 and 98 km
# apart (great-circle distance) is linked.
dwm2 <- dnearneigh(x = coords2, d1 = 0, d2 = 98, longlat = TRUE)
print(dwm2)

## Neighbour list object:
## Number of regions: 2456 
## Number of nonzero links: 298802 
## Percentage nonzero weights: 4.953667 
## Average number of links: 121.6621 
## 5 regions with no links:
## 348 352 353 355 2448

# Row-standardised ("W") spatial weights built from the queen neighbours;
# zero.policy = TRUE tolerates regions without neighbours.
rswm_queen2 <- nb2listw(
  neighbours = swm_queen2,
  style = "W",
  zero.policy = TRUE
)
print(rswm_queen2)

## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 2456 
## Number of nonzero links: 14392 
## Percentage nonzero weights: 0.2385967 
## Average number of links: 5.859935 
## 
## Weights style: W 
## Weights constants summary:
##      n      nn   S0       S1       S2
## W 2456 6031936 2456 898.7549 10338.48

Vamos a crear un spatial lag de la variable del conjunto de datos.

# Spatial lag of rezago_social under the row-standardised queen weights,
# stored as a new column of the joined object.
covid_j$sp_rezago_social <- lag.listw(
  x = rswm_queen2,
  var = covid_j$rezago_social,
  zero.policy = TRUE
)

# Map of the original variable, for comparison with the lagged one.
qtm(shp = covid_j, fill = "rezago_social")

## Variable(s) "rezago_social" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

# Map of the spatially lagged social-lag index.
qtm(shp = covid_j, fill = "sp_rezago_social")

## Variable(s) "sp_rezago_social" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

En los mapas anteriores se comparan el rezago social en México y su spatial lag. Podemos notar una mayor concentración en el mapa del lag, tanto en valores negativos como positivos, en comparación con el mapa sin lag. La principal observación es que el mapa del spatial lag muestra una alta concentración de valores entre -1 y 0; también podemos notar más valores positivos entre 1 y 2 en comparación con el mapa de rezago.

Moran’s I Test

Identificar y medir la autocorrelación espacial

# Global Moran's I of rezago_social under the queen weights.
moran.test(
  x = covid_j$rezago_social,
  listw = rswm_queen2,
  na.action = na.omit,
  zero.policy = TRUE
)

## 
##  Moran I test under randomisation
## 
## data:  covid_j$rezago_social  
## weights: rswm_queen2    
## 
## Moran I statistic standard deviate = 56.494, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6879873175     -0.0004073320      0.0001484831

En conclusión, los resultados sugieren que existe una autocorrelación espacial positiva significativa en la variable analizada, lo que indica que las ubicaciones vecinas tienden a tener valores similares.

Computar el correlograma I de moran

# Moran's I correlogram of rezago_social for contiguity orders 1 to 6,
# using binary ("B") weights.
Moran_cor <- sp.correlogram(
  neighbours = swm_queen2,
  var = covid_j$rezago_social,
  order = 6,
  method = "I",
  style = "B"
)
plot(x = Moran_cor)

En general, el correlograma indica que existe una autocorrelación espacial positiva en la variable que se analiza, con observaciones vecinas que tienen valores similares. La fuerza de la autocorrelación espacial se debilita gradualmente a medida que aumenta la distancia entre las observaciones, y el efecto de las observaciones vecinas disminuye más allá de cierta distancia.

Exploremos el análisis de datos espaciales y modelemos las relaciones espaciales locales

A continuación, haremos un normality plot para analizar la distribución de las variables y concluir si estas necesitan ser alteradas

# Normality diagnostics for the model variables.
# porcentaje_pob_pobreza was listed twice in the original call; each variable
# is now passed once.
plot_normality(
  covid_df,
  hogremjefmuj2015,
  crimen_2019,
  porcentaje_pob_pobreza,
  porcentaje_pob_servicios_salud,
  porcentaje_pob_acceso_ss,
  rezago_social,
  tasa_covid
)

En base a los resultados usaremos log en las variables: tasa_covid (variable dependiente) y crimen_2019.

Regresion no espacial

# Baseline (non-spatial) OLS model of the log COVID-19 rate.
# It is fitted on the attribute table of covid_j, i.e. on the same joined
# municipalities that the spatial models use. Fitting it on covid_df (which
# has extra rows and was cleaned separately) made its AIC not comparable with
# the AIC of the spatial models.
non_spatial_model2 <- lm(
  log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) +
    porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss +
    rezago_social + porcentaje_pob_pobreza + REGION,
  data = as.data.frame(covid_j)
)

summary(non_spatial_model2)

## 
## Call:
## lm(formula = log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) + 
##     porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss + 
##     rezago_social + porcentaje_pob_pobreza + REGION, data = covid_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7896 -0.4613  0.1233  0.7076  3.5079 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     5.7172576  0.2855323  20.023  < 2e-16 ***
## hogremjefmuj2015                0.0145767  0.0050986   2.859  0.00429 ** 
## log(crimen_2019)                0.0650888  0.0086855   7.494 9.28e-14 ***
## porcentaje_pob_servicios_salud  0.0001964  0.0026083   0.075  0.93997    
## porcentaje_pob_acceso_ss       -0.0025806  0.0029666  -0.870  0.38445    
## rezago_social                  -0.3086272  0.0513128  -6.015 2.07e-09 ***
## porcentaje_pob_pobreza         -0.0304026  0.0030041 -10.120  < 2e-16 ***
## REGIONCENTRO NORTE             -0.5194208  0.0962660  -5.396 7.48e-08 ***
## REGIONNORTE                    -0.3463307  0.1285403  -2.694  0.00710 ** 
## REGIONSUR                      -0.3204493  0.0818138  -3.917 9.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.441 on 2450 degrees of freedom
## Multiple R-squared:  0.3482, Adjusted R-squared:  0.3458 
## F-statistic: 145.4 on 9 and 2450 DF,  p-value: < 2.2e-16

# Variance inflation factors of the baseline model's predictors.
non_spatial_model2 %>% VIF()

##                                    GVIF Df GVIF^(1/(2*Df))
## hogremjefmuj2015               1.140313  1        1.067855
## log(crimen_2019)               1.165702  1        1.079677
## porcentaje_pob_servicios_salud 1.248341  1        1.117292
## porcentaje_pob_acceso_ss       2.283268  1        1.511049
## rezago_social                  3.121308  1        1.766722
## porcentaje_pob_pobreza         5.116764  1        2.262026
## REGION                         2.496127  3        1.164692

Interpretacion:

El intercepto tiene un valor estimado de 5.717, que representa el valor esperado de log(tasa_covid) cuando todas las variables independientes numéricas son iguales a cero y REGION está en su categoría de referencia.
El valor R-cuadrado de 0.3482 sugiere que aproximadamente el 34.82 % de la varianza de la variable dependiente puede explicarse por las variables independientes.
El F-statistic de 145.4 con un p-value de < 2.2e-16 sugiere que el modelo en su conjunto es estadísticamente significativo.
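Los residuos tienen un valor mínimo de -8.7896 y un valor máximo de 3.5079, con una mediana de 0.1233. Esta asimetría (una cola negativa mucho más larga que la positiva) sugiere que los residuos no son perfectamente normales, por lo que el supuesto de errores normalmente distribuidos debería verificarse con un gráfico Q-Q o una prueba de normalidad.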

- La variable porcentaje_pob_pobreza muestra un VIF de 5.116764, lo que indica multicolinealidad, por lo cual será removida del modelo.

# Akaike information criterion of the baseline model.
AIC(object = non_spatial_model2)

## [1] 8790.453

Global Spatial Regression Analysis

Convertir a sp

# Coerce the polygon layer to the sp "Spatial" class.
# NOTE(review): covid_shpf already comes from readOGR() as an sp object, so
# this coercion looks redundant (or lossy) — confirm how covid.sp is used.
covid.sp <- as(object = covid_shpf, Class = "Spatial")

# Spatial lag (SAR) model with the same specification as the baseline model,
# estimated with the row-standardised queen weights and no lagged covariates.
# NOTE(review): the text above says porcentaje_pob_pobreza would be dropped
# for multicollinearity, but it is still part of this formula.
# NOTE(review): lagsarlm() presumably comes from an older spdep release; newer
# releases moved it to the spatialreg package — confirm the installed version.
spatial_auto <- lagsarlm(
  log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) +
    porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss +
    rezago_social + porcentaje_pob_pobreza + REGION,
  data = covid_j,
  listw = rswm_queen2,
  Durbin = FALSE
)
summary(object = spatial_auto)

## 
## Call:
## lagsarlm(formula = log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) + 
##     porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss + 
##     rezago_social + porcentaje_pob_pobreza + REGION, data = covid_j, 
##     listw = rswm_queen2, Durbin = FALSE)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -8.69596 -0.39978  0.11408  0.61356  3.93984 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                                   Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)                     3.71278485  0.30459866 12.1891 < 2.2e-16
## hogremjefmuj2015                0.01482141  0.00484015  3.0622  0.002197
## log(crimen_2019)                0.05549374  0.00827233  6.7084 1.968e-11
## porcentaje_pob_servicios_salud  0.00018247  0.00248020  0.0736  0.941352
## porcentaje_pob_acceso_ss       -0.00212963  0.00281814 -0.7557  0.449837
## rezago_social                  -0.24798032  0.04909005 -5.0515 4.383e-07
## porcentaje_pob_pobreza         -0.02253814  0.00291559 -7.7302 1.066e-14
## REGIONCENTRO NORTE             -0.41501515  0.09193795 -4.5141 6.359e-06
## REGIONNORTE                    -0.30893457  0.12204657 -2.5313  0.011365
## REGIONSUR                      -0.15084423  0.07913821 -1.9061  0.056639
## 
## Rho: 0.35781, LR test value: 191.11, p-value: < 2.22e-16
## Asymptotic standard error: 0.0254
##     z-value: 14.087, p-value: < 2.22e-16
## Wald statistic: 198.45, p-value: < 2.22e-16
## 
## Log likelihood: -4283.414 for lag model
## ML residual variance (sigma squared): 1.8708, (sigma: 1.3678)
## Number of observations: 2456 
## Number of parameters estimated: 12 
## AIC: 8590.8, (AIC for lm: 8779.9)
## LM test for residual autocorrelation
## test value: 1.475, p-value: 0.22455

El AIC del modelo Global Spatial Regression es 8590.8, que es inferior al AIC del modelo de Regresion no espacial (8790.453).

Detección de residuos de regresión espacialmente autocorrelacionados

# Moran's I test on the exponentiated residuals of the spatial lag model.
# NOTE(review): exp() is applied to the residuals before testing, while the
# LM test for residual autocorrelation printed in the model summary
# (p-value 0.22455) points the other way — confirm the transformation is
# intended before relying on this result.
moran.test(exp(spatial_auto$residuals), rswm_queen2)

## 
##  Moran I test under randomisation
## 
## data:  exp(spatial_auto$residuals)  
## weights: rswm_queen2    
## 
## Moran I statistic standard deviate = 10.285, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.121717673      -0.000407332       0.000141004

El p-value < 2.2e-16 indica que la hipótesis nula (es decir, la ausencia de autocorrelación espacial) se puede rechazar con un grado de confianza muy alto. La hipótesis alternativa es "greater" (mayor), lo que significa que la prueba evalúa la presencia de autocorrelación espacial positiva.

Los resultados indican que existe autocorrelación espacial positiva y estadísticamente significativa (I de Moran ≈ 0.12) en los residuos exponenciados del modelo.

Local Spatial Regression Analysis

Determinar kernel bandwidth

El kernel bandwidth (ancho de banda del kernel) determina hasta qué distancia las observaciones vecinas tienen influencia en cada estimación local. Un ancho de banda pequeño hace que las estimaciones se ajusten más a los datos cercanos, mientras que un ancho de banda grande produce una transición más suave de los coeficientes en el espacio.

# Select the adaptive GWR bandwidth (number of nearest neighbours) using the
# AIC approach. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned.
bw4 <- bw.gwr(
  log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) +
    porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss +
    rezago_social,
  data = covid_j,
  approach = "AIC",
  adaptive = TRUE
)

## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Adaptive bandwidth (number of nearest neighbours): 1525 AICc value: 8673.046 
## Adaptive bandwidth (number of nearest neighbours): 950 AICc value: 8590.03 
## Adaptive bandwidth (number of nearest neighbours): 594 AICc value: 8536.634 
## Adaptive bandwidth (number of nearest neighbours): 374 AICc value: 8490.652 
## Adaptive bandwidth (number of nearest neighbours): 238 AICc value: 8439.269 
## Adaptive bandwidth (number of nearest neighbours): 154 AICc value: 8416.585 
## Adaptive bandwidth (number of nearest neighbours): 102 AICc value: 8450.97 
## Adaptive bandwidth (number of nearest neighbours): 186 AICc value: 8421.968 
## Adaptive bandwidth (number of nearest neighbours): 134 AICc value: 8420.626 
## Adaptive bandwidth (number of nearest neighbours): 166 AICc value: 8416.786 
## Adaptive bandwidth (number of nearest neighbours): 146 AICc value: 8417.038 
## Adaptive bandwidth (number of nearest neighbours): 158 AICc value: 8415.81 
## Adaptive bandwidth (number of nearest neighbours): 162 AICc value: 8415.941 
## Adaptive bandwidth (number of nearest neighbours): 157 AICc value: 8415.569 
## Adaptive bandwidth (number of nearest neighbours): 155 AICc value: 8415.75 
## Adaptive bandwidth (number of nearest neighbours): 157 AICc value: 8415.569

# Select a fixed (distance-based) GWR bandwidth using the AIC approach, for
# comparison with the adaptive bandwidth. FALSE is spelled out because `F` is
# an ordinary variable that can be reassigned.
bw5 <- bw.gwr(
  log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) +
    porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss +
    rezago_social,
  data = covid_j,
  approach = "AIC",
  adaptive = FALSE
)

## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 19.92188 AICc value: 8854.335 
## Fixed bandwidth: 12.31486 AICc value: 8792.772 
## Fixed bandwidth: 7.613466 AICc value: 8717.815 
## Fixed bandwidth: 4.707843 AICc value: 8656.888 
## Fixed bandwidth: 2.912069 AICc value: 8666.716 
## Fixed bandwidth: 5.817693 AICc value: 8677.912 
## Fixed bandwidth: 4.021919 AICc value: 8649.338 
## Fixed bandwidth: 3.597994 AICc value: 8648.359 
## Fixed bandwidth: 3.335994 AICc value: 8650.582 
## Fixed bandwidth: 3.759919 AICc value: 8648.175 
## Fixed bandwidth: 3.859994 AICc value: 8648.477 
## Fixed bandwidth: 3.698069 AICc value: 8648.137 
## Fixed bandwidth: 3.659844 AICc value: 8648.181 
## Fixed bandwidth: 3.721693 AICc value: 8648.136 
## Fixed bandwidth: 3.736294 AICc value: 8648.145 
## Fixed bandwidth: 3.71267 AICc value: 8648.134 
## Fixed bandwidth: 3.707093 AICc value: 8648.134 
## Fixed bandwidth: 3.716116 AICc value: 8648.134 
## Fixed bandwidth: 3.710539 AICc value: 8648.134

Modelo GWR

# Fit the basic GWR model with the adaptive bandwidth selected in bw4.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
m.gwr2 <- gwr.basic(
  log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) +
    porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss +
    rezago_social,
  data = covid_j,
  bw = bw4,
  adaptive = TRUE
)
m.gwr2

##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2023-05-04 00:33:56 
##    Call:
##    gwr.basic(formula = log(tasa_covid) ~ hogremjefmuj2015 + log(crimen_2019) + 
##     porcentaje_pob_servicios_salud + porcentaje_pob_acceso_ss + 
##     rezago_social, data = covid_j, bw = bw4, adaptive = T)
## 
##    Dependent (y) variable:  tasa_covid
##    Independent variables:  hogremjefmuj2015 crimen_2019 porcentaje_pob_servicios_salud porcentaje_pob_acceso_ss rezago_social
##    Number of data points: 2456
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.5549 -0.4475  0.1306  0.7534  5.4171 
## 
##    Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)                     4.579491   0.234557  19.524  < 2e-16 ***
##    hogremjefmuj2015                0.016727   0.005077   3.295 0.000999 ***
##    log(crimen_2019)                0.087698   0.008536  10.274  < 2e-16 ***
##    porcentaje_pob_servicios_salud -0.002822   0.002574  -1.096 0.273108    
##    porcentaje_pob_acceso_ss       -0.016689   0.002565  -6.507 9.26e-11 ***
##    rezago_social                  -0.703160   0.038453 -18.286  < 2e-16 ***
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 1.483 on 2450 degrees of freedom
##    Multiple R-squared: 0.3093
##    Adjusted R-squared: 0.3079 
##    F-statistic: 219.5 on 5 and 2450 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 5390.149
##    Sigma(hat): 1.482051
##    AIC:  8914.338
##    AICc:  8914.384
##    BIC:  6553.626
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 157 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                                         Min.    1st Qu.     Median    3rd Qu.
##    Intercept                      -3.0648570  3.9220523  4.7755925  5.6603797
##    hogremjefmuj2015               -0.0777765  0.0057677  0.0223156  0.0438807
##    log(crimen_2019)               -0.0528253  0.0183622  0.0391678  0.0939836
##    porcentaje_pob_servicios_salud -0.0686921 -0.0105793 -0.0028595  0.0099910
##    porcentaje_pob_acceso_ss       -0.1067718 -0.0278628 -0.0157524 -0.0072135
##    rezago_social                  -2.2025605 -0.7455267 -0.5057845 -0.2140323
##                                     Max.
##    Intercept                      9.9606
##    hogremjefmuj2015               0.1331
##    log(crimen_2019)               0.2719
##    porcentaje_pob_servicios_salud 0.0799
##    porcentaje_pob_acceso_ss       0.0638
##    rezago_social                  0.9165
##    ************************Diagnostic information*************************
##    Number of data points: 2456 
##    Effective number of parameters (2trace(S) - trace(S'S)): 294.0972 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 2161.903 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 8415.569 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 8146.973 
##    BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 7201.187 
##    Residual sum of squares: 3623.659 
##    R-square value:  0.5356917 
##    Adjusted R-square value:  0.4724997 
## 
##    ***********************************************************************
##    Program stops at: 2023-05-04 00:33:58

El intercepto tiene un valor estimado de 4.57949, que representa el valor esperado de la variable dependiente cuando todas las variables independientes son iguales a cero.
El valor R-cuadrado de 0.3093 sugiere que aproximadamente el 30.93 % de la varianza de la variable dependiente puede explicarse por las variables independientes.
El F-statistic de 219.5 con un p-value de < 2.2e-16 sugiere que el modelo en su conjunto es estadísticamente significativo.
Los residuos tienen un valor mínimo de -8.5549 y un valor máximo de 5.4171, con una media de aproximadamente cero; sin embargo, esto por sí solo no confirma la normalidad de los errores, y la asimetría entre el mínimo y el máximo sugiere verificar dicho supuesto.

Mapeo de output GWR

# Convert the SDF component of the GWR result to an sf object for mapping,
# then print it.
gwr_sf2 <- st_as_sf(m.gwr2$SDF)
gwr_sf2

## Simple feature collection with 2456 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -118.4076 ymin: 14.5321 xmax: -86.71041 ymax: 32.71865
## Geodetic CRS:  WGS 84
## First 10 features:
##   Intercept hogremjefmuj2015 log(crimen_2019) porcentaje_pob_servicios_salud
## 0  3.517920       0.08621184       0.04665791                   0.0076622301
## 1  4.461638       0.08473739       0.02553958                   0.0037638611
## 2  2.994190       0.09013743       0.07950330                   0.0034340540
## 3  3.589087       0.09087288       0.06544337                   0.0001715673
## 4  3.918151       0.09319415       0.05651821                   0.0009192429
## 5  3.993362       0.09219580       0.05245274                   0.0009886000
## 6  3.070738       0.09272199       0.08487122                   0.0001666924
## 7  4.255017       0.09012785       0.03918088                   0.0011673969
## 8  2.637823       0.09139063       0.10558417                  -0.0003432528
## 9  3.413731       0.09210259       0.07382638                  -0.0001231033
##   porcentaje_pob_acceso_ss rezago_social        y     yhat   residual CV_Score
## 0              -0.01845044   -0.01352783 6.987234 5.798431  1.1888031        0
## 1              -0.02491597    0.27686592 6.771397 5.745879  1.0255181        0
## 2              -0.01490060   -0.21284135 6.615602 5.846263  0.7693390        0
## 3              -0.02243718   -0.12551921 6.741702 6.565670  0.1760325        0
## 4              -0.02316494    0.15589781 6.284016 6.725628 -0.4416122        0
## 5              -0.02465204    0.12732143 6.455303 6.736862 -0.2815586        0
## 6              -0.01569027   -0.18774268 7.326983 5.885798  1.4411841        0
## 7              -0.02781860    0.13956600 6.521996 6.155235  0.3667610        0
## 8              -0.01214897   -0.39016803 6.357757 5.177360  1.1803977        0
## 9              -0.01993830   -0.12667738 6.303898 6.549978 -0.2460803        0
##   Stud_residual Intercept_SE hogremjefmuj2015_SE log(crimen_2019)_SE
## 0     0.9375075     1.876321          0.03099525          0.08541510
## 1     0.8114358     1.827819          0.03035636          0.08064994
## 2     0.6053743     1.865082          0.03190989          0.08752613
## 3     0.1428804     1.767819          0.03323395          0.08903749
## 4    -0.3637076     1.810613          0.03237590          0.08578721
## 5    -0.2320735     1.799178          0.03290365          0.08625244
## 6     1.1370093     1.819595          0.03218194          0.08879828
## 7     0.2917584     1.849283          0.03444389          0.09014064
## 8     0.9492099     1.829258          0.03234873          0.09064597
## 9    -0.2000458     1.758460          0.03248253          0.08778225
##   porcentaje_pob_servicios_salud_SE porcentaje_pob_acceso_ss_SE
## 0                        0.01976335                  0.01882982
## 1                        0.02137517                  0.01855193
## 2                        0.02054331                  0.01843928
## 3                        0.02222371                  0.01735915
## 4                        0.02224150                  0.01779353
## 5                        0.02201040                  0.01747450
## 6                        0.02209163                  0.01822173
## 7                        0.02205752                  0.01755258
## 8                        0.02170162                  0.01836497
## 9                        0.02228138                  0.01746176
##   rezago_social_SE Intercept_TV hogremjefmuj2015_TV log(crimen_2019)_TV
## 0        0.6727749     1.874904            2.781454           0.5462489
## 1        0.6544276     2.440963            2.791421           0.3166720
## 2        0.6212024     1.605393            2.824749           0.9083379
## 3        0.5853562     2.030234            2.734338           0.7350092
## 4        0.6222357     2.163991            2.878503           0.6588186
## 5        0.6123675     2.219548            2.801993           0.6081305
## 6        0.6013997     1.687594            2.881181           0.9557755
## 7        0.6346973     2.300901            2.616658           0.4346638
## 8        0.5793087     1.442017            2.825169           1.1647972
## 9        0.5790804     1.941318            2.835450           0.8410172
##   porcentaje_pob_servicios_salud_TV porcentaje_pob_acceso_ss_TV
## 0                       0.387698861                  -0.9798526
## 1                       0.176085693                  -1.3430394
## 2                       0.167161683                  -0.8080904
## 3                       0.007720013                  -1.2925271
## 4                       0.041330068                  -1.3018745
## 5                       0.044915124                  -1.4107441
## 6                       0.007545500                  -0.8610745
## 7                       0.052925132                  -1.5848725
## 8                      -0.015816920                  -0.6615294
## 9                      -0.005524941                  -1.1418265
##   rezago_social_TV  Local_R2                       geometry
## 0      -0.02010751 0.8536732 MULTIPOLYGON (((-99.19671 1...
## 1       0.42306580 0.8653228 MULTIPOLYGON (((-99.13301 1...
## 2      -0.34262804 0.8530821 MULTIPOLYGON (((-99.24489 1...
## 3      -0.21443218 0.8757937 MULTIPOLYGON (((-99.15718 1...
## 4       0.25054462 0.8709750 MULTIPOLYGON (((-99.1435 19...
## 5       0.20791670 0.8751116 MULTIPOLYGON (((-99.17439 1...
## 6      -0.31217622 0.8604929 MULTIPOLYGON (((-99.18906 1...
## 7       0.21989378 0.8819562 MULTIPOLYGON (((-99.11789 1...
## 8      -0.67350623 0.8517563 MULTIPOLYGON (((-99.25738 1...
## 9      -0.21875611 0.8704589 MULTIPOLYGON (((-99.16371 1...

Predicción local de la variable dependiente

# The model response is log(tasa_covid), so exponentiate the fitted values to
# map the predictions on the original scale.
gwr_sf2$y_predicted <- exp(gwr_sf2$yhat)
qtm(shp = gwr_sf2, fill = "y_predicted")

#mapview(gwr_sf2, zcol = "y_predicted")

El mapa representa la variable dependiente por municipios. Como podemos observar, la mayoría de los municipios caen en la categoría más baja de 0-500. Sin embargo, se puede observar que en la región del centro se encuentran valores que exceden y brincan hacia la siguiente categoría indicando un margen de posibilidad más alto.

Predicción local de variables explicativas estadísticamente significativas

# Map the local t-values (coefficient / standard error) of hogremjefmuj2015.
qtm(shp = gwr_sf2, fill = "hogremjefmuj2015_TV")

## Variable(s) "hogremjefmuj2015_TV" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

# Map the local t-values (coefficient / standard error) of
# porcentaje_pob_acceso_ss.
qtm(shp = gwr_sf2, fill = "porcentaje_pob_acceso_ss_TV")

## Variable(s) "porcentaje_pob_acceso_ss_TV" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#mapview(gwr_sf2, zcol = "hogremjefmuj2015_TV")
#mapview(gwr_sf2, zcol = "porcentaje_pob_acceso_ss_TV")

El mapa presentado a continuación provee la descripción de las variables explicativas en el país. La mayoría del país contiene valores positivos bajos con valores de 2-4 en todo el país. De igual manera si existen municipios en estados de las regiones Norte y Noroeste que generan valores negativos y existen municipios minúsculos que pertenecen a la misma categoría.

Predicción local de R2

# Map the local R-squared reported by the GWR model for each municipality.
qtm(shp = gwr_sf2, fill = "Local_R2")

#mapview(gwr_sf2, zcol = "Local_R2")

La predicción local de R al cuadrado tiene como valores más altos los municipios de los estados de las regiones Norte y Noroeste. Las regiones del centro y sur son las que se ubican en la categoría de valores más bajos.

Residuos de regresión local

# Map the local regression residuals (observed minus fitted, on the log scale).
qtm(shp = gwr_sf2, fill = "residual")

## Variable(s) "residual" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#mapview(gwr_sf2, zcol = "residual")

A excepción de algunos estados singulares en dispersión a lo largo del país, la mayoría de los municipios no generó valores negativos en cuanto a la regresión local lo cual indica correlación positiva.

Visualizar más de 2 variables explicativas

# Side-by-side quantile choropleths of two local GWR t-value columns.
# The `_TV` columns hold t-values (coefficient / standard error), not
# p-values, so the legend titles are labelled accordingly.
map_4 <- tm_shape(gwr_sf2) +
  tm_polygons(
    col = "hogremjefmuj2015_TV",
    style = "quantile",
    palette = "BuGn",
    title = "Valor t de jefa de familia"
  )
map_5 <- tm_shape(gwr_sf2) +
  tm_polygons(
    col = "porcentaje_pob_acceso_ss_TV",
    style = "quantile",
    palette = "-BuGn",
    title = "Valor t de porcentaje poblacion con acceso a ss"
  )
tmap_arrange(map_4, map_5, ncol = 2)

Podemos notar una alta concentración de valores t negativos para el porcentaje de población con acceso a seguridad social (ss) a lo largo del país.

Los principales hallazgos basados en resultados locales.

Tabla de coeficientes GWR

# Summary statistics of the local GWR coefficient estimates. Only the first
# six columns of the SDF data are coefficients (intercept + five regressors);
# column 7 is the observed response `y` and must not be tabulated as one.
gwr_table2 <- apply(m.gwr2$SDF@data[, 1:6], 2, summary)

Coeficientes OLS

# Keep only as many OLS coefficients as there are GWR terms so rbind() does
# not silently truncate the vector with a warning.
# NOTE(review): alignment is positional; this assumes the first six
# coefficients of non_spatial_model2 are the same terms, in the same order,
# as the GWR model — confirm against the definition of non_spatial_model2.
non_spatial_model2_table <- coef(non_spatial_model2)[seq_len(ncol(gwr_table2))]

Juntarlos con un row link

table33 <- rbind(gwr_table2, non_spatial_model2_table)

Agregar nombre a la última fila de la pestaña

# Label the appended (last) row rather than relying on a hard-coded index.
rownames(table33)[nrow(table33)] <- "Global"

Transpose tab Resumen de modelos estimados no espaciales y GWR

table33 <- t(round(table33, 3))
table33

##                                  Min. 1st Qu. Median   Mean 3rd Qu.  Max.
## Intercept                      -3.065   3.922  4.776  4.575   5.660 9.961
## hogremjefmuj2015               -0.078   0.006  0.022  0.025   0.044 0.133
## log(crimen_2019)               -0.053   0.018  0.039  0.064   0.094 0.272
## porcentaje_pob_servicios_salud -0.069  -0.011 -0.003  0.000   0.010 0.080
## porcentaje_pob_acceso_ss       -0.107  -0.028 -0.016 -0.018  -0.007 0.064
## rezago_social                  -2.203  -0.746 -0.506 -0.510  -0.214 0.917
##                                Global
## Intercept                       5.717
## hogremjefmuj2015                0.015
## log(crimen_2019)                0.065
## porcentaje_pob_servicios_salud  0.000
## porcentaje_pob_acceso_ss       -0.003
## rezago_social                  -0.309

Hallazgos

Los resultados de la regresión local que muestra la estimación del modelo GWR indican un AIC más bajo que los resultados de la regresión global espacial y no espacial. Basándonos en el AIC más bajo, seleccionamos el GWR.
Jefa de familia que recibe remesas es una variable estadísticamente significativa en cuanto a la detección de casos confirmados de COVID-19 en México; la mayor concentración de esta variable se observa en las regiones centro norte y sur del país.
Local R2 es significativamente alta en las regiones turísticas (playas), la capital del país, Nuevo León y la zona fronteriza de Tamaulipas.

Recomendaciones

Dentro de las variables explicativas, el porcentaje de la población que tiene acceso a ss es negativo en casi todo el país. Esto demuestra que hay áreas de oportunidad para incrementar estos porcentajes en estos sectores
La dispersión entre máximo y mínimo en la variable de crimen contiene valores muy cercanos, por lo que se recomienda usar el modelo de kernel bandwidth para una transición más sencilla en la aplicación.
Como se observó en los mapas que utilizan la variable de rezago social este tiene concentraciones altas en ciertas áreas de la región norte pero una gran distribución a lo largo de la región sur por lo tanto se recomienda implementar actividades socioeconómicas que beneficien a esta región para que el rezago social tenga una distribución normal a lo largo de México.

Conclusión

En conclusión, podemos determinar que los modelos de regresión no espacial y global espacial muestran resultados significativos en cuanto a la explicación de la variable dependiente (en este caso, la tasa de COVID-19); sin embargo, el modelo de regresión espacial local muestra mejores resultados en cuanto a la relación entre las variables explicativas y la variable dependiente.

Act3 GWR

Equipo 1

2023-04-25