Preamble

Load libraries

library(ezknitr)
library(knitr)
library(lubridate)
library(raster)
library(move)
library(amt) 
library(broom)
library(nlme)
library(lme4)
library(here)
library(tidyverse)
options(width=165)
opts_chunk$set(fig.width=12,fig.height=4.5, error=TRUE,cache = F)

Set the seed for the random number generator, so it will be possible to reproduce the random points

set.seed(10299)

Load data

trk (fisher tracks) and fisher.dat created in TestVignetteMovebank.R

trk <- read_rds(here("data", "trk.Rdata"))

RSF prep

Generate random points within MCPs for a single individual using amt functions. Notes:

  • It is common to generate points randomly, but other options are possible.
  • In particular, it can beneficial to generate a systematically placed sample
  • Samples can also be generated using the spsample function in the sp package or using a GIS (note: amt uses the spsample function within its function random_points)
  • Other home range polygons could be used (e.g., kernel density, local convex hull etc.)

Random points: illustrate for 1 individual

trk %>% filter(id=="F1") %>%
  random_points(factor = 20) %>% plot()

Illustrate systematic points (to do this, we need to create the mcp first)

trk %>% filter(id=="F1") %>% 
  random_points(factor = 20, type="regular") %>% 
  plot() 

Or use just 1 random point for each observed point for better visualization.

trk %>% filter(id=="F1") %>% 
  random_points(factor = 1, type="regular") %>% 
  plot() 

Now, lets generate points for all individuals. We can do this efficiently by making use of pipes (%>%), nested data frames, and then by adding a new column – a list-column – to trks

avail.pts <- trk %>% group_by(id) %>% nest %>% 
  mutate(rnd_pts = map(data, ~ random_points(., factor = 20, type="regular"))) %>% 
  select(id, rnd_pts) %>%  # you dont want to have the original point twice, hence drop data
  unnest()

Because unnesting loses the class, we will have to manually reset it

class(avail.pts) <- c("random_points", class(avail.pts))

Or, we could do this using a loop (commented out, below)

# avail.pts<-NULL
# uid<-unique(trk$id) # individual identifiers
# luid<-length(uid) # number of unique individuals
# for(i in 1:luid){
#  random_points will generate random points within mcp
#  Add on the individual id and combine all data
#   temp<-cbind(id=uid[i],trk%>%filter(id==uid[i])%>%random_points)
#   avail.pts<-rbind(avail.pts, temp)
# }
# avail.pts<-as_tibble(avail.pts)
# avail.pts

Prepare environmental covariates

We will use three covariates: land use class, elevation and population density

landuse <- raster(here("data/landuse/landuse_study_area.tif"))
elevation <- raster(here("data/elevation/ASTER ASTGTM2 Elevation-20100101120000000-0-0.tif"))
popden <- raster(here("data/pop_den/pop_den.tif"))

First, we need to bring all thre layers to the same CRS.

get_crs(trk)
## CRS arguments:
##  +init=epsg:5070 +proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0
landuse <- projectRaster(landuse, crs = get_crs(trk), method = "ngb")
elevation <- projectRaster(elevation, crs = get_crs(trk))
popden <- projectRaster(popden, crs = get_crs(trk))

Plot the raster

plot(landuse)

plot(elevation)

plot(popden)

Check the resolution

res(landuse)
## [1] 30 30
res(elevation)
## [1] 44.0 60.4
res(popden)
## [1] 659 905

Assign each raster a meaningful name

names(landuse) <- "landclass"
names(elevation) <- "ele"
names(popden) <- "popden"

Resolutions (and also extents) are different. This means, that we can not stack the rasters. There two options: 1. Resample rasters to the coarsest raster (popden) using the function raster::resample. 2. Extract covariate values from each raster idendependetly. We will continue with second option.

avail.pts <- avail.pts %>% 
  extract_covariates(landuse) %>%  
  extract_covariates(elevation) %>% 
  extract_covariates(popden)

avail.pts
## # A tibble: 690,988 x 7
##    id    case_       x_       y_ landclass   ele popden
##    <fct> <lgl>    <dbl>    <dbl>     <dbl> <dbl>  <dbl>
##  1 M1    TRUE  1782673. 2402297.        41  109.   59.9
##  2 M1    TRUE  1782680. 2402297.        41  109.   59.9
##  3 M1    TRUE  1782683. 2402292.        41  109.   59.9
##  4 M1    TRUE  1782686. 2402305.        41  110.   59.9
##  5 M1    TRUE  1782681. 2402297.        41  109.   59.9
##  6 M1    TRUE  1782671. 2402290.        41  109.   59.9
##  7 M1    TRUE  1782683. 2402298.        41  110.   59.9
##  8 M1    TRUE  1782686. 2402281.        41  109.   59.9
##  9 M1    TRUE  1782682. 2402290.        41  109.   59.9
## 10 M1    TRUE  1782681. 2402291.        41  109.   59.9
## # ... with 690,978 more rows

Create landcover classes (as suggested by Scott Lapoint :)

rsfdat <- avail.pts %>%  mutate(
  landclass = as.character(landclass), 
  landC = fct_collapse(landclass,
      agri = c("81", "82"),
      forest =c("41", "42", "43"),
      shrub = c("52"),
      grass = c("31", "71"),
      wet = c("90", "95"),
      other = c("11", "21", "22", "23", "24")))

Center and scale variables, make response numeric

rsfdat <- rsfdat %>% mutate(elev = scale(ele), 
                            popD = scale(popden), 
                            case_ = as.numeric(case_))

Save RSF data for later (multiple animal)

write_rds(rsfdat, "data/rsf_dat.rds")

Explore the data

Look Distribution of habitat class for used and available observations

ggplot(rsfdat, aes(x=landC, y=..prop..,group=case_, colour=case_))+
geom_bar(position="dodge", aes(fill=case_))+facet_wrap(~id, scales="free")

RSF fitting

Weight available data

rsfdat$w <- ifelse(rsfdat$case_ == 1, 1, 5000)

We can fit an RSF model to a single animal using logistic regression

summary(glm(case_ ~ elev + popD + landC, data = subset(rsfdat, id == "M2"), 
            weight = w,family = binomial))
## 
## Call:
## glm(formula = case_ ~ elev + popD + landC, family = binomial, 
##     data = subset(rsfdat, id == "M2"), weights = w)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6378  -0.4234  -0.1717  -0.1500   5.1255  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -12.57777    0.17558 -71.634  < 2e-16 ***
## elev         -0.04157    0.19670  -0.211    0.833    
## popD         -0.30344    0.04303  -7.052 1.76e-12 ***
## landCgrass   -8.43959   69.21830  -0.122    0.903    
## landCforest   1.95070    0.07514  25.960  < 2e-16 ***
## landCshrub    0.52381    0.58169   0.901    0.368    
## landCagri     1.58854    0.13732  11.569  < 2e-16 ***
## landCwet      2.33522    0.08282  28.196  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 40992  on 34395  degrees of freedom
## Residual deviance: 39527  on 34388  degrees of freedom
## AIC: 39543
## 
## Number of Fisher Scoring iterations: 12

Note, this individual did not experience all landcover classes

rsfdat %>% filter(id == "M2") %>% with(table(case_, landC))  
##      landC
## case_ other grass forest shrub  agri   wet
##     0 18956    25   9660   138  1012  2967
##     1   234     0    915     3    70   416
rsfdat %>% filter(id == "M2") %>% count(landC, case_) # this works with the amt development version
## # A tibble: 1 x 1
##       n
## * <int>
## 1 34396
rsfdat$used<-as.factor(rsfdat$case_)
rsfdat$used<-fct_recode(rsfdat$used, "avail" = "0", "used" = "1")
ggplot(subset(rsfdat, id=="M2"),  aes(x=landC,group=used))+
  geom_bar(position=position_dodge(), aes(y=..prop.., fill = used), stat="count") +
  scale_fill_brewer(palette="Paired")+
  geom_text(aes( label = scales::percent(..prop..),
                 y= ..prop.. ), stat= "count", vjust = -.3, position=position_dodge(0.9)) +
  labs(y = "Proportion", fill="used", x="Landcover") 

Now, fit an RSF model to data from each animal. Since not all animals experience all habitat types, lets just explore forest versus non-forest

rsfdat$forest <- ifelse(rsfdat$landC == "forest", 1, 0)

class(rsfdat) <- class(rsfdat)[-1] # this should be fixed in the dev version
  
rsffits <- rsfdat %>% group_by(id) %>% nest %>% 
  mutate(mod = map(data, function(x) glm(case_ ~ elev + popD + forest, data = x,
                                         weight=w,family = binomial))) 

This stores a list of model fits

rsffits 
## # A tibble: 8 x 3
##   id    data                    mod      
##   <fct> <list>                  <list>   
## 1 M1    <tibble [19,301 x 12]>  <S3: glm>
## 2 M4    <tibble [188,120 x 12]> <S3: glm>
## 3 F2    <tibble [63,075 x 12]>  <S3: glm>
## 4 M3    <tibble [51,156 x 12]>  <S3: glm>
## 5 F3    <tibble [31,526 x 12]>  <S3: glm>
## 6 M2    <tibble [34,396 x 12]>  <S3: glm>
## 7 F1    <tibble [28,326 x 12]>  <S3: glm>
## 8 M5    <tibble [275,088 x 12]> <S3: glm>

Look at first model

rsffits$mod[[1]]
## 
## Call:  glm(formula = case_ ~ elev + popD + forest, family = binomial, 
##     data = x, weights = w)
## 
## Coefficients:
## (Intercept)         elev         popD       forest  
##     -6.3712       6.9569       0.3062       0.1174  
## 
## Degrees of Freedom: 19300 Total (i.e. Null);  19297 Residual
## Null Deviance:       23000 
## Residual Deviance: 22860     AIC: 22870

Now, use tidy to extract information about the model fits into a nested data frame

rsffits <- rsffits %>%
   dplyr::mutate(tidy = purrr::map(mod, broom::tidy),
                 n = purrr::map(data, nrow) %>% simplify())
rsffits 
## # A tibble: 8 x 5
##   id    data                    mod       tidy                  n
##   <fct> <list>                  <list>    <list>            <int>
## 1 M1    <tibble [19,301 x 12]>  <S3: glm> <tibble [4 x 5]>  19301
## 2 M4    <tibble [188,120 x 12]> <S3: glm> <tibble [4 x 5]> 188120
## 3 F2    <tibble [63,075 x 12]>  <S3: glm> <tibble [4 x 5]>  63075
## 4 M3    <tibble [51,156 x 12]>  <S3: glm> <tibble [4 x 5]>  51156
## 5 F3    <tibble [31,526 x 12]>  <S3: glm> <tibble [4 x 5]>  31526
## 6 M2    <tibble [34,396 x 12]>  <S3: glm> <tibble [4 x 5]>  34396
## 7 F1    <tibble [28,326 x 12]>  <S3: glm> <tibble [4 x 5]>  28326
## 8 M5    <tibble [275,088 x 12]> <S3: glm> <tibble [4 x 5]> 275088
rsffits$tidy
## [[1]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   -6.37     0.570     -11.2  5.27e-29
## 2 elev           6.96     0.753       9.24 2.48e-20
## 3 popD           0.306    0.0421      7.27 3.72e-13
## 4 forest         0.117    0.0717      1.64 1.02e- 1
## 
## [[2]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)  -11.6      0.0641   -181.   0.       
## 2 elev           0.443    0.0797      5.56 2.73e-  8
## 3 popD          -0.313    0.0140    -22.3  1.92e-110
## 4 forest         1.14     0.0220     51.7  0.       
## 
## [[3]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   -6.50     0.314     -20.7  3.18e-95
## 2 elev           6.90     0.423      16.3  6.62e-60
## 3 popD          -0.224    0.0292     -7.65 1.99e-14
## 4 forest         0.171    0.0391      4.38 1.21e- 5
## 
## [[4]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  -14.4      0.318     -45.3  0.      
## 2 elev          -3.80     0.378     -10.0  1.02e-23
## 3 popD          -0.428    0.0249    -17.2  2.65e-66
## 4 forest         0.457    0.0492      9.29 1.57e-20
## 
## [[5]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  -2.93      0.951     -3.08  2.06e- 3
## 2 elev         10.1       1.11       9.07  1.20e-19
## 3 popD          0.0930    0.103      0.905 3.66e- 1
## 4 forest        0.0987    0.0666     1.48  1.38e- 1
## 
## [[6]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  -11.2      0.174     -64.3  0.      
## 2 elev           0.357    0.206       1.73 8.40e- 2
## 3 popD          -0.498    0.0424    -11.7  8.70e-32
## 4 forest         0.981    0.0519     18.9  1.13e-79
## 
## [[7]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)  -12.6      0.292    -43.1   0.      
## 2 elev           0.152    0.325      0.467 6.41e- 1
## 3 popD           0.300    0.0573     5.24  1.64e- 7
## 4 forest         1.56     0.0788    19.8   4.29e-87
## 
## [[8]]
## # A tibble: 4 x 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)  -12.7      0.132     -96.1  0.       
## 2 elev           0.452    0.0196     23.1  3.46e-118
## 3 popD          -1.29     0.209      -6.18 6.36e- 10
## 4 forest        -0.160    0.0274     -5.85 4.94e-  9

Now, create data frame w/ the coefficients, etc

rsf_coefs<-rsffits %>% unnest(tidy) %>% 
  select(-(std.error:p.value))
rsf_coefs
## # A tibble: 32 x 4
##    id         n term        estimate
##    <fct>  <int> <chr>          <dbl>
##  1 M1     19301 (Intercept)   -6.37 
##  2 M1     19301 elev           6.96 
##  3 M1     19301 popD           0.306
##  4 M1     19301 forest         0.117
##  5 M4    188120 (Intercept)  -11.6  
##  6 M4    188120 elev           0.443
##  7 M4    188120 popD          -0.313
##  8 M4    188120 forest         1.14 
##  9 F2     63075 (Intercept)   -6.50 
## 10 F2     63075 elev           6.90 
## # ... with 22 more rows

Plot coefficients

 rsf_coefs %>% filter(term!="(Intercept)") %>%
   ggplot(., aes(x=1, y=estimate)) + 
      geom_dotplot(binaxis="y", stackdir="center")+geom_hline(yintercept=0)+
      facet_wrap(~term, scales="free")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.