Welcome Data Analyses Discussion Maps
Analyses
Urban Heat All Areas Portland Tacoma King County iNaturalist City Inventories
library(tidyverse)
library(corrplot)
library(knitr)
library(kableExtra)
library(gghalves)
library(patchwork)
library(scales)
library(glmmTMB)
library(bbmle)
library(DHARMa)

ALL AREAS

Purpose

The purpose of this page is to summarize an investigation of tree health categories as a categorical response to urban heat (temperatures).

Response variable: category: healthy, unhealthy

Data Wrangling

data <- read.csv('https://raw.githubusercontent.com/jmhulbert/redhot/main/data/urban-data-modified.csv')

Remove dead trees

# Remove Dead Trees
data.w.dead <- data 
data <- data %>% filter(field.tree.canopy.symptoms!="Tree is dead") %>% filter(field.dieback.percent<100) %>% droplevels()
data$binary.tree.canopy.symptoms <- as.factor(data$binary.tree.canopy.symptoms)
levels(data$binary.tree.canopy.symptoms)
## [1] "Healthy"   "Unhealthy"

Remove observations from Hoyt

data <- data %>% filter(!str_detect(place_guess, "Hoyt") & !str_detect(place_guess, "hoyt"))

Data Visualization

Binary Tree Condition Categories

ggplot(data,aes(binary.tree.canopy.symptoms))+geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

p1 <-ggplot(data,aes(binary.tree.canopy.symptoms,dist.from.mean.af, fill=binary.tree.canopy.symptoms ))+geom_boxplot(alpha=0.5)+theme_bw()+coord_flip() + scale_fill_manual(name="Tree Condition",values=c("#7fcdbb","#fe9929"))+guides(fill=FALSE) +labs(x="Tree Condition",y=NULL)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p2 <-ggplot(data,aes(dist.from.mean.af,fill=binary.tree.canopy.symptoms))+geom_density(alpha=0.5) + scale_fill_manual(name="Tree Condition",values=c("#7fcdbb","#fe9929")) +theme_bw() +labs(x="Distance from Mean Afternoon Temperature") +guides(fill=FALSE)
p1 / p2

Reclassified Tree Condition Categories

ggplot(data,aes(reclassified.tree.canopy.symptoms))+geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

data$top.dieback <- as.factor(data$top.dieback)
data$thinning <- as.factor(data$thinning)
ggplot(data,aes(reclassified.tree.canopy.symptoms,dist.from.mean.af ))+geom_boxplot()+theme_bw()+coord_flip()

Temp and Site Type

king <- data %>% filter(Area=="King County")
king <- king %>% filter(field.optional...site.type=="Urban"|field.optional...site.type=="Suburban"|field.optional...site.type=="Rural")
ggplot(king,aes(field.optional...site.type,dist.from.mean.af ))+geom_boxplot()+theme_bw()+coord_flip()

Analyses

Time Series Model Fit

binomial.daily <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.daily + (1|Area),family=binomial,data=data)
binomial.am <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.am + (1|Area),data=data,family=binomial)
binomial.af <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af + (1|Area),data=data,family=binomial)
binomial.pm <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.pm + (1|Area),data=data,family=binomial)

Compare Time Series Models

AICtab(binomial.daily,binomial.am,binomial.af,binomial.pm)
##                dAIC df
## binomial.af     0.0 3 
## binomial.am    16.4 3 
## binomial.daily 16.6 3 
## binomial.pm    18.1 3

Afternoon heat was the best fit

Response 1.0: healthy / unhealthy

#binomial.af <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af + (1|Area),data=data,family=binomial)

Summary

summary(binomial.af)
##  Family: binomial  ( logit )
## Formula:          binary.tree.canopy.symptoms ~ dist.from.mean.af + (1 | Area)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##   1402.2   1417.2   -698.1   1396.2     1101 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.2044   0.4522  
## Number of obs: 1104, groups:  Area, 3
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -0.69512    0.27004  -2.574     0.01 *  
## dist.from.mean.af  0.13977    0.03219   4.343 1.41e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tree health is associated with urban heat. The probability of success (tree categorized as unhealthy) increases with urban heat

Unhealthy trees are considered success (1) because it is the second level. The probability of failure (0) is the first level (Healthy).

levels(data$binary.tree.canopy.symptoms)
## [1] "Healthy"   "Unhealthy"

The probability of a tree being classified as unhealthy increases with urban temperature.

#predict(binomial.af,data[data$DN_AF1=0,])
#predict(binomial.af,DN_AF1=0)

Visualize effects of predictors

First, generate predictions from model

# Create a data frame for predictions
new.data <- expand.grid(
  dist.from.mean.af = seq(min(data$dist.from.mean.af), max(data$dist.from.mean.af), length.out = 100),  # keeping predictor2 constant at its mean
  Area = 0  # assuming random effect is zero for prediction
)
new.data$predicted_response <- predict(binomial.af, newdata = new.data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
p1 <- ggplot(new.data, aes(x = dist.from.mean.af, y = predicted_response)) +
  geom_line() +
  labs(x = "Distance from mean (F)", y = "Predicted Response") +
  theme_minimal() 
p1

# Create a data frame for predictions
new.data.2.inf <- expand.grid(
  dist.from.mean.af = seq(min(data$dist.from.mean.af), max(100), length.out = 1000), Area = 0)  # keeping predictor2 constant at its mean
new.data.2.inf$predicted_response <- predict(binomial.af, newdata = new.data.2.inf, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
p2 <- ggplot(new.data.2.inf, aes(x = dist.from.mean.af, y = predicted_response)) + geom_line() + labs(x = "Distance from mean (F)", y = "Predicted Response") + scale_x_continuous(limits = c(-5, 40)) +theme_minimal()

p2
## Warning: Removed 578 rows containing missing values or values outside the scale range
## (`geom_line()`).

p1 + p2
## Warning: Removed 578 rows containing missing values or values outside the scale range
## (`geom_line()`).

Probability when standardized temperature value is -2

new_data <- data.frame(dist.from.mean.af = -2,Area=0)
predicted_response <- predict(binomial.af, newdata = new_data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
print(predicted_response)
## [1] 0.2739535

Probability when standardized temperature value is 0

new_data <- data.frame(dist.from.mean.af = 0,Area=0)
predicted_response <- predict(binomial.af, newdata = new_data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
print(predicted_response)
## [1] 0.3328951

Probability when standardized temperature value is 2

new_data <- data.frame(dist.from.mean.af = 2,Area=0)
predicted_response <- predict(binomial.af, newdata = new_data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
print(predicted_response)
## [1] 0.3975739

The probability of tree being classified as unhealthy increases with distance from mean af

Response 2.0: healthy / top-dieback

data.top <- data %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Dead Top") %>% droplevels()
top.dieback.binomial.af <- glmmTMB(top.dieback ~ dist.from.mean.af + (1|Area),data=data.top,family=binomial)

Summary

summary(top.dieback.binomial.af)
##  Family: binomial  ( logit )
## Formula:          top.dieback ~ dist.from.mean.af + (1 | Area)
## Data: data.top
## 
##      AIC      BIC   logLik deviance df.resid 
##    602.8    616.9   -298.4    596.8      812 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.6071   0.7792  
## Number of obs: 815, groups:  Area, 3
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -2.2149     0.4729  -4.683 2.82e-06 ***
## dist.from.mean.af   0.2386     0.0688   3.468 0.000525 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tree health is associated with urban heat. The probability of success (tree categorized as unhealthy) increases with urban heat

Unhealthy trees are considered success (1) because it is the second level. The probability of failure (0) is the first level (Healthy).

levels(data$top.dieback)
## [1] "No"  "Yes"
p3 <-ggplot(data,aes(top.dieback,dist.from.mean.af, fill=top.dieback ))+geom_boxplot(alpha=0.5)+theme_bw()+coord_flip() + scale_fill_manual(name="Top Dieback",values=c("#7fcdbb","#fe9929"))+guides(fill=FALSE) +labs(x="Top Dieback",y=NULL)
p4 <-ggplot(data,aes(dist.from.mean.af,fill=top.dieback))+geom_density(alpha=0.5) + scale_fill_manual(name="Top Dieback",values=c("#7fcdbb","#fe9929")) +theme_bw() +labs(x="Distance from Mean Afternoon Temperature") +guides(fill=FALSE)
p3 / p4

The probability of a tree having top dieback increases with urban temperature.

Visualize effects of predictors

First, generate predictions from model

# Create a data frame for predictions
new.data.2 <- expand.grid(
  dist.from.mean.af = seq(min(data$dist.from.mean.af), max(data$dist.from.mean.af), length.out = 100),  # keeping predictor2 constant at its mean
  Area = 0  # assuming random effect is zero for prediction
)
new.data.2$predicted_response <- predict(top.dieback.binomial.af, newdata = new.data.2, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
ggplot(new.data.2, aes(x = dist.from.mean.af, y = predicted_response)) +
  geom_line() +
  labs(x = "Distance from Mean Temperature", y = "Predicted Response") +
  theme_minimal()

Probability when standardized temperature value is 0

new_data <- data.frame(dist.from.mean.af = 0,Area=0)
predicted_response <- predict(top.dieback.binomial.af, newdata = new_data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
print(predicted_response)
## [1] 0.09841986

Probability when standardized temperature value is 2

new_data <- data.frame(dist.from.mean.af = 2,Area=0)
predicted_response <- predict(top.dieback.binomial.af, newdata = new_data, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
print(predicted_response)
## [1] 0.1495957

Response 3.0: healthy / thinning

data.thin <- data %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Thinning Canopy") %>% droplevels()
thinning.binomial.af <- glmmTMB(thinning ~ dist.from.mean.af + (1|Area),data=data.thin,family=binomial)

Summary

summary(thinning.binomial.af)
##  Family: binomial  ( logit )
## Formula:          thinning ~ dist.from.mean.af + (1 | Area)
## Data: data.thin
## 
##      AIC      BIC   logLik deviance df.resid 
##    786.7    800.9   -390.3    780.7      855 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.1578   0.3973  
## Number of obs: 858, groups:  Area, 3
## 
## Conditional model:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -1.63907    0.24977  -6.562  5.3e-11 ***
## dist.from.mean.af  0.12305    0.04641   2.651  0.00802 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Tree health is associated with urban heat. The probability of success (tree categorized as unhealthy) increases with urban heat

Unhealthy trees are considered success (1) because it is the second level. The probability of failure (0) is the first level (Healthy).

levels(data$thinning)
## [1] "No"  "Yes"

The probability of a tree being classified as unhealthy increases with urban temperature.

Visualize effects of predictors

First, generate predictions from model

# Create a data frame for predictions
new.data.3 <- expand.grid(
  dist.from.mean.af = seq(min(data$dist.from.mean.af), max(data$dist.from.mean.af), length.out = 100),  # keeping predictor2 constant at its mean
  Area = 0  # assuming random effect is zero for prediction
)
new.data.3$predicted_response <- predict(thinning.binomial.af, newdata = new.data.3, type = "response")
## Warning in checkTerms(data.tmb1$terms, data.tmb0$terms): Predicting new random effect levels for terms: 1 | Area
## Disable this warning with 'allow.new.levels=TRUE'
ggplot(new.data.3, aes(x = dist.from.mean.af, y = predicted_response)) +
  geom_line() +
  labs(x = "Distance from Mean Temperature", y = "Predicted Response") +
  theme_minimal()

Compare models

Odds Ratios

Unhealthy Trees

# Unhealthy Overall
coef(binomial.af)
## $Area
##             (Intercept) dist.from.mean.af
## King County   -0.272711         0.1397678
## Portland      -1.296521         0.1397678
## Tacoma        -0.510957         0.1397678
exp(0.1397678)
## [1] 1.150007

Top Dieback

# Top Dieback
coef(top.dieback.binomial.af)
## $Area
##             (Intercept) dist.from.mean.af
## King County   -1.363863         0.2385653
## Portland      -3.177853         0.2385653
## Tacoma        -2.062167         0.2385653
exp(0.2385653)
## [1] 1.269427

Thinning

# Thinning
coef(thinning.binomial.af)
## $Area
##             (Intercept) dist.from.mean.af
## King County   -1.355800          0.123049
## Portland      -2.151415          0.123049
## Tacoma        -1.391666          0.123049
exp(0.09135893)
## [1] 1.095662

“An odds ratio greater than 1 implies that the predictor increases the odds of the outcome.”

Heat had the largest effect on predicting if trees had top dieback

Models with Co-Factors

Note the models below were tested on filtered data. Data categorized as ‘No Selection’, ‘Other’ or blank were filtered out.

Co-Factor Model 1: adding size

Visualize co-factor

data.size.filter <- data %>% filter(tree.size.simplified!="" & tree.size.simplified!="No Selection")
levels(as.factor(data.size.filter$tree.size.simplified))
## [1] "Large"  "Medium" "Small"

223 were removed from the data. 881 observations were used in below the co-factor model adding size below.

ggplot(data.size.filter,aes(tree.size.simplified))+geom_histogram(stat="count")+coord_flip()+theme_bw()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

All Trees
ggplot(data.size.filter,aes(binary.tree.canopy.symptoms)) +facet_wrap(~tree.size.simplified) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Trees with Dead Tops
ggplot(data.size.filter,aes(top.dieback)) +facet_wrap(~tree.size.simplified) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Thinning Trees
ggplot(data.size.filter,aes(thinning)) +facet_wrap(~tree.size.simplified) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Response 1.1 healthy / unhealthy

binomial.af.size <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af +  tree.size.simplified +(1|Area),data=data.size.filter,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.binomial.size <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af + (1 | Area), 
                       data = data.size.filter, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.binomial.size, binomial.af.size, test = "Chisq")
## Data: data.size.filter
## Models:
## reduced_model.binomial.size: binary.tree.canopy.symptoms ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## binomial.af.size: binary.tree.canopy.symptoms ~ dist.from.mean.af + tree.size.simplified + , zi=~0, disp=~1
## binomial.af.size:     (1 | Area), zi=~0, disp=~1
##                             Df    AIC    BIC  logLik deviance  Chisq Chi Df
## reduced_model.binomial.size  3 1125.3 1139.6 -559.64   1119.3              
## binomial.af.size             5 1120.4 1144.3 -555.20   1110.4 8.8781      2
##                             Pr(>Chisq)  
## reduced_model.binomial.size             
## binomial.af.size               0.01181 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model Summary
summary(binomial.af.size)
##  Family: binomial  ( logit )
## Formula:          
## binary.tree.canopy.symptoms ~ dist.from.mean.af + tree.size.simplified +  
##     (1 | Area)
## Data: data.size.filter
## 
##      AIC      BIC   logLik deviance df.resid 
##   1120.4   1144.3   -555.2   1110.4      876 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.1238   0.3518  
## Number of obs: 881, groups:  Area, 3
## 
## Conditional model:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -0.72759    0.22395  -3.249 0.001158 ** 
## dist.from.mean.af           0.13074    0.03459   3.779 0.000157 ***
## tree.size.simplifiedMedium  0.28156    0.15542   1.812 0.070034 .  
## tree.size.simplifiedSmall  -0.63900    0.33293  -1.919 0.054947 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Response 2.1 healthy / top-dieback

data.top.size <- data.size.filter %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Dead Top") %>% droplevels()
top.dieback.binomial.af.size <- glmmTMB(top.dieback ~ dist.from.mean.af + tree.size.simplified + (1|Area),data=data.top.size,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.top.size <- glmmTMB(top.dieback ~ dist.from.mean.af + (1 | Area), 
                       data = data.top.size, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.top.size, top.dieback.binomial.af.size, test = "Chisq")
## Data: data.top.size
## Models:
## reduced_model.top.size: top.dieback ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## top.dieback.binomial.af.size: top.dieback ~ dist.from.mean.af + tree.size.simplified + (1 | , zi=~0, disp=~1
## top.dieback.binomial.af.size:     Area), zi=~0, disp=~1
##                              Df    AIC    BIC  logLik deviance  Chisq Chi Df
## reduced_model.top.size        3 433.74 447.12 -213.87   427.74              
## top.dieback.binomial.af.size  5 427.83 450.13 -208.91   417.83 9.9166      2
##                              Pr(>Chisq)   
## reduced_model.top.size                    
## top.dieback.binomial.af.size   0.007025 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The significant p-value (0.007025) from the Chi-Square test indicates that adding tree.size.simplified to the model significantly improves the fit. The reduction in AIC (from 433.74 to 427.83) also supports the inclusion of tree size as a meaningful predictor for top dieback. Despite a slight increase in BIC, the overall improvement in model fit justifies including tree.size.simplified in the model.

Model Summary
summary(top.dieback.binomial.af.size)
##  Family: binomial  ( logit )
## Formula:          
## top.dieback ~ dist.from.mean.af + tree.size.simplified + (1 |      Area)
## Data: data.top.size
## 
##      AIC      BIC   logLik deviance df.resid 
##    427.8    450.1   -208.9    417.8      634 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.4865   0.6975  
## Number of obs: 639, groups:  Area, 3
## 
## Conditional model:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.64512    0.45569  -5.805 6.45e-09 ***
## dist.from.mean.af           0.15033    0.07431   2.023  0.04307 *  
## tree.size.simplifiedMedium  0.85568    0.27103   3.157  0.00159 ** 
## tree.size.simplifiedSmall   0.28621    0.48528   0.590  0.55533    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The estimate for medium-sized trees is 0.85568, which means that, holding other variables constant, medium-sized trees have a higher log-odds of top dieback compared to large trees (the reference category).

The p-value (0.00159) indicates that this effect is statistically significant at the 1% level. This suggests that medium-sized trees are more likely to experience top dieback than large trees.

Response 3.1 healthy / thinning

data.thin.size <- data.size.filter %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Thinning Canopy") %>% droplevels()
thinning.binomial.af.size <- glmmTMB(thinning ~ dist.from.mean.af + tree.size.simplified + (1|Area),data=data.thin.size,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.thin.size <- glmmTMB(thinning ~ dist.from.mean.af + (1 | Area), 
                       data = data.thin.size, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.thin.size, thinning.binomial.af.size, test = "Chisq")
## Data: data.thin.size
## Models:
## reduced_model.thin.size: thinning ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## thinning.binomial.af.size: thinning ~ dist.from.mean.af + tree.size.simplified + (1 | Area), zi=~0, disp=~1
##                           Df    AIC    BIC  logLik deviance  Chisq Chi Df
## reduced_model.thin.size    3 667.69 681.34 -330.84   661.69              
## thinning.binomial.af.size  5 663.68 686.43 -326.84   653.68 8.0083      2
##                           Pr(>Chisq)  
## reduced_model.thin.size               
## thinning.binomial.af.size    0.01824 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Model Summary
summary(thinning.binomial.af.size)
##  Family: binomial  ( logit )
## Formula:          
## thinning ~ dist.from.mean.af + tree.size.simplified + (1 | Area)
## Data: data.thin.size
## 
##      AIC      BIC   logLik deviance df.resid 
##    663.7    686.4   -326.8    653.7      694 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.1317   0.3629  
## Number of obs: 699, groups:  Area, 3
## 
## Conditional model:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -1.55134    0.24607  -6.305 2.89e-10 ***
## dist.from.mean.af           0.12852    0.04909   2.618  0.00884 ** 
## tree.size.simplifiedMedium  0.19655    0.21049   0.934  0.35042    
## tree.size.simplifiedSmall  -1.28676    0.61263  -2.100  0.03569 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Co-Factor Model 2: adding location

Visualize co-factor

data.site.filter <- data %>% filter(field.optional...site.location.description!="" & field.optional...site.location.description!="No Selection" & field.optional...site.location.description!="Other") %>% droplevels()
levels(as.factor(data.site.filter$field.optional...site.location.description))
## [1] "Forest edge"                     "Inside a forest canopy"         
## [3] "Roadside"                        "Urban yard or open park grounds"

193 were removed from the data. 911 observations were used in below the co-factor model adding size below.

ggplot(data.site.filter,aes(field.optional...site.location.description))+geom_histogram(stat="count")+coord_flip()+theme_bw()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

All Trees
ggplot(data.site.filter,aes(binary.tree.canopy.symptoms)) +facet_wrap(~field.optional...site.location.description) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Trees with Dead Tops
ggplot(data.site.filter,aes(top.dieback)) +facet_wrap(~field.optional...site.location.description) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Thinning Trees
ggplot(data.site.filter,aes(thinning)) +facet_wrap(~field.optional...site.location.description) +geom_histogram(stat="count")+theme_bw()+coord_flip()
## Warning in geom_histogram(stat = "count"): Ignoring unknown parameters:
## `binwidth`, `bins`, and `pad`

Response 1.2 healthy / unhealthy

data.site.filter$field.optional...site.location.description <- as.factor(data.site.filter$field.optional...site.location.description)

data.site.filter$field.optional...site.location.description <- relevel(data.site.filter$field.optional...site.location.description, ref = "Inside a forest canopy")
binomial.af.site <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af + field.optional...site.location.description  + (1|Area),data=data.site.filter,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.binomial.site <- glmmTMB(binary.tree.canopy.symptoms ~ dist.from.mean.af + (1 | Area), 
                       data = data.site.filter, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.binomial.site, binomial.af.site, test = "Chisq")
## Data: data.site.filter
## Models:
## reduced_model.binomial.site: binary.tree.canopy.symptoms ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## binomial.af.site: binary.tree.canopy.symptoms ~ dist.from.mean.af + field.optional...site.location.description + , zi=~0, disp=~1
## binomial.af.site:     (1 | Area), zi=~0, disp=~1
##                             Df    AIC    BIC  logLik deviance  Chisq Chi Df
## reduced_model.binomial.site  3 1149.8 1164.2 -571.88   1143.8              
## binomial.af.site             6 1147.8 1176.7 -567.92   1135.8 7.9173      3
##                             Pr(>Chisq)  
## reduced_model.binomial.site             
## binomial.af.site               0.04775 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The inclusion of the site variable improves the model fit, implying that the site location descriptions have a meaningful and statistically significant impact on the binary outcome binary.tree.canopy.symptoms.

Model Summary
summary(binomial.af.site)
##  Family: binomial  ( logit )
## Formula:          
## binary.tree.canopy.symptoms ~ dist.from.mean.af + field.optional...site.location.description +  
##     (1 | Area)
## Data: data.site.filter
## 
##      AIC      BIC   logLik deviance df.resid 
##   1147.8   1176.7   -567.9   1135.8      905 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.1667   0.4083  
## Number of obs: 911, groups:  Area, 3
## 
## Conditional model:
##                                                                           Estimate
## (Intercept)                                                               -0.53046
## dist.from.mean.af                                                          0.14851
## field.optional...site.location.descriptionForest edge                      0.04091
## field.optional...site.location.descriptionRoadside                         0.13546
## field.optional...site.location.descriptionUrban yard or open park grounds -0.36074
##                                                                           Std. Error
## (Intercept)                                                                  0.31621
## dist.from.mean.af                                                            0.03594
## field.optional...site.location.descriptionForest edge                        0.36106
## field.optional...site.location.descriptionRoadside                           0.25177
## field.optional...site.location.descriptionUrban yard or open park grounds    0.23962
##                                                                           z value
## (Intercept)                                                                -1.678
## dist.from.mean.af                                                           4.132
## field.optional...site.location.descriptionForest edge                       0.113
## field.optional...site.location.descriptionRoadside                          0.538
## field.optional...site.location.descriptionUrban yard or open park grounds  -1.505
##                                                                           Pr(>|z|)
## (Intercept)                                                                 0.0934
## dist.from.mean.af                                                          3.6e-05
## field.optional...site.location.descriptionForest edge                       0.9098
## field.optional...site.location.descriptionRoadside                          0.5905
## field.optional...site.location.descriptionUrban yard or open park grounds   0.1322
##                                                                              
## (Intercept)                                                               .  
## dist.from.mean.af                                                         ***
## field.optional...site.location.descriptionForest edge                        
## field.optional...site.location.descriptionRoadside                           
## field.optional...site.location.descriptionUrban yard or open park grounds    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Response 2.2 healthy / top-dieback

data.top.site <- data.site.filter %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Dead Top") %>% droplevels()
data.top.site$field.optional...site.location.description <- as.factor(data.top.site$field.optional...site.location.description)

data.top.site$field.optional...site.location.description <- relevel(data.top.site$field.optional...site.location.description, ref = "Inside a forest canopy")
top.dieback.binomial.af.site <- glmmTMB(top.dieback ~ dist.from.mean.af + field.optional...site.location.description + (1|Area),data=data.top.site,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.top.site <- glmmTMB(top.dieback ~ dist.from.mean.af + (1 | Area), 
                       data = data.top.site, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.top.site, top.dieback.binomial.af.site, test = "Chisq")
## Data: data.top.site
## Models:
## reduced_model.top.site: top.dieback ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## top.dieback.binomial.af.site: top.dieback ~ dist.from.mean.af + field.optional...site.location.description + , zi=~0, disp=~1
## top.dieback.binomial.af.site:     (1 | Area), zi=~0, disp=~1
##                              Df    AIC    BIC  logLik deviance  Chisq Chi Df
## reduced_model.top.site        3 476.09 489.63 -235.05   470.09              
## top.dieback.binomial.af.site  6 474.38 501.45 -231.19   462.38 7.7184      3
##                              Pr(>Chisq)  
## reduced_model.top.site                   
## top.dieback.binomial.af.site     0.0522 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The inclusion of the site variable in the model provides a better fit, but the improvement is only marginally significant. This suggests that while site location descriptions may have an effect on top.dieback, the evidence is not strong enough to definitively conclude a significant overall effect at the 0.05 level. Further investigation or additional data might help clarify the importance of this predictor.

Model Summary
summary(top.dieback.binomial.af.site)
##  Family: binomial  ( logit )
## Formula:          
## top.dieback ~ dist.from.mean.af + field.optional...site.location.description +  
##     (1 | Area)
## Data: data.top.site
## 
##      AIC      BIC   logLik deviance df.resid 
##    474.4    501.4   -231.2    462.4      667 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.7519   0.8671  
## Number of obs: 673, groups:  Area, 3
## 
## Conditional model:
##                                                                           Estimate
## (Intercept)                                                               -1.83775
## dist.from.mean.af                                                          0.21257
## field.optional...site.location.descriptionForest edge                      0.45747
## field.optional...site.location.descriptionRoadside                        -0.64557
## field.optional...site.location.descriptionUrban yard or open park grounds -0.62509
##                                                                           Std. Error
## (Intercept)                                                                  0.60028
## dist.from.mean.af                                                            0.07773
## field.optional...site.location.descriptionForest edge                        0.48784
## field.optional...site.location.descriptionRoadside                           0.41695
## field.optional...site.location.descriptionUrban yard or open park grounds    0.36636
##                                                                           z value
## (Intercept)                                                                -3.062
## dist.from.mean.af                                                           2.735
## field.optional...site.location.descriptionForest edge                       0.938
## field.optional...site.location.descriptionRoadside                         -1.548
## field.optional...site.location.descriptionUrban yard or open park grounds  -1.706
##                                                                           Pr(>|z|)
## (Intercept)                                                                0.00220
## dist.from.mean.af                                                          0.00624
## field.optional...site.location.descriptionForest edge                      0.34837
## field.optional...site.location.descriptionRoadside                         0.12155
## field.optional...site.location.descriptionUrban yard or open park grounds  0.08797
##                                                                             
## (Intercept)                                                               **
## dist.from.mean.af                                                         **
## field.optional...site.location.descriptionForest edge                       
## field.optional...site.location.descriptionRoadside                          
## field.optional...site.location.descriptionUrban yard or open park grounds . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Response 3.2 healthy / thinning

data.thin.site <- data.site.filter %>% filter(reclassified.tree.canopy.symptoms=="Healthy"|reclassified.tree.canopy.symptoms=="Thinning Canopy") %>% droplevels()
data.thin.site$field.optional...site.location.description <- as.factor(data.thin.site$field.optional...site.location.description)

data.thin.site$field.optional...site.location.description <- relevel(data.thin.site$field.optional...site.location.description, ref = "Inside a forest canopy")
thinning.binomial.af.site <- glmmTMB(thinning ~ dist.from.mean.af + field.optional...site.location.description + (1|Area),data=data.thin.site,family=binomial)
Overall Effect
# Fit the reduced model (without site location description)
reduced_model.thin.site <- glmmTMB(thinning ~ dist.from.mean.af + (1 | Area), 
                       data = data.thin.site, 
                       family = binomial)

# Perform a likelihood ratio test
anova(reduced_model.thin.site, thinning.binomial.af.site, test = "Chisq")
## Data: data.thin.site
## Models:
## reduced_model.thin.site: thinning ~ dist.from.mean.af + (1 | Area), zi=~0, disp=~1
## thinning.binomial.af.site: thinning ~ dist.from.mean.af + field.optional...site.location.description + , zi=~0, disp=~1
## thinning.binomial.af.site:     (1 | Area), zi=~0, disp=~1
##                           Df    AIC    BIC  logLik deviance Chisq Chi Df
## reduced_model.thin.site    3 664.05 677.78 -329.02   658.05             
## thinning.binomial.af.site  6 659.18 686.64 -323.59   647.18 10.87      3
##                           Pr(>Chisq)  
## reduced_model.thin.site               
## thinning.binomial.af.site    0.01245 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-value of 0.01245 indicates that the inclusion of the field.optional…site.location.description variable significantly improves the model’s ability to predict thinning compared to the reduced model that only includes dist.from.mean.af.

adding the site variable to the model significantly enhances its ability to predict thinning in trees. The statistically significant improvement in model fit suggests that the site location descriptions contribute valuable information about the likelihood of thinning. Therefore, both the distance from mean temp and the specific site location should be considered important factors in understanding and predicting tree thinning.

Model Summary
summary(thinning.binomial.af.site)
##  Family: binomial  ( logit )
## Formula:          
## thinning ~ dist.from.mean.af + field.optional...site.location.description +  
##     (1 | Area)
## Data: data.thin.site
## 
##      AIC      BIC   logLik deviance df.resid 
##    659.2    686.6   -323.6    647.2      712 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  Area   (Intercept) 0.1268   0.3561  
## Number of obs: 718, groups:  Area, 3
## 
## Conditional model:
##                                                                           Estimate
## (Intercept)                                                               -1.84097
## dist.from.mean.af                                                          0.12110
## field.optional...site.location.descriptionForest edge                     -0.47285
## field.optional...site.location.descriptionRoadside                         0.77358
## field.optional...site.location.descriptionUrban yard or open park grounds  0.08443
##                                                                           Std. Error
## (Intercept)                                                                  0.39740
## dist.from.mean.af                                                            0.05168
## field.optional...site.location.descriptionForest edge                        0.69207
## field.optional...site.location.descriptionRoadside                           0.38174
## field.optional...site.location.descriptionUrban yard or open park grounds    0.37521
##                                                                           z value
## (Intercept)                                                                -4.632
## dist.from.mean.af                                                           2.343
## field.optional...site.location.descriptionForest edge                      -0.683
## field.optional...site.location.descriptionRoadside                          2.026
## field.optional...site.location.descriptionUrban yard or open park grounds   0.225
##                                                                           Pr(>|z|)
## (Intercept)                                                               3.61e-06
## dist.from.mean.af                                                           0.0191
## field.optional...site.location.descriptionForest edge                       0.4945
## field.optional...site.location.descriptionRoadside                          0.0427
## field.optional...site.location.descriptionUrban yard or open park grounds   0.8220
##                                                                              
## (Intercept)                                                               ***
## dist.from.mean.af                                                         *  
## field.optional...site.location.descriptionForest edge                        
## field.optional...site.location.descriptionRoadside                        *  
## field.optional...site.location.descriptionUrban yard or open park grounds    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The Roadside location is significantly associated with a higher likelihood of thinning compared to the trees inside a forest canopy. In contrast, Forest edge and Urban yard or open park grounds do not show significant effects on thinning compared to trees inside a forest canopy.

The model suggests that the distance from mean temp is a key factor influencing tree thinning, with greater distances associated with higher thinning probabilities. Additionally, trees located near roadsides are more susceptible to thinning, indicating that site location plays a role, though not all site locations are equally influential. The variability across areas, as indicated by the random effect for Area, suggests that other unmeasured factors may also be at play in different regions.

Overall Summary

Core Models - 3 responses

Area Response Explanatory df.resid Estimate Std.Error z-val Pr(>z) Signif.
All Areas healthy/unhealthy Dist from Mean AF 1101 0.13977 0.03219 4.343 1.41e-05 ***
All Areas no/dead-top Dist from Mean AF 812 0.2386 0.0688 3.468 0.000525 ***
All Areas no/thinning Dist from Mean AF 855 0.12305 0.04641 2.651 0.00802 **

Co Factors

Size

Overall Effects

Response Model DF AIC BIC ChiSq DF Pr(>Chisq) Signif.
healthy/unhealthy reduced_model.binomial.size 3 1125.3 1139.6
healthy/unhealthy binomial.af.size 5 1120.4 1144.3 8.8781 2 0.01181 *
no/dead-top reduced_model.top.size 3 433.74 447.12
no/dead-top top.dieback.binomial.af.size 5 427.83 450.13 9.9166 2 0.007025 **
no/thinning reduced_model.thin.size 3 667.69 681.34
no/thinning thinning.binomial.af.size 5 663.68 686.43 8.0083 2 0.01824 *

Model Summaries

Area Response Explanatory df.resid Estimate Std.Error z-val Pr(>z) Signif.
All Areas healthy/unhealthy Dist from Mean AF 876 0.13074 0.03459 3.779 0.000157 ***
All Areas healthy/unhealthy Tree.Size.Medium 876 0.28156 0.15542 1.812 0.070034 .
All Areas healthy/unhealthy Tree.Size.Small 876 -0.63900 0.33293 -1.919 0.054947 .
All Areas no/dead-top Dist from Mean AF 634 0.15033 0.07431 2.023 0.04307 *
All Areas no/dead-top Tree.Size.Medium 634 0.85568 0.27103 3.157 0.00159 **
All Areas no/dead-top Tree.Size.Small 634 0.28621 0.48528 0.590 0.55533
All Areas no/thinning Dist from Mean AF 694 0.12852 0.04909 2.618 0.00884 **
All Areas no/thinning Tree.Size.Medium 694 0.19655 0.21049 0.934 0.35042
All Areas no/thinning Tree.Size.Small 694 -1.28676 0.61263 -2.100 0.03569 *

Location

Overall Effects

Response Model DF AIC BIC ChiSq DF Pr(>Chisq) Signif.
healthy/unhealthy reduced_model.binomial.site 3 1149.8 1164.2
healthy/unhealthy binomial.af.site 6 1147.8 1176.7 7.9173 3 0.04775 *
no/dead-top reduced_model.top.site 3 476.09 489.63
no/dead-top top.dieback.binomial.af.site 6 474.38 501.45 7.7184 3 0.0522 .
no/thinning reduced_model.thin.site 3 664.05 677.78
no/thinning thinning.binomial.af.site 6 659.18 686.64 10.87 3 0.01245 *

Model Summaries

Area Response Explanatory df.resid Estimate Std.Error z-val Pr(>z) Signif.
All Areas healthy/unhealthy Dist from Mean AF 905 0.14851 0.03594 4.132 3.6e-05 ***
All Areas healthy/unhealthy Forest edge 905 0.04091 0.36106 0.113 0.9098
All Areas healthy/unhealthy Roadside 905 0.13546 0.25177 0.538 0.5905
All Areas healthy/unhealthy Urban yard or open park grounds 905 -0.36074 0.23962 -1.505 0.1322
All Areas no/dead-top Dist from Mean AF 667 0.21257 0.07773 2.735 0.00624 **
All Areas no/dead-top Forest edge 667 0.45747 0.48784 0.938 0.34837
All Areas no/dead-top Roadside 667 -0.64557 0.41695 -1.548 0.12155
All Areas no/dead-top Urban yard or open park grounds 667 -0.62509 0.36636 -1.706 0.08797
All Areas no/thinning Dist from Mean AF 712 0.12110 0.05168 2.343 0.019122 *
All Areas no/thinning Forest edge 712 -0.47285 0.69207 -0.683 0.4945
All Areas no/thinning Roadside 712 0.77358 0.38174 2.026 0.0427 *
All Areas no/thinning Urban yard or open park grounds 712 0.08443 0.37521 0.225 0.8220