<- "OSBS"
site <- 100
ens_members <- seq(Sys.Date() - 365*2, Sys.Date() - 2, by = "1 day") sim_dates
20 Applying to data assimilation to process model
In this chapter, we apply the particle filter that was introduced in Chapter 15 to the forest process model. We will use an ensemble of 100 particles and assimilate the last two years of data.
20.1 Prep data
Get the historical weather for the site
<- get_historical_met(site = site, sim_dates, use_mean = FALSE)
inputs <- assign_met_ensembles(inputs, ens_members) inputs_ensemble
Set the constant parameters (we will fit two parameters using the particle filter below)
<- list()
params $alpha <- rep(0.02, ens_members)
params$SLA <- rep(4.74, ens_members)
params$leaf_frac <- rep(0.315, ens_members)
params$Ra_frac <- rep(0.5, ens_members)
params$Rbasal <- rep(0.002, ens_members)
params$Q10 <- rep(2.1, ens_members)
params$litterfall_rate <- rep(1/(2.0*365), ens_members) #Two year leaf lifespan
params$litterfall_start <- rep(200, ens_members)
params$litterfall_length<- rep(70, ens_members)
params$mortality <- rep(0.00015, ens_members) #Wood lives about 18 years on average (all trees, branches, roots, course roots)
params$sigma.leaf <- rep(0.1, ens_members) #0.01
params$sigma.wood <- rep(0.1, ens_members) #0.01 ## wood biomass
params$sigma.soil <- rep(0.1, ens_members)# 0.01
params<- as.data.frame(params) params
Read in the observational data and use the first data in the simulation period as the initial condition.
<- read_csv("data/site_carbon_data.csv", show_col_types = FALSE)
obs <- rep(NA, 3)
state_init
1] <- obs |>
state_init[filter(variable == "lai",
%in% sim_dates) |>
datetime na.omit() |>
slice(1) |>
mutate(observation = observation / (mean(params$SLA) * 0.1)) |>
pull(observation)
2] <- obs |>
state_init[filter(variable == "wood",
%in% sim_dates) |>
datetime na.omit() |>
slice(1) |>
pull(observation)
3] <- obs |>
state_init[filter(variable == "som") |>
na.omit() |>
slice(1) |>
pull(observation)
Assign the initial conditions to the first time-step of the simulation
#Set initial conditions
<- array(NA, dim = c(length(sim_dates), ens_members, 12)) #12 is the number of outputs
forecast 1, , 1] <- state_init[1]
forecast[1, , 2] <- state_init[2]
forecast[1, , 3] <- state_init[3]
forecast[
<- array(1, dim = c(length(sim_dates), ens_members)) wt
Read in the parameter chain from Chapter 19. We sample from the chain to get the initial starting distribution for the particle filter. Importantly, the parameter has to be sampled together to preserve the correlation between them. In our example, assume the litterfall_start
is constant at the calibrated mean.
<- read_csv("data/saved_parameter_chain.csv", show_col_types = FALSE) |>
fit_params_table pivot_wider(names_from = parameter, values_from = value)
<- 2
num_pars <- array(NA, dim = c(length(sim_dates) ,ens_members , num_pars))
fit_params <- sample(1:nrow(fit_params_table), size = ens_members, replace = TRUE)
samples 1, , 1] <- fit_params_table$alpha[samples]
fit_params[1, , 2] <- fit_params_table$Rbasal[samples]
fit_params[$litterfall_start <- mean(fit_params_table$litterfall_start, na.rm = TRUE) params
20.2 Run the particle filter
In the particle filter, we perturb the parameter values slightly to prevent them from collapsing to a single value.
for(t in 2:length(sim_dates)){
1] <- rnorm(ens_members, fit_params[t-1, ,1], sd = 0.0005)
fit_params[t, , 2] <- rnorm(ens_members, fit_params[t-1, ,2], sd = 0.00005)
fit_params[t, ,
$alpha <- fit_params[t, , 1]
params$Rbasal <- fit_params[t, , 2]
params
<- forest_model(t,
forecast[t, , ] states = matrix(forecast[t-1 , , 1:3], nrow = ens_members) ,
parms = params,
inputs = matrix(inputs_ensemble[t , , ], nrow = ens_members))
<- obs |>
curr_obs filter(datetime == sim_dates[t],
%in% c("nee","lai","wood"))
variable
if(nrow(curr_obs) > 0){
<- output_to_df(forecast[t, , ], sim_dates[t], sim_name = "particle_filter")
forecast_df
<- combine_model_obs(forecast_df,
combined_output_obs obs = curr_obs,
variables = c("lai", "wood", "som", "nee"),
sds = c(0.1, 1, 20, 0.005))
<- rep(0, ens_members)
likelihood for(ens in 1:ens_members){
<- combined_output_obs |>
curr_ens filter(ensemble == ens)
<- exp(sum(dnorm(x = curr_ens$observation,
likelihood[ens] mean = curr_ens$prediction,
sd = curr_ens$sds, log = TRUE)))
}
<- likelihood * wt[t-1, ]
wt[t, ]
<- wt[t, ]/sum(wt[t, ])
wt_norm = 1/sum(wt_norm^2)
Neff
if(Neff < ens_members/2){
## resample ensemble members in proportion to their weight
<- sample(1:ens_members, ens_members, replace = TRUE, prob = wt_norm )
resample_index <- as.matrix(forecast[t, resample_index, 1:12]) ## update state
forecast[t, , ] <- as.matrix(fit_params[t, resample_index, ])
fit_params[t, , ] <- rep(1, ens_members)
wt[t, ]
}
} }
Process the output and parameters into a data frame using the particle weights.
<- array(NA, dim = c(length(sim_dates), ens_members, 12))
forecast_weighted <- array(NA, dim = c(length(sim_dates) ,ens_members , num_pars))
params_weighted for(t in 1:length(sim_dates)){
<- wt[t, ]/sum(wt[t, ])
wt_norm <- sample(1:ens_members, ens_members, replace = TRUE, prob = wt_norm )
resample_index <- forecast[t, resample_index, 1:12]
forecast_weighted[t, , ] <- fit_params[t,resample_index, ]
params_weighted[t, , ]
}<- output_to_df(forecast_weighted, sim_dates, sim_name = "particle_filter")
output_df <- parameters_to_df(params_weighted, sim_dates, sim_name = "particle_filter", param_names = c("alpha","Rbasal")) parameter_df
Visualize the simulation using the particle filter
<- obs |>
obs_dates filter(variable == c("nee","lai", "wood")) |>
pull(datetime)
|>
output_df summarise(median = median(prediction),
upper = quantile(prediction, 0.95, na.rm = TRUE),
lower = quantile(prediction, 0.05, na.rm = TRUE), .by = c("datetime", "variable")) |>
left_join(obs, by = c("datetime", "variable")) |>
filter(variable %in% c("nee","wood","som","lai")) |>
ggplot(aes(x = datetime)) +
geom_ribbon(aes(ymin = upper, ymax = lower), alpha = 0.7) +
geom_line(aes(y = median)) +
geom_point(aes(y = observation), color = "red") +
facet_wrap(~variable, scale = "free") +
theme_bw()
Visualize how the parameters changed over the simulation. You can see the distributions widening between observations and narrowing when an observation is assimilated.
|>
parameter_df summarise(median = median(prediction),
upper = quantile(prediction, 0.95, na.rm = TRUE),
lower = quantile(prediction, 0.05, na.rm = TRUE), .by = c("datetime", "variable")) |>
left_join(obs, by = c("datetime", "variable")) |>
ggplot(aes(x = datetime)) +
geom_ribbon(aes(ymin = upper, ymax = lower), alpha = 0.7) +
geom_line(aes(y = median)) +
facet_wrap(~variable, scale = "free") +
theme_bw()
Combine the forecast, parameters, weights, simulation dates, and constant parameters into an object called analysis
and save for use in Chapter 21
<- list(forecast = forecast, fit_params = fit_params, wt = wt, sim_dates = sim_dates, params = params)
analysis save(analysis, file = "data/PF_analysis_0.Rdata")