Add files via upload

plamentrayanov · May 6, 2020 · ba9a7ca · ba9a7ca
1 parent 92a1084
commit ba9a7ca
Show file tree

Hide file tree

Showing 26 changed files with 15,589 additions and 0 deletions.
diff --git a/Bulgaria/Figures/Estimated_R0_and_Im.png b/Bulgaria/Figures/Estimated_R0_and_Im.png
diff --git a/Bulgaria/Figures/ImmigrationSimulations.png b/Bulgaria/Figures/ImmigrationSimulations.png
diff --git a/Bulgaria/Figures/InfectedPeriodPDF.png b/Bulgaria/Figures/InfectedPeriodPDF.png
diff --git a/Bulgaria/Figures/Model_vs_Actual_Cases.png b/Bulgaria/Figures/Model_vs_Actual_Cases.png
diff --git a/Bulgaria/Figures/PointProcessDensity.png b/Bulgaria/Figures/PointProcessDensity.png
diff --git a/Bulgaria/Figures/forecast_active_MainScenario.png b/Bulgaria/Figures/forecast_active_MainScenario.png
diff --git a/Bulgaria/Figures/forecast_active_OptimisticScenario.png b/Bulgaria/Figures/forecast_active_OptimisticScenario.png
diff --git a/Bulgaria/Figures/forecast_active_PessimisticScenario.png b/Bulgaria/Figures/forecast_active_PessimisticScenario.png
diff --git a/Bulgaria/Figures/forecast_newcases_MainScenario.png b/Bulgaria/Figures/forecast_newcases_MainScenario.png
diff --git a/Bulgaria/Figures/forecast_newcases_OptimisticScenario.png b/Bulgaria/Figures/forecast_newcases_OptimisticScenario.png
diff --git a/Bulgaria/Figures/forecast_newcases_PessimisticScenario.png b/Bulgaria/Figures/forecast_newcases_PessimisticScenario.png
diff --git a/Bulgaria/Figures/forecast_newcases_by_scenario.png b/Bulgaria/Figures/forecast_newcases_by_scenario.png
diff --git a/Bulgaria/Figures/forecast_total_MainScenario.png b/Bulgaria/Figures/forecast_total_MainScenario.png
diff --git a/Bulgaria/Figures/forecast_total_OptimisticScenario.png b/Bulgaria/Figures/forecast_total_OptimisticScenario.png
diff --git a/Bulgaria/Figures/forecast_total_PessimisticScenario.png b/Bulgaria/Figures/forecast_total_PessimisticScenario.png
diff --git a/Bulgaria/RunScript_for_Bulgaria.m b/Bulgaria/RunScript_for_Bulgaria.m
diff --git a/README.md b/README.md
@@ -0,0 +1,25 @@
+# fitCOVID19_BranchingProcess
+The project contains Matlab script files for fitting the parameters of the Branching Process Simulator, available at https://github.com/plamentrayanov/BranchingProcessSimulator .
+The project is oriented specifically towards modelling the COVID19 (SARS-CoV-2) epidemics. Some of the required parameters in the simulator are virus-specific, defined as hard-coded values from articles on the subject and observed data. Others are country-specific, for which an optimization procedure is implemented. The procedure aims at estimating the point process mass of the BP (i.e. R0(t), a.k.a R(t)) and the Immigration expectation as a smooth function of time. The procedure required subjectively chosen smoothing parameters that restrict the curvature of R0(t) and Im(t).
+
+The project aims to:
+1. Provide an epidemics projection based on the stochastic theory of Branching Processes, as an alternative to the classical SIR, SEIR and other deterministic models.
+2. Provide the user with a starting point for testing different hypothesises and project their effect on the epidemics development. The users may provide their own forecasts for R0 and the immigration or use the scenarios defined in the script. As the BranchingProcessSimulator repository provides a much larger set of possible models, the user is encouraged to experiment with different models and assumptions.
+3. Demonstrate the uses of Branching Processes to model epidemics.
+
+
+# Included in the package
+Fitted models for Germany, France, Italy, Sweden, Ukraine, Indonesia and Bulgaria are included. The code uses parallel computing and the more CPU cores you have, the more RAM you need. Make sure you have enough RAM to run the simulations on all cores! There are 3 scenarios for the future epidemics development for each country:
+1. No change in R0 and Immigration. The value of R0 and Immigration from the last days is taken as a constant for the future development.
+2. Decrease in R0 and Immigration, which is more optimistic scenario from what we are currently experiencing.
+3. Increase in R0 and no change in Immigration, which is more pessimistic scenario from what we are currently experiencing.
+
+These scenarios produce projections about what will happen if the assumptions on R0 and Immigration are correct. To produce a forecast, you need to forecast the R0 and Immigration. 
+
+The scripts download the latest data AUTOMATICALLY FROM WORLDOMETERS.COM when they run! You may use data also from https://opendata.ecdc.europa.eu/covid19/casedistribution/csv but you need to download it manually and it seems they are more noisy/incorrect.
+
+To run the example model for a country, go to the corresponding directory for that country and run the script. The figures are then saved in ./Figures/ .
+
+The ./lib directory contains common functions used by all scripts inside the subdirectories.
+
+The latest Branching Process Simulator is available at https://github.com/plamentrayanov/BranchingProcessSimulator 
diff --git a/data.csv b/data.csv
diff --git a/lib/BranchingProcessSimulator.m b/lib/BranchingProcessSimulator.m
diff --git a/lib/Im_function.m b/lib/Im_function.m
@@ -0,0 +1,14 @@
+function Im_matrix = Im_function(mu, sgm, Im_age_struct_prob)
+Im_matrix=zeros(size(Im_age_struct_prob,1), 1, size(mu,2));
+
+theta = sgm.^2./mu;
+k = (mu./sgm).^2;
+theta(mu==0)=0;
+k(sgm==0)=0;
+
+Im_matrix(:,1,:)=mnrnd(round(gamrnd(k, theta)'), Im_age_struct_prob)';
+if any(isnan(Im_matrix(:)))
+ error('Im_matrix contains nans!')
+end
+end
+
diff --git a/lib/Y_projection.m b/lib/Y_projection.m
@@ -0,0 +1,21 @@
+function N=Y_projection(Z_0, mu_matrix, Im_matrix, h)
+% implements a numerical method for calculating the total progeny of the branching process (BP), which is the expectation of the BP, given there is
+% no death, i.e. S(t)=1 for every t. This is used in an optimization procedure to find the parameters of the BP so that the projection equals 
+% the history so far (i.e. method of moments)
+% More details on the numerical approximation for the expectation of a BP is discussed in:
+% Crump-Mode-Jagers Branching Process: A Numerical Approach. Pages 167-182, Trayanov, Plamen
+% Book: Branching Processes and Their Applications
+% Editors: del Puerto, I.M., González, M., Gutiérrez, C., Martínez, R., Minuesa, C., Molina, M., Mota, M., Ramos, A. (Eds.) 
+
+N=zeros(size(Z_0,1), size(mu_matrix,2));
+N(:,1)=Z_0;
+for t=1:size(mu_matrix,2)
+ if ~isempty(Im_matrix)
+ N(:,t) = N(:,t) + Im_matrix(:, t);
+ end
+ N(1,t+1) = N(:,t)'*mu_matrix(:, t)*h;
+ % no dying:
+ N(2:end, t+1) = N(1:end-1, t);
+ N(end, t+1) = N(end, t+1) + N(end, t);
+end
+end
diff --git a/lib/buildPlots.m b/lib/buildPlots.m
@@ -0,0 +1,82 @@
+function buildPlots(NewCasesDaily, TotalCasesDaily, ActiveCasesDaily, NewCasesHist, dates, horizon, detection_time, alpha, scenario_name, scenario_title, varargin)
+p=inputParser();
+p.addOptional('SavePlots', false, @islogical);
+p.parse(varargin{:});
+save_plots = p.Results.SavePlots;
+
+%%% helper function
+[NewCasesDaily_mean, NewCasesDaily_lower, NewCasesDaily_upper, NewCasesDaily_median]=confInterval(NewCasesDaily, alpha);
+
+%% number of new cases detected per day, model VS reality
+% as the detection of the infection is after 8 days, we have 2 Branching Processes here:
+% 1) real process happening in the population right now, but not yet detected
+% 2) shifted process by 8 days so we can supperimpose with the real data and see if the model parameters are chosen correctly 
+line_wd=2.5;
+figure('visible','on', 'Units','pixels','OuterPosition',[0 0 1280 1024]);
+set(gca,'FontSize',16)
+hold on
+h_hist=plot(dates, NewCasesHist, 'Color', [0, 0, 0, 0.7],'LineWidth', 1.5);
+h_median_supperimposed=plot(dates(1):dates(end)+horizon, NewCasesDaily_median(1:end-detection_time-1), '-', 'Color', [1, 0, 0, 0.5], 'LineWidth', line_wd);
+h_median_real=plot((dates(1):dates(end)+horizon)-detection_time, NewCasesDaily_median(1:end-detection_time-1), ':', 'Color', [0, 0.5, 0, 0.5], 'LineWidth', line_wd);
+h_CI=plot((dates(1):dates(end)+horizon), NewCasesDaily_lower(1:end-detection_time-1), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+plot((dates(1):dates(end)+horizon), NewCasesDaily_upper(1:end-detection_time-1), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+legend([h_median_real, h_median_supperimposed, h_CI, h_hist], 'Median (Real Process)', 'Prediction of observed new daily cases', strcat(sprintf('%0.0f', 100*(1-2*alpha)), '% conf. interval'), 'Daily New Cases', 'Location', 'NorthWest')
+xtickangle(90)
+x_ticks = xticks;
+xticks(x_ticks(1):7:x_ticks(end))
+dateaxis('X',2)
+ylabel('\bf{Daily New Cases (Observed)}')
+xlabel('\bf{Date}')
+title(scenario_title)
+if save_plots
+ print(strcat('./Figures/forecast_newcases_', scenario_name), '-dpng', '-r0')
+end
+%% total number of cases
+[TotalCases_mean, TotalCases_lower, TotalCases_upper, TotalCases_median]=confInterval(TotalCasesDaily, alpha);
+
+figure('visible','on', 'Units','pixels','OuterPosition',[0 0 1280 1024]);
+set(gca,'FontSize',16)
+hold on
+h_hist=plot(dates, cumsum(NewCasesHist), 'Color', [0, 0, 0, 0.7],'LineWidth', 1.5);
+h_median_supperimposed=plot(dates(1):(dates(end)+horizon), TotalCases_median(1:end-detection_time), '-', 'Color', [1, 0, 0, 0.5], 'LineWidth', line_wd);
+h_median_real=plot((dates(1):dates(end)+horizon)-detection_time, TotalCases_median(1:end-detection_time), ':', 'Color', [0, 0.5, 0, 0.5], 'LineWidth', line_wd);
+h_CI=plot(dates(1):dates(end)+horizon, TotalCases_lower(1:end-detection_time), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+plot(dates(1):dates(end)+horizon, TotalCases_upper(1:end-detection_time), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+legend([h_median_real, h_median_supperimposed, h_CI, h_hist], 'Median (Real Process)', 'Prediction of observed total cases', strcat(sprintf('%0.0f', 100*(1-2*alpha)), '% conf. interval'), 'Total Cases', 'Location', 'NorthWest')
+xtickangle(90)
+x_ticks = xticks;
+xticks(x_ticks(1):7:x_ticks(end))
+dateaxis('X',2)
+ylabel('\bf{Total Cases (Observed)}')
+xlabel('\bf{Date}')
+title(scenario_title)
+if save_plots
+ print(strcat('./Figures/forecast_total_', scenario_name), '-dpng', '-r0')
+end
+
+%% total number of active cases
+% on the data site there is no history for Active cases. In addition this measure is highly sensitive to the way we decide if the person is recovered
+% or not, e.g. some countries measure the days being sick, other countries measure the time to clear the virus out of the body!
+[ActiveCasesDaily_mean, ActiveCasesDaily_lower, ActiveCasesDaily_upper, ActiveCasesDaily_median]=confInterval(ActiveCasesDaily, alpha);
+
+figure('visible','on', 'Units','pixels','OuterPosition',[0 0 1280 1024]);
+set(gca,'FontSize',16)
+hold on
+h_median_supperimposed=plot(dates(1):dates(end)+horizon, ActiveCasesDaily_median(1:end-detection_time), '-', 'Color', [1, 0, 0, 0.5], 'LineWidth', line_wd);
+h_CI=plot(dates(1):dates(end)+horizon, ActiveCasesDaily_lower(1:end-detection_time), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+plot(dates(1):dates(end)+horizon, ActiveCasesDaily_upper(1:end-detection_time), '--', 'Color', [0,155/255,1,1], 'LineWidth', line_wd);
+plot([dates(end),dates(end)], [0, ActiveCasesDaily_median(length(dates))], '--', 'Color', [0,0,0,1], 'LineWidth', 1);
+
+legend([h_median_supperimposed, h_CI], 'Prediction of observed active cases', strcat(sprintf('%0.0f', 100*(1-2*alpha)), '% conf. interval'), 'Location', 'NorthWest')
+xtickangle(90)
+x_ticks = xticks;
+xticks(x_ticks(1):7:x_ticks(end))
+dateaxis('X',2)
+ylabel('\bf{Active Cases (Observed)}')
+xlabel('\bf{Date}')
+title(scenario_title)
+if save_plots
+ print(strcat('./Figures/forecast_active_', scenario_name), '-dpng', '-r0')
+end
+
+end
diff --git a/lib/confInterval.m b/lib/confInterval.m
@@ -0,0 +1,20 @@
+function [Z_mean, Z_lower, Z_upper, Z_median]=confInterval(Z, alpha)
+% [Z_MEAN, Z_LOWER, Z_UPPER, Z_MEDIAN]=confInterval(Z, ALPHA) calculates the confidence intervals of the branching 
+% process Z with confidence level ALPHA.
+%
+% Z_MEAN - the expectation of the branching process
+% 
+% Z_LOWER and Z_UPPER - the lower and upper confidence bounds, resprectively
+%
+% Z_MEDIAN - the median of the branching process
+
+Z_lower=zeros(1,size(Z,2));
+Z_upper=zeros(1,size(Z,2));
+for t=1:size(Z,2)
+ [F,X]=ecdf(Z(:,t));
+ Z_lower(t)=X(find(F>=alpha/2, 1, 'first'))';
+ Z_upper(t)=X(find(F>=1-alpha./2, 1, 'first'))';
+end
+Z_mean=mean(Z,1)';
+Z_median=median(Z,1)';
+end
diff --git a/lib/obj_Mu_and_Im.m b/lib/obj_Mu_and_Im.m
@@ -0,0 +1,25 @@
+function [res, Y_proj, Im_matrix] = obj_Mu_and_Im(X, Z_0, mu_covid_pdf, Im_age_struct_prob, h, totalcases_hist, lambda)
+% objective function for estimating the Branching Process (BP) parameters - R0 and Immigration probabilities, 
+% such that the numerical approximation for the BP total progeny expectection is close to the actual total number of cases observed, but with
+% penalization for 1) expected daily new cases curvature, 2) R0(t) curvature and 3) Immigration probabilities curvature
+
+R0_daily = X(1:length(X)/2);
+Im_mu_daily = X((1+length(X)/2):end);
+R0 = reshape(repmat(R0_daily, 1/h,1), length(X)/2/h,1)';
+Im_mu = reshape(repmat(Im_mu_daily, 1/h,1), length(X)/2/h,1)';
+
+mu_matrix=R0.*repmat(mu_covid_pdf/(sum(mu_covid_pdf)*h),1, length(R0));
+
+Im_matrix=Im_age_struct_prob .* Im_mu;
+
+Y_proj = sum(Y_projection(Z_0, mu_matrix, Im_matrix, h))';
+newcases_proj = diff(Y_proj(1:2:(length(totalcases_hist)/h)));
+
+newcases_curvature = sqrt(sum((diff(newcases_proj,2)).^2))./length(newcases_proj);
+R0_daily_curvature = sqrt(sum((diff(R0_daily,2)).^2))./length(R0_daily);
+Im_mass_daily_curvature = sqrt(sum((diff(Im_mu_daily,2)).^2))./length(Im_mu_daily);
+
+% the multiplication constants are chosen for regularization
+res = 10*sqrt(sum((totalcases_hist - Y_proj(1:2:(length(totalcases_hist)/h))).^2))./(length(totalcases_hist)/h) + ...
+ lambda(1)*100*newcases_curvature + lambda(2)*100*R0_daily_curvature + lambda(3)*100*Im_mass_daily_curvature;
+end
diff --git a/lib/readCSVData.m b/lib/readCSVData.m
@@ -0,0 +1,28 @@
+function [dates_extended, newcases_extended, totalcases_extended] = readCSVData(CSVFile, CountryNames)
+%% reads the data in a table
+% if the format of data.csv changes, you need to edit this line:
+T = readtable(CSVFile,'Format','%{dd/MM/yyyy}D%f%f%f%f%f%q%q%q%f%q', 'Delimiter', ',', 'HeaderLines', 0, 'ReadVariableNames', true);
+
+T_slice = T((strcmp(T.countriesAndTerritories, CountryNames)),:);
+[dates, dates_sorted_ind] = sort(datenum(T_slice.year, T_slice.month, T_slice.day));
+newcases = T_slice.cases(dates_sorted_ind);
+
+dates_extended = dates(1):dates(end);
+newcases_extended = zeros(size(dates_extended));
+newcases_extended(arrayfun(@(x)(any(dates==x)), dates_extended))=newcases;
+totalcases_extended = cumsum(newcases_extended);
+
+dates_extended = dates_extended(totalcases_extended~=0)';
+newcases_extended = newcases_extended(totalcases_extended~=0)';
+totalcases_extended = totalcases_extended(totalcases_extended~=0)';
+
+end
+
+
+
+
+
+
+
+
+
diff --git a/lib/readWorldometerData.m b/lib/readWorldometerData.m
@@ -0,0 +1,80 @@
+function [dates_extended, newcases_extended, totalcases_extended] = readWorldometerData(CountryName)
+data_cell = textscan(webread(strcat('https://www.worldometers.info/coronavirus/country/', lower(CountryName), '/')), '%s', 'delimiter', newline);
+data_raw = data_cell{1};
+
+%% read total cases
+first_found=[];
+for i_line=1:length(data_raw)
+ if ~isempty(strfind(data_raw{i_line}, 'name: ''Cases'''))
+ first_found=i_line;
+ break;
+ end
+end
+data_ind=first_found;
+for i_line=first_found+1:length(data_raw)
+ if ~isempty(strfind(data_raw{i_line}, 'data: '))
+ data_ind=i_line;
+ break;
+ end
+end
+data_line = data_raw{data_ind};
+data_line = strrep(data_line, 'null', '0');
+totalcases_extended = cell2mat(textscan(data_line(strfind(data_line, '[')+1:strfind(data_line, ']')-1), '%f', 'Delimiter', ','));
+
+%% read new cases
+first_found=[];
+for i_line=1:length(data_raw)
+ if ~isempty(strfind(data_raw{i_line}, 'name: ''Daily Cases'''))
+ first_found=i_line;
+ break;
+ end
+end
+data_ind=first_found;
+for i_line=first_found+1:length(data_raw)
+ if ~isempty(strfind(data_raw{i_line}, 'data: '))
+ data_ind=i_line;
+ break;
+ end
+end
+data_line = data_raw{data_ind};
+data_line = strrep(data_line, 'null', '0');
+newcases_extended = cell2mat(textscan(data_line(strfind(data_line, '[')+1:strfind(data_line, ']')-1), '%f', 'Delimiter', ','));
+
+% %% read new recoveries
+% 
+% first_found=[];
+% for i_line=1:length(data_raw)
+% if ~isempty(strfind(data_raw{i_line}, 'name: ''New Recoveries'''))
+% first_found=i_line;
+% break;
+% end
+% end
+% data_ind=first_found;
+% for i_line=first_found+1:length(data_raw)
+% if ~isempty(strfind(data_raw{i_line}, 'data: '))
+% data_ind=i_line;
+% break;
+% end
+% end
+% data_line = data_raw{data_ind};
+% data_line = strrep(data_line, 'null', '0');
+% newrecoveries = cell2mat(textscan(data_line(strfind(data_line, '[')+1:strfind(data_line, ']')-1), '%f', 'Delimiter', ','));
+
+%% read dates
+first_found=[];
+for i_line=1:length(data_raw)
+ if ~isempty(strfind(data_raw{i_line}, 'categories: '))
+ first_found=i_line;
+ break;
+ end
+end
+data_line = data_raw{first_found};
+
+dates_raw = textscan(data_line(strfind(data_line, '[')+1:strfind(data_line, ']')-1), '%s', 'Delimiter', ',');
+dates_extended = datenum(strcat(strrep(dates_raw{1}, '"', ''), ' 2020'), 'mmm dd yyyy');
+
+ep_start = find(newcases_extended>0, 1, 'first');
+dates_extended=dates_extended(ep_start:end);
+newcases_extended=newcases_extended(ep_start:end);
+totalcases_extended=totalcases_extended(ep_start:end);
+end