From 1705e7314cf2c29a4bc0930171722899f57e8865 Mon Sep 17 00:00:00 2001
From: CITF <87315214+MoH-Malaysia@users.noreply.github.com>
Date: Mon, 21 Mar 2022 21:05:44 +0800
Subject: [PATCH] Create deaths_capita_vaxstatus_ts.ipynb
---
notebooks/deaths_capita_vaxstatus_ts.ipynb | 470 +++++++++++++++++++++
1 file changed, 470 insertions(+)
create mode 100644 notebooks/deaths_capita_vaxstatus_ts.ipynb
diff --git a/notebooks/deaths_capita_vaxstatus_ts.ipynb b/notebooks/deaths_capita_vaxstatus_ts.ipynb
new file mode 100644
index 000000000..e2d3b13b3
--- /dev/null
+++ b/notebooks/deaths_capita_vaxstatus_ts.ipynb
@@ -0,0 +1,470 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "22b06768",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from datetime import date, timedelta\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.dates as mdates\n",
+ "\n",
+ "# Raw CSV endpoints in the MoH-Malaysia covid19-public repository\n",
+ "linelist_deaths = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/linelist/linelist_deaths.csv'\n",
+ "vax_agg = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/vaccination/vax_malaysia.csv'\n",
+ "\n",
+ "# Adult population used as the per-capita denominator (same as other notebooks)\n",
+ "total_adults = 23_966_637\n",
+ "\n",
+ "# Analysis window\n",
+ "# start: Phase 2 vax started 19 Apr; 2nd dose on 10 May; fully vax on 24 May\n",
+ "# end: most recent 7 days incomplete, so stop 8 days before today\n",
+ "date_min = date(year=2021, month=5, day=24)\n",
+ "date_max = date.today() - timedelta(days=8)\n",
+ "data_range = f'data from {date_min:%d-%b} to {date_max:%d-%b}'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "35140c24",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " status | \n",
+ " deaths | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1152 | \n",
+ " 2022-03-08 | \n",
+ " boosted | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " 1153 | \n",
+ " 2022-03-08 | \n",
+ " fullyvax | \n",
+ " 37 | \n",
+ "
\n",
+ " \n",
+ " 1155 | \n",
+ " 2022-03-08 | \n",
+ " unvax | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 1156 | \n",
+ " 2022-03-09 | \n",
+ " boosted | \n",
+ " 20 | \n",
+ "
\n",
+ " \n",
+ " 1157 | \n",
+ " 2022-03-09 | \n",
+ " fullyvax | \n",
+ " 33 | \n",
+ "
\n",
+ " \n",
+ " 1159 | \n",
+ " 2022-03-09 | \n",
+ " unvax | \n",
+ " 33 | \n",
+ "
\n",
+ " \n",
+ " 1160 | \n",
+ " 2022-03-10 | \n",
+ " boosted | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ " 1161 | \n",
+ " 2022-03-10 | \n",
+ " fullyvax | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " 1163 | \n",
+ " 2022-03-10 | \n",
+ " unvax | \n",
+ " 18 | \n",
+ "
\n",
+ " \n",
+ " 1164 | \n",
+ " 2022-03-11 | \n",
+ " boosted | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " 1165 | \n",
+ " 2022-03-11 | \n",
+ " fullyvax | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " 1167 | \n",
+ " 2022-03-11 | \n",
+ " unvax | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 1168 | \n",
+ " 2022-03-12 | \n",
+ " boosted | \n",
+ " 23 | \n",
+ "
\n",
+ " \n",
+ " 1169 | \n",
+ " 2022-03-12 | \n",
+ " fullyvax | \n",
+ " 52 | \n",
+ "
\n",
+ " \n",
+ " 1171 | \n",
+ " 2022-03-12 | \n",
+ " unvax | \n",
+ " 32 | \n",
+ "
\n",
+ " \n",
+ " 1172 | \n",
+ " 2022-03-13 | \n",
+ " boosted | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ " 1173 | \n",
+ " 2022-03-13 | \n",
+ " fullyvax | \n",
+ " 35 | \n",
+ "
\n",
+ " \n",
+ " 1175 | \n",
+ " 2022-03-13 | \n",
+ " unvax | \n",
+ " 25 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date status deaths\n",
+ "1152 2022-03-08 boosted 14\n",
+ "1153 2022-03-08 fullyvax 37\n",
+ "1155 2022-03-08 unvax 22\n",
+ "1156 2022-03-09 boosted 20\n",
+ "1157 2022-03-09 fullyvax 33\n",
+ "1159 2022-03-09 unvax 33\n",
+ "1160 2022-03-10 boosted 21\n",
+ "1161 2022-03-10 fullyvax 39\n",
+ "1163 2022-03-10 unvax 18\n",
+ "1164 2022-03-11 boosted 13\n",
+ "1165 2022-03-11 fullyvax 39\n",
+ "1167 2022-03-11 unvax 22\n",
+ "1168 2022-03-12 boosted 23\n",
+ "1169 2022-03-12 fullyvax 52\n",
+ "1171 2022-03-12 unvax 32\n",
+ "1172 2022-03-13 boosted 25\n",
+ "1173 2022-03-13 fullyvax 35\n",
+ "1175 2022-03-13 unvax 25"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def vaxStatus(date_pos, date1, date2, date3, lag_full=14, lag_boost=7):\n",
+ "    \"\"\"Classify vaccination status as of the positive-test date.\n",
+ "\n",
+ "    All four dates must support subtraction yielding an object with a `.days`\n",
+ "    attribute (e.g. `datetime.date`). A missing dose must be passed as a date\n",
+ "    later than `date_pos` so that its day difference is negative.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    date_pos : date the case tested positive\n",
+ "    date1, date2, date3 : dates of dose 1, dose 2 and the booster dose\n",
+ "    lag_full : whole days after dose 2 before a person counts as 'fullyvax' (default 14)\n",
+ "    lag_boost : whole days after dose 3 before a person counts as 'boosted' (default 7)\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    One of 'boosted', 'fullyvax', 'partialvax', 'unvax'; the highest status reached wins.\n",
+ "    \"\"\"\n",
+ "    if (date_pos - date3).days >= lag_boost:\n",
+ "        return 'boosted'\n",
+ "    if (date_pos - date2).days >= lag_full:\n",
+ "        return 'fullyvax'\n",
+ "    # Dose 1 counts from the day it is given (difference of 0 days)\n",
+ "    if (date_pos - date1).days >= 0:\n",
+ "        return 'partialvax'\n",
+ "    return 'unvax'\n",
+ " \n",
+ "\n",
+ "# Pull latest deaths linelist and wrangle\n",
+ "cols_date = ['date', 'date_positive', 'date_dose1', 'date_dose2', 'date_dose3']\n",
+ "cols_dose = ['date_dose1', 'date_dose2', 'date_dose3']\n",
+ "df = pd.read_csv(linelist_deaths, usecols=cols_date + ['brand1', 'age'])\n",
+ "for c in cols_date: df[c] = pd.to_datetime(df[c], errors='coerce').dt.date\n",
+ "df = df[(df.date >= date_min) & (df.date <= date_max)]\n",
+ "# adults only; non-inplace drop returns a fresh frame, so the column assignments below do not write into a slice\n",
+ "df = df[df.age > 17].drop(columns=['age'])\n",
+ "\n",
+ "# Ensure no null vax dates: a future date stands in for a dose never given, so the day\n",
+ "# differences in vaxStatus come out negative. Computed once so every column gets the same value.\n",
+ "placeholder = date.today() + timedelta(1)\n",
+ "for c in cols_dose: df[c] = df[c].fillna(placeholder)\n",
+ "# Shift 14 days for Cansino: dose-2 date is set to dose 1 + 14 days\n",
+ "# NOTE(review): presumably because Cansino is a single-dose regimen - confirm\n",
+ "df.loc[df.brand1.isin(['Cansino']), 'date_dose2'] = df.date_dose1 + timedelta(14)\n",
+ "df['status'] = df.apply(lambda x: vaxStatus(x['date_positive'], x['date_dose1'], x['date_dose2'], x['date_dose3']), axis=1)\n",
+ "df['deaths'] = 1\n",
+ "\n",
+ "# Aggregate ONLY the deaths counter. Summing every remaining column (object-dtype dates,\n",
+ "# brand strings) relied on old pandas silently dropping them and fails on pandas >= 2.0.\n",
+ "# unstack/asfreq/stack makes the result robust to dates with no deaths at all.\n",
+ "df.date = pd.to_datetime(df.date)\n",
+ "df = (\n",
+ "    df.groupby(['date', 'status'])[['deaths']].sum()\n",
+ "    .unstack(fill_value=0)\n",
+ "    .asfreq('D', fill_value=0)\n",
+ "    .stack()\n",
+ "    .reset_index()\n",
+ ")\n",
+ "df.date = df.date.dt.date\n",
+ "df = df[~df.status.isin(['partialvax'])]\n",
+ "df.tail(18)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "00d778d2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " date | \n",
+ " status | \n",
+ " population | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1547 | \n",
+ " 2022-03-08 | \n",
+ " boosted | \n",
+ " 14759732 | \n",
+ "
\n",
+ " \n",
+ " 1157 | \n",
+ " 2022-03-08 | \n",
+ " fullyvax | \n",
+ " 8719041 | \n",
+ "
\n",
+ " \n",
+ " 377 | \n",
+ " 2022-03-08 | \n",
+ " unvax | \n",
+ " 758016 | \n",
+ "
\n",
+ " \n",
+ " 1548 | \n",
+ " 2022-03-09 | \n",
+ " boosted | \n",
+ " 14830764 | \n",
+ "
\n",
+ " \n",
+ " 1158 | \n",
+ " 2022-03-09 | \n",
+ " fullyvax | \n",
+ " 8622531 | \n",
+ "
\n",
+ " \n",
+ " 378 | \n",
+ " 2022-03-09 | \n",
+ " unvax | \n",
+ " 757120 | \n",
+ "
\n",
+ " \n",
+ " 1549 | \n",
+ " 2022-03-10 | \n",
+ " boosted | \n",
+ " 14903527 | \n",
+ "
\n",
+ " \n",
+ " 1159 | \n",
+ " 2022-03-10 | \n",
+ " fullyvax | \n",
+ " 8532850 | \n",
+ "
\n",
+ " \n",
+ " 379 | \n",
+ " 2022-03-10 | \n",
+ " unvax | \n",
+ " 756138 | \n",
+ "
\n",
+ " \n",
+ " 1550 | \n",
+ " 2022-03-11 | \n",
+ " boosted | \n",
+ " 14961911 | \n",
+ "
\n",
+ " \n",
+ " 1160 | \n",
+ " 2022-03-11 | \n",
+ " fullyvax | \n",
+ " 8445111 | \n",
+ "
\n",
+ " \n",
+ " 380 | \n",
+ " 2022-03-11 | \n",
+ " unvax | \n",
+ " 755183 | \n",
+ "
\n",
+ " \n",
+ " 1551 | \n",
+ " 2022-03-12 | \n",
+ " boosted | \n",
+ " 15000150 | \n",
+ "
\n",
+ " \n",
+ " 1161 | \n",
+ " 2022-03-12 | \n",
+ " fullyvax | \n",
+ " 8381290 | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 2022-03-12 | \n",
+ " unvax | \n",
+ " 754344 | \n",
+ "
\n",
+ " \n",
+ " 1552 | \n",
+ " 2022-03-13 | \n",
+ " boosted | \n",
+ " 15032666 | \n",
+ "
\n",
+ " \n",
+ " 1162 | \n",
+ " 2022-03-13 | \n",
+ " fullyvax | \n",
+ " 8322054 | \n",
+ "
\n",
+ " \n",
+ " 382 | \n",
+ " 2022-03-13 | \n",
+ " unvax | \n",
+ " 753674 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " date status population\n",
+ "1547 2022-03-08 boosted 14759732\n",
+ "1157 2022-03-08 fullyvax 8719041\n",
+ "377 2022-03-08 unvax 758016\n",
+ "1548 2022-03-09 boosted 14830764\n",
+ "1158 2022-03-09 fullyvax 8622531\n",
+ "378 2022-03-09 unvax 757120\n",
+ "1549 2022-03-10 boosted 14903527\n",
+ "1159 2022-03-10 fullyvax 8532850\n",
+ "379 2022-03-10 unvax 756138\n",
+ "1550 2022-03-11 boosted 14961911\n",
+ "1160 2022-03-11 fullyvax 8445111\n",
+ "380 2022-03-11 unvax 755183\n",
+ "1551 2022-03-12 boosted 15000150\n",
+ "1161 2022-03-12 fullyvax 8381290\n",
+ "381 2022-03-12 unvax 754344\n",
+ "1552 2022-03-13 boosted 15032666\n",
+ "1162 2022-03-13 fullyvax 8322054\n",
+ "382 2022-03-13 unvax 753674"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Days by which each cumulative series is lagged before it is used as a population\n",
+ "shifts = {'cumul_partial_adult': 1, 'cumul_full_adult': 14, 'cumul_booster':7}\n",
+ "col_status = ['unvax','partialvax','fullyvax','boosted']\n",
+ "\n",
+ "vf = pd.read_csv(vax_agg)\n",
+ "vf['date'] = pd.to_datetime(vf['date']).dt.date\n",
+ "\n",
+ "# Adults with at least one dose = all first doses minus adolescent and child first doses\n",
+ "adults_dosed = vf['cumul_partial'] - vf['cumul_partial_adol'] - vf['cumul_partial_child']\n",
+ "vf['unvax_adult'] = total_adults - adults_dosed\n",
+ "# Fully vaccinated adults excluding those already boosted\n",
+ "vf['cumul_full_adult'] = vf['cumul_full'] - vf['cumul_full_adol'] - vf['cumul_full_child'] - vf['cumul_booster']\n",
+ "# Partially vaccinated adults = dosed adults that are neither fully vaccinated nor boosted\n",
+ "vf['cumul_partial_adult'] = adults_dosed - vf['cumul_full_adult'] - vf['cumul_booster']\n",
+ "\n",
+ "# Lag each series; rows vacated by the shift become 0 (unvax_adult is not lagged)\n",
+ "for col, lag in shifts.items():\n",
+ "    vf[col] = vf[col].shift(lag).fillna(0).astype(int)\n",
+ "\n",
+ "# Keep one column per status, relabel, and reshape to long (date, status, population)\n",
+ "vf = vf[['date','unvax_adult','cumul_partial_adult','cumul_full_adult','cumul_booster']]\n",
+ "vf.columns = ['date', *col_status]\n",
+ "vf = vf.melt(id_vars=['date'], value_vars=col_status, var_name='status', value_name='population')\n",
+ "\n",
+ "# Restrict to the analysis window and drop the partially vaccinated group\n",
+ "keep = (vf['date'] >= date_min) & (vf['date'] <= date_max) & (vf['status'] != 'partialvax')\n",
+ "vf = vf[keep].sort_values(by=['date','status']).reset_index(drop=True)\n",
+ "vf.tail(18)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "34bd49f1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Merge frames and compute incidence, then get 7d rolling average\n",
+ "# Map status codes to display labels BY NAME; the dict order is also the final column order.\n",
+ "labels = {'unvax': 'Unvaccinated', 'fullyvax': 'Fully Vaccinated', 'boosted': 'Boosted'}\n",
+ "df = pd.merge(df, vf, on=['date','status'], how='left')\n",
+ "# Deaths per 100k of the matching population. A zero population yields +/-inf, which\n",
+ "# fillna(0) would not catch, so turn it into NaN first.\n",
+ "df['capita'] = (df.deaths / df.population * 1e5).replace([np.inf, -np.inf], np.nan)\n",
+ "df = (\n",
+ "    df.pivot(index='date', columns='status', values='capita')\n",
+ "    .rename(columns=labels)\n",
+ "    .reindex(columns=list(labels.values()))  # guarantees all three columns, in this order\n",
+ "    .rename_axis(columns=None)\n",
+ "    .fillna(0)\n",
+ ")\n",
+ "# First 6 rows are NaN because the 7-day window is not yet full\n",
+ "df = df.rolling(7).mean().reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "54685078",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}