# Cells 1-2 of notebooks/cfr_vaxstatus.ipynb: imports, data sources,
# reporting window, and the vaccination-status / age-group encoders.
import pandas as pd
from tabulate import tabulate

import numpy as np
import datetime
from datetime import date, timedelta

import seaborn as sb
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import matplotlib.ticker as tkr

# CSV line lists read straight from the MoH-Malaysia covid19-public repository.
linelist_cases_1 = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/linelist/linelist_cases_06.csv'
linelist_cases_2 = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/linelist/linelist_cases_07.csv'
linelist_deaths = 'https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/linelist/linelist_deaths.csv'

# Reporting window: the 14 days ending yesterday.
# The clock is read once so both bounds are derived from the same calendar day.
_today = date.today()
date_min = _today - timedelta(14)
date_max = _today - timedelta(1)


def vaxStatus(date_pos, date1, date2, date3, cases=1):
    """Classify one record as 'boosted', 'fullyvax', 'partialvax' or 'unvax'.

    The first matching rule wins:
      * more than 6 days since dose 3   -> 'boosted'
      * more than 13 days since dose 2  -> 'fullyvax'
      * 0 or more days since dose 1     -> 'partialvax'
      * otherwise                       -> 'unvax'

    Parameters
    ----------
    date_pos : date of the positive test; only used when ``cases == 0``.
    date1, date2, date3 : per-dose values. With ``cases == 1`` they are
        compared directly as elapsed-day counts; with ``cases == 0`` they are
        subtracted from ``date_pos`` and the ``.days`` of the difference is used.
    cases : 1 for the day-count form, 0 for the date form.

    Raises
    ------
    ValueError
        If ``cases`` is neither 0 nor 1 (previously this fell through and
        returned None, which silently dropped the record from later groupbys).
    """
    if cases == 1:
        def elapsed(dose):
            return dose
    elif cases == 0:
        def elapsed(dose):
            return (date_pos - dose).days
    else:
        raise ValueError('cases must be 0 or 1, got {!r}'.format(cases))

    # Evaluated lazily, highest dose first, exactly like the original chain.
    if elapsed(date3) > 6:
        return 'boosted'
    if elapsed(date2) > 13:
        return 'fullyvax'
    if elapsed(date1) >= 0:
        return 'partialvax'
    return 'unvax'


def castAge(age):
    """Map an age in years to its age-band label; -1 maps to 'missing'."""
    if age == -1:
        return 'missing'
    # (exclusive upper bound, label), in ascending order.
    bands = (
        (5, '0_4'),
        (12, '5_11'),
        (18, '12_17'),
        (30, '18_29'),
        (40, '30_39'),
        (50, '40_49'),
        (60, '50_59'),
        (70, '60_69'),
        (80, '70_79'),
    )
    for upper, label in bands:
        if age < upper:
            return label
    return '80+'


# Lookup table for Series.map(): every integer age from -1 to 149 -> band label.
ages = list(range(-1, 150))
age_cat = {age: castAge(age) for age in ages}
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agestatusdeaths
018_29fullyvax7
118_29unvax5
230_39boosted6
330_39fullyvax13
430_39unvax7
540_49boosted17
640_49fullyvax20
740_49unvax17
850_59boosted27
950_59fullyvax50
1050_59unvax28
1160_69boosted37
1260_69fullyvax94
1360_69unvax40
1470_79boosted55
1570_79fullyvax118
1670_79unvax60
1780+boosted52
1880+fullyvax137
1980+unvax117
20Overallboosted194
21Overallfullyvax439
22Overallunvax274
\n", + "
" + ], + "text/plain": [ + " age status deaths\n", + "0 18_29 fullyvax 7\n", + "1 18_29 unvax 5\n", + "2 30_39 boosted 6\n", + "3 30_39 fullyvax 13\n", + "4 30_39 unvax 7\n", + "5 40_49 boosted 17\n", + "6 40_49 fullyvax 20\n", + "7 40_49 unvax 17\n", + "8 50_59 boosted 27\n", + "9 50_59 fullyvax 50\n", + "10 50_59 unvax 28\n", + "11 60_69 boosted 37\n", + "12 60_69 fullyvax 94\n", + "13 60_69 unvax 40\n", + "14 70_79 boosted 55\n", + "15 70_79 fullyvax 118\n", + "16 70_79 unvax 60\n", + "17 80+ boosted 52\n", + "18 80+ fullyvax 137\n", + "19 80+ unvax 117\n", + "20 Overall boosted 194\n", + "21 Overall fullyvax 439\n", + "22 Overall unvax 274" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pull latest death linelist and wrangle\n", + "df = pd.read_csv(linelist_deaths, usecols=['date', 'age', 'date_positive', 'date_dose1', 'date_dose2', 'date_dose3', 'brand1'])\n", + "for c in ['date', 'date_positive', 'date_dose1', 'date_dose2', 'date_dose3']: df[c] = pd.to_datetime(df[c], errors='coerce').dt.date\n", + "df = df[(df.date >= date_min) & (df.date <= date_max)]\n", + "\n", + "# Ensure no null vax dates (future date as placeholder), shift 14 days for Cansino, then encode vax status and age group\n", + "for c in ['date_dose1', 'date_dose2', 'date_dose3']: df[c] = df[c].fillna(date.today() + timedelta(1))\n", + "df.loc[df.brand1.isin(['Cansino']), 'date_dose2'] = df.date_dose1 + timedelta(14)\n", + "df['status'] = df.apply(lambda x: vaxStatus(x['date_positive'], x['date_dose1'], x['date_dose2'], x['date_dose3'], cases=0), axis=1)\n", + "df = df.replace(date.today() + timedelta(1), np.nan) # Remove placeholder dates\n", + "df.age = df.age.map(age_cat) # Encode age group\n", + "\n", + "# Tabulate\n", + "df = df[~df.age.isin(['missing','0_4','5_11','12_17'])].groupby(['age', 'status']).size().to_frame('deaths').reset_index()\n", + "df = df[~df.status.isin(['partialvax'])].reset_index(drop=True)\n", + "df = 
pd.concat([df, df.groupby(['status']).sum().reset_index()],axis=0).fillna('Overall').reset_index(drop=True)\n", + "df.head(len(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "08ad8f97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agestatuscases
018_29boosted52657
118_29fullyvax52817
218_29unvax4183
330_39boosted56575
430_39fullyvax34507
530_39unvax3822
640_49boosted37800
740_49fullyvax15158
840_49unvax2071
950_59boosted25015
1050_59fullyvax9390
1150_59unvax1398
1260_69boosted15879
1360_69fullyvax6520
1460_69unvax1052
1570_79boosted6430
1670_79fullyvax3355
1770_79unvax759
1880+boosted1567
1980+fullyvax1346
2080+unvax927
21Overallboosted195923
22Overallfullyvax123093
23Overallunvax14212
\n", + "
" + ], + "text/plain": [ + " age status cases\n", + "0 18_29 boosted 52657\n", + "1 18_29 fullyvax 52817\n", + "2 18_29 unvax 4183\n", + "3 30_39 boosted 56575\n", + "4 30_39 fullyvax 34507\n", + "5 30_39 unvax 3822\n", + "6 40_49 boosted 37800\n", + "7 40_49 fullyvax 15158\n", + "8 40_49 unvax 2071\n", + "9 50_59 boosted 25015\n", + "10 50_59 fullyvax 9390\n", + "11 50_59 unvax 1398\n", + "12 60_69 boosted 15879\n", + "13 60_69 fullyvax 6520\n", + "14 60_69 unvax 1052\n", + "15 70_79 boosted 6430\n", + "16 70_79 fullyvax 3355\n", + "17 70_79 unvax 759\n", + "18 80+ boosted 1567\n", + "19 80+ fullyvax 1346\n", + "20 80+ unvax 927\n", + "21 Overall boosted 195923\n", + "22 Overall fullyvax 123093\n", + "23 Overall unvax 14212" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Pull latest 2 case linelists and wrangle\n", + "col_cases = ['date', 'age', 'days_dose1', 'days_dose2', 'days_dose3', 'brand1']\n", + "cf = pd.concat([pd.read_csv(linelist_cases_1, usecols=col_cases),\n", + " pd.read_csv(linelist_cases_2, usecols=col_cases)],axis=0)\n", + "for c in ['date']: cf[c] = pd.to_datetime(cf[c], errors='coerce').dt.date\n", + "cf = cf[(cf.date >= date_min - timedelta(7)) & (cf.date <= date_max - timedelta(7))] # backshift to account for lag between cases and deaths\n", + "\n", + "# Ensure no null vax dates (-1 as placeholder), shift 14 days for Cansino, then encode vax status and age group\n", + "for c in ['days_dose1', 'days_dose2', 'days_dose3']: cf[c] = cf[c].fillna(-1)\n", + "cf.loc[cf.brand1.isin(['c']), 'days_dose2'] = cf.days_dose1 - 14\n", + "cf['status'] = cf.apply(lambda x: vaxStatus(x['date'], x['days_dose1'], x['days_dose2'], x['days_dose3'], cases=1), axis=1)\n", + "cf.age = cf.age.map(age_cat) # Encode age group\n", + "\n", + "# Tabulate\n", + "cf = cf[~cf.age.isin(['missing','0_4','5_11','12_17'])].groupby(['age', 'status']).size().to_frame('cases').reset_index()\n", + "cf = 
cf[~cf.status.isin(['partialvax'])].reset_index(drop=True)\n", + "cf = pd.concat([cf, cf.groupby(['status']).sum().reset_index()],axis=0).fillna('Overall').reset_index(drop=True)\n", + "cf.head(len(cf))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a75c9d8a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageUnvaccinatedFully VaccinatedBoosted
018_290.1195310.0132530.000000
130_390.1831500.0376740.010605
240_490.8208590.1319440.044974
350_592.0028610.5324810.107935
460_693.8022811.4417180.233012
570_797.9051383.5171390.855365
680+12.62135910.1783063.318443
7Overall1.9279480.3566410.099018
\n", + "
" + ], + "text/plain": [ + " age Unvaccinated Fully Vaccinated Boosted\n", + "0 18_29 0.119531 0.013253 0.000000\n", + "1 30_39 0.183150 0.037674 0.010605\n", + "2 40_49 0.820859 0.131944 0.044974\n", + "3 50_59 2.002861 0.532481 0.107935\n", + "4 60_69 3.802281 1.441718 0.233012\n", + "5 70_79 7.905138 3.517139 0.855365\n", + "6 80+ 12.621359 10.178306 3.318443\n", + "7 Overall 1.927948 0.356641 0.099018" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Merge frames and compute incidence, then pivot\n", + "df = pd.merge(cf,df, on=['age','status'], how='left')\n", + "df['cfr'] = df.deaths/df.cases * 100\n", + "try: assert len(df[df.cfr < 0]) == 0\n", + "except AssertionError:\n", + " print('Negative CFR(s) detected')\n", + " df.head(len(df))\n", + "df = df.pivot(index='age', columns='status', values=['cfr']).fillna(0).reset_index()\n", + "df.columns = ['age','Boosted','Fully Vaccinated','Unvaccinated']\n", + "df = df[['age','Unvaccinated','Fully Vaccinated','Boosted']]\n", + "df.head(len(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8c846da8", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Chart\n", + "def percFormat(x, pos): return ('{:,}'.format(x) + '%').replace('.0', '')\n", + "\n", + "plt.rcParams.update({'font.size': 10,\n", + " 'font.family':'Monospace',\n", + " 'grid.linestyle':'dashed'})\n", + "plt.rcParams[\"figure.figsize\"] = [8,6]\n", + "plt.rcParams[\"figure.autolayout\"] = True\n", + "fig, ax = plt.subplots()\n", + "\n", + "df.age = df.age.str.replace('_', ' to ')\n", + "df.plot(x='age', y=['Unvaccinated','Fully Vaccinated','Boosted'], kind='bar',\n", + " color=['#a70000','lightgray','#183f78'],\n", + " align='center',\n", + " width=0.7,linewidth=0.3,\n", + " edgecolor='black',ax=ax)\n", + "ax.spines['top'].set_visible(False)\n", + "ax.spines['right'].set_visible(False)\n", + "\n", + "ax.yaxis.grid(True)\n", + "ax.set_axisbelow(True)\n", + "ax.legend(loc='upper center', bbox_to_anchor=(0.2, 0.95), ncol=1, labelspacing = 1.5, frameon=True, fancybox=True)\n", + "plt.xticks(rotation=0)\n", + "ax.yaxis.set_major_formatter(tkr.FuncFormatter(percFormat))\n", + "plt.tick_params(bottom=False)\n", + "\n", + "plt.title('Case Fatality Rate (CFR) by Age & Vax Status \\n\\n' + 'Note: CFR values likely exceed true death rates due to under-testing\\n\\n'\n", + " '(deaths data from ' + date_min.strftime('%d-%b') + ' to ' + date_max.strftime('%d-%b') + ', ' +\n", + " 'cases data from ' + (date_min-timedelta(7)).strftime('%d-%b') + ' to ' + (date_max-timedelta(7)).strftime('%d-%b') + ')')\n", + "plt.xlabel('')\n", + "plt.ylabel('')\n", + "plt.savefig('src_deaths/deaths_vaxstatus_cfr.png', bbox_inches='tight', pad_inches=0.2,dpi=400)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc1e9067", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { 
+ "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}