7.1. Case Study COVID-19

7.1.1. Rationale

7.1.2. Code

import pandas as pd
from doctest import testmod as run_doctest


# Raw CSV time series published in the CSSEGISandData/COVID-19 GitHub
# repository: one file per metric, all under the same directory.
CONFIRMED = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
    '/csse_covid_19_data/csse_covid_19_time_series'
    '/time_series_covid19_confirmed_global.csv'
)
DEATHS = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
    '/csse_covid_19_data/csse_covid_19_time_series'
    '/time_series_covid19_deaths_global.csv'
)
RECOVERED = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master'
    '/csse_covid_19_data/csse_covid_19_time_series'
    '/time_series_covid19_recovered_global.csv'
)


# Download every time series into its own DataFrame (network access needed).
confirmed, deaths, recovered = (
    pd.read_csv(url) for url in (CONFIRMED, DEATHS, RECOVERED)
)


def _get(column_name, df, country='__all__'):
    """
    >>> _get('Confirmed', confirmed, 'Poland').loc['2020-11-11']
    Confirmed    618813
    Name: 2020-11-11 00:00:00, dtype: int64

    >>> _get('Confirmed', confirmed, 'US').loc['2020-11-11']
    Confirmed    10495075
    Name: 2020-11-11 00:00:00, dtype: int64

    >>> _get('Deaths', confirmed, 'Poland').loc['2020-11-11']
    Deaths    618813
    Name: 2020-11-11 00:00:00, dtype: int64
    """
    if country != '__all__':
        query = df['Country/Region'] == country
        df = df[query]

    df = df.transpose()[4:].sum(axis='columns').astype(int)
    df = pd.DataFrame(df)
    df.columns = [column_name]
    df.index = pd.to_datetime(df.index)
    return df


def covid19(country='__all__'):
    """
    Combine the confirmed, deaths and recovered series into one frame.

    Each module-level table is reduced with ``_get`` for the requested
    ``country`` (``'__all__'`` keeps every row) and the three results are
    placed side by side as the columns ``Confirmed``, ``Deaths`` and
    ``Recovered``, in that order.

    >>> covid19('Poland').loc['2020-11-11']
    Confirmed    618813
    Deaths         8805
    Recovered    242875
    Name: 2020-11-11 00:00:00, dtype: int64

    >>> covid19('US').loc['2020-11-11']
    Confirmed    10495075
    Deaths         243077
    Recovered     3997175
    Name: 2020-11-11 00:00:00, dtype: int64
    """
    sources = (
        ('Confirmed', confirmed),
        ('Deaths', deaths),
        ('Recovered', recovered),
    )
    frames = [_get(label, table, country) for label, table in sources]
    return pd.concat(frames, axis=1)


def trend(df, since='2020-06-01', until='2021'):
    """
    Return the confirmed-to-recovered ratio within a date window.

    ``df`` is sliced by label from ``since`` to ``until`` and, for the rows
    that remain, ``Confirmed`` is divided element-wise by ``Recovered``.

    >>> poland = covid19('Poland')
    >>> trend(poland).loc['2020-11-11']
    2.54786618630983
    """
    window = df.loc[since:until]
    confirmed_cases = window['Confirmed']
    recovered_cases = window['Recovered']
    return confirmed_cases / recovered_cases


# One combined frame per region of interest; '__all__' keeps every row.
poland, us, india, france, china, world = map(
    covid19,
    ('Poland', 'US', 'India', 'France', 'China', '__all__'),
)

# Plot the confirmed/recovered ratio from 2021-01-01 through 2021 for the
# selected regions (france is built above but is not part of this plot).
for _region in (poland, india, us, china, world):
    trend(_region, since='2021-01-01', until='2021').plot()


# Number of new cases in last two weeks
# ``DataFrame.last`` is deprecated since pandas 2.1, so the window is taken
# with an explicit date slice. ``diff`` turns the running totals into
# day-over-day increments (the first row of the window becomes NaN).
two_weeks_ago = poland.index.max() - pd.Timedelta(weeks=2)
new_cases = poland.loc[two_weeks_ago:].diff().plot()

# Average number of cases each month (mean of the daily values)
# The offset object is the month-end frequency under every pandas version,
# whereas its string alias was renamed from 'M' to 'ME'.
average_cases = poland.resample(pd.offsets.MonthEnd()).mean().plot()

# Select timeframe
since_december = poland.loc['2021-12':, ['Confirmed', 'Deaths']].plot(
    subplots=True,
    layout=(1, 2),
)

# Ratio of deaths vs confirmed cases in last two weeks
# Both columns hold running totals, so this is the overall deaths/confirmed
# ratio on each day rather than a ratio of daily increments.
timeframe = poland.loc[two_weeks_ago:]
ratio = timeframe['Deaths'] / timeframe['Confirmed']
ratio.plot()

# Percent of deaths vs confirmed cases in last two weeks
# Reuses the ratio computed just above, scaled to percent.
percent = ratio * 100
percent.round(decimals=3).plot(
    kind='line',
    title='Percent of deaths vs new cases in last two weeks',
    xlabel='Day',
    ylabel='Percent',
    ylim=(2.1, 2.5),
    figsize=(10, 10),
    grid=True)