Wrangle live beds into a simple census

Run the beds live sql and then wrangle the data into a simple census

from pathlib import Path

import numpy as np
import pandas as pd
import sqlalchemy as sa

Assume that we’re running from the notebooks directory and need to pull data from ./src/mock/mock.db

sqlite_db = "../src/mock/mock.db"
assert Path(sqlite_db).is_file()
engine = sa.create_engine(f"sqlite:///{sqlite_db}")
df = pd.read_sql("bedsmock", engine)
df.iloc[0]

So now you want to return a dataframe with the following characteristics - per department (one row per department) - count number of beds - count number of occupied beds - count number of empty beds - last date of closed discharge from that department (i.e. if > 1 week and most beds empty then is the department closed) - number of side rooms?

temp = (
    df["location_string"]
    .str.split("^", expand=True)
    .rename(columns={0: "dept", 1: "room", 2: "bed"})
)
for s in ["dept", "room", "bed"]:
    df[s] = temp[s]
del temp

remove null and waiting locations

mask = df["bed"].str.lower().isin(["null", "wait"])
df = df[~mask]
df.shape
groups = df.groupby("department")
groups.get_group("GWB L01 CRITICAL CARE")
res = groups.agg(
    beds=("location_id", "count"),
    patients=("occupied", "sum"),
    last_dc=("cvl_discharge", lambda x: x.max(skipna=True)),
    modified_at=("modified_at", "max"),
)
res["empties"] = res["beds"] - res["patients"]
res["opens"] = res["empties"]  # place holder : need to subtract closed from empties
res["last_dc"] = (res["modified_at"] - res["last_dc"]).apply(
    lambda x: pd.Timedelta.floor(x, "d")
)
res["closed_temp"] = pd.DataFrame(
    [
        res["last_dc"] > pd.Timedelta(2, "days"),
        res["last_dc"] <= pd.Timedelta(30, "days"),
        res["patients"] == 0,
    ]
).T.all(axis="columns")

res["closed_perm"] = pd.DataFrame(
    [
        res["last_dc"] > pd.Timedelta(30, "days"),
        res["patients"] == 0,
    ]
).T.all(axis="columns")
mask = ~res["closed_perm"]

res = res[mask]
res = res[
    ["beds", "patients", "empties", "opens", "last_dc", "closed_temp" "modified_at"]
]
res
from typing import List

import pandas as pd


def _split_location_string(df: pd.DataFrame) -> pd.DataFrame:
    """
    Splits a location string into dept/room/bed
    """
    temp = (
        df["location_string"]
        .str.split("^", expand=True)
        .rename(columns={0: "dept", 1: "room", 2: "bed"})
    )
    for s in ["dept", "room", "bed"]:
        df[s] = temp[s]
    return df


def _remove_non_beds(
    df: pd.DataFrame, nonbeds: List[str] = ["null", "wait"]
) -> pd.DataFrame:
    """
    Removes non beds e.g. null, wait
    """
    mask = df["bed"].str.lower().isin(nonbeds)
    df = df[~mask]
    return df


def _aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregation from location (bed) level to ward level
    """
    groups = df.groupby("department")
    # aggregate by dept
    res = groups.agg(
        beds=("location_id", "count"),
        patients=("occupied", "sum"),
        last_dc=("cvl_discharge", lambda x: x.max(skipna=True)),
        modified_at=("modified_at", "max"),
    )
    # calculate additional numbers
    res["empties"] = res["beds"] - res["patients"]
    res["opens"] = res["empties"]  # place holder : need to subtract closed from empties
    res["last_dc"] = (
        (res["modified_at"] - res["last_dc"])
        .apply(lambda x: pd.Timedelta.floor(x, "d"))
        .dt.days
    )

    # defined closed: temp and perm
    res["closed_temp"] = pd.DataFrame(
        [
            res["last_dc"] > 2,
            res["last_dc"] <= 30,
            res["patients"] == 0,
        ]
    ).T.all(axis="columns")

    res["closed_perm"] = pd.DataFrame(
        [
            res["last_dc"] > 30,
            res["patients"] == 0,
        ]
    ).T.all(axis="columns")

    # drop closed perm
    mask = ~res["closed_perm"]

    res = res[mask]
    res = res[
        [
            "beds",
            "patients",
            "empties",
            "opens",
            "last_dc",
            "closed_temp",
            "modified_at",
        ]
    ]
    res.reset_index(inplace=True)
    return res


def aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregation from location (bed) level to ward level
    Wrapper function
    """
    df = _split_location_string(df)
    df = _remove_non_beds(df)
    df = _aggregate_by_department(df)
    return df
df = pd.read_sql("bedsmock", engine)
aggregate_by_department(df)