from pathlib import Path
import numpy as np
import pandas as pd
import sqlalchemy as sa
Wrangle live beds into a simple census
Run the beds live sql and then wrangle the data into a simple census
Assume that we’re running from the notebooks directory and need to pull data from ./src/mock/mock.db
= "../src/mock/mock.db"
sqlite_db assert Path(sqlite_db).is_file()
= sa.create_engine(f"sqlite:///{sqlite_db}") engine
= pd.read_sql("bedsmock", engine) df
0] df.iloc[
So now you want to return a dataframe with the following characteristics - per department (one row per department) - count number of beds - count number of occupied beds - count number of empty beds - last date of closed discharge from that department (i.e. if > 1 week and most beds empty then is the department closed) - number of side rooms?
= (
temp "location_string"]
df[str.split("^", expand=True)
.={0: "dept", 1: "room", 2: "bed"})
.rename(columns
)for s in ["dept", "room", "bed"]:
= temp[s] df[s]
del temp
remove null and waiting locations
= df["bed"].str.lower().isin(["null", "wait"])
mask = df[~mask] df
df.shape
= df.groupby("department") groups
"GWB L01 CRITICAL CARE") groups.get_group(
= groups.agg(
res =("location_id", "count"),
beds=("occupied", "sum"),
patients=("cvl_discharge", lambda x: x.max(skipna=True)),
last_dc=("modified_at", "max"),
modified_at
)"empties"] = res["beds"] - res["patients"]
res["opens"] = res["empties"] # place holder : need to subtract closed from empties res[
"last_dc"] = (res["modified_at"] - res["last_dc"]).apply(
res[lambda x: pd.Timedelta.floor(x, "d")
)
"closed_temp"] = pd.DataFrame(
res[
["last_dc"] > pd.Timedelta(2, "days"),
res["last_dc"] <= pd.Timedelta(30, "days"),
res["patients"] == 0,
res[
]all(axis="columns")
).T.
"closed_perm"] = pd.DataFrame(
res[
["last_dc"] > pd.Timedelta(30, "days"),
res["patients"] == 0,
res[
]all(axis="columns") ).T.
= ~res["closed_perm"]
mask
= res[mask]
res = res[
res "beds", "patients", "empties", "opens", "last_dc", "closed_temp" "modified_at"]
[ ]
res
from typing import List
import pandas as pd
def _split_location_string(df: pd.DataFrame) -> pd.DataFrame:
"""
Splits a location string into dept/room/bed
"""
= (
temp "location_string"]
df[str.split("^", expand=True)
.={0: "dept", 1: "room", 2: "bed"})
.rename(columns
)for s in ["dept", "room", "bed"]:
= temp[s]
df[s] return df
def _remove_non_beds(
str] = ["null", "wait"]
df: pd.DataFrame, nonbeds: List[-> pd.DataFrame:
) """
Removes non beds e.g. null, wait
"""
= df["bed"].str.lower().isin(nonbeds)
mask = df[~mask]
df return df
def _aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
"""
Aggregation from location (bed) level to ward level
"""
= df.groupby("department")
groups # aggregate by dept
= groups.agg(
res =("location_id", "count"),
beds=("occupied", "sum"),
patients=("cvl_discharge", lambda x: x.max(skipna=True)),
last_dc=("modified_at", "max"),
modified_at
)# calculate additional numbers
"empties"] = res["beds"] - res["patients"]
res["opens"] = res["empties"] # place holder : need to subtract closed from empties
res["last_dc"] = (
res["modified_at"] - res["last_dc"])
(res[apply(lambda x: pd.Timedelta.floor(x, "d"))
.
.dt.days
)
# defined closed: temp and perm
"closed_temp"] = pd.DataFrame(
res[
["last_dc"] > 2,
res["last_dc"] <= 30,
res["patients"] == 0,
res[
]all(axis="columns")
).T.
"closed_perm"] = pd.DataFrame(
res[
["last_dc"] > 30,
res["patients"] == 0,
res[
]all(axis="columns")
).T.
# drop closed perm
= ~res["closed_perm"]
mask
= res[mask]
res = res[
res
["beds",
"patients",
"empties",
"opens",
"last_dc",
"closed_temp",
"modified_at",
]
]=True)
res.reset_index(inplacereturn res
def aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
"""
Aggregation from location (bed) level to ward level
Wrapper function
"""
= _split_location_string(df)
df = _remove_non_beds(df)
df = _aggregate_by_department(df)
df return df
= pd.read_sql("bedsmock", engine)
df aggregate_by_department(df)