from pathlib import Path
import numpy as np
import pandas as pd
import sqlalchemy as saWrangle live beds into a simple census
Run the beds live sql and then wrangle the data into a simple census
Assume that we’re running from the notebooks directory and need to pull data from ./src/mock/mock.db
sqlite_db = "../src/mock/mock.db"
assert Path(sqlite_db).is_file()engine = sa.create_engine(f"sqlite:///{sqlite_db}")df = pd.read_sql("bedsmock", engine)df.iloc[0]So now you want to return a dataframe with the following characteristics - per department (one row per department) - count number of beds - count number of occupied beds - count number of empty beds - last date of closed discharge from that department (i.e. if > 1 week and most beds empty then is the department closed) - number of side rooms?
temp = (
df["location_string"]
.str.split("^", expand=True)
.rename(columns={0: "dept", 1: "room", 2: "bed"})
)
for s in ["dept", "room", "bed"]:
df[s] = temp[s]del tempremove null and waiting locations
mask = df["bed"].str.lower().isin(["null", "wait"])
df = df[~mask]df.shapegroups = df.groupby("department")groups.get_group("GWB L01 CRITICAL CARE")res = groups.agg(
beds=("location_id", "count"),
patients=("occupied", "sum"),
last_dc=("cvl_discharge", lambda x: x.max(skipna=True)),
modified_at=("modified_at", "max"),
)
res["empties"] = res["beds"] - res["patients"]
res["opens"] = res["empties"] # place holder : need to subtract closed from emptiesres["last_dc"] = (res["modified_at"] - res["last_dc"]).apply(
lambda x: pd.Timedelta.floor(x, "d")
)res["closed_temp"] = pd.DataFrame(
[
res["last_dc"] > pd.Timedelta(2, "days"),
res["last_dc"] <= pd.Timedelta(30, "days"),
res["patients"] == 0,
]
).T.all(axis="columns")
res["closed_perm"] = pd.DataFrame(
[
res["last_dc"] > pd.Timedelta(30, "days"),
res["patients"] == 0,
]
).T.all(axis="columns")mask = ~res["closed_perm"]
res = res[mask]
res = res[
["beds", "patients", "empties", "opens", "last_dc", "closed_temp" "modified_at"]
]resfrom typing import List
import pandas as pd
def _split_location_string(df: pd.DataFrame) -> pd.DataFrame:
"""
Splits a location string into dept/room/bed
"""
temp = (
df["location_string"]
.str.split("^", expand=True)
.rename(columns={0: "dept", 1: "room", 2: "bed"})
)
for s in ["dept", "room", "bed"]:
df[s] = temp[s]
return df
def _remove_non_beds(
df: pd.DataFrame, nonbeds: List[str] = ["null", "wait"]
) -> pd.DataFrame:
"""
Removes non beds e.g. null, wait
"""
mask = df["bed"].str.lower().isin(nonbeds)
df = df[~mask]
return df
def _aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
"""
Aggregation from location (bed) level to ward level
"""
groups = df.groupby("department")
# aggregate by dept
res = groups.agg(
beds=("location_id", "count"),
patients=("occupied", "sum"),
last_dc=("cvl_discharge", lambda x: x.max(skipna=True)),
modified_at=("modified_at", "max"),
)
# calculate additional numbers
res["empties"] = res["beds"] - res["patients"]
res["opens"] = res["empties"] # place holder : need to subtract closed from empties
res["last_dc"] = (
(res["modified_at"] - res["last_dc"])
.apply(lambda x: pd.Timedelta.floor(x, "d"))
.dt.days
)
# defined closed: temp and perm
res["closed_temp"] = pd.DataFrame(
[
res["last_dc"] > 2,
res["last_dc"] <= 30,
res["patients"] == 0,
]
).T.all(axis="columns")
res["closed_perm"] = pd.DataFrame(
[
res["last_dc"] > 30,
res["patients"] == 0,
]
).T.all(axis="columns")
# drop closed perm
mask = ~res["closed_perm"]
res = res[mask]
res = res[
[
"beds",
"patients",
"empties",
"opens",
"last_dc",
"closed_temp",
"modified_at",
]
]
res.reset_index(inplace=True)
return res
def aggregate_by_department(df: pd.DataFrame) -> pd.DataFrame:
"""
Aggregation from location (bed) level to ward level
Wrapper function
"""
df = _split_location_string(df)
df = _remove_non_beds(df)
df = _aggregate_by_department(df)
return dfdf = pd.read_sql("bedsmock", engine)
aggregate_by_department(df)