import pkg_resources
= pkg_resources.working_set
installed_packages = sorted([f"{i.key}=={i.version}" for i in installed_packages]) installed_packages_list
Make predictions using a saved binomial model for ICU elective admission
This notebook shows how to use the models I have saved to ML flow, for the purposes of predicting how many elective patients will arrive in the ICU in the 24 hours following 9.15 on a given day.
One of three locations can be requested: tower, gwb and wms (all lower case). These have different trained models. You need to retrieve the relevant model from ML flow for the location requested. You would do this be setting model_name and model_version to the saved values (as an example for the tower: MODEL__TAP_ELECTIVE_TOWER__NAME, MODEL__TAP_ELECTIVE_TOWER__VERSION need to be set; these should eventually be saved as constants in global settings)
Logic: - Find out how many patients are on the surgical list for a given day - Retrieve a ML model which uses simple date parameters (but could one day be more sophisticated) to determine probability of ICU admission for any patient (not taking patient level characteristics into account) based on day of week - Use this probability to generate a binomial probability distribution over the number of beds needed in ICU for those patients
NOTES - we decided not to make predictions for weekends, as almost all weekend days have no elective patients - we use a lens here which does not really serve any purpose, other than to one-hot encode the day of week. However, in future, other covariates (eg the number of patients in the hospital currently) could be added to the model, and the lens is already in place to add scaling functions
Input required
The input data to the models takes the form of of one-row dataframe with the following columns; [‘model_function’, ‘date’, ‘icu_counts’, ‘noticu_counts’, ‘wkday’, ‘N’] - model_function - set this to ‘binom’ - date - use pd.to_datetime to set format eg pd.to_datetime(‘2022-08-08’) - icu_counts - set this field to zero [this is an artefact of the lens method] - noticu_counts - ditto - wkday - an integer for the day of the week, where Monday is 0 - N - number of patients in the elective list (retrieved from Caboodle)
Input validation
The called function SKProbabilityPredictorStats() will return an error if: - you request a weekend day - there are no patients on the elective list
Packages
from datetime import datetime
import numpy as np
import pandas as pd
import os
import pickle
import tempfile
from pathlib import Path
import mlflow
from mlflow.tracking import MlflowClient
import urllib
from hylib import settings
from hylib.dt import LONDON_TZ
from hymind.predict.base import BaseLensPredictor
from patsy import dmatrices
from scipy.stats import binom
from sqlalchemy import create_engine
Database connection
= "DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={},{};DATABASE={};UID={};PWD={}".format(
conn_str
settings.CABOODLE_DB_HOST,
settings.CABOODLE_DB_PORT,
settings.CABOODLE_DB_NAME,
settings.CABOODLE_DB_USER,
settings.CABOODLE_DB_PASSWORD,
)= create_engine(
caboodle_db f"mssql+pyodbc:///?odbc_connect={urllib.parse.quote_plus(conn_str)}"
)
Set parameters
"sqlite:///mlruns.db")
mlflow.set_tracking_uri(
= os.getenv("HYMIND_REPO_TRACKING_URI")
mlflow_var
mlflow.set_tracking_uri(mlflow_var)
= MlflowClient() client
= "tap_elective_tower"
MODEL__TAP_ELECTIVE_TOWER__NAME = 5
MODEL__TAP_ELECTIVE_TOWER__VERSION = "tap_elective_gwb"
MODEL__TAP_ELECTIVE_GWB__NAME = 5
MODEL__TAP_ELECTIVE_GWB__VERSION = "tap_elective_wms"
MODEL__TAP_ELECTIVE_WMS__NAME = 5 MODEL__TAP_ELECTIVE_WMS__VERSION
def get_model_details(location):
if location == "tower":
= (
model_name, model_version
MODEL__TAP_ELECTIVE_TOWER__NAME,
MODEL__TAP_ELECTIVE_TOWER__VERSION,
)elif location == "gwb":
= (
model_name, model_version
MODEL__TAP_ELECTIVE_GWB__NAME,
MODEL__TAP_ELECTIVE_GWB__VERSION,
)else:
= (
model_name, model_version
MODEL__TAP_ELECTIVE_WMS__NAME,
MODEL__TAP_ELECTIVE_WMS__VERSION,
)return model_name, model_version
Create predictor class
class SKProbabilityPredictorStats(BaseLensPredictor):
def __init__(self, model_name: str, model_version: int) -> None:
super().__init__(model_name, model_version)
self.model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
self.expr = self._load_expr(self.model_info.run_id)
self.lens = self._load_lens(self.model_info.run_id)
self.input_df = self._is_weekday(input_df)
self.input_df = self._elective_list_gt0(input_df)
def _is_weekday(self, input_df: pd.DataFrame):
if input_df.iloc[0, 0] == "binom":
if not input_df.iloc[0, 4] in list(range(0, 5)):
raise ValueError("Date requested is not a weekday")
return input_df
def _elective_list_gt0(self, input_df: pd.DataFrame):
if input_df.iloc[0, 0] == "binom":
if input_df.iloc[0, 5] == 0:
raise ValueError("There are no patients on the elective list")
return input_df
@staticmethod
def _load_expr(run_id: str):
with tempfile.TemporaryDirectory() as tmp:
= Path(tmp)
tmp_dir
"expr", tmp_dir)
client.download_artifacts(run_id,
= next((tmp_dir / "expr").rglob("*.txt"))
expr_path with open(expr_path, "rb") as f:
= f.read()
expr = str(expr, "utf-8")
expr
return expr
def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
= self.lens.transform(input_df)
X_df = dmatrices(self.expr, X_df, return_type="dataframe")
X_df__, X_df
= self.model.get_prediction(X_df)
predictions_set_df = predictions_set_df.summary_frame().iloc[0, 0]
p
if input_df.iloc[0, 0] == "binom":
= input_df.iloc[0, 5]
N = pd.DataFrame.from_dict(
predictions_df
{"bed_count": list(range(0, N + 1)),
"probability": binom.pmf(list(range(0, N + 1)), N, p),
}
)
else:
= 11
N = pd.DataFrame.from_dict(
predictions_df
{"bed_count": list(range(0, N + 1)),
"probability": poisson.pmf(list(range(0, N + 1)), p),
}
)
"predict_dt"] = datetime.now(LONDON_TZ)
predictions_df["model_name"] = self.model_name
predictions_df["model_version"] = self.model_version
predictions_df["run_id"] = self.model_info.run_id
predictions_df[
return predictions_df
Get current number of patients
def get_elective_cases(date, location):
if location == "tower":
= "UCH P03 THEATRE SUITE"
department1 = "UCH T02 DAY SURG THR"
department2
elif location == "wms":
= "WMS W01 THEATRE SUITE"
department1 = "WMS W01 THEATRE SUITE"
department2
elif location == "gwb":
= "GWB B-1 THEATRE SUITE"
department1 = "GWB B-1 THEATRE SUITE"
department2
= pd.read_sql(
data """
SELECT COUNT (DISTINCT scf.[PatientKey])
FROM [CABOODLE_REPORT].[dbo].[SurgicalCaseFact] scf
LEFT JOIN [CABOODLE_REPORT].[dbo].[WaitingListEntryFact] wlef ON wlef.[SurgicalCaseKey] = scf.[SurgicalCaseKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[SurgicalCaseUclhFactX] scufx ON scf.[SurgicalCaseKey] = scufx.[SurgicalCaseKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[PatientDim] patd ON scf.[PatientDurableKey] = patd.[DurableKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[ProcedureDim] pd ON scf.[PrimaryProcedureKey] = pd.[ProcedureKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DepartmentDim] dd ON scf.[OperatingRoomKey] = dd.[DepartmentKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datewl ON wlef.[PlacedOnWaitingListDateKey] = datewl.[DateKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datedta ON wlef.[DecidedToAdmitDateKey] = datedta.[DateKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datesurg ON scf.[SurgeryDateKey] = datesurg.[DateKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datecasereq ON scf.[CaseRequestDateKey] = datecasereq.[DateKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[TimeOfDayDim] todcase ON scf.[CaseRequestTimeOfDayKey] = todcase.[TimeOfDayKey]
LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datecancel ON scufx.[CancelDateKey] = datecancel.[DateKey]
WHERE scf.[PatientDurableKey] > 1 AND scf.[PatientDurableKey] IS NOT NULL
-- AND wlef.[SurgicalService] != '*Unspecified'
AND scf.[PrimaryService] != 'Obstetrics' AND scf.[PrimaryService] != 'Neurosurgery' AND scf.[PrimaryService] != 'Paediatric Dental'
AND scf.[PatientInFacilityDateKey] < 0
AND dd.[DepartmentName] != 'NHNN THEATRE SUITE' AND dd.[DepartmentName] != 'RNTNE THEATRE SUITE' AND dd.[DepartmentName] != 'EGA E02 LABOUR WARD'
AND dd.[DepartmentName] != 'MCC H-1 THEATRE SUITE' AND dd.[DepartmentName] != 'UCH ANAESTHESIA DEPT'
--AND dd.[DepartmentName] != 'UCH P02 ENDOSCOPY'
AND patd.[AgeInYears] >= 18
AND (wlef.[IntendedManagement] IN ('*Unspecified', 'Inpatient', 'Inpatient Series', 'Night Admit Series') OR wlef.[IntendedManagement] IS NULL)
AND CONVERT(DATE, scufx.[PlannedOperationStartInstant]) = ?
AND ((scf.[Classification] = 'Elective') OR (scf.[Classification] = 'Expedited (within 2 weeks on elective list)'))
AND ((dd.[DepartmentName] = ?) OR (dd.[DepartmentName] = ?))
""",
caboodle_db,=[date, department1, department2],
params
)
return data
Retrieve model predictions
## Create input matrix for model, specifying a date to make a prediction for. The model will return a probability of admission to ICU
= "2022-08-05"
date = pd.to_datetime(date)
to_predict = "gwb"
location = get_model_details(location)
model_name, model_version
= pd.DataFrame(
input_df "binom", to_predict, 0, 0])[np.newaxis],
np.array([=["model_function", "date", "icu_counts", "noticu_counts"],
columns
)"wkday"] = (
input_df.loc[:, "date"].apply(datetime.weekday).astype("object")
input_df.loc[:,
)"date"] = input_df["date"].values.astype(np.float)
input_df["N"] = get_elective_cases(to_predict, location)
input_df[ input_df.columns
= SKProbabilityPredictorStats(model_name, model_version)
predictor = predictor.predict(input_df) predictions_df
"probability"].values predictions_df[
Optional code to inspect the model
# Retrieve the model and the saved lens
= mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
model print(model.summary())
with tempfile.TemporaryDirectory() as tmp:
= Path(tmp)
tmp_dir
"lens", tmp_dir)
client.download_artifacts(run_id,
= next((tmp_dir / "lens").rglob("*.pkl"))
lens_path with open(lens_path, "rb") as f:
= pickle.load(f)
lens
= lens.transform(input_df)
X_df = dmatrices(expr, X_df, return_type="dataframe") X_df__, X_df