Make predictions using a saved binomial model for ICU elective admission

This notebook shows how to use the models I have saved to ML flow, for the purposes of predicting how many elective patients will arrive in the ICU in the 24 hours following 9.15 on a given day.

One of three locations can be requested: tower, gwb and wms (all lower case). These have different trained models. You need to retrieve the relevant model from ML flow for the location requested. You would do this be setting model_name and model_version to the saved values (as an example for the tower: MODEL__TAP_ELECTIVE_TOWER__NAME, MODEL__TAP_ELECTIVE_TOWER__VERSION need to be set; these should eventually be saved as constants in global settings)

Logic: - Find out how many patients are on the surgical list for a given day - Retrieve a ML model which uses simple date parameters (but could one day be more sophisticated) to determine probability of ICU admission for any patient (not taking patient level characteristics into account) based on day of week - Use this probability to generate a binomial probability distribution over the number of beds needed in ICU for those patients

NOTES - we decided not to make predictions for weekends, as almost all weekend days have no elective patients - we use a lens here which does not really serve any purpose, other than to one-hot encode the day of week. However, in future, other covariates (eg the number of patients in the hospital currently) could be added to the model, and the lens is already in place to add scaling functions

Input required

The input data to the models takes the form of of one-row dataframe with the following columns; [‘model_function’, ‘date’, ‘icu_counts’, ‘noticu_counts’, ‘wkday’, ‘N’] - model_function - set this to ‘binom’ - date - use pd.to_datetime to set format eg pd.to_datetime(‘2022-08-08’) - icu_counts - set this field to zero [this is an artefact of the lens method] - noticu_counts - ditto - wkday - an integer for the day of the week, where Monday is 0 - N - number of patients in the elective list (retrieved from Caboodle)

Input validation

The called function SKProbabilityPredictorStats() will return an error if: - you request a weekend day - there are no patients on the elective list

Packages

import pkg_resources

installed_packages = pkg_resources.working_set
installed_packages_list = sorted([f"{i.key}=={i.version}" for i in installed_packages])
from datetime import datetime

import numpy as np
import pandas as pd
import os
import pickle
import tempfile
from pathlib import Path

import mlflow
from mlflow.tracking import MlflowClient
import urllib

from hylib import settings
from hylib.dt import LONDON_TZ
from hymind.predict.base import BaseLensPredictor
from patsy import dmatrices
from scipy.stats import binom
from sqlalchemy import create_engine

Database connection

conn_str = "DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={},{};DATABASE={};UID={};PWD={}".format(
    settings.CABOODLE_DB_HOST,
    settings.CABOODLE_DB_PORT,
    settings.CABOODLE_DB_NAME,
    settings.CABOODLE_DB_USER,
    settings.CABOODLE_DB_PASSWORD,
)
caboodle_db = create_engine(
    f"mssql+pyodbc:///?odbc_connect={urllib.parse.quote_plus(conn_str)}"
)

Set parameters

mlflow.set_tracking_uri("sqlite:///mlruns.db")

mlflow_var = os.getenv("HYMIND_REPO_TRACKING_URI")
mlflow.set_tracking_uri(mlflow_var)

client = MlflowClient()
MODEL__TAP_ELECTIVE_TOWER__NAME = "tap_elective_tower"
MODEL__TAP_ELECTIVE_TOWER__VERSION = 5
MODEL__TAP_ELECTIVE_GWB__NAME = "tap_elective_gwb"
MODEL__TAP_ELECTIVE_GWB__VERSION = 5
MODEL__TAP_ELECTIVE_WMS__NAME = "tap_elective_wms"
MODEL__TAP_ELECTIVE_WMS__VERSION = 5
def get_model_details(location):

    if location == "tower":
        model_name, model_version = (
            MODEL__TAP_ELECTIVE_TOWER__NAME,
            MODEL__TAP_ELECTIVE_TOWER__VERSION,
        )
    elif location == "gwb":
        model_name, model_version = (
            MODEL__TAP_ELECTIVE_GWB__NAME,
            MODEL__TAP_ELECTIVE_GWB__VERSION,
        )
    else:
        model_name, model_version = (
            MODEL__TAP_ELECTIVE_WMS__NAME,
            MODEL__TAP_ELECTIVE_WMS__VERSION,
        )
    return model_name, model_version

Create predictor class

class SKProbabilityPredictorStats(BaseLensPredictor):
    def __init__(self, model_name: str, model_version: int) -> None:
        super().__init__(model_name, model_version)
        self.model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
        self.expr = self._load_expr(self.model_info.run_id)
        self.lens = self._load_lens(self.model_info.run_id)
        self.input_df = self._is_weekday(input_df)
        self.input_df = self._elective_list_gt0(input_df)

    def _is_weekday(self, input_df: pd.DataFrame):
        if input_df.iloc[0, 0] == "binom":
            if not input_df.iloc[0, 4] in list(range(0, 5)):
                raise ValueError("Date requested is not a weekday")
            return input_df

    def _elective_list_gt0(self, input_df: pd.DataFrame):
        if input_df.iloc[0, 0] == "binom":
            if input_df.iloc[0, 5] == 0:
                raise ValueError("There are no patients on the elective list")
            return input_df

    @staticmethod
    def _load_expr(run_id: str):
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)

            client.download_artifacts(run_id, "expr", tmp_dir)

            expr_path = next((tmp_dir / "expr").rglob("*.txt"))
            with open(expr_path, "rb") as f:
                expr = f.read()
            expr = str(expr, "utf-8")

            return expr

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:

        X_df = self.lens.transform(input_df)
        X_df__, X_df = dmatrices(self.expr, X_df, return_type="dataframe")

        predictions_set_df = self.model.get_prediction(X_df)
        p = predictions_set_df.summary_frame().iloc[0, 0]

        if input_df.iloc[0, 0] == "binom":

            N = input_df.iloc[0, 5]
            predictions_df = pd.DataFrame.from_dict(
                {
                    "bed_count": list(range(0, N + 1)),
                    "probability": binom.pmf(list(range(0, N + 1)), N, p),
                }
            )

        else:

            N = 11
            predictions_df = pd.DataFrame.from_dict(
                {
                    "bed_count": list(range(0, N + 1)),
                    "probability": poisson.pmf(list(range(0, N + 1)), p),
                }
            )

        predictions_df["predict_dt"] = datetime.now(LONDON_TZ)
        predictions_df["model_name"] = self.model_name
        predictions_df["model_version"] = self.model_version
        predictions_df["run_id"] = self.model_info.run_id

        return predictions_df

Get current number of patients

def get_elective_cases(date, location):

    if location == "tower":

        department1 = "UCH P03 THEATRE SUITE"
        department2 = "UCH T02 DAY SURG THR"

    elif location == "wms":

        department1 = "WMS W01 THEATRE SUITE"
        department2 = "WMS W01 THEATRE SUITE"

    elif location == "gwb":

        department1 = "GWB B-1 THEATRE SUITE"
        department2 = "GWB B-1 THEATRE SUITE"

    data = pd.read_sql(
        """
        SELECT COUNT (DISTINCT scf.[PatientKey]) 
  
      FROM [CABOODLE_REPORT].[dbo].[SurgicalCaseFact] scf 
      LEFT JOIN [CABOODLE_REPORT].[dbo].[WaitingListEntryFact] wlef ON wlef.[SurgicalCaseKey] = scf.[SurgicalCaseKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[SurgicalCaseUclhFactX] scufx ON scf.[SurgicalCaseKey] = scufx.[SurgicalCaseKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[PatientDim] patd ON scf.[PatientDurableKey] = patd.[DurableKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[ProcedureDim] pd ON scf.[PrimaryProcedureKey] = pd.[ProcedureKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DepartmentDim] dd ON scf.[OperatingRoomKey] = dd.[DepartmentKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datewl ON wlef.[PlacedOnWaitingListDateKey] = datewl.[DateKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datedta ON wlef.[DecidedToAdmitDateKey] = datedta.[DateKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datesurg ON scf.[SurgeryDateKey] = datesurg.[DateKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datecasereq ON scf.[CaseRequestDateKey] = datecasereq.[DateKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[TimeOfDayDim] todcase ON scf.[CaseRequestTimeOfDayKey] = todcase.[TimeOfDayKey]
      LEFT JOIN [CABOODLE_REPORT].[dbo].[DateDim] datecancel ON scufx.[CancelDateKey] = datecancel.[DateKey]
      WHERE scf.[PatientDurableKey] > 1 AND scf.[PatientDurableKey] IS NOT NULL
    --  AND wlef.[SurgicalService] != '*Unspecified' 
      AND scf.[PrimaryService] != 'Obstetrics' AND scf.[PrimaryService] != 'Neurosurgery' AND scf.[PrimaryService] != 'Paediatric Dental'
      AND scf.[PatientInFacilityDateKey] < 0
      AND dd.[DepartmentName] != 'NHNN THEATRE SUITE' AND dd.[DepartmentName] != 'RNTNE THEATRE SUITE' AND dd.[DepartmentName] != 'EGA E02 LABOUR WARD'
      AND dd.[DepartmentName] != 'MCC H-1 THEATRE SUITE' AND dd.[DepartmentName] != 'UCH ANAESTHESIA DEPT'
      --AND dd.[DepartmentName] != 'UCH P02 ENDOSCOPY'
      AND patd.[AgeInYears] >= 18
      AND (wlef.[IntendedManagement] IN ('*Unspecified', 'Inpatient', 'Inpatient Series', 'Night Admit Series') OR wlef.[IntendedManagement] IS NULL)
      AND CONVERT(DATE, scufx.[PlannedOperationStartInstant]) = ?
      AND ((scf.[Classification] = 'Elective') OR (scf.[Classification] = 'Expedited (within 2 weeks on elective list)'))
      AND ((dd.[DepartmentName] = ?) OR (dd.[DepartmentName] = ?))
      
        """,
        caboodle_db,
        params=[date, department1, department2],
    )

    return data

Retrieve model predictions

## Create input matrix for model, specifying a date to make a prediction for. The model will return a probability of admission to ICU

date = "2022-08-05"
to_predict = pd.to_datetime(date)
location = "gwb"
model_name, model_version = get_model_details(location)

input_df = pd.DataFrame(
    np.array(["binom", to_predict, 0, 0])[np.newaxis],
    columns=["model_function", "date", "icu_counts", "noticu_counts"],
)
input_df.loc[:, "wkday"] = (
    input_df.loc[:, "date"].apply(datetime.weekday).astype("object")
)
input_df["date"] = input_df["date"].values.astype(np.float)
input_df["N"] = get_elective_cases(to_predict, location)
input_df.columns
predictor = SKProbabilityPredictorStats(model_name, model_version)
predictions_df = predictor.predict(input_df)
predictions_df["probability"].values

Optional code to inspect the model

# Retrieve the model and the saved lens

model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
print(model.summary())

with tempfile.TemporaryDirectory() as tmp:
    tmp_dir = Path(tmp)

    client.download_artifacts(run_id, "lens", tmp_dir)

    lens_path = next((tmp_dir / "lens").rglob("*.pkl"))
    with open(lens_path, "rb") as f:
        lens = pickle.load(f)

X_df = lens.transform(input_df)
X_df__, X_df = dmatrices(expr, X_df, return_type="dataframe")