Make predictions using a saved poisson model for ICU non-elective admission

This notebook shows how to use the models I have saved to MLflow to predict how many non-elective patients will arrive in the ICU in the 24 hours following 9.15 am on a given day.

One of three locations can be requested: tower, gwb and wms (all lower case). Each location has its own trained model, so you need to retrieve the relevant model from MLflow for the location requested. You do this by setting model_name and model_version to the saved values (for example, for the tower, MODEL__TAP_NONELECTIVE_TOWER__NAME and MODEL__TAP_NONELECTIVE_TOWER__VERSION need to be set; these should eventually be saved as constants in global settings).

Logic:
- Retrieve an ML model which uses simple date parameters (but could one day be more sophisticated) to generate a mean for a Poisson distribution
- Use that mean (p in the code) as the parameter to generate a probability distribution over the number of ICU beds needed by non-elective patients, where the maximum number of beds considered is 11 (N = 11 in the code)

NOTES
- We DO make predictions for weekends, unlike for the elective taps, but we don't differentiate between days of the week; there is just a binary indicator for whether it is a weekday or a weekend.
- We use a lens here which does not really serve any purpose other than to one-hot encode whether it is a weekend. However, in future, other covariates (e.g. the number of patients currently in the hospital, the weather) could be added to the model, and the lens is already in place to add scaling functions.

We agreed to predict flows at 9.15 and 12.30 pm. However, inspection suggested there is minimal difference between these times so I have dropped the second one and only use 9.15 am

Input required

The input data to the models takes the form of a one-row dataframe with the following columns: ['model_function', 'date', 'count', 'wkday']
- model_function - set this to 'poisson'
- date - use pd.to_datetime to set the format, e.g. pd.to_datetime('2022-08-08')
- count - set this field to zero [this is an artefact of the lens method]
- wkday - an integer for whether it is a weekend (value 0) or a weekday (value 1); set this as shown below

Packages

import pkg_resources

# Record the active environment as a sorted list of "key==version" strings,
# one per distribution in the pkg_resources working set.
installed_packages = pkg_resources.working_set
installed_packages_list = sorted(
    f"{dist.key}=={dist.version}" for dist in installed_packages
)

from datetime import datetime

import numpy as np
import pandas as pd

import os
import pickle
import tempfile
from pathlib import Path

import mlflow
from mlflow.tracking import MlflowClient

import urllib

from hylib import settings
from hylib.dt import LONDON_TZ
from hymind.predict.base import BaseLensPredictor
from patsy import dmatrices
from scipy.stats import poisson
from sqlalchemy import create_engine

Set parameters

# Local tracking store used when HYMIND_REPO_TRACKING_URI is unset or empty.
DEFAULT_TRACKING_URI = "sqlite:///mlruns.db"

# Prefer the tracking URI supplied through the environment and fall back to
# the local SQLite store. Previously the SQLite URI was set and then always
# overwritten, and an unset variable passed None to mlflow.set_tracking_uri.
mlflow_var = os.getenv("HYMIND_REPO_TRACKING_URI") or DEFAULT_TRACKING_URI
mlflow.set_tracking_uri(mlflow_var)

# Module-level client; the predictor classes use it to download run artifacts.
client = MlflowClient()

# Registered model name and version for each supported ICU location.
MODEL__TAP_NONELECTIVE_TOWER__NAME = "tap_nonelective_tower"
MODEL__TAP_NONELECTIVE_TOWER__VERSION = 2
MODEL__TAP_NONELECTIVE_GWB__NAME = "tap_nonelective_gwb"
MODEL__TAP_NONELECTIVE_GWB__VERSION = 2
MODEL__TAP_NONELECTIVE_WMS__NAME = "tap_nonelective_wms"
MODEL__TAP_NONELECTIVE_WMS__VERSION = 2


def get_model_details(location):
    """Return the ``(model_name, model_version)`` pair for a location.

    Args:
        location: One of ``"tower"``, ``"gwb"`` or ``"wms"`` (lower case).

    Returns:
        Tuple of the registered model name and its version.

    Raises:
        ValueError: If ``location`` is not one of the supported values.
            Previously any unrecognised value (for example ``"Tower"``)
            silently fell through to the WMS model.
    """
    # Built on every call so that constants reassigned later in the notebook
    # session are picked up, as with the original if/elif chain.
    details_by_location = {
        "tower": (
            MODEL__TAP_NONELECTIVE_TOWER__NAME,
            MODEL__TAP_NONELECTIVE_TOWER__VERSION,
        ),
        "gwb": (
            MODEL__TAP_NONELECTIVE_GWB__NAME,
            MODEL__TAP_NONELECTIVE_GWB__VERSION,
        ),
        "wms": (
            MODEL__TAP_NONELECTIVE_WMS__NAME,
            MODEL__TAP_NONELECTIVE_WMS__VERSION,
        ),
    }
    try:
        return details_by_location[location]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as a list.
        supported = ", ".join(sorted(details_by_location))
        raise ValueError(
            f"Unknown location {location!r}; expected one of: {supported}"
        ) from None

Create predictor class

class SKProbabilityPredictorPoisson(BaseLensPredictor):
    """Turn a registered model's prediction into Poisson bed-count probabilities.

    On construction the model is loaded from the MLflow registry, along with
    the formula text (the run's "expr" artifact) and the lens of the same run.
    ``model_info`` and ``_load_lens`` are not defined here; presumably they
    come from ``BaseLensPredictor`` -- confirm against that class.
    """

    def __init__(self, model_name: str, model_version: int) -> None:
        super().__init__(model_name, model_version)
        model_uri = f"models:/{model_name}/{model_version}"
        self.model = mlflow.sklearn.load_model(model_uri)
        self.expr = self._load_expr(self.model_info.run_id)
        self.lens = self._load_lens(self.model_info.run_id)

    @staticmethod
    def _load_expr(run_id: str):
        """Download the run's "expr" artifact and return its .txt content as str.

        The first ``*.txt`` file found under the downloaded "expr" directory
        is read as bytes and decoded as UTF-8.
        """
        with tempfile.TemporaryDirectory() as scratch:
            scratch_dir = Path(scratch)
            client.download_artifacts(run_id, "expr", scratch_dir)
            formula_file = next((scratch_dir / "expr").rglob("*.txt"))
            return formula_file.read_bytes().decode("utf-8")

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Return Poisson probabilities for bed counts 0..11.

        Args:
            input_df: Frame passed through the lens and then through the
                stored formula to build the design matrix.

        Returns:
            One row per bed count with columns ``bed_count``, ``probability``,
            ``predict_dt``, ``model_name``, ``model_version`` and ``run_id``.
        """
        transformed = self.lens.transform(input_df)
        # dmatrices returns (lhs, rhs); only the right-hand side is used.
        _, design = dmatrices(self.expr, transformed, return_type="dataframe")

        # The top-left cell of the summary frame is used as the Poisson rate.
        rate = self.model.get_prediction(design).summary_frame().iloc[0, 0]

        max_beds = 11
        bed_counts = list(range(max_beds + 1))
        distribution = pd.DataFrame(
            {
                "bed_count": bed_counts,
                "probability": poisson.pmf(bed_counts, rate),
            }
        )

        # Stamp every row with when and by which model it was produced.
        return distribution.assign(
            predict_dt=datetime.now(LONDON_TZ),
            model_name=self.model_name,
            model_version=self.model_version,
            run_id=self.model_info.run_id,
        )

class SKProbabilityPredictorStats(BaseLensPredictor):
    """Predictor returning a binomial or Poisson bed-count distribution.

    The first cell of the one-row input frame selects the distribution:
    ``"binom"`` yields a binomial over 0..N, with N read from column 5 of the
    frame; any other value yields a Poisson distribution over 0..11.

    ``model_info`` and ``_load_lens`` are not defined here; presumably they
    come from ``BaseLensPredictor`` -- confirm against that class.
    """

    def __init__(self, model_name: str, model_version: int) -> None:
        super().__init__(model_name, model_version)
        self.model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
        self.expr = self._load_expr(self.model_info.run_id)
        self.lens = self._load_lens(self.model_info.run_id)
        # Validation is done in predict(). The constructor receives no frame,
        # and the previous code validated a module-level ``input_df`` global
        # (NameError if absent) instead of the data actually predicted on.
        self.input_df = None

    def _is_weekday(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Reject binomial requests whose column-4 value is outside 0..4.

        Returns the frame unchanged so calls can be chained. Raises
        ``ValueError`` when the check fails. Non-binomial frames pass through.
        """
        # Column 4 presumably holds datetime.weekday() -- TODO confirm schema.
        if input_df.iloc[0, 0] == "binom" and input_df.iloc[0, 4] not in range(5):
            raise ValueError("Date requested is not a weekday")
        return input_df

    def _elective_list_gt0(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Reject binomial requests whose column-5 value is zero.

        Returns the frame unchanged so calls can be chained. Raises
        ``ValueError`` when the check fails. Non-binomial frames pass through.
        """
        if input_df.iloc[0, 0] == "binom" and input_df.iloc[0, 5] == 0:
            raise ValueError("There are no patients on the elective list")
        return input_df

    @staticmethod
    def _load_expr(run_id: str):
        """Download the run's "expr" artifact and return its .txt content as str."""
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)

            client.download_artifacts(run_id, "expr", tmp_dir)

            expr_path = next((tmp_dir / "expr").rglob("*.txt"))
            with open(expr_path, "rb") as f:
                expr = f.read()
            expr = str(expr, "utf-8")

            return expr

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Validate ``input_df`` and return the bed-count distribution.

        Args:
            input_df: One-row frame; column 0 names the distribution.

        Returns:
            One row per bed count with columns ``bed_count``, ``probability``,
            ``predict_dt``, ``model_name``, ``model_version`` and ``run_id``.

        Raises:
            ValueError: For a binomial request that is not on a weekday or
                has an empty elective list.
        """
        # Fail fast on the frame actually being predicted on.
        self.input_df = self._elective_list_gt0(self._is_weekday(input_df))

        X_df = self.lens.transform(input_df)
        # dmatrices returns (lhs, rhs); only the right-hand side is used.
        _, X_df = dmatrices(self.expr, X_df, return_type="dataframe")

        predictions_set_df = self.model.get_prediction(X_df)
        # Top-left cell of the summary frame: used as the binomial probability
        # or as the Poisson rate, depending on the branch below.
        p = predictions_set_df.summary_frame().iloc[0, 0]

        if input_df.iloc[0, 0] == "binom":
            # Imported locally: the file only imports ``poisson`` from
            # scipy.stats, so this branch previously raised NameError.
            from scipy.stats import binom

            # int() so that range() also accepts NumPy integer scalars.
            N = int(input_df.iloc[0, 5])
            bed_counts = list(range(0, N + 1))
            probabilities = binom.pmf(bed_counts, N, p)
        else:
            N = 11
            bed_counts = list(range(0, N + 1))
            probabilities = poisson.pmf(bed_counts, p)

        predictions_df = pd.DataFrame.from_dict(
            {
                "bed_count": bed_counts,
                "probability": probabilities,
            }
        )

        predictions_df["predict_dt"] = datetime.now(LONDON_TZ)
        predictions_df["model_name"] = self.model_name
        predictions_df["model_version"] = self.model_version
        predictions_df["run_id"] = self.model_info.run_id

        return predictions_df

Retrieve model predictions

## Create input matrix for model, specifying a date to make a prediction for. The model will return a probability of admission to ICU

# Date to predict for and the location whose model details are looked up.
date = "2022-08-09"
to_predict = pd.to_datetime(date)
location = "tower"
model_name, model_version = get_model_details(location)

# One-row input frame: model function, prediction date and a placeholder count.
input_df = pd.DataFrame(
    np.array(["poisson", to_predict, 0])[np.newaxis],
    columns=["model_function", "date", "count"],
)
# True for Monday-Friday (weekday() <= 4), False for the weekend.
input_df.loc[:, "wkday"] = (
    input_df.loc[:, "date"].apply(datetime.weekday).astype("object") <= 4
)
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24, so the builtin is used directly.
# NOTE(review): this cast assumes the date column is datetime64 here -- confirm.
input_df["date"] = input_df["date"].values.astype(float)
input_df

# NOTE(review): the "tower" details fetched above are overwritten here, so the
# prediction is made with the "gwb" model -- confirm which location is intended.
model_name, model_version = get_model_details("gwb")
predictor = SKProbabilityPredictorStats(model_name, model_version)
predictions_df = predictor.predict(input_df)

predictions_df["probability"].values

Optional code to inspect the model

# Load the registered tower model from the MLflow registry and print its summary.
model_name, model_version = get_model_details("tower")

model_uri = f"models:/{model_name}/{model_version}"
model = mlflow.sklearn.load_model(model_uri)
print(model.summary())

# Retrieve the model and the saved lens

model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
print(model.summary())

# run_id and expr were never defined at module level, so this cell raised
# NameError. Both are derived here from the registered model version.
run_id = client.get_model_version(model_name, str(model_version)).run_id
expr = SKProbabilityPredictorPoisson._load_expr(run_id)

with tempfile.TemporaryDirectory() as tmp:
    tmp_dir = Path(tmp)

    client.download_artifacts(run_id, "lens", tmp_dir)

    lens_path = next((tmp_dir / "lens").rglob("*.pkl"))
    with open(lens_path, "rb") as f:
        # NOTE(review): pickle.load can execute arbitrary code; only load lens
        # artifacts from a tracking server you trust.
        lens = pickle.load(f)

# Apply the lens, then build the design matrix from the stored formula.
X_df = lens.transform(input_df)
# dmatrices returns (lhs, rhs); X_df is rebound to the right-hand side.
X_df__, X_df = dmatrices(expr, X_df, return_type="dataframe")