import pkg_resources
# List every distribution visible in the current environment as "name==version",
# sorted alphabetically, so the notebook records what it was run against.
# NOTE(review): pkg_resources is deprecated upstream; consider migrating to
# importlib.metadata once key normalisation differences have been checked.
installed_packages = pkg_resources.working_set
installed_packages_list = sorted(
    f"{i.key}=={i.version}" for i in installed_packages
)
installed_packages_list
Make predictions using a saved poisson model for ICU non-elective admission
This notebook shows how to use the models I have saved to MLflow, for the purposes of predicting how many non-elective patients will arrive in the ICU in the 24 hours following 9.15 on a given day.
One of three locations can be requested: tower, gwb and wms (all lower case). These have different trained models. You need to retrieve the relevant model from MLflow for the location requested. You would do this by setting model_name and model_version to the saved values (as an example for the tower: MODEL__TAP_NONELECTIVE_TOWER__NAME, MODEL__TAP_NONELECTIVE_TOWER__VERSION need to be set; these should eventually be saved as constants in global settings)
Logic: - Retrieve an ML model which uses simple date parameters (but could one day be more sophisticated) to generate a mean for a Poisson distribution - Use this mean (p in the code below) as a parameter to generate a probability distribution over the number of ICU beds needed by non-elective patients, where the maximum number of beds considered is fixed (N = 11 in the code below)
NOTES - we DO make predictions for weekends, unlike for the elective taps - but we don’t differentiate between days of week; just a binary indicator for whether it is a weekday or a weekend - we use a lens here which does not really serve any purpose, other than to one-hot encode whether it is a weekend. However, in future, other covariates (eg the number of patients in the hospital currently, the weather) could be added to the model, and the lens is already in place to add scaling functions
We agreed to predict flows at 9.15 and 12.30 pm. However, inspection suggested there is minimal difference between these times so I have dropped the second one and only use 9.15 am
Input required
The input data to the models takes the form of a one-row dataframe with the following columns: ['model_function', 'date', 'count', 'wkday'] - model_function - set this to 'poisson' - date - use pd.to_datetime to set format eg pd.to_datetime('2022-08-08') - count - set this field to zero [this is an artefact of the lens method] - wkday - an integer for whether it is a weekend (value 0) or a weekday (value 1); set this as shown below
Packages
import os
import pickle
import tempfile
import urllib
from datetime import datetime
from pathlib import Path

import mlflow
import numpy as np
import pandas as pd
from mlflow.tracking import MlflowClient
from patsy import dmatrices
from scipy.stats import binom, poisson
from sqlalchemy import create_engine

from hylib import settings
from hylib.dt import LONDON_TZ
from hymind.predict.base import BaseLensPredictor
Set parameters
# Default to a local SQLite tracking store ...
mlflow.set_tracking_uri("sqlite:///mlruns.db")

# ... and switch to the shared tracking server when one is configured.
# Only override when the variable is actually set, so an unset environment
# does not replace the local default with None.
mlflow_var = os.getenv("HYMIND_REPO_TRACKING_URI")
if mlflow_var:
    mlflow.set_tracking_uri(mlflow_var)

# Module-level client reused by the predictor classes to download artifacts.
client = MlflowClient()
# Registered MLflow model name and version for each location.
# (The notebook text says these should eventually live in global settings.)
MODEL__TAP_NONELECTIVE_TOWER__NAME = "tap_nonelective_tower"
MODEL__TAP_NONELECTIVE_TOWER__VERSION = 2
MODEL__TAP_NONELECTIVE_GWB__NAME = "tap_nonelective_gwb"
MODEL__TAP_NONELECTIVE_GWB__VERSION = 2
MODEL__TAP_NONELECTIVE_WMS__NAME = "tap_nonelective_wms"
MODEL__TAP_NONELECTIVE_WMS__VERSION = 2
def get_model_details(location):
    """Return the registered MLflow model name and version for a location.

    Args:
        location: Site identifier. ``"tower"`` and ``"gwb"`` select their own
            models; any other value (including ``"wms"``) selects the WMS
            model.

    Returns:
        A ``(model_name, model_version)`` tuple.
    """
    if location == "tower":
        return (
            MODEL__TAP_NONELECTIVE_TOWER__NAME,
            MODEL__TAP_NONELECTIVE_TOWER__VERSION,
        )
    if location == "gwb":
        return (
            MODEL__TAP_NONELECTIVE_GWB__NAME,
            MODEL__TAP_NONELECTIVE_GWB__VERSION,
        )
    # Fallback branch: every other location is treated as WMS.
    return (
        MODEL__TAP_NONELECTIVE_WMS__NAME,
        MODEL__TAP_NONELECTIVE_WMS__VERSION,
    )
Create predictor class
class SKProbabilityPredictorPoisson(BaseLensPredictor):
    """Turn a saved Poisson model into a distribution over ICU bed counts.

    The fitted model is loaded from the MLflow model registry; the model
    formula ("expr" artifact) and the lens are loaded from the run that
    produced the registered model version.
    """

    # Largest bed count included in the returned distribution.
    MAX_BED_COUNT = 11

    def __init__(self, model_name: str, model_version: int) -> None:
        """Load the model, its formula and its lens.

        Args:
            model_name: Registered model name in MLflow.
            model_version: Registered model version in MLflow.
        """
        super().__init__(model_name, model_version)
        self.model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
        # NOTE(review): model_info and _load_lens are not defined in this
        # class; presumably provided by BaseLensPredictor - confirm.
        self.expr = self._load_expr(self.model_info.run_id)
        self.lens = self._load_lens(self.model_info.run_id)

    @staticmethod
    def _load_expr(run_id: str):
        """Download the run's "expr" artifact and return its text.

        Args:
            run_id: MLflow run whose artifacts contain an ``expr`` directory.

        Returns:
            The UTF-8 decoded contents of the first ``*.txt`` file found
            under the downloaded ``expr`` directory.
        """
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)
            # Uses the module-level MlflowClient instance.
            client.download_artifacts(run_id, "expr", tmp_dir)
            expr_path = next((tmp_dir / "expr").rglob("*.txt"))
            # Read inside the context manager: the directory is removed on exit.
            expr = expr_path.read_bytes().decode("utf-8")
        return expr

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Return a Poisson probability for each bed count 0..MAX_BED_COUNT.

        Args:
            input_df: One-row frame in the layout the lens expects.

        Returns:
            A frame with columns ``bed_count``, ``probability``,
            ``predict_dt``, ``model_name``, ``model_version`` and ``run_id``.
        """
        X_df = self.lens.transform(input_df)
        # dmatrices returns (response, design); only the design matrix is used.
        _, X_df = dmatrices(self.expr, X_df, return_type="dataframe")

        predictions_set_df = self.model.get_prediction(X_df)
        # First cell of the summary frame; presumably the predicted mean,
        # which is used here as the Poisson rate.
        p = predictions_set_df.summary_frame().iloc[0, 0]

        bed_counts = list(range(self.MAX_BED_COUNT + 1))
        predictions_df = pd.DataFrame.from_dict(
            {
                "bed_count": bed_counts,
                "probability": poisson.pmf(bed_counts, p),
            }
        )

        # Provenance columns, repeated on every row.
        predictions_df["predict_dt"] = datetime.now(LONDON_TZ)
        predictions_df["model_name"] = self.model_name
        predictions_df["model_version"] = self.model_version
        predictions_df["run_id"] = self.model_info.run_id
        return predictions_df
class SKProbabilityPredictorStats(BaseLensPredictor):
    """Turn a saved binomial or Poisson model into a bed-count distribution.

    The distribution family is chosen per request from the first column of the
    input frame: ``"binom"`` selects a binomial distribution, anything else a
    Poisson distribution. The fitted model is loaded from the MLflow model
    registry; the model formula ("expr" artifact) and the lens are loaded from
    the run that produced the registered model version.
    """

    # Largest bed count included in the Poisson distribution.
    POISSON_MAX_BED_COUNT = 11

    def __init__(self, model_name: str, model_version: int) -> None:
        """Load the model, its formula and its lens.

        Input validation is performed in ``predict``, because no input frame
        exists at construction time.

        Args:
            model_name: Registered model name in MLflow.
            model_version: Registered model version in MLflow.
        """
        super().__init__(model_name, model_version)
        self.model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
        # NOTE(review): model_info and _load_lens are not defined in this
        # class; presumably provided by BaseLensPredictor - confirm.
        self.expr = self._load_expr(self.model_info.run_id)
        self.lens = self._load_lens(self.model_info.run_id)

    def _is_weekday(self, input_df: pd.DataFrame):
        """Reject binomial requests whose fifth column is not in 0..4.

        Args:
            input_df: One-row request frame.

        Returns:
            ``input_df`` unchanged.

        Raises:
            ValueError: For a ``"binom"`` request whose value at column
                position 4 is outside 0..4.
        """
        if input_df.iloc[0, 0] == "binom":
            # presumably column 4 holds the day-of-week index (0 = Monday);
            # verify against the binomial input layout.
            if input_df.iloc[0, 4] not in range(5):
                raise ValueError("Date requested is not a weekday")
        return input_df

    def _elective_list_gt0(self, input_df: pd.DataFrame):
        """Reject binomial requests whose sixth column is zero.

        Args:
            input_df: One-row request frame.

        Returns:
            ``input_df`` unchanged.

        Raises:
            ValueError: For a ``"binom"`` request whose value at column
                position 5 equals 0.
        """
        if input_df.iloc[0, 0] == "binom":
            # presumably column 5 holds the elective list size; it is also
            # used as the binomial trial count in predict().
            if input_df.iloc[0, 5] == 0:
                raise ValueError("There are no patients on the elective list")
        return input_df

    @staticmethod
    def _load_expr(run_id: str):
        """Download the run's "expr" artifact and return its text.

        Args:
            run_id: MLflow run whose artifacts contain an ``expr`` directory.

        Returns:
            The UTF-8 decoded contents of the first ``*.txt`` file found
            under the downloaded ``expr`` directory.
        """
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)
            # Uses the module-level MlflowClient instance.
            client.download_artifacts(run_id, "expr", tmp_dir)
            expr_path = next((tmp_dir / "expr").rglob("*.txt"))
            # Read inside the context manager: the directory is removed on exit.
            expr = expr_path.read_bytes().decode("utf-8")
        return expr

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Return a probability for each possible bed count.

        Args:
            input_df: One-row frame in the layout the lens expects. Column
                position 0 selects the distribution family.

        Returns:
            A frame with columns ``bed_count``, ``probability``,
            ``predict_dt``, ``model_name``, ``model_version`` and ``run_id``.

        Raises:
            ValueError: If a ``"binom"`` request fails the weekday or
                elective-list checks.
        """
        # Validate before doing any model work.
        self._is_weekday(input_df)
        self._elective_list_gt0(input_df)

        X_df = self.lens.transform(input_df)
        # dmatrices returns (response, design); only the design matrix is used.
        _, X_df = dmatrices(self.expr, X_df, return_type="dataframe")

        predictions_set_df = self.model.get_prediction(X_df)
        # First cell of the summary frame; presumably the predicted mean,
        # used as the success probability (binom) or the rate (poisson).
        p = predictions_set_df.summary_frame().iloc[0, 0]

        if input_df.iloc[0, 0] == "binom":
            # Cast so range() accepts the value whatever the column dtype.
            N = int(input_df.iloc[0, 5])
            bed_counts = list(range(N + 1))
            probabilities = binom.pmf(bed_counts, N, p)
        else:
            bed_counts = list(range(self.POISSON_MAX_BED_COUNT + 1))
            probabilities = poisson.pmf(bed_counts, p)

        predictions_df = pd.DataFrame.from_dict(
            {
                "bed_count": bed_counts,
                "probability": probabilities,
            }
        )

        # Provenance columns, repeated on every row.
        predictions_df["predict_dt"] = datetime.now(LONDON_TZ)
        predictions_df["model_name"] = self.model_name
        predictions_df["model_version"] = self.model_version
        predictions_df["run_id"] = self.model_info.run_id
        return predictions_df
Retrieve model predictions
## Create input matrix for model, specifying a date to make a prediction for. The model will return a probability of admission to ICU
# Date to predict for, and the location whose model should be used.
date = "2022-08-09"
to_predict = pd.to_datetime(date)
location = "tower"
model_name, model_version = get_model_details(location)

# One-row request frame: model_function, date, count (count is always 0).
input_df = pd.DataFrame(
    np.array(["poisson", to_predict, 0])[np.newaxis],
    columns=["model_function", "date", "count"],
)
# Weekday flag: Monday..Friday (weekday() <= 4) is True, weekend is False.
input_df.loc[:, "wkday"] = (
    input_df.loc[:, "date"].apply(datetime.weekday).astype("object") <= 4
)
# np.float was removed in NumPy 1.24; it was an alias of the builtin float.
input_df["date"] = input_df["date"].values.astype(float)
input_df

# Run the prediction with the GWB model.
model_name, model_version = get_model_details("gwb")
predictor = SKProbabilityPredictorStats(model_name, model_version)
predictions_df = predictor.predict(input_df)
predictions_df["probability"].values
Optional code to inspect the model
model_name, model_version = get_model_details("tower")
model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
print(model.summary())

# Retrieve the model and the saved lens
model = mlflow.sklearn.load_model(f"models:/{model_name}/{model_version}")
print(model.summary())

# The lens and formula are stored as artifacts of the run that produced this
# registered model version, so look that run up first.
run_id = client.get_model_version(model_name, str(model_version)).run_id
expr = SKProbabilityPredictorPoisson._load_expr(run_id)

with tempfile.TemporaryDirectory() as tmp:
    tmp_dir = Path(tmp)
    client.download_artifacts(run_id, "lens", tmp_dir)
    lens_path = next((tmp_dir / "lens").rglob("*.pkl"))
    with open(lens_path, "rb") as f:
        # NOTE(review): pickle executes arbitrary code on load; only use with
        # artifacts from a trusted tracking server.
        lens = pickle.load(f)

# Reproduce the design matrix the predictor builds internally.
X_df = lens.transform(input_df)
X_df__, X_df = dmatrices(expr, X_df, return_type="dataframe")