Working notes on merging caboodle info on beds with EMAP (UCLH)

Set-up incl database connections

import os
import urllib
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
def emap_db() -> Engine:
    url = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
        os.getenv("EMAP_DB_USER"),
        os.getenv("EMAP_DB_PASSWORD"),
        os.getenv("EMAP_DB_HOST"),
        os.getenv("EMAP_DB_PORT"),
        os.getenv("EMAP_DB_NAME"),
    )
    engine = create_engine(
        url, pool_size=4, max_overflow=6, connect_args={"connect_timeout": 120}
    )
    return engine


def caboodle_db() -> Engine:
    db_host = os.getenv("CABOODLE_DB_HOST")
    db_user = os.getenv("CABOODLE_DB_USER")
    db_password = os.getenv("CABOODLE_DB_PASSWORD")
    db_port = os.getenv("CABOODLE_DB_PORT")
    db_name = os.getenv("CABOODLE_DB_NAME")
    connection_str = f"mssql+pyodbc://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server"
    engine = create_engine(connection_str)
    return engine
emap_engine = emap_db()
caboodle_engine = caboodle_db()

Reliable way of joining Caboodle and EMAP bed level data

First load EMAP location table into memory

Do this with no modifications / transformations other than the join

q = """
SELECT 
    lo.location_id, lo.location_string, lo.department_id, lo.room_id, lo.bed_id, dept.name department, dept.speciality, room.name room
FROM star.location lo
LEFT JOIN star.department dept ON lo.department_id = dept.department_id
LEFT JOIN star.room ON lo.room_id = room.room_id
"""
dfe = pd.read_sql_query(q, emap_engine)
dfe.head()

And load Caboodle DepartmentDim into memory

q = """
SELECT 
 DepartmentKey
,BedEpicId
,Name
,DepartmentName
,RoomName
,BedName
,IsBed
,BedInCensus
,IsDepartment
,IsRoom
,IsCareArea
,DepartmentExternalName
,DepartmentSpecialty
,DepartmentType
,DepartmentServiceGrouper
,DepartmentLevelOfCareGrouper
,LocationName
,ParentLocationName
,_CreationInstant
,_LastUpdatedInstant
FROM dbo.DepartmentDim
"""
dfc = pd.read_sql_query(q, caboodle_engine)
dfc.head()

Now join these two tables - don’t attempt joins where there is no room/bed level data (b/c they’re not physical locations - you need a multi-key join - EMAP ‘name’ (department.name) joins on to Caboodle ‘DepartmentName’ - EMAP ‘bed’ (derived by splitting location.location_string) joins on to Caboodle ‘Name’ - drop ‘wait’ beds since these duplicate and block a one-to-one merge - try to be rigorous in pd.merge - indicator=True to allow inspection post merge - validate='one_to_one' to throw an error if duplicates found

Note - sometimes (in Caboodle) DepartmentName and Name are duplicated so pick the most recently ‘created’

using …

Now load external code

import importlib

import add_caboodle2emap_beds

importlib.reload(add_caboodle2emap_beds)
from add_caboodle2emap_beds import bed_merge
departments = [
    # Built from Tower Report 14 Jun 2022
    # NAME                         # n emap locations
    "UCH T01 ACUTE MEDICAL",  # 86
    "UCH T01 ENHANCED CARE",  # 20
    "UCH T03 INTENSIVE CARE",  # 37
    "UCH T06 HEAD (T06H)",  # 27
    "UCH T06 CENTRAL (T06C)",  # 25
    "UCH T06 SOUTH PACU",  # 22
    "UCH T06 GYNAE (T06G)",  # 18
    "UCH T07 NORTH (T07N)",  # 45
    "UCH T07 CV SURGE",  # 37
    "UCH T07 SOUTH",  # 33
    "UCH T07 SOUTH (T07S)",  # 23
    "UCH T07 HDRU",  # 20
    "UCH T08 NORTH (T08N)",  # 28
    "UCH T08 SOUTH (T08S)",  # 25
    "UCH T08S ARCU",  #  6
    "UCH T09 SOUTH (T09S)",  # 34
    "UCH T09 NORTH (T09N)",  # 32
    "UCH T09 CENTRAL (T09C)",  # 25
    "UCH T10 SOUTH (T10S)",  # 34
    "UCH T10 NORTH (T10N)",  # 32
    "UCH T10 MED (T10M)",  # 16
    "UCH T11 SOUTH (T11S)",  # 27
    "UCH T11 NORTH (T11N)",  # 25
    "UCH T11 EAST (T11E)",  # 16
    "UCH T11 NORTH (T11NO)",  #  8
    "UCH T12 SOUTH (T12S)",  # 32
    "UCH T12 NORTH (T12N)",  # 23
    "UCH T13 SOUTH (T13S)",  # 31
    "UCH T13 NORTH ONCOLOGY",  # 26
    "UCH T13 NORTH (T13N)",  # 26
    "UCH T14 NORTH TRAUMA",  # 28
    "UCH T14 NORTH (T14N)",  # 28
    "UCH T14 SOUTH ASU",  # 22
    "UCH T14 SOUTH (T14S)",  # 17
    "UCH T15 SOUTH DECANT",  # 21
    "UCH T15 SOUTH (T15S)",  # 21
    "UCH T15 NORTH (T15N)",  # 16
    "UCH T15 NORTH DECANT",  # 15
    "UCH T16 NORTH (T16N)",  # 19
    "UCH T16 SOUTH (T16S)",  # 18
    "UCH T16 SOUTH WINTER",  # 17
    "GWB L01 ELECTIVE SURG",  # 37
    "GWB L01 CRITICAL CARE",  # 12
    "GWB L02 NORTH (L02N)",  # 19
    "GWB L02 EAST (L02E)",  # 19
    "GWB L03 NORTH (L03N)",  # 19
    "GWB L03 EAST (L03E)",  # 19
    "GWB L04 NORTH (L04N)",  # 20
    "GWB L04 EAST (L04E)",  # 17
    "WMS W04 WARD",  # 28
    "WMS W03 WARD",  # 27
    "WMS W02 SHORT STAY",  # 20
    "WMS W01 CRITICAL CARE",  # 11
]
departments = [
    # Built from Tower Report 14 Jun 2022
    # NAME                         # n emap locations
    "UCH T01 ACUTE MEDICAL",  # 86
    "UCH T01 ENHANCED CARE",  # 20
    "UCH T03 INTENSIVE CARE",  # 37
    "UCH T06 HEAD (T06H)",  # 27
    "UCH T06 CENTRAL (T06C)",  # 25
    "UCH T06 SOUTH PACU",  # 22
    "UCH T06 GYNAE (T06G)",  # 18
    "UCH T07 NORTH (T07N)",  # 45
    "UCH T07 CV SURGE",  # 37
    "UCH T07 SOUTH",  # 33
    "UCH T07 SOUTH (T07S)",  # 23
    "UCH T07 HDRU",  # 20
    "UCH T08 NORTH (T08N)",  # 28
    "UCH T08 SOUTH (T08S)",  # 25
    "UCH T08S ARCU",  #  6
    "UCH T09 SOUTH (T09S)",  # 34
    "UCH T09 NORTH (T09N)",  # 32
    "UCH T09 CENTRAL (T09C)",  # 25
    "UCH T10 SOUTH (T10S)",  # 34
    "UCH T10 NORTH (T10N)",  # 32
    "UCH T10 MED (T10M)",  # 16
    "UCH T11 SOUTH (T11S)",  # 27
    "UCH T11 NORTH (T11N)",  # 25
    "UCH T11 EAST (T11E)",  # 16
    "UCH T11 NORTH (T11NO)",  #  8
    "UCH T12 SOUTH (T12S)",  # 32
    "UCH T12 NORTH (T12N)",  # 23
    "UCH T13 SOUTH (T13S)",  # 31
    "UCH T13 NORTH ONCOLOGY",  # 26
    "UCH T13 NORTH (T13N)",  # 26
    "UCH T14 NORTH TRAUMA",  # 28
    "UCH T14 NORTH (T14N)",  # 28
    "UCH T14 SOUTH ASU",  # 22
    "UCH T14 SOUTH (T14S)",  # 17
    "UCH T15 SOUTH DECANT",  # 21
    "UCH T15 SOUTH (T15S)",  # 21
    "UCH T15 NORTH (T15N)",  # 16
    "UCH T15 NORTH DECANT",  # 15
    "UCH T16 NORTH (T16N)",  # 19
    "UCH T16 SOUTH (T16S)",  # 18
    "UCH T16 SOUTH WINTER",  # 17
    "GWB L01 ELECTIVE SURG",  # 37
    "GWB L01 CRITICAL CARE",  # 12
    "GWB L02 NORTH (L02N)",  # 19
    "GWB L02 EAST (L02E)",  # 19
    "GWB L03 NORTH (L03N)",  # 19
    "GWB L03 EAST (L03E)",  # 19
    "GWB L04 NORTH (L04N)",  # 20
    "GWB L04 EAST (L04E)",  # 17
    "WMS W04 WARD",  # 28
    "WMS W03 WARD",  # 27
    "WMS W02 SHORT STAY",  # 20
    "WMS W01 CRITICAL CARE",  # 11
]
dfm = bed_merge(df_emap=dfe, df_caboodle=dfc, departments=departments)
dfm.head()
dfm._merge.value_counts()
dfm.to_csv("beds.tsv", sep="\t", index_label="local_id")