Working notes on merging Caboodle and EMAP (NHNN)

Set-up incl database connections

import os
import urllib
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
def emap_db() -> Engine:
    url = "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
        os.getenv("EMAP_DB_USER"),
        os.getenv("EMAP_DB_PASSWORD"),
        os.getenv("EMAP_DB_HOST"),
        os.getenv("EMAP_DB_PORT"),
        os.getenv("EMAP_DB_NAME"),
    )
    engine = create_engine(
        url, pool_size=4, max_overflow=6, connect_args={"connect_timeout": 120}
    )
    return engine


def caboodle_db() -> Engine:
    db_host = os.getenv("CABOODLE_DB_HOST")
    db_user = os.getenv("CABOODLE_DB_USER")
    db_password = os.getenv("CABOODLE_DB_PASSWORD")
    db_port = os.getenv("CABOODLE_DB_PORT")
    db_name = os.getenv("CABOODLE_DB_NAME")
    connection_str = f"mssql+pyodbc://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server"
    engine = create_engine(connection_str)
    return engine
emap_engine = emap_db()
caboodle_engine = caboodle_db()

Reliable way of joining Caboodle and EMAP bed level data

First load EMAP location table into memory

Do this with no modifications / transformations other than the join

q = """
SELECT 
    lo.location_id, lo.location_string, lo.department_id, lo.room_id, lo.bed_id, dept.name department, dept.speciality, room.name room
FROM star.location lo
LEFT JOIN star.department dept ON lo.department_id = dept.department_id
LEFT JOIN star.room ON lo.room_id = room.room_id
"""
dfe = pd.read_sql_query(q, emap_engine)
dfe.head()

And load Caboodle DepartmentDim into memory

q = """
SELECT 
 DepartmentKey
,BedEpicId
,Name
,DepartmentName
,RoomName
,BedName
,IsBed
,BedInCensus
,IsDepartment
,IsRoom
,IsCareArea
,DepartmentExternalName
,DepartmentSpecialty
,DepartmentType
,DepartmentServiceGrouper
,DepartmentLevelOfCareGrouper
,LocationName
,ParentLocationName
,_CreationInstant
,_LastUpdatedInstant
FROM dbo.DepartmentDim
"""
dfc = pd.read_sql_query(q, caboodle_engine)
dfc.head()

Now join these two tables - don’t attempt joins where there is no room/bed level data (b/c they’re not physical locations - you need a multi-key join - EMAP ‘name’ (department.name) joins on to Caboodle ‘DepartmentName’ - EMAP ‘bed’ (derived by splitting location.location_string) joins on to Caboodle ‘Name’ - drop ‘wait’ beds since these duplicate and block a one-to-one merge - try to be rigorous in pd.merge - indicator=True to allow inspection post merge - validate='one_to_one' to throw an error if duplicates found

Note - sometimes (in Caboodle) DepartmentName and Name are duplicated so pick the most recently ‘created’

using …

Now load external code

import importlib

import add_caboodle2emap_beds

importlib.reload(add_caboodle2emap_beds)
from add_caboodle2emap_beds import bed_merge
departments = [
    # Built from Tower Report 14 Jun 2022
    # NAME                         # n emap locations
    # "UCH T01 ACUTE MEDICAL",       # 86
    # "UCH T01 ENHANCED CARE",       # 20
    # "UCH T03 INTENSIVE CARE",      # 37
    # "UCH T06 HEAD (T06H)",         # 27
    # "UCH T06 CENTRAL (T06C)",      # 25
    # "UCH T06 SOUTH PACU",          # 22
    # "UCH T06 GYNAE (T06G)",        # 18
    # "UCH T07 NORTH (T07N)",        # 45
    # "UCH T07 CV SURGE",            # 37
    # "UCH T07 SOUTH",               # 33
    # "UCH T07 SOUTH (T07S)",        # 23
    # "UCH T07 HDRU",                # 20
    # "UCH T08 NORTH (T08N)",        # 28
    # "UCH T08 SOUTH (T08S)",        # 25
    # "UCH T08S ARCU",               #  6
    # "UCH T09 SOUTH (T09S)",        # 34
    # "UCH T09 NORTH (T09N)",        # 32
    # "UCH T09 CENTRAL (T09C)",      # 25
    # "UCH T10 SOUTH (T10S)",        # 34
    # "UCH T10 NORTH (T10N)",        # 32
    # "UCH T10 MED (T10M)",          # 16
    # "UCH T11 SOUTH (T11S)",        # 27
    # "UCH T11 NORTH (T11N)",        # 25
    # "UCH T11 EAST (T11E)",         # 16
    # "UCH T11 NORTH (T11NO)",       #  8
    # "UCH T12 SOUTH (T12S)",        # 32
    # "UCH T12 NORTH (T12N)",        # 23
    # "UCH T13 SOUTH (T13S)",        # 31
    # "UCH T13 NORTH ONCOLOGY",      # 26
    # "UCH T13 NORTH (T13N)",        # 26
    # "UCH T14 NORTH TRAUMA",        # 28
    # "UCH T14 NORTH (T14N)",        # 28
    # "UCH T14 SOUTH ASU",           # 22
    # "UCH T14 SOUTH (T14S)",        # 17
    # "UCH T15 SOUTH DECANT",        # 21
    # "UCH T15 SOUTH (T15S)",        # 21
    # "UCH T15 NORTH (T15N)",        # 16
    # "UCH T15 NORTH DECANT",        # 15
    # "UCH T16 NORTH (T16N)",        # 19
    # "UCH T16 SOUTH (T16S)",        # 18
    # "UCH T16 SOUTH WINTER",        # 17
    # "GWB L01 ELECTIVE SURG",       # 37
    # "GWB L01 CRITICAL CARE",       # 12
    # "GWB L02 NORTH (L02N)",        # 19
    # "GWB L02 EAST (L02E)",         # 19
    # "GWB L03 NORTH (L03N)",        # 19
    # "GWB L03 EAST (L03E)",         # 19
    # "GWB L04 NORTH (L04N)",        # 20
    # "GWB L04 EAST (L04E)",         # 17
    # "WMS W04 WARD",                # 28
    # "WMS W03 WARD",                # 27
    # "WMS W02 SHORT STAY",          # 20
    # "WMS W01 CRITICAL CARE",       # 11
    "NHNN C0 NCCU",
    "NHNN C1 NCCU",
    # "NHNN C0 EMERGENCY STROKE UNIT",
]
dfm = bed_merge(df_emap=dfe, df_caboodle=dfc, departments=departments)
dfm.head()
dfm._merge.value_counts()
dfm.to_csv("beds.tsv", sep="\t", index_label="local_id")