import os
import urllib
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
Working notes on merging Caboodle and EMAP (NHNN)
2022-07-03vNHNN
specifically for NHNN critical care
working notes on merging caboodle info on beds with EMAP
aim is to build a reliable view of bed census
programmatically returns a list of locatoins with Caboodle detail
but no testing/quality control
works in conjunction with add_caboodle2emap_beds.py
use to populate base bed definitions for the tower flow
Set-up incl database connections
def emap_db() -> Engine:
= "postgresql+psycopg2://{}:{}@{}:{}/{}".format(
url "EMAP_DB_USER"),
os.getenv("EMAP_DB_PASSWORD"),
os.getenv("EMAP_DB_HOST"),
os.getenv("EMAP_DB_PORT"),
os.getenv("EMAP_DB_NAME"),
os.getenv(
)= create_engine(
engine =4, max_overflow=6, connect_args={"connect_timeout": 120}
url, pool_size
)return engine
def caboodle_db() -> Engine:
= os.getenv("CABOODLE_DB_HOST")
db_host = os.getenv("CABOODLE_DB_USER")
db_user = os.getenv("CABOODLE_DB_PASSWORD")
db_password = os.getenv("CABOODLE_DB_PORT")
db_port = os.getenv("CABOODLE_DB_NAME")
db_name = f"mssql+pyodbc://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server"
connection_str = create_engine(connection_str)
engine return engine
= emap_db()
emap_engine = caboodle_db() caboodle_engine
Reliable way of joining Caboodle and EMAP bed level data
First load EMAP location table into memory
Do this with no modifications / transformations other than the join
= """
q SELECT
lo.location_id, lo.location_string, lo.department_id, lo.room_id, lo.bed_id, dept.name department, dept.speciality, room.name room
FROM star.location lo
LEFT JOIN star.department dept ON lo.department_id = dept.department_id
LEFT JOIN star.room ON lo.room_id = room.room_id
"""
= pd.read_sql_query(q, emap_engine)
dfe dfe.head()
And load Caboodle DepartmentDim into memory
= """
q SELECT
DepartmentKey
,BedEpicId
,Name
,DepartmentName
,RoomName
,BedName
,IsBed
,BedInCensus
,IsDepartment
,IsRoom
,IsCareArea
,DepartmentExternalName
,DepartmentSpecialty
,DepartmentType
,DepartmentServiceGrouper
,DepartmentLevelOfCareGrouper
,LocationName
,ParentLocationName
,_CreationInstant
,_LastUpdatedInstant
FROM dbo.DepartmentDim
"""
= pd.read_sql_query(q, caboodle_engine)
dfc dfc.head()
Now join these two tables - don’t attempt joins where there is no room/bed level data (b/c they’re not physical locations - you need a multi-key join - EMAP ‘name’ (department.name) joins on to Caboodle ‘DepartmentName’ - EMAP ‘bed’ (derived by splitting location.location_string) joins on to Caboodle ‘Name’ - drop ‘wait’ beds since these duplicate and block a one-to-one merge - try to be rigorous in pd.merge
- indicator=True
to allow inspection post merge - validate='one_to_one'
to throw an error if duplicates found
Note - sometimes (in Caboodle) DepartmentName and Name are duplicated so pick the most recently ‘created’
using …
Now load external code
import importlib
import add_caboodle2emap_beds
reload(add_caboodle2emap_beds)
importlib.from add_caboodle2emap_beds import bed_merge
= [
departments # Built from Tower Report 14 Jun 2022
# NAME # n emap locations
# "UCH T01 ACUTE MEDICAL", # 86
# "UCH T01 ENHANCED CARE", # 20
# "UCH T03 INTENSIVE CARE", # 37
# "UCH T06 HEAD (T06H)", # 27
# "UCH T06 CENTRAL (T06C)", # 25
# "UCH T06 SOUTH PACU", # 22
# "UCH T06 GYNAE (T06G)", # 18
# "UCH T07 NORTH (T07N)", # 45
# "UCH T07 CV SURGE", # 37
# "UCH T07 SOUTH", # 33
# "UCH T07 SOUTH (T07S)", # 23
# "UCH T07 HDRU", # 20
# "UCH T08 NORTH (T08N)", # 28
# "UCH T08 SOUTH (T08S)", # 25
# "UCH T08S ARCU", # 6
# "UCH T09 SOUTH (T09S)", # 34
# "UCH T09 NORTH (T09N)", # 32
# "UCH T09 CENTRAL (T09C)", # 25
# "UCH T10 SOUTH (T10S)", # 34
# "UCH T10 NORTH (T10N)", # 32
# "UCH T10 MED (T10M)", # 16
# "UCH T11 SOUTH (T11S)", # 27
# "UCH T11 NORTH (T11N)", # 25
# "UCH T11 EAST (T11E)", # 16
# "UCH T11 NORTH (T11NO)", # 8
# "UCH T12 SOUTH (T12S)", # 32
# "UCH T12 NORTH (T12N)", # 23
# "UCH T13 SOUTH (T13S)", # 31
# "UCH T13 NORTH ONCOLOGY", # 26
# "UCH T13 NORTH (T13N)", # 26
# "UCH T14 NORTH TRAUMA", # 28
# "UCH T14 NORTH (T14N)", # 28
# "UCH T14 SOUTH ASU", # 22
# "UCH T14 SOUTH (T14S)", # 17
# "UCH T15 SOUTH DECANT", # 21
# "UCH T15 SOUTH (T15S)", # 21
# "UCH T15 NORTH (T15N)", # 16
# "UCH T15 NORTH DECANT", # 15
# "UCH T16 NORTH (T16N)", # 19
# "UCH T16 SOUTH (T16S)", # 18
# "UCH T16 SOUTH WINTER", # 17
# "GWB L01 ELECTIVE SURG", # 37
# "GWB L01 CRITICAL CARE", # 12
# "GWB L02 NORTH (L02N)", # 19
# "GWB L02 EAST (L02E)", # 19
# "GWB L03 NORTH (L03N)", # 19
# "GWB L03 EAST (L03E)", # 19
# "GWB L04 NORTH (L04N)", # 20
# "GWB L04 EAST (L04E)", # 17
# "WMS W04 WARD", # 28
# "WMS W03 WARD", # 27
# "WMS W02 SHORT STAY", # 20
# "WMS W01 CRITICAL CARE", # 11
"NHNN C0 NCCU",
"NHNN C1 NCCU",
# "NHNN C0 EMERGENCY STROKE UNIT",
]
= bed_merge(df_emap=dfe, df_caboodle=dfc, departments=departments)
dfm dfm.head()
dfm._merge.value_counts()
"beds.tsv", sep="\t", index_label="local_id") dfm.to_csv(