### Convert CHARTEVENTS into durations

（我已经连这应该是几级标题都搞不清楚了）

INPUTEVENTS_MV 处理好了，轮到下一个 CHARTEVENTS 。我们直接复用之前写好的代码就行了（一样的作为示例我们只看一个病人的）：

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143  WITH crrt_settings AS ( SELECT ce.icustay_id, ce.charttime , MAX(CASE WHEN ce.itemid IN ( 224149, -- Access Pressure 224144, -- Blood Flow (ml/min) 228004, -- Citrate (ACD-A) 225183, -- Current Goal 225977, -- Dialysate Fluid 224154, -- Dialysate Rate 224151, -- Effluent Pressure 224150, -- Filter Pressure 225958, -- Heparin Concentration (units/mL) 224145, -- Heparin Dose (per hour) 224191, -- Hourly Patient Fluid Removal 228005, -- PBP (Prefilter) Replacement Rate 228006, -- Post Filter Replacement Rate 225976, -- Replacement Fluid 224153, -- Replacement Rate 224152, -- Return Pressure 226457 -- Ultrafiltrate Output ) THEN 1 ELSE 0 END) AS RRT -- Below indicates that a new instance of CRRT has started , MAX(CASE -- System Integrity WHEN ce.itemid = 224146 AND value IN ('New Filter','Reinitiated') THEN 1 ELSE 0 END) AS RRT_start -- Below indicates that the current instance of CRRT has ended , MAX(CASE -- System Integrity WHEN ce.itemid = 224146 AND value IN ('Discontinued','Recirculating') THEN 1 WHEN ce.itemid = 225956 THEN 1 ELSE 0 END ) AS RRT_end FROM chartevents ce WHERE ce.itemid IN ( -- MetaVision ITEMIDs -- Below require special handling 224146, -- System Integrity 225956, -- Reason for CRRT Filter Change -- Below are settings which indicate CRRT is started/continuing 224149, -- Access Pressure 224144, -- Blood Flow (ml/min) 228004, -- Citrate (ACD-A) 225183, -- Current Goal 225977, -- Dialysate Fluid 224154, -- Dialysate Rate 224151, -- Effluent Pressure 224150, -- Filter Pressure 225958, -- Heparin Concentration (units/mL) 224145, -- Heparin Dose (per hour) 224191, -- Hourly Patient Fluid Removal 228005, -- PBP (Prefilter) Replacement Rate 228006, -- Post Filter Replacement Rate 225976, -- Replacement Fluid 224153, -- Replacement Rate 224152, -- Return Pressure 226457 -- Ultrafiltrate Output ) AND ce.value is not null AND icustay_id = 246866 GROUP BY icustay_id, charttime ) -- create the durations for each CRRT instance SELECT icustay_id , ROW_NUMBER() OVER (PARTITION BY icustay_id ORDER BY num) AS num , MIN(charttime) AS starttime , MAX(charttime) AS endtime FROM ( SELECT vd1.* -- create a cumulative sum of the instances of new CRRT -- this results in a monotonically increasing integer assigned to each CRRT , CASE WHEN RRT_start = 1 OR RRT=1 OR RRT_end = 1 THEN SUM(NewCRRT) OVER (PARTITION BY icustay_id ORDER BY charttime ) ELSE null END AS num --- now we convert CHARTTIME of CRRT settings into durations FROM ( -- vd1 SELECT icustay_id -- this carries over the previous charttime , CASE WHEN RRT=1 THEN LAG(CHARTTIME, 1) OVER (PARTITION BY icustay_id, RRT ORDER BY charttime) ELSE null END AS charttime_lag , charttime , RRT, RRT_start, RRT_end -- calculate the time since the last event , CASE -- non-null iff the current observation indicates settings are present WHEN RRT=1 THEN CHARTTIME - ( LAG(CHARTTIME, 1) OVER (PARTITION BY icustay_id, RRT ORDER BY charttime) ) ELSE null END AS CRRT_duration -- now we determine if the current event is a new instantiation , CASE WHEN RRT_start = 1 THEN 1 -- if there is an end flag, we mark any subsequent event as new WHEN RRT_end = 1 THEN 0 -- note the end is *not* a new event, the *subsequent* row is -- so here we output 0 WHEN LAG(RRT_end,1) OVER (PARTITION BY icustay_id, CASE WHEN RRT=1 OR RRT_end=1 THEN 1 ELSE 0 END ORDER BY charttime ) = 1 THEN 1 -- if there is less than 2 hours between CRRT settings -- we do not treat this as a new CRRT event WHEN (CHARTTIME - (LAG(CHARTTIME, 1) OVER (PARTITION BY icustay_id, CASE WHEN RRT=1 OR RRT_end=1 THEN 1 ELSE 0 END ORDER BY charttime)) ) <= INTERVAL '2' hour THEN 0 ELSE 1 END AS NewCRRT -- use the temp table with only settings from chartevents FROM crrt_settings ) AS vd1 -- now we can isolate to just rows with settings -- (before we had rows with start/end flags) -- this removes any null values for NewCRRT WHERE RRT_start = 1 OR RRT = 1 OR RRT_end = 1 ) AS vd2 GROUP BY icustay_id, num HAVING MIN(charttime) != MAX(charttime) ORDER BY icustay_id, num;

(这一段代码的缩进我已经无力了…)

* num starttime endtime
0 1 Day 11, 23:43 Day 12, 20:00
1 2 Day 12, 22:00 Day 13, 16:30
2 3 Day 13, 18:15 Day 13, 23:00
3 4 Day 14, 15:27 Day 16, 16:00

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157  # happy with above query # now remove the one patient constraints query_chartevents = query_schema + """ WITH crrt_settings AS( SELECT ce.icustay_id, ce.charttime, MAX(CASE WHEN ce.itemid IN ( 224149, -- Access Pressure 224144, -- Blood Flow (ml/min) 228004, -- Citrate (ACD-A) 225183, -- Current Goal 225977, -- Dialysate Fluid 224154, -- Dialysate Rate 224151, -- Effluent Pressure 224150, -- Filter Pressure 225958, -- Heparin Concentration (units/mL) 224145, -- Heparin Dose (per hour) 224191, -- Hourly Patient Fluid Removal 228005, -- PBP (Prefilter) Replacement Rate 228006, -- Post Filter Replacement Rate 225976, -- Replacement Fluid 224153, -- Replacement Rate 224152, -- Return Pressure 226457 -- Ultrafiltrate Output ) THEN 1 ELSE 0 END) AS RRT -- Below indicates that a new instance of CRRT has started , MAX( CASE -- System Integrity WHEN ce.itemid = 224146 AND value IN ('New Filter','Reinitiated') THEN 1 ELSE 0 END) AS RRT_start -- Below indicates that the current instance of CRRT has ended , MAX( CASE -- System Integrity WHEN ce.itemid = 224146 AND value IN ('Discontinued','Recirculating') THEN 1 WHEN ce.itemid = 225956 THEN 1 ELSE 0 END) AS RRT_end FROM chartevents ce WHERE ce.itemid IN ( -- MetaVision ITEMIDs -- Below require special handling 224146, -- System Integrity 225956, -- Reason fOR CRRT Filter Change -- Below are settings which indicate CRRT is started/continuing 224149, -- Access Pressure 224144, -- Blood Flow (ml/min) 228004, -- Citrate (ACD-A) 225183, -- Current Goal 225977, -- Dialysate Fluid 224154, -- Dialysate Rate 224151, -- Effluent Pressure 224150, -- Filter Pressure 225958, -- Heparin Concentration (units/mL) 224145, -- Heparin Dose (per hour) 224191, -- Hourly Patient Fluid Removal 228005, -- PBP (Prefilter) Replacement Rate 228006, -- Post Filter Replacement Rate 225976, -- Replacement Fluid 224153, -- Replacement Rate 224152, -- Return Pressure 226457 -- Ultrafiltrate Output ) AND ce.value IS NOT null GROUP BY icustay_id, charttime ) -- create the durations fOR each CRRT instance SELECT icustay_id , ROW_NUMBER() OVER (PARTITION BY icustay_id ORDER BY num) AS num , MIN(charttime) AS starttime , MAX(charttime) AS endtime FROM ( SELECT vd1.* -- create a cumulative sum of the instances of new CRRT -- this results in a monotonically increasing integer assigned to each CRRT , CASE WHEN RRT_start = 1 OR RRT=1 OR RRT_end = 1 THEN SUM(NewCRRT) OVER (PARTITION BY icustay_id ORDER BY charttime) ELSE null END AS num --- now we convert CHARTTIME of CRRT settings into durations FROM ( -- vd1 SELECT icustay_id -- this carries over the previous charttime , CASE WHEN RRT=1 THEN LAG(CHARTTIME, 1) OVER (PARTITION BY icustay_id, RRT ORDER BY charttime) ELSE null END AS charttime_lag , charttime , RRT , RRT_start , RRT_end -- calculate the time since the last event , CASE -- non-null iff the current observation indicates settings are present WHEN RRT=1 THEN CHARTTIME - ( LAG(CHARTTIME, 1) OVER ( PARTITION BY icustay_id, RRT ORDER BY charttime ) ) ELSE null END AS CRRT_duration -- now we determine if the current event is a new instantiation , CASE WHEN RRT_start = 1 THEN 1 -- if there is an end flag, we mark any subsequent event as new WHEN RRT_end = 1 -- note the end is *not* a new event, the *subsequent* row is -- so here we output 0 THEN 0 WHEN LAG(RRT_end,1) OVER ( PARTITION BY icustay_id, CASE WHEN RRT=1 OR RRT_end=1 THEN 1 ELSE 0 END ORDER BY charttime ) = 1 THEN 1 -- if there is less than 2 hours between CRRT settings, we do not treat this as a new CRRT event WHEN (CHARTTIME - (LAG(CHARTTIME, 1) OVER ( PARTITION BY icustay_id, CASE WHEN RRT=1 OR RRT_end=1 THEN 1 ELSE 0 END ORDER BY charttime ))) <= interval '2' hour THEN 0 ELSE 1 END AS NewCRRT -- use the temp table with only settings from chartevents FROM crrt_settings ) AS vd1 -- now we can isolate to just rows with settings -- (befORe we had rows with start/end flags) -- this removes any null values fOR NewCRRT WHERE RRT_start = 1 OR RRT = 1 OR RRT_end = 1 ) AS vd2 GROUP BY icustay_id, num HAVING MIN(charttime) != MAX(charttime) ORDER BY icustay_id, num; """

### Extract durations from PROCEDUREEVENTS_MV

PROCEDUREEVENTS_MV 里也有透析的记录。估计你们也忘了前面选的那些了。再列一次我们挑出来 itemid

• 225802 – Dialysis - CRRT
• 225803 – Dialysis - CVVHD
• 225809 – Dialysis - CVVHDF
• 225955 – Dialysis - SCUF

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17  # extract the durations from PROCEDUREEVENTS_MV # NOTE: we only look at a single patient as an exemplar SELECT icustay_id , ROW_NUMBER() OVER ( PARTITION BY icustay_id ORDER BY starttime, endtime) AS num , starttime, endtime FROM procedureevents_mv WHERE itemid IN ( 225802 -- Dialysis - CRRT , 225803 -- Dialysis - CVVHD , 225809 -- Dialysis - CVVHDF , 225955 -- Dialysis - SCUF ) AND icustay_id = 246866 ORDER BY icustay_id, num;

* num starttime endtime
0 1 Day 11, 23:45 Day 12, 20:30
1 2 Day 12, 21:30 Day 13, 23:15
2 3 Day 14, 15:27 Day 16, 16:02

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17  # happy with above query # now remove the one patient constraints query_procedureevents = query_schema + """ SELECT icustay_id , ROW_NUMBER() OVER (PARTITION BY icustay_id ORDER BY starttime, endtime) AS num , starttime, endtime FROM procedureevents_mv WHERE itemid IN ( 225802 -- Dialysis - CRRT , 225803 -- Dialysis - CVVHD , 225809 -- Dialysis - CVVHDF , 225955 -- Dialysis - SCUF ) ORDER BY icustay_id, num; """

## Roundup: data from INPUTEVENTS_MV, CHARTEVENTS, and PROCEDUREEVENTS_MV

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60  query_inputevents = query_schema + """ WITH t1 AS ( SELECT icustay_id , CASE WHEN itemid = 227525 THEN 'Calcium' ELSE 'KCl' END AS label , starttime, endtime , CASE WHEN LAG(endtime) OVER (PARTITION BY icustay_id, itemid ORDER BY starttime, endtime) = starttime THEN 0 ELSE 1 END AS new_event_flag , rate, rateuom , statusdescription FROM inputevents_mv WHERE itemid IN ( 227525,-- Calcium Gluconate (CRRT) 227536 -- KCl (CRRT) ) AND statusdescription != 'Rewritten' ) , t2 as ( SELECT icustay_id, label , starttime, endtime , SUM(new_event_flag) OVER (PARTITION BY icustay_id, label ORDER BY starttime, endtime) AS time_partition , rate, rateuom, statusdescription FROM t1 ) , t3 as ( SELECT icustay_id, label , starttime, endtime , time_partition , rate, rateuom, statusdescription , ROW_NUMBER() OVER (PARTITION BY icustay_id, label, time_partition ORDER BY starttime DESC, endtime DESC) AS lastrow FROM t2 ) SELECT icustay_id , time_partition AS num , MIN(starttime) AS starttime , max(endtime) AS endtime , label --, MIN(rate) AS rate_min --, max(rate) AS rate_max --, MIN(rateuom) AS rateuom --, MIN(CASE WHEN lastrow = 1 THEN statusdescription ELSE null END) AS statusdescription FROM t3 GROUP BY icustay_id, label, time_partition ORDER BY starttime, endtime; """

 1 2 3 4 5 6 7 8  import numpy as np import pandas as pd import matplotlib.pyplot as plt import psycopg2 import getpass from IPython.display import HTML, display import matplotlib.dates as dates import matplotlib.lines as mlines

  1 2 3 4 5 6 7 8 9 10 11  %matplotlib inline plt.style.use("ggplot") dbname = 'mimic' user = 'postgres' schema_name = 'mimiciii' # ln -s /var/run/postgresql/.s.PGSQL.5432 /tmp/.s.PGSQL.5432 con = psycopg2.connect(dbname="mimic", user="postgres", password=getpass.getpass(prompt='Password:'.format(user))) query_schema = 'SET search_path to ' + schema_name + ';'

 1 2 3 4 5 6 7 8  print("Durations from INPUTEVENTS...") ie = pd.read_sql_query(query_inputevents,con) print("Durations from CHARTEVENTS...") ce = pd.read_sql_query(query_chartevents,con) print("Durations from PROCEDUREEVENTS...") pe = pd.read_sql_query(query_procedureevents,con)

 1 2  print("First 5 lines of ie...") ie.head()

* icustay_id num starttime endtime label
0 205508 1 2101-07-09 18:10:00 2101-07-13 15:44:00 Calcium
1 280550 1 2101-08-02 21:20:00 2101-08-04 16:05:00 Calcium
2 280550 1 2101-08-03 08:56:00 2101-08-04 16:05:00 KCl
3 217315 1 2101-09-21 01:00:00 2101-09-21 09:00:00 Calcium
4 217315 2 2101-09-21 11:00:00 2101-09-27 11:00:00 Calcium

 1 2  print("First 5 lines of ce...") ce.head()
* icustay_id num starttime endtime
0 200347 1 2116-06-10 15:00:00 2116-06-11 01:00:00
1 200347 2 2116-06-11 04:20:00 2116-06-11 18:00:00
2 200347 3 2116-06-11 19:00:00 2116-06-12 08:00:00
3 200347 4 2116-06-12 10:02:00 2116-06-13 10:26:00
4 200699 1 2105-04-30 00:19:00 2105-04-30 08:00:00

 1 2  print("First 5 lines of pe...") pe.head()
* icustay_id num starttime endtime
0 200347 1 2116-06-10 15:00:00 2116-06-11 00:07:00
1 200347 2 2116-06-11 04:20:00 2116-06-12 07:27:00
2 200347 3 2116-06-12 10:00:00 2116-06-12 12:22:00
3 200347 4 2116-06-12 13:15:00 2116-06-13 10:29:00
4 200699 1 2105-04-30 00:19:00 2105-04-30 09:00:00

### Compare durations

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17  def display_df(df): col = [x for x in df.columns if x != 'icustay_id'] df_tmp = df[col].copy() for c in df_tmp.columns: if '[ns]' in str(df_tmp[c].dtype): df_tmp[c] = df_tmp[c].dt.strftime('Day %d, %H:%M') display(HTML(df_tmp.to_html().replace('NaN', ''))) # compare the above durations ce['source'] = 'chartevents' ie['source'] = 'inputevents_kcl' ie.loc[ie['label']=='Calcium','source'] = 'inputevents_ca' pe['source'] = 'procedureevents' df = pd.concat([ie[['icustay_id','num','starttime','endtime','source']], ce, pe]) df.head()

* icustay_id num starttime endtime source
0 205508 1 2101-07-09 18:10:00 2101-07-13 15:44:00 inputevents_ca
1 280550 1 2101-08-02 21:20:00 2101-08-04 16:05:00 inputevents_ca
2 280550 1 2101-08-03 08:56:00 2101-08-04 16:05:00 inputevents_kcl
3 217315 1 2101-09-21 01:00:00 2101-09-21 09:00:00 inputevents_ca
4 217315 2 2101-09-21 11:00:00 2101-09-27 11:00:00 inputevents_ca

 1 2 3 4  iid = 205508 idxDisplay = df['icustay_id'] == iid display_df(df.loc[idxDisplay, :])

* num starttime endtime source
0 1 Day 09, 18:10 Day 13, 15:44 inputevents_ca
136 1 Day 09, 18:00 Day 12, 15:15 chartevents
137 2 Day 12, 16:02 Day 12, 19:01 chartevents
138 3 Day 12, 21:00 Day 13, 14:03 chartevents
147 1 Day 09, 18:00 Day 13, 15:04 procedureevents

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38  # set a color palette col_dict = {'chartevents': [247,129,191], 'inputevents_kcl': [255,127,0], 'inputevents_ca': [228,26,28], 'procedureevents': [55,126,184]} for c in col_dict: col_dict[c] = [x/256.0 for x in col_dict[c]] fig, ax = plt.subplots(figsize=[16,10]) m = 0. M = np.sum(idxDisplay) # dummy plots for legend legend_handle = list() for c in col_dict: legend_handle.append(mlines.Line2D([], [], color=col_dict[c], marker='o', markersize=15, label=c)) for row in df.loc[idxDisplay,:].iterrows(): # row is a tuple: [index, actual_data], so we use row[1] plt.plot([row[1]['starttime'].to_pydatetime(), row[1]['endtime'].to_pydatetime()], [0+m/M,0+m/M], 'o-',color=col_dict[row[1]['source']], markersize=15, linewidth=2) m=m+1 ax.xaxis.set_minor_locator(dates.HourLocator(byhour=[0,12],interval=1)) ax.xaxis.set_minor_formatter(dates.DateFormatter('%H:%M')) ax.xaxis.grid(True, which="minor") ax.xaxis.set_major_locator(dates.DayLocator(interval=1)) ax.xaxis.set_major_formatter(dates.DateFormatter('\n%d\n%a')) ax.set_ylim([-0.1,1.0]) plt.legend(handles=legend_handle,loc='best') plt.savefig('0-crrt_' + str(iid) + '.png') plt.show()

  1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57  # print out the above for 10 examples # compare the above durations ce['source'] = 'chartevents' ie['source'] = 'inputevents_kcl' ie.loc[ie['label']=='Calcium','source'] = 'inputevents_ca' pe['source'] = 'procedureevents' df = pd.concat([ie[['icustay_id','num','starttime','endtime','source']], ce, pe]) for iid in np.sort(df.icustay_id.unique()[0:10]): iid = int(iid) # how many PROCEDUREEVENTS_MV dialysis events encapsulate CHARTEVENTS/INPUTEVENTS_MV? # vice-versa? idxDisplay = df['icustay_id'] == iid # no need to display here #display_df(df.loc[idxDisplay, :]) # 2) how many have no overlap whatsoever? col_dict = {'chartevents': [247,129,191], 'inputevents_kcl': [255,127,0], 'inputevents_ca': [228,26,28], 'procedureevents': [55,126,184]} for c in col_dict: col_dict[c] = [x/256.0 for x in col_dict[c]] fig, ax = plt.subplots(figsize=[16,10]) m = 0. M = np.sum(idxDisplay) # dummy plots for legend legend_handle = list() for c in col_dict: legend_handle.append(mlines.Line2D([], [], color=col_dict[c], marker='o', markersize=15, label=c)) for row in df.loc[idxDisplay,:].iterrows(): # row is a tuple: [index, actual_data], so we use row[1] plt.plot([row[1]['starttime'].to_pydatetime(), row[1]['endtime'].to_pydatetime()], [0+m/M,0+m/M], 'o-',color=col_dict[row[1]['source']], markersize=15, linewidth=2) m=m+1 ax.xaxis.set_minor_locator(dates.HourLocator(byhour=[0,6,12,18],interval=1)) ax.xaxis.set_minor_formatter(dates.DateFormatter('%H:%M')) ax.xaxis.grid(True, which="minor") ax.xaxis.set_major_locator(dates.DayLocator(interval=1)) ax.xaxis.set_major_formatter(dates.DateFormatter('\n%d-%m-%Y')) ax.set_ylim([-0.1,1.0]) plt.legend(handles=legend_handle,loc='best') # if you want to save the figures, uncomment the line below #plt.savefig('crrt_' + str(iid) + '.png')