-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLOS_Preprocessing.py
More file actions
28 lines (19 loc) · 774 Bytes
/
LOS_Preprocessing.py
File metadata and controls
28 lines (19 loc) · 774 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd
import numpy as np
## Complete Preprocessing Steps
def preprocess_data(df):
    """Turn a raw record table into a one-hot encoded feature frame.

    Steps, in order:
      1. Remove the identifier and date columns that are not used further.
      2. Promote ``CASE_ID`` to the index.
      3. Cast the four count/amount columns (including ``LOS``) to ``int``
         and every remaining column to ``object``.
      4. One-hot encode with ``pd.get_dummies``.

    Args:
        df: DataFrame containing ``CASE_ID``, the four dropped columns and
            the four numeric columns listed below. It is not modified.

    Returns:
        A new DataFrame indexed by ``CASE_ID``: the integer columns followed
        by the indicator columns produced by ``pd.get_dummies``.

    Raises:
        KeyError: if any of the expected columns is absent.
    """
    unused_fields = ['HOSPITAL_CODE', 'PATIENTID', 'ADMISSION_DATE', 'DISCHARGE_DATE']
    numeric_fields = ['AVAILABLE_EXTRA_ROOMS_IN_HOSPITAL', 'VISITORS_WITH_PATIENT', 'ADMISSION_DEPOSIT', 'LOS']

    # drop() hands back a fresh frame, so the caller's data stays untouched.
    frame = df.drop(columns=unused_fields).set_index('CASE_ID')

    # Everything that is not explicitly numeric is forced to object so that
    # get_dummies encodes it even when the raw values are numeric codes.
    dtype_plan = {name: object for name in frame.columns if name not in numeric_fields}
    # Numeric names are added unconditionally: a missing one must still fail.
    dtype_plan.update({name: int for name in numeric_fields})
    frame = frame.astype(dtype_plan)

    return pd.get_dummies(frame)