3  Results

Data dictionary:

Sources:

3.1 Question 1: Does our data change over time in terms of frequency of complications, readmissions, and demographics/medical information?

Code
complication_counts = cab_counts %>% group_by(Year, complication) %>% summarise(Freq=sum(Count)) %>% arrange(Year, desc(Freq)) %>% mutate(complication=factor(complication, levels = unique(complication)))
readmission_counts = cab_counts %>% group_by(Year, readmission) %>% summarise(Freq=sum(Count))
strata_counts = cab_counts %>% group_by(Year, category, Strata) %>% summarise(Freq=sum(Count)) %>% subset(category != "All") 

ggplot(complication_counts, aes(x=Year, y=Freq, fill=complication)) +
  geom_bar(color="black", stat='identity', position='dodge') +
  scale_fill_brewer(palette=p) +
  scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
  theme_gray(16)+
  labs(title="Case Counts of Complications over the Years", y="Case Count in thousands")

We can see that one complication Post-Operative Atrial Fibrillation is by far the most common complication.

Code
ggplot(complication_counts, aes(x=Year, y=log10(Freq), fill=complication)) +
  geom_bar(color="black", stat='identity', position='dodge') +
  scale_fill_brewer(palette=p) +
  theme_gray(16)+
  labs(title="Log Case Counts of Complications over the Years", y="Log Count")

Deep Sternal Infection and Reintervention for Myocardial Ischemia are much lower in frequency than the rest of the strata for the Complications category. In the log bar plot, we can see that they are an entire order of magnitude less than the others. For the purposes of our analysis, we have removed them.

Code
cab_counts = cab_counts  %>% filter(!(complication %in% c('Deep Sternal Infection', 'Reintervention for Myocardial Ischemia'))) %>% arrange(Year, complication)
complication_counts = cab_counts %>% group_by(Year, complication) %>% summarise(Freq=sum(Count)) %>% arrange(Year, desc(Freq)) %>% mutate(complication=factor(complication, levels = unique(complication)))
readmission_counts = cab_counts %>% group_by(Year, readmission) %>% summarise(Freq=sum(Count))
strata_counts = cab_counts %>% group_by(Year, category, Strata) %>% summarise(Freq=sum(Count)) %>% subset(category != "All") 
Code
ggplot(readmission_counts, aes(x=Year, y=Freq, fill=readmission)) +
  geom_bar(color="black", stat='identity', position='dodge') +
  scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
  scale_fill_brewer(palette=p) +
  theme_gray(16)+
  labs(title="Case Counts of Readmissions over the Years", y="Case Count in thousands")

We can see from this plot that the proportion of readmissions and non-readmissions stays relatively each year. However, we can again observe that the number of complications overall seems to decrease over time.

Code
ggplot(strata_counts %>% subset(category == "Age"), aes(x=Year, y=Freq, fill=Strata)) +
      geom_bar(color="black", stat='identity', position='dodge') +
      scale_fill_brewer(palette=p) +
      scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
      labs(title=paste(paste("Case Counts of", "Age"), "over the Years"), y="Case Count in thousands") +
      theme_gray(16)

In terms of Age, the number of cases decreases slightly overall, most notable for the Over 65 age group. The Under 40 age group is too small to show up on the graph. For the rest of our analysis, we will not be using this age group.

Code
ggplot(strata_counts %>% subset(category == "Surgical Procedure") %>% arrange(Year, desc(Freq)) %>% mutate(Strata = factor(Strata, levels=unique(Strata))), aes(x=Year, y=Freq, fill=Strata)) +
      geom_bar(color="black", stat='identity', position='dodge') +
      scale_fill_brewer(palette=p) +
      scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
      labs(title=paste(paste("Case Counts of", "Surgical Procedure"), "over the Years"), y="Case Count in thousands") +
      theme_gray(16)

Overall, as we have seen before, the total case count decreases and this decrease is relatively proportional for each strata of Surgical Procedure.

Code
ggplot(strata_counts %>% subset(category == "Gender") %>% arrange(Year, desc(Freq)) %>% mutate(Strata = factor(Strata, levels=unique(Strata))), aes(x=Year, y=Freq, fill=Strata)) +
      geom_bar(color="black", stat='identity', position='dodge') +
      scale_fill_brewer(palette=p) +
      scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
      labs(title=paste(paste("Case Counts of", "Gender"), "over the Years"), y="Case Count in thousands") +
      theme_gray(16)

For Gender, the Male case count decreased more relative to Female case count. But, we can see also that men are getting far more CABG procedures than women.

Code
ggplot(strata_counts %>% subset(category == "Insurance Type") %>% arrange(Year, desc(Freq)) %>% mutate(Strata = factor(Strata, levels=unique(Strata))), aes(x=Year, y=Freq, fill=Strata)) +
      geom_bar(color="black", stat='identity', position='dodge') +
      scale_fill_brewer(palette=p) +
      scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
      labs(title=paste(paste("Case Counts of", "Insurance Type"), "over the Years"), y="Case Count in thousands") +
      theme_gray(16)

For the Insurance Type of the cases, the counts are relatively in line with the overall case count decrease we have observed in each graph. Due to the small amount of Uninsured cases, we will be removing the strata from the category for the rest of our analysis.

Code
ggplot(strata_counts %>% subset(category == "Race") %>% arrange(Year, desc(Freq)) %>% mutate(Strata = factor(Strata, levels=unique(Strata))), aes(x=Year, y=Freq, fill=Strata)) +
      geom_bar(color="black", stat='identity', position='dodge') +
      scale_fill_brewer(palette=p) +
      scale_y_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
      labs(title=paste(paste("Case Counts of", "Race"), "over the Years"), y="Case Count in thousands") +
      theme_gray(16)

While White people are by more the most common cases fro CABG procedures, they also saw the biggest relative decline over the years of our data. All other strata of the Race category stayed relatively the same. For the purporses of our analysis, we will be removing the Pacific and Native strata due to lack of data.

We can see that over the years the counts for readmission as well as the counts for complications, readmissions, and demographics/medical information all stay relatively consistent over the three year period with some slight decline for some strata in categories like Race and Gender.

3.2 Question 2: What are the most common complications associated with CABG?

Code
cab_counts = cab %>% filter(!(Strata %in% c('Under 40', 'Uninsured', 'Native', 'Others', 'Pacific'))) %>% filter(!(complication %in% c('Deep Sternal Infection', 'Reintervention for Myocardial Ischemia'))) %>% arrange(Year, complication)

complication_counts = cab_counts %>% group_by(Year, complication) %>% summarise(Freq=sum(Count)) %>% arrange(Year, desc(Freq)) %>% mutate(complication=factor(complication, levels = unique(complication)))
readmission_counts = cab_counts %>% group_by(Year, readmission) %>% summarise(Freq=sum(Count))
strata_counts = cab_counts %>% group_by(Year, category, Strata) %>% summarise(Freq=sum(Count)) %>% subset(category != "All") 
ggplot(complication_counts %>% group_by(Year, complication) %>% summarize(Freq=sum(Freq)) %>% mutate(complication=factor(complication)), aes(y=reorder(complication, Freq), x=Freq)) +
  geom_col(fill="#8dd3c7", color="black") +
  facet_grid(~ Year, scales = "free") +
  scale_x_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
  labs(x="Case Count in thousands", y="Complication", title="Case Counts for Complications\nfor Each Year") +
  guides(fill="none")+
  theme_gray(16)+
  theme(axis.text.x = element_text(hjust = 0.5), plot.margin = unit(c(0, 0, 0, 2), "cm"), plot.title = element_text(hjust = 0.5))

Code
ggplot(complication_counts %>% group_by(complication) %>% summarize(Freq=sum(Freq)) %>% mutate(complication=factor(complication)), aes(y=reorder(complication, Freq), x=Freq)) +
  geom_col(fill="#8dd3c7", color="black") +
  scale_x_continuous(labels = scales::label_number(scale = 1e-3, suffix = ""))+
  labs(y="Complication", x="Case Count in thousands", title="Case Counts for Complications") +
  guides(fill="none")+
  theme_gray(16)+
  theme(plot.margin = unit(c(0, 0, 0, 2), "cm"))

In this first plot, as we saw before, we can see that all the bars maintain their order in terms of decreasing frequency over the years. This further reinforces our decision to combine the years for future analysis of complications. We can see also that Post-Operative Atrial Fibrillation is still far and away the most common complication. The order and relative relationships of the bars are maintained when the data over all the years is combined in the second graph.

3.3 Question 3: What complications most often lead to readmission to the hospital?

Code
comp_read_counts = cab_counts %>% subset(category=="All") %>% arrange(Year, desc(Yes)) %>% mutate(complication=factor(complication, levels = unique(complication)))

ggplot(comp_read_counts, aes(y = reorder(complication, Yes), x = Yes, fill = readmission)) +
  geom_col(color="black",  position = "dodge") +
  facet_grid(~ Year, scales = "free") +
  scale_fill_brewer(palette=p, breaks=c("Y", "N")) +
  scale_x_continuous(labels = scales::percent)+
  labs(title="Complication Readmission Rates", y = "Complication", x = "Percentage of Complication Cases", fill = "Readmission") +
  theme_gray(16)+
  theme(panel.spacing = unit(2, "lines"))

Post-Operative Atrial Fibrillation not only accounts for a plurality of the complications, but also a plurality of readmissions. However, over the years, Prolonged Ventilation and Post-Operative Renal Failure seemed to be more skewed towards readmission than not, making them potentially more concerning complications with respect to readmission than Post-Operative Atrial Fibrillation. The other three complications were not skewed towards readmission, yet they still followed the trend of having more readmission cases than not.

3.4 Question 4: Does your demographic data and medical information impact your likelihood to face a complication from a CABG procedure?

Code
options(warn=-1)
library(forcats)
neworder = c("Post-Operative Atrial Fibrillation","Prolonged Ventilation","Post-Operative Renal Failure","Re-Operative for Bleed","Post-Operative Stroke","Post-Operative Dialysis" )

age_counts = cab_counts %>% subset(category == "Age") %>% group_by(complication, Strata) %>% summarize(Freq = sum(Count))  %>% rename(Complication = complication, Age = Strata) %>% mutate(Age=factor(Age)) %>% arrange(Age, desc(Freq)) %>% mutate(Complication=factor(Complication, levels = unique(Complication))) %>% uncount(weights = Freq)

age_counts$Complication = factor(age_counts$Complication)
age_counts$Complication = fct_relevel(age_counts$Complication, neworder)


cabg_counts = cab_counts %>% subset(category == "Surgical Procedure") %>% group_by(complication, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Surgical.Procedure = Strata) %>% mutate(Surgical.Procedure=factor(Surgical.Procedure)) %>% uncount(weights = Freq)

cabg_counts$Complication = factor(cabg_counts$Complication)
cabg_counts$Complication = fct_relevel(cabg_counts$Complication, neworder)

gender_counts = cab_counts %>% subset(category == "Gender") %>% group_by(complication, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Gender = Strata) %>% mutate(Gender=factor(Gender)) %>% uncount(weights = Freq)

gender_counts$Complication = factor(gender_counts$Complication)
gender_counts$Complication = fct_relevel(gender_counts$Complication, neworder)

pay_counts = cab_counts %>% subset(category == "Insurance Type") %>% group_by(complication, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Insurance.Type = Strata) %>% mutate(Insurance.Type =factor(Insurance.Type)) %>% uncount(weights = Freq)

pay_counts$Complication = factor(pay_counts$Complication)
pay_counts$Complication = fct_relevel(pay_counts$Complication, neworder)

race_counts = cab_counts %>% subset(category == "Race") %>% group_by(complication, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Race = Strata) %>% mutate(Race=factor(Race)) %>% uncount(weights = Freq)

race_counts$Complication = factor(race_counts$Complication)
race_counts$Complication = fct_relevel(race_counts$Complication, neworder)
Code
ggplot(age_counts) +
  geom_mosaic(aes(x=product(Age), fill = Complication)) +
  scale_fill_brewer(palette=p, direction = -1) +
  labs(title = "Mosaic Plot of Complication and Age")+
  theme_minimal(20) +
  theme(axis.text.x = element_text(angle = 45, hjust=1),plot.title = element_text(hjust =1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

It seems that the age group Over 65 faces more Post-Operative Atrial Fibrillation cases and fewer Prolonged Ventilation cases compared to 41-65.

Code
ggplot(cabg_counts) +
  geom_mosaic(aes(x=product(Surgical.Procedure), fill = Complication)) +
  scale_fill_brewer(palette=p, direction = -1) +
  theme_minimal(20) +
  labs(x="Surgical Procedure",title="Mosaic Plot of Complication and Surgical Procedure")+
  theme(axis.text.x = element_text(angle = 45, hjust=1), plot.title = element_text(hjust =1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Post-Operative Atrial Fibrillation varies the most with Isolated CABG procedures having a lot more and Other procedures having a lot less. The amount of cases of each complication is most even with the Other strata of Surgical Procedure.

Code
ggplot(gender_counts) +
  geom_mosaic(aes(x=product(Gender), fill = Complication)) +
  scale_fill_brewer(palette=p, direction = -1) +
  theme_minimal(20) +
  labs(title = "Mosaic Plot of Complication and Gender")+
  theme(axis.text.x = element_text(angle = 45, hjust=1), plot.title = element_text(hjust =1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Compared to men, women are more likely to suffer complications in Pronlonged Ventilation, Post-Operative Renal Failure, Post-Operative Stroke, and Post-Operative Dialysis.

Code
ggplot(pay_counts) +
  geom_mosaic(aes(x=product(Insurance.Type), fill = Complication)) +
  scale_fill_brewer(palette=p, direction = -1) +
  theme_minimal(20) +
  labs(title = "Mosaic Plot of Complication and Insurance Type", x="Insurance Type")+
  theme(axis.text.x = element_text(angle = 45, hjust=1), plot.title = element_text(hjust =1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

It appears that Other Insurance is less likely to have every complication except Post-Operative Atrial Fibrillation relative to the rest of the insurance types. On the other hand, Medi-Cal is more likely to have every complication besides Post-Operative Atrial Fibrillation relative to the rest of the insurance types. Medicare seems to have the same likelihood for each complication relative to Private insurance.

Code
ggplot(race_counts) +
  geom_mosaic(aes(x=product(Race), fill = Complication)) +
  scale_fill_brewer(palette=p, direction = -1) +
  theme_minimal(20) +
  labs(title="Mosaic Plot of Complication and Race")+
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1,plot.title = element_text(hjust =1), axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

It appears that White people are more likely to have Post-Operative Atrial Fibrillation relative to every other race, followed by Asian people, Hispanic people, and finally Black people. Conversely, Black and Hispanic people are more likely to have Prolonged Ventilation relative to the rest of the other races.

3.5 Question 5: Does your demographic data and medical information impact your likelihood that you will be readmitted to the hospital regardless of complication?

We decided that it would be interesting to see how the mosaic plots would look like if we looked at readmission as the dependent variable with each different demographic data as the independent variable. We also want to use a chi-squared test to determine if the variable in question is independent from readmission or not.

Code
custom = c("#fdb462", "#80b1d3")
age_read_counts = cab_counts %>% subset(category == "Age") %>% group_by(readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Readmission = readmission, Age = Strata) %>% mutate(Age=factor(Age)) %>% uncount(weights = Freq)
cabg_read_counts = cab_counts %>% subset(category == "Surgical Procedure") %>% group_by(readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Readmission = readmission, CABG.Type = Strata) %>% mutate(CABG.Type=factor(CABG.Type)) %>% uncount(weights = Freq)
gender_read_counts = cab_counts %>% subset(category == "Gender") %>% group_by(readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Readmission = readmission, Gender = Strata) %>% mutate(Gender=factor(Gender)) %>% uncount(weights = Freq)
pay_read_counts = cab_counts %>% subset(category == "Insurance Type") %>% group_by(readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Readmission = readmission, PayorType = Strata) %>% mutate(PayorType=factor(PayorType)) %>% uncount(weights = Freq)
race_read_counts = cab_counts %>% subset(category == "Race") %>% group_by(readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Readmission = readmission, Race = Strata) %>% mutate(Race=factor(Race)) %>% uncount(weights = Freq)

ggplot(age_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Age), fill = Readmission)) +
  labs(title = "Mosaic Plot of Age and Readmission") +
  scale_fill_manual(values = custom) +
  theme_minimal(16) +
  theme(aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Code
cat("----- Age -----\n")
----- Age -----
Code
test = chisq.test(table(age_read_counts$Readmission, age_read_counts$Age))
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
Chi Squared Statistic: 3.6179 
Code
cat(paste(paste("p-value:", rounded)), "\n")
p-value: 0.0572 
Code
cat(paste("-----------------", "\n"))
----------------- 

As we can see from the chi-squared test, we cannot reject the null hypothesis, which is that these two variables are independent. This is also visible on the mosaic plot, in which we can see barely any change between the two age groups.

Code
ggplot(cabg_read_counts) +
  geom_mosaic(aes(x=product(Readmission, CABG.Type), fill = Readmission)) +
  labs(title = "Mosaic Plot of Surgical Procedure\nand Readmission", x="Surgical Procedure") +
  scale_fill_manual(values = custom) +
  theme_minimal(16) +
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Code
cat("----- Surgical Procedure -----\n")
----- Surgical Procedure -----
Code
test = chisq.test(table(cabg_read_counts$Readmission, cabg_read_counts$CABG.Type))
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
Chi Squared Statistic: 117.2364 
Code
cat(paste(paste("p-value:", rounded)), "\n")
p-value: <0.0001 
Code
cat(paste("-----------------", "\n"))
----------------- 

We observe that the chi-squared test results in a p-value of less than 0.05. Therefore we can reject the null hypothesis and determine that the type of surgery does affect readmission likelihood. The mosaic plot supports this finding as well. We can see that Isolated CABG procedures are the least likely to result in a readmission despite being the most frequent type of surgical procedure in our dataset.

Code
ggplot(gender_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Gender), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  labs(title = "Mosaic Plot of Gender and Readmission") +
  theme_minimal(16) +
  theme(aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Code
cat("----- Gender -----\n")
----- Gender -----
Code
test = chisq.test(table(gender_read_counts$Readmission, gender_read_counts$Gender))
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
Chi Squared Statistic: 205.467 
Code
cat(paste(paste("p-value:", rounded)), "\n")
p-value: <0.0001 
Code
cat(paste("-----------------", "\n"))
----------------- 

The chi-squared test for gender and readmission tells us that gender plays a role in readmission likelihood. It appears that women are more likely to be readmitted compared to men.

Code
ggplot(pay_read_counts) +
  geom_mosaic(aes(x=product(Readmission, PayorType), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  labs(title = "Mosaic Plot of Insurance Type and Readmission", x="Insurance Type") +
  theme_minimal(16) +
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(),plot.margin = unit(c(0, 3, 0, 0), "cm"))

Code
cat("----- Insurance Type -----\n")
----- Insurance Type -----
Code
test = chisq.test(table(pay_read_counts$Readmission, pay_read_counts$PayorType))
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
Chi Squared Statistic: 10997.5074 
Code
cat(paste(paste("p-value:", rounded)), "\n")
p-value: <0.0001 
Code
cat(paste("-----------------", "\n"))
----------------- 

The mosaic plot clearly supports the chi-squared test in this case. It appears that patients with Other Insurance are far less likely to be readmitted compared to other insurance types. On the other hand, Medi-Cal patients were found most likely to be readmitted.

Code
ggplot(race_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Race), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  labs(title = "Mosaic Plot of Race and Readmission") +
  theme_minimal(16) +
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank())

Code
cat("----- Race -----\n")
----- Race -----
Code
test = chisq.test(table(race_read_counts$Readmission, race_read_counts$Race))
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
Chi Squared Statistic: 78.0327 
Code
cat(paste(paste("p-value:", rounded)), "\n")
p-value: <0.0001 
Code
cat(paste("-----------------", "\n"))
----------------- 

While it appears that each race has relatively the same likelihood of readmission, according to the chi-squared test, race does affect the chances of readmission. White people seem to be less likely relative to other races to be readmitted. Black and Hispanic people seem more likely relative to the other races to be readmitted.

3.6 Question 6: Does your demographic data, medical information, and complication impact your likelihood that you will be readmitted to the hospital?

Code
age_comp_read_counts = cab_counts %>% subset(category == "Age") %>% group_by(complication, readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Readmission = readmission, Age = Strata) %>% mutate(Age=factor(Age)) %>% uncount(weights = Freq)
cabg_comp_read_counts = cab_counts %>% subset(category == "Surgical Procedure") %>% group_by(complication, readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Readmission = readmission, CABG.Type = Strata) %>% mutate(CABG.Type=factor(CABG.Type)) %>% uncount(weights = Freq)
gender_comp_read_counts = cab_counts %>% subset(category == "Gender") %>% group_by(complication, readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Readmission = readmission, Gender = Strata) %>% mutate(Gender=factor(Gender)) %>% uncount(weights = Freq)
pay_comp_read_counts = cab_counts %>% subset(category == "Insurance Type") %>% group_by(complication, readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Readmission = readmission, PayorType = Strata) %>% mutate(PayorType=factor(PayorType)) %>% uncount(weights = Freq)
race_comp_read_counts = cab_counts %>% subset(category == "Race") %>% group_by(complication, readmission, Strata) %>% summarize(Freq = sum(Count)) %>% rename(Complication = complication, Readmission = readmission, Race = Strata) %>% mutate(Race=factor(Race)) %>% uncount(weights = Freq)

ggplot(age_comp_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Age), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  facet_wrap(~Complication) +
  labs(title = "Mosaic Plot of Age and Readmission for Each Complication") +
  theme_grey(13)+
  theme(aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(), plot.title = element_text(hjust = 0.5))

It appears that for most complications, age does not really affect readmission chances according to the different mosaic plots. The chi-squared tests give us a more precise view:

Code
options(scipen=999)
cat("----- Age -----\n")
----- Age -----
Code
for (complication in unique(age_comp_read_counts$Complication)){
cat(paste(complication, "\n"))
test = chisq.test(table(age_comp_read_counts$Readmission, age_comp_read_counts$Age, age_comp_read_counts$Complication)[, , complication])
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
cat(paste(paste("p-value:", rounded)), "\n")
cat(paste("-----------------", "\n"))
}
Post-Operative
Atrial Fibrillation 
Chi Squared Statistic: 13.5689 
p-value: 0.0002 
----------------- 
Post-Operative
Dialysis 
Chi Squared Statistic: 5.7033 
p-value: 0.0169 
----------------- 
Post-Operative
Renal Failure 
Chi Squared Statistic: 0.0023 
p-value: 0.9615 
----------------- 
Post-Operative
Stroke 
Chi Squared Statistic: 0.002 
p-value: 0.964 
----------------- 
Prolonged
Ventilation 
Chi Squared Statistic: 0.0036 
p-value: 0.9522 
----------------- 
Re-Operative
for Bleed 
Chi Squared Statistic: 3.1566 
p-value: 0.0756 
----------------- 

According to the chi-squared tests, for only 4 out of 6 complications, age does not affect readmission rates. Both Post-Operative Atrial Fibrillation and Post-Operative Dialysis seem to have opposite effects. The former seems to show that the older patients are less likely to be readmitted. However for Post-Operative Dialysis, older patients are more likely to be readmitted compared to middle-aged patients.

Code
ggplot(cabg_comp_read_counts) +
  geom_mosaic(aes(x=product(Readmission, CABG.Type), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  facet_wrap(~Complication) +
  labs(title = "Mosaic Plot of Surgical Procedure and\nReadmission for Each Complication", x="Surgical Procedure") +
  theme_grey(13)+
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(), plot.title = element_text(hjust = 0.5))

Code
cat("----- Surgical Procedure -----\n")
----- Surgical Procedure -----
Code
for (complication in unique(cabg_comp_read_counts$Complication)){
cat(paste(complication, "\n"))
test = chisq.test(table(cabg_comp_read_counts$Readmission, cabg_comp_read_counts$CABG.Type, cabg_comp_read_counts$Complication)[, , complication])
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
cat(paste(paste("p-value:", rounded)), "\n")
cat(paste("-----------------", "\n"))
}
Post-Operative
Atrial Fibrillation 
Chi Squared Statistic: 57.5318 
p-value: <0.0001 
----------------- 
Post-Operative
Dialysis 
Chi Squared Statistic: 1.3322 
p-value: 0.5137 
----------------- 
Post-Operative
Renal Failure 
Chi Squared Statistic: 4.3848 
p-value: 0.1116 
----------------- 
Post-Operative
Stroke 
Chi Squared Statistic: 2.4665 
p-value: 0.2913 
----------------- 
Prolonged
Ventilation 
Chi Squared Statistic: 14.2171 
p-value: 0.0008 
----------------- 
Re-Operative
for Bleed 
Chi Squared Statistic: 9.7124 
p-value: 0.0078 
----------------- 

Despite each facet in the mosaic plot looking the same, the type of surgery affects readmission chances for only three complications. It appears that Post-Operative Dialysis, Post-Operative Renal Failure and Post-Operative Stroke are independent from readmission rates. However, for the complications that are affected by surgery types, CABG+Valve and Other seem to be more likely responsible for more readmissions relative to Isolated CABG procedures.

Code
ggplot(gender_comp_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Gender), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  facet_wrap(~Complication) +
  labs(title = "Mosaic Plot of Gender and Readmission\nfor Each Complication") +
  theme_grey(13)+
  theme(aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(), plot.title = element_text(hjust = 0.5))

Code
cat("----- Gender -----\n")
----- Gender -----
Code
for (complication in unique(gender_comp_read_counts$Complication)){
cat(paste(complication, "\n"))
test = chisq.test(table(gender_comp_read_counts$Readmission, gender_comp_read_counts$Gender, gender_comp_read_counts$Complication)[, , complication])
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
cat(paste(paste("p-value:", rounded)), "\n")
cat(paste("-----------------", "\n"))
}
Post-Operative
Atrial Fibrillation 
Chi Squared Statistic: 117.0435 
p-value: <0.0001 
----------------- 
Post-Operative
Dialysis 
Chi Squared Statistic: 4.794 
p-value: 0.0286 
----------------- 
Post-Operative
Renal Failure 
Chi Squared Statistic: 8.76 
p-value: 0.0031 
----------------- 
Post-Operative
Stroke 
Chi Squared Statistic: 2.8418 
p-value: 0.0918 
----------------- 
Prolonged
Ventilation 
Chi Squared Statistic: 20.6166 
p-value: <0.0001 
----------------- 
Re-Operative
for Bleed 
Chi Squared Statistic: 7.0267 
p-value: 0.008 
----------------- 

For one complication, Post-Operative Stroke, gender is independent of readmission. For the rest, it appears that women are more likely to be readmitted compared to men.

Code
ggplot(pay_comp_read_counts) +
  geom_mosaic(aes(x=product(Readmission, PayorType), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  facet_wrap(~Complication) +
  labs(title = "Mosaic Plot of Insurance Type and Readmission\nfor Each Complication", x="Insurance Type") +
  theme_grey(13)+
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(), plot.title = element_text(hjust = 0.5))

Code
cat("----- Insurance Type -----\n")
----- Insurance Type -----
Code
for (complication in unique(pay_comp_read_counts$Complication)){
cat(paste(complication, "\n"))
test = chisq.test(table(pay_comp_read_counts$Readmission, pay_comp_read_counts$PayorType, pay_comp_read_counts$Complication)[, , complication])
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
cat(paste(paste("p-value:", rounded)), "\n")
cat(paste("-----------------", "\n"))
}
Post-Operative
Atrial Fibrillation 
Chi Squared Statistic: 7444.8242 
p-value: <0.0001 
----------------- 
Post-Operative
Dialysis 
Chi Squared Statistic: 312.9139 
p-value: <0.0001 
----------------- 
Post-Operative
Renal Failure 
Chi Squared Statistic: 510.6406 
p-value: <0.0001 
----------------- 
Post-Operative
Stroke 
Chi Squared Statistic: 325.0059 
p-value: <0.0001 
----------------- 
Prolonged
Ventilation 
Chi Squared Statistic: 1789.4062 
p-value: <0.0001 
----------------- 
Re-Operative
for Bleed 
Chi Squared Statistic: 529.2087 
p-value: <0.0001 
----------------- 

We can observe that for each complication, Insurance Type affects readmission rates. Just as we determined from the earlier mosaic plots on Insurance Type, Other Insurance has drastically a lower rate of readmission compared to the rest of the insurance types. In particular we also note that Medi-Cal has the worst rates for readmission for every complication except for Re-Operative for Bleed.

Code
ggplot(race_comp_read_counts) +
  geom_mosaic(aes(x=product(Readmission, Race), fill = Readmission)) +
  scale_fill_manual(values = custom) +
  facet_wrap(~Complication) +
  labs(title = "Mosaic Plot of Race and Readmission for Each Complication") +
  theme_grey(13)+
  theme(axis.text.x = element_text(angle = 45, hjust=1), aspect.ratio = 1, axis.ticks.x=element_blank(), line = element_blank(), axis.ticks.y=element_blank(), plot.title = element_text(hjust = 0.5))

Code
cat("----- Race -----\n")
----- Race -----
Code
for (complication in unique(race_comp_read_counts$Complication)){
cat(paste(complication, "\n"))
test = chisq.test(table(race_comp_read_counts$Readmission, race_comp_read_counts$Race, race_comp_read_counts$Complication)[, , complication])
rounded = round(test$p.value, 4)
if (rounded < 0.0001) {
  rounded <- "<0.0001"
}
cat(paste(paste("Chi Squared Statistic:", round(test$statistic, 4))), "\n")
cat(paste(paste("p-value:", rounded)), "\n")
cat(paste("-----------------", "\n"))
}
Post-Operative
Atrial Fibrillation 
Chi Squared Statistic: 25.2347 
p-value: <0.0001 
----------------- 
Post-Operative
Dialysis 
Chi Squared Statistic: 9.9607 
p-value: 0.0189 
----------------- 
Post-Operative
Renal Failure 
Chi Squared Statistic: 11.1967 
p-value: 0.0107 
----------------- 
Post-Operative
Stroke 
Chi Squared Statistic: 7.7891 
p-value: 0.0506 
----------------- 
Prolonged
Ventilation 
Chi Squared Statistic: 5.0378 
p-value: 0.1691 
----------------- 
Re-Operative
for Bleed 
Chi Squared Statistic: 3.7467 
p-value: 0.2901 
----------------- 

Post-Operative Stroke, Prolonged Ventilation, and Re-Operative for Bleed, are independent of Readmission according to the chi-squared tests. It appears that there is a significant dependent relationship for Post-Operative Dialysis patients, in which Black and Hispanic patients are more likely to be readmitted compared to Asian and White patients.