Chapter 6 Network analysis

mag_cor <- genome_counts_filt %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  rowwise() %>%
  mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
  ungroup() %>% 
  select(genome,average) %>% 
  filter(genome %in%  V(graph_pos)$name) %>%
  arrange(match(genome,V(graph_pos)$name)) %>%
  mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
  pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  rowwise() %>%
  mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
  ungroup() %>% 
  select(genome,average) %>% 
  filter(genome %in%  V(graph_neg)$name) %>%
  arrange(match(genome,V(graph_neg)$name)) %>%
  mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
  pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)

communities_neg <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.1 Timewise analysis

6.1.1 Day 7

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(day==7) %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_7 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_7 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_d7 <- tibble(day="day7",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_d7 <- tibble(day="day7",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.1.2 Day 14

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(day==14) %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_14 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_14 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_d14 <- tibble(day="day14",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_d14 <- tibble(day="day14",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.1.3 Day 21

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(day==21) %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_21 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_21 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_d21 <- tibble(day="day21",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_d21 <- tibble(day="day21",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.1.4 Day 28

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(day==28) %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_28 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_28 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_d28 <- tibble(day="day28",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_d28 <- tibble(day="day28",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.1.5 Day 35

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(day==35) %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_35 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_35 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_d35 <- tibble(day="day35",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_d35 <- tibble(day="day35",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.2 Treatment-wise analysis

6.2.1 TG1

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(treatment=="TG1") %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_TG1 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_TG1 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_TG1 <- tibble(treatment="TG1",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_TG1 <- tibble(treatment="TG1",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.2.2 TG2

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(treatment=="TG2") %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_TG2 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_TG2 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_TG2 <- tibble(treatment="TG2",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_TG2 <- tibble(treatment="TG2",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.2.3 TG3

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(treatment=="TG2") %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_TG3 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_TG3 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_TG3 <- tibble(treatment="TG3",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_TG3 <- tibble(treatment="TG3",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.2.4 TG4

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(treatment=="TG4") %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)

communities_pos_TG4 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_TG4 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_TG4 <- tibble(treatment="TG4",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_TG4 <- tibble(treatment="TG4",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.2.5 TG5

sorted_genomes <- genome_tree$tip.label

sample_subset <- sample_metadata %>%
  filter(treatment=="TG5") %>%
  pull(sample)

mag_cor <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    t() %>%
    cor(.,method="pearson")

threshold <- 0.7

#Positive correlations
mag_cor_pos <- ifelse(mag_cor > threshold, 1, 0)
graph_pos <- graph_from_adjacency_matrix(mag_cor_pos, mode = "undirected", diag = FALSE)

#Node color
V(graph_pos)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_pos)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_pos)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_pos)$name) %>%
    arrange(match(genome,V(graph_pos)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_pos <- cluster_edge_betweenness(graph_pos)
communities_pos_TG5 <- split(V(graph_pos)$name, membership(cluster_pos) )%>%
  keep(~ length(.x) > 1)

#Negative correlations
mag_cor_neg <- ifelse(mag_cor < -threshold, 1, 0)
graph_neg <- graph_from_adjacency_matrix(mag_cor_neg, mode = "undirected", diag = FALSE)

#Node color
V(graph_neg)$color <- tibble(order=genome_metadata$order %>% unique() %>% sort(),
                             color=order_colors) %>%
  right_join(genome_metadata,by="order")%>%
  filter(genome %in%  V(graph_neg)$name) %>%
  select(genome,color) %>%
  pull(color)

#Node size
V(graph_neg)$size <- genome_counts_filt[,c("genome",sample_subset)] %>%
    dplyr::select(where(~ !all(. == 0))) %>%
    filter(rowSums(select(., where(is.numeric)) != 0) > 3) %>%
    column_to_rownames(var = "genome") %>%
    transform(., 'clr') %>%
    as.data.frame() %>%
    rownames_to_column(var="genome") %>% 
    rowwise() %>% 
    mutate(average = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
    ungroup() %>% 
    select(genome,average) %>% 
    filter(genome %in%  V(graph_neg)$name) %>%
    arrange(match(genome,V(graph_neg)$name)) %>%
    mutate(size = scales::rescale(average, to = c(2, 10))) %>% 
    pull(size)

cluster_neg <- cluster_edge_betweenness(graph_neg)
communities_neg_TG5 <- split(V(graph_neg)$name, membership(cluster_neg) )%>%
  keep(~ length(.x) > 1)

#Network properties
network_positive_TG5 <- tibble(treatment="TG5",
                     density=graph.density(graph_pos),
                     modularity=modularity(cluster_walktrap(graph_pos)),
                     assortability=assortativity_degree(graph_pos),
                     connectivity=mean(components(graph_pos)$csize),
                     centrality=eigen_centrality(graph_pos)$value)

network_negative_TG5 <- tibble(treatment="TG5",
                     density=graph.density(graph_neg),
                     modularity=modularity(cluster_walktrap(graph_neg)),
                     assortability=assortativity_degree(graph_neg),
                     connectivity=mean(components(graph_neg)$csize),
                     centrality=eigen_centrality(graph_neg)$value)
corrplot(mag_cor, method = 'color', order = 'AOE')

plot(
  graph_pos,
  vertex.color = V(graph_pos)$color,
  vertex.size = V(graph_pos)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_pos) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

plot(
  graph_neg,
  vertex.color = V(graph_neg)$color,
  vertex.size = V(graph_neg)$size,
  vertex.label = NA,
  vertex.frame.color = NA, 
  edge.width = 1,
  mark.groups = communities(cluster_neg) %>% keep(~ length(.x) >= 2),
  mark.col = "#f4f4f4",
  mark.border = NA,
  layout = layout_with_fr
)

6.3 Combined

6.3.1 Network properties

How the nodes in the graph tend to cluster together, indicating how tightly correlated groups of genomes are.

bind_rows(network_positive_d7, 
          network_positive_d14, 
          network_positive_d21, 
          network_positive_d28, 
          network_positive_d35) %>% 
  tt()
day density modularity assortability connectivity centrality
day7 0.02898551 0.7108564 0.4534088 2.464286 5.356402
day14 0.01832907 0.8566793 0.1627001 1.916667 3.411426
day21 0.05297158 0.6595253 0.7433303 5.931034 25.187985
day28 0.04900662 0.7412937 0.7938659 6.291667 19.910469
day35 0.04781551 0.7829484 0.8335534 6.571429 14.669188
bind_rows(network_negative_d7, 
          network_negative_d14, 
          network_negative_d21, 
          network_negative_d28, 
          network_negative_d35) %>% 
  tt()
day density modularity assortability connectivity centrality
day7 0.02983802 0.6322449 0.4174550 3.285714 4.771890
day14 0.01193521 0.6817602 0.1482890 1.533333 3.046309
day21 0.01495988 0.6810744 -0.3395006 2.567164 8.203132
day28 0.01660044 0.6420750 -0.4033989 2.649123 7.023332
day35 0.02528298 0.6870766 -0.5396025 4.451613 7.077941
bind_rows(network_positive_TG1, 
          network_positive_TG2, 
          network_positive_TG3, 
          network_positive_TG4, 
          network_positive_TG5)%>% 
  tt()
treatment density modularity assortability connectivity centrality
TG1 0.10008591 0.5870043 0.5231654 4.041667 19.215842
TG2 0.04949608 0.5685080 0.4027239 3.800000 11.561955
TG3 0.04949608 0.5685080 0.4027239 3.800000 11.561955
TG4 0.06074561 0.6194333 0.5257698 3.692308 14.593375
TG5 0.05030644 0.7195754 0.4255001 3.869565 9.706974
bind_rows(network_negative_TG1, 
          network_negative_TG2, 
          network_negative_TG3, 
          network_negative_TG4, 
          network_negative_TG5) %>% 
  tt()
treatment density modularity assortability connectivity centrality
TG1 0.08075601 0.04194135 0.10293399 2.552632 17.171754
TG2 0.03852184 0.11959167 -0.18242183 1.826923 11.222618
TG3 0.03852184 0.11959167 -0.18242183 1.826923 11.222618
TG4 0.03399123 0.20002081 0.20107116 2.042553 10.008741
TG5 0.03472932 0.50981293 0.03896628 2.282051 7.511184
bind_rows(network_positive_d7, 
          network_positive_d14, 
          network_positive_d21, 
          network_positive_d28, 
          network_positive_d35) %>% 
  mutate(day=factor(day,levels=c("day7","day14","day21","day28","day35"))) %>%
  pivot_longer(!day, names_to="metric", values_to = "value") %>% 
  filter(metric != "assortability") %>% 
  ggplot(aes(x=day, y=value)) + 
    geom_col() +
    facet_wrap(. ~ metric, scales="free", nrow = 1) +
    theme_classic()

bind_rows(network_negative_d7, 
          network_negative_d14, 
          network_negative_d21, 
          network_negative_d28, 
          network_negative_d35) %>% 
  mutate(day=factor(day,levels=c("day7","day14","day21","day28","day35"))) %>%
  pivot_longer(!day, names_to="metric", values_to = "value") %>% 
  filter(metric != "assortability") %>%  
  ggplot(aes(x=day, y=value)) + 
    geom_col() +
    facet_wrap(. ~ metric, scales="free", nrow = 1) +
    theme_classic()

bind_rows(network_positive_TG1, 
          network_positive_TG2, 
          network_positive_TG3, 
          network_positive_TG4, 
          network_positive_TG5) %>% 
  mutate(treatment=factor(treatment,levels=c("TG1","TG2","TG3","TG4","TG5"))) %>%
  pivot_longer(!treatment, names_to="metric", values_to = "value") %>% 
  filter(metric != "assortability") %>%  
  ggplot(aes(x=treatment, y=value)) + 
    geom_col() +
    facet_wrap(. ~ metric, scales="free", nrow = 1) +
    theme_classic()

bind_rows(network_negative_TG1, 
          network_negative_TG2, 
          network_negative_TG3, 
          network_negative_TG4, 
          network_negative_TG5) %>% 
  mutate(treatment=factor(treatment,levels=c("TG1","TG2","TG3","TG4","TG5"))) %>%
  pivot_longer(!treatment, names_to="metric", values_to = "value") %>% 
  filter(metric != "assortability") %>%  
  ggplot(aes(x=treatment, y=value)) + 
    geom_col() +
    facet_wrap(. ~ metric, scales="free", nrow = 1) +
    theme_classic()

6.3.2 Positive correlations

6.3.2.1 Clusters per day

day_clusters_positive <- c(communities_pos_7, 
                  communities_pos_14, 
                  communities_pos_21, 
                  communities_pos_28, 
                  communities_pos_35) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
day_clusters_positive <- table(map_chr(day_clusters_positive, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
day_clusters_positive %>% 
  filter(count>3) %>% 
  tt()
genomes count distance
D300452:bin_000016 - GPB:bin_000208 5 0.07025397
D300452:bin_000016 - GPB:bin_000009 4 0.31057143
D300452:bin_000016 - GPB:bin_000021 4 0.10955556
D300472:bin_000003 - TG1:bin_000014 4 0.13850794
D300472:bin_000003 - TG2_14:bin_000017 4 0.11273016
D300481:bin_000002 - GPB:bin_000021 4 0.09898413
D300482:bin_000009 - TG2:bin_000003 4 0.17666667
D300482:bin_000009 - TG3:bin_000068 4 0.09482540
GPB:bin_000009 - GPB:bin_000021 4 0.29600000
GPB:bin_000009 - GPB:bin_000208 4 0.31917460
GPB:bin_000010 - GPB:bin_000140 4 0.27104762
GPB:bin_000021 - GPB:bin_000208 4 0.10939683
GPB:bin_000127 - TG2_07:bin_000029 4 0.11314286
GPB:bin_000127 - TG2_28:bin_000031 4 0.13279365
TG1:bin_000014 - TG2_14:bin_000017 4 0.12203175
TG2_07:bin_000029 - TG2_28:bin_000031 4 0.13063492
TG2:bin_000003 - TG3:bin_000068 4 0.14641270

6.3.2.2 Clusters per treatment

treatment_clusters_positive <- c(communities_pos_TG1, 
                        communities_pos_TG2, 
                        communities_pos_TG3, 
                        communities_pos_TG4, 
                        communities_pos_TG5) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
treatment_clusters_positive <- table(map_chr(treatment_clusters_positive, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
treatment_clusters_positive %>% 
  filter(count>3) %>% 
  tt()
genomes count distance
D300452:bin_000016 - GPB:bin_000140 5 0.12161905
GPB:bin_000015 - GPB:bin_000034 5 0.02752381
GPB:bin_000143 - TG5_28:bin_000003 5 0.09393651
D300444:bin_000010 - GPB:bin_000009 4 0.29526984
D300509:bin_000001 - GPB:bin_000194 4 0.12076190
GPB:bin_000002 - GPB:bin_000009 4 0.16752381
GPB:bin_000059 - GPB:bin_000143 4 0.13358730
GPB:bin_000059 - TG5_28:bin_000003 4 0.11190476

6.3.2.3 Cluster combined

all_clusters_positive <- c(communities_pos_7, 
                        communities_pos_14, 
                        communities_pos_21, 
                        communities_pos_28, 
                        communities_pos_35,
                        communities_pos_TG1, 
                        communities_pos_TG2, 
                        communities_pos_TG3, 
                        communities_pos_TG4, 
                        communities_pos_TG5) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
all_clusters_positive <- table(map_chr(all_clusters_positive, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
all_clusters_positive %>% 
  filter(count>5) %>% 
  tt()
genomes count distance
D300452:bin_000016 - GPB:bin_000140 8 0.12161905
D300452:bin_000016 - GPB:bin_000208 8 0.07025397
GPB:bin_000015 - GPB:bin_000034 8 0.02752381
D300452:bin_000016 - GPB:bin_000009 7 0.31057143
D300472:bin_000003 - TG2_14:bin_000017 7 0.11273016
D300481:bin_000002 - GPB:bin_000021 7 0.09898413
GPB:bin_000009 - GPB:bin_000021 7 0.29600000
GPB:bin_000009 - GPB:bin_000208 7 0.31917460
GPB:bin_000010 - GPB:bin_000140 7 0.27104762
D300435:bin_000001 - D300452:bin_000016 6 0.12930159
D300435:bin_000001 - GPB:bin_000208 6 0.14438095
D300444:bin_000010 - GPB:bin_000009 6 0.29526984
D300452:bin_000016 - GPB:bin_000010 6 0.28257143
D300482:bin_000009 - TG2:bin_000003 6 0.17666667
D300482:bin_000009 - TG3:bin_000068 6 0.09482540
D300500:bin_000011 - TG1:bin_000014 6 0.10250794
D300509:bin_000001 - GPB:bin_000194 6 0.12076190
GPB:bin_000002 - GPB:bin_000009 6 0.16752381
GPB:bin_000009 - GPB:bin_000010 6 0.11092063
GPB:bin_000009 - GPB:bin_000140 6 0.30844444
GPB:bin_000010 - GPB:bin_000208 6 0.29155556
GPB:bin_000059 - GPB:bin_000124 6 0.11126984
GPB:bin_000059 - GPB:bin_000131 6 0.13409524
GPB:bin_000123 - GPB:bin_000175 6 0.14961905
GPB:bin_000140 - GPB:bin_000208 6 0.14742857
GPB:bin_000143 - TG5_28:bin_000003 6 0.09393651
GPB:bin_000194 - TG1:bin_000014 6 0.12644444
TG2:bin_000003 - TG3:bin_000068 6 0.14641270
day_clusters_positive_filtered <- day_clusters_positive %>% 
  filter(count>3) %>%
  rename(count_day=count) %>% 
  mutate(clustering_day=TRUE)

treatment_clusters_positive_filtered <- treatment_clusters_positive %>% 
  filter(count>3) %>%
  rename(count_treatment=count) %>% 
  mutate(clustering_treatment=TRUE)

all_clusters_positive_filtered <- all_clusters_positive %>% 
  filter(count > 5) %>%
  rename(count_all=count) %>% 
  mutate(clustering_all=TRUE)

associated_pairs_positive <- all_clusters_positive_filtered %>% 
  full_join(day_clusters_positive_filtered, by=join_by("genomes"=="genomes","distance"=="distance")) %>% 
  full_join(treatment_clusters_positive_filtered, by=join_by("genomes"=="genomes","distance"=="distance")) %>%
  rowwise() %>% 
  mutate(count=max(count_all,count_day,count_treatment, na.rm=TRUE)) %>%
  mutate(clustering = case_when(
    clustering_all == TRUE ~ "all",  # If clustering_all is TRUE
    clustering_day == TRUE & is.na(clustering_all) & is.na(clustering_treatment) ~ "day", 
    clustering_treatment == TRUE & is.na(clustering_all) & is.na(clustering_day) ~ "treatment")) %>%
  select(genomes,count,clustering,distance) %>%
  mutate(clustering=factor(clustering)) %>%
  #add coordinates
  separate(genomes, into = c("genome1", "genome2"), sep = " - ") %>%
  left_join(gift_pcoa_vectors %>% rownames_to_column(var = "genome"), by = c("genome1" = "genome")) %>%
  rename(x1 = Axis.1, y1 = Axis.2) %>%
  left_join(gift_pcoa_vectors %>% rownames_to_column(var = "genome"), by = c("genome2" = "genome")) %>%
  rename(x2 = Axis.1, y2 = Axis.2)

associated_pairs_positive %>% 
  select(genome1,genome2,count,clustering) %>% 
  left_join(genome_metadata %>% select(genome,order,species),by=join_by(genome1==genome)) %>% 
  left_join(genome_metadata %>% select(genome,order,species),by=join_by(genome2==genome)) %>% 
  tt()
genome1 genome2 count clustering order.x species.x order.y species.y
D300452:bin_000016 GPB:bin_000140 8 all o__Lachnospirales s__Eisenbergiella sp904392525 o__Oscillospirales s__Flavonifractor plautii
D300452:bin_000016 GPB:bin_000208 8 all o__Lachnospirales s__Eisenbergiella sp904392525 o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000015 GPB:bin_000034 8 all o__Oscillospirales s__Lawsonibacter pullicola o__Oscillospirales s__Lawsonibacter sp900545895
D300452:bin_000016 GPB:bin_000009 7 all o__Lachnospirales s__Eisenbergiella sp904392525 o__Enterobacterales s__Klebsiella pneumoniae
D300472:bin_000003 TG2_14:bin_000017 7 all o__Christensenellales s__Borkfalkia sp904373255 o__Oscillospirales s__Heritagella sp905215105
D300481:bin_000002 GPB:bin_000021 7 all o__Oscillospirales s__Merdivicinus intestinigallinarum o__Oscillospirales s__Anaeromassilibacillus stercoravium
GPB:bin_000009 GPB:bin_000021 7 all o__Enterobacterales s__Klebsiella pneumoniae o__Oscillospirales s__Anaeromassilibacillus stercoravium
GPB:bin_000009 GPB:bin_000208 7 all o__Enterobacterales s__Klebsiella pneumoniae o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000010 GPB:bin_000140 7 all o__Enterobacterales s__Escherichia coli o__Oscillospirales s__Flavonifractor plautii
D300435:bin_000001 D300452:bin_000016 6 all o__Lachnospirales s__Anaerotignum merdipullorum o__Lachnospirales s__Eisenbergiella sp904392525
D300435:bin_000001 GPB:bin_000208 6 all o__Lachnospirales s__Anaerotignum merdipullorum o__Lachnospirales s__Eisenbergiella intestinigallinarum
D300444:bin_000010 GPB:bin_000009 6 all o__Lachnospirales s__Enterocloster excrementigallinarum o__Enterobacterales s__Klebsiella pneumoniae
D300452:bin_000016 GPB:bin_000010 6 all o__Lachnospirales s__Eisenbergiella sp904392525 o__Enterobacterales s__Escherichia coli
D300482:bin_000009 TG2:bin_000003 6 all o__Lachnospirales s__Mediterraneibacter sp900761655 o__Lactobacillales s__Limosilactobacillus reuteri_E
D300482:bin_000009 TG3:bin_000068 6 all o__Lachnospirales s__Mediterraneibacter sp900761655 o__Lachnospirales s__An181 sp002160325
D300500:bin_000011 TG1:bin_000014 6 all o__Oscillospirales s__Gemmiger stercoravium o__Erysipelotrichales s__Merdibacter merdigallinarum
D300509:bin_000001 GPB:bin_000194 6 all o__Erysipelotrichales s__Massilimicrobiota merdigallinarum o__Lachnospirales s__Lachnoclostridium_B sp002160985
GPB:bin_000002 GPB:bin_000009 6 all o__Enterobacterales s__Proteus mirabilis o__Enterobacterales s__Klebsiella pneumoniae
GPB:bin_000009 GPB:bin_000010 6 all o__Enterobacterales s__Klebsiella pneumoniae o__Enterobacterales s__Escherichia coli
GPB:bin_000009 GPB:bin_000140 6 all o__Enterobacterales s__Klebsiella pneumoniae o__Oscillospirales s__Flavonifractor plautii
GPB:bin_000010 GPB:bin_000208 6 all o__Enterobacterales s__Escherichia coli o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000059 GPB:bin_000124 6 all o__Oscillospirales s__Dysosmobacter avistercoris o__Lachnospirales s__Copromonas faecavium
GPB:bin_000059 GPB:bin_000131 6 all o__Oscillospirales s__Dysosmobacter avistercoris o__Christensenellales s__Borkfalkia excrementigallinarum
GPB:bin_000123 GPB:bin_000175 6 all o__Oscillospirales s__Faecousia gallistercoris o__Lachnospirales s__Mediterraneibacter intestinavium
GPB:bin_000140 GPB:bin_000208 6 all o__Oscillospirales s__Flavonifractor plautii o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000143 TG5_28:bin_000003 6 all o__Lachnospirales s__Blautia ornithocaccae o__Erysipelotrichales s__Thomasclavelia spiroformis
GPB:bin_000194 TG1:bin_000014 6 all o__Lachnospirales s__Lachnoclostridium_B sp002160985 o__Erysipelotrichales s__Merdibacter merdigallinarum
TG2:bin_000003 TG3:bin_000068 6 all o__Lactobacillales s__Limosilactobacillus reuteri_E o__Lachnospirales s__An181 sp002160325
D300452:bin_000016 GPB:bin_000021 4 day o__Lachnospirales s__Eisenbergiella sp904392525 o__Oscillospirales s__Anaeromassilibacillus stercoravium
D300472:bin_000003 TG1:bin_000014 4 day o__Christensenellales s__Borkfalkia sp904373255 o__Erysipelotrichales s__Merdibacter merdigallinarum
GPB:bin_000021 GPB:bin_000208 4 day o__Oscillospirales s__Anaeromassilibacillus stercoravium o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000127 TG2_07:bin_000029 4 day o__Oscillospirales s__Onthovicinus excrementipullorum o__Lachnospirales s__Mediterraneibacter vanvlietii
GPB:bin_000127 TG2_28:bin_000031 4 day o__Oscillospirales s__Onthovicinus excrementipullorum o__Lachnospirales s__Anaerostipes butyraticus
TG1:bin_000014 TG2_14:bin_000017 4 day o__Erysipelotrichales s__Merdibacter merdigallinarum o__Oscillospirales s__Heritagella sp905215105
TG2_07:bin_000029 TG2_28:bin_000031 4 day o__Lachnospirales s__Mediterraneibacter vanvlietii o__Lachnospirales s__Anaerostipes butyraticus
GPB:bin_000059 GPB:bin_000143 4 treatment o__Oscillospirales s__Dysosmobacter avistercoris o__Lachnospirales s__Blautia ornithocaccae
GPB:bin_000059 TG5_28:bin_000003 4 treatment o__Oscillospirales s__Dysosmobacter avistercoris o__Erysipelotrichales s__Thomasclavelia spiroformis

6.3.3 Negative correlations

6.3.3.1 Clusters per day

day_clusters_negative <- c(communities_neg_7, 
                  communities_neg_14, 
                  communities_neg_21, 
                  communities_neg_28, 
                  communities_neg_35) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
day_clusters_negative <- table(map_chr(day_clusters_negative, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
day_clusters_negative %>% 
  filter(count>3) %>% 
  tt()
genomes count distance
D300482:bin_000009 - GPB:bin_000053 4 0.06346032
D300482:bin_000009 - TG2:bin_000003 4 0.17666667
GPB:bin_000025 - GPB:bin_000146 4 0.14895238
GPB:bin_000053 - GPB:bin_000092 4 0.07273016
GPB:bin_000053 - TG2:bin_000003 4 0.15295238
GPB:bin_000124 - GPB:bin_000181 4 0.12419048

6.3.3.2 Clusters per treatment

treatment_clusters_negative <- c(communities_neg_TG1, 
                        communities_neg_TG2, 
                        communities_neg_TG3, 
                        communities_neg_TG4, 
                        communities_neg_TG5) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
treatment_clusters_negative <- table(map_chr(treatment_clusters_negative, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
treatment_clusters_negative %>% 
  filter(count>3) %>% 
  tt()
genomes count distance
GPB:bin_000002 - GPB:bin_000054 5 0.22371429
GPB:bin_000208 - TG5_28:bin_000003 5 0.11015873
D300481:bin_000002 - GPB:bin_000124 4 0.13396825
D300509:bin_000001 - GPB:bin_000124 4 0.10571429
D300509:bin_000001 - GPB:bin_000208 4 0.10692063
D300509:bin_000001 - TG5_28:bin_000003 4 0.05168254
D300511:bin_000002 - GPB:bin_000197 4 0.14053968
GPB:bin_000002 - TG5_28:bin_000004 4 0.34053968
GPB:bin_000054 - TG5_28:bin_000004 4 0.19911111
GPB:bin_000059 - GPB:bin_000130 4 0.17339683
GPB:bin_000059 - GPB:bin_000197 4 0.09558730
GPB:bin_000123 - GPB:bin_000208 4 0.13295238
GPB:bin_000123 - TG5_28:bin_000003 4 0.12996825
GPB:bin_000124 - GPB:bin_000208 4 0.11561905
GPB:bin_000124 - TG5_28:bin_000003 4 0.10698413
GPB:bin_000130 - GPB:bin_000197 4 0.16625397
GPB:bin_000194 - GPB:bin_000208 4 0.12336508
GPB:bin_000194 - TG5_28:bin_000003 4 0.10946032

6.3.3.3 Cluster combined

all_clusters_negative <- c(communities_neg_7, 
                        communities_neg_14, 
                        communities_neg_21, 
                        communities_neg_28, 
                        communities_neg_35,
                        communities_neg_TG1, 
                        communities_neg_TG2, 
                        communities_neg_TG3, 
                        communities_neg_TG4, 
                        communities_neg_TG5) %>%
  purrr::map(~ combn(.x, 2, simplify = FALSE)) %>%
  unlist(recursive = FALSE) %>% 
  purrr::map(~ sort(.x))

# Calculate functional distances
all_clusters_negative <- table(map_chr(all_clusters_negative, ~ paste(.x, collapse = " - "))) %>% 
  as.data.frame(., stringsAsFactors = FALSE) %>%
  rename(genomes=1,count=2) %>%
  mutate(distance = map_dbl(genomes, function(pair) {
    genomes <- str_split(pair, " - ", simplify = TRUE)
    dist_value <- stats::dist(genome_gifts[genomes, ], method = "manhattan") / ncol(genome_gifts[genomes, ])
    return(as.numeric(dist_value))
  })) %>% 
  arrange(-count)

# Print top pairs
all_clusters_negative %>% 
  filter(count>5) %>% 
  tt()
genomes count distance
GPB:bin_000059 - GPB:bin_000130 7 0.17339683
D300452:bin_000016 - GPB:bin_000208 6 0.07025397
D300482:bin_000009 - GPB:bin_000053 6 0.06346032
D300482:bin_000009 - TG2:bin_000003 6 0.17666667
GPB:bin_000004 - GPB:bin_000031 6 0.08136508
GPB:bin_000004 - TG3_35:bin_000033 6 0.07961905
GPB:bin_000053 - GPB:bin_000092 6 0.07273016
GPB:bin_000053 - TG2:bin_000003 6 0.15295238
GPB:bin_000059 - GPB:bin_000124 6 0.11126984
GPB:bin_000124 - GPB:bin_000181 6 0.12419048
GPB:bin_000208 - TG5_28:bin_000003 6 0.11015873
day_clusters_negative_filtered <- day_clusters_negative %>% 
  filter(count>3) %>%
  rename(count_day=count) %>% 
  mutate(clustering_day=TRUE)

treatment_clusters_negative_filtered <- treatment_clusters_negative %>% 
  filter(count>3) %>%
  rename(count_treatment=count) %>% 
  mutate(clustering_treatment=TRUE)

all_clusters_negative_filtered <- all_clusters_negative %>% 
  filter(count > 5) %>%
  rename(count_all=count) %>% 
  mutate(clustering_all=TRUE)

associated_pairs_negative <- all_clusters_negative_filtered %>% 
  full_join(day_clusters_negative_filtered, by=join_by("genomes"=="genomes","distance"=="distance")) %>% 
  full_join(treatment_clusters_negative_filtered, by=join_by("genomes"=="genomes","distance"=="distance")) %>%
  rowwise() %>% 
  mutate(count=max(count_all,count_day,count_treatment, na.rm=TRUE)) %>%
  mutate(clustering = case_when(
    clustering_all == TRUE ~ "all",  # If clustering_all is TRUE
    clustering_day == TRUE & is.na(clustering_all) & is.na(clustering_treatment) ~ "day", 
    clustering_treatment == TRUE & is.na(clustering_all) & is.na(clustering_day) ~ "treatment")) %>%
  select(genomes,count,clustering,distance) %>%
  mutate(clustering=factor(clustering)) %>%
  #add coordinates
  separate(genomes, into = c("genome1", "genome2"), sep = " - ") %>%
  left_join(gift_pcoa_vectors %>% rownames_to_column(var = "genome"), by = c("genome1" = "genome")) %>%
  rename(x1 = Axis.1, y1 = Axis.2) %>%
  left_join(gift_pcoa_vectors %>% rownames_to_column(var = "genome"), by = c("genome2" = "genome")) %>%
  rename(x2 = Axis.1, y2 = Axis.2)

associated_pairs_negative %>% 
  select(genome1,genome2,count,clustering) %>% 
  left_join(genome_metadata %>% select(genome,order,species),by=join_by(genome1==genome)) %>% 
  left_join(genome_metadata %>% select(genome,order,species),by=join_by(genome2==genome)) %>% 
  tt()
genome1 genome2 count clustering order.x species.x order.y species.y
GPB:bin_000059 GPB:bin_000130 7 all o__Oscillospirales s__Dysosmobacter avistercoris o__Oscillospirales s__Negativibacillus faecipullorum
D300452:bin_000016 GPB:bin_000208 6 all o__Lachnospirales s__Eisenbergiella sp904392525 o__Lachnospirales s__Eisenbergiella intestinigallinarum
D300482:bin_000009 GPB:bin_000053 6 all o__Lachnospirales s__Mediterraneibacter sp900761655 o__Lachnospirales s__Mediterraneibacter excrementipullorum
D300482:bin_000009 TG2:bin_000003 6 all o__Lachnospirales s__Mediterraneibacter sp900761655 o__Lactobacillales s__Limosilactobacillus reuteri_E
GPB:bin_000004 GPB:bin_000031 6 all o__Oscillospirales s__Acutalibacter stercoravium o__Lachnospirales s__Mediterraneibacter faecigallinarum
GPB:bin_000004 TG3_35:bin_000033 6 all o__Oscillospirales s__Acutalibacter stercoravium o__Lachnospirales s__Hungatella_B pullicola
GPB:bin_000053 GPB:bin_000092 6 all o__Lachnospirales s__Mediterraneibacter excrementipullorum o__Lachnospirales s__Choladocola avistercoris
GPB:bin_000053 TG2:bin_000003 6 all o__Lachnospirales s__Mediterraneibacter excrementipullorum o__Lactobacillales s__Limosilactobacillus reuteri_E
GPB:bin_000059 GPB:bin_000124 6 all o__Oscillospirales s__Dysosmobacter avistercoris o__Lachnospirales s__Copromonas faecavium
GPB:bin_000124 GPB:bin_000181 6 all o__Lachnospirales s__Copromonas faecavium o__Oscillospirales s__Intestinimonas pullistercoris
GPB:bin_000208 TG5_28:bin_000003 6 all o__Lachnospirales s__Eisenbergiella intestinigallinarum o__Erysipelotrichales s__Thomasclavelia spiroformis
GPB:bin_000025 GPB:bin_000146 4 day o__Lactobacillales s__Ligilactobacillus salivarius o__Lachnospirales s__Clostridium_Q saccharolyticum_A
GPB:bin_000002 GPB:bin_000054 5 treatment o__Enterobacterales s__Proteus mirabilis o__Lachnospirales s__Merdisoma faecalis
D300481:bin_000002 GPB:bin_000124 4 treatment o__Oscillospirales s__Merdivicinus intestinigallinarum o__Lachnospirales s__Copromonas faecavium
D300509:bin_000001 GPB:bin_000124 4 treatment o__Erysipelotrichales s__Massilimicrobiota merdigallinarum o__Lachnospirales s__Copromonas faecavium
D300509:bin_000001 GPB:bin_000208 4 treatment o__Erysipelotrichales s__Massilimicrobiota merdigallinarum o__Lachnospirales s__Eisenbergiella intestinigallinarum
D300509:bin_000001 TG5_28:bin_000003 4 treatment o__Erysipelotrichales s__Massilimicrobiota merdigallinarum o__Erysipelotrichales s__Thomasclavelia spiroformis
D300511:bin_000002 GPB:bin_000197 4 treatment o__Lachnospirales s__Eisenbergiella merdigallinarum o__Oscillospirales s__Flavonifractor avistercoris
GPB:bin_000002 TG5_28:bin_000004 4 treatment o__Enterobacterales s__Proteus mirabilis o__Lactobacillales s__Lactobacillus johnsonii
GPB:bin_000054 TG5_28:bin_000004 4 treatment o__Lachnospirales s__Merdisoma faecalis o__Lactobacillales s__Lactobacillus johnsonii
GPB:bin_000059 GPB:bin_000197 4 treatment o__Oscillospirales s__Dysosmobacter avistercoris o__Oscillospirales s__Flavonifractor avistercoris
GPB:bin_000123 GPB:bin_000208 4 treatment o__Oscillospirales s__Faecousia gallistercoris o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000123 TG5_28:bin_000003 4 treatment o__Oscillospirales s__Faecousia gallistercoris o__Erysipelotrichales s__Thomasclavelia spiroformis
GPB:bin_000124 GPB:bin_000208 4 treatment o__Lachnospirales s__Copromonas faecavium o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000124 TG5_28:bin_000003 4 treatment o__Lachnospirales s__Copromonas faecavium o__Erysipelotrichales s__Thomasclavelia spiroformis
GPB:bin_000130 GPB:bin_000197 4 treatment o__Oscillospirales s__Negativibacillus faecipullorum o__Oscillospirales s__Flavonifractor avistercoris
GPB:bin_000194 GPB:bin_000208 4 treatment o__Lachnospirales s__Lachnoclostridium_B sp002160985 o__Lachnospirales s__Eisenbergiella intestinigallinarum
GPB:bin_000194 TG5_28:bin_000003 4 treatment o__Lachnospirales s__Lachnoclostridium_B sp002160985 o__Erysipelotrichales s__Thomasclavelia spiroformis

6.3.4 Association vs. functional distance

weighted_sd <- function(x, w) {
  weighted_mean <- sum(w * x) / sum(w)
  variance <- sum(w * (x - weighted_mean)^2) / sum(w)
  sqrt(variance)
}

# Summary statistics
distance_stats <- bind_rows(associated_pairs_positive %>% mutate(type="positive"),
          associated_pairs_negative %>% mutate(type="negative")) %>% 
          left_join(genome_metadata %>% select(genome,genus),by=join_by(genome1==genome)) %>% 
          left_join(genome_metadata %>% select(genome,genus),by=join_by(genome2==genome)) %>%
          filter(genus.x!=genus.y) %>% #remove relationships with same genus due to crossmaping error probability
          group_by(type) %>% 
          summarise(weighted_mean = sum(distance * count) / sum(count),
                    weighed_sd = weighted_sd(distance, count))

distance_stats %>% 
  tt()
type weighted_mean weighed_sd
negative 0.1340044 0.05442698
positive 0.1702117 0.07771069
# Permutation test
weighted_mean <- function(x, w) {
  sum(x * w) / sum(w)
}

permutation_test <- function(data, n_permutations = 1000) {
  original_diff <- abs(weighted_mean(data$distance[data$type == "positive"], data$count[data$type == "positive"]) -
                         weighted_mean(data$distance[data$type == "negative"], data$count[data$type == "negative"]))
  
  perm_diff <- replicate(n_permutations, {
    shuffled_type <- sample(data$type)
    abs(weighted_mean(data$distance[shuffled_type == "positive"], data$count[shuffled_type == "positive"]) -
          weighted_mean(data$distance[shuffled_type == "negative"], data$count[shuffled_type == "negative"]))
  })
  
  p_value <- mean(perm_diff >= original_diff)
  return(p_value)
}

permutation_test(bind_rows(associated_pairs_positive %>% mutate(type="positive"),
                                      associated_pairs_negative %>% mutate(type="negative")), 
                n_permutations = 10000)
[1] 0.0967
# Calculate the weighted mean distance for the "positive" group
weighted_mean_positive <- sum(associated_pairs_positive$distance * associated_pairs_positive$count) / sum(associated_pairs_positive$count)

# Calculate the mean of all distances from the functional distance matrix
all_distances <- functional_distances[upper.tri(functional_distances, diag = FALSE)]
all_distances <- all_distances[!is.na(all_distances)]
sampled_distances <- sample(all_distances, size = 37, replace = FALSE)
mean_all_distances <- mean(sampled_distances)

# Number of permutations
n_permutations <- 10000

# Combine the positive distances and all distances into a single vector
combined_distances <- c(associated_pairs_positive$distance, sampled_distances)

# Get the observed difference in means
observed_diff <- weighted_mean_positive - mean_all_distances

# Permutation test
perm_diff <- replicate(n_permutations, {
  # Randomly shuffle the combined distances
  shuffled_distances <- sample(combined_distances)
  
  # Split into "positive" group (same size as original positive group) and "all" group
  shuffled_positive <- shuffled_distances[1:length(associated_pairs_positive$distance)]
  shuffled_all <- shuffled_distances[(length(associated_pairs_positive$distance) + 1):length(combined_distances)]
  
  # Calculate weighted mean for the shuffled positive group (use original counts)
  shuffled_weighted_mean_positive <- sum(shuffled_positive * associated_pairs_positive$count) / sum(associated_pairs_positive$count)
  
  # Calculate the mean for the shuffled all group
  shuffled_mean_all <- mean(shuffled_all)
  
  # Return the difference in means
  shuffled_weighted_mean_positive - shuffled_mean_all
})

# Calculate p-value (two-tailed test)
p_value <- mean(abs(perm_diff) >= abs(observed_diff))
p_value
[1] 0
# Calculate the weighted mean distance for the "negative" group
weighted_mean_negative <- sum(associated_pairs_negative$distance * associated_pairs_negative$count) / sum(associated_pairs_negative$count)

# Calculate the mean of all distances from the functional distance matrix
all_distances <- functional_distances[upper.tri(functional_distances, diag = FALSE)]
all_distances <- all_distances[!is.na(all_distances)]
sampled_distances <- sample(all_distances, size = 37, replace = FALSE)
mean_all_distances <- mean(sampled_distances)

# Number of permutations
n_permutations <- 10000

# Combine the negative distances and all distances into a single vector
combined_distances <- c(associated_pairs_negative$distance, sampled_distances)

# Get the observed difference in means
observed_diff <- weighted_mean_negative - mean_all_distances

# Permutation test
perm_diff <- replicate(n_permutations, {
  # Randomly shuffle the combined distances
  shuffled_distances <- sample(combined_distances)
  
  # Split into "negative" group (same size as original negative group) and "all" group
  shuffled_negative <- shuffled_distances[1:length(associated_pairs_negative$distance)]
  shuffled_all <- shuffled_distances[(length(associated_pairs_negative$distance) + 1):length(combined_distances)]
  
  # Calculate weighted mean for the shuffled negative group (use original counts)
  shuffled_weighted_mean_negative <- sum(shuffled_negative * associated_pairs_negative$count) / sum(associated_pairs_negative$count)
  
  # Calculate the mean for the shuffled all group
  shuffled_mean_all <- mean(shuffled_all)
  
  # Return the difference in means
  shuffled_weighted_mean_negative - shuffled_mean_all
})

# Calculate p-value (two-tailed test)
p_value <- mean(abs(perm_diff) >= abs(observed_diff))
p_value
[1] 0
tibble(dist=functional_distances[upper.tri(functional_distances, diag = FALSE)]) %>% 
  filter(!is.na(dist)) %>% 
  ggplot(aes(x = dist)) +
  geom_histogram(bins = 10, fill = "skyblue", color = "black", alpha = 0.7) +
  geom_vline(xintercept = pull(distance_stats[1,2]), linetype="dotted", color = "red", size=1.5) +
  geom_vline(xintercept = pull(distance_stats[2,2]), linetype="dotted", color = "green", size=1.5) +
  theme_minimal()

6.3.5 Persistent associations

gift_pcoa_vectors %>%
  rownames_to_column(var="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  ggplot() +
    geom_segment(data = associated_pairs_negative, 
               aes(x = x1, y = y1, xend = x2, yend = y2, linetype=clustering),
               color="#f5a2af",
               linewidth=1) +
    geom_segment(data = associated_pairs_positive, 
               aes(x = x1, y = y1, xend = x2, yend = y2, linetype=clustering),
               color="#bdf5a2",
               linewidth=1) +
    scale_linetype_manual(values=c("solid", "dashed", "dotted")) +
    geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = length), 
             alpha = 0.9, shape = 16) +
    scale_color_manual(values = order_colors) +
    theme_minimal() + 
    theme(legend.position = "none")

selected_genomes <- c("GPB:bin_000009","GPB:bin_000021","GPB:bin_000208","D300452:bin_000016","D300444:bin_000010","GPB:bin_000140")

#Functional difference matrix
genome_gifts[selected_genomes,] %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    stats::dist(., method = "manhattan") / ncol(genome_gifts[selected_genomes, ])
                   GPB:bin_000009 GPB:bin_000021 GPB:bin_000208 D300452:bin_000016 D300444:bin_000010
GPB:bin_000021         0.17892063                                                                    
GPB:bin_000208         0.18403175     0.07406349                                                     
D300452:bin_000016     0.19073016     0.07498413     0.04365079                                      
D300444:bin_000010     0.17415873     0.07612698     0.07488889         0.07136508                   
GPB:bin_000140         0.18619048     0.06504762     0.08761905         0.07266667         0.07158730
genome_gifts[selected_genomes, ] %>% 
  to.elements(.,GIFT_db=GIFT_db) %>%
  as.data.frame() %>% 
  rownames_to_column(var="genome") %>% 
  pivot_longer(!genome, names_to = "trait", values_to = "gift") %>%
  mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    filter(!functionid %in% c("D04","D08","B09","B10","S01","S02","S03")) %>% 
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(genome=factor(genome,levels=selected_genomes)) %>% 
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=genome,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ ., scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) + 
        labs(y="Traits",x="Samples",fill="GIFT")

selected_genomes <- c("GPB:bin_000208","D300452:bin_000016","TG5_28:bin_000003","D300509:bin_000001","GPB:bin_000123","GPB:bin_000124","GPB:bin_000194")

#Functional difference matrix
genome_gifts[selected_genomes,] %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    stats::dist(., method = "manhattan") / ncol(genome_gifts[selected_genomes, ])
                   GPB:bin_000208 D300452:bin_000016 TG5_28:bin_000003 D300509:bin_000001 GPB:bin_000123
D300452:bin_000016     0.04365079                                                                       
TG5_28:bin_000003      0.07638095         0.06092063                                                    
D300509:bin_000001     0.07377778         0.06441270        0.03504762                                  
GPB:bin_000123         0.08888889         0.08930159        0.08749206         0.07498413               
GPB:bin_000124         0.07688889         0.06784127        0.06711111         0.06292063     0.09047619
GPB:bin_000194         0.08177778         0.06574603        0.07212698         0.07815873     0.10285714
                   GPB:bin_000124
D300452:bin_000016               
TG5_28:bin_000003                
D300509:bin_000001               
GPB:bin_000123                   
GPB:bin_000124                   
GPB:bin_000194         0.08082540
genome_gifts[selected_genomes, ] %>% 
  to.elements(.,GIFT_db=GIFT_db) %>%
  as.data.frame() %>% 
  rownames_to_column(var="genome") %>% 
  pivot_longer(!genome, names_to = "trait", values_to = "gift") %>%
  mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    filter(!functionid %in% c("D04","D08","B09","B10","S01","S02","S03")) %>% 
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(genome=factor(genome,levels=selected_genomes)) %>% 
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=genome,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ ., scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) + 
        labs(y="Traits",x="Samples",fill="GIFT")