Chapter 4 Community composition

load("data/data.Rdata")

4.1 Taxonomy overview

4.1.1 Stacked barplot

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=order, group=order)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=order_colors) +
    facet_nested(. ~ treatment + day,  scales="free", space="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x=element_blank(),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples") +
   theme(legend.position = "none")

### Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum) %>%
  summarise(relabun=sum(count))

phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean) %>%
    tt()
phylum mean sd
p__Bacillota_A 0.9263387454 0.052472019
p__Bacillota 0.0311457034 0.020581919
p__Pseudomonadota 0.0213337036 0.031447325
p__Bacteroidota 0.0173904491 0.041859646
p__Actinomycetota 0.0029250266 0.002033819
p__Verrucomicrobiota 0.0008663719 0.003274428
phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun)) %>%
    arrange(-mean) %>%
    select(phylum) %>%
    pull()

phylum_summary %>%
    filter(phylum %in% phylum_arrange) %>%
    mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
    ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum)) +
        scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        geom_jitter(alpha=0.5) + 
        theme_minimal() + 
        theme(legend.position="none") +
        labs(y="Phylum",x="Relative abundance")

phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun)) %>%
    arrange(-mean) %>%
    select(phylum) %>%
    pull()

phylum_summary %>%
    left_join(sample_metadata,by="sample") %>% 
    filter(phylum %in% phylum_arrange) %>%
    mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
    filter(treatment!="TG0") %>% 
    ggplot(aes(x=day, y=relabun, group=phylum, color=phylum, fill=phylum)) +
        scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[rev(phylum_arrange)]) +
        geom_jitter(width=0.3) + 
        geom_smooth(method="loess",se=TRUE) +
        theme_minimal() + 
        facet_wrap(. ~ treatment, nrow=1) +
        theme(legend.position="none") +
        labs(y="Phylum",x="Days")

4.2 Taxonomy boxplot

4.2.1 Family

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family) %>%
  summarise(relabun=sum(count))

family_summary %>%
    group_by(family) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean) %>%
    tt()
family mean sd
f__Lachnospiraceae 4.329063e-01 0.1040793532
f__Oscillospiraceae 1.377885e-01 0.0423221647
f__Ruminococcaceae 9.281622e-02 0.0673953782
f__Borkfalkiaceae 8.719646e-02 0.0752380263
f__Acutalibacteraceae 8.093733e-02 0.0315079409
f__Butyricicoccaceae 5.513905e-02 0.0299466932
f__Enterobacteriaceae 2.133370e-02 0.0314473254
f__Anaerotignaceae 1.939008e-02 0.0166309768
f__Rikenellaceae 1.739045e-02 0.0418596462
f__Coprobacillaceae 1.183006e-02 0.0104369897
f__CAG-508 1.095063e-02 0.0262151388
f__Lactobacillaceae 7.712804e-03 0.0068350532
f__Erysipelotrichaceae 3.961127e-03 0.0075083374
f__CAJFEE01 3.787948e-03 0.0082436691
f__Eggerthellaceae 2.733944e-03 0.0019639378
f__UBA1750 2.472356e-03 0.0072042453
f__Monoglobaceae 2.000573e-03 0.0046527638
f__UBA1381 1.892527e-03 0.0035251341
f__UBA660 1.601667e-03 0.0101286199
f__CAG-274 1.095219e-03 0.0039707980
f__Enterococcaceae 1.039147e-03 0.0043240156
f__Streptococcaceae 8.707222e-04 0.0030291486
f__Akkermansiaceae 8.663719e-04 0.0032744284
f__CAG-314 7.266243e-04 0.0021075449
f__Peptostreptococcaceae 4.572793e-04 0.0006294381
f__CAG-74 3.946608e-04 0.0018794730
f__CAG-313 2.087340e-04 0.0012151482
f__Bifidobacteriaceae 1.910823e-04 0.0006867560
f__Paenibacillaceae 1.334933e-04 0.0008636227
f__Clostridiaceae 8.794823e-05 0.0009307566
f__Anaerovoracaceae 8.700195e-05 0.0001931964
family_arrange <- family_summary %>%
    group_by(family) %>%
    summarise(mean=sum(relabun, na.rm=TRUE)) %>%
    arrange(-mean) %>%
    select(family) %>%
    pull()

# Per day
family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~day)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

# Per treatment
family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~treatment)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

4.2.2 Genus

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,phylum,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__") %>%
  mutate(genus= sub("^g__", "", genus))

genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun, na.rm=T),sd=sd(relabun, na.rm=T)) %>%
    arrange(-mean) 

genus_summary_sort %>%
    tt()
genus mean sd
Eisenbergiella 1.445156e-01 0.0883905286
Blautia 5.916810e-02 0.0505543219
Agathobaculum 5.322314e-02 0.0301610831
Mediterraneibacter 5.290562e-02 0.0435215131
Caccovicinus 4.476313e-02 0.0321768001
Flavonifractor 3.453579e-02 0.0271724191
Lawsonibacter 3.375375e-02 0.0169043070
Dysosmobacter 3.258327e-02 0.0162310449
Lachnoclostridium_A 3.220221e-02 0.0269217326
Borkfalkia 3.204880e-02 0.0360383686
Coproplasma 3.066065e-02 0.0539482783
Merdivicinus 2.864373e-02 0.0309516849
Acutalibacter 2.691433e-02 0.0176707958
Enterocloster 2.516145e-02 0.0137469924
Gallimonas 1.998859e-02 0.0345141515
Gemmiger 1.926366e-02 0.0297015190
Anaeromassilibacillus 1.916098e-02 0.0145425120
Merdisoma 1.769360e-02 0.0192752166
Alistipes 1.739045e-02 0.0418596462
Escherichia 1.632474e-02 0.0241923609
UMGS1370 1.615394e-02 0.0372379577
Negativibacillus 1.544956e-02 0.0121601465
Anaerotignum 1.523550e-02 0.0123401837
Intestinimonas 1.263498e-02 0.0119940206
Pelethomonas 1.259604e-02 0.0218433912
Choladocola 1.184133e-02 0.0140509068
Onthovicinus 1.026027e-02 0.0122805507
Fournierella 1.006317e-02 0.0483977313
Faecousia 8.839579e-03 0.0131441523
UBA1417 8.343485e-03 0.0088859150
Thomasclavelia 8.226498e-03 0.0095251673
Copromonas 7.196101e-03 0.0074936185
Clostridium_Q 7.095562e-03 0.0108851076
Faecalibacterium 6.409228e-03 0.0283842543
CAG-269 5.550825e-03 0.0209272065
Lactobacillus 4.431109e-03 0.0061421622
Fimenecus 4.336314e-03 0.0145370351
Scatosoma 4.182029e-03 0.0093080381
CAG-273 4.082432e-03 0.0133584607
Faeciplasma 4.043975e-03 0.0116168407
Harrysmithimonas 3.787948e-03 0.0082436691
Massilimicrobiota 3.603563e-03 0.0056940870
Anaerotruncus 3.600088e-03 0.0028485188
Ruthenibacterium 3.524271e-03 0.0162325912
Blautia_A 3.017374e-03 0.0074140508
Klebsiella 2.898479e-03 0.0077490024
Ligilactobacillus 2.584617e-03 0.0041317157
Spyradocola 2.472356e-03 0.0072042453
Fimicola 2.375286e-03 0.0146709499
Merdibacter 2.277474e-03 0.0062077492
Rubneribacter 2.275614e-03 0.0016954158
Heritagella 2.265200e-03 0.0081259552
Caccousia 2.225811e-03 0.0063187280
Gallacutalibacter 2.183300e-03 0.0026543679
Eubacterium_R 2.149909e-03 0.0060679476
Roslinia 2.000573e-03 0.0046527638
Anaerobutyricum 1.951366e-03 0.0020592442
Pseudobutyricicoccus 1.804699e-03 0.0041057861
Proteus 1.795748e-03 0.0042092912
Clostridium_AQ 1.573753e-03 0.0037519678
Ornithomonoglobus 1.402383e-03 0.0034000890
Sellimonas 1.355524e-03 0.0014000810
Hungatella_B 1.295591e-03 0.0024858996
CAG-302 1.251232e-03 0.0093271964
Coprocola 1.232176e-03 0.0029180886
Merdimonas 1.042921e-03 0.0018959126
CAG-245 9.909064e-04 0.0055208507
Pullilachnospira 8.877658e-04 0.0019116542
Anaerostipes 8.807228e-04 0.0016493466
Streptococcus 8.707222e-04 0.0030291486
Akkermansia 8.663719e-04 0.0032744284
Tyzzerella 8.549861e-04 0.0039467559
Galloscillospira_A 8.453837e-04 0.0039969277
Fimimorpha 8.178870e-04 0.0038043652
Ruminococcus_G 8.166585e-04 0.0022527243
Lachnoclostridium_B 8.018810e-04 0.0014303087
Scybalenecus 7.674383e-04 0.0048805441
Heteroclostridium 7.266243e-04 0.0021075449
Scatomorpha 7.146354e-04 0.0036272957
Limosilactobacillus 6.970782e-04 0.0019160211
An181 6.754902e-04 0.0017219605
Faecivivens 5.889048e-04 0.0016432903
Egerieicola 5.367254e-04 0.0017162579
Enterococcus_B 4.665754e-04 0.0018265861
Gordonibacter 4.583304e-04 0.0007654231
Romboutsia 4.572793e-04 0.0006294381
CAJFUH01 4.375669e-04 0.0009959092
Pararuminococcus 4.308149e-04 0.0015265075
Pullichristensenella 3.946608e-04 0.0018794730
Enterococcus 3.933570e-04 0.0034471452
UBA4716 3.892234e-04 0.0014028047
Avoscillospira 3.723899e-04 0.0038117569
Scatavimonas 3.692015e-04 0.0010089017
Metalachnospira 3.521580e-04 0.0010102879
Caccomorpha 3.487812e-04 0.0030560594
UMGS775 3.163978e-04 0.0012014435
Salmonella 3.147373e-04 0.0008102992
Caccenecus 3.143350e-04 0.0033266085
Evtepia 2.842658e-04 0.0005804857
Enterenecus 2.411763e-04 0.0011008124
Gallispira 2.402329e-04 0.0006521117
HGM12545 2.269032e-04 0.0008091141
JAETTH01 2.141598e-04 0.0012567610
CAG-313 2.087340e-04 0.0012151482
Neoanaerotignum_A 1.949623e-04 0.0006113248
Bifidobacterium 1.910823e-04 0.0006867560
Enterococcus_D 1.792150e-04 0.0006369121
Avimicrobium 1.703215e-04 0.0004572784
HGM12998 1.675215e-04 0.0004945362
Timburyella 1.411641e-04 0.0005176237
Paenibacillus_A 1.334933e-04 0.0008636227
Butyricicoccus 1.112143e-04 0.0006691107
UMGS856 1.105091e-04 0.0005845197
Holdemania 1.099001e-04 0.0004731983
Ornithomonoglobus_A 1.009201e-04 0.0004893389
Fimivicinus 9.405076e-05 0.0009547374
Angelakisella 9.176895e-05 0.0004511442
Sarcina 8.794823e-05 0.0009307566
Alangreenwoodia 8.700195e-05 0.0001931964
RUG626 6.149174e-05 0.0003482508
Catenibacillus 5.889514e-05 0.0005067441
RUG591 3.610048e-05 0.0003820515
CAJFPI01 3.504625e-05 0.0001286358
Acetatifactor 3.183280e-05 0.0003368866
Pediococcus 0.000000e+00 0.0000000000
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

#Per day
genus_summary %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~day)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

#Per treatment
genus_summary %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(genus=factor(genus, levels=rev(genus_summary_sort %>% pull(genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~treatment)+
        theme_minimal() + 
        labs(y="Family", x="Relative abundance", color="Phylum")

4.2.3 Salmonella enterica

salmonella_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  filter(genome=="GPB:bin_000051") %>%
  #group_by(sample, species, day, treatment) %>%
  #summarise(relabun=sum(count), .groups="drop") %>%
  filter(species != "s__")

salmonella_summary %>%
    select(-species) %>%
    filter(treatment != "TG0") %>%
    group_by(day, treatment) %>%
    summarise(mean=mean(count, na.rm=T),sd=sd(count, na.rm=T)) %>%
    mutate(salmonella=paste0(mean * 100," ± ", sd*100)) %>%
    select(-c(mean,sd)) %>%
    pivot_wider(names_from = "day", values_from = "salmonella") %>%
    tt()
treatment 7 14 21 28 35
TG1 0 ± 0 0 ± 0 0 ± 0 0.0241551432240449 ± 0.033382187982062 0 ± 0
TG2 0.242359007365227 ± 0.283469447128929 0 ± 0 0 ± 0 0.0386581717315069 ± 0.0230783075441766 0.0358379137922966 ± 0.0543374342307738
TG3 0.113298245608306 ± 0.0195711543058188 0 ± 0 0 ± 0 0.0179850913593979 ± 0.0402158868611578 0.0479331332811194 ± 0.0520647652609058
TG4 0.151044112426683 ± 0.159517071319007 0 ± 0 0 ± 0 0.064716903391552 ± 0.0900049874152957 0.012025539948005 ± 0.0268899247898786
TG5 0.170793630800146 ± 0.0273961354484702 0.0202299784310606 ± 0.0452356069552061 0 ± 0 0 ± 0 0 ± 0

4.2.4 Probiotics

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  filter(genome %in% c("GEXTRA:bin_000001","GEXTRA:bin_000002","GEXTRA:bin_000004","GPB:bin_000025","GEXTRA:bin_000006")) %>%
    select(-genus) %>%
    filter(treatment != "TG0") %>%
    group_by(day, treatment) %>%
    summarise(mean=mean(count, na.rm=T),sd=sd(count, na.rm=T)) %>%
    mutate(salmonella=paste0(mean * 100," ± ", sd*100)) %>%
    select(-c(mean,sd)) %>%
    pivot_wider(names_from = "day", values_from = "salmonella") %>%
    tt()
treatment 7 14 21 28 35
TG1 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG2 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG3 0 ± 0 0.0235235806846099 ± 0.117617903423049 0.0981598054504961 ± 0.25535188561404 0.0816638450870714 ± 0.184343495413486 0.081541923405096 ± 0.1769845972708
TG4 0.0695611912789772 ± 0.15573979801665 0.257345690121293 ± 0.536494709196875 0.154303615899709 ± 0.332387661558835 0.0843527344384325 ± 0.200118689108087 0.119836568122253 ± 0.249474582149052
TG5 0.141611150695204 ± 0.30739741992713 0.233696277462193 ± 0.528055185240311 0 ± 0 0 ± 0 0 ± 0
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  filter(species=="s__Ligilactobacillus salivarius") %>%
  filter(species != "s__") %>%
    select(-species) %>%
    filter(treatment != "TG0") %>%
    group_by(day, treatment) %>%
    summarise(mean=mean(count, na.rm=T),sd=sd(count, na.rm=T)) %>%
    mutate(salmonella=paste0(mean * 100," ± ", sd*100)) %>%
    select(-c(mean,sd)) %>%
    pivot_wider(names_from = "day", values_from = "salmonella") %>%
    tt()
treatment 7 14 21 28 35
TG1 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG2 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG3 0 ± 0 0.117617903423049 ± 0.263001627424943 0.49079902725248 ± 0.387741960287886 0.408319225435357 ± 0.192796669959423 0.40770961702548 ± 0.14735723331951
TG4 0.347805956394886 ± 0.156912118204287 1.25704020617654 ± 0.390955403217101 0.771518079498544 ± 0.260094594996096 0.406859224366084 ± 0.276495317467263 0.599182840611264 ± 0.120022191063496
TG5 0.70805575347602 ± 0.219749502734166 1.08999800550827 ± 0.700552308855792 0 ± 0 0 ± 0 0 ± 0
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  filter(species=="s__Bifidobacterium animalis") %>%
  filter(species != "s__") %>%
    select(-species) %>%
    filter(treatment != "TG0") %>%
    group_by(day, treatment) %>%
    summarise(mean=mean(count, na.rm=T),sd=sd(count, na.rm=T)) %>%
    mutate(salmonella=paste0(mean * 100," ± ", sd*100)) %>%
    select(-c(mean,sd)) %>%
    pivot_wider(names_from = "day", values_from = "salmonella") %>%
    tt()
treatment 7 14 21 28 35
TG1 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG2 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG3 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG4 0 ± 0 0.0296882444299303 ± 0.0593764888598606 0 ± 0 0 ± 0 0 ± 0
TG5 0 ± 0 0.0784833818026937 ± 0.175494176814893 0 ± 0 0 ± 0 0 ± 0
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  filter(species=="s__Limosilactobacillus reuteri") %>%
  filter(species != "s__") %>%
    select(-species) %>%
    filter(treatment != "TG0") %>%
    group_by(day, treatment) %>%
    summarise(mean=mean(count, na.rm=T),sd=sd(count, na.rm=T)) %>%
    mutate(salmonella=paste0(mean * 100," ± ", sd*100)) %>%
    select(-c(mean,sd)) %>%
    pivot_wider(names_from = "day", values_from = "salmonella") %>%
    tt()
treatment 7 14 21 28 35
TG1 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG2 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG3 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0
TG4 0 ± 0 0 ± 0 0 ± 0 0.0149044478260783 ± 0.03332735850621 0 ± 0
TG5 0 ± 0 0 ± 0 0 ± 0 0 ± 0 0 ± 0

4.3 Functional overview

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  filter(!is.na(day)) %>% 
  group_by(genome,treatment,day) %>% 
  summarise(count=mean(count)) %>% 
  left_join(gift_pcoa_vectors %>% rownames_to_column(var="genome"), by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  ggplot() +
      scale_color_manual(values=order_colors)+

      #genome positions
      geom_point(aes(x=Axis.1,y=Axis.2, color=order, size=count), 
                 alpha=0.8, shape=16) +
      #scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0,10)) +
      #loading positions

     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    facet_grid(treatment ~ day) +
    theme_linedraw() + 
    theme(legend.position = "none")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  filter(!is.na(day)) %>% 
  group_by(genome,treatment,day) %>% 
  summarise(count=mean(count)) %>% 
  left_join(gift_pcoa_vectors %>% rownames_to_column(var="genome"), by="genome") %>%
  left_join(genome_metadata, by="genome") %>%
  ggplot(aes(x=Axis.1, y=count, group=treatment, color=treatment)) +
        scale_color_manual(values=treatment_colors)+
        geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = FALSE) +
        facet_grid(. ~ day) +
        theme_minimal() + 
        theme(legend.position = "none")

genome_gifts %>%
    to.elements(., GIFT_db) %>% 
    to.functions(., GIFT_db) %>% 
    to.community(., genome_counts_filt %>%
                    mutate_at(vars(-genome),~./sum(.)) %>%
                   column_to_rownames(var="genome"), GIFT_db) %>% 
    as.data.frame() %>% 
    rownames_to_column(var="sample") %>% 
    pivot_longer(-sample, names_to = "trait", values_to = "value") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>% 
    group_by(trait,treatment,day) %>% 
    summarise(value=mean(value)) %>% 
    mutate(trait=factor(trait)) %>% 
    filter(treatment != "TG0") %>% 
    ggplot(aes(x=fct_reorder(trait, desc(value)), y=value, fill=trait)) +
        geom_bar(stat="identity", width=1) +
    facet_grid(treatment ~ day) +
    theme_minimal() + 
    theme(legend.position = "none")

genome_gifts %>%
    to.elements(., GIFT_db) %>% 
    to.functions(., GIFT_db) %>% 
    to.community(., genome_counts_filt %>%
                    mutate_at(vars(-genome),~./sum(.)) %>%
                   column_to_rownames(var="genome"), GIFT_db) %>% 
    as.data.frame() %>% 
    rownames_to_column(var="sample") %>% 
    pivot_longer(-sample, names_to = "trait", values_to = "value") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>% 
    group_by(sample,treatment,day) %>% 
    summarise(value=mean(value)) %>% 
    filter(treatment != "TG0") %>% 
    ggplot(aes(x=day, y=value, group=day, fill=treatment, color=treatment)) +
        geom_boxplot() +
        geom_jitter() +
        scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
    facet_grid(treatment ~ .) +
    theme_linedraw() + 
    theme(legend.position = "none")

4.4 Alpha diversity

load("data/data.Rdata")
# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

# Remove genomes with no hits
dist_filt <- dist[!rownames(dist) %in% c("GEXTRA:bin_000002", "GEXTRA:bin_000006"), 
                               !colnames(dist) %in% c("GEXTRA:bin_000002", "GEXTRA:bin_000006")]

functional <- genome_counts_filt %>%
  arrange(match(genome,rownames(dist))) %>%
  filter(!genome %in% c("GEXTRA:bin_000002", "GEXTRA:bin_000006")) %>% 
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, dist = dist_filt) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional = 1) %>%
  rownames_to_column(var = "sample") %>%
  mutate(functional = if_else(is.nan(functional), 1, functional))

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample)) %>%
  full_join(functional, by = join_by(sample == sample))
#Richness
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="richness") %>%
      ggplot(aes(y = value, x = day, group=day, color=treatment, fill=treatment)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      facet_grid(treatment ~ ., scales = "fixed") +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

#Neutral
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="neutral") %>%
      ggplot(aes(y = value, x = day, group=day, color=treatment, fill=treatment)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      facet_grid(treatment ~ ., scales = "fixed") +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      ) +
    theme_linedraw() + 
    theme(legend.position = "none")

#Phylogenetic
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="phylogenetic") %>%
      ggplot(aes(y = value, x = day, group=day, color=treatment, fill=treatment)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      facet_grid(treatment ~ ., scales = "fixed") +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

#Functional
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="functional") %>%
      ggplot(aes(y = value, x = treatment, group=treatment, color=treatment, fill=treatment)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      facet_wrap(. ~ day, scales = "fixed", ncol=5) +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

#Richness
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="richness") %>%
      ggplot(aes(y = value, x = day, group=treatment, color=treatment, fill=treatment)) +
      geom_point() +
      geom_smooth(method="loess",se=TRUE)+
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      coord_cartesian(xlim = c(7, 35)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="neutral") %>%
      ggplot(aes(y = value, x = day, group=treatment, color=treatment, fill=treatment)) +
      geom_point() +
      geom_smooth(method="loess",se=TRUE)+
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      coord_cartesian(xlim = c(7, 35)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="phylogenetic") %>%
      ggplot(aes(y = value, x = day, group=treatment, color=treatment, fill=treatment)) +
      geom_point() +
      geom_smooth(method="loess",se=TRUE)+
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      coord_cartesian(xlim = c(7, 35)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="functional") %>%
      ggplot(aes(y = value, x = day, group=treatment, color=treatment, fill=treatment)) +
      geom_point() +
      geom_smooth(method="loess",se=TRUE)+
      scale_color_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      scale_fill_manual(name="Treatment",
          breaks=c("TG1","TG2","TG3","TG4","TG5"),
          values=c("#4059AE50", "#6A9AC350","#97D8C450","#AFD69950","#F3B94250")) +
      coord_cartesian(xlim = c(7, 35)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

4.5 Alpha diversity vs. MCI

alpha <- alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "alpha") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(metric=="richness") %>%
  select(sample,alpha)

mci <- genome_gifts %>%
    to.elements(., GIFT_db) %>% 
    to.functions(., GIFT_db) %>% 
    to.community(., genome_counts_filt %>%
                    mutate_at(vars(-genome),~./sum(.)) %>%
                   column_to_rownames(var="genome"), GIFT_db) %>% 
    as.data.frame() %>% 
    rownames_to_column(var="sample") %>% 
    pivot_longer(-sample, names_to = "trait", values_to = "mci") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>% 
    group_by(sample,treatment,day) %>% 
    summarise(mci=mean(mci)) %>% 
    filter(treatment != "TG0") 

inner_join(alpha,mci,by="sample") %>%
      ggplot(aes(y = alpha, x = mci)) +
      geom_smooth(method = "glm", 
                  formula = y~x, 
                  method.args = list(family = gaussian(link = 'log')),
                  color="#999999") +
      geom_point(aes(color=treatment)) +
      scale_color_manual(name="Treatment",
      breaks=c("TG1","TG2","TG3","TG4","TG5"),
      values=c("#4059AE", "#6A9AC3","#97D8C4","#AFD699","#F3B942")) +
      theme_classic() + 
        theme(legend.position = "none")