Skip to contents

This is a brief guide on how to perform network analysis on microbiome count data using a similar strategy as performed in the Isala study using tidytacos.

The method clusters taxa based on their co-occurrence patterns across the samples.

As per usual the test dataset used is a human microbiome samples from the upper respiratory tract (URT), taken from this paper by De Boeck et al. It contains nose as well as nasopharynx samples. Most samples were taken using a swab method, but a minority was taking with the aspirate method.

Step 1: Creating a network

The first step in the analysis is creating a sparcc network from our taxonomic counts. For this the SpiecEasi package is used.

devtools::install_github('zdk123/SpiecEasi')
urt.net <- urt.nf %>%
  network(
    min_occurrence = 0.05, # the minimum percentage of samples that needs to have a taxa for it to be included in the network
    iter=20, # number of iterations of the outer loop of sparcc 
    inner_iter=10, # number of iterations of the inner loop of sparcc
    th=0.1, # correlations beneath this threshold are set to 0
    taxon_name=taxon_name)

# The network object is a list with two elements:
urt.net$Cor %>% head(2)# Correlation matrix
#>             [,1]        [,2]         [,3]        [,4]       [,5]       [,6]
#> [1,]  1.00000000 -0.02529089 -0.005801137  0.01516097 0.05986462 0.13709887
#> [2,] -0.02529089  1.00000000  0.030665238 -0.09163985 0.07760764 0.02597374
#>             [,7]        [,8]       [,9]       [,10]       [,11]       [,12]
#> [1,]  0.03071816 -0.08361307 0.10603959 -0.01524706 -0.09335080 -0.01788330
#> [2,] -0.05482966 -0.07955803 0.04296975 -0.01357689  0.08080531  0.05173014
#>            [,13]       [,14]       [,15]       [,16]       [,17]       [,18]
#> [1,] -0.01253595  0.03261072  0.01128624 -0.01741375 -0.01796809 0.308998423
#> [2,]  0.08546550 -0.03786864 -0.01991150 -0.03926070  0.02300305 0.006928278
#>            [,19]      [,20]        [,21]       [,22]      [,23]        [,24]
#> [1,] -0.06816382 0.04028286 -0.007349141 -0.01681414 0.04898641 -0.006463706
#> [2,] -0.08602481 0.08172153 -0.005358079  0.15721554 0.04186287  0.010783317
#>            [,25]       [,26]        [,27]       [,28]       [,29]       [,30]
#> [1,] -0.02932673 -0.04693648 -0.006408764 -0.08380906 -0.06849173 -0.09679495
#> [2,]  0.13554391 -0.10768510 -0.038097269  0.03864768  0.02522536  0.01495462
#>            [,31]      [,32]      [,33]       [,34]       [,35]       [,36]
#> [1,] 0.006154588 0.05529007  0.1787906 -0.01988641 -0.05646627  0.02435975
#> [2,] 0.124082370 0.12098392 -0.0169648 -0.07313655 -0.05252636 -0.01685275
#>            [,37]       [,38]       [,39]       [,40]       [,41]        [,42]
#> [1,]  0.05573063  0.27021927 -0.08233737  0.27717752  0.32687175 -0.020168237
#> [2,] -0.09939702 -0.04105809  0.10925131 -0.02920952 -0.04238792  0.004643286
#>            [,43]       [,44]       [,45]       [,46]       [,47]       [,48]
#> [1,] -0.13372090 -0.02110049 -0.11099158 -0.01791130 -0.05660303 -0.04285631
#> [2,] -0.03572358  0.04358548 -0.06357924 -0.02876454 -0.05476827  0.01373997
#>            [,49]      [,50]      [,51]      [,52]       [,53]        [,54]
#> [1,] -0.06429722 0.01189109 0.04833545 -0.0179242  0.01958685 -0.003913994
#> [2,] -0.01867380 0.09034714 0.01886988 -0.0573012 -0.01708747  0.026535551
#>             [,55]      [,56]        [,57]      [,58]      [,59]       [,60]
#> [1,] -0.011371461 0.09608058  0.008814129 0.08724145 0.02541135  0.05272446
#> [2,]  0.001016776 0.03030416 -0.003755814 0.09917971 0.06309768 -0.02141835
#>            [,61]       [,62]       [,63]       [,64]       [,65]       [,66]
#> [1,] 0.007878015 -0.07609307 -0.07515718 -0.08300833 -0.09037017  0.04006076
#> [2,] 0.072125421 -0.04605675 -0.09692066 -0.08869844  0.02120372 -0.06606351
#>            [,67]       [,68]       [,69]       [,70]       [,71]       [,72]
#> [1,] -0.07315554 -0.02608547 -0.02595041 -0.12857806 -0.02958168 -0.05685777
#> [2,]  0.11404219 -0.01836244  0.01069843 -0.05887034  0.04082737  0.08911415
#>            [,73]       [,74]      [,75]       [,76]      [,77]       [,78]
#> [1,] -0.02120009 0.015739045 0.03643760 0.006902477 0.01807948  0.14955167
#> [2,] -0.04430801 0.005226727 0.05359429 0.126778092 0.01243113 -0.09374607
#>           [,79]         [,80]      [,81]       [,82]        [,83]      [,84]
#> [1,] 0.32421126 -0.0203249517 0.06744001 -0.04116257 -0.007734448 0.09769376
#> [2,] 0.01576832 -0.0002167615 0.01841182  0.09279505  0.021557648 0.15094090
#>            [,85]      [,86]        [,87]       [,88]       [,89]        [,90]
#> [1,] -0.02065859 0.08824767 0.0002962855 -0.07892791 -0.01215166  0.023823958
#> [2,]  0.04044059 0.01583869 0.0379670151 -0.10003871 -0.01476276 -0.002405105
#>            [,91]       [,92]       [,93]       [,94]       [,95]       [,96]
#> [1,]  0.02127067 -0.04943964 -0.03150189  0.01665404 -0.02428586 -0.03945849
#> [2,] -0.04528266  0.10176461 -0.05882543 -0.04453790  0.05828724 -0.05052593
#>            [,97]       [,98]        [,99]      [,100]      [,101]      [,102]
#> [1,]  0.04364262 -0.02789056 -0.162288183 -0.06216937 -0.07105051 0.118253684
#> [2,] -0.01618929 -0.05109104 -0.007859769 -0.05034302 -0.14405057 0.001834282
#>           [,103]      [,104]     [,105]      [,106]      [,107]      [,108]
#> [1,] -0.04045566 -0.03360512 -0.0701020 -0.04941166 -0.01169133  0.02423498
#> [2,] -0.00955543 -0.07275669 -0.0178108 -0.04841629 -0.10043112 -0.07519695
#>           [,109]
#> [1,] -0.02405523
#> [2,] -0.04680215
# urt.net$Cov (covariance matrix)
# urt.net$names (names of the taxa on x and y-axes)

Step 2: Filtering the network matrix

Next up we filter the output correlation matrix to remove weak correlations. For this we use the filter_network function, which has the Matrix package as a dependency. We also install the igraph package, which can be used for the visualization of the network.

We remove weak correlations (here set to an absolute value of <=0.1) and remove the diagonal. This is all handled with the filter_network function.

urt.net.filt <- urt.net %>% filter_network(threshold = 0.1)

Step 3: Clustering of the network

Now the taxa in the network are clustered using an unsupervised clustering algorithm, the Markov Cluster Algorithm (MCL). The cluster_network function has the MCL package as a dependency. The algorithm cannot handle negative interactions so any correlation <0 will be set to 0.

urt.net.clust <- urt.net.filt %>% 
  cluster_network(min_n=5, visualize=T)


urt.net.clust %>% 
  group_by(cluster) %>% 
  summarize(n=n())
#> # A tibble: 5 × 2
#>   cluster     n
#>   <chr>   <int>
#> 1 c0         42
#> 2 c17         8
#> 3 c18         8
#> 4 c21        20
#> 5 c54        11

Using the new clustered taxa (“eigentaxa”) we can visualize the clusters in the microbiome profile.

urt.nf$taxa <- urt.nf$taxa %>% 
  left_join(urt.net.clust, by=c("taxon_name"="taxon")) 
urt.nf <- urt.nf %>%
  add_taxon_name() %>% 
  mutate_taxa(cluster= ifelse(is.na(cluster), taxon_name, cluster)) %>% 
  set_rank_names(c("cluster")) %>% 
  aggregate_taxa(rank="cluster")
urt.nf  %>% tacoplot_stack()


urt.net.clust %>% filter(cluster == unique(urt.net.clust$cluster)[[4]])
#> # A tibble: 20 × 2
#>    taxon            cluster
#>    <chr>            <chr>  
#>  1 Alloprevotella 3 c21    
#>  2 Campylobacter 2  c21    
#>  3 Fusobacterium 5  c21    
#>  4 Gemella          c21    
#>  5 Granulicatella 1 c21    
#>  6 Granulicatella 2 c21    
#>  7 Haemophilus 3    c21    
#>  8 Leptotrichia 1   c21    
#>  9 Megasphaera 1    c21    
#> 10 Neisseria 2      c21    
#> 11 Neisseria 4      c21    
#> 12 Prevotella_7 1   c21    
#> 13 Streptococcus 3  c21    
#> 14 Streptococcus 4  c21    
#> 15 Streptococcus 5  c21    
#> 16 Streptococcus 9  c21    
#> 17 Veillonella 1    c21    
#> 18 Veillonella 2    c21    
#> 19 Veillonella 4    c21    
#> 20 Veillonella 5    c21