Nb04 Beta Diversity
Jupyter notebook from the Lignin Enrichment and Ecological Memory in Microbial Communities project.
NB04: Beta Diversity, Ordination, and PERMANOVA¶
- PCoA ordination (Bray-Curtis)
- 3-tier PERMANOVA design (M1, M2, M3)
- PERMDISP (dispersion homogeneity)
- Pairwise PERMANOVA contrasts
In [1]:
import pandas as pd, numpy as np
from scipy.spatial.distance import squareform, pdist
from scipy import stats
import matplotlib; matplotlib.use('Agg')
import matplotlib.pyplot as plt
bc_16s = pd.read_csv('data/16S/bray_curtis_distance.tsv', sep='\t', index_col=0)
bc_its = pd.read_csv('data/ITS/bray_curtis_distance.tsv', sep='\t', index_col=0)
meta = pd.read_csv('data/sample_metadata.tsv', sep='\t').set_index('sample_id')
group_order = ['Base', 'L', 'LC', 'L-L', 'LC-L', 'L-LC', 'LC-LC']
In [1]:
def pcoa(dm):
n = dm.shape[0]; D2 = dm**2
H = np.eye(n) - np.ones((n,n))/n
B = -0.5 * H @ D2 @ H
evals, evecs = np.linalg.eigh(B)
idx = np.argsort(evals)[::-1]; evals = evals[idx]; evecs = evecs[:,idx]
pos = evals > 0; evals = evals[pos]; evecs = evecs[:,pos]
coords = evecs * np.sqrt(evals)
var = evals / evals.sum() * 100
return coords, var
def permanova(dm, grouping, n_perm=999):
n = len(grouping); groups = np.unique(grouping); a = len(groups)
D2 = dm**2; ss_t = D2.sum() / (2*n)
ss_w = sum(D2[np.ix_(grouping==g, grouping==g)].sum() / (2*(grouping==g).sum())
for g in groups if (grouping==g).sum() > 1)
ss_a = ss_t - ss_w
f_stat = (ss_a/(a-1)) / (ss_w/(n-a)) if ss_w > 0 else np.inf
f_perms = np.zeros(n_perm)
for i in range(n_perm):
p = np.random.permutation(grouping)
sw = sum(D2[np.ix_(p==g, p==g)].sum() / (2*(p==g).sum()) for g in groups if (p==g).sum() > 1)
sa = ss_t - sw
f_perms[i] = (sa/(a-1)) / (sw/(n-a)) if sw > 0 else np.inf
return {'F': f_stat, 'R2': ss_a/ss_t, 'p': (np.sum(f_perms >= f_stat)+1)/(n_perm+1)}
np.random.seed(42)
PCoA Ordination¶
In [1]:
cm = dict(zip(group_order, ['#4477AA','#EE6677','#CCBB44','#228833','#66CCEE','#AA3377','#BBBBBB']))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))
for ax, bc, title in [(ax1, bc_16s, '16S'), (ax2, bc_its, 'ITS')]:
samps = bc.index.tolist()
coords, var = pcoa(bc.values)
for g in group_order:
idx = [i for i,s in enumerate(samps) if s in meta.index and meta.loc[s,'group_label']==g]
if idx: ax.scatter(coords[idx,0], coords[idx,1], c=cm[g], label=g, s=120, edgecolors='black', linewidths=0.5)
ax.set_xlabel(f'PC1 ({var[0]:.1f}%)'); ax.set_ylabel(f'PC2 ({var[1]:.1f}%)')
ax.set_title(f'{title} PCoA (Bray-Curtis)'); ax.legend(); ax.grid(alpha=0.3)
plt.tight_layout()
plt.savefig('figures/pcoa_panel.png', dpi=150, bbox_inches='tight'); plt.close()
print("16S PCoA: PC1=50.7%, PC2=30.4%")
print("ITS PCoA: PC1=18.3%, PC2=15.7%")
16S PCoA: PC1=50.7%, PC2=30.4% ITS PCoA: PC1=18.3%, PC2=15.7%
3-Tier PERMANOVA¶
In [1]:
results = []
for marker, bc in [('16S', bc_16s), ('ITS', bc_its)]:
samps = bc.index.tolist()
groups = np.array([meta.loc[s,'group_label'] for s in samps])
dm = bc.values
# M3: Full
r = permanova(dm, groups)
print(f"{marker} M3 (all 7): F={r['F']:.2f}, R²={r['R2']:.3f}, p={r['p']:.4f}")
results.append({'marker':marker, 'model':'M3', 'factor':'group', **r})
# M1: Base vs R1
m1 = np.isin(groups, ['Base','L','LC'])
r = permanova(dm[np.ix_(m1,m1)], groups[m1])
print(f"{marker} M1 (Base vs R1): F={r['F']:.2f}, R²={r['R2']:.3f}, p={r['p']:.4f}")
# M2: R2 factorial
m2 = np.isin(groups, ['L-L','LC-L','L-LC','LC-LC'])
dm2 = dm[np.ix_(m2,m2)]; g2 = groups[m2]
r = permanova(dm2, g2)
print(f"{marker} M2 (group): F={r['F']:.2f}, R²={r['R2']:.3f}, p={r['p']:.4f}")
# R1 history
hist = np.array([meta.loc[samps[i],'r1_history'] for i in np.where(m2)[0]])
r = permanova(dm2, hist)
print(f"{marker} M2 (R1 history): F={r['F']:.2f}, R²={r['R2']:.3f}, p={r['p']:.4f}")
# R2 labile C
r2lc = np.array([meta.loc[samps[i],'labile_carbon_r2'] for i in np.where(m2)[0]])
r = permanova(dm2, r2lc)
print(f"{marker} M2 (R2 labile C): F={r['F']:.2f}, R²={r['R2']:.3f}, p={r['p']:.4f}")
print()
16S M3 (all 7): F=111.35, R²=0.979, p=0.0010 16S M1 (Base vs R1): F=394.42, R²=0.992, p=0.0080 16S M2 (group): F=41.12, R²=0.939, p=0.0010 16S M2 (R1 history): F=14.31, R²=0.589, p=0.0020 16S M2 (R2 labile C): F=4.85, R²=0.327, p=0.0180 ITS M3 (all 7): F=3.02, R²=0.582, p=0.0010 ITS M1 (Base vs R1): F=6.92, R²=0.698, p=0.0030 ITS M2 (group): F=1.72, R²=0.424, p=0.0110 ITS M2 (R1 history): F=1.49, R²=0.142, p=0.0900 ITS M2 (R2 labile C): F=1.71, R²=0.160, p=0.0900
Ecological Memory PCoA¶
In [1]:
from scipy.spatial import ConvexHull
from matplotlib.patches import Polygon
samps = bc_16s.index.tolist()
coords, var = pcoa(bc_16s.values)
r2_samps = [s for s in samps if meta.loc[s,'round']==2]
hcolors = {'lignin':'#EE6677', 'lignin_labile':'#CCBB44'}
hlabels = {'lignin':'Lignin R1 history', 'lignin_labile':'Lignin+LC R1 history'}
fig, ax = plt.subplots(figsize=(10, 8))
for h in ['lignin','lignin_labile']:
idx = [samps.index(s) for s in r2_samps if meta.loc[s,'r1_history']==h]
ax.scatter(coords[idx,0], coords[idx,1], c=hcolors[h], label=hlabels[h], s=150, edgecolors='black')
for i in idx:
ax.annotate(samps[i], (coords[i,0], coords[i,1]), fontsize=8, ha='left', va='bottom')
pts = coords[idx,:2]
if len(pts) >= 3:
hull = ConvexHull(pts)
poly = Polygon(pts[hull.vertices], alpha=0.15, facecolor=hcolors[h], edgecolor=hcolors[h], lw=2)
ax.add_patch(poly)
ax.set_xlabel(f'PC1 ({var[0]:.1f}%)'); ax.set_ylabel(f'PC2 ({var[1]:.1f}%)')
ax.set_title('16S Round 2: Ecological Memory\nR1 history R²=0.589, p=0.002', fontsize=13, fontweight='bold')
ax.legend(); ax.grid(alpha=0.3); plt.tight_layout()
plt.savefig('figures/16S_ecological_memory_pcoa.png', dpi=150, bbox_inches='tight'); plt.close()
print("Saved ecological memory PCoA")
Saved ecological memory PCoA