Nb06 Ecological Memory
Jupyter notebook from the Lignin Enrichment and Ecological Memory in Microbial Communities project.
NB06: Enrichment History and Ecological Memory¶
- Between-group Bray-Curtis distances
- Convergence vs divergence test
- Shared/unique OTUs across passages
- Community trajectory visualization
- Memory strength quantification
In [1]:
import pandas as pd, numpy as np
from scipy.spatial.distance import squareform, pdist
from scipy.spatial import ConvexHull
from matplotlib.patches import Polygon
import matplotlib; matplotlib.use('Agg')
import matplotlib.pyplot as plt
bc_16s = pd.read_csv('data/16S/bray_curtis_distance.tsv', sep='\t', index_col=0)
bc_its = pd.read_csv('data/ITS/bray_curtis_distance.tsv', sep='\t', index_col=0)
otu_16s = pd.read_csv('data/16S/otu_table_rarefied.tsv', sep='\t', index_col=0)
otu_its = pd.read_csv('data/ITS/otu_table_rarefied.tsv', sep='\t', index_col=0)
meta = pd.read_csv('data/sample_metadata.tsv', sep='\t').set_index('sample_id')
group_order = ['Base', 'L', 'LC', 'L-L', 'LC-L', 'L-LC', 'LC-LC']
Between-Group Distances¶
In [1]:
for marker, bc in [('16S', bc_16s), ('ITS', bc_its)]:
samps = bc.index.tolist()
groups = [meta.loc[s,'group_label'] for s in samps]
dm = []
for g1 in group_order:
row = []
for g2 in group_order:
i1 = [i for i,g in enumerate(groups) if g==g1]
i2 = [i for i,g in enumerate(groups) if g==g2]
if not i1 or not i2: row.append(np.nan); continue
d = [bc.iloc[i,j] for i in i1 for j in i2 if i!=j]
row.append(np.mean(d) if d else 0)
dm.append(row)
df = pd.DataFrame(dm, index=group_order, columns=group_order)
df.to_csv(f'data/{marker}/between_group_distances.tsv', sep='\t')
print(f"\n{marker} key distances:")
print(f" L-L vs LC-L (diff history, same R2=L): {df.loc['L-L','LC-L']:.3f}")
print(f" L-LC vs LC-LC (diff history, same R2=LC): {df.loc['L-LC','LC-LC']:.3f}")
print(f" L-L vs L-LC (same history=L, diff R2): {df.loc['L-L','L-LC']:.3f}")
print(f" LC-L vs LC-LC (same history=LC, diff R2): {df.loc['LC-L','LC-LC']:.3f}")
16S key distances: L-L vs LC-L (diff history, same R2=L): 0.507 L-LC vs LC-LC (diff history, same R2=LC): 0.486 L-L vs L-LC (same history=L, diff R2): 0.441 LC-L vs LC-LC (same history=LC, diff R2): 0.306 ITS key distances: L-L vs LC-L (diff history, same R2=L): 0.850 L-LC vs LC-LC (diff history, same R2=LC): 1.000 L-L vs L-LC (same history=L, diff R2): 1.000 LC-L vs LC-LC (same history=LC, diff R2): 0.959
Convergence Test and Memory Strength¶
In [1]:
for marker, bc in [('16S', bc_16s), ('ITS', bc_its)]:
samps = bc.index.tolist()
groups = [meta.loc[s,'group_label'] for s in samps]
g = lambda name: [i for i,x in enumerate(groups) if x==name]
d_hist_r2l = np.mean([bc.iloc[i,j] for i in g('L-L') for j in g('LC-L')])
d_hist_r2lc = np.mean([bc.iloc[i,j] for i in g('L-LC') for j in g('LC-LC')])
d_r2_l = np.mean([bc.iloc[i,j] for i in g('L-L') for j in g('L-LC')])
d_r2_lc = np.mean([bc.iloc[i,j] for i in g('LC-L') for j in g('LC-LC')])
d_ref = np.mean([bc.iloc[i,j] for i in g('Base') for j in range(len(samps)) if groups[j]!='Base'])
print(f"\n{marker}:")
print(f" History/R2 ratio (R2=L): {d_hist_r2l/d_r2_l:.2f}")
print(f" History/R2 ratio (R2=LC): {d_hist_r2lc/d_r2_lc:.2f}")
print(f" Memory index (R2=L): {d_hist_r2l/d_ref:.3f}")
print(f" Memory index (R2=LC): {d_hist_r2lc/d_ref:.3f}")
16S: History/R2 ratio (R2=L): 1.15 History/R2 ratio (R2=LC): 1.59 Memory index (R2=L): 0.513 Memory index (R2=LC): 0.492 ITS: History/R2 ratio (R2=L): 0.85 History/R2 ratio (R2=LC): 1.04 Memory index (R2=L): 0.876 Memory index (R2=LC): 1.031
OTU Retention Across Passages¶
In [1]:
for marker, otu in [('16S', otu_16s), ('ITS', otu_its)]:
print(f"\n{marker} OTU retention:")
go = {}
for g in group_order:
s = [x for x in otu.index if meta.loc[x,'group_label']==g]
go[g] = set(otu.columns[otu.loc[s].sum(axis=0) > 0])
for label, g0, g1, g2 in [("Base→L→L-L","Base","L","L-L"), ("Base→LC→LC-LC","Base","LC","LC-LC")]:
shared = len(go[g1] & go[g2])
pct = shared/len(go[g1])*100 if go[g1] else 0
print(f" {label}: {g1}={len(go[g1])} → {g2}={len(go[g2])}, shared={shared} ({pct:.0f}%)")
16S OTU retention: Base→L→L-L: L=190 → L-L=190, shared=154 (81%) Base→LC→LC-LC: LC=159 → LC-LC=176, shared=145 (91%) ITS OTU retention: Base→L→L-L: L=16 → L-L=10, shared=4 (25%) Base→LC→LC-LC: LC=12 → LC-LC=14, shared=5 (42%)
Community Trajectory Figure¶
In [1]:
def pcoa(dm):
n = dm.shape[0]; D2 = dm**2; H = np.eye(n)-np.ones((n,n))/n
B = -0.5*H@D2@H; ev, evec = np.linalg.eigh(B)
idx = np.argsort(ev)[::-1]; ev=ev[idx]; evec=evec[:,idx]
pos = ev>0; ev=ev[pos]; evec=evec[:,pos]
return evec*np.sqrt(ev), ev/ev.sum()*100
samps = bc_16s.index.tolist()
coords, var = pcoa(bc_16s.values)
cm = dict(zip(group_order, ['#4477AA','#EE6677','#CCBB44','#228833','#66CCEE','#AA3377','#BBBBBB']))
centroids = {}
for g in group_order:
idx = [i for i,s in enumerate(samps) if meta.loc[s,'group_label']==g]
centroids[g] = coords[idx,:2].mean(axis=0)
fig, ax = plt.subplots(figsize=(12, 9))
for g in group_order:
idx = [i for i,s in enumerate(samps) if meta.loc[s,'group_label']==g]
ax.scatter(coords[idx,0], coords[idx,1], c=cm[g], label=g, s=80, edgecolors='black', alpha=0.6)
for label, path, color, ls in [("Lignin", ['Base','L','L-L'], '#EE6677', '-'),
("Lignin→LC", ['Base','L','L-LC'], '#AA3377', '--'),
("Labile C", ['Base','LC','LC-LC'], '#CCBB44', '-'),
("Labile C→L", ['Base','LC','LC-L'], '#66CCEE', '--')]:
xs = [centroids[g][0] for g in path]; ys = [centroids[g][1] for g in path]
ax.plot(xs, ys, color=color, ls=ls, lw=2, alpha=0.8)
for g in group_order:
c = centroids[g]
ax.scatter(c[0], c[1], c=cm[g], s=200, edgecolors='black', lw=2, zorder=6, marker='D')
ax.annotate(g, c, fontsize=10, fontweight='bold', ha='center', va='bottom', xytext=(0,10), textcoords='offset points')
ax.set_xlabel(f'PC1 ({var[0]:.1f}%)'); ax.set_ylabel(f'PC2 ({var[1]:.1f}%)')
ax.set_title('16S Community Trajectories'); ax.legend(); ax.grid(alpha=0.3)
plt.tight_layout(); plt.savefig('figures/16S_community_trajectories.png', dpi=150); plt.close()
# Distance comparison bar plot
fig, (ax1,ax2) = plt.subplots(1, 2, figsize=(14, 6))
labels = ['Diff hist\nsame R2=L', 'Diff hist\nsame R2=LC', 'Same hist=L\ndiff R2', 'Same hist=LC\ndiff R2']
ax1.bar(range(4), [0.507, 0.486, 0.441, 0.306], color=['#EE6677','#EE6677','#4477AA','#4477AA'], edgecolor='black')
ax1.set_xticks(range(4)); ax1.set_xticklabels(labels, fontsize=8); ax1.set_ylabel('Mean BC')
ax1.set_title('16S: History > Current Condition'); ax1.set_ylim(0, 0.7)
ax2.bar(range(4), [0.850, 1.000, 1.000, 0.959], color=['#EE6677','#EE6677','#4477AA','#4477AA'], edgecolor='black')
ax2.set_xticks(range(4)); ax2.set_xticklabels(labels, fontsize=8); ax2.set_ylabel('Mean BC')
ax2.set_title('ITS: High replicate variability'); ax2.set_ylim(0, 1.15)
plt.tight_layout(); plt.savefig('figures/ecological_memory_distances.png', dpi=150); plt.close()
print("Saved trajectory and distance comparison figures")
Saved trajectory and distance comparison figures