import pandas as pd, numpy as np
from scipy import stats

otu_16s = pd.read_csv('data/16S/otu_table_rarefied.tsv', sep='\t', index_col=0)
otu_its = pd.read_csv('data/ITS/otu_table_rarefied.tsv', sep='\t', index_col=0)
meta = pd.read_csv('data/sample_metadata.tsv', sep='\t').set_index('sample_id')
group_order = ['Base', 'L', 'LC', 'L-L', 'LC-L', 'L-LC', 'LC-LC']

def observed(r): return (r > 0).sum()
def shannon(r):
    r = r[r > 0]; p = r / r.sum(); return -np.sum(p * np.log(p))
def simpson(r):
    r = r[r > 0]; p = r / r.sum(); return 1 - np.sum(p**2)
def pielou(r):
    h = shannon(r); s = observed(r)
    return h / np.log(s) if s > 1 else 0
def chao1(r):
    r = r[r > 0]; s = len(r); f1 = (r==1).sum(); f2 = (r==2).sum()
    if f2 == 0: return s + f1*(f1-1)/2 if f1 > 0 else s
    return s + f1**2 / (2*f2)

def compute_alpha(otu_df, marker):
    rows = []
    for s in otu_df.index:
        r = otu_df.loc[s]
        rows.append({'sample_id': s, 'marker': marker, 'observed_otus': observed(r),
                      'shannon': shannon(r), 'simpson': simpson(r),
                      'pielou_evenness': pielou(r), 'chao1': chao1(r)})
    df = pd.DataFrame(rows)
    df['group_label'] = df['sample_id'].map(meta['group_label'])
    return df

a16 = compute_alpha(otu_16s, '16S')
aits = compute_alpha(otu_its, 'ITS')
a16.to_csv('data/16S/alpha_diversity.tsv', sep='\t', index=False)
aits.to_csv('data/ITS/alpha_diversity.tsv', sep='\t', index=False)

for marker, df in [('16S', a16), ('ITS', aits)]:
    print(f"\n{marker} Alpha Diversity by Group:")
    for m in ['observed_otus', 'shannon', 'simpson']:
        print(f"  {m}:")
        for g in group_order:
            v = df[df['group_label']==g][m]
            if len(v) > 0: print(f"    {g:6s}: {v.mean():.2f} ± {v.std():.2f}")

16S Alpha Diversity by Group:
  observed_otus:
    Base  : 1594.33 ± 11.72
    L     : 163.33 ± 3.06
    LC    : 133.00 ± 4.36
    L-L   : 162.33 ± 2.52
    LC-L  : 177.67 ± 1.53
    L-LC  : 154.67 ± 11.93
    LC-LC : 153.00 ± 4.36
  shannon:
    Base  : 6.46 ± 0.02
    L     : 3.16 ± 0.01
    LC    : 2.41 ± 0.00
    L-L   : 3.19 ± 0.10
    LC-L  : 3.05 ± 0.06
    L-LC  : 2.85 ± 0.31
    LC-LC : 2.47 ± 0.05
  simpson:
    Base  : 1.00 ± 0.00
    L     : 0.92 ± 0.00
    LC    : 0.81 ± 0.00
    L-L   : 0.90 ± 0.01
    LC-L  : 0.88 ± 0.02
    L-LC  : 0.88 ± 0.03
    LC-LC : 0.83 ± 0.01

ITS Alpha Diversity by Group:
  observed_otus:
    Base  : 253.00 ± 19.97
    L     : 8.00 ± 1.00
    LC    : 6.33 ± 0.58
    L-L   : 6.00 ± 2.83
    LC-L  : 3.33 ± 0.58
    L-LC  : 4.33 ± 3.21
    LC-LC : 6.67 ± 1.53
  shannon:
    Base  : 3.50 ± 0.33
    L     : 1.37 ± 0.49
    LC    : 0.62 ± 0.17
    L-L   : 0.37 ± 0.50
    LC-L  : 0.17 ± 0.14
    L-LC  : 0.05 ± 0.09
    LC-LC : 0.49 ± 0.32
  simpson:
    Base  : 0.92 ± 0.03
    L     : 0.70 ± 0.16
    LC    : 0.34 ± 0.10
    L-L   : 0.21 ± 0.30
    LC-L  : 0.08 ± 0.08
    L-LC  : 0.02 ± 0.03
    LC-LC : 0.29 ± 0.23

print("Kruskal-Wallis across all 7 groups:")
for marker, df in [('16S', a16), ('ITS', aits)]:
    print(f"\n  {marker}:")
    for m in ['observed_otus', 'shannon', 'simpson', 'pielou_evenness', 'chao1']:
        groups = [df[df['group_label']==g][m].values for g in group_order if len(df[df['group_label']==g]) > 0]
        h, p = stats.kruskal(*groups)
        sig = '*' if p < 0.05 else ''
        print(f"    {m:20s}: H={h:.2f}, p={p:.4e} {sig}")

Kruskal-Wallis across all 7 groups:

  16S:
    observed_otus       : H=18.13, p=5.9273e-03 *
    shannon             : H=18.23, p=5.6738e-03 *
    simpson             : H=18.56, p=4.9694e-03 *
    pielou_evenness     : H=18.63, p=4.8324e-03 *
    chao1               : H=16.69, p=1.0482e-02 *

  ITS:
    observed_otus       : H=13.10, p=4.1448e-02 *
    shannon             : H=15.84, p=1.4623e-02 *
    simpson             : H=15.67, p=1.5659e-02 *
    pielou_evenness     : H=15.78, p=1.4979e-02 *
    chao1               : H=13.64, p=3.3866e-02 *

import matplotlib; matplotlib.use('Agg')
import matplotlib.pyplot as plt

colors = ['#4477AA', '#EE6677', '#CCBB44', '#228833', '#66CCEE', '#AA3377', '#BBBBBB']
metrics = [('observed_otus','Observed OTUs'),('shannon','Shannon'),('simpson','Simpson'),('chao1','Chao1')]

for marker, df in [('16S', a16), ('ITS', aits)]:
    fig, axes = plt.subplots(1, 4, figsize=(18, 5))
    for ax, (m, label) in zip(axes, metrics):
        data = [df[df['group_label']==g][m].values for g in group_order]
        bp = ax.boxplot(data, tick_labels=group_order, patch_artist=True, widths=0.6)
        for patch, c in zip(bp['boxes'], colors):
            patch.set_facecolor(c); patch.set_alpha(0.7)
        for i, d in enumerate(data):
            ax.scatter([i+1]*len(d), d, color=colors[i], edgecolor='black', s=40, zorder=5)
        ax.set_ylabel(label); ax.set_xticklabels(group_order, rotation=45, ha='right')
        ax.grid(axis='y', alpha=0.3)
    fig.suptitle(f'{marker} Alpha Diversity', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig(f'figures/{marker}_alpha_diversity.png', dpi=150, bbox_inches='tight')
    plt.close()
print("Saved alpha diversity figures")

Saved alpha diversity figures

Nb03 Alpha Diversity

NB03: Alpha Diversity¶

Kruskal-Wallis Test (all 7 groups)¶

Alpha Diversity Figures¶