import os, json, time
import pandas as pd, numpy as np, requests
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
from pathlib import Path
DATA = Path('../data'); FIG = Path('../figures')
pd.set_option('display.max_columns', 50); pd.set_option('display.width', 240)

master = pd.read_csv(DATA / 'census_master_summary.tsv', sep='\t')
res = pd.read_csv(DATA / 'resolved_compounds.tsv', sep='\t')
print('master compounds:', len(master), '| callable:', int(master['callable'].sum()),
      '| by source:', dict(master['source_short'].value_counts()))

master compounds: 83 | callable: 9 | by source: {'groundwater': np.int64(59), 'necromass': np.int64(24)}

PROPS = ['MolecularWeight','XLogP','TPSA','HBondDonorCount','HBondAcceptorCount',
         'RotatableBondCount','Complexity','HeavyAtomCount','Charge']
cache_path = DATA / 'pubchem_properties_cache.json'
cache = json.loads(cache_path.read_text()) if cache_path.exists() else {}
cids = [int(c) for c in res['cid'].dropna().unique().tolist()]
todo = [c for c in cids if str(c) not in cache]
def fetch_batch(batch):
    url = ('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/'
           + ','.join(map(str, batch)) + '/property/' + ','.join(PROPS) + '/JSON')
    r = requests.get(url, timeout=30)
    if r.status_code != 200: return
    for rec in r.json()['PropertyTable']['Properties']:
        cache[str(rec['CID'])] = {p: rec.get(p) for p in PROPS}
for i in range(0, len(todo), 50):
    fetch_batch(todo[i:i+50]); time.sleep(0.25)
cache_path.write_text(json.dumps(cache))
props = pd.DataFrame([{'cid': int(k), **v} for k, v in cache.items()])
for p in PROPS: props[p] = pd.to_numeric(props[p], errors='coerce')
print('properties cached for %d/%d CIDs' % (props['cid'].notna().sum(), len(cids)))
pc = res[['compound_id','cid']].dropna().copy(); pc['cid'] = pc['cid'].astype(int)
phys = master.merge(pc, on='compound_id', how='left').merge(props, on='cid', how='left')
phys.to_csv(DATA / 'compound_physicochem.tsv', sep='\t', index=False)
print('wrote data/compound_physicochem.tsv', phys.shape)

properties cached for 83/83 CIDs
wrote data/compound_physicochem.tsv (83, 25)

def contrast(df, group_col, a_label, b_label, props=PROPS):
    A = df[df[group_col] == a_label]; B = df[df[group_col] == b_label]
    rows = []
    for p in props:
        xa = A[p].dropna(); xb = B[p].dropna()
        if len(xa) < 2 or len(xb) < 2: continue
        try: _, pv = mannwhitneyu(xa, xb, alternative='two-sided')
        except ValueError: pv = np.nan
        rows.append(dict(prop=p, med_a=xa.median(), med_b=xb.median(),
                         dir=('a>b' if xa.median() > xb.median() else 'a<b'), p_dir=pv))
    return pd.DataFrame(rows)

phys['call_lbl'] = np.where(phys['callable'], 'callable', 'dark')
cvd = contrast(phys, 'call_lbl', 'callable', 'dark')
print('=== callable (n=%d) vs dark (n=%d) — DIRECTIONAL ONLY, underpowered ===' %
      (int(phys['callable'].sum()), int((~phys['callable']).sum())))
print(cvd.to_string(index=False, formatters={'med_a':'{:.1f}'.format,'med_b':'{:.1f}'.format,
      'p_dir':'{:.3f}'.format}))
gvn = contrast(phys, 'source_short', 'groundwater', 'necromass')
print('\n=== groundwater (n=59) vs necromass (n=24) ===')
print(gvn.to_string(index=False, formatters={'med_a':'{:.1f}'.format,'med_b':'{:.1f}'.format,
      'p_dir':'{:.3f}'.format}))

=== callable (n=9) vs dark (n=74) — DIRECTIONAL ONLY, underpowered ===
              prop med_a med_b dir p_dir
   MolecularWeight 152.1 178.7 a<b 0.066
             XLogP   1.5   2.0 a<b 0.281
              TPSA  57.5  41.4 a>b 0.093
   HBondDonorCount   2.0   1.0 a>b 0.086
HBondAcceptorCount   3.0   2.0 a>b 0.142
RotatableBondCount   2.0   2.0 a<b 0.935
        Complexity 133.0 207.5 a<b 0.034
    HeavyAtomCount  11.0  13.0 a<b 0.057
            Charge   0.0   0.0 a<b 1.000

=== groundwater (n=59) vs necromass (n=24) ===
              prop med_a med_b dir p_dir
   MolecularWeight 174.2 178.2 a<b 0.265
             XLogP   2.0   2.0 a<b 0.144
              TPSA  46.3  43.5 a>b 0.441
   HBondDonorCount   1.0   1.0 a<b 0.640
HBondAcceptorCount   2.0   2.5 a<b 0.755
RotatableBondCount   2.0   2.0 a<b 0.862
        Complexity 203.0 193.0 a>b 0.495
    HeavyAtomCount  13.0  13.0 a<b 0.185
            Charge   0.0   0.0 a<b 1.000

fig, axes = plt.subplots(2, 3, figsize=(13, 7.5))
panel = ['MolecularWeight','XLogP','TPSA','HBondDonorCount','Complexity','RotatableBondCount']
for ax, p in zip(axes.ravel(), panel):
    groups = [phys[phys['call_lbl']==g][p].dropna() for g in ['callable','dark']]
    ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
               widths=0.6, showfliers=False)
    for i, gv in enumerate(groups, 1):
        ax.scatter(np.random.normal(i, 0.05, len(gv)), gv, s=12, alpha=0.5,
                   color=['#2b8cbe','#b30000'][i-1])
    ax.set_title(p, fontsize=10)
fig.suptitle('Physicochemistry: callable vs organism-dark (descriptive; n=9 vs 74)', fontsize=12)
fig.tight_layout(); fig.savefig(FIG / '09a_physicochem_callable_dark.png', dpi=150)
print('saved 09a_physicochem_callable_dark.png'); plt.close(fig)

/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],
/tmp/ipykernel_96825/904434457.py:5: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(groups, labels=['callable\n(n=%d)'%len(groups[0]),'dark\n(n=%d)'%len(groups[1])],

saved 09a_physicochem_callable_dark.png

# bioavailability ceiling: logP vs TPSA scatter, sized by MW, coloured by source, marked callable
fig, ax = plt.subplots(figsize=(9, 6.5))
for src, col in [('groundwater','#1f78b4'), ('necromass','#33a02c')]:
    s = phys[phys['source_short']==src]
    ax.scatter(s['XLogP'], s['TPSA'], s=np.clip(s['MolecularWeight']/2, 10, 300),
               alpha=0.55, color=col, edgecolor='k', linewidth=0.3, label=src)
cal = phys[phys['callable']]
ax.scatter(cal['XLogP'], cal['TPSA'], s=60, facecolors='none', edgecolors='red',
           linewidths=1.6, label='callable')
ax.axvspan(-100, 0, color='grey', alpha=0.08); ax.axvspan(5, 100, color='grey', alpha=0.08)
ax.axhline(140, color='grey', ls='--', lw=0.8)
ax.set_xlim(phys['XLogP'].min()-1, phys['XLogP'].max()+1)
ax.set_ylim(-5, max(160, phys['TPSA'].max()+10))
ax.set_xlabel('XLogP (lipophilicity)'); ax.set_ylabel('TPSA (polar surface area, A^2)')
ax.set_title('Bioavailability space (marker size = MW). Shaded/dashed = transport-unfavourable')
ax.legend(loc='upper right', fontsize=9)
fig.tight_layout(); fig.savefig(FIG / '09a_bioavailability_space.png', dpi=150)
print('saved 09a_bioavailability_space.png'); plt.close(fig)

saved 09a_bioavailability_space.png

pop = pd.read_csv(DATA / 'compound_organism_predictions.tsv', sep='\t')
vers = pop.groupby('genome_id')['compound_id'].nunique()
dist = vers.value_counts().sort_index()
print('chassis versatility (catabolic capacities per genome, of 8):')
for k, v in dist.items(): print(f'  {k}: {v:4d} genomes')
n_spec = int((vers == 1).sum()); n_gen = int((vers >= 3).sum())
print(f'\nspecialists (1 capacity): {n_spec}  |  generalists (>=3): {n_gen}')
# generalist taxonomy
tax = pop[['genome_id','taxon_name']].drop_duplicates('genome_id').set_index('genome_id')
gen_ids = vers[vers >= 3].index
gen_tax = tax.loc[gen_ids].copy()
gen_tax['genus'] = gen_tax['taxon_name'].astype(str).str.split().str[0]
print('\ngeneralist chassis by genus:')
print(gen_tax['genus'].value_counts().to_string())
# provenance audit
print('\nprovenance of callable evidence (tier of each genome-compound call):')
print(pop['tier'].value_counts().to_string())
print('-> 100% catabolic-direction (NB03 filter); biosynthetic-only signatures were excluded')
print('   and fall in the dark set (see Part 4 only_biosynthetic_signatures bucket).')

chassis versatility (catabolic capacities per genome, of 8):
  1:  675 genomes
  2:  107 genomes
  3:   13 genomes
  4:    5 genomes

specialists (1 capacity): 675  |  generalists (>=3): 18

generalist chassis by genus:
genus
Paraburkholderia    5
Burkholderia        4
Hydrogenophaga      2
Burkholderiaceae    1
Cupriavidus         1
Myxococcaceae       1
Caballeronia        1
Polaromonas         1
Pigmentiphaga       1
Alcaligenes         1

provenance of callable evidence (tier of each genome-compound call):
tier
T2_pathway      846
T3_signature    102
-> 100% catabolic-direction (NB03 filter); biosynthetic-only signatures were excluded
   and fall in the dark set (see Part 4 only_biosynthetic_signatures bucket).

# per-compound chassis breadth: how many genomes / genera carry each capacity
pop['genus'] = pop['taxon_name'].astype(str).str.split().str[0]
cap = (pop.groupby('name')
       .agg(n_genomes=('genome_id','nunique'), n_genera=('genus','nunique'))
       .sort_values('n_genomes', ascending=False))
print('capacity breadth across chassis:')
print(cap.to_string())

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
ax1.bar(dist.index, dist.values, color='#41b6c4', edgecolor='k', linewidth=0.4)
ax1.set_xlabel('catabolic capacities per genome (of 8)'); ax1.set_ylabel('genomes')
ax1.set_title('Chassis versatility (n=%d genomes)' % len(vers))
ax1.set_yscale('log')
cap_s = cap.sort_values('n_genomes')
ax2.barh(cap_s.index, cap_s['n_genomes'], color='#fc8d59', edgecolor='k', linewidth=0.4)
ax2.set_xlabel('genomes carrying capacity'); ax2.set_title('Per-compound chassis breadth')
for i, (n_g, nm) in enumerate(zip(cap_s['n_genomes'], cap_s.index)):
    ax2.text(n_g+3, i, str(int(n_g)), va='center', fontsize=8)
fig.tight_layout(); fig.savefig(FIG / '09b_chassis.png', dpi=150)
print('saved 09b_chassis.png'); plt.close(fig)

capacity breadth across chassis:
                       n_genomes  n_genera
name                                      
3-hydroxybenzoic acid        296        53
salicylic acid               197        28
4-hydroxybenzaldehyde        175        43
TEREPHTHALIC ACID             97        21
phthalic acid                 81        22
Phenylethylamine              81        13
xanthine                      19        11
Abscisic acid                  2         2

saved 09b_chassis.png

# denominator: all genomes per genus in the depot is not available offline; use the
# phylo map which carries genus + n_genomes per (compound,taxon). Build genus x compound
# prevalence from the predictions (carriers) over genus genome totals from the predictions.
genus_tot = pop.groupby('genus')['genome_id'].nunique()
big = genus_tot[genus_tot >= 5].index  # genera with >=5 sampled genomes
carr = (pop[pop['genus'].isin(big)]
        .groupby(['genus','name'])['genome_id'].nunique().unstack(fill_value=0))
frac = carr.div(genus_tot.loc[carr.index], axis=0)
frac = frac.loc[genus_tot.loc[frac.index].sort_values(ascending=False).index]
print('genus x compound carrier fraction (genera with >=5 sampled genomes):')
print((frac*100).round(0).astype(int).to_string())
# conservation summary: per compound, max within-genus prevalence
print('\nper-compound peak within-genus conservation:')
for c in frac.columns:
    top = frac[c].sort_values(ascending=False).head(1)
    print(f'  {c:24s} peak {top.iloc[0]*100:3.0f}% in {top.index[0]}')

genus x compound carrier fraction (genera with >=5 sampled genomes):
name               3-hydroxybenzoic acid  4-hydroxybenzaldehyde  Phenylethylamine  TEREPHTHALIC ACID  phthalic acid  salicylic acid  xanthine
genus                                                                                                                                        
Cupriavidus                           52                     12                41                  0              1               1         0
Pseudomonas                           32                     12                 0                  0              0              85         0
Variovorax                             5                      2                 0                 68              0              33         0
Novosphingobium                        0                    100                 0                  0              0               0         0
Caulobacter                           91                      0                 0                  0              0               6         3
Paenarthrobacter                       0                      0                 0                  0            100               0         0
environmental                         28                      9                12                 25              6              22        12
Paraburkholderia                      61                     87                17                  4             17               0         0
Duganella                             39                      0                 0                  0              0              61         0
Unknown                               37                     16                11                 21             11              16         5
Acidovorax                            62                     12                12                  6              0              62         0
Castellaniella                       100                      0                 0                  0              0               0         0
Pseudomonadaceae                       8                     15                 0                  0              0              85         0
Comamonas                             92                     85                 0                  0              0               0         8
Hylemonella                          100                      0                 0                  8              0               0         0
Sphingobium                            0                     92                 0                  0              0               8         0
Rhizobium                             58                      0                 0                  0              0               0        42
Rhodoferax                            18                      9                 0                  0              0              73         0
Burkholderia                          70                     50                60                 30              0               0         0
Collimonas                             0                     20                 0                 70              0              30         0
Ensifer                               50                      0                 0                  0             50               0         0
Bosea                                100                     25                 0                  0             12               0         0
Pseudarthrobacter                      0                      0                 0                  0            100               0         0
Curvibacter                            0                      0               100                  0              0               0         0
Ferrovibrio                            0                      0                 0                100              0               0         0
Xylophilus                             0                      0                 0                  0              0             100         0
Streptomyces                           0                     67                 0                  0             50               0         0
Alcaligenes                           20                      0               100                  0            100               0         0
Achromobacter                         40                     60               100                  0              0               0         0
Escherichia                            0                      0                 0                  0              0             100         0
Burkholderiaceae                      20                     20                 0                 40             20              40         0
Mucilaginibacter                      80                      0                 0                  0              0               0        20
Hydrogenophaga                        40                     40                 0                 80              0              20         0
Ramlibacter                           20                      0                 0                 80              0               0         0
Rhodococcus                            0                     20                 0                  0             80               0         0

per-compound peak within-genus conservation:
  3-hydroxybenzoic acid    peak 100% in Bosea
  4-hydroxybenzaldehyde    peak 100% in Novosphingobium
  Phenylethylamine         peak 100% in Curvibacter
  TEREPHTHALIC ACID        peak 100% in Ferrovibrio
  phthalic acid            peak 100% in Alcaligenes
  salicylic acid           peak 100% in Escherichia
  xanthine                 peak  42% in Rhizobium

fig, ax = plt.subplots(figsize=(10, max(4, 0.4*len(frac))))
im = ax.imshow(frac.values, cmap='YlGnBu', vmin=0, vmax=1, aspect='auto')
ax.set_xticks(range(len(frac.columns)))
ax.set_xticklabels(frac.columns, rotation=45, ha='right', fontsize=8)
ax.set_yticks(range(len(frac.index)))
ax.set_yticklabels([f'{g} (n={genus_tot[g]})' for g in frac.index], fontsize=8)
for i in range(len(frac.index)):
    for j in range(len(frac.columns)):
        v = frac.values[i, j]
        if v > 0: ax.text(j, i, f'{v*100:.0f}', ha='center', va='center', fontsize=6,
                          color='white' if v > 0.5 else 'black')
ax.set_title('Within-genus carrier fraction (%) — clade conservation of catabolic capacity')
fig.colorbar(im, ax=ax, shrink=0.6, label='fraction of genus genomes')
fig.tight_layout(); fig.savefig(FIG / '09c_clade_conservation.png', dpi=150)
print('saved 09c_clade_conservation.png'); plt.close(fig)

saved 09c_clade_conservation.png

dark = pd.read_csv(DATA / 'compound_organism_dark.tsv', sep='\t')
# review I1: lauric acid was reclassified callable (measured_fitness); drop any compound that
# is now callable in master so the dark taxonomy and the callable count stay consistent.
still_dark = set(master[~master['callable']]['compound_id'])
n_drop = (~dark['compound_id'].isin(still_dark)).sum()
dark = dark[dark['compound_id'].isin(still_dark)].copy()
print(f'(dropped {n_drop} now-callable compound(s) from the dark set after I1)')
BUCKET = {'only_biosynthetic_signatures':'biosynthesis-known/catabolism-unknown',
          'kegg_no_rxn_in_genomes':'KEGG-linked/no-reaction-in-genomes',
          'only_generic_rxns':'only-generic-reactions',
          'no_kegg':'fully-orphan (no KEGG)'}
dark['bucket'] = dark['organism_dark_reason'].map(BUCKET).fillna(dark['organism_dark_reason'])
dark = dark.merge(master[['compound_id','source_short','npc_pathway']], on='compound_id',
                  how='left', suffixes=('','_m'))
dark['mibig_consult'] = dark['organism_dark_reason'] == 'only_biosynthetic_signatures'
dark.to_csv(DATA / 'dark_matter_taxonomy.tsv', sep='\t', index=False)
print('dark compounds:', len(dark))
print('\nby bucket:')
print(dark['bucket'].value_counts().to_string())
print('\nbiosynthesis-known (consult MIBiG) compounds:')
bk = dark[dark['mibig_consult']]
print(bk[['name','npc_pathway','source_short']].to_string(index=False))

(dropped 1 now-callable compound(s) from the dark set after I1)
dark compounds: 74

by bucket:
bucket
KEGG-linked/no-reaction-in-genomes       33
fully-orphan (no KEGG)                   29
biosynthesis-known/catabolism-unknown     6
only-generic-reactions                    6

biosynthesis-known (consult MIBiG) compounds:
                name                     npc_pathway source_short
            Tyramine                       Alkaloids  groundwater
GUANIDINEACETIC ACID        Amino acids and Peptides  groundwater
       Cinnamic acid Shikimates and Phenylpropanoids  groundwater
        caffeic acid Shikimates and Phenylpropanoids  groundwater
       Palmitic Acid                     Fatty acids    necromass
            Farnesol                      Terpenoids    necromass

# source-stratified dark fraction
tot_src = master.groupby('source_short')['compound_id'].nunique()
dark_src = master[~master['callable']].groupby('source_short')['compound_id'].nunique()
frac_src = (dark_src / tot_src * 100).round(0)
print('dark fraction by source:')
for s in tot_src.index:
    print(f'  {s:12s} {int(dark_src.get(s,0))}/{int(tot_src[s])} dark ({frac_src.get(s,0):.0f}%)')
# bucket x source
bx = dark.groupby(['bucket','source_short']).size().unstack(fill_value=0)
print('\nbucket x source:'); print(bx.to_string())

dark fraction by source:
  groundwater  53/59 dark (90%)
  necromass    21/24 dark (88%)

bucket x source:
source_short                           groundwater  necromass
bucket                                                       
KEGG-linked/no-reaction-in-genomes              28          5
biosynthesis-known/catabolism-unknown            4          2
fully-orphan (no KEGG)                          16         13
only-generic-reactions                           5          1

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
bc = dark['bucket'].value_counts()
colors = ['#b30000','#e34a33','#fc8d59','#fdcc8a'][:len(bc)]
ax1.barh(range(len(bc)), bc.values, color=colors, edgecolor='k', linewidth=0.4)
ax1.set_yticks(range(len(bc))); ax1.set_yticklabels(bc.index, fontsize=8); ax1.invert_yaxis()
for i, v in enumerate(bc.values): ax1.text(v+0.3, i, str(int(v)), va='center', fontsize=9)
ax1.set_xlabel('compounds'); ax1.set_title('Dark-matter taxonomy (74 organism-dark compounds)')
bxp = bx.reindex(bc.index)
bottom = np.zeros(len(bxp))
for src, col in [('groundwater','#1f78b4'),('necromass','#33a02c')]:
    if src in bxp.columns:
        ax2.barh(range(len(bxp)), bxp[src].values, left=bottom, color=col, label=src,
                 edgecolor='k', linewidth=0.4)
        bottom += bxp[src].values
ax2.set_yticks(range(len(bxp))); ax2.set_yticklabels(bxp.index, fontsize=8); ax2.invert_yaxis()
ax2.set_xlabel('compounds'); ax2.set_title('Dark bucket by source'); ax2.legend(fontsize=9)
fig.tight_layout(); fig.savefig(FIG / '09d_dark_taxonomy.png', dpi=150)
print('saved 09d_dark_taxonomy.png'); plt.close(fig)

saved 09d_dark_taxonomy.png

print('=== NB09 OUTPUTS ===')
for f in ['compound_physicochem.tsv','dark_matter_taxonomy.tsv']:
    d = pd.read_csv(DATA / f, sep='\t'); print(f'  data/{f}: {d.shape}')
for f in ['09a_physicochem_callable_dark.png','09a_bioavailability_space.png',
          '09b_chassis.png','09c_clade_conservation.png','09d_dark_taxonomy.png']:
    print('  figures/' + f, '✓' if (FIG / f).exists() else 'MISSING')

=== NB09 OUTPUTS ===
  data/compound_physicochem.tsv: (83, 25)
  data/dark_matter_taxonomy.tsv: (74, 10)
  figures/09a_physicochem_callable_dark.png ✓
  figures/09a_bioavailability_space.png ✓
  figures/09b_chassis.png ✓
  figures/09c_clade_conservation.png ✓
  figures/09d_dark_taxonomy.png ✓

09 Deepening

NB09 — Deepening the census: bioavailability, chassis, clade conservation, dark-matter taxonomy¶

Part 1 — Physicochemistry & bioavailability¶

Part 2 — Utilizer→compound transpose: the chassis view¶

Part 3 — Per-clade conservation of utilization¶

Part 4 — Dark-matter taxonomy¶

NB09 summary¶