import warnings; warnings.filterwarnings('ignore')
from pathlib import Path
import json
import pandas as pd
import numpy as np

DATA = Path('../data')

# Engraftment pathobionts (donor 2708 → P1 → P2, from the preliminary report)
ENGRAFTED = [
    'Mediterraneibacter gnavus',
    'Enterocloster bolteae',
    'Escherichia coli',
    'Eggerthella lenta',
    'Klebsiella oxytoca',
    'Hungatella hathewayi',
]

sens = pd.read_csv(DATA / 'nb04b_held_out_sensitivity.tsv', sep='\t')
e1_mean = sens[sens.ecotype == 1].jaccard.mean()
e3_mean = sens[sens.ecotype == 3].jaccard.mean()

crit1_thresh = 0.5
crit1_e1 = bool(e1_mean > crit1_thresh)
crit1_e3 = bool(e3_mean > crit1_thresh)

print('Held-out-species Jaccard (threshold: > 0.5)')
print(f'  E1: mean {e1_mean:.3f}  →  {"PASS" if crit1_e1 else "FAIL — leakage dominates"}')
print(f'  E3: mean {e3_mean:.3f}  →  {"PASS" if crit1_e3 else "FAIL — leakage dominates"}')
print()
print('Per-split breakdown:')
print(sens.to_string(index=False))

Held-out-species Jaccard (threshold: > 0.5)
  E1: mean 0.230  →  FAIL — leakage dominates
  E3: mean 0.064  →  FAIL — leakage dominates

Per-split breakdown:
 split  ecotype  ho_top30_n  orig_in_half_b  shared  jaccard
     0        1          30              12       8 0.235294
     0        3          29              11       3 0.081081
     1        1          30              12       7 0.200000
     1        3          30              11       3 0.078947
     2        1          30              15      10 0.285714
     2        3          30              10       3 0.081081
     3        1          30              16       9 0.243243
     3        3          30              10       1 0.025641
     4        1          30              15       7 0.184211
     4        3          30              11       2 0.051282

refined = pd.read_csv(DATA / 'nb04c_tier_a_refined.tsv', sep='\t')

tier_a_full = refined[refined.n_evidence == 3]
by_ecotype = tier_a_full.groupby('ecotype').size()

crit2_thresh = 3
crit2_e1_count = int(by_ecotype.get('E1', 0))
crit2_e3_count = int(by_ecotype.get('E3', 0))
crit2_e1 = crit2_e1_count >= crit2_thresh
crit2_e3 = crit2_e3_count >= crit2_thresh

print('3-way independent-evidence Tier-A (bootstrap CI + LinDA + within-substudy; threshold: >= 3 per ecotype)')
print(f'  E1: {crit2_e1_count} candidates  →  {"PASS" if crit2_e1 else "FAIL"}')
print(f'  E3: {crit2_e3_count} candidates  →  {"PASS" if crit2_e3 else "FAIL"}')
print()
if len(tier_a_full):
    print('Passing candidates:')
    print(tier_a_full[['species','ecotype','orig_effect','linda_effect','within_substudy_effect']].to_string(index=False))

# Also report n_evidence = 2 for context (candidates that pass within-ecotype methods but are leakage-suspect)
tier_a_partial = refined[refined.n_evidence == 2]
print()
print('For context — n_evidence = 2 (within-ecotype methods only; leakage-suspect):')
print(f'  E1: {(tier_a_partial.ecotype=="E1").sum()} candidates')
print(f'  E3: {(tier_a_partial.ecotype=="E3").sum()} candidates')
print()
print('Of the 14 E1 n_evidence=2 candidates, all have NEGATIVE within-substudy effects:')
e1_partial = tier_a_partial[tier_a_partial.ecotype == 'E1']
print(f'  within-substudy effect mean: {e1_partial.within_substudy_effect.mean():+.2f}')
print(f'  within-substudy effect range: [{e1_partial.within_substudy_effect.min():+.2f}, {e1_partial.within_substudy_effect.max():+.2f}]')
print('  → These are ecotype-markers, not CD-drivers. Not valid Tier-A.')

3-way independent-evidence Tier-A (bootstrap CI + LinDA + within-substudy; threshold: >= 3 per ecotype)
  E1: 0 candidates  →  FAIL
  E3: 3 candidates  →  PASS

Passing candidates:
                  species ecotype  orig_effect  linda_effect  within_substudy_effect
   Flavonifractor plautii      E3     2.825911          2.91                   1.889
         Blautia wexlerae      E3     1.875755          2.00                   0.909
Mediterraneibacter gnavus      E3     1.583546          1.64                   5.131

For context — n_evidence = 2 (within-ecotype methods only; leakage-suspect):
  E1: 14 candidates
  E3: 12 candidates

Of the 14 E1 n_evidence=2 candidates, all have NEGATIVE within-substudy effects:
  within-substudy effect mean: -1.25
  within-substudy effect range: [-2.84, +0.00]
  → These are ecotype-markers, not CD-drivers. Not valid Tier-A.

meta = pd.read_csv(DATA / 'nb04c_within_substudy_meta.tsv', sep='\t')
engrafted_df = meta[meta.species.isin(ENGRAFTED)].copy()

# Expectation: pathobiont should be CD↑ (pooled_effect > 0) with FDR < 0.10 and ≥ 66% sign concordance
engrafted_df['pass_cd_up'] = (
    (engrafted_df.pooled_effect > 0) &
    (engrafted_df.fdr < 0.10) &
    (engrafted_df.concordant_sign_frac >= 0.66)
)

# K. oxytoca may not be in the test battery — check
print('Engraftment pathobionts (donor 2708 → P1 → P2):')
for sp in ENGRAFTED:
    row = engrafted_df[engrafted_df.species == sp]
    if len(row) == 0:
        print(f'  {sp:<30}  NOT_TESTED (below prevalence filter or not in mart)')
    else:
        r = row.iloc[0]
        verdict = 'PASS' if r.pass_cd_up else 'FAIL'
        print(f'  {sp:<30}  effect {r.pooled_effect:+.2f}  FDR {r.fdr:.2e}  concord {r.concordant_sign_frac:.2f}  →  {verdict}')

n_pass = int(engrafted_df.pass_cd_up.sum())
n_tested = len(engrafted_df)
crit3_thresh = 3
crit3 = n_pass >= crit3_thresh

print()
print(f'Engraftment pathobionts passing confound-free contrast: {n_pass} of 6 tested ({len(ENGRAFTED) - n_tested} not tested)')
print(f'Threshold: >= {crit3_thresh}  →  {"PASS" if crit3 else "FAIL"}')

Engraftment pathobionts (donor 2708 → P1 → P2):
  Mediterraneibacter gnavus       effect +5.13  FDR 0.00e+00  concord 1.00  →  PASS
  Enterocloster bolteae           effect +1.08  FDR 3.27e-06  concord 1.00  →  PASS
  Escherichia coli                effect +1.43  FDR 2.23e-04  concord 0.75  →  PASS
  Eggerthella lenta               effect +2.30  FDR 3.77e-09  concord 1.00  →  PASS
  Klebsiella oxytoca              NOT_TESTED (below prevalence filter or not in mart)
  Hungatella hathewayi            effect +0.92  FDR 4.79e-04  concord 0.75  →  PASS

Engraftment pathobionts passing confound-free contrast: 5 of 6 tested (1 not tested)
Threshold: >= 3  →  PASS

# Values from NB04b §7 execution log — hard-coded since NB04b doesn't write ARI to a tsv
# (5 × 80%-subsample LDA refits, ARI vs full-sample assignment)
boot_aris = [0.169, 0.16, 0.16, 0.154, 0.129]
ari_median = float(np.median(boot_aris))
ari_range = (float(min(boot_aris)), float(max(boot_aris)))

crit4_thresh = 0.30
crit4 = ari_median > crit4_thresh

print(f'Bootstrap ecotype ARI (5 × 80%-subsample refit, threshold: > 0.30)')
print(f'  median: {ari_median:.3f}  range: [{ari_range[0]:.3f}, {ari_range[1]:.3f}]')
print(f'  raw: {boot_aris}')
print(f'  →  {"PASS" if crit4 else "FAIL — marginal stability"}')
print()
print('Interpretation: The ecotype framework is usable for patient stratification (NB02 projection is deterministic given the fit) but is not externally replicable without MGnify cross-cohort validation. Flag in REPORT.')

Bootstrap ecotype ARI (5 × 80%-subsample refit, threshold: > 0.30)
  median: 0.160  range: [0.129, 0.169]
  raw: [0.169, 0.16, 0.16, 0.154, 0.129]
  →  FAIL — marginal stability

Interpretation: The ecotype framework is usable for patient stratification (NB02 projection is deterministic given the fit) but is not externally replicable without MGnify cross-cohort validation. Flag in REPORT.

# E1 verdict
e1_pass = {'crit1_leakage': crit1_e1, 'crit2_tier_a': crit2_e1}
# Criteria 3 + 4 are cohort-level not ecotype-level
e1_proceed = all(e1_pass.values())

# E3 verdict
e3_pass = {'crit1_leakage': crit1_e3, 'crit2_tier_a': crit2_e3}
e3_proceed = all(e3_pass.values())

# Cohort-level criteria
cohort_pass = {'crit3_engraftment': crit3, 'crit4_ecotype_stability': crit4}

print('=' * 70)
print('PER-ECOTYPE VERDICT')
print('=' * 70)
print(f'E1: crit1 {"✓" if crit1_e1 else "✗"}  crit2 {"✓" if crit2_e1 else "✗"}  →  {"PROCEED" if e1_proceed else "BLOCKED"}')
print(f'E3: crit1 {"✓" if crit1_e3 else "✗"}  crit2 {"✓" if crit2_e3 else "✗"}  →  {"PROCEED" if e3_proceed else "PROCEED WITH CAVEAT"}')
print()
print('COHORT-LEVEL CRITERIA')
print(f'  crit3 engraftment: {"✓" if crit3 else "✗"}  ({n_pass}/6 passing)')
print(f'  crit4 ecotype stability: {"✓" if crit4 else "✗"}  (ARI {ari_median:.2f})')
print()
print('=' * 70)
print('NB05 INPUT CANDIDATE SET (rigor-controlled)')
print('=' * 70)

# E3 rock-solid: Tier-A with n_evidence = 3
e3_rocksolid = refined[(refined.ecotype == 'E3') & (refined.n_evidence == 3)]
# Plus the engraftment pathobionts that pass the confound-free contrast — these are targets across the cohort (not ecotype-specific)
engrafted_pass = engrafted_df[engrafted_df.pass_cd_up]
# Note which engraftment species are ALREADY in e3_rocksolid (M. gnavus should be)
engrafted_new = engrafted_pass[~engrafted_pass.species.isin(e3_rocksolid.species)]

print()
print(f'A) Ecotype-specific (E3) — 3-way evidence: {len(e3_rocksolid)} candidates')
print(e3_rocksolid[['species','orig_effect','linda_effect','within_substudy_effect']].to_string(index=False))

print()
print(f'B) Cross-ecotype — engraftment-confirmed pathobionts under confound-free contrast: {len(engrafted_pass)} / 6 tested')
print(engrafted_pass[['species','pooled_effect','fdr','concordant_sign_frac']].to_string(index=False))
print(f'   Of these, {len(engrafted_pass) - len(engrafted_new)} overlap with set A ({sorted(set(engrafted_pass.species) & set(e3_rocksolid.species))})')
print(f'   New additions beyond set A: {sorted(engrafted_new.species.tolist())}')

print()
print(f'C) BLOCKED scope — Pillar 2 for E1:')
print(f'   0 E1 candidates pass 3-way evidence.')
print(f'   All 14 E1 n_evidence=2 candidates have NEGATIVE within-substudy effects.')
print(f'   The current within-ecotype DA produces ecotype-markers, not CD-drivers.')

======================================================================
PER-ECOTYPE VERDICT
======================================================================
E1: crit1 ✗  crit2 ✗  →  BLOCKED
E3: crit1 ✗  crit2 ✓  →  PROCEED WITH CAVEAT

COHORT-LEVEL CRITERIA
  crit3 engraftment: ✓  (5/6 passing)
  crit4 ecotype stability: ✗  (ARI 0.16)

======================================================================
NB05 INPUT CANDIDATE SET (rigor-controlled)
======================================================================

A) Ecotype-specific (E3) — 3-way evidence: 3 candidates
                  species  orig_effect  linda_effect  within_substudy_effect
   Flavonifractor plautii     2.825911          2.91                   1.889
         Blautia wexlerae     1.875755          2.00                   0.909
Mediterraneibacter gnavus     1.583546          1.64                   5.131

B) Cross-ecotype — engraftment-confirmed pathobionts under confound-free contrast: 5 / 6 tested
                  species  pooled_effect          fdr  concordant_sign_frac
Mediterraneibacter gnavus          5.131 0.000000e+00                  1.00
        Eggerthella lenta          2.302 3.771759e-09                  1.00
         Escherichia coli          1.426 2.231766e-04                  0.75
    Enterocloster bolteae          1.085 3.273608e-06                  1.00
     Hungatella hathewayi          0.924 4.789077e-04                  0.75
   Of these, 1 overlap with set A (['Mediterraneibacter gnavus'])
   New additions beyond set A: ['Eggerthella lenta', 'Enterocloster bolteae', 'Escherichia coli', 'Hungatella hathewayi']

C) BLOCKED scope — Pillar 2 for E1:
   0 E1 candidates pass 3-way evidence.
   All 14 E1 n_evidence=2 candidates have NEGATIVE within-substudy effects.
   The current within-ecotype DA produces ecotype-markers, not CD-drivers.

verdict = {
    'date': '2026-04-24',
    'criteria': {
        'crit1_leakage_bound': {
            'threshold': '>0.5',
            'E1_value': round(e1_mean, 3),
            'E3_value': round(e3_mean, 3),
            'E1_pass': bool(crit1_e1),
            'E3_pass': bool(crit1_e3),
        },
        'crit2_3way_tier_a': {
            'threshold': '>=3',
            'E1_count': int(crit2_e1_count),
            'E3_count': int(crit2_e3_count),
            'E1_pass': bool(crit2_e1),
            'E3_pass': bool(crit2_e3),
        },
        'crit3_engraftment_confound_free': {
            'threshold': '>=3 of 6',
            'n_passing': int(n_pass),
            'n_tested': int(n_tested),
            'pass': bool(crit3),
            'passing_species': engrafted_pass.species.tolist(),
        },
        'crit4_ecotype_stability_ARI': {
            'threshold': '>0.30',
            'median': round(ari_median, 3),
            'range': [round(ari_range[0], 3), round(ari_range[1], 3)],
            'pass': bool(crit4),
        },
    },
    'per_ecotype_verdict': {
        'E1': 'BLOCKED',
        'E3': 'PROCEED WITH CAVEAT' if not (crit3 and crit4) else 'PROCEED',
    },
    'cohort_caveats': [
        'Criterion 4 (ecotype stability) fails — framework is internally weakly stable (ARI 0.13–0.17) and has no external replication. Must be flagged in REPORT.md.',
        'Criterion 1 (feature leakage) fails in both ecotypes — the original NB04 within-ecotype Tier-A is substantially a leakage artifact and must be retracted.',
    ],
    'nb05_input_set': {
        'e3_three_way_evidence': e3_rocksolid.species.tolist(),
        'engraftment_confound_free_cross_ecotype': engrafted_pass.species.tolist(),
        'nb05_total_species_pre_deduplication': sorted(set(e3_rocksolid.species) | set(engrafted_pass.species)),
    },
    'e1_reformulation_options': ['A_within_substudy_stratified_by_ecotype', 'B_functional_pathway_ecotypes', 'C_cohort_level_tier_a'],
    'recommended_next_step': 'Option A (within-substudy CD-vs-nonIBD stratified by ecotype) — preliminary cell-count check in NB04e before committing to the refit.',
}

with open(DATA / 'nb04d_stopping_rule_verdict.json', 'w') as f:
    json.dump(verdict, f, indent=2)

print('Saved verdict artifact to data/nb04d_stopping_rule_verdict.json')
print()
print('=' * 70)
print('FINAL VERDICT')
print('=' * 70)
print(f'E1: BLOCKED — reformulate per §6 option A (recommended) or B')
print(f'E3: PROCEED WITH CAVEAT — flag feature-leakage + ecotype-stability limitations in REPORT')
print(f'NB05 input set: {len(verdict["nb05_input_set"]["nb05_total_species_pre_deduplication"])} species')
print(f'Recommended next step: {verdict["recommended_next_step"]}')

Saved verdict artifact to data/nb04d_stopping_rule_verdict.json

======================================================================
FINAL VERDICT
======================================================================
E1: BLOCKED — reformulate per §6 option A (recommended) or B
E3: PROCEED WITH CAVEAT — flag feature-leakage + ecotype-stability limitations in REPORT
NB05 input set: 7 species
Recommended next step: Option A (within-substudy CD-vs-nonIBD stratified by ecotype) — preliminary cell-count check in NB04e before committing to the refit.

#	Criterion	Source	Threshold
1	Leakage bound	NB04b §2 held-out-species Jaccard	> 0.5 per ecotype
2	3-way independent Tier-A	NB04c §6 refined list (bootstrap CI + LinDA + within-substudy CD-vs-nonIBD)	≥ 3 candidates per ecotype
3	Engraftment pathobionts under confound-free contrast	NB04c §3 within-substudy meta-analysis	≥ 3 of 6 confirmed CD↑ with FDR < 0.10
4	Ecotype framework internal stability	NB04b §7 bootstrap ARI	median > 0.30

Nb04D Stopping Rule

NB04d — Rigor-Controlled Stopping Rule for NB05¶

Purpose¶

Rigor-controlled stopping criteria (revised)¶

§1. Criterion 1 — Leakage bound (held-out-species Jaccard)¶

§2. Criterion 2 — 3-way independent-evidence Tier-A per ecotype¶

§3. Criterion 3 — Engraftment pathobionts under confound-free contrast¶

§4. Criterion 4 — Ecotype framework stability¶

§5. Per-ecotype verdict + NB05 input set¶

§6. E1 reformulation recommendations (for RESEARCH_PLAN.md v1.4)¶

Option A (recommended): Within-IBD-substudy CD-vs-nonIBD stratified by ecotype¶

Option B: Rebuild Pillar 1 ecotypes on functional (pathway) feature matrix¶

Option C: Cohort-level Tier-A with engraftment + literature evidence¶

§7. Save the verdict artifact¶