Figure 7

Pairwise Combination Recall Under a Fixed Top-50 Budget

Figure 7 shows the effect of combining prediction tools on recall. Recall of true off-target sites is measured among the top 50 ranked candidates after combining pairs of prediction tools using Reciprocal Rank Fusion. The analysis is restricted to full-length off-target sites without bulges to allow inclusion of ML-based models.

Input

The heatmap uses the no-bulge mixed standard-plus-ML benchmark output:

results/benchmark_runs/no_bulge_ml_comparison/benchmark_budget_constrained_pairwise_recall.csv

The table is already in long format, with one row per tool pair and rank cutoff.

Code

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from offtarget_benchmark.layout import repo_layout

# Resolve the repository layout and make sure the figure output folder exists.
layout = repo_layout()
figure_dir = layout.docs_dir / "generated_figures"
figure_dir.mkdir(parents=True, exist_ok=True)

# Pairwise recall table produced by the no-bulge standard-plus-ML benchmark run.
pairwise_csv = (
    layout.results_dir
    / "benchmark_runs"
    / "no_bulge_ml_comparison"
    / "benchmark_budget_constrained_pairwise_recall.csv"
)
pairwise = pd.read_csv(pairwise_csv)
# Notebook preview of the first rows.
pairwise.head(10)

	tool_a	tool_b	k	macro_recall	micro_recall	hits_sum	n_truth_true_sum	n_guides	fusion_method	rrf_constant
0	CCTop	CCTop	50	0.475297	0.315789	36.0	114.0	22	rrf	60.0
1	CCTop	CRISOT	50	0.652234	0.473684	54.0	114.0	22	rrf	60.0
2	CCTop	CRISPOR	50	0.562617	0.421053	48.0	114.0	22	rrf	60.0
3	CCTop	CRISPR-IP	50	0.617507	0.438596	50.0	114.0	22	rrf	60.0
4	CCTop	CRISPR-OFFT	50	0.580198	0.464912	53.0	114.0	22	rrf	60.0
5	CCTop	CRISPROFF	50	0.677716	0.438596	50.0	114.0	22	rrf	60.0
6	CCTop	CRISPRitz_cfd	50	0.568969	0.429825	49.0	114.0	22	rrf	60.0
7	CCTop	CRISPRitz_mismatch	50	0.480347	0.324561	37.0	114.0	22	rrf	60.0
8	CCTop	CRISPert	50	0.629131	0.491228	56.0	114.0	22	rrf	60.0
9	CCTop	Cas-OFFinder	50	0.462264	0.307018	35.0	114.0	22	rrf	60.0

Select Top-50 Reciprocal-Rank-Fusion Results

Code

# Display order of the tools; it fixes both the row and the column order of
# the heatmap matrix built below.
TOOL_ORDER = [
    "CRISPert",
    "CRISPR-IP",
    "CnnCRISPR",
    "CRISPROFF",
    "MOFF",
    "CRISPR-OFFT",
    "CRISOT",
    "CRISPOR",
    "CRISPRitz_cfd",
    "CCTop",
    "CRISPRitz_mismatch",
    "Cas-OFFinder",
    "GuideScan2",
    "FlashFry",
]
# Axis-label overrides; tools that are not listed are labelled with their own name.
TOOL_LABELS = {
    "CRISPRitz_mismatch": "CRISPRitz_mm",
    "Cas-OFFinder": "Cas-OFFinder",
    "GuideScan2": "GuideScan",
}

# Keep only the top-50 rank cutoff.
plot_df = pairwise[pairwise["k"].astype(int) == 50].copy()
# Keep only Reciprocal Rank Fusion rows. Without this, pivot_table(aggfunc="mean")
# below would silently average rows from different fusion methods for the same
# tool pair. The guard keeps tables without a fusion_method column working.
if "fusion_method" in plot_df.columns:
    plot_df = plot_df[plot_df["fusion_method"].astype(str).str.lower() == "rrf"]
# Drop pairs that involve a tool outside the display order.
plot_df = plot_df[plot_df["tool_a"].isin(TOOL_ORDER) & plot_df["tool_b"].isin(TOOL_ORDER)]
# Wide tool_a x tool_b matrix of macro recall. reindex() imposes TOOL_ORDER on
# both axes and leaves NaN for any tool that has no rows in the table.
matrix = (
    plot_df.pivot_table(index="tool_a", columns="tool_b", values="macro_recall", aggfunc="mean")
    .reindex(index=TOOL_ORDER, columns=TOOL_ORDER)
)
# Notebook preview of the top-left 6 x 6 corner.
matrix.iloc[:6, :6]

tool_b	CRISPert	CRISPR-IP	CnnCRISPR	CRISPROFF	MOFF	CRISPR-OFFT
tool_a
CRISPert	0.691305	0.692647	0.685950	0.694126	0.710228	0.708316
CRISPR-IP	0.692647	0.673102	0.683504	0.685724	0.653148	0.651633
CnnCRISPR	0.685950	0.683504	0.656644	0.684674	0.704872	0.697658
CRISPROFF	0.694126	0.685724	0.684674	0.661650	0.685327	0.702046
MOFF	0.710228	0.653148	0.704872	0.685327	0.652495	0.667338
CRISPR-OFFT	0.708316	0.651633	0.697658	0.702046	0.667338	0.649068

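The matrix follows the manuscript display order used for the mixed no-bulge comparison. Each cell is the macro recall for the top 50 fused candidates for that pair; diagonal cells pair a tool with itself. Tools in the display order that have no rows in the table are left as empty (NaN) rows and columns.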

Render the Figure

Code

# Convert once; the same array feeds both the image and the cell annotations.
arr = matrix.to_numpy(dtype=float)
n_tools = len(TOOL_ORDER)
tick_labels = [TOOL_LABELS.get(tool, tool) for tool in TOOL_ORDER]

fig, ax = plt.subplots(figsize=(10.8, 9.2))
# Fixed colour limits (0.0 to 0.8) instead of limits derived from the data.
im = ax.imshow(arr, cmap="viridis", vmin=0.0, vmax=0.8)
ax.set_xticks(np.arange(n_tools))
ax.set_yticks(np.arange(n_tools))
ax.set_xticklabels(tick_labels, rotation=45, ha="right")
ax.set_yticklabels(tick_labels)
ax.set_title("Figure 7. Pairwise RRF recall among top 50 no-bulge candidates", loc="left", fontweight="bold", pad=14)
ax.set_xlabel("Tool B")
ax.set_ylabel("Tool A")

# Minor ticks sit on the cell borders, so the minor grid draws thin white
# separators between cells; the minor tick marks themselves are hidden.
cell_edges = np.arange(-0.5, n_tools, 1)
ax.set_xticks(cell_edges, minor=True)
ax.set_yticks(cell_edges, minor=True)
ax.grid(which="minor", color="white", linewidth=0.7)
ax.tick_params(which="minor", bottom=False, left=False)

# Annotate every finite cell with its value. viridis runs from dark (low) to
# bright (high), so the text colour is chosen from the brightness of the
# rendered cell colour: black on bright cells, white on dark ones. A fixed
# "white above a value threshold" rule puts white text on the brightest cells.
for i, j in np.ndindex(arr.shape):
    value = arr[i, j]
    if not np.isfinite(value):
        continue  # NaN cells (no data for the pair) stay unlabelled
    red, green, blue, _alpha = im.cmap(float(im.norm(value)))
    brightness = 0.299 * red + 0.587 * green + 0.114 * blue
    ax.text(
        j,
        i,
        f"{value:.2f}",
        ha="center",
        va="center",
        fontsize=6.5,
        color="black" if brightness > 0.5 else "white",
    )


cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label("Macro recall at k=50 after RRF")
fig.tight_layout()
# Write the same figure as PDF and PNG into the generated-figures folder.
fig.savefig(figure_dir / "figure_7_rrf_pairwise_recall_top50_no_bulge_ml.pdf", dpi=300, bbox_inches="tight")
fig.savefig(figure_dir / "figure_7_rrf_pairwise_recall_top50_no_bulge_ml.png", dpi=300, bbox_inches="tight")
# Release the figure so it is not kept open (or re-displayed) afterwards.
plt.close(fig)