Figure 4

Overview

Figure 4 compares each tool's score with the observed editing signal (delta_indels) at validated true off-target sites.

Input table

  • benchmark_matched_truth_long.csv
Code
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image, display

from offtarget_benchmark.layout import repo_layout

# Resolve the directory that holds the primary manuscript benchmark run.
layout = repo_layout()
benchmark_dir = layout.results_dir / 'benchmark_runs' / 'manuscript_primary'

# Load the matched-truth table that every later cell in this notebook works from.
matched_truth_csv = benchmark_dir / 'benchmark_matched_truth_long.csv'
matched_long = pd.read_csv(matched_truth_csv, low_memory=False)

# Preview the columns used by this figure.
preview_columns = ['tool', 'guide_key', 'offtarget_seq', 'truth_status', 'delta_indels', 'raw_score', 'rank']
matched_long[preview_columns].head(10)
tool guide_key offtarget_seq truth_status delta_indels raw_score rank
0 Cas-OFFinder AAAGAACATCCAAGGCTGAGCGG AAAGAACATCCTAGGCTGAGTGG True 4.08 -2.0 48
1 Cas-OFFinder AAAGAACATCCAAGGCTGAGCGG GTTGAACATCCAAGGCTGAGAGG True 21.05 -4.0 51
2 Cas-OFFinder AATCTACATTATATTGCCCCTGG CCCCTACATTAAATTGCCCCAGG False 0.00 -5.0 130
3 Cas-OFFinder AATCTACATTATATTGCCCCTGG TGTCTACAATATATAGTCCCTGG False 0.00 -5.0 130
4 Cas-OFFinder AATCTACATTATATTGCCCCTGG TTTCTACACAATATTGCCCCAGG False 0.00 -5.0 130
5 Cas-OFFinder ACCCACCAAATAGAACCCCCAGG ACTCTTTAAATAGAACTCCCAGG False -0.02 -5.0 121
6 Cas-OFFinder ACCCACCAAATAGAACCCCCAGG CTGTACCAAATAGAACCTCCGGG False 0.00 -6.0 684
7 Cas-OFFinder ACCCACCAAATAGAACCCCCAGG GTTCTATAAATAGAACCCCCAGG False 0.00 -6.0 684
8 Cas-OFFinder ACCCACCAAATAGAACCCCCAGG TATTCCCAAATAGAACCCCCAGG False -0.06 -5.0 121
9 Cas-OFFinder ACTTTGCTGCCACAATACCTTGG ACTTCACTGCCATAATACCTGGG True 0.10 -4.0 53

Truth labels and score classes

Code
# Tool names that the score-class step labels 'coarse_ordinal'; any tool name
# not listed here is labelled 'continuous_site_level'.
DISCRETE_TOOLS = {'Cas-OFFinder', 'GuideScan2', 'CCTop', 'FlashFry', 'CRISPRitz_mismatch'}

def normalize_truth_status(value):
    """Collapse a raw truth label onto 'true', 'false', or 'unknown'.

    Missing values map to 'unknown'. Booleans (Python or NumPy) map to their
    lowercase name. Anything else is converted to text, stripped, and
    lowercased: 'true'/'off' become 'true', 'false'/'on' become 'false', and
    every other text becomes 'unknown'.
    """
    text_labels = {
        'true': 'true',
        'off': 'true',
        'false': 'false',
        'on': 'false',
    }

    # Missing values are checked first so NaN/None never reach the text path.
    if pd.isna(value):
        return 'unknown'

    # Booleans are handled before stringification.
    if isinstance(value, (bool, np.bool_)):
        return 'true' if value else 'false'

    cleaned = str(value).strip().lower()
    return text_labels.get(cleaned, 'unknown')

# Work on a copy so matched_long keeps its labels exactly as loaded.
figure4_df = matched_long.copy()

# Collapse the raw truth labels onto 'true' / 'false' / 'unknown'.
figure4_df['truth_status'] = figure4_df['truth_status'].map(normalize_truth_status)

# A self match is a row whose off-target sequence equals its guide key; a row
# with either value missing is never flagged as a self match.
guide_text = figure4_df['guide_key'].astype('string')
site_text = figure4_df['offtarget_seq'].astype('string')
both_present = figure4_df['guide_key'].notna() & figure4_df['offtarget_seq'].notna()
figure4_df['is_self_match'] = guide_text.eq(site_text) & both_present

# Tag each row by the score scale of its tool.
tool_names = figure4_df['tool'].astype(str)
figure4_df['score_class'] = tool_names.map(
    lambda name: 'continuous_site_level' if name not in DISCRETE_TOOLS else 'coarse_ordinal'
)

figure4_df[['tool', 'truth_status', 'raw_score', 'delta_indels', 'score_class']].head(10)
tool truth_status raw_score delta_indels score_class
0 Cas-OFFinder true -2.0 4.08 coarse_ordinal
1 Cas-OFFinder true -4.0 21.05 coarse_ordinal
2 Cas-OFFinder false -5.0 0.00 coarse_ordinal
3 Cas-OFFinder false -5.0 0.00 coarse_ordinal
4 Cas-OFFinder false -5.0 0.00 coarse_ordinal
5 Cas-OFFinder false -5.0 -0.02 coarse_ordinal
6 Cas-OFFinder false -6.0 0.00 coarse_ordinal
7 Cas-OFFinder false -6.0 0.00 coarse_ordinal
8 Cas-OFFinder false -5.0 -0.06 coarse_ordinal
9 Cas-OFFinder true -4.0 0.10 coarse_ordinal

The notebook distinguishes continuous score scales from coarse ordinal score scales before plotting.

Filtered matched rows

Code
# Keep a row only when it is labelled 'true', is not a self match, and has both
# delta_indels and raw_score present.
is_true_site = figure4_df['truth_status'] == 'true'
not_self_match = ~figure4_df['is_self_match']
has_signal = figure4_df['delta_indels'].notna()
has_score = figure4_df['raw_score'].notna()
figure4_df = figure4_df[is_true_site & not_self_match & has_signal & has_score].copy()

figure4_df[['tool', 'guide_key', 'delta_indels', 'raw_score', 'score_class']].head(10)
tool guide_key delta_indels raw_score score_class
0 Cas-OFFinder AAAGAACATCCAAGGCTGAGCGG 4.080 -2.0 coarse_ordinal
1 Cas-OFFinder AAAGAACATCCAAGGCTGAGCGG 21.050 -4.0 coarse_ordinal
9 Cas-OFFinder ACTTTGCTGCCACAATACCTTGG 0.100 -4.0 coarse_ordinal
14 Cas-OFFinder ACTTTGCTGCCACAATACCTTGG 0.143 -4.0 coarse_ordinal
32 Cas-OFFinder ATAGGAGAAGATGATGTATAGGG 0.736 -3.0 coarse_ordinal
38 Cas-OFFinder ATAGGAGAAGATGATGTATAGGG 0.266 -5.0 coarse_ordinal
45 Cas-OFFinder ATAGGAGAAGATGATGTATAGGG 0.182 -5.0 coarse_ordinal
57 Cas-OFFinder CATACAGGGCTCTGTACCCAGGG 0.292 -5.0 coarse_ordinal
62 Cas-OFFinder CCCTGGCTACCTCCCCTACCCGG 1.704 -3.0 coarse_ordinal
63 Cas-OFFinder CCCTGGCTACCTCCCCTACCCGG 0.547 -3.0 coarse_ordinal

Rows are restricted to validated true off-target sites with a non-missing raw_score and a non-missing delta_indels. Self matches (rows whose offtarget_seq equals the guide_key) are removed.

Correlation helper

Code
def spearman_no_scipy(x: pd.Series, y: pd.Series) -> float:
    """Return the Spearman rank correlation of *x* and *y* without SciPy.

    Both inputs are coerced to numeric (unparseable entries become NaN) and
    only rows where both values are present are kept. The result is the
    Pearson correlation of the average-method ranks. NaN is returned when
    fewer than two complete pairs remain or when either side has fewer than
    two distinct values.
    """
    x_numeric = pd.to_numeric(x, errors='coerce')
    y_numeric = pd.to_numeric(y, errors='coerce')
    complete = pd.DataFrame({'x': x_numeric, 'y': y_numeric}).dropna()

    # A correlation is undefined for fewer than two points or a constant side.
    if len(complete) < 2:
        return np.nan
    if any(complete[column].nunique() < 2 for column in ('x', 'y')):
        return np.nan

    # Rank both columns at once; ties share their average rank.
    ranks = complete.rank(method='average')
    return float(ranks['x'].corr(ranks['y'], method='pearson'))

The figure annotates each panel with a Spearman correlation computed directly from the plotted values.

Figure generation

Code
# Output directory for generated figures; created if it does not exist yet.
figure_dir = layout.docs_dir / 'generated_figures'
figure_dir.mkdir(parents=True, exist_ok=True)

# Tools are compared by their string form both when listing panels and when
# selecting each panel's rows, so the two steps cannot disagree when the
# 'tool' column holds non-string values.
tool_labels = figure4_df['tool'].astype(str)
tool_list = sorted(figure4_df['tool'].dropna().astype(str).unique().tolist())
if not tool_list:
    # Without this guard the grid below would be requested with zero rows and
    # fail with a message that does not point at the real cause.
    raise ValueError('Figure 4 has nothing to plot: figure4_df contains no tools after filtering.')

# One panel per tool, laid out three per row.
n_cols = 3
n_rows = int(np.ceil(len(tool_list) / n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(4.7 * n_cols, 4.2 * n_rows), squeeze=False)
flat_axes = axes.ravel()

for idx, tool in enumerate(tool_list):
    ax = flat_axes[idx]
    sub = figure4_df[tool_labels == tool].copy()
    # Only rows with a numeric score and a numeric delta_indels are plotted.
    plot_df = pd.DataFrame({
        'raw_score': pd.to_numeric(sub['raw_score'], errors='coerce'),
        'delta_indels': pd.to_numeric(sub['delta_indels'], errors='coerce'),
    }).dropna()

    if sub['score_class'].iloc[0] == 'continuous_site_level':
        # Continuous scores: plain scatter on the raw score axis.
        ax.scatter(plot_df['raw_score'], plot_df['delta_indels'], alpha=0.7, s=24)
        ax.set_xlabel('Raw score')
    else:
        # Coarse scores: one evenly spaced x position per distinct score value,
        # in ascending score order, with points spread horizontally so that
        # overlapping values stay visible.
        buckets = sorted(plot_df['raw_score'].dropna().astype(float).unique().tolist())
        for pos, bucket in enumerate(buckets, start=1):
            values = plot_df.loc[plot_df['raw_score'] == bucket, 'delta_indels'].to_numpy()
            # Deterministic spread within +/-0.12 of the bucket position; a
            # single point stays centred.
            jitter = np.linspace(-0.12, 0.12, num=len(values)) if len(values) > 1 else np.zeros(len(values))
            ax.scatter(np.full(len(values), pos) + jitter, values, s=18, alpha=0.55)
        ax.set_xticks(range(1, len(buckets) + 1))
        ax.set_xticklabels([str(bucket) for bucket in buckets])
        ax.set_xlabel('Raw score bucket')

    # Annotate the panel with the rank correlation of exactly the plotted values.
    rho = spearman_no_scipy(plot_df['raw_score'], plot_df['delta_indels'])
    ax.text(
        0.03,
        0.97,
        f"Spearman r={rho:.3f}" if pd.notna(rho) else 'Spearman r=n/a',
        transform=ax.transAxes,
        ha='left',
        va='top',
        fontsize=8,
        bbox={'boxstyle': 'round,pad=0.25', 'facecolor': 'white', 'alpha': 0.8, 'edgecolor': 'none'},
    )
    ax.set_title(tool)
    ax.set_ylabel('Delta indels')
    ax.grid(alpha=0.2)

# Hide the unused axes in the last grid row.
for ax in flat_axes[len(tool_list):]:
    ax.set_axis_off()

fig.suptitle('Figure 4. Score versus delta indels at validated true off target sites')
# Leave the top 4% of the canvas free for the suptitle.
fig.tight_layout(rect=[0, 0, 1, 0.96])
out_path = figure_dir / 'figure_4_score_vs_indels_by_tool.png'
fig.savefig(out_path, dpi=300)
# Close the figure so only the saved image is shown by the display call below.
plt.close(fig)

display(Image(filename=str(out_path)))