Download benchmarking data from S3 with Neuroglancer

This notebook explains how to:

  1. Read benchmarking data from S3 via Neuroglancer

  2. Download raw benchmarking data to your local computer

Quick notes on the benchmarking data:

In octree format, the data is stored in folders labeled test_1 through test_25 and validation_1 through validation_25.

If you get a reshape error when downloading, try uploading the segments first and then re-uploading the volumes.
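
A minimal sketch of that workaround, assuming your copy of brainlit still exposes the upload helpers used in the uploading tutorial (upload.upload_segments and upload.upload_volumes) and that you have write access to the destination; the paths below are placeholders:

from brainlit.utils import upload

source = "./octree-data"                                 # hypothetical local octree directory
dest = "file://./upload/precomputed_volume"              # hypothetical destination for image volumes
dest_segments = "file://./upload/precomputed_segments"   # hypothetical destination for segments

upload.upload_segments(source, dest_segments, 1)  # upload segments first
upload.upload_volumes(source, dest, 1)            # then (re-)upload the volumes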

Known issues with a few of the files:

  • test_9, test_10 - did not seem to have good .swc alignment

  • test_24 - issues with the image

  • validation_11 - there appears to be a shift between the .swc files and the image
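
If you loop over all of the datasets, you may want to skip the problematic ones listed above. A minimal sketch (the skip list simply mirrors the notes above):

# Datasets flagged above with alignment or image issues
known_issues = {"test_9", "test_10", "test_24", "validation_11"}

all_datasets = [f"test_{i}" for i in range(1, 26)] + [f"validation_{i}" for i in range(1, 26)]
usable = [name for name in all_datasets if name not in known_issues]
print(f"{len(usable)} of {len(all_datasets)} datasets have no known issues")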

[1]:
import napari
from napari.utils import nbscreenshot

Define locations

[2]:
from brainlit.utils import session
from brainlit.utils.Neuron_trace import NeuronTrace

# Can change to test_1 through test_25 or validation_1 through validation_25
dest = "s3://open-neurodata/brainlit/benchmarking_data/validation_7"
dest_segments = "s3://open-neurodata/brainlit/benchmarking_data/validation_7"

Create Neuroglancer session & download benchmarking volume

[3]:
%%capture
sess = session.NeuroglancerSession(url=dest, url_segments=dest_segments, mip=0)  # create session object
img, bounds, vertices = sess.pull_vertex_list(1, [1], 0, expand=True)  # get full benchmarking image
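
As a quick sanity check on what was returned above (img is the downloaded image array, bounds the bounding box it was cut from, and vertices the vertex coordinates of the segment), you can print their sizes:

print(img.shape)      # dimensions of the downloaded benchmarking volume
print(bounds)         # bounding box of the cutout
print(len(vertices))  # number of vertices pulled for the segment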

Download a specific .swc

[4]:
seg_id = 1 # Can change

G_paths = sess.get_segments(seg_id, bounds, rounding=False)
G = G_paths[0]      # graph of the traced segment
paths = G_paths[1]  # list of coordinate paths through the graph
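
A quick look at what came back (assuming, as in brainlit's other utilities, that G is a networkx graph and paths is a list of coordinate arrays):

print(G.number_of_nodes(), "nodes in the segment graph")
print(len(paths), "paths extracted from the graph")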

Visualize with napari

[5]:
#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(img)
#viewer.add_shapes(data=paths, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)
[6]:
#nbscreenshot(viewer, canvas_only = True)

Download raw benchmarking data

This will download the benchmarking data in .tif and .swc format to a local destination.

[7]:
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import os
from pathlib import Path
import numpy as np
from skimage import io
from tqdm import tqdm
[8]:
cwd = Path(os.path.abspath(''))
data_dir = os.path.join(cwd, "data")
print(f"Downloading segments to {data_dir}")
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

im_dir = os.path.join(data_dir, "sample-tif-location")
if not os.path.exists(im_dir):
    os.makedirs(im_dir)

swc_dir = os.path.join(data_dir, "sample-swc-location")
if not os.path.exists(swc_dir):
    os.makedirs(swc_dir)
Downloading segments to /opt/build/repo/docs/notebooks/utils/data

On Mac/Linux, os.path.join can be used to construct the S3 prefix. On Windows, however, os.path.join inserts backslashes, which do not match S3 object keys, so set prefix to the string "brainlit/benchmarking_data/tif-files" instead.
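
A platform-independent alternative is to join the key components with a forward slash explicitly, since S3 object keys always use "/" regardless of the operating system:

# Works the same on Windows, Mac, and Linux
prefix = "/".join(["brainlit", "benchmarking_data", "tif-files"])
print(prefix)  # brainlit/benchmarking_data/tif-files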

[9]:
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
prefix = "brainlit/benchmarking_data/tif-files" #use this for windows
# prefix = os.path.join("brainlit", "benchmarking_data", "tif-files") #use this for mac/linux
# Count the objects first so tqdm can display overall progress
im_count = sum(1 for _ in bucket.objects.filter(Prefix=prefix))
for im_obj in tqdm(bucket.objects.filter(Prefix=prefix), total=im_count):
    if im_obj.key[-4:] == '.tif':
        im_name = os.path.basename(im_obj.key)
        im_path = os.path.join(im_dir, im_name)
        bucket.download_file(im_obj.key, im_path)
52it [00:23,  2.23it/s]
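
To confirm the download, you can list the .tif files that landed in im_dir:

tif_files = sorted(Path(im_dir).glob("*.tif"))
print(f"Downloaded {len(tif_files)} .tif files to {im_dir}")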

The code below visualizes a specified .tif file.

[10]:
file_name = "test_10-gfp.tif" # Can change to any image (test 1-25, validation 1-25)

im_file = Path(im_dir) / file_name
im = io.imread(im_file, plugin="tifffile")

#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(im)
[11]:
#nbscreenshot(viewer, canvas_only = True)

Again, on Windows you should set prefix to the literal string rather than building it with os.path.join.

[12]:
s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
bucket = s3.Bucket("open-neurodata")
prefix = "brainlit/benchmarking_data/Manual-GT" #use this for windows
# prefix = os.path.join("brainlit", "benchmarking_data", "Manual-GT") #use this for mac/linux
# Count the objects first so tqdm can display overall progress
swc_count = sum(1 for _ in bucket.objects.filter(Prefix=prefix))
for swc_obj in tqdm(bucket.objects.filter(Prefix=prefix), total=swc_count):
    if swc_obj.key[-4:] == '.swc':
        idx = swc_obj.key.find('Manual-GT')
        swc_name = swc_obj.key[idx:]
        swc_path = os.path.join(swc_dir, swc_name)
        swc_parent_dir = os.path.dirname(swc_path)
        if not os.path.exists(swc_parent_dir):
            os.makedirs(swc_parent_dir)
        bucket.download_file(swc_obj.key, swc_path)
601it [01:10,  8.49it/s]
[13]:
from brainlit.utils.benchmarking_params import brain_offsets, vol_offsets, scales, type_to_date
from brainlit.utils.Neuron_trace import NeuronTrace
from pathlib import Path
import numpy as np
from skimage import io
[14]:
im_dir = Path(im_dir)
swc_base_path = Path(swc_dir) / "Manual-GT"

gfp_files = list(im_dir.glob("**/*-gfp.tif"))
[15]:
for im_num, im_path in enumerate(gfp_files):
    print(f"Image {im_num+1}/{len(gfp_files)}")
    print(im_path)

    # Filenames look like "validation_20-gfp.tif": strip the "-gfp.tif" suffix,
    # then split into the dataset type ("test"/"validation") and its number
    f = im_path.parts[-1][:-8].split("_")
    image = f[0]
    date = type_to_date[image]
    num = int(f[1])

    scale = scales[date]
    brain_offset = brain_offsets[date]
    vol_offset = vol_offsets[date][num]
    im_offset = np.add(brain_offset, vol_offset)

    # Work out which group-of-five folder holds this image's traces (see dir1/dir2 below)
    lower = int(np.floor((num - 1) / 5) * 5 + 1)
    upper = int(np.floor((num - 1) / 5) * 5 + 5)
    dir1 = date + "_" + image + "_" + str(lower) + "-" + str(upper)
    dir2 = date + "_" + image + "_" + str(num)
    swc_path = swc_base_path / dir1 / dir2
    swc_files = list(swc_path.glob("**/*.swc"))
    im = io.imread(im_path, plugin="tifffile")
    print(f"Image shape: {im.shape}")

    paths_total = []
    for swc_num, swc in enumerate(swc_files):
        if "0" in swc.parts[-1]:
            # skip the bounding box swc
            continue

        swc_trace = NeuronTrace(path=str(swc))
        paths = swc_trace.get_paths()
        swc_offset, _, _, _ = swc_trace.get_df_arguments()
        offset_diff = np.subtract(swc_offset, im_offset)

        for path_num, p in enumerate(paths):
            # shift the path into the image's frame and rescale to voxel coordinates
            pvox = (p + offset_diff) / (scale) * 1000
            paths_total.append(pvox)

    break  # only process the first image in this demonstration
Image 1/50
/opt/build/repo/docs/notebooks/utils/data/sample-tif-location/validation_20-gfp.tif
Image shape: (100, 330, 330)
[16]:
#viewer = napari.Viewer(ndisplay=3)
#viewer.add_image(np.swapaxes(im,0,2))
#viewer.add_shapes(data=paths_total, shape_type='path', edge_width=1.0, edge_color='blue', opacity=0.8)
[17]:
#nbscreenshot(viewer, canvas_only = True)