Ensemble sorting of a Neuropixels recording (2)
This notebook reproduces supplemental figure S2 from the paper SpikeInterface, a unified framework for spike sorting.
The recording was made by André Marques-Smith in the lab of Adam Kampff. Reference:
Marques-Smith, A., Neto, J.P., Lopes, G., Nogueira, J., Calcaterra, L., Frazão, J., Kim, D., Phillips, M., Dimitriadis, G., Kampff, A.R. (2018). Recording from the same neuron with high-density CMOS probes and patch-clamp: a ground-truth dataset and an experiment in collaboration. bioRxiv 370080; doi: https://doi.org/10.1101/370080
The data set for this notebook is available on the Dandi Archive: https://gui.dandiarchive.org/#/dandiset/000034
The entire data archive (about 75 GB) can be downloaded with the command dandi download https://gui.dandiarchive.org/#/dandiset/000034/draft
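If you prefer to script the download, the dandi CLI can also be called from Python; a minimal sketch, assuming the dandi command-line tool is installed and on your PATH:

import subprocess
# download the full dandiset (about 75 GB) into the current directory
subprocess.run(['dandi', 'download',
                'https://gui.dandiarchive.org/#/dandiset/000034/draft'],
               check=True)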
File required to run the code:
- the raw data: sub-c1_ecephys.nwb
This file should be in the same directory where the notebook is located (otherwise adjust paths below).
Author: Matthias Hennig, University of Edinburgh, 25 Aug 2020
Requirements
To run this notebook, you will need the following Python packages:
- numpy
- pandas
- matplotlib
- seaborn
- spikeinterface
- dandi
- matplotlib_venn
To run the MATLAB-based sorters, you will also need a MATLAB license. For the other sorters, please refer to the SpikeInterface documentation on how to install them. A quick way to check which sorters are usable in your environment is shown after the path setup below.
import os
# Matlab sorter paths:
# change these to match your environment
os.environ["IRONCLUST_PATH"] = "./ironclust"
os.environ["KILOSORT2_PATH"] = "./Kilosort2"
os.environ["HDSORT_PATH"] = "./HDsort"
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import pandas as pd
import seaborn as sns
from collections import defaultdict
from matplotlib_venn import venn3
import spikeinterface as si
import spikeextractors as se
import spiketoolkit as st
import spikesorters as ss
import spikecomparison as sc
import spikewidgets as sw
from spikecomparison import GroundTruthStudy, MultiSortingComparison
%matplotlib inline
def clear_axes(ax):
    # hide the top and right spines for cleaner figures
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
# print version information
si.print_spikeinterface_version()
ss.print_sorter_versions()
# the recording file, downloaded from Dandi in NWB:N format
data_file = './sub-c1_ecephys.nwb'
# paths for spike sorter outputs
p = Path('./')
# select spike sorters to be used; note that Kilosort2 requires an NVIDIA GPU to run
sorter_list = ['herdingspikes', 'kilosort2', 'ironclust', 'tridesclous', 'spykingcircus', 'hdsort']
sorter_params = {
    # 'kilosort2': {'keep_good_only': True},  # caution: discards all units Kilosort2 does not label 'good'
    'mountainsort4': {'adjacency_radius': 50},
    'spyking_circus': {'adjacency_radius': 50},
    'herdingspikes': {'filter': True},
}
sorter_names = ['HerdingSpikes', 'Kilosort2', 'Ironclust','Tridesclous', 'SpykingCircus', 'HDSort']
sorter_names_short = ['HS', 'KS', 'IC', 'TDC', 'SC', 'HDS']
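Any parameter not set here falls back to the sorter's default; the defaults can be inspected per sorter (a short sketch, assuming your spikesorters version provides get_default_params):

# print the default parameters used where none are overridden above
for sorter in sorter_list:
    print(sorter, ss.get_default_params(sorter))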
# create a recording extractor, this gives access to the raw data in the NWB:N file
recording = se.NwbRecordingExtractor(data_file)
# NWB:N files store the data in (channels:time) order, but for spike sorting the transposed
# format is much more efficient. Uncomment the line below to create a CacheRecordingExtractor
# that re-writes the data as a binary file in the desired order. This takes some time, but
# speeds up all subsequent steps:
# recording = se.CacheRecordingExtractor(recording)
# print some info
print("Sampling rate: {}Hz".format(recording.get_sampling_frequency()))
print("Duration: {}s".format(recording.get_num_frames()/recording.get_sampling_frequency()))
print("Number of channels: {}".format(recording.get_num_channels()))
Run spike sorters and perform comparison between all outputs
# now create the study environment and run all spike sorters
# note that this function will not re-run a spike sorter if the sorting is already present in
# the working folder
study_folder = p / 'study/'
working_folder = p / 'working/'
if not study_folder.is_dir():
    print('Setting up study folder:', study_folder)
    os.mkdir(study_folder)
rec_dict = {'rec': recording}
result_dict = ss.run_sorters(sorter_list=sorter_list, recording_dict_or_list=rec_dict, with_output=True,
                             sorter_params=sorter_params, working_folder=working_folder, engine='loop',
                             mode='keep', verbose=True)
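# note: if the notebook is restarted later, the cached sortings can be collected
# from the working folder without re-running the sorters; based on the
# spikesorters API at the time of writing (check your installed version):
# result_dict = ss.collect_sorting_outputs(working_folder)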
# when done, load all sortings into a handy list
sortings = []
for s in sorter_list:
    sortings.append(result_dict['rec', s])
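Each entry is a SortingExtractor; a quick look at what one of them contains:

# inspect one sorting: unit count and the first unit's number of spikes
example_sorting = sortings[0]
unit_ids = example_sorting.get_unit_ids()
print('Units found:', len(unit_ids))
print('Spikes in first unit:', len(example_sorting.get_unit_spike_train(unit_ids[0])))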
# run a multi-sorting comparison, an all-to-all comparison
# results are saved and just loaded from a file if this exists
if not os.path.isfile(study_folder / 'multicomparison.gpickle'):
    mcmp = sc.compare_multiple_sorters(sorting_list=sortings, name_list=sorter_names_short,
                                       verbose=True)
    print('saving multicomparison')
    mcmp.dump(study_folder)
else:
    print('loading multicomparison')
    mcmp = sc.MultiSortingComparison.load_multicomparison(study_folder)
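Besides the summary plots below, the multi-comparison can return a consensus sorting that keeps only units matched across a minimum number of sorters; a short sketch, based on the spikecomparison API at the time of writing (check the argument name in your version):

# extract units agreed upon by at least two sorters
agreement_sorting = mcmp.get_agreement_sorting(minimum_agreement_count=2)
print('Units matched by >= 2 sorters:', len(agreement_sorting.get_unit_ids()))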
# plot an activity map
# the method uses a rather simple (and slow) threshold spike detection
plt.figure(figsize=(16,2))
ax = plt.subplot(111)
w = sw.plot_activity_map(recording, transpose=True, ax=ax, background='w', frame=True)
ax.plot((50,150),(-40,-40),'k-')
ax.annotate('100$\\mu m$',(100,-115), ha='center');
# raw data traces
plt.figure(figsize=(12,3))
ax = plt.subplot(111)
w = sw.plot_timeseries(recording, channel_ids=range(160,168), color='k', ax=ax, trange=(7,8))
ax.axis('off')
ax.plot((7.01,7.11),(20,20),'k-')
ax.annotate('100ms',(7.051,-190), ha='center');
# number of units found by each sorter
ax = plt.subplot(111)
ax.bar(range(len(sortings)), [len(s.get_unit_ids()) for s in sortings])
ax.set_xticks(range(len(sorter_names)))
ax.set_xticklabels(sorter_names_short, rotation=60, ha='center')
ax.set_ylabel('Units detected')
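The unit counts can also be collected into a pandas DataFrame for easier reading; a small sketch using the packages imported above:

# tabulate the number of detected units per sorter
unit_counts = pd.DataFrame({'sorter': sorter_names_short,
                            'units': [len(s.get_unit_ids()) for s in sortings]})
print(unit_counts)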
# spikewidgets provides handy widgets to plot summary statistics of the comparison
# show the number of units agreed upon by k sorters, in aggregate
plt.figure()
ax = plt.subplot(111)
w = sw.plot_multicomp_agreement(mcmp, plot_type='pie', ax=ax)
# show the number of units agreed upon by k sorters, per sorter
plt.figure()
ax = plt.subplot(111)
w = sw.plot_multicomp_agreement_by_sorter(mcmp, show_legend=True, ax=ax)
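spikewidgets can also draw the agreement graph itself, with units as nodes and matches between sorters as edges; a sketch, assuming plot_multicomp_graph is available in your spikewidgets version:

plt.figure()
ax = plt.subplot(111)
w = sw.plot_multicomp_graph(mcmp, ax=ax)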