MSI Export to HDF5
This notebook reads the MSI data and exports it as HDF5:
[5]:
# import custom modules
from miaaim.io.imwrite._export import HDIexporter
# import other modules
import numpy as np
from scipy.interpolate import griddata
from scipy.spatial import KDTree
import pandas as pd
from pathlib import Path
from tqdm import tqdm
# check versions and environment executable
import sys
sys.version
sys.executable
[5]:
'/opt/conda/envs/miaaim-dev/bin/python'
Set paths to csv data with root mean square normalized MSI data
[6]:
# set path to data folder
data_folder = Path("/opt/miaaim-20220912-TMA4/data")
# get list of cores
coreFolders = [ x for x in list(data_folder.iterdir()) if x.is_dir() ]
Convert to HDF5 and export
[7]:
# read csv file with MSI data
msiRMS = pd.read_csv(data_folder.joinpath("msi_rms_tma_peptides.csv"))
[8]:
# get unique TMA core names
coreNames = msiRMS.Region.unique()
# get columns of csv file with m/z information
mzCols = [ msiRMS.columns[i] for i in range(len(msiRMS.columns)) if 'm.z' in msiRMS.columns[i] ]
# set the size of a pixel in the MSI data
pixRes = 20.0
[9]:
# iterate through cores and extract data
for c in tqdm(coreNames):
# get the name to match to the core
reg = c.replace(" ","")
# get the corresponding core folder
coreFolder = [ x for x in coreFolders if x.name.endswith(reg) ][0]
# get the name of this folder
name = coreFolder.name
# get the input folder
inputFolder = coreFolder.joinpath("input/msi")
# get the data for this core
data = msiRMS[msiRMS.Region == c]
# write the data as a csv to msi data folder
# data.to_csv(inputFolder.joinpath(name+".csv"))
# get xy centroids
x,y = data.x, data.y
# get mz data
mzs = data[mzCols]
# get max and min xy positions
max_x = int(np.ceil(x.max()))
min_x = int(np.floor(x.min()))
max_y = int(np.ceil(y.max()))
min_y = int(np.floor(y.min()))
# subtract xy to scale to origin (0,0)
x = (max_x - x)/pixRes
y = (max_y - y)/pixRes
# interpolation
# generate a grid where the interpolation will be calculated
X, Y = np.meshgrid(np.arange((max_x-min_x)/pixRes), np.arange((max_y-min_y)/pixRes))
# iterate over pixels and create image with nearest neighbor interpolation
# to preserve range
data_cube = griddata(np.vstack((x, y)).T, mzs, (X, Y), method='nearest')
# get interpolated data for nan outside of convex hull
data_cube_nan_idx = griddata(np.vstack((x, y)).T, mzs, (X, Y), method='cubic')
# create mask for pixel data
mask = np.zeros((data_cube.shape[0], data_cube.shape[1]),dtype='uint8')
mask[~np.isnan(data_cube_nan_idx[:,:,0])] = 255
# replace nan with 0s
data_cube[np.isnan(data_cube)] = 0
# flip left right for alignment to IMC data
data_cube = np.fliplr(data_cube)
mask = np.fliplr(mask)
# write array to hdf5
HDIexporter(data_cube,inputFolder.joinpath(name+".hdf5"))
# write mask
HDIexporter(mask,inputFolder.joinpath(name+"_mask.tiff"))
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [02:24<00:00, 3.01s/it]