import numpy as np
import pandas as pd
import os
import json
import requests
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
import plotly.express as px
from _cde_compute_edges_from_nodes import *
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# suppress warnings
import warnings

# NOTE(review): this silences *all* warnings for the rest of the session,
# including pandas/seaborn deprecation notices.
warnings.filterwarnings("ignore")
Distance Analysis: bonemarrow-codex-chop
Analyze and visualize cell-to-nearest-endothelial-cell distance distributions for the `bonemarrow-codex-chop` dataset.
# Root data directory and the dataset analyzed here.
basepath = "/u/yashjain/hra-cell-distance-analysis/data"
dataset_dir = "bonemarrow-codex-chop"
# Sub-directories (relative to basepath) holding the node files, the edge
# files, and the generated figures.
data_filedir = os.path.join("data-processed-nodes-with-harmonized-cell-types", dataset_dir)
output_edge_dir = os.path.join("data-processed-edges", dataset_dir)
figures_output_dir = "generated-figures"
# Function to load your data
def load_data(path, edges=False):
    """Read a CSV file into a DataFrame.

    Args:
        path: Path of the CSV file to read.
        edges: When True, the file is read as a header-less edge list and the
            columns are named ``cell_id, x1, y1, z1, x2, y2, z2``. When False
            (default), the first row of the file is used as the header.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    if edges:
        column_names = ['cell_id', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2']
        data = pd.read_csv(path, header=None, names=column_names)
    else:
        data = pd.read_csv(path)
    return data
# Function to read all files ending with "-nodes.csv" in the `data_filedir` directory into a single DataFrame.
# Another additional column `Dataset` is added to identify the dataset name which comes from the filename before the `-nodes.csv` suffix.
# Additionally, function reads all files ending with "-edges.csv" in the `output_edge_dir` directory into a single DataFrame.
# Three additional columns are added "Dataset", "Anchor Cell Type", and "Anchor Cell Type Level" to identify the dataset name, anchor cell type, and anchor cell type level respectively which come from the filename before the `.csv` suffix.
# The three additional columns are created by splitting the filename on the `-` character, and extracting the relevant parts.
# On splitting, the first part is the dataset name, second part is the anchor cell type level, and third part is the anchor cell type, and last part is the `edges` suffix.
# When reading files, check if the file has the correct format (i.e., ends with `-edges.csv`).
# Additionally, the function merges the edges DataFrame with the nodes DataFrame to get the cell type information for the anchor cells.
# This is done by reading the corresponding nodes file from the `data_filedir` directory for each edges file, and merging it with the edges DataFrame on the `cell_id` column.
# The merged DataFrame contains the edges with additional columns for the cell type information.
# The function returns three DataFrames:
# 1. `merged_nodes`: DataFrame containing all nodes with an additional column `Dataset`.
# 2. `merged_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, and `Anchor Cell Type Level`.
# 3. `merged_nodes_for_all_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, `Anchor Cell Type Level`, and the cell type information for cells.
def read_all_edge_datasets(basepath, data_filedir, output_edge_dir):
    """Load every edges file and its matching nodes file into merged DataFrames.

    Every file in ``basepath/output_edge_dir`` whose name ends with
    ``-edges.csv`` is read. The file name (without that suffix) is split on
    ``-`` into ``<dataset>-<anchor cell type level>-<anchor cell type>``. The
    matching nodes file ``basepath/data_filedir/<dataset>-nodes.csv`` supplies
    the cell type of each edge's source cell: ``cell_id`` in the edges file is
    treated as the row position in the nodes file.

    Each nodes file is read only once per dataset, so ``merged_nodes`` holds
    every node exactly once even when a dataset has several edges files.

    Args:
        basepath: Root data directory.
        data_filedir: Directory (relative to ``basepath``) with ``-nodes.csv`` files.
        output_edge_dir: Directory (relative to ``basepath``) with ``-edges.csv`` files.

    Returns:
        A tuple ``(merged_nodes, merged_edges, merged_nodes_for_all_edges)``:
        all nodes with a ``Dataset`` column; all edges with ``Dataset``,
        ``Anchor Cell Type`` and ``Anchor Cell Type Level`` columns; and the
        edges additionally joined with the three ``Level ... Cell Type``
        columns of their source cell.

    Raises:
        ValueError: If an edges file name does not split into exactly three
            ``-``-separated parts, or if no edges file is found.
    """
    cell_type_columns = ['Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type']
    edges_dir = os.path.join(basepath, output_edge_dir)

    all_nodes_files = []
    all_edges_files = []
    all_nodes_edges_files = []
    # Cell type lookup per dataset, indexed by row position (== cell_id).
    cell_types_by_dataset = {}

    for file in os.listdir(edges_dir):
        if not file.endswith("-edges.csv"):
            continue

        name_parts = file.replace("-edges.csv", "").split('-')
        if len(name_parts) != 3:
            raise ValueError(
                f"Unexpected edges file name '{file}': expected "
                "'<dataset>-<anchor cell type level>-<anchor cell type>-edges.csv'."
            )
        dataset_name, anchor_cell_type_level, anchor_cell_type = name_parts

        edges_df = load_data(os.path.join(edges_dir, file), edges=False)
        edges_df['Dataset'] = dataset_name
        edges_df['Anchor Cell Type'] = anchor_cell_type
        edges_df['Anchor Cell Type Level'] = anchor_cell_type_level
        edges_df.rename(columns={"distance": "Distance"}, inplace=True)  # Rename column "distance" to "Distance".
        all_edges_files.append(edges_df)

        # Read the corresponding nodes file only the first time a dataset is
        # seen; re-reading it for every edges file would duplicate its rows in
        # the merged nodes table.
        if dataset_name not in cell_types_by_dataset:
            nodes_file_path = os.path.join(basepath, data_filedir, f"{dataset_name}-nodes.csv")
            nodes_df = load_data(nodes_file_path)
            nodes_df['Dataset'] = dataset_name
            all_nodes_files.append(nodes_df)
            cell_types_by_dataset[dataset_name] = nodes_df[cell_type_columns].reset_index(drop=True)

        # Merge edges with nodes to get the cell type information of each edge's cell.
        edges_nodes_df = pd.merge(
            edges_df,
            cell_types_by_dataset[dataset_name],
            how='left',
            left_on='cell_id',
            right_index=True,
        )
        all_nodes_edges_files.append(edges_nodes_df)

    if not all_edges_files:
        raise ValueError(f"No '-edges.csv' files found in '{edges_dir}'.")

    merged_edges = pd.concat(all_edges_files, ignore_index=True)
    merged_nodes = pd.concat(all_nodes_files, ignore_index=True)
    merged_nodes_for_all_edges = pd.concat(all_nodes_edges_files, ignore_index=True)

    return merged_nodes, merged_edges, merged_nodes_for_all_edges
def create_directory(directory):
    """Create ``directory`` (including parents) if needed and report the outcome.

    Args:
        directory: Path of the directory to create.
    """
    if not os.path.exists(directory):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs(directory, exist_ok=True)
        print(f"Directory '{directory}' created successfully.")
    else:
        print(f"Directory '{directory}' already exists.")
Get initial statistics and identify endothelial cell categories for dataset.
# Load all nodes, all edges, and the edges joined with their cell type information.
df_all_nodes, df_all_edges, df_all_edges_with_cell_types = read_all_edge_datasets(basepath, data_filedir, output_edge_dir)
df_all_nodes.head(5)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5776.546667 | 4000.546667 | Erythroid | erythroid lineage cell | erythroid lineage cell | CL:0000764 | skos:exactMatch | erythroid precursor | erythroid progenitor cell | CL:0000038 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
1 | 6406.298077 | 4001.134615 | B-Cells | b cell | B cell | CL:0000236 | skos:exactMatch | b cell | B cell | CL:0000236 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
2 | 6805.010526 | 4000.701754 | Erythroid | erythroid lineage cell | erythroid lineage cell | CL:0000764 | skos:exactMatch | erythroid precursor | erythroid progenitor cell | CL:0000038 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
3 | 6032.794326 | 4002.039007 | AEC | endothelial cell of artery | endothelial cell of artery | CL:1000413 | skos:exactMatch | endothelial cell of artery | endothelial cell of artery | CL:1000413 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
4 | 6159.116364 | 4001.763636 | Early Myeloid Progenitor | myeloid progenitor cell:common | common myeloid progenitor | CL:0000049 | skos:exactMatch | progenitor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
# Print the total number of unique cell types per dataset. Compute separately for each cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type).
print("Total number of unique cell types per cell type annnotation level:")
# Number of distinct labels in each cell type column, from the original
# annotation down to the coarsest harmonized level.
unique_cell_types = {
    column: df_all_nodes[column].nunique()
    for column in (
        'Original Cell Type',
        'Level Three Cell Type',
        'Level Two Cell Type',
        'Level One Cell Type',
    )
}
for cell_type, count in unique_cell_types.items():
    print(f"{cell_type}: {count}")
Total number of unique cell types per cell type annnotation level:
Original Cell Type: 37
Level Three Cell Type: 33
Level Two Cell Type: 22
Level One Cell Type: 6
# Save the unique cell types containing "endothelial" in name per cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type) to a dictionary where the key is the level and the value is a list of unique cell types.
# For each cell type column, collect the distinct labels whose name contains
# "endothelial" (case-insensitive; missing labels never match).
endothelial_cell_types = {
    column: df_all_nodes.loc[
        df_all_nodes[column].str.contains("endothelial", case=False, na=False),
        column,
    ].unique().tolist()
    for column in (
        'Original Cell Type',
        'Level Three Cell Type',
        'Level Two Cell Type',
        'Level One Cell Type',
    )
}

print("\nEndothelial cell types per cell type annotation level:")
for level, cell_types in endothelial_cell_types.items():
    print(f"\n{level}:")
    for cell in cell_types:
        print(f"  - {cell}")
Endothelial cell types per cell type annotation level:
Original Cell Type:
Level Three Cell Type:
- endothelial cell of artery
- endothelial cell of sinusoid
Level Two Cell Type:
- endothelial cell of artery
- endothelial cell of sinusoid
Level One Cell Type:
- endothelial cell
type_field_list = ["Level Three Cell Type", "Level Two Cell Type", "Level One Cell Type"]  # Skipping Original Cell Type as it is not a hierarchical level.

# Define the anchor cell type (type of endothelial cell) for each level in type_field_list based on available categories in the previous cell. The distance analysis at all three levels will be limited to the specified anchor cell type.
anchor_cell_type_dict = {
    'Level Three Cell Type': 'endothelial cell of sinusoid',  # Picking sinusoid instead of artery because the cell count of sinusoid is higher than artery in the dataset.
    'Level Two Cell Type': 'endothelial cell of sinusoid',
    'Level One Cell Type': 'endothelial cell'
}
Process datasets to add region information to Nodes files.
# List every distinct dataset name found in the edges table, after a header line.
print(
    "\nUnique values in Dataset column:",
    df_all_edges['Dataset'].unique().tolist(),
    sep="\n",
)
Unique values in Dataset column:
['SB67_NBM48_NSM1_1720', 'SB67_NBM49_NSM2_1086', 'SB67_NBM47_NSM3_1996', 'SB67_NBM51_AML1_183', 'SB67_NBM46_AML1_382', 'SB67_NBM44_AML2_191', 'SB67_NBM52_AML3_1329', 'SB67_NBM54_AML3_1443', 'SB67_NBM27_H10', 'SB67_NBM28_H14', 'SB67_NBM36_H26', 'SB67_NBM41_H27', 'SB67_NBM31_H32', 'SB67_NBM38_H33', 'SB67_NBM37_H35', 'SB67_NBM33_H36', 'SB67_NBM32_H37', 'SB67_NBM34_H38', 'SB67_NBM40_H39', 'SB67_NBM39_H41']
# Create a dictionary to map bone marrow regions to correct region names (conditions).
# AML = Acute Myeloid Leukemia
# NSM = Negative lymphoma Staging bone Marrow biopsies
# NBM = Normal Bone Marrow
region_map = {
    'SB67_NBM48_NSM1_1720': 'NSM',
    'SB67_NBM49_NSM2_1086': 'NSM',
    'SB67_NBM47_NSM3_1996': 'NSM',
    'SB67_NBM51_AML1_183': 'AML',
    'SB67_NBM46_AML1_382': 'AML',
    'SB67_NBM44_AML2_191': 'AML',
    'SB67_NBM52_AML3_1329': 'AML',
    'SB67_NBM54_AML3_1443': 'AML',
    'SB67_NBM27_H10': 'NBM',
    'SB67_NBM28_H14': 'NBM',
    'SB67_NBM36_H26': 'NBM',
    'SB67_NBM41_H27': 'NBM',
    'SB67_NBM31_H32': 'NBM',
    'SB67_NBM38_H33': 'NBM',
    'SB67_NBM37_H35': 'NBM',
    'SB67_NBM33_H36': 'NBM',
    'SB67_NBM32_H37': 'NBM',
    'SB67_NBM34_H38': 'NBM',
    'SB67_NBM40_H39': 'NBM',
    'SB67_NBM39_H41': 'NBM'
}
df_all_nodes.head()
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5776.546667 | 4000.546667 | Erythroid | erythroid lineage cell | erythroid lineage cell | CL:0000764 | skos:exactMatch | erythroid precursor | erythroid progenitor cell | CL:0000038 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
1 | 6406.298077 | 4001.134615 | B-Cells | b cell | B cell | CL:0000236 | skos:exactMatch | b cell | B cell | CL:0000236 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
2 | 6805.010526 | 4000.701754 | Erythroid | erythroid lineage cell | erythroid lineage cell | CL:0000764 | skos:exactMatch | erythroid precursor | erythroid progenitor cell | CL:0000038 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
3 | 6032.794326 | 4002.039007 | AEC | endothelial cell of artery | endothelial cell of artery | CL:1000413 | skos:exactMatch | endothelial cell of artery | endothelial cell of artery | CL:1000413 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
4 | 6159.116364 | 4001.763636 | Early Myeloid Progenitor | myeloid progenitor cell:common | common myeloid progenitor | CL:0000049 | skos:exactMatch | progenitor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 |
# Create a new "Unique Region" column in df_all_nodes by mapping the full
# dataset name in the "Dataset" column to its region name via region_map.
# Dataset names missing from region_map end up as NaN.
df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].map(region_map)
# df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].str.split('-').str[1].map(region_map)

# Check if the new columns are created correctly.
df_all_nodes[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | SB67_NBM48_NSM1_1720 | NSM |
1 | SB67_NBM48_NSM1_1720 | NSM |
2 | SB67_NBM48_NSM1_1720 | NSM |
3 | SB67_NBM48_NSM1_1720 | NSM |
4 | SB67_NBM48_NSM1_1720 | NSM |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_nodes['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_nodes['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
print("\nNumber of unique datasets per unique region:")
for region in df_all_nodes['Unique Region'].unique():
    num_datasets = df_all_nodes[df_all_nodes['Unique Region'] == region]['Dataset'].nunique()
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['NSM' 'AML' 'NBM']
Total number of unique regions: 3
Number of unique datasets per unique region:
NSM: 3
AML: 5
NBM: 12
Process datasets to add region information to Edges files.
df_all_edges.head(5)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1117 | 5746.650000 | 4541.361111 | 0 | 5895.403361 | 4573.613445 | 0 | 152.209644 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type |
1 | 1145 | 5734.788679 | 4551.400000 | 0 | 5895.403361 | 4573.613445 | 0 | 162.143496 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type |
2 | 1151 | 5756.300000 | 4553.104348 | 0 | 5895.403361 | 4573.613445 | 0 | 140.607141 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type |
3 | 1185 | 5746.238095 | 4563.619048 | 0 | 5895.403361 | 4573.613445 | 0 | 149.499714 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type |
4 | 1268 | 5624.227027 | 4594.664865 | 0 | 5541.917031 | 4700.170306 | 0 | 133.814549 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type |
# Process the edge data to create new columns "Unique Region" based on the "Dataset" column, similar to how it was done for the node data.
# Dataset names missing from region_map end up as NaN.
df_all_edges['Unique Region'] = df_all_edges['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | SB67_NBM48_NSM1_1720 | NSM |
1 | SB67_NBM48_NSM1_1720 | NSM |
2 | SB67_NBM48_NSM1_1720 | NSM |
3 | SB67_NBM48_NSM1_1720 | NSM |
4 | SB67_NBM48_NSM1_1720 | NSM |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_edges['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_edges['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
print("\nNumber of unique datasets per unique region:")
for region in df_all_edges['Unique Region'].unique():
    num_datasets = df_all_edges[df_all_edges['Unique Region'] == region]['Dataset'].nunique()
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['NSM' 'AML' 'NBM']
Total number of unique regions: 3
Number of unique datasets per unique region:
NSM: 3
AML: 5
NBM: 12
# Add the region of each dataset to the edges-with-cell-types table as well.
df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges_with_cell_types[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | SB67_NBM48_NSM1_1720 | NSM |
1 | SB67_NBM48_NSM1_1720 | NSM |
2 | SB67_NBM48_NSM1_1720 | NSM |
3 | SB67_NBM48_NSM1_1720 | NSM |
4 | SB67_NBM48_NSM1_1720 | NSM |
df_all_nodes.head(1)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5776.546667 | 4000.546667 | Erythroid | erythroid lineage cell | erythroid lineage cell | CL:0000764 | skos:exactMatch | erythroid precursor | erythroid progenitor cell | CL:0000038 | skos:exactMatch | hematopoietic precursor cell | hematopoietic precursor cell | CL:0008001 | skos:exactMatch | SB67_NBM48_NSM1_1720 | NSM |
df_all_edges.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1117 | 5746.65 | 4541.361111 | 0 | 5895.403361 | 4573.613445 | 0 | 152.209644 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type | NSM |
df_all_edges_with_cell_types.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Level Three Cell Type | Level Two Cell Type | Level One Cell Type | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1117 | 5746.65 | 4541.361111 | 0 | 5895.403361 | 4573.613445 | 0 | 152.209644 | SB67_NBM48_NSM1_1720 | endothelial cell of artery | Level Three Cell Type | erythroid lineage cell | erythroid precursor | hematopoietic precursor cell | NSM |
Node Analysis
# Plot number of cells per cell type in the same plot. Color by cell type and unique region. Output figure saved in existing `figures_output_dir`.
def plot_cells_per_celltype(df, type_field, output_dir):
    """Plot and save the number of cells per cell type, split by region.

    Draws a count plot of ``type_field`` with one bar per ``Unique Region``
    value, saves it as PNG and SVG in ``output_dir`` (file names are prefixed
    with the module-level ``dataset_dir``), then shows and closes the figure.

    Args:
        df: DataFrame with a ``type_field`` column and a ``Unique Region`` column.
        type_field: Name of the cell type column to count.
        output_dir: Directory the figures are written to; it is not created here.
    """
    plt.figure(figsize=(20, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.countplot(data=df, x=type_field, palette='Spectral', hue='Unique Region')
    plt.title(f'Number of Cells per {type_field} in `{dataset_dir}`')
    plt.xticks(rotation=90)
    plt.legend(title='Unique Region', bbox_to_anchor=(0.85, 1), loc='upper left')
    plt.xlabel(type_field)
    # For numbers on y-axis, do not use scientific notation.
    plt.ticklabel_format(style='plain', axis='y')
    # Set y-axis label
    plt.ylabel('Number of Cells')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Save only after all styling is applied so the files on disk match the
    # figure that is shown (legend, axis labels and grid included).
    plt.savefig(os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}.png'), dpi=300,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.savefig(os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}.svg'), dpi=300,
                bbox_inches='tight',
                pad_inches=0.5)

    # Show the plot
    plt.show()
    plt.close()


for type_field in type_field_list:
    plot_cells_per_celltype(df_all_nodes, type_field, os.path.join(basepath, figures_output_dir))
Distance Analysis
# Get mean, median, minimum, maximum distance per unique region per anchor cell type.
df_distance_stats = df_all_edges_with_cell_types.groupby(['Unique Region', 'Anchor Cell Type', 'Anchor Cell Type Level']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
# Display the distance statistics DataFrame.
df_distance_stats
Unique Region | Anchor Cell Type | Anchor Cell Type Level | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|---|
0 | AML | endothelial cell | Level One Cell Type | 74.382151 | 66.806253 | 6.056625 | 199.988548 |
1 | AML | endothelial cell of artery | Level Three Cell Type | 111.758598 | 112.688440 | 8.275107 | 199.997878 |
2 | AML | endothelial cell of artery | Level Two Cell Type | 111.758598 | 112.688440 | 8.275107 | 199.997878 |
3 | AML | endothelial cell of sinusoid | Level Three Cell Type | 79.390329 | 72.177834 | 6.056625 | 199.988548 |
4 | AML | endothelial cell of sinusoid | Level Two Cell Type | 79.390329 | 72.177834 | 6.056625 | 199.988548 |
5 | NBM | endothelial cell | Level One Cell Type | 90.322695 | 85.240398 | 5.750231 | 199.999142 |
6 | NBM | endothelial cell of artery | Level Three Cell Type | 115.284424 | 119.163665 | 5.750231 | 199.999998 |
7 | NBM | endothelial cell of artery | Level Two Cell Type | 115.284424 | 119.163665 | 5.750231 | 199.999998 |
8 | NBM | endothelial cell of sinusoid | Level Three Cell Type | 95.298876 | 91.351118 | 7.165130 | 199.999142 |
9 | NBM | endothelial cell of sinusoid | Level Two Cell Type | 95.298876 | 91.351118 | 7.165130 | 199.999142 |
10 | NSM | endothelial cell | Level One Cell Type | 93.842789 | 87.951406 | 7.675593 | 199.997910 |
11 | NSM | endothelial cell of artery | Level Three Cell Type | 117.211880 | 121.868198 | 8.340683 | 199.999051 |
12 | NSM | endothelial cell of artery | Level Two Cell Type | 117.211880 | 121.868198 | 8.340683 | 199.999051 |
13 | NSM | endothelial cell of sinusoid | Level Three Cell Type | 95.636834 | 90.176577 | 7.675593 | 199.997910 |
14 | NSM | endothelial cell of sinusoid | Level Two Cell Type | 95.636834 | 90.176577 | 7.675593 | 199.997910 |
Level One Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level One Cell Type'
# Keep only the edges computed for this level's chosen anchor cell type.
# .copy() makes the subset an independent frame, because a later cell assigns
# a column on it.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
].copy()

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
df_distance_stats_cell_type_level
Level One Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | hematopoietic precursor cell | AML | 75.807458 | 68.350787 | 6.056625 | 199.982016 |
1 | hematopoietic precursor cell | NBM | 91.437465 | 86.347062 | 7.368943 | 199.999142 |
2 | hematopoietic precursor cell | NSM | 95.233485 | 89.641506 | 8.036234 | 199.985663 |
3 | immune cell | AML | 72.244373 | 64.713204 | 8.247845 | 199.988548 |
4 | immune cell | NBM | 89.656605 | 84.596595 | 5.750231 | 199.998770 |
5 | immune cell | NSM | 93.334766 | 87.175896 | 7.675593 | 199.997383 |
6 | mesenchymal cell | AML | 68.072424 | 59.045392 | 8.275107 | 198.703670 |
7 | mesenchymal cell | NBM | 82.079626 | 75.429180 | 7.596170 | 199.992343 |
8 | mesenchymal cell | NSM | 79.140648 | 69.671854 | 8.344688 | 199.997910 |
9 | neural cell | AML | 79.843632 | 79.843632 | 61.290397 | 98.396868 |
10 | neural cell | NBM | 57.869702 | 49.944344 | 9.815320 | 187.956746 |
11 | unknown cell | NBM | 91.914579 | 87.038869 | 6.586107 | 199.984000 |
# Get top five and bottom five cell types with respect to mean distance in each unique region separately.
def get_top_bottom_cell_types_by_mean(df, cell_type_level, unique_region, top_n=5):
    """Rank cell types in one region by mean distance.

    Args:
        df: DataFrame with ``Unique Region``, ``Distance`` and
            ``cell_type_level`` columns.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the ranking to.
        top_n: Number of cell types to return at each end of the ranking.

    Returns:
        A tuple ``(top_cell_types, bottom_cell_types)`` of DataFrames with the
        columns ``cell_type_level`` and ``mean_distance``. Both are sorted by
        mean distance in descending order; when the region has fewer than
        ``2 * top_n`` cell types the two tables overlap.
    """
    # Filter the DataFrame for the specified unique region.
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level and calculate mean distance.
    df_grouped = df_filtered.groupby(cell_type_level).agg(mean_distance=('Distance', 'mean')).reset_index()

    # Sort by mean distance (largest first) to get top and bottom cell types.
    df_sorted = df_grouped.sort_values(by='mean_distance', ascending=False)

    # Get top N and bottom N cell types.
    top_cell_types = df_sorted.head(top_n)
    bottom_cell_types = df_sorted.tail(top_n)

    return top_cell_types, bottom_cell_types
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level One Cell Type mean_distance
0 hematopoietic precursor cell 95.233485
1 immune cell 93.334766
2 mesenchymal cell 79.140648
Bottom 5 cell types in NSM:
Level One Cell Type mean_distance
0 hematopoietic precursor cell 95.233485
1 immune cell 93.334766
2 mesenchymal cell 79.140648
Top 5 cell types in AML:
Level One Cell Type mean_distance
3 neural cell 79.843632
0 hematopoietic precursor cell 75.807458
1 immune cell 72.244373
2 mesenchymal cell 68.072424
Bottom 5 cell types in AML:
Level One Cell Type mean_distance
3 neural cell 79.843632
0 hematopoietic precursor cell 75.807458
1 immune cell 72.244373
2 mesenchymal cell 68.072424
Top 5 cell types in NBM:
Level One Cell Type mean_distance
4 unknown cell 91.914579
0 hematopoietic precursor cell 91.437465
1 immune cell 89.656605
2 mesenchymal cell 82.079626
3 neural cell 57.869702
Bottom 5 cell types in NBM:
Level One Cell Type mean_distance
4 unknown cell 91.914579
0 hematopoietic precursor cell 91.437465
1 immune cell 89.656605
2 mesenchymal cell 82.079626
3 neural cell 57.869702
# Get top five and bottom five cell types with respect to median distance in each unique region separately.
def get_top_bottom_cell_types_by_median(df, cell_type_level, unique_region, top_n=5):
    """Rank cell types in one region by median distance.

    Args:
        df: DataFrame with ``Unique Region``, ``Distance`` and
            ``cell_type_level`` columns.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the ranking to.
        top_n: Number of cell types to return at each end of the ranking.

    Returns:
        A tuple ``(top_cell_types, bottom_cell_types)`` of DataFrames with the
        columns ``cell_type_level`` and ``median_distance``. Both are sorted by
        median distance in descending order; when the region has fewer than
        ``2 * top_n`` cell types the two tables overlap.
    """
    # Filter the DataFrame for the specified unique region.
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level and calculate median distance.
    df_grouped = df_filtered.groupby(cell_type_level).agg(median_distance=('Distance', 'median')).reset_index()

    # Sort by median distance (largest first) to get top and bottom cell types.
    df_sorted = df_grouped.sort_values(by='median_distance', ascending=False)

    # Get top N and bottom N cell types.
    top_cell_types = df_sorted.head(top_n)
    bottom_cell_types = df_sorted.tail(top_n)

    return top_cell_types, bottom_cell_types
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level One Cell Type median_distance
0 hematopoietic precursor cell 89.641506
1 immune cell 87.175896
2 mesenchymal cell 69.671854
Bottom 5 cell types in NSM:
Level One Cell Type median_distance
0 hematopoietic precursor cell 89.641506
1 immune cell 87.175896
2 mesenchymal cell 69.671854
Top 5 cell types in AML:
Level One Cell Type median_distance
3 neural cell 79.843632
0 hematopoietic precursor cell 68.350787
1 immune cell 64.713204
2 mesenchymal cell 59.045392
Bottom 5 cell types in AML:
Level One Cell Type median_distance
3 neural cell 79.843632
0 hematopoietic precursor cell 68.350787
1 immune cell 64.713204
2 mesenchymal cell 59.045392
Top 5 cell types in NBM:
Level One Cell Type median_distance
4 unknown cell 87.038869
0 hematopoietic precursor cell 86.347062
1 immune cell 84.596595
2 mesenchymal cell 75.429180
3 neural cell 49.944344
Bottom 5 cell types in NBM:
Level One Cell Type median_distance
4 unknown cell 87.038869
0 hematopoietic precursor cell 86.347062
1 immune cell 84.596595
2 mesenchymal cell 75.429180
3 neural cell 49.944344
# Calculate regional variability
def calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level):
    """Print distance variability per region and per cell type.

    For each ``Unique Region`` and for each value of ``cell_type_level`` the
    mean and standard deviation of ``Distance`` are computed (rounded to two
    decimals), together with the coefficient of variation
    ``std / mean * 100`` (rounded to one decimal, derived from the rounded
    values). The cell type table is printed sorted by CV, largest first.

    Args:
        df_all_edges_with_cell_type_level: DataFrame with ``Unique Region``,
            ``Distance`` and ``cell_type_level`` columns.
        cell_type_level: Name of the cell type column to group by.

    Returns:
        None. The two tables are only printed.
    """
    regional_variability = (
        df_all_edges_with_cell_type_level.groupby('Unique Region')['Distance']
        .agg(['mean', 'std'])
        .round(2)
    )

    # Add CV as percentage
    regional_variability['CV (%)'] = (regional_variability['std'] / regional_variability['mean'] * 100).round(1)

    print("\nRegional Variability Analysis:")
    print("Mean: Average distance in each region")
    print("Std: Standard deviation of distances")
    print("CV: Coefficient of Variation (std/mean * 100%)")
    print(regional_variability)

    # Calculate variability for each cell type
    cell_type_variability = (
        df_all_edges_with_cell_type_level.groupby(cell_type_level)['Distance']
        .agg(['mean', 'std'])
        .round(2)
    )

    # Add CV as percentage
    cell_type_variability['CV (%)'] = (cell_type_variability['std'] / cell_type_variability['mean'] * 100).round(1)

    print("\nCell Type Variability Analysis (sorted by CV):")
    print(cell_type_variability.sort_values('CV (%)', ascending=False))
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
AML 74.38 42.82 57.6
NBM 90.32 48.07 53.2
NSM 93.84 50.99 54.3
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level One Cell Type
neural cell 58.71 38.97 66.4
mesenchymal cell 79.84 49.34 61.8
immune cell 88.12 48.41 54.9
hematopoietic precursor cell 88.63 47.43 53.5
unknown cell 91.91 48.28 52.5
# Define the standard region sequence for plots
regions = ['NBM', 'AML', 'NSM']
# Generate Violin Plot
def plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm='area'):
    """Plot and save a violin plot of distances per cell type.

    The figure is saved as PNG and SVG in ``output_dir`` (file names are
    prefixed with the module-level ``dataset_dir``), then shown and closed.

    Args:
        df_all_edges_with_cell_type_level: DataFrame with a ``Distance`` column
            and a ``cell_type_level`` column.
        cell_type_level: Name of the cell type column used for the x-axis.
        output_dir: Directory the figures are written to; it is not created here.
        density_norm: Passed through to ``sns.violinplot``.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 2})
    plt.figure(figsize=(10, 6))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    sns.violinplot(data=df_all_edges_with_cell_type_level, x=cell_type_level, y="Distance", density_norm=density_norm, common_norm=True, cut=0, inner="box", split=False, palette='Spectral', alpha=.9)

    sns.set_theme(style="whitegrid")
    sns.set_context("paper")

    font_size = 10
    plt.legend(fontsize=font_size)

    plt.title(f'Violin Plot of distances by {cell_type_level} (Density Normalization: {density_norm})', fontsize=font_size)

    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)

    # Use the same font size for the tick labels and rotate the x labels.
    plt.xticks(fontsize=font_size)
    plt.xticks(rotation=90)
    plt.yticks(fontsize=font_size)

    plt.tight_layout()

    plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype_{cell_type_level}.png'), dpi=300,
                bbox_inches='tight',
                pad_inches=0.5)
    plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype_{cell_type_level}.svg'), dpi=300,
                bbox_inches='tight',
                pad_inches=0.5)

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm='area')
# Boxplots of distribution of distances by cell type and region.
def plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Plot grouped boxplots of 'Distance' per cell type, split by region, and save as PNG and SVG.

    Args:
        df_all_edges_with_cell_type_level: DataFrame containing 'Distance',
            'Unique Region' and a column named by `cell_type_level`. It is
            not modified.
        cell_type_level: Name of the column used for the x axis; also used in
            the title and output file name.
        output_dir: Directory the figures are written to (created if missing).

    Regions are drawn in the order of the module-level `regions` list,
    restricted to the regions present in the data. The output file names are
    prefixed with the module-level `dataset_dir`.
    """
    plt.figure(figsize=(16, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    # Keep only the standard regions that exist in the data, in standard order.
    # The order is passed as `hue_order` instead of overwriting the caller's
    # 'Unique Region' column with a Categorical.
    present_regions = set(df_all_edges_with_cell_type_level['Unique Region'].unique())
    available_regions = [r for r in regions if r in present_regions]

    # Make box plot.
    sns.boxplot(
        data=df_all_edges_with_cell_type_level,
        x=cell_type_level,
        y='Distance',
        hue='Unique Region',
        hue_order=available_regions,
        showfliers=False,
        palette='Spectral',  # viridis or Spectral palette for better color distinction
    )
    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)
    plt.title(f'Distribution of distances by {cell_type_level} and region', fontsize=font_size)
    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)
    # Place the legend just outside the axes so it does not cover the boxes.
    plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

    plt.tight_layout()

    # Make sure the target directory exists before writing the figures.
    os.makedirs(output_dir, exist_ok=True)
    for ext in ('png', 'svg'):
        plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_boxplots_by_region_{cell_type_level}.{ext}'), dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw and save the per-region boxplots for the current cell type level.
plot_distance_distribution_boxplots_by_region(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
)
# Heatmap of median distances by cell type and region.
def plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Plot an annotated heatmap of the median 'Distance' per cell type and region and save as PNG and SVG.

    Args:
        df_all_edges_with_cell_type_level: DataFrame containing 'Distance',
            'Unique Region' and a column named by `cell_type_level`.
        cell_type_level: Name of the column whose values become the heatmap
            rows; also used in the title and output file name.
        output_dir: Directory the figures are written to (created if missing).

    Columns follow the module-level `regions` order; any region not listed
    there is appended after them. The output file names are prefixed with the
    module-level `dataset_dir`.
    """
    pivot_data = df_all_edges_with_cell_type_level.pivot_table(
        values='Distance',
        index=cell_type_level,
        columns='Unique Region',
        aggfunc='median'
    )

    # Fix the column order explicitly so it does not depend on whether the
    # 'Unique Region' column happens to be an ordered Categorical.
    ordered_columns = [r for r in regions if r in pivot_data.columns]
    ordered_columns += [c for c in pivot_data.columns if c not in ordered_columns]
    pivot_data = pivot_data.reindex(columns=ordered_columns)

    plt.figure(figsize=(15, 10))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='Spectral')
    plt.title(f'Heatmap of median distances by {cell_type_level}', fontsize=12)

    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)

    # Regions are the pivot columns (x axis); cell types are the rows (y axis).
    plt.xlabel('Unique Region', fontsize=font_size)
    plt.ylabel(f'{cell_type_level}', fontsize=font_size)

    plt.tight_layout()

    # Make sure the target directory exists before writing the figures.
    os.makedirs(output_dir, exist_ok=True)
    for ext in ('png', 'svg'):
        plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_heatmap_{cell_type_level}.{ext}'), dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw and save the median-distance heatmap for the current cell type level.
plot_distance_distribution_heatmap(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
)
# Generate violin plots per unique region: one stacked subplot for each entry in `regions`.
def plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm="area"):
    """Plot per-region violin plots of 'Distance' by cell type, stacked vertically, and save as PNG and SVG.

    Args:
        df_all_edges_with_cell_type_level: DataFrame containing 'Distance',
            'Unique Region' and a column named by `cell_type_level`.
        cell_type_level: Name of the column used for the x axis; also used in
            the title, the shared x label and the output file name.
        output_dir: Directory the figures are written to (created if missing).
        density_norm: Forwarded to `sns.violinplot(density_norm=...)`.

    One subplot is drawn per entry of the module-level `regions` list. The
    output file names are prefixed with the module-level `dataset_dir`.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 1})
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    font_size = 10

    # One row per region (5 inches each); squeeze=False keeps `axs` indexable
    # even when there is a single region.
    n_regions = len(regions)
    fig, axs = plt.subplots(n_regions, 1, figsize=(10, 5 * n_regions), squeeze=False)
    axs = axs.ravel()
    fig.suptitle(f'Distance distribution per {cell_type_level} in `{dataset_dir}` (density normalization = {density_norm})', fontsize=font_size, y=1)

    # Keep the sequence of Cell Types consistent across plots.
    # Missing labels are dropped so sorting never compares NaN with strings.
    cell_types = sorted(df_all_edges_with_cell_type_level[cell_type_level].dropna().unique())

    # Create a color palette based on the number of unique classes
    color_palette = sns.color_palette("Spectral", n_colors=len(cell_types))

    # Create a dictionary mapping class to color
    class_color_dict = dict(zip(cell_types, color_palette))

    for i, region in enumerate(regions):
        ax = axs[i]
        data_reg = df_all_edges_with_cell_type_level[df_all_edges_with_cell_type_level['Unique Region'] == region]
        sns.violinplot(
            data=data_reg,
            x=cell_type_level,
            y="Distance",
            density_norm=density_norm,
            common_norm=True,
            cut=0,
            inner="box",
            split=False,
            palette=class_color_dict,
            alpha=.9,
            ax=ax,
            hue=cell_type_level,
            legend=False,
            order=cell_types,
            fill=True,
        )
        ax.set_title(region, fontsize=font_size)
        ax.set_xlabel('', fontsize=font_size)
        ax.set_ylabel('Distance (\u03bcm)', fontsize=font_size)
        # only show xtick labels for the last subplot
        if i < n_regions - 1:
            ax.set_xticklabels([])
        else:
            # Pin the tick positions first so the labels always line up with
            # the `order=cell_types` positions.
            ax.set_xticks(range(len(cell_types)))
            ax.set_xticklabels(cell_types, fontsize=font_size, rotation=90, ha='right')
        ax.tick_params(axis='both', labelsize=font_size)

    # Use fig.text for precise label positioning
    fig.text(0.5, -0.02, f'{cell_type_level}', ha='center', va='bottom', fontsize=font_size)

    plt.tight_layout()

    # Make sure the target directory exists before writing the figures.
    os.makedirs(output_dir, exist_ok=True)
    for ext in ('png', 'svg'):
        plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_plots_all_regions_{cell_type_level}.{ext}'), dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# density_norm="count" or "area" can be used based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count")
Level Two Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Two Cell Type'
# Keep only the edges computed at this level for this level's anchor cell type.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
]

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
# Bare expression so the notebook displays the summary table.
df_distance_stats_cell_type_level
| | Level Two Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance |
---|---|---|---|---|---|---|
0 | abnormal cell | AML | 84.612826 | 78.755366 | 7.350124 | 199.982016 |
1 | adipocyte | AML | 72.044065 | 61.569803 | 9.299294 | 198.520789 |
2 | adipocyte | NBM | 92.656243 | 88.435461 | 8.040248 | 199.992343 |
3 | adipocyte | NSM | 84.007363 | 76.639037 | 8.344688 | 199.997910 |
4 | b cell | AML | 78.392682 | 71.243188 | 9.244128 | 199.966621 |
5 | b cell | NBM | 97.615652 | 95.012793 | 8.025751 | 199.996640 |
6 | b cell | NSM | 96.232882 | 91.578635 | 9.240345 | 199.991504 |
7 | dendritic cell | AML | 79.071847 | 72.219946 | 9.452268 | 199.541677 |
8 | dendritic cell | NBM | 94.921339 | 90.375897 | 8.032959 | 199.978479 |
9 | dendritic cell | NSM | 96.121120 | 90.482325 | 10.456776 | 199.356229 |
10 | endothelial cell of artery | AML | 66.552461 | 57.188080 | 9.598652 | 197.775548 |
11 | endothelial cell of artery | NBM | 80.875474 | 71.598061 | 7.165130 | 199.696671 |
12 | endothelial cell of artery | NSM | 65.333875 | 55.348929 | 8.340683 | 199.496217 |
13 | erythroid precursor | AML | 75.974098 | 67.872764 | 7.687746 | 199.911019 |
14 | erythroid precursor | NBM | 95.055450 | 90.578380 | 7.368943 | 199.990928 |
15 | erythroid precursor | NSM | 97.332711 | 92.296305 | 8.124669 | 199.985663 |
16 | hematopoietic stem and progenitor cell | AML | 54.563528 | 37.139774 | 9.123395 | 189.847535 |
17 | hematopoietic stem and progenitor cell | NBM | 95.280715 | 93.347852 | 9.670863 | 199.738928 |
18 | hematopoietic stem and progenitor cell | NSM | 104.959083 | 98.965058 | 13.524865 | 196.588555 |
19 | macrophage | AML | 73.826747 | 67.164529 | 9.347675 | 198.307993 |
20 | macrophage | NBM | 96.799495 | 91.931956 | 8.152219 | 199.997797 |
21 | macrophage | NSM | 90.324484 | 82.094705 | 9.073456 | 199.512374 |
22 | megakaryocyte | AML | 77.991861 | 68.126728 | 9.953335 | 199.430617 |
23 | megakaryocyte | NBM | 98.795997 | 94.345559 | 8.774989 | 199.937106 |
24 | megakaryocyte | NSM | 98.796581 | 92.705512 | 12.087462 | 199.884221 |
25 | mesenchymal stem cell | AML | 78.724108 | 71.696786 | 8.585049 | 198.703670 |
26 | mesenchymal stem cell | NBM | 91.666993 | 87.259370 | 9.507509 | 199.986823 |
27 | mesenchymal stem cell | NSM | 87.206186 | 83.498779 | 9.652741 | 199.570947 |
28 | mesenchymal stem/stromal cell | AML | 75.223250 | 65.025012 | 10.898685 | 199.967713 |
29 | mesenchymal stem/stromal cell | NBM | 91.820716 | 86.924087 | 7.572935 | 199.852192 |
30 | mesenchymal stem/stromal cell | NSM | 91.464706 | 80.083330 | 10.301344 | 199.472893 |
31 | monocyte | AML | 82.273906 | 76.137286 | 8.868460 | 199.876784 |
32 | monocyte | NBM | 91.519407 | 86.756461 | 7.351953 | 199.985575 |
33 | monocyte | NSM | 92.240440 | 85.765454 | 8.268363 | 199.934336 |
34 | muscle cell | AML | 69.735721 | 59.183423 | 10.271701 | 194.082988 |
35 | muscle cell | NBM | 94.216885 | 87.575318 | 9.427550 | 199.834792 |
36 | muscle cell | NSM | 55.313443 | 46.465657 | 10.248018 | 198.935026 |
37 | myeloid cell | AML | 76.635516 | 68.660555 | 8.373370 | 199.748940 |
38 | myeloid cell | NBM | 93.908272 | 89.618397 | 7.514845 | 199.998770 |
39 | myeloid cell | NSM | 95.283475 | 89.390289 | 7.675593 | 199.984961 |
40 | myeloid precursor | AML | 80.963771 | 73.387438 | 8.604672 | 199.919874 |
41 | myeloid precursor | NBM | 96.919348 | 93.418496 | 7.505009 | 199.999142 |
42 | myeloid precursor | NSM | 97.037575 | 92.060765 | 8.173025 | 199.969054 |
43 | neuroglial cell | AML | 128.321993 | 128.321993 | 128.321993 | 128.321993 |
44 | neuroglial cell | NBM | 86.411262 | 78.933059 | 17.934662 | 175.871271 |
45 | progenitor cell | AML | 78.395834 | 70.509229 | 6.056625 | 199.797032 |
46 | progenitor cell | NBM | 98.953368 | 96.413601 | 8.000469 | 199.988637 |
47 | progenitor cell | NSM | 97.117247 | 92.464861 | 9.423956 | 199.969595 |
48 | skeletal stromal cell | AML | 78.238144 | 65.728809 | 13.246899 | 195.060591 |
49 | skeletal stromal cell | NBM | 94.062042 | 88.538574 | 8.954561 | 199.829571 |
50 | skeletal stromal cell | NSM | 89.169229 | 77.967966 | 11.309889 | 198.710354 |
51 | stem cell | AML | 70.097786 | 59.523007 | 10.801688 | 193.191236 |
52 | stem cell | NBM | 96.060575 | 92.318654 | 9.741716 | 199.970348 |
53 | stem cell | NSM | 92.740997 | 90.714633 | 8.036234 | 199.237795 |
54 | t cell | AML | 80.957774 | 73.440678 | 8.247845 | 199.988548 |
55 | t cell | NBM | 94.412441 | 90.318472 | 8.190426 | 199.995542 |
56 | t cell | NSM | 94.730211 | 88.963905 | 9.018322 | 199.997383 |
57 | unknown cell | NBM | 96.310960 | 92.693095 | 8.043118 | 199.984452 |
# Get top and bottom cell types (by mean distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # The helper's result is indexed as [0] = top table, [1] = bottom table.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level Two Cell Type mean_distance
5 hematopoietic stem and progenitor cell 104.959083
7 megakaryocyte 98.796581
4 erythroid precursor 97.332711
14 progenitor cell 97.117247
13 myeloid precursor 97.037575
Bottom 5 cell types in NSM:
Level Two Cell Type mean_distance
15 skeletal stromal cell 89.169229
8 mesenchymal stem cell 87.206186
0 adipocyte 84.007363
3 endothelial cell of artery 65.333875
11 muscle cell 55.313443
Top 5 cell types in AML:
Level Two Cell Type mean_distance
15 neuroglial cell 128.321993
0 abnormal cell 84.612826
11 monocyte 82.273906
14 myeloid precursor 80.963771
19 t cell 80.957774
Bottom 5 cell types in AML:
Level Two Cell Type mean_distance
1 adipocyte 72.044065
18 stem cell 70.097786
12 muscle cell 69.735721
4 endothelial cell of artery 66.552461
6 hematopoietic stem and progenitor cell 54.563528
Top 5 cell types in NBM:
Level Two Cell Type mean_distance
15 progenitor cell 98.953368
7 megakaryocyte 98.795997
1 b cell 97.615652
13 myeloid precursor 96.919348
6 macrophage 96.799495
Bottom 5 cell types in NBM:
Level Two Cell Type mean_distance
9 mesenchymal stem/stromal cell 91.820716
8 mesenchymal stem cell 91.666993
10 monocyte 91.519407
14 neuroglial cell 86.411262
3 endothelial cell of artery 80.875474
# Get top and bottom cell types (by median distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # The helper's result is indexed as [0] = top table, [1] = bottom table.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level Two Cell Type median_distance
5 hematopoietic stem and progenitor cell 98.965058
7 megakaryocyte 92.705512
14 progenitor cell 92.464861
4 erythroid precursor 92.296305
13 myeloid precursor 92.060765
Bottom 5 cell types in NSM:
Level Two Cell Type median_distance
9 mesenchymal stem/stromal cell 80.083330
15 skeletal stromal cell 77.967966
0 adipocyte 76.639037
3 endothelial cell of artery 55.348929
11 muscle cell 46.465657
Top 5 cell types in AML:
Level Two Cell Type median_distance
15 neuroglial cell 128.321993
0 abnormal cell 78.755366
11 monocyte 76.137286
19 t cell 73.440678
14 myeloid precursor 73.387438
Bottom 5 cell types in AML:
Level Two Cell Type median_distance
1 adipocyte 61.569803
18 stem cell 59.523007
12 muscle cell 59.183423
4 endothelial cell of artery 57.188080
6 hematopoietic stem and progenitor cell 37.139774
Top 5 cell types in NBM:
Level Two Cell Type median_distance
15 progenitor cell 96.413601
1 b cell 95.012793
7 megakaryocyte 94.345559
13 myeloid precursor 93.418496
5 hematopoietic stem and progenitor cell 93.347852
Bottom 5 cell types in NBM:
Level Two Cell Type median_distance
8 mesenchymal stem cell 87.259370
9 mesenchymal stem/stromal cell 86.924087
10 monocyte 86.756461
14 neuroglial cell 78.933059
3 endothelial cell of artery 71.598061
# Report distance variability for the current cell type level.
calculate_regional_variability(
    df_all_edges_with_cell_type_level,
    cell_type_level,
)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
AML 79.39 44.80 56.4
NBM 95.30 48.83 51.2
NSM 95.64 51.03 53.4
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Two Cell Type
endothelial cell of artery 77.69 52.16 67.1
muscle cell 87.24 50.87 58.3
skeletal stromal cell 92.68 52.72 56.9
adipocyte 89.36 50.19 56.2
monocyte 89.60 49.68 55.4
mesenchymal stem cell 87.59 48.55 55.4
mesenchymal stem/stromal cell 89.52 48.91 54.6
dendritic cell 89.61 48.91 54.6
t cell 92.82 49.73 53.6
hematopoietic stem and progenitor cell 93.87 49.72 53.0
myeloid cell 93.01 49.27 53.0
abnormal cell 84.61 44.08 52.1
erythroid precursor 92.48 48.09 52.0
myeloid precursor 94.87 49.06 51.7
stem cell 94.15 48.57 51.6
macrophage 92.25 47.60 51.6
b cell 94.91 48.86 51.5
progenitor cell 95.58 49.02 51.3
unknown cell 96.31 48.92 50.8
neuroglial cell 87.43 43.59 49.9
megakaryocyte 94.69 47.09 49.7
# Draw and save all four figures for the current cell type level.
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm='area')
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Or, density_norm="count" or "area" based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count")
Level Three Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Three Cell Type'
# Keep only the edges computed at this level for this level's anchor cell type.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
]

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
# Bare expression so the notebook displays the summary table.
df_distance_stats_cell_type_level
| | Level Three Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance |
---|---|---|---|---|---|---|
0 | adipocyte | AML | 72.044065 | 61.569803 | 9.299294 | 198.520789 |
1 | adipocyte | NBM | 92.656243 | 88.435461 | 8.040248 | 199.992343 |
2 | adipocyte | NSM | 84.007363 | 76.639037 | 8.344688 | 199.997910 |
3 | b cell | AML | 76.143453 | 68.752778 | 9.244128 | 199.966621 |
4 | b cell | NBM | 96.138631 | 92.508923 | 8.025751 | 199.972480 |
5 | b cell | NSM | 94.707460 | 89.148689 | 9.240345 | 199.991504 |
6 | b cell:immature | AML | 82.461384 | 75.703532 | 10.072227 | 199.483659 |
7 | b cell:immature | NBM | 99.501175 | 96.731118 | 8.874948 | 199.954603 |
8 | b cell:immature | NSM | 97.478212 | 92.747527 | 9.555029 | 199.569086 |
9 | cell:cd34+ cd61+ | AML | 74.287460 | 57.285741 | 11.620897 | 170.297284 |
10 | cell:cd34+ cd61+ | NBM | 97.082979 | 99.005114 | 9.337088 | 198.401642 |
11 | cell:cd34+ cd61+ | NSM | 54.635086 | 52.248486 | 48.701567 | 67.252810 |
12 | dendritic cell:plasmacytoid | AML | 79.071847 | 72.219946 | 9.452268 | 199.541677 |
13 | dendritic cell:plasmacytoid | NBM | 94.921339 | 90.375897 | 8.032959 | 199.978479 |
14 | dendritic cell:plasmacytoid | NSM | 96.121120 | 90.482325 | 10.456776 | 199.356229 |
15 | endosteal cell | AML | 78.238144 | 65.728809 | 13.246899 | 195.060591 |
16 | endosteal cell | NBM | 94.062042 | 88.538574 | 8.954561 | 199.829571 |
17 | endosteal cell | NSM | 89.169229 | 77.967966 | 11.309889 | 198.710354 |
18 | endothelial cell of artery | AML | 66.552461 | 57.188080 | 9.598652 | 197.775548 |
19 | endothelial cell of artery | NBM | 80.875474 | 71.598061 | 7.165130 | 199.696671 |
20 | endothelial cell of artery | NSM | 65.333875 | 55.348929 | 8.340683 | 199.496217 |
21 | erythroblast | AML | 74.204032 | 67.595458 | 9.345520 | 199.710488 |
22 | erythroblast | NBM | 97.493233 | 93.445013 | 9.438919 | 199.976653 |
23 | erythroblast | NSM | 99.975515 | 94.923044 | 10.526460 | 199.917168 |
24 | erythroblast:basophilic | AML | 52.951975 | 49.722590 | 12.385871 | 159.571485 |
25 | erythroblast:basophilic | NBM | 93.121025 | 93.171093 | 9.946056 | 199.013021 |
26 | erythroblast:basophilic | NSM | 69.175800 | 50.068324 | 21.363562 | 131.777261 |
27 | erythroid lineage cell | AML | 76.378711 | 67.967844 | 7.687746 | 199.911019 |
28 | erythroid lineage cell | NBM | 94.931105 | 90.427830 | 7.368943 | 199.990928 |
29 | erythroid lineage cell | NSM | 96.871888 | 91.762574 | 8.124669 | 199.985663 |
30 | granulocyte monocyte progenitor cell | AML | 70.212475 | 62.399469 | 8.821407 | 193.766544 |
31 | granulocyte monocyte progenitor cell | NBM | 99.994036 | 96.974877 | 10.086847 | 199.377339 |
32 | granulocyte monocyte progenitor cell | NSM | 113.471237 | 118.935939 | 24.621301 | 195.612217 |
33 | granulocyte monocyte progenitor cell/myeloblast | AML | 72.415964 | 63.423578 | 10.196329 | 190.981269 |
34 | granulocyte monocyte progenitor cell/myeloblast | NBM | 96.890873 | 95.757879 | 10.523154 | 199.686919 |
35 | granulocyte monocyte progenitor cell/myeloblast | NSM | 94.198042 | 90.199936 | 12.603882 | 198.890799 |
36 | hematopoietic stem and progenitor cell:spink2+ | AML | 54.563528 | 37.139774 | 9.123395 | 189.847535 |
37 | hematopoietic stem and progenitor cell:spink2+ | NBM | 95.280715 | 93.347852 | 9.670863 | 199.738928 |
38 | hematopoietic stem and progenitor cell:spink2+ | NSM | 104.959083 | 98.965058 | 13.524865 | 196.588555 |
39 | hematopoietic stem cell | AML | 70.097786 | 59.523007 | 10.801688 | 193.191236 |
40 | hematopoietic stem cell | NBM | 96.060575 | 92.318654 | 9.741716 | 199.970348 |
41 | hematopoietic stem cell | NSM | 92.740997 | 90.714633 | 8.036234 | 199.237795 |
42 | lymphoid progenitor cell:common | AML | 70.369095 | 33.321110 | 11.891835 | 184.838570 |
43 | lymphoid progenitor cell:common | NBM | 101.297912 | 103.775147 | 8.703396 | 198.679098 |
44 | lymphoid progenitor cell:common | NSM | 48.644547 | 48.644547 | 40.392837 | 56.896257 |
45 | macrophage | AML | 73.826747 | 67.164529 | 9.347675 | 198.307993 |
46 | macrophage | NBM | 96.799495 | 91.931956 | 8.152219 | 199.997797 |
47 | macrophage | NSM | 90.324484 | 82.094705 | 9.073456 | 199.512374 |
48 | megakaryocyte:gata1+ | AML | 76.859027 | 67.759673 | 11.187479 | 199.345585 |
49 | megakaryocyte:gata1+ | NBM | 96.565594 | 89.912296 | 15.171096 | 199.835750 |
50 | megakaryocyte:gata1+ | NSM | 99.813171 | 94.046165 | 12.087462 | 199.884221 |
51 | megakaryocyte:gata1- | AML | 81.549157 | 71.445335 | 9.953335 | 199.430617 |
52 | megakaryocyte:gata1- | NBM | 99.683106 | 95.220876 | 8.774989 | 199.937106 |
53 | megakaryocyte:gata1- | NSM | 96.343558 | 87.702763 | 14.433768 | 198.666219 |
54 | mesenchymal stem cell of adipose tissue | AML | 78.724108 | 71.696786 | 8.585049 | 198.703670 |
55 | mesenchymal stem cell of adipose tissue | NBM | 91.666993 | 87.259370 | 9.507509 | 199.986823 |
56 | mesenchymal stem cell of adipose tissue | NSM | 87.206186 | 83.498779 | 9.652741 | 199.570947 |
57 | mesenchymal stem/stromal cell:thy1+ | AML | 75.223250 | 65.025012 | 10.898685 | 199.967713 |
58 | mesenchymal stem/stromal cell:thy1+ | NBM | 91.820716 | 86.924087 | 7.572935 | 199.852192 |
59 | mesenchymal stem/stromal cell:thy1+ | NSM | 91.464706 | 80.083330 | 10.301344 | 199.472893 |
60 | monocyte | AML | 81.998821 | 75.975206 | 8.868460 | 199.876784 |
61 | monocyte | NBM | 91.803183 | 87.198437 | 7.351953 | 199.985575 |
62 | monocyte | NSM | 91.957353 | 85.202021 | 8.268363 | 199.934336 |
63 | monocyte:non-classical | AML | 84.404224 | 77.154945 | 10.042509 | 199.557398 |
64 | monocyte:non-classical | NBM | 89.394134 | 83.017617 | 8.634223 | 199.962583 |
65 | monocyte:non-classical | NSM | 94.928483 | 89.610892 | 10.244014 | 199.859496 |
66 | muscle cell:smooth | AML | 69.735721 | 59.183423 | 10.271701 | 194.082988 |
67 | muscle cell:smooth | NBM | 94.216885 | 87.575318 | 9.427550 | 199.834792 |
68 | muscle cell:smooth | NSM | 55.313443 | 46.465657 | 10.248018 | 198.935026 |
69 | mutant blast:npm1 | AML | 84.612826 | 78.755366 | 7.350124 | 199.982016 |
70 | myeloid cell:intermediate | AML | 81.373170 | 73.925935 | 8.604672 | 199.919874 |
71 | myeloid cell:intermediate | NBM | 96.900964 | 93.360407 | 7.505009 | 199.999142 |
72 | myeloid cell:intermediate | NSM | 97.020277 | 92.018090 | 8.173025 | 199.969054 |
73 | myeloid cell:mature | AML | 76.635516 | 68.660555 | 8.373370 | 199.748940 |
74 | myeloid cell:mature | NBM | 93.908272 | 89.618397 | 7.514845 | 199.998770 |
75 | myeloid cell:mature | NSM | 95.283475 | 89.390289 | 7.675593 | 199.984961 |
76 | myeloid progenitor cell:common | AML | 78.406261 | 70.510293 | 6.056625 | 199.797032 |
77 | myeloid progenitor cell:common | NBM | 98.947282 | 96.400048 | 8.000469 | 199.988637 |
78 | myeloid progenitor cell:common | NSM | 97.127281 | 92.476823 | 9.423956 | 199.969595 |
79 | plasma cell | AML | 79.030258 | 71.973497 | 9.340869 | 199.876610 |
80 | plasma cell | NBM | 100.456866 | 99.888396 | 8.273445 | 199.996640 |
81 | plasma cell | NSM | 96.900720 | 93.114820 | 10.602960 | 199.657054 |
82 | schwann cell | AML | 128.321993 | 128.321993 | 128.321993 | 128.321993 |
83 | schwann cell | NBM | 86.411262 | 78.933059 | 17.934662 | 175.871271 |
84 | t cell:cd4+ alpha-beta | AML | 82.044430 | 74.457114 | 8.247845 | 199.988548 |
85 | t cell:cd4+ alpha-beta | NBM | 94.775404 | 90.768274 | 8.443550 | 199.987582 |
86 | t cell:cd4+ alpha-beta | NSM | 92.912509 | 87.549465 | 9.815604 | 199.997383 |
87 | t cell:cd8+ alpha-beta regulatory | AML | 78.082735 | 70.753019 | 10.387187 | 199.927831 |
88 | t cell:cd8+ alpha-beta regulatory | NBM | 93.990508 | 89.772743 | 8.190426 | 199.995542 |
89 | t cell:cd8+ alpha-beta regulatory | NSM | 95.344181 | 89.646218 | 9.018322 | 199.899349 |
90 | unknown cell | NBM | 96.310960 | 92.693095 | 8.043118 | 199.984452 |
# Get top and bottom cell types (by mean distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # The helper's result is indexed as [0] = top table, [1] = bottom table.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level Three Cell Type mean_distance
10 granulocyte monocyte progenitor cell 113.471237
12 hematopoietic stem and progenitor cell:spink2+ 104.959083
7 erythroblast 99.975515
16 megakaryocyte:gata1+ 99.813171
2 b cell:immature 97.478212
Bottom 5 cell types in NSM:
Level Three Cell Type mean_distance
8 erythroblast:basophilic 69.175800
6 endothelial cell of artery 65.333875
22 muscle cell:smooth 55.313443
3 cell:cd34+ cd61+ 54.635086
14 lymphoid progenitor cell:common 48.644547
Top 5 cell types in AML:
Level Three Cell Type mean_distance
28 schwann cell 128.321993
23 mutant blast:npm1 84.612826
21 monocyte:non-classical 84.404224
2 b cell:immature 82.461384
29 t cell:cd4+ alpha-beta 82.044430
Bottom 5 cell types in AML:
Level Three Cell Type mean_distance
13 hematopoietic stem cell 70.097786
22 muscle cell:smooth 69.735721
6 endothelial cell of artery 66.552461
12 hematopoietic stem and progenitor cell:spink2+ 54.563528
8 erythroblast:basophilic 52.951975
Top 5 cell types in NBM:
Level Three Cell Type mean_distance
14 lymphoid progenitor cell:common 101.297912
26 plasma cell 100.456866
10 granulocyte monocyte progenitor cell 99.994036
17 megakaryocyte:gata1- 99.683106
2 b cell:immature 99.501175
Bottom 5 cell types in NBM:
Level Three Cell Type mean_distance
20 monocyte 91.803183
18 mesenchymal stem cell of adipose tissue 91.666993
21 monocyte:non-classical 89.394134
27 schwann cell 86.411262
6 endothelial cell of artery 80.875474
# Get top and bottom cell types (by median distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # The helper's result is indexed as [0] = top table, [1] = bottom table.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in NSM:
Level Three Cell Type median_distance
10 granulocyte monocyte progenitor cell 118.935939
12 hematopoietic stem and progenitor cell:spink2+ 98.965058
7 erythroblast 94.923044
16 megakaryocyte:gata1+ 94.046165
26 plasma cell 93.114820
Bottom 5 cell types in NSM:
Level Three Cell Type median_distance
6 endothelial cell of artery 55.348929
3 cell:cd34+ cd61+ 52.248486
8 erythroblast:basophilic 50.068324
14 lymphoid progenitor cell:common 48.644547
22 muscle cell:smooth 46.465657
Top 5 cell types in AML:
Level Three Cell Type median_distance
28 schwann cell 128.321993
23 mutant blast:npm1 78.755366
21 monocyte:non-classical 77.154945
20 monocyte 75.975206
2 b cell:immature 75.703532
Bottom 5 cell types in AML:
Level Three Cell Type median_distance
3 cell:cd34+ cd61+ 57.285741
6 endothelial cell of artery 57.188080
8 erythroblast:basophilic 49.722590
12 hematopoietic stem and progenitor cell:spink2+ 37.139774
14 lymphoid progenitor cell:common 33.321110
Top 5 cell types in NBM:
Level Three Cell Type median_distance
14 lymphoid progenitor cell:common 103.775147
26 plasma cell 99.888396
3 cell:cd34+ cd61+ 99.005114
10 granulocyte monocyte progenitor cell 96.974877
2 b cell:immature 96.731118
Bottom 5 cell types in NBM:
Level Three Cell Type median_distance
20 monocyte 87.198437
19 mesenchymal stem/stromal cell:thy1+ 86.924087
21 monocyte:non-classical 83.017617
27 schwann cell 78.933059
6 endothelial cell of artery 71.598061
# Report distance variability for the current cell type level.
calculate_regional_variability(
    df_all_edges_with_cell_type_level,
    cell_type_level,
)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
AML 79.39 44.80 56.4
NBM 95.30 48.83 51.2
NSM 95.64 51.03 53.4
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Three Cell Type
endothelial cell of artery 77.69 52.16 67.1
muscle cell:smooth 87.24 50.87 58.3
endosteal cell 92.68 52.72 56.9
monocyte:non-classical 88.77 50.37 56.7
adipocyte 89.36 50.19 56.2
mesenchymal stem cell of adipose tissue 87.59 48.55 55.4
monocyte 89.70 49.59 55.3
lymphoid progenitor cell:common 97.64 53.83 55.1
dendritic cell:plasmacytoid 89.61 48.91 54.6
mesenchymal stem/stromal cell:thy1+ 89.52 48.91 54.6
erythroblast:basophilic 92.10 49.70 54.0
t cell:cd4+ alpha-beta 92.64 49.75 53.7
granulocyte monocyte progenitor cell 87.89 47.14 53.6
t cell:cd8+ alpha-beta regulatory 93.04 49.71 53.4
myeloid cell:mature 93.01 49.27 53.0
hematopoietic stem and progenitor cell:spink2+ 93.87 49.72 53.0
cell:cd34+ cd61+ 95.01 49.89 52.5
mutant blast:npm1 84.61 44.08 52.1
erythroblast 90.48 47.17 52.1
erythroid lineage cell 92.67 48.17 52.0
granulocyte monocyte progenitor cell/myeloblast 94.91 49.05 51.7
myeloid cell:intermediate 94.93 49.07 51.7
b cell:immature 96.95 50.01 51.6
b cell 94.57 48.82 51.6
macrophage 92.25 47.60 51.6
hematopoietic stem cell 94.15 48.57 51.6
megakaryocyte:gata1+ 90.24 46.49 51.5
myeloid progenitor cell:common 95.58 49.01 51.3
plasma cell 94.90 48.57 51.2
unknown cell 96.31 48.92 50.8
schwann cell 87.43 43.59 49.9
megakaryocyte:gata1- 97.85 46.95 48.0
# Draw and save all four figures for the current cell type level.
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm='area')
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Or, density_norm="count" or "area" based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count")