import numpy as np
import pandas as pd
import os
import json
import requests
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
import plotly.express as px
from _cde_compute_edges_from_nodes import *
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# suppress warnings
import warnings
warnings.filterwarnings("ignore")
Distance Analysis: lymphnode-codex-yale
Analyze and visualize cell-to-nearest-endothelial-cell distance distributions for the `lymphnode-codex-yale` dataset.
# Root data directory; every other path below is joined onto it at use time.
basepath = "/u/yashjain/hra-cell-distance-analysis/data"
# Name of the dataset analysed here; also used in figure titles and file names.
dataset_dir = "lymphnode-codex-yale"
# Directories (relative to `basepath`) holding the `*-nodes.csv` and `*-edges.csv` files.
data_filedir = os.path.join("data-processed-nodes-with-harmonized-cell-types", dataset_dir)
output_edge_dir = os.path.join("data-processed-edges", dataset_dir)
# Directory (relative to `basepath`) where figures are saved.
figures_output_dir = "generated-figures"
# Function to load your data
def load_data(path, edges=False):
    """Read a CSV file into a DataFrame.

    Args:
        path: Path of the CSV file to read.
        edges: When True, the file is read as header-less and its columns are
            named ``cell_id, x1, y1, z1, x2, y2, z2``. When False (default),
            the first row of the file is used as the header.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    if edges:
        column_names = ['cell_id', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2']
        return pd.read_csv(path, header=None, names=column_names)
    return pd.read_csv(path)
# Function to read all files ending with "-nodes.csv" in the `data_filedir` directory into a single DataFrame.
# Another additional column `Dataset` is added to identify the dataset name which comes from the filename before the `-nodes.csv` suffix.
# Additionally, function reads all files ending with "-edges.csv" in the `output_edge_dir` directory into a single DataFrame.
# Three additional columns are added "Dataset", "Anchor Cell Type", and "Anchor Cell Type Level" to identify the dataset name, anchor cell type, and anchor cell type level respectively which come from the filename before the `.csv` suffix.
# The three additional columns are created by splitting the filename on the `-` character, and extracting the relevant parts.
# On splitting, the first part is the dataset name, second part is the anchor cell type level, and third part is the anchor cell type, and last part is the `edges` suffix.
# When reading files, check if the file has the correct format (i.e., ends with `-edges.csv`).
# Additionally, the function merges the edges DataFrame with the nodes DataFrame to get the cell type information for the anchor cells.
# This is done by reading the corresponding nodes file from the `data_filedir` directory for each edges file, and merging it with the edges DataFrame on the `cell_id` column.
# The merged DataFrame contains the edges with additional columns for the cell type information.
# The function returns three DataFrames:
# 1. `merged_nodes`: DataFrame containing all nodes with an additional column `Dataset`.
# 2. `merged_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, and `Anchor Cell Type Level`.
# 3. `merged_nodes_for_all_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, `Anchor Cell Type Level`, and the cell type information for cells.
def read_all_edge_datasets(basepath, data_filedir, output_edge_dir):
    """Load every edges file of a dataset together with its matching nodes file.

    Edge files are expected to be named
    ``<dataset>-<anchor cell type level>-<anchor cell type>-edges.csv`` and to
    carry a header row. For each edge file the nodes file
    ``<dataset>-nodes.csv`` is looked up in ``data_filedir``; the ``cell_id``
    of an edge is treated as the row position of the cell in that nodes file.

    Args:
        basepath: Root data directory.
        data_filedir: Directory (relative to ``basepath``) with the nodes files.
        output_edge_dir: Directory (relative to ``basepath``) with the edges files.

    Returns:
        A tuple ``(merged_nodes, merged_edges, merged_nodes_for_all_edges)``:
        all nodes with a ``Dataset`` column (each dataset included once), all
        edges with ``Dataset``, ``Anchor Cell Type`` and
        ``Anchor Cell Type Level`` columns, and the edges additionally joined
        with the three harmonized cell type columns of the edge's cell.

    Raises:
        ValueError: If an edges file name does not split into exactly three
            ``-``-separated parts, or if no edges file is found.
    """
    edges_suffix = "-edges.csv"
    cell_type_columns = ['Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type']
    edges_dir = os.path.join(basepath, output_edge_dir)

    nodes_by_dataset = {}  # dataset name -> nodes DataFrame, read once per dataset
    all_edges_files = []
    all_nodes_edges_files = []

    # Sort the listing so the row order of the result is reproducible.
    for file in sorted(os.listdir(edges_dir)):
        if not file.endswith(edges_suffix):
            continue

        name_parts = file[:-len(edges_suffix)].split('-')
        if len(name_parts) != 3:
            raise ValueError(
                f"Cannot parse edges file name '{file}': expected "
                f"'<dataset>-<anchor cell type level>-<anchor cell type>{edges_suffix}'."
            )
        dataset_name, anchor_cell_type_level, anchor_cell_type = name_parts

        edges_df = load_data(os.path.join(edges_dir, file), edges=False)
        edges_df['Dataset'] = dataset_name
        edges_df['Anchor Cell Type'] = anchor_cell_type
        edges_df['Anchor Cell Type Level'] = anchor_cell_type_level
        edges_df = edges_df.rename(columns={"distance": "Distance"})
        all_edges_files.append(edges_df)

        # Several edge files (one per anchor level) share one nodes file; read
        # it only once so the merged nodes table does not contain duplicates.
        if dataset_name not in nodes_by_dataset:
            nodes_file_path = os.path.join(basepath, data_filedir, f"{dataset_name}-nodes.csv")
            nodes_df = load_data(nodes_file_path)
            nodes_df['Dataset'] = dataset_name
            nodes_by_dataset[dataset_name] = nodes_df
        nodes_df = nodes_by_dataset[dataset_name]

        # Positional lookup table: row i of the nodes file describes cell_id i.
        cell_types = nodes_df[cell_type_columns].reset_index(drop=True)
        edges_nodes_df = pd.merge(edges_df, cell_types, how='left', left_on='cell_id', right_index=True)
        all_nodes_edges_files.append(edges_nodes_df)

    if not all_edges_files:
        raise ValueError(f"No '*{edges_suffix}' files found in '{edges_dir}'.")

    merged_edges = pd.concat(all_edges_files, ignore_index=True)
    merged_nodes = pd.concat(list(nodes_by_dataset.values()), ignore_index=True)
    merged_nodes_for_all_edges = pd.concat(all_nodes_edges_files, ignore_index=True)

    return merged_nodes, merged_edges, merged_nodes_for_all_edges
def create_directory(directory):
    """Create ``directory`` (including parents) and report what happened.

    Prints one message when the directory was created and another when the
    path already existed.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # Attempt the creation directly instead of checking first, so a
        # directory created concurrently is reported rather than raising.
        os.makedirs(directory)
    except FileExistsError:
        print(f"Directory '{directory}' already exists.")
    else:
        print(f"Directory '{directory}' created successfully.")
Get initial statistics and identify endothelial cell categories for dataset.
# Load all nodes, all edges, and the edges joined with the cell type columns.
df_all_nodes, df_all_edges, df_all_edges_with_cell_types = read_all_edge_datasets(basepath, data_filedir, output_edge_dir)
df_all_nodes.head(5)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 26180.447514 | 8865.939227 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
1 | 15214.596774 | 11441.217742 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
2 | 7498.846154 | 12306.583333 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
3 | 26618.031142 | 18349.024221 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
4 | 8325.500000 | 12150.222222 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
# Print the total number of unique cell types across all datasets. Compute separately for each cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type).
print("Total number of unique cell types per cell type annnotation level:")
unique_cell_types = {
    level: df_all_nodes[level].nunique()
    for level in ('Original Cell Type', 'Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type')
}
for cell_type, count in unique_cell_types.items():
    print(f"{cell_type}: {count}")
Total number of unique cell types per cell type annnotation level:
Original Cell Type: 34
Level Three Cell Type: 29
Level Two Cell Type: 10
Level One Cell Type: 3
# Save the unique cell types containing "endothelial" in name per cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type) to a dictionary where the key is the level and the value is a list of unique cell types.
endothelial_cell_types = {}
for level in ('Original Cell Type', 'Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type'):
    # Case-insensitive substring match; missing values count as "no match".
    is_endothelial = df_all_nodes[level].str.contains("endothelial", case=False, na=False)
    endothelial_cell_types[level] = df_all_nodes[is_endothelial][level].unique().tolist()

print("\nEndothelial cell types per cell type annotation level:")
for level, cell_types in endothelial_cell_types.items():
    print(f"\n{level}:")
    for cell in cell_types:
        print(f"  - {cell}")
Endothelial cell types per cell type annotation level:
Original Cell Type:
- Endothelial
Level Three Cell Type:
- endothelial cell
Level Two Cell Type:
- endothelial cell
Level One Cell Type:
- endothelial cell
type_field_list = ["Level Three Cell Type", "Level Two Cell Type", "Level One Cell Type"]  # Skipping Original Cell Type as it is not a hierarchical level.

# Define the anchor cell type (type of endothelial cell) for each level in type_field_list based on available categories in the previous cell. The distance analysis at all three levels will be limited to the specified anchor cell type.
anchor_cell_type_dict = {
    'Level Three Cell Type': 'endothelial cell',
    'Level Two Cell Type': 'endothelial cell',
    'Level One Cell Type': 'endothelial cell',
}
Process datasets to add region information to Nodes files.
# Preview the first rows of the nodes table.
df_all_nodes.head()
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 26180.447514 | 8865.939227 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
1 | 15214.596774 | 11441.217742 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
2 | 7498.846154 | 12306.583333 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
3 | 26618.031142 | 18349.024221 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
4 | 8325.500000 | 12150.222222 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 |
# Create a new column "Unique Region" in df_all_nodes based on the "Dataset" column.
# For this dataset every dataset is its own region, so the dataset name is copied as-is.
df_all_nodes['Unique Region'] = df_all_nodes['Dataset']
# df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].map(region_map)
# df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].str.split('-').str[1].map(region_map)

# Check if the new columns are created correctly.
df_all_nodes[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | LN00560 | LN00560 |
1 | LN00560 | LN00560 |
2 | LN00560 | LN00560 |
3 | LN00560 | LN00560 |
4 | LN00560 | LN00560 |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_nodes['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_nodes['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
print("\nNumber of unique datasets per unique region:")
for region in df_all_nodes['Unique Region'].unique():
    num_datasets = df_all_nodes[df_all_nodes['Unique Region'] == region]['Dataset'].nunique()
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['LN00560' 'LN00837' 'LN22921' 'LN24336' 'LN27766']
Total number of unique regions: 5
Number of unique datasets per unique region:
LN00560: 1
LN00837: 1
LN22921: 1
LN24336: 1
LN27766: 1
Process datasets to add region information to Edges files.
df_all_edges.head(5)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 26180.447514 | 8865.939227 | 0 | 26183.569536 | 8838.370861 | 0 | 27.744582 | LN00560 | endothelial cell | Level Three Cell Type |
1 | 1297 | 26171.400000 | 8972.626667 | 0 | 26264.875000 | 8904.781250 | 0 | 115.501412 | LN00560 | endothelial cell | Level Three Cell Type |
2 | 3106 | 26140.166667 | 8974.675000 | 0 | 26054.031088 | 8889.103627 | 0 | 121.415805 | LN00560 | endothelial cell | Level Three Cell Type |
3 | 3497 | 26051.509259 | 8961.282407 | 0 | 26054.031088 | 8889.103627 | 0 | 72.222822 | LN00560 | endothelial cell | Level Three Cell Type |
4 | 49397 | 26015.800000 | 8832.882759 | 0 | 26054.031088 | 8889.103627 | 0 | 67.988250 | LN00560 | endothelial cell | Level Three Cell Type |
# Process the edge data to create new columns "Unique Region" based on the "Dataset" column, similar to how it was done for the node data.
df_all_edges['Unique Region'] = df_all_edges['Dataset']
# df_all_edges['Unique Region'] = df_all_edges['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | LN00560 | LN00560 |
1 | LN00560 | LN00560 |
2 | LN00560 | LN00560 |
3 | LN00560 | LN00560 |
4 | LN00560 | LN00560 |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_edges['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_edges['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
print("\nNumber of unique datasets per unique region:")
for region in df_all_edges['Unique Region'].unique():
    num_datasets = df_all_edges[df_all_edges['Unique Region'] == region]['Dataset'].nunique()
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['LN00560' 'LN00837' 'LN22921' 'LN24336' 'LN27766']
Total number of unique regions: 5
Number of unique datasets per unique region:
LN00560: 1
LN00837: 1
LN22921: 1
LN24336: 1
LN27766: 1
# Add the "Unique Region" column to the edges table that carries the cell type columns.
df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset']
# df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges_with_cell_types[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | LN00560 | LN00560 |
1 | LN00560 | LN00560 |
2 | LN00560 | LN00560 |
3 | LN00560 | LN00560 |
4 | LN00560 | LN00560 |
df_all_nodes.head(1)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 26180.447514 | 8865.939227 | FDC | dendritic cell:follicular | follicular dendritic cell | CL:0000442 | skos:exactMatch | dendritic cell | dendritic cell | CL:0000451 | skos:exactMatch | mesenchymal cell | mesenchymal cell | CL:0008019 | skos:exactMatch | LN00560 | LN00560 |
df_all_edges.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 26180.447514 | 8865.939227 | 0 | 26183.569536 | 8838.370861 | 0 | 27.744582 | LN00560 | endothelial cell | Level Three Cell Type | LN00560 |
df_all_edges_with_cell_types.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Level Three Cell Type | Level Two Cell Type | Level One Cell Type | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 26180.447514 | 8865.939227 | 0 | 26183.569536 | 8838.370861 | 0 | 27.744582 | LN00560 | endothelial cell | Level Three Cell Type | dendritic cell:follicular | dendritic cell | mesenchymal cell | LN00560 |
Node Analysis
# Plot number of cells per cell type in the same plot. Color by cell type and unique region. Output figure saved in existing `figures_output_dir`.
def plot_cells_per_celltype(df, type_field, output_dir):
    """Draw and save a count plot of cells per cell type, split by region.

    The figure is written to ``output_dir`` as PNG and SVG, shown, then closed.
    The file names and the title use the module-level ``dataset_dir``.

    Args:
        df: Nodes DataFrame with the ``type_field`` and ``Unique Region`` columns.
        type_field: Name of the cell type column to put on the x-axis.
        output_dir: Existing directory the figure files are written to.
    """
    plt.figure(figsize=(20, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.countplot(data=df, x=type_field, palette='Spectral', hue='Unique Region')
    plt.title(f'Number of Cells per {type_field} in `{dataset_dir}`')
    plt.xticks(rotation=90)
    plt.legend(title='Unique Region', bbox_to_anchor=(0.85, 1), loc='upper left')
    plt.xlabel(type_field)
    # For numbers on y-axis, do not use scientific notation.
    plt.ticklabel_format(style='plain', axis='y')
    # Set y-axis label
    plt.ylabel('Number of Cells')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Save only after all styling is applied, so the files on disk carry the
    # legend, axis labels and grid that the displayed figure has.
    for extension in ('png', 'svg'):
        plt.savefig(
            os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}.{extension}'),
            dpi=300,
            bbox_inches='tight',
            pad_inches=0.5,
        )

    # Show the plot
    plt.show()
    plt.close()


for type_field in type_field_list:
    plot_cells_per_celltype(df_all_nodes, type_field, os.path.join(basepath, figures_output_dir))
Distance Analysis
# Get mean, median, minimum, maximum distance per unique region per anchor cell type.
df_distance_stats = df_all_edges_with_cell_types.groupby(['Unique Region', 'Anchor Cell Type', 'Anchor Cell Type Level']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
# Display the distance statistics DataFrame.
df_distance_stats
Unique Region | Anchor Cell Type | Anchor Cell Type Level | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|---|
0 | LN00560 | endothelial cell | Level One Cell Type | 86.248270 | 78.322089 | 4.875021 | 199.999931 |
1 | LN00560 | endothelial cell | Level Three Cell Type | 86.248270 | 78.322089 | 4.875021 | 199.999931 |
2 | LN00560 | endothelial cell | Level Two Cell Type | 86.248270 | 78.322089 | 4.875021 | 199.999931 |
3 | LN00837 | endothelial cell | Level One Cell Type | 65.648760 | 55.365786 | 6.102839 | 199.990502 |
4 | LN00837 | endothelial cell | Level Three Cell Type | 65.648760 | 55.365786 | 6.102839 | 199.990502 |
5 | LN00837 | endothelial cell | Level Two Cell Type | 65.648760 | 55.365786 | 6.102839 | 199.990502 |
6 | LN22921 | endothelial cell | Level One Cell Type | 69.028245 | 58.886066 | 4.797734 | 199.999849 |
7 | LN22921 | endothelial cell | Level Three Cell Type | 69.028245 | 58.886066 | 4.797734 | 199.999849 |
8 | LN22921 | endothelial cell | Level Two Cell Type | 69.028245 | 58.886066 | 4.797734 | 199.999849 |
9 | LN24336 | endothelial cell | Level One Cell Type | 62.287459 | 51.618910 | 4.235994 | 199.999774 |
10 | LN24336 | endothelial cell | Level Three Cell Type | 62.287459 | 51.618910 | 4.235994 | 199.999774 |
11 | LN24336 | endothelial cell | Level Two Cell Type | 62.287459 | 51.618910 | 4.235994 | 199.999774 |
12 | LN27766 | endothelial cell | Level One Cell Type | 48.661838 | 37.794400 | 5.664908 | 199.999790 |
13 | LN27766 | endothelial cell | Level Three Cell Type | 48.661838 | 37.794400 | 5.664908 | 199.999790 |
14 | LN27766 | endothelial cell | Level Two Cell Type | 48.661838 | 37.794400 | 5.664908 | 199.999790 |
Level One Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level One Cell Type'
# Keep only the edges computed for this level and its configured anchor cell type.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[(df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level) & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])]

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
df_distance_stats_cell_type_level
Level One Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | immune cell | LN00560 | 86.230167 | 78.283379 | 4.875021 | 199.999931 |
1 | immune cell | LN00837 | 65.658556 | 55.426203 | 6.102839 | 199.990502 |
2 | immune cell | LN22921 | 69.098860 | 58.969275 | 4.797734 | 199.999849 |
3 | immune cell | LN24336 | 62.325526 | 51.657698 | 4.235994 | 199.999774 |
4 | immune cell | LN27766 | 48.323776 | 37.625713 | 5.664908 | 199.999790 |
5 | mesenchymal cell | LN00560 | 88.029798 | 81.864293 | 6.239511 | 199.836667 |
6 | mesenchymal cell | LN00837 | 64.156297 | 47.402660 | 9.376279 | 199.972438 |
7 | mesenchymal cell | LN22921 | 42.860136 | 30.269759 | 6.260871 | 199.945312 |
8 | mesenchymal cell | LN24336 | 45.284939 | 34.253833 | 7.974461 | 199.351793 |
9 | mesenchymal cell | LN27766 | 63.171914 | 49.198156 | 6.110041 | 199.979704 |
# Get top five and bottom five cell types with respect to mean distance in each unique region separately.
def get_top_bottom_cell_types_by_mean(df, cell_type_level, unique_region, top_n=5):
    """Rank the cell types of one region by mean distance.

    Args:
        df: Edges DataFrame with ``Unique Region``, ``Distance`` and the
            ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the ranking to.
        top_n: Number of cell types to return from each end of the ranking.

    Returns:
        A tuple ``(top_cell_types, bottom_cell_types)`` of DataFrames with the
        columns ``cell_type_level`` and ``mean_distance``, both sorted by
        descending mean distance. When the region has fewer than
        ``2 * top_n`` cell types the two frames overlap.
    """
    # Filter the DataFrame for the specified unique region
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level, calculate mean distance, and
    # sort so the largest mean distance comes first.
    df_sorted = (
        df_filtered.groupby(cell_type_level)
        .agg(mean_distance=('Distance', 'mean'))
        .reset_index()
        .sort_values(by='mean_distance', ascending=False)
    )

    # Get top N and bottom N cell types
    return df_sorted.head(top_n), df_sorted.tail(top_n)
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level One Cell Type mean_distance
1 mesenchymal cell 88.029798
0 immune cell 86.230167
Bottom 5 cell types in LN00560:
Level One Cell Type mean_distance
1 mesenchymal cell 88.029798
0 immune cell 86.230167
Top 5 cell types in LN00837:
Level One Cell Type mean_distance
0 immune cell 65.658556
1 mesenchymal cell 64.156297
Bottom 5 cell types in LN00837:
Level One Cell Type mean_distance
0 immune cell 65.658556
1 mesenchymal cell 64.156297
Top 5 cell types in LN22921:
Level One Cell Type mean_distance
0 immune cell 69.098860
1 mesenchymal cell 42.860136
Bottom 5 cell types in LN22921:
Level One Cell Type mean_distance
0 immune cell 69.098860
1 mesenchymal cell 42.860136
Top 5 cell types in LN24336:
Level One Cell Type mean_distance
0 immune cell 62.325526
1 mesenchymal cell 45.284939
Bottom 5 cell types in LN24336:
Level One Cell Type mean_distance
0 immune cell 62.325526
1 mesenchymal cell 45.284939
Top 5 cell types in LN27766:
Level One Cell Type mean_distance
1 mesenchymal cell 63.171914
0 immune cell 48.323776
Bottom 5 cell types in LN27766:
Level One Cell Type mean_distance
1 mesenchymal cell 63.171914
0 immune cell 48.323776
# Get top five and bottom five cell types with respect to median distance in each unique region separately.
def get_top_bottom_cell_types_by_median(df, cell_type_level, unique_region, top_n=5):
    """Rank the cell types of one region by median distance.

    Args:
        df: Edges DataFrame with ``Unique Region``, ``Distance`` and the
            ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the ranking to.
        top_n: Number of cell types to return from each end of the ranking.

    Returns:
        A tuple ``(top_cell_types, bottom_cell_types)`` of DataFrames with the
        columns ``cell_type_level`` and ``median_distance``, both sorted by
        descending median distance. When the region has fewer than
        ``2 * top_n`` cell types the two frames overlap.
    """
    # Filter the DataFrame for the specified unique region
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level, calculate median distance, and
    # sort so the largest median distance comes first.
    df_sorted = (
        df_filtered.groupby(cell_type_level)
        .agg(median_distance=('Distance', 'median'))
        .reset_index()
        .sort_values(by='median_distance', ascending=False)
    )

    # Get top N and bottom N cell types
    return df_sorted.head(top_n), df_sorted.tail(top_n)
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level One Cell Type median_distance
1 mesenchymal cell 81.864293
0 immune cell 78.283379
Bottom 5 cell types in LN00560:
Level One Cell Type median_distance
1 mesenchymal cell 81.864293
0 immune cell 78.283379
Top 5 cell types in LN00837:
Level One Cell Type median_distance
0 immune cell 55.426203
1 mesenchymal cell 47.402660
Bottom 5 cell types in LN00837:
Level One Cell Type median_distance
0 immune cell 55.426203
1 mesenchymal cell 47.402660
Top 5 cell types in LN22921:
Level One Cell Type median_distance
0 immune cell 58.969275
1 mesenchymal cell 30.269759
Bottom 5 cell types in LN22921:
Level One Cell Type median_distance
0 immune cell 58.969275
1 mesenchymal cell 30.269759
Top 5 cell types in LN24336:
Level One Cell Type median_distance
0 immune cell 51.657698
1 mesenchymal cell 34.253833
Bottom 5 cell types in LN24336:
Level One Cell Type median_distance
0 immune cell 51.657698
1 mesenchymal cell 34.253833
Top 5 cell types in LN27766:
Level One Cell Type median_distance
1 mesenchymal cell 49.198156
0 immune cell 37.625713
Bottom 5 cell types in LN27766:
Level One Cell Type median_distance
1 mesenchymal cell 49.198156
0 immune cell 37.625713
# Calculate regional variability
def calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level):
    """Print distance variability per region and per cell type.

    For each grouping (``Unique Region``, then ``cell_type_level``) the mean
    and standard deviation of ``Distance`` are rounded to two decimals and a
    coefficient of variation ``CV (%) = std / mean * 100`` (one decimal) is
    derived from the rounded values. Both tables are printed; nothing is
    returned.

    Args:
        df_all_edges_with_cell_type_level: Edges DataFrame with the columns
            ``Unique Region``, ``Distance`` and ``cell_type_level``.
        cell_type_level: Name of the cell type column to group by.
    """
    regional_variability = (
        df_all_edges_with_cell_type_level.groupby('Unique Region')['Distance']
        .agg(mean='mean', std='std')
        .round(2)
    )

    # Add CV as percentage
    regional_variability['CV (%)'] = (regional_variability['std'] / regional_variability['mean'] * 100).round(1)

    print("\nRegional Variability Analysis:")
    print("Mean: Average distance in each region")
    print("Std: Standard deviation of distances")
    print("CV: Coefficient of Variation (std/mean * 100%)")
    print(regional_variability)

    # Calculate variability for each cell type
    cell_type_variability = (
        df_all_edges_with_cell_type_level.groupby(cell_type_level)['Distance']
        .agg(mean='mean', std='std')
        .round(2)
    )

    # Add CV as percentage
    cell_type_variability['CV (%)'] = (cell_type_variability['std'] / cell_type_variability['mean'] * 100).round(1)

    print("\nCell Type Variability Analysis (sorted by CV):")
    print(cell_type_variability.sort_values('CV (%)', ascending=False))
# Print the per-region and per-cell-type variability tables for the selected cell type level.
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
LN00560 86.25 50.97 59.1
LN00837 65.65 42.86 65.3
LN22921 69.03 43.55 63.1
LN24336 62.29 41.07 65.9
LN27766 48.66 35.83 73.6
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level One Cell Type
mesenchymal cell 65.39 48.18 73.7
immune cell 67.80 44.76 66.0
# Define the standard region sequence for plots (Get all unique values in Unique Region column as a list.)
regions = df_all_edges_with_cell_types['Unique Region'].unique().tolist()
print("\nRegions in the data:")
print(regions)
Regions in the data:
['LN00560', 'LN00837', 'LN22921', 'LN24336', 'LN27766']
# Generate Violin Plot
def plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm='area'):
    """Draw and save a violin plot of distances per cell type.

    The figure is written to ``output_dir`` as PNG and SVG, shown, then closed.
    The file names use the module-level ``dataset_dir``.

    Args:
        df_all_edges_with_cell_type_level: Edges DataFrame with ``Distance``
            and the ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to put on the x-axis.
        output_dir: Existing directory the figure files are written to.
        density_norm: Passed through to ``sns.violinplot`` as ``density_norm``.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 2})
    plt.figure(figsize=(10, 6))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    sns.violinplot(data=df_all_edges_with_cell_type_level, x=cell_type_level, y="Distance", density_norm=density_norm, common_norm=True, cut=0, inner="box", split=False, palette='Spectral', alpha=.9)

    sns.set_theme(style="whitegrid")
    sns.set_context("paper")

    font_size = 10
    plt.legend(fontsize=font_size)

    plt.title(f'Violin Plot of distances by {cell_type_level} (Density Normalization: {density_norm})', fontsize=font_size)

    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)

    # Increase font size for all text in the figure
    plt.xticks(fontsize=font_size)
    plt.xticks(rotation=90)
    plt.yticks(fontsize=font_size)

    plt.tight_layout()

    for extension in ('png', 'svg'):
        plt.savefig(
            os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype_{cell_type_level}.{extension}'),
            dpi=300,
            bbox_inches='tight',
            pad_inches=0.5,
        )

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm='area')
# Boxplots of distribution of distances by cell type and region.
def plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Draw and save box plots of distances per cell type, split by region.

    Regions are ordered as in the module-level ``regions`` list, restricted to
    those present in the data. The figure is written to ``output_dir`` as PNG
    and SVG, shown, then closed. The file names use the module-level
    ``dataset_dir``. The input DataFrame is not modified.

    Args:
        df_all_edges_with_cell_type_level: Edges DataFrame with ``Distance``,
            ``Unique Region`` and the ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to put on the x-axis.
        output_dir: Existing directory the figure files are written to.
    """
    plt.figure(figsize=(16, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    # Create categorical type with only the regions that exist in the data
    present_regions = set(df_all_edges_with_cell_type_level['Unique Region'].unique())
    available_regions = [r for r in regions if r in present_regions]
    # Work on a new frame so the caller's column keeps its original dtype.
    plot_df = df_all_edges_with_cell_type_level.assign(**{
        'Unique Region': pd.Categorical(
            df_all_edges_with_cell_type_level['Unique Region'],
            categories=available_regions,
            ordered=True,
        )
    })

    # Make box plot.
    sns.boxplot(data=plot_df, x=cell_type_level, y='Distance', hue='Unique Region', showfliers=False, palette='Spectral')  # viridis or Spectral palette for better color distinction
    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)
    plt.title(f'Distribution of distances by {cell_type_level} and region', fontsize=font_size)
    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)
    plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

    plt.tight_layout()

    for extension in ('png', 'svg'):
        plt.savefig(
            os.path.join(output_dir, f'{dataset_dir}_distance_distribution_boxplots_by_region_{cell_type_level}.{extension}'),
            dpi=300,
            bbox_inches='tight',
            pad_inches=0.5,
        )

    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close()
# Draw the per-region box plots for the selected cell type level and save them under the figures directory.
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Heatmap of median distances by cell type and region.
def plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Draw and save an annotated heatmap of median distances.

    Rows of the heatmap are the values of ``cell_type_level`` and columns
    are the values of ``'Unique Region'``; each cell shows the median of
    ``'Distance'`` for that pair, formatted with one decimal. The figure is
    written to ``output_dir`` as PNG and SVG and then shown.

    Args:
        df_all_edges_with_cell_type_level: DataFrame providing the columns
            ``'Distance'``, ``'Unique Region'`` and ``cell_type_level``.
        cell_type_level: Name of the cell-type column used as heatmap rows.
        output_dir: Directory that receives the figure files; it is created
            if it does not exist yet.

    Note:
        Reads the module-level ``dataset_dir`` to build the file names.
    """
    pivot_data = df_all_edges_with_cell_type_level.pivot_table(
        values='Distance',
        index=cell_type_level,
        columns='Unique Region',
        aggfunc='median',
    )

    plt.figure(figsize=(15, 10))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='Spectral')
    plt.title(f'Heatmap of median distances by {cell_type_level}', fontsize=12)

    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)

    plt.xlabel('Unique Region', fontsize=font_size)
    plt.ylabel(f'{cell_type_level}', fontsize=font_size)

    plt.tight_layout()

    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    file_stem = f'{dataset_dir}_distance_distribution_heatmap_{cell_type_level}'
    for extension in ('png', 'svg'):
        plt.savefig(os.path.join(output_dir, f'{file_stem}.{extension}'), dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw the median-distance heatmap for the current cell type level; the output directory passed is <basepath>/<figures_output_dir>.
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Generate Violin Plot per unique region.
def plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm="area"):
    """Draw one violin plot per region, stacked vertically, and save the figure.

    Every panel shows the distribution of ``'Distance'`` per value of
    ``cell_type_level`` for one region. Cell types share the same order and
    colour in all panels; only the last panel carries x tick labels. The
    figure is written to ``output_dir`` as PNG and SVG and then shown.

    Args:
        df_all_edges_with_cell_type_level: DataFrame providing the columns
            ``'Distance'``, ``'Unique Region'`` and ``cell_type_level``.
        cell_type_level: Name of the cell-type column plotted on the x axis.
        output_dir: Directory that receives the figure files; it is created
            if it does not exist yet.
        density_norm: Forwarded to ``sns.violinplot`` as ``density_norm``
            and echoed in the figure title.

    Note:
        Panels are created for the module-level ``regions`` sequence, and
        the module-level ``dataset_dir`` is used in the title and file names.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 1})
    font_size = 10

    # One row per region instead of a fixed five rows, so the loop below can
    # never index past the axes array. Five regions still give figsize (10, 20).
    region_list = list(regions)
    n_rows = max(len(region_list), 1)
    # squeeze=False keeps a 2-D axes array even for a single row.
    fig, axs = plt.subplots(n_rows, 1, figsize=(10, 4 * n_rows), squeeze=False)
    axs = axs.ravel()
    fig.suptitle(f'Distance distribution per {cell_type_level} in `{dataset_dir}` (density normalization = {density_norm})', fontsize=font_size, y=1)
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    # Keep the sequence of Cell Types consistent across plots.
    cell_types = sorted(df_all_edges_with_cell_type_level[cell_type_level].unique())

    # Create a color palette based on the number of unique classes
    color_palette = sns.color_palette("Spectral", n_colors=len(cell_types))

    # Create a dictionary mapping class to color
    class_color_dict = dict(zip(cell_types, color_palette))

    last_index = len(region_list) - 1
    for i, region in enumerate(region_list):
        ax = axs[i]
        data_reg = df_all_edges_with_cell_type_level[df_all_edges_with_cell_type_level['Unique Region'] == region]
        sns.violinplot(
            data=data_reg,
            x=cell_type_level,
            y="Distance",
            density_norm=density_norm,
            common_norm=True,
            cut=0,
            inner="box",
            split=False,
            palette=class_color_dict,
            alpha=.9,
            ax=ax,
            hue=cell_type_level,
            legend=False,
            order=cell_types,
            fill=True,
        )
        ax.set_title(region, fontsize=font_size)
        ax.set_xlabel('', fontsize=font_size)
        ax.set_ylabel('Distance (\u03bcm)', fontsize=font_size)

        # only show xtick labels for the last subplot
        if i < last_index:
            ax.set_xticklabels([])
        else:
            ax.set_xticklabels(cell_types, fontsize=font_size, rotation=90, ha='right')
        ax.tick_params(axis='both', labelsize=font_size)

    # Use fig.text for precise label positioning
    fig.text(0.5, -0.02, f'{cell_type_level}', ha='center', va='bottom', fontsize=font_size)

    plt.tight_layout()

    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    file_stem = f'{dataset_dir}_violin_plots_all_regions_{cell_type_level}'
    for extension in ('png', 'svg'):
        plt.savefig(os.path.join(output_dir, f'{file_stem}.{extension}'), dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw the stacked per-region violin plots for the current cell type level.
# density_norm="count" or "area" can be used based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count")
Level Two Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Two Cell Type'

# Keep only the edges computed for this level and its anchor cell type.
# The explicit copy makes the subset an independent DataFrame, because
# plot_distance_distribution_boxplots_by_region later assigns a column on it.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
].copy()

# One row of summary statistics per (cell type, region) pair.
df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
df_distance_stats_cell_type_level
Level Two Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | b cell | LN00560 | 88.672883 | 81.031234 | 4.875021 | 199.999931 |
1 | b cell | LN00837 | 71.823837 | 61.529348 | 6.638355 | 199.990502 |
2 | b cell | LN22921 | 82.039046 | 72.283854 | 4.797734 | 199.999849 |
3 | b cell | LN24336 | 64.496210 | 53.491989 | 5.385425 | 199.999276 |
4 | b cell | LN27766 | 58.734683 | 45.181006 | 6.945505 | 199.999790 |
5 | dendritic cell | LN00560 | 69.328460 | 57.716266 | 6.239511 | 199.990815 |
6 | dendritic cell | LN00837 | 56.462073 | 48.013498 | 6.578968 | 199.972438 |
7 | dendritic cell | LN22921 | 73.651623 | 60.772678 | 7.006536 | 199.967413 |
8 | dendritic cell | LN24336 | 59.716750 | 50.896772 | 7.264940 | 199.995753 |
9 | dendritic cell | LN27766 | 40.751088 | 31.531565 | 7.060869 | 199.936083 |
10 | lymphoid cell | LN00560 | 87.916868 | 81.987260 | 6.472932 | 199.985560 |
11 | lymphoid cell | LN00837 | 47.074148 | 39.426220 | 6.102839 | 199.566202 |
12 | lymphoid cell | LN22921 | 66.159509 | 52.261744 | 5.680996 | 199.945578 |
13 | lymphoid cell | LN24336 | 71.342591 | 58.007388 | 6.837350 | 199.999774 |
14 | lymphoid cell | LN27766 | 52.863464 | 34.486844 | 6.130197 | 199.989882 |
15 | macrophage | LN00560 | 78.607505 | 68.799241 | 7.336973 | 199.994850 |
16 | macrophage | LN00837 | 56.992624 | 50.840886 | 9.008524 | 199.753164 |
17 | macrophage | LN22921 | 54.901756 | 47.901377 | 6.583548 | 199.929894 |
18 | macrophage | LN24336 | 79.116158 | 67.811076 | 7.957836 | 199.937893 |
19 | macrophage | LN27766 | 51.519747 | 48.035642 | 7.765481 | 197.595104 |
20 | mast cell | LN00560 | 99.372310 | 96.167629 | 10.595659 | 199.849460 |
21 | mast cell | LN00837 | 65.670488 | 55.672843 | 10.448412 | 197.309722 |
22 | mast cell | LN22921 | 52.593234 | 44.533122 | 5.595661 | 198.746019 |
23 | mast cell | LN24336 | 85.567104 | 74.870233 | 9.498461 | 199.956401 |
24 | mast cell | LN27766 | 43.276579 | 33.640270 | 8.008464 | 199.891157 |
25 | monocyte | LN00560 | 89.144747 | 82.996331 | 7.532715 | 199.991027 |
26 | monocyte | LN00837 | 55.526799 | 45.104939 | 8.037716 | 199.980528 |
27 | monocyte | LN22921 | 50.021763 | 40.595156 | 5.302953 | 199.957450 |
28 | monocyte | LN24336 | 60.846991 | 44.274679 | 7.100671 | 199.964176 |
29 | monocyte | LN27766 | 49.144810 | 45.352857 | 7.274465 | 199.927731 |
30 | muscle cell | LN00560 | 86.522830 | 80.072416 | 7.366499 | 199.827904 |
31 | muscle cell | LN00837 | 59.032529 | 44.506487 | 9.537860 | 196.964611 |
32 | muscle cell | LN22921 | 39.150671 | 28.447433 | 6.260871 | 199.593551 |
33 | muscle cell | LN24336 | 43.536294 | 30.191094 | 7.974461 | 199.351793 |
34 | muscle cell | LN27766 | 62.912204 | 48.334648 | 6.110041 | 199.979704 |
35 | natural killer cell | LN00560 | 86.771851 | 79.778126 | 6.896002 | 199.981161 |
36 | natural killer cell | LN00837 | 57.099449 | 50.561814 | 7.570372 | 199.852375 |
37 | natural killer cell | LN22921 | 57.361924 | 51.657575 | 6.314864 | 199.963468 |
38 | natural killer cell | LN24336 | 85.475053 | 75.480916 | 6.704395 | 199.995869 |
39 | natural killer cell | LN27766 | 35.518688 | 30.995884 | 5.664908 | 199.029670 |
40 | t cell | LN00560 | 86.289131 | 78.881215 | 5.603447 | 199.999549 |
41 | t cell | LN00837 | 64.029926 | 54.434361 | 6.556713 | 199.971033 |
42 | t cell | LN22921 | 60.493650 | 53.470688 | 5.481371 | 199.996959 |
43 | t cell | LN24336 | 56.612429 | 47.942598 | 4.235994 | 199.997767 |
44 | t cell | LN27766 | 40.031514 | 32.772272 | 5.864464 | 199.874886 |
# Get top and bottom cell types (by mean distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the top group, element 1 as the bottom group.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level Two Cell Type mean_distance
4 mast cell 99.372310
5 monocyte 89.144747
0 b cell 88.672883
2 lymphoid cell 87.916868
7 natural killer cell 86.771851
Bottom 5 cell types in LN00560:
Level Two Cell Type mean_distance
7 natural killer cell 86.771851
6 muscle cell 86.522830
8 t cell 86.289131
3 macrophage 78.607505
1 dendritic cell 69.328460
Top 5 cell types in LN00837:
Level Two Cell Type mean_distance
0 b cell 71.823837
4 mast cell 65.670488
8 t cell 64.029926
6 muscle cell 59.032529
7 natural killer cell 57.099449
Bottom 5 cell types in LN00837:
Level Two Cell Type mean_distance
7 natural killer cell 57.099449
3 macrophage 56.992624
1 dendritic cell 56.462073
5 monocyte 55.526799
2 lymphoid cell 47.074148
Top 5 cell types in LN22921:
Level Two Cell Type mean_distance
0 b cell 82.039046
1 dendritic cell 73.651623
2 lymphoid cell 66.159509
8 t cell 60.493650
7 natural killer cell 57.361924
Bottom 5 cell types in LN22921:
Level Two Cell Type mean_distance
7 natural killer cell 57.361924
3 macrophage 54.901756
4 mast cell 52.593234
5 monocyte 50.021763
6 muscle cell 39.150671
Top 5 cell types in LN24336:
Level Two Cell Type mean_distance
4 mast cell 85.567104
7 natural killer cell 85.475053
3 macrophage 79.116158
2 lymphoid cell 71.342591
0 b cell 64.496210
Bottom 5 cell types in LN24336:
Level Two Cell Type mean_distance
0 b cell 64.496210
5 monocyte 60.846991
1 dendritic cell 59.716750
8 t cell 56.612429
6 muscle cell 43.536294
Top 5 cell types in LN27766:
Level Two Cell Type mean_distance
6 muscle cell 62.912204
0 b cell 58.734683
2 lymphoid cell 52.863464
3 macrophage 51.519747
5 monocyte 49.144810
Bottom 5 cell types in LN27766:
Level Two Cell Type mean_distance
5 monocyte 49.144810
4 mast cell 43.276579
1 dendritic cell 40.751088
8 t cell 40.031514
7 natural killer cell 35.518688
# Get top and bottom cell types (by median distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the top group, element 1 as the bottom group.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level Two Cell Type median_distance
4 mast cell 96.167629
5 monocyte 82.996331
2 lymphoid cell 81.987260
0 b cell 81.031234
6 muscle cell 80.072416
Bottom 5 cell types in LN00560:
Level Two Cell Type median_distance
6 muscle cell 80.072416
7 natural killer cell 79.778126
8 t cell 78.881215
3 macrophage 68.799241
1 dendritic cell 57.716266
Top 5 cell types in LN00837:
Level Two Cell Type median_distance
0 b cell 61.529348
4 mast cell 55.672843
8 t cell 54.434361
3 macrophage 50.840886
7 natural killer cell 50.561814
Bottom 5 cell types in LN00837:
Level Two Cell Type median_distance
7 natural killer cell 50.561814
1 dendritic cell 48.013498
5 monocyte 45.104939
6 muscle cell 44.506487
2 lymphoid cell 39.426220
Top 5 cell types in LN22921:
Level Two Cell Type median_distance
0 b cell 72.283854
1 dendritic cell 60.772678
8 t cell 53.470688
2 lymphoid cell 52.261744
7 natural killer cell 51.657575
Bottom 5 cell types in LN22921:
Level Two Cell Type median_distance
7 natural killer cell 51.657575
3 macrophage 47.901377
4 mast cell 44.533122
5 monocyte 40.595156
6 muscle cell 28.447433
Top 5 cell types in LN24336:
Level Two Cell Type median_distance
7 natural killer cell 75.480916
4 mast cell 74.870233
3 macrophage 67.811076
2 lymphoid cell 58.007388
0 b cell 53.491989
Bottom 5 cell types in LN24336:
Level Two Cell Type median_distance
0 b cell 53.491989
1 dendritic cell 50.896772
8 t cell 47.942598
5 monocyte 44.274679
6 muscle cell 30.191094
Top 5 cell types in LN27766:
Level Two Cell Type median_distance
6 muscle cell 48.334648
3 macrophage 48.035642
5 monocyte 45.352857
0 b cell 45.181006
2 lymphoid cell 34.486844
Bottom 5 cell types in LN27766:
Level Two Cell Type median_distance
2 lymphoid cell 34.486844
4 mast cell 33.640270
8 t cell 32.772272
1 dendritic cell 31.531565
7 natural killer cell 30.995884
# Report distance variability for the current cell type level (the output below lists mean, std and CV per region and per cell type).
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
LN00560 86.25 50.97 59.1
LN00837 65.65 42.86 65.3
LN22921 69.03 43.55 63.1
LN24336 62.29 41.07 65.9
LN27766 48.66 35.83 73.6
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Two Cell Type
muscle cell 65.17 48.20 74.0
mast cell 54.53 40.09 73.5
lymphoid cell 69.90 50.05 71.6
monocyte 57.09 39.95 70.0
dendritic cell 62.58 43.15 69.0
t cell 62.64 40.69 65.0
b cell 74.12 47.83 64.5
natural killer cell 60.56 38.98 64.4
macrophage 60.91 38.83 63.7
# Produce all four figures for the current cell type level; each call passes
# <basepath>/<figures_output_dir> as the output directory.
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm='area')
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# density_norm="count" or "area" can be used based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count")
Level Three Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Three Cell Type'

# Keep only the edges computed for this level and its anchor cell type.
# The explicit copy makes the subset an independent DataFrame, because
# plot_distance_distribution_boxplots_by_region later assigns a column on it.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
].copy()

# One row of summary statistics per (cell type, region) pair.
df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
df_distance_stats_cell_type_level
Level Three Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | b cell:activated | LN00560 | 89.268466 | 82.035282 | 8.847019 | 199.985969 |
1 | b cell:activated | LN00837 | 103.892159 | 103.148832 | 10.202282 | 199.981139 |
2 | b cell:activated | LN22921 | 109.313037 | 109.134587 | 8.085613 | 199.973049 |
3 | b cell:activated | LN24336 | 78.439047 | 66.929947 | 7.599001 | 199.996026 |
4 | b cell:activated | LN27766 | 78.187423 | 70.452448 | 8.815551 | 199.923293 |
5 | b cell:germinal center | LN00560 | 110.025798 | 109.326467 | 8.953891 | 199.992071 |
6 | b cell:germinal center | LN00837 | 78.530974 | 70.589407 | 6.764925 | 199.948612 |
7 | b cell:germinal center | LN22921 | 126.062410 | 132.087548 | 8.704566 | 199.997308 |
8 | b cell:germinal center | LN24336 | 55.226407 | 46.745157 | 6.177177 | 199.984885 |
9 | b cell:germinal center | LN27766 | 38.146639 | 31.915673 | 6.971266 | 199.976434 |
10 | b cell:germinal center pre-plasmablast | LN00560 | 108.711157 | 108.905779 | 10.916243 | 199.885291 |
11 | b cell:germinal center pre-plasmablast | LN00837 | 83.001064 | 73.750379 | 10.423996 | 198.874785 |
12 | b cell:germinal center pre-plasmablast | LN22921 | 124.921662 | 129.269694 | 21.259508 | 199.839116 |
13 | b cell:germinal center pre-plasmablast | LN24336 | 49.903464 | 44.611971 | 9.241914 | 199.310578 |
14 | b cell:germinal center pre-plasmablast | LN27766 | 35.862025 | 30.864602 | 9.355446 | 185.874318 |
15 | b cell:interferon | LN00560 | 102.444714 | 99.240528 | 12.591493 | 199.567126 |
16 | b cell:interferon | LN00837 | 114.785742 | 112.568316 | 11.572951 | 199.377967 |
17 | b cell:interferon | LN22921 | 131.162456 | 139.595461 | 10.238354 | 199.992291 |
18 | b cell:interferon | LN24336 | 110.398002 | 112.429843 | 9.327697 | 199.967757 |
19 | b cell:interferon | LN27766 | 105.614725 | 104.382767 | 10.816878 | 197.792760 |
20 | b cell:memory | LN00560 | 102.444467 | 100.377290 | 4.875021 | 199.999560 |
21 | b cell:memory | LN00837 | 78.391621 | 70.584824 | 6.930883 | 199.990502 |
22 | b cell:memory | LN22921 | 79.602200 | 69.989911 | 4.797734 | 199.999849 |
23 | b cell:memory | LN24336 | 80.076569 | 70.279767 | 5.385425 | 199.999276 |
24 | b cell:memory | LN27766 | 63.402763 | 51.599412 | 7.332202 | 199.999790 |
25 | b cell:naive | LN00560 | 69.258328 | 54.441120 | 6.145788 | 199.999931 |
26 | b cell:naive | LN00837 | 76.218061 | 67.309817 | 8.581020 | 199.947632 |
27 | b cell:naive | LN22921 | 92.860766 | 88.747737 | 7.604196 | 199.990872 |
28 | b cell:naive | LN24336 | 59.469003 | 50.735010 | 6.191827 | 199.997376 |
29 | b cell:naive | LN27766 | 75.836945 | 66.107927 | 8.170217 | 199.982514 |
30 | b cell:pre-germinal center | LN00560 | 125.145570 | 131.985475 | 10.090936 | 199.946647 |
31 | b cell:pre-germinal center | LN00837 | 91.106536 | 86.702315 | 11.493943 | 199.388265 |
32 | b cell:pre-germinal center | LN22921 | 132.997491 | 140.357258 | 9.438865 | 199.996039 |
33 | b cell:pre-germinal center | LN24336 | 105.067771 | 103.209786 | 9.577587 | 199.794374 |
34 | b cell:pre-germinal center | LN27766 | 106.766845 | 109.227324 | 10.393205 | 199.091629 |
35 | b cell:proliferating | LN00560 | 74.076170 | 61.983066 | 6.553306 | 199.999329 |
36 | b cell:proliferating | LN00837 | 51.889082 | 42.257876 | 6.638355 | 199.729762 |
37 | b cell:proliferating | LN22921 | 85.020031 | 73.684262 | 5.545646 | 199.994709 |
38 | b cell:proliferating | LN24336 | 51.541644 | 44.761244 | 6.192560 | 199.986801 |
39 | b cell:proliferating | LN27766 | 37.535815 | 31.282443 | 6.945505 | 199.958507 |
40 | dendritic cell:conventional 1 | LN00560 | 74.747687 | 65.742189 | 8.728357 | 199.915652 |
41 | dendritic cell:conventional 1 | LN00837 | 56.373332 | 49.048383 | 10.157812 | 195.342718 |
42 | dendritic cell:conventional 1 | LN22921 | 54.710012 | 47.481574 | 7.840607 | 199.269117 |
43 | dendritic cell:conventional 1 | LN24336 | 120.905504 | 124.830580 | 9.217633 | 199.995753 |
44 | dendritic cell:conventional 1 | LN27766 | 38.420549 | 34.650392 | 9.251431 | 198.827691 |
45 | dendritic cell:conventional 2 | LN00560 | 68.057708 | 57.691652 | 7.978659 | 199.949543 |
46 | dendritic cell:conventional 2 | LN00837 | 54.012741 | 45.497958 | 6.952205 | 199.902788 |
47 | dendritic cell:conventional 2 | LN22921 | 48.714428 | 41.297008 | 7.880744 | 199.807328 |
48 | dendritic cell:conventional 2 | LN24336 | 129.199183 | 136.077984 | 8.925008 | 199.994250 |
49 | dendritic cell:conventional 2 | LN27766 | 50.585663 | 45.848785 | 9.638823 | 196.531393 |
50 | dendritic cell:follicular | LN00560 | 90.484649 | 85.545859 | 6.239511 | 199.836667 |
51 | dendritic cell:follicular | LN00837 | 68.051364 | 50.274675 | 9.376279 | 199.972438 |
52 | dendritic cell:follicular | LN22921 | 46.702944 | 32.311309 | 7.006536 | 199.945312 |
53 | dendritic cell:follicular | LN24336 | 46.933889 | 37.744882 | 8.222905 | 198.852000 |
54 | dendritic cell:follicular | LN27766 | 63.576654 | 50.547417 | 7.463081 | 199.936083 |
55 | dendritic cell:migratory | LN00560 | 74.874860 | 65.090342 | 8.664038 | 199.904567 |
56 | dendritic cell:migratory | LN00837 | 67.620372 | 61.629700 | 10.996842 | 199.452052 |
57 | dendritic cell:migratory | LN22921 | 63.298214 | 56.380191 | 10.370947 | 194.049890 |
58 | dendritic cell:migratory | LN24336 | 120.640773 | 127.698045 | 9.189187 | 199.945018 |
59 | dendritic cell:migratory | LN27766 | 43.524350 | 37.308495 | 10.455990 | 163.623264 |
60 | dendritic cell:plasmacytoid | LN00560 | 65.255968 | 51.800420 | 6.239585 | 199.990815 |
61 | dendritic cell:plasmacytoid | LN00837 | 54.379345 | 47.567074 | 6.578968 | 198.465924 |
62 | dendritic cell:plasmacytoid | LN22921 | 98.059750 | 93.010721 | 8.675605 | 199.967413 |
63 | dendritic cell:plasmacytoid | LN24336 | 53.708142 | 48.208903 | 7.264940 | 199.760738 |
64 | dendritic cell:plasmacytoid | LN27766 | 31.272411 | 27.099603 | 7.060869 | 199.496027 |
65 | lymphoid cell:innate | LN00560 | 87.916868 | 81.987260 | 6.472932 | 199.985560 |
66 | lymphoid cell:innate | LN00837 | 47.074148 | 39.426220 | 6.102839 | 199.566202 |
67 | lymphoid cell:innate | LN22921 | 66.159509 | 52.261744 | 5.680996 | 199.945578 |
68 | lymphoid cell:innate | LN24336 | 71.342591 | 58.007388 | 6.837350 | 199.999774 |
69 | lymphoid cell:innate | LN27766 | 52.863464 | 34.486844 | 6.130197 | 199.989882 |
70 | macrophage | LN00560 | 78.607505 | 68.799241 | 7.336973 | 199.994850 |
71 | macrophage | LN00837 | 56.992624 | 50.840886 | 9.008524 | 199.753164 |
72 | macrophage | LN22921 | 54.901756 | 47.901377 | 6.583548 | 199.929894 |
73 | macrophage | LN24336 | 79.116158 | 67.811076 | 7.957836 | 199.937893 |
74 | macrophage | LN27766 | 51.519747 | 48.035642 | 7.765481 | 197.595104 |
75 | mast cell | LN00560 | 99.372310 | 96.167629 | 10.595659 | 199.849460 |
76 | mast cell | LN00837 | 65.670488 | 55.672843 | 10.448412 | 197.309722 |
77 | mast cell | LN22921 | 52.593234 | 44.533122 | 5.595661 | 198.746019 |
78 | mast cell | LN24336 | 85.567104 | 74.870233 | 9.498461 | 199.956401 |
79 | mast cell | LN27766 | 43.276579 | 33.640270 | 8.008464 | 199.891157 |
80 | monocyte | LN00560 | 89.144747 | 82.996331 | 7.532715 | 199.991027 |
81 | monocyte | LN00837 | 55.526799 | 45.104939 | 8.037716 | 199.980528 |
82 | monocyte | LN22921 | 50.021763 | 40.595156 | 5.302953 | 199.957450 |
83 | monocyte | LN24336 | 60.846991 | 44.274679 | 7.100671 | 199.964176 |
84 | monocyte | LN27766 | 49.144810 | 45.352857 | 7.274465 | 199.927731 |
85 | muscle cell:smooth | LN00560 | 86.522830 | 80.072416 | 7.366499 | 199.827904 |
86 | muscle cell:smooth | LN00837 | 59.032529 | 44.506487 | 9.537860 | 196.964611 |
87 | muscle cell:smooth | LN22921 | 39.150671 | 28.447433 | 6.260871 | 199.593551 |
88 | muscle cell:smooth | LN24336 | 43.536294 | 30.191094 | 7.974461 | 199.351793 |
89 | muscle cell:smooth | LN27766 | 62.912204 | 48.334648 | 6.110041 | 199.979704 |
90 | natural killer cell | LN00560 | 86.771851 | 79.778126 | 6.896002 | 199.981161 |
91 | natural killer cell | LN00837 | 57.099449 | 50.561814 | 7.570372 | 199.852375 |
92 | natural killer cell | LN22921 | 57.361924 | 51.657575 | 6.314864 | 199.963468 |
93 | natural killer cell | LN24336 | 85.475053 | 75.480916 | 6.704395 | 199.995869 |
94 | natural killer cell | LN27766 | 35.518688 | 30.995884 | 5.664908 | 199.029670 |
95 | plasma cell | LN00560 | 72.331097 | 59.404645 | 7.521666 | 199.951464 |
96 | plasma cell | LN00837 | 44.038633 | 35.479895 | 6.715191 | 199.593083 |
97 | plasma cell | LN22921 | 57.740290 | 52.704438 | 6.326851 | 199.734748 |
98 | plasma cell | LN24336 | 46.465649 | 41.440832 | 6.788876 | 199.974351 |
99 | plasma cell | LN27766 | 71.756242 | 57.171075 | 7.824266 | 199.998972 |
100 | t cell:cd4+ alpha-beta effector | LN00560 | 95.993355 | 91.765471 | 6.121488 | 199.999549 |
101 | t cell:cd4+ alpha-beta effector | LN00837 | 84.232618 | 72.495092 | 9.560197 | 199.960420 |
102 | t cell:cd4+ alpha-beta effector | LN22921 | 59.880497 | 51.975392 | 5.481371 | 199.966756 |
103 | t cell:cd4+ alpha-beta effector | LN24336 | 49.530880 | 41.003228 | 4.235994 | 199.994424 |
104 | t cell:cd4+ alpha-beta effector | LN27766 | 38.377382 | 30.867354 | 7.735967 | 199.330477 |
105 | t cell:cd4+ alpha-beta naive thymus-derived | LN00560 | 78.090613 | 68.183839 | 6.663214 | 199.996131 |
106 | t cell:cd4+ alpha-beta naive thymus-derived | LN00837 | 65.922173 | 58.227278 | 7.790914 | 199.942717 |
107 | t cell:cd4+ alpha-beta naive thymus-derived | LN22921 | 65.158677 | 57.901879 | 6.411245 | 199.952146 |
108 | t cell:cd4+ alpha-beta naive thymus-derived | LN24336 | 63.160522 | 53.431121 | 6.536176 | 199.997767 |
109 | t cell:cd4+ alpha-beta naive thymus-derived | LN27766 | 51.208692 | 41.425802 | 8.157567 | 199.874886 |
110 | t cell:cd8+ | LN00560 | 86.615013 | 79.180151 | 7.102615 | 199.996365 |
111 | t cell:cd8+ | LN00837 | 55.437898 | 46.240026 | 6.556713 | 199.971033 |
112 | t cell:cd8+ | LN22921 | 56.181049 | 49.659042 | 6.259795 | 199.940448 |
113 | t cell:cd8+ | LN24336 | 55.152276 | 48.343775 | 6.651539 | 199.966610 |
114 | t cell:cd8+ | LN27766 | 35.375142 | 30.384688 | 7.360558 | 198.852391 |
115 | t cell:cd8+ cd161+ | LN00560 | 90.056320 | 84.555500 | 7.182956 | 199.994788 |
116 | t cell:cd8+ cd161+ | LN00837 | 59.376177 | 44.384574 | 9.255595 | 199.566477 |
117 | t cell:cd8+ cd161+ | LN22921 | 66.069266 | 59.345402 | 6.111516 | 199.991352 |
118 | t cell:cd8+ cd161+ | LN24336 | 48.486466 | 40.243374 | 7.509943 | 198.654975 |
119 | t cell:cd8+ cd161+ | LN27766 | 37.296980 | 29.442390 | 8.137273 | 199.500140 |
120 | t cell:follicular helper | LN00560 | 82.058488 | 73.460999 | 5.603447 | 199.999450 |
121 | t cell:follicular helper | LN00837 | 61.714580 | 51.712027 | 7.083180 | 199.822547 |
122 | t cell:follicular helper | LN22921 | 63.786746 | 57.161618 | 6.077661 | 199.989836 |
123 | t cell:follicular helper | LN24336 | 54.536331 | 46.164723 | 6.099143 | 199.990253 |
124 | t cell:follicular helper | LN27766 | 42.271050 | 35.354972 | 7.276333 | 199.864371 |
125 | t cell:mature natural killer | LN00560 | 96.336149 | 93.203428 | 9.809964 | 199.942835 |
126 | t cell:mature natural killer | LN00837 | 67.373921 | 52.184751 | 8.295802 | 199.948155 |
127 | t cell:mature natural killer | LN22921 | 54.308640 | 46.321960 | 7.835377 | 199.907410 |
128 | t cell:mature natural killer | LN24336 | 47.357061 | 39.144706 | 6.153474 | 199.952564 |
129 | t cell:mature natural killer | LN27766 | 44.643981 | 32.162381 | 7.923749 | 199.151376 |
130 | t cell:regulatory | LN00560 | 100.303430 | 96.757788 | 9.233340 | 199.998294 |
131 | t cell:regulatory | LN00837 | 77.401648 | 68.048508 | 8.143654 | 199.771234 |
132 | t cell:regulatory | LN22921 | 52.599146 | 45.693505 | 5.852943 | 199.996959 |
133 | t cell:regulatory | LN24336 | 56.138669 | 47.849818 | 7.000655 | 199.993808 |
134 | t cell:regulatory | LN27766 | 31.841954 | 26.530015 | 5.864464 | 199.380478 |
135 | t cell:tim3+ | LN00560 | 90.805956 | 84.536161 | 7.447616 | 199.922781 |
136 | t cell:tim3+ | LN00837 | 65.234680 | 60.787009 | 13.362004 | 175.288619 |
137 | t cell:tim3+ | LN22921 | 51.362474 | 44.746693 | 8.064281 | 197.946082 |
138 | t cell:tim3+ | LN24336 | 59.124041 | 50.708968 | 7.018645 | 199.806968 |
139 | t cell:tim3+ | LN27766 | 29.328329 | 24.992139 | 8.628643 | 169.319098 |
# Get top and bottom cell types (by mean distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the top group, element 1 as the bottom group.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level Three Cell Type mean_distance
6 b cell:pre-germinal center 125.145570
1 b cell:germinal center 110.025798
2 b cell:germinal center pre-plasmablast 108.711157
3 b cell:interferon 102.444714
4 b cell:memory 102.444467
Bottom 5 cell types in LN00560:
Level Three Cell Type mean_distance
7 b cell:proliferating 74.076170
19 plasma cell 72.331097
5 b cell:naive 69.258328
9 dendritic cell:conventional 2 68.057708
12 dendritic cell:plasmacytoid 65.255968
Top 5 cell types in LN00837:
Level Three Cell Type mean_distance
3 b cell:interferon 114.785742
0 b cell:activated 103.892159
6 b cell:pre-germinal center 91.106536
20 t cell:cd4+ alpha-beta effector 84.232618
2 b cell:germinal center pre-plasmablast 83.001064
Bottom 5 cell types in LN00837:
Level Three Cell Type mean_distance
12 dendritic cell:plasmacytoid 54.379345
9 dendritic cell:conventional 2 54.012741
7 b cell:proliferating 51.889082
13 lymphoid cell:innate 47.074148
19 plasma cell 44.038633
Top 5 cell types in LN22921:
Level Three Cell Type mean_distance
6 b cell:pre-germinal center 132.997491
3 b cell:interferon 131.162456
1 b cell:germinal center 126.062410
2 b cell:germinal center pre-plasmablast 124.921662
0 b cell:activated 109.313037
Bottom 5 cell types in LN22921:
Level Three Cell Type mean_distance
27 t cell:tim3+ 51.362474
16 monocyte 50.021763
9 dendritic cell:conventional 2 48.714428
10 dendritic cell:follicular 46.702944
17 muscle cell:smooth 39.150671
Top 5 cell types in LN24336:
Level Three Cell Type mean_distance
9 dendritic cell:conventional 2 129.199183
8 dendritic cell:conventional 1 120.905504
11 dendritic cell:migratory 120.640773
3 b cell:interferon 110.398002
6 b cell:pre-germinal center 105.067771
Bottom 5 cell types in LN24336:
Level Three Cell Type mean_distance
23 t cell:cd8+ cd161+ 48.486466
25 t cell:mature natural killer 47.357061
10 dendritic cell:follicular 46.933889
19 plasma cell 46.465649
17 muscle cell:smooth 43.536294
Top 5 cell types in LN27766:
Level Three Cell Type mean_distance
6 b cell:pre-germinal center 106.766845
3 b cell:interferon 105.614725
0 b cell:activated 78.187423
5 b cell:naive 75.836945
19 plasma cell 71.756242
Bottom 5 cell types in LN27766:
Level Three Cell Type mean_distance
18 natural killer cell 35.518688
22 t cell:cd8+ 35.375142
26 t cell:regulatory 31.841954
12 dendritic cell:plasmacytoid 31.272411
27 t cell:tim3+ 29.328329
# Get top and bottom cell types (by median distance) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the top group, element 1 as the bottom group.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in LN00560:
Level Three Cell Type median_distance
6 b cell:pre-germinal center 131.985475
1 b cell:germinal center 109.326467
2 b cell:germinal center pre-plasmablast 108.905779
4 b cell:memory 100.377290
3 b cell:interferon 99.240528
Bottom 5 cell types in LN00560:
Level Three Cell Type median_distance
7 b cell:proliferating 61.983066
19 plasma cell 59.404645
9 dendritic cell:conventional 2 57.691652
5 b cell:naive 54.441120
12 dendritic cell:plasmacytoid 51.800420
Top 5 cell types in LN00837:
Level Three Cell Type median_distance
3 b cell:interferon 112.568316
0 b cell:activated 103.148832
6 b cell:pre-germinal center 86.702315
2 b cell:germinal center pre-plasmablast 73.750379
20 t cell:cd4+ alpha-beta effector 72.495092
Bottom 5 cell types in LN00837:
Level Three Cell Type median_distance
17 muscle cell:smooth 44.506487
23 t cell:cd8+ cd161+ 44.384574
7 b cell:proliferating 42.257876
13 lymphoid cell:innate 39.426220
19 plasma cell 35.479895
Top 5 cell types in LN22921:
Level Three Cell Type median_distance
6 b cell:pre-germinal center 140.357258
3 b cell:interferon 139.595461
1 b cell:germinal center 132.087548
2 b cell:germinal center pre-plasmablast 129.269694
0 b cell:activated 109.134587
Bottom 5 cell types in LN22921:
Level Three Cell Type median_distance
15 mast cell 44.533122
9 dendritic cell:conventional 2 41.297008
16 monocyte 40.595156
10 dendritic cell:follicular 32.311309
17 muscle cell:smooth 28.447433
Top 5 cell types in LN24336:
Level Three Cell Type median_distance
9 dendritic cell:conventional 2 136.077984
11 dendritic cell:migratory 127.698045
8 dendritic cell:conventional 1 124.830580
3 b cell:interferon 112.429843
6 b cell:pre-germinal center 103.209786
Bottom 5 cell types in LN24336:
Level Three Cell Type median_distance
20 t cell:cd4+ alpha-beta effector 41.003228
23 t cell:cd8+ cd161+ 40.243374
25 t cell:mature natural killer 39.144706
10 dendritic cell:follicular 37.744882
17 muscle cell:smooth 30.191094
Top 5 cell types in LN27766:
Level Three Cell Type median_distance
6 b cell:pre-germinal center 109.227324
3 b cell:interferon 104.382767
0 b cell:activated 70.452448
5 b cell:naive 66.107927
19 plasma cell 57.171075
Bottom 5 cell types in LN27766:
Level Three Cell Type median_distance
22 t cell:cd8+ 30.384688
23 t cell:cd8+ cd161+ 29.442390
12 dendritic cell:plasmacytoid 27.099603
26 t cell:regulatory 26.530015
27 t cell:tim3+ 24.992139
# Run the variability analysis on the edge table for the chosen cell type level.
# The captured output that follows reports mean, std and CV (std/mean * 100%)
# of distances per region and per cell type (the latter sorted by CV).
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
LN00560 86.25 50.97 59.1
LN00837 65.65 42.86 65.3
LN22921 69.03 43.55 63.1
LN24336 62.29 41.07 65.9
LN27766 48.66 35.83 73.6
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Three Cell Type
muscle cell:smooth 65.17 48.20 74.0
mast cell 54.53 40.09 73.5
dendritic cell:follicular 65.68 48.16 73.3
lymphoid cell:innate 69.90 50.05 71.6
t cell:mature natural killer 54.65 39.02 71.4
t cell:regulatory 55.45 39.00 70.3
b cell:germinal center 70.57 49.44 70.1
monocyte 57.09 39.95 70.0
b cell:proliferating 65.55 45.77 69.8
dendritic cell:conventional 2 66.62 45.99 69.0
dendritic cell:plasmacytoid 59.26 40.45 68.3
t cell:cd4+ alpha-beta effector 68.30 45.44 66.5
t cell:cd8+ 61.29 40.36 65.9
t cell:cd8+ cd161+ 66.32 42.96 64.8
natural killer cell 60.56 38.98 64.4
dendritic cell:conventional 1 74.03 47.68 64.4
t cell:tim3+ 69.06 44.31 64.2
b cell:naive 71.23 45.65 64.1
macrophage 60.91 38.83 63.7
b cell:germinal center pre-plasmablast 52.90 33.19 62.7
dendritic cell:migratory 80.92 50.55 62.5
t cell:follicular helper 63.41 39.65 62.5
t cell:cd4+ alpha-beta naive thymus-derived 64.99 40.45 62.2
plasma cell 54.58 33.43 61.2
b cell:memory 82.07 49.33 60.1
b cell:activated 87.40 49.44 56.6
b cell:interferon 115.91 51.01 44.0
b cell:pre-germinal center 124.16 48.98 39.4
# Produce the distance-distribution figures for the chosen cell type level.
# Every plot helper receives the same edge table, the cell type level, and the
# figures directory built from `basepath` and `figures_output_dir`
# (presumably where the figures are written -- confirm in the plot helpers).

# Violin plot per cell type.
plot_violin_cells_per_celltype(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
    density_norm='area',
)

# Box plots of the distance distribution, by region.
plot_distance_distribution_boxplots_by_region(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
)

# Heatmap of the distance distribution.
plot_distance_distribution_heatmap(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
)

# Violin plots across all regions.
# density_norm may be "count" or "area", based on preference.
plot_violin_plots_all_regions(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
    density_norm="count",
)