import numpy as np
import pandas as pd
import os
import json
import requests
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
import plotly.express as px
from _cde_compute_edges_from_nodes import *
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# suppress warnings
import warnings
warnings.filterwarnings("ignore")
Distance Analysis: spleen-codex-ufl
Analyze and visualize cell-to-nearest-endothelial-cell distance distributions for the `spleen-codex-ufl` dataset.
# Root folder that holds the processed inputs and the generated outputs.
basepath = "/u/yashjain/hra-cell-distance-analysis/data"
# Name of the dataset analysed here; also used as a sub-folder name.
dataset_dir = "spleen-codex-ufl"
# Sub-folders (relative to `basepath`) for the nodes files, the edges files
# and the generated figures.
data_filedir = os.path.join("data-processed-nodes-with-harmonized-cell-types", dataset_dir)
output_edge_dir = os.path.join("data-processed-edges", dataset_dir)
figures_output_dir = "generated-figures"
# Function to load your data
def load_data(path, edges=False):
    """Read a CSV file into a DataFrame.

    Args:
        path: Path (or file-like object) accepted by ``pd.read_csv``.
        edges: When True the file is read as a header-less edge list and the
            columns are named ``cell_id, x1, y1, z1, x2, y2, z2``. When False
            the first row of the file is used as the header.

    Returns:
        The loaded ``pd.DataFrame``.
    """
    if not edges:
        return pd.read_csv(path)

    column_names = ['cell_id', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2']
    return pd.read_csv(path, header=None, names=column_names)
# Function to read all files ending with "-nodes.csv" in the `data_filedir` directory into a single DataFrame.
# Another additional column `Dataset` is added to identify the dataset name which comes from the filename before the `-nodes.csv` suffix.
# Additionally, function reads all files ending with "-edges.csv" in the `output_edge_dir` directory into a single DataFrame.
# Three additional columns are added "Dataset", "Anchor Cell Type", and "Anchor Cell Type Level" to identify the dataset name, anchor cell type, and anchor cell type level respectively which come from the filename before the `.csv` suffix.
# The three additional columns are created by splitting the filename on the `-` character, and extracting the relevant parts.
# On splitting, the first part is the dataset name, second part is the anchor cell type level, and third part is the anchor cell type, and last part is the `edges` suffix.
# When reading files, check if the file has the correct format (i.e., ends with `-edges.csv`).
# Additionally, the function merges the edges DataFrame with the nodes DataFrame to get the cell type information for the anchor cells.
# This is done by reading the corresponding nodes file from the `data_filedir` directory for each edges file, and merging it with the edges DataFrame on the `cell_id` column.
# The merged DataFrame contains the edges with additional columns for the cell type information.
# The function returns three DataFrames:
# 1. `merged_nodes`: DataFrame containing all nodes with an additional column `Dataset`.
# 2. `merged_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, and `Anchor Cell Type Level`.
# 3. `merged_nodes_for_all_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, `Anchor Cell Type Level`, and the cell type information for cells.
def read_all_edge_datasets(basepath, data_filedir, output_edge_dir):
    """Load every edges file and its matching nodes file into merged frames.

    Every file in ``basepath/output_edge_dir`` whose name ends with
    ``-edges.csv`` is read. The file name (without that suffix) is split on
    ``-`` into exactly three parts: dataset name, anchor cell type level and
    anchor cell type. The nodes file ``<dataset>-nodes.csv`` is read from
    ``basepath/data_filedir`` and joined to the edges on ``cell_id``, where a
    node's ``cell_id`` is its row position in the nodes file.

    Args:
        basepath: Root data directory.
        data_filedir: Sub-directory of ``basepath`` holding the nodes files.
        output_edge_dir: Sub-directory of ``basepath`` holding the edges files.

    Returns:
        A tuple ``(merged_nodes, merged_edges, merged_nodes_for_all_edges)``:
        all nodes with a ``Dataset`` column; all edges with ``Dataset``,
        ``Anchor Cell Type`` and ``Anchor Cell Type Level`` columns; and the
        edges again with the three ``Level ... Cell Type`` columns of the
        source cell attached.

    Raises:
        ValueError: If no ``-edges.csv`` file is found, or if a file name does
            not split into exactly three ``-``-separated parts.
    """
    edges_suffix = "-edges.csv"
    cell_type_columns = ['Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type']

    # One nodes frame per dataset. Several edges files (one per anchor cell
    # type and level) share the same nodes file, so it is read and collected
    # only once; otherwise every cell would appear multiple times in
    # `merged_nodes`.
    nodes_by_dataset = {}
    all_edges_files = []
    all_nodes_edges_files = []

    edges_dir = os.path.join(basepath, output_edge_dir)
    for file in os.listdir(edges_dir):
        if not file.endswith(edges_suffix):
            continue

        file_path = os.path.join(edges_dir, file)
        dataset_name, anchor_cell_type_level, anchor_cell_type = file.replace(edges_suffix, "").split('-')

        # The edges files carry a header row, hence edges=False.
        edges_df = load_data(file_path, edges=False)
        edges_df['Dataset'] = dataset_name
        edges_df['Anchor Cell Type'] = anchor_cell_type
        edges_df['Anchor Cell Type Level'] = anchor_cell_type_level
        edges_df.rename(columns={"distance": "Distance"}, inplace=True)  # Rename column "distance" to "Distance".
        all_edges_files.append(edges_df)

        if dataset_name not in nodes_by_dataset:
            # Read the corresponding nodes file to get the cell type information.
            nodes_file_path = os.path.join(basepath, data_filedir, f"{dataset_name}-nodes.csv")
            nodes_df = load_data(nodes_file_path)
            nodes_df['Dataset'] = dataset_name
            # A node's cell_id is its row position; use it as the join index.
            nodes_df['cell_id'] = range(len(nodes_df))
            nodes_df.set_index('cell_id', inplace=True)
            nodes_by_dataset[dataset_name] = nodes_df
        nodes_df = nodes_by_dataset[dataset_name]

        # Attach the cell type information of the cell each edge starts from.
        edges_nodes_df = pd.merge(
            edges_df,
            nodes_df[cell_type_columns],
            how='left',
            left_on='cell_id',
            right_index=True,
        )
        all_nodes_edges_files.append(edges_nodes_df)

    if not all_edges_files:
        raise ValueError(f"No '*{edges_suffix}' files found in '{edges_dir}'.")

    merged_edges = pd.concat(all_edges_files, ignore_index=True)
    merged_nodes = pd.concat(list(nodes_by_dataset.values()), ignore_index=True)
    merged_nodes_for_all_edges = pd.concat(all_nodes_edges_files, ignore_index=True)

    return merged_nodes, merged_edges, merged_nodes_for_all_edges
def create_directory(directory):
    """Create ``directory`` (including parents) and report what happened.

    Prints one message saying whether the directory was created or whether
    the path already existed.
    """
    try:
        # Attempt the creation directly instead of checking first, so a path
        # that appears between check and creation cannot raise here.
        os.makedirs(directory)
    except FileExistsError:
        print(f"Directory '{directory}' already exists.")
    else:
        print(f"Directory '{directory}' created successfully.")
Get initial statistics and identify endothelial cell categories for dataset.
# Load the nodes, the edges, and the edges annotated with cell types.
df_all_nodes, df_all_edges, df_all_edges_with_cell_types = read_all_edge_datasets(basepath, data_filedir, output_edge_dir)
df_all_nodes.head(5)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 3403.933 | Macrophages | macrophage | macrophage | CL:0000235 | skos:exactMatch | macrophage | macrophage | CL:0000235 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | FSLD |
1 | 0.0 | 3356.808 | Sinusoidal cells | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | FSLD |
2 | 0.0 | 3313.830 | Sinusoidal cells | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | FSLD |
3 | 0.0 | 3304.405 | indistinct | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | FSLD |
4 | 0.0 | 3281.031 | indistinct | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | FSLD |
# Print the total number of unique cell types per dataset. Compute separately for each cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type).
print("Total number of unique cell types per cell type annnotation level:")
# Number of distinct values in each cell type annotation column.
unique_cell_types = {
    column: df_all_nodes[column].nunique()
    for column in (
        'Original Cell Type',
        'Level Three Cell Type',
        'Level Two Cell Type',
        'Level One Cell Type',
    )
}
for cell_type, count in unique_cell_types.items():
    print(f"{cell_type}: {count}")
Total number of unique cell types per cell type annnotation level:
Original Cell Type: 12
Level Three Cell Type: 12
Level Two Cell Type: 9
Level One Cell Type: 3
# Save the unique cell types containing "endothelial" in name per cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type) to a dictionary where the key is the level and the value is a list of unique cell types.
# For each annotation column, the distinct values whose name contains
# "endothelial" (case-insensitive; missing values never match).
endothelial_cell_types = {
    column: df_all_nodes.loc[
        df_all_nodes[column].str.contains("endothelial", case=False, na=False),
        column,
    ].unique().tolist()
    for column in (
        'Original Cell Type',
        'Level Three Cell Type',
        'Level Two Cell Type',
        'Level One Cell Type',
    )
}

print("\nEndothelial cell types per cell type annotation level:")
for level, cell_types in endothelial_cell_types.items():
    print(f"\n{level}:")
    for cell in cell_types:
        print(f" - {cell}")
Endothelial cell types per cell type annotation level:
Original Cell Type:
- blood endothelial
Level Three Cell Type:
- endothelial cell of sinusoid
- endothelial cell of lymphatic vessel
- endothelial cell of vascular tree
Level Two Cell Type:
- endothelial cell of sinusoid
- endothelial cell of lymphatic vessel
- endothelial cell of vascular tree
Level One Cell Type:
- endothelial cell
type_field_list = ["Level Three Cell Type", "Level Two Cell Type", "Level One Cell Type"]  # Skipping Original Cell Type as it is not a hierarchical level.

# Define the anchor cell type (type of endothelial cell) for each level in type_field_list based on available categories in the previous cell. The distance analysis at all three levels will be limited to the specified anchor cell type.
anchor_cell_type_dict = {
    'Level Three Cell Type': 'endothelial cell of sinusoid',  # Choosing sinusoid due to high number of instances in the dataset compared to other endothelial cell types.
    'Level Two Cell Type': 'endothelial cell of sinusoid',
    'Level One Cell Type': 'endothelial cell',
}
Process datasets to add region information to Nodes files.
# Preview the first rows of the nodes table before adding region information.
df_all_nodes.head()
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 3403.933 | Macrophages | macrophage | macrophage | CL:0000235 | skos:exactMatch | macrophage | macrophage | CL:0000235 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | FSLD |
1 | 0.0 | 3356.808 | Sinusoidal cells | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | FSLD |
2 | 0.0 | 3313.830 | Sinusoidal cells | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell of sinusoid | endothelial cell of sinusoid | CL:0002262 | skos:exactMatch | endothelial cell | endothelial cell | CL:0000115 | skos:exactMatch | FSLD |
3 | 0.0 | 3304.405 | indistinct | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | FSLD |
4 | 0.0 | 3281.031 | indistinct | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | unknown cell | cell:unknown | CL:0000000 | skos:narrowMatch | FSLD |
# Add a "Unique Region" column to df_all_nodes based on the "Dataset" column.
# Here the dataset name is used as the region name unchanged; the commented
# alternatives map the dataset name through a `region_map` lookup instead.
df_all_nodes['Unique Region'] = df_all_nodes['Dataset']
# df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].map(region_map)
# df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].str.split('-').str[1].map(region_map)

# Check if the new columns are created correctly.
df_all_nodes[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | FSLD | FSLD |
1 | FSLD | FSLD |
2 | FSLD | FSLD |
3 | FSLD | FSLD |
4 | FSLD | FSLD |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_nodes['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_nodes['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
# A single groupby pass replaces one full-table scan per region;
# sort=False keeps the regions in order of first appearance.
print("\nNumber of unique datasets per unique region:")
for region, num_datasets in df_all_nodes.groupby('Unique Region', sort=False)['Dataset'].nunique().items():
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['FSLD' 'KSFB' 'NGPL' 'PBVN' 'PKHL' 'XXCD']
Total number of unique regions: 6
Number of unique datasets per unique region:
FSLD: 1
KSFB: 1
NGPL: 1
PBVN: 1
PKHL: 1
XXCD: 1
Process datasets to add region information to Edges files.
# Preview the first rows of the edges table.
df_all_edges.head(5)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0.000 | 3403.933 | 0 | 9.425 | 3409.211 | 0 | 10.802218 | FSLD | endothelial cell of sinusoid | Level Three Cell Type |
1 | 24 | 1.131 | 3416.374 | 0 | 9.425 | 3409.211 | 0 | 10.958969 | FSLD | endothelial cell of sinusoid | Level Three Cell Type |
2 | 43 | 12.818 | 3415.620 | 0 | 9.425 | 3409.211 | 0 | 7.251740 | FSLD | endothelial cell of sinusoid | Level Three Cell Type |
3 | 95 | 21.866 | 3407.703 | 0 | 9.425 | 3409.211 | 0 | 12.532061 | FSLD | endothelial cell of sinusoid | Level Three Cell Type |
4 | 115 | 22.997 | 3415.997 | 0 | 9.425 | 3409.211 | 0 | 15.173957 | FSLD | endothelial cell of sinusoid | Level Three Cell Type |
# Process the edge data to create new columns "Unique Region" based on the "Dataset" column, similar to how it was done for the node data.
# The dataset name is used as the region name unchanged.
df_all_edges['Unique Region'] = df_all_edges['Dataset']
# df_all_edges['Unique Region'] = df_all_edges['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | FSLD | FSLD |
1 | FSLD | FSLD |
2 | FSLD | FSLD |
3 | FSLD | FSLD |
4 | FSLD | FSLD |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_edges['Unique Region'].unique())

# Print the total number of unique regions.
print(f"Total number of unique regions: {df_all_edges['Unique Region'].nunique()}")

# Print number of unique datasets per unique region.
# A single groupby pass replaces one full-table scan per region;
# sort=False keeps the regions in order of first appearance.
print("\nNumber of unique datasets per unique region:")
for region, num_datasets in df_all_edges.groupby('Unique Region', sort=False)['Dataset'].nunique().items():
    print(f"{region}: {num_datasets}")
Unique Regions in the data:
['FSLD' 'KSFB' 'NGPL' 'PBVN' 'PKHL' 'XXCD']
Total number of unique regions: 6
Number of unique datasets per unique region:
FSLD: 1
KSFB: 1
NGPL: 1
PBVN: 1
PKHL: 1
XXCD: 1
# The dataset name is used as the region name unchanged.
df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset']
# df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset'].map(region_map)

# Check if the new columns are created correctly.
df_all_edges_with_cell_types[['Dataset', 'Unique Region']].head(5)
Dataset | Unique Region | |
---|---|---|
0 | FSLD | FSLD |
1 | FSLD | FSLD |
2 | FSLD | FSLD |
3 | FSLD | FSLD |
4 | FSLD | FSLD |
# Show one row of the nodes table, now including "Unique Region".
df_all_nodes.head(1)
x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 3403.933 | Macrophages | macrophage | macrophage | CL:0000235 | skos:exactMatch | macrophage | macrophage | CL:0000235 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | FSLD | FSLD |
# Show one row of the edges table, now including "Unique Region".
df_all_edges.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0.0 | 3403.933 | 0 | 9.425 | 3409.211 | 0 | 10.802218 | FSLD | endothelial cell of sinusoid | Level Three Cell Type | FSLD |
# Show one row of the edges table that carries the cell type columns.
df_all_edges_with_cell_types.head(1)
cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Level Three Cell Type | Level Two Cell Type | Level One Cell Type | Unique Region | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0.0 | 3403.933 | 0 | 9.425 | 3409.211 | 0 | 10.802218 | FSLD | endothelial cell of sinusoid | Level Three Cell Type | macrophage | macrophage | immune cell | FSLD |
Node Analysis
# Plot number of cells per cell type in the same plot. Color by cell type and unique region. Output figure saved in existing `figures_output_dir`.
def plot_cells_per_celltype(df, type_field, output_dir):
    """Draw and save a count plot of cells per cell type, coloured by region.

    Args:
        df: Nodes DataFrame containing ``type_field`` and ``Unique Region``.
        type_field: Name of the cell type column shown on the x-axis.
        output_dir: Directory the PNG and SVG files are written to; it is
            created if it does not exist.

    The module-level ``dataset_dir`` is used in the title and file names.
    """
    os.makedirs(output_dir, exist_ok=True)

    plt.figure(figsize=(20, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.countplot(data=df, x=type_field, palette='Spectral', hue='Unique Region')
    plt.title(f'Number of Cells per {type_field} in `{dataset_dir}`')
    plt.xticks(rotation=90)
    plt.legend(title='Unique Region', bbox_to_anchor=(0.85, 1), loc='upper left')
    plt.xlabel(type_field)
    # For numbers on y-axis, do not use scientific notation.
    plt.ticklabel_format(style='plain', axis='y')
    # Set y-axis label
    plt.ylabel('Number of Cells')
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()

    # Save only after all styling is applied, so the files on disk contain
    # the legend placement, axis labels and grid shown on screen.
    file_stem = os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}')
    for extension in ('png', 'svg'):
        plt.savefig(f'{file_stem}.{extension}', dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    # Show the plot
    plt.show()
    plt.close()


for type_field in type_field_list:
    plot_cells_per_celltype(df_all_nodes, type_field, os.path.join(basepath, figures_output_dir))
Distance Analysis
# Get mean, median, minimum, maximum distance per unique region per anchor cell type.
df_distance_stats = df_all_edges_with_cell_types.groupby(
    ['Unique Region', 'Anchor Cell Type', 'Anchor Cell Type Level']
).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
# Display the distance statistics DataFrame.
df_distance_stats
Unique Region | Anchor Cell Type | Anchor Cell Type Level | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|---|
0 | FSLD | endothelial cell | Level One Cell Type | 11.933050 | 10.185979 | 0.377000 | 120.663560 |
1 | FSLD | endothelial cell of lymphatic vessel | Level Three Cell Type | 69.140742 | 62.332820 | 0.377000 | 199.993437 |
2 | FSLD | endothelial cell of lymphatic vessel | Level Two Cell Type | 69.140742 | 62.332820 | 0.377000 | 199.993437 |
3 | FSLD | endothelial cell of sinusoid | Level Three Cell Type | 17.758929 | 12.486613 | 0.377000 | 174.062170 |
4 | FSLD | endothelial cell of sinusoid | Level Two Cell Type | 17.758929 | 12.486613 | 0.377000 | 174.062170 |
5 | FSLD | endothelial cell of vascular tree | Level Three Cell Type | 31.444898 | 28.859590 | 0.377000 | 134.193466 |
6 | FSLD | endothelial cell of vascular tree | Level Two Cell Type | 31.444898 | 28.859590 | 0.377000 | 134.193466 |
7 | KSFB | endothelial cell | Level One Cell Type | 18.516381 | 12.999668 | 0.377000 | 159.194472 |
8 | KSFB | endothelial cell of lymphatic vessel | Level Three Cell Type | 100.106671 | 98.720745 | 0.377000 | 199.996635 |
9 | KSFB | endothelial cell of lymphatic vessel | Level Two Cell Type | 100.106671 | 98.720745 | 0.377000 | 199.996635 |
10 | KSFB | endothelial cell of sinusoid | Level Three Cell Type | 32.301586 | 17.541644 | 0.377000 | 199.992015 |
11 | KSFB | endothelial cell of sinusoid | Level Two Cell Type | 32.301586 | 17.541644 | 0.377000 | 199.992015 |
12 | KSFB | endothelial cell of vascular tree | Level Three Cell Type | 34.072529 | 30.051417 | 0.377000 | 162.403006 |
13 | KSFB | endothelial cell of vascular tree | Level Two Cell Type | 34.072529 | 30.051417 | 0.377000 | 162.403006 |
14 | NGPL | endothelial cell | Level One Cell Type | 19.649715 | 12.756869 | 0.377000 | 199.990594 |
15 | NGPL | endothelial cell of lymphatic vessel | Level Three Cell Type | 105.868068 | 107.070655 | 0.377000 | 199.996635 |
16 | NGPL | endothelial cell of lymphatic vessel | Level Two Cell Type | 105.868068 | 107.070655 | 0.377000 | 199.996635 |
17 | NGPL | endothelial cell of sinusoid | Level Three Cell Type | 32.165822 | 17.358384 | 0.377000 | 199.946882 |
18 | NGPL | endothelial cell of sinusoid | Level Two Cell Type | 32.165822 | 17.358384 | 0.377000 | 199.946882 |
19 | NGPL | endothelial cell of vascular tree | Level Three Cell Type | 36.527379 | 31.724052 | 0.377000 | 199.990594 |
20 | NGPL | endothelial cell of vascular tree | Level Two Cell Type | 36.527379 | 31.724052 | 0.377000 | 199.990594 |
21 | PBVN | endothelial cell | Level One Cell Type | 16.689185 | 12.823543 | 0.377000 | 181.684667 |
22 | PBVN | endothelial cell of lymphatic vessel | Level Three Cell Type | 90.564477 | 86.386076 | 0.533159 | 199.999832 |
23 | PBVN | endothelial cell of lymphatic vessel | Level Two Cell Type | 90.564477 | 86.386076 | 0.533159 | 199.999832 |
24 | PBVN | endothelial cell of sinusoid | Level Three Cell Type | 24.695179 | 16.694759 | 0.377000 | 199.993437 |
25 | PBVN | endothelial cell of sinusoid | Level Two Cell Type | 24.695179 | 16.694759 | 0.377000 | 199.993437 |
26 | PBVN | endothelial cell of vascular tree | Level Three Cell Type | 33.754132 | 30.265846 | 0.377000 | 191.104792 |
27 | PBVN | endothelial cell of vascular tree | Level Two Cell Type | 33.754132 | 30.265846 | 0.377000 | 191.104792 |
28 | PKHL | endothelial cell | Level One Cell Type | 22.572654 | 14.562159 | 0.377000 | 165.169461 |
29 | PKHL | endothelial cell of lymphatic vessel | Level Three Cell Type | 113.542690 | 116.746498 | 0.842998 | 199.996635 |
30 | PKHL | endothelial cell of lymphatic vessel | Level Two Cell Type | 113.542690 | 116.746498 | 0.842998 | 199.996635 |
31 | PKHL | endothelial cell of sinusoid | Level Three Cell Type | 32.273382 | 17.602307 | 0.377000 | 199.996635 |
32 | PKHL | endothelial cell of sinusoid | Level Two Cell Type | 32.273382 | 17.602307 | 0.377000 | 199.996635 |
33 | PKHL | endothelial cell of vascular tree | Level Three Cell Type | 41.424710 | 36.574829 | 0.377000 | 177.757399 |
34 | PKHL | endothelial cell of vascular tree | Level Two Cell Type | 41.424710 | 36.574829 | 0.377000 | 177.757399 |
35 | XXCD | endothelial cell | Level One Cell Type | 16.957493 | 12.582993 | 0.377000 | 182.868318 |
36 | XXCD | endothelial cell of lymphatic vessel | Level Three Cell Type | 107.389387 | 108.560945 | 0.377000 | 199.999832 |
37 | XXCD | endothelial cell of lymphatic vessel | Level Two Cell Type | 107.389387 | 108.560945 | 0.377000 | 199.999832 |
38 | XXCD | endothelial cell of sinusoid | Level Three Cell Type | 24.938923 | 15.197356 | 0.377000 | 199.913826 |
39 | XXCD | endothelial cell of sinusoid | Level Two Cell Type | 24.938923 | 15.197356 | 0.377000 | 199.913826 |
40 | XXCD | endothelial cell of vascular tree | Level Three Cell Type | 38.484744 | 34.769948 | 0.377000 | 182.868318 |
41 | XXCD | endothelial cell of vascular tree | Level Two Cell Type | 38.484744 | 34.769948 | 0.377000 | 182.868318 |
Level One Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level One Cell Type'
# Keep only the edges computed for this level's chosen anchor cell type.
# .copy() gives an independent frame: the box-plot helper further down
# assigns to its 'Unique Region' column, which must not write into a slice
# of df_all_edges_with_cell_types.
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[
    (df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level)
    & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])
].copy()

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby(
    [cell_type_level, 'Unique Region']
).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max'),
).reset_index()
df_distance_stats_cell_type_level
Level One Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | immune cell | FSLD | 12.325330 | 10.515530 | 0.377 | 120.663560 |
1 | immune cell | KSFB | 19.574997 | 13.498495 | 0.377 | 159.194472 |
2 | immune cell | NGPL | 21.022792 | 13.514280 | 0.377 | 199.990594 |
3 | immune cell | PBVN | 17.059760 | 12.983258 | 0.377 | 181.684667 |
4 | immune cell | PKHL | 22.544364 | 14.330960 | 0.377 | 165.169461 |
5 | immune cell | XXCD | 17.306820 | 12.644964 | 0.377 | 182.868318 |
6 | unknown cell | FSLD | 9.336397 | 7.997378 | 0.377 | 83.688056 |
7 | unknown cell | KSFB | 12.696620 | 10.633133 | 0.377 | 150.979439 |
8 | unknown cell | NGPL | 10.879824 | 9.311213 | 0.377 | 97.890139 |
9 | unknown cell | PBVN | 14.605778 | 11.783889 | 0.377 | 145.222827 |
10 | unknown cell | PKHL | 22.720231 | 16.105447 | 0.377 | 146.043105 |
11 | unknown cell | XXCD | 14.710918 | 12.308920 | 0.377 | 161.352917 |
# Get top five and bottom five cell types with respect to mean distance in each unique region separately.
def get_top_bottom_cell_types_by_mean(df, cell_type_level, unique_region, top_n=5):
    """Rank the cell types of one region by mean distance.

    Args:
        df: Edges DataFrame with ``Unique Region``, ``Distance`` and the
            ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the data to.
        top_n: Number of cell types to return at each end of the ranking.

    Returns:
        ``(top_cell_types, bottom_cell_types)``: the first and last ``top_n``
        rows of the per-cell-type mean distances, both sorted from largest to
        smallest mean. The two frames overlap when the region has fewer than
        ``2 * top_n`` cell types.
    """
    # Filter the DataFrame for the specified unique region
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level and calculate mean distance
    df_grouped = df_filtered.groupby(cell_type_level).agg(mean_distance=('Distance', 'mean')).reset_index()

    # Sort by mean distance (largest first) to get top and bottom cell types
    df_sorted = df_grouped.sort_values(by='mean_distance', ascending=False)

    # Get top N and bottom N cell types
    top_cell_types = df_sorted.head(top_n)
    bottom_cell_types = df_sorted.tail(top_n)

    return top_cell_types, bottom_cell_types
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level One Cell Type mean_distance
0 immune cell 12.325330
1 unknown cell 9.336397
Bottom 5 cell types in FSLD:
Level One Cell Type mean_distance
0 immune cell 12.325330
1 unknown cell 9.336397
Top 5 cell types in KSFB:
Level One Cell Type mean_distance
0 immune cell 19.574997
1 unknown cell 12.696620
Bottom 5 cell types in KSFB:
Level One Cell Type mean_distance
0 immune cell 19.574997
1 unknown cell 12.696620
Top 5 cell types in NGPL:
Level One Cell Type mean_distance
0 immune cell 21.022792
1 unknown cell 10.879824
Bottom 5 cell types in NGPL:
Level One Cell Type mean_distance
0 immune cell 21.022792
1 unknown cell 10.879824
Top 5 cell types in PBVN:
Level One Cell Type mean_distance
0 immune cell 17.059760
1 unknown cell 14.605778
Bottom 5 cell types in PBVN:
Level One Cell Type mean_distance
0 immune cell 17.059760
1 unknown cell 14.605778
Top 5 cell types in PKHL:
Level One Cell Type mean_distance
1 unknown cell 22.720231
0 immune cell 22.544364
Bottom 5 cell types in PKHL:
Level One Cell Type mean_distance
1 unknown cell 22.720231
0 immune cell 22.544364
Top 5 cell types in XXCD:
Level One Cell Type mean_distance
0 immune cell 17.306820
1 unknown cell 14.710918
Bottom 5 cell types in XXCD:
Level One Cell Type mean_distance
0 immune cell 17.306820
1 unknown cell 14.710918
# Get top five and bottom five cell types with respect to median distance in each unique region separately.
def get_top_bottom_cell_types_by_median(df, cell_type_level, unique_region, top_n=5):
    """Rank the cell types of one region by median distance.

    Args:
        df: Edges DataFrame with ``Unique Region``, ``Distance`` and the
            ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to group by.
        unique_region: Value of ``Unique Region`` to restrict the data to.
        top_n: Number of cell types to return at each end of the ranking.

    Returns:
        ``(top_cell_types, bottom_cell_types)``: the first and last ``top_n``
        rows of the per-cell-type median distances, both sorted from largest
        to smallest median. The two frames overlap when the region has fewer
        than ``2 * top_n`` cell types.
    """
    # Filter the DataFrame for the specified unique region
    df_filtered = df[df['Unique Region'] == unique_region]

    # Group by the specified cell type level and calculate median distance
    df_grouped = df_filtered.groupby(cell_type_level).agg(median_distance=('Distance', 'median')).reset_index()

    # Sort by median distance (largest first) to get top and bottom cell types
    df_sorted = df_grouped.sort_values(by='median_distance', ascending=False)

    # Get top N and bottom N cell types
    top_cell_types = df_sorted.head(top_n)
    bottom_cell_types = df_sorted.tail(top_n)

    return top_cell_types, bottom_cell_types
# Get top and bottom cell types for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level One Cell Type median_distance
0 immune cell 10.515530
1 unknown cell 7.997378
Bottom 5 cell types in FSLD:
Level One Cell Type median_distance
0 immune cell 10.515530
1 unknown cell 7.997378
Top 5 cell types in KSFB:
Level One Cell Type median_distance
0 immune cell 13.498495
1 unknown cell 10.633133
Bottom 5 cell types in KSFB:
Level One Cell Type median_distance
0 immune cell 13.498495
1 unknown cell 10.633133
Top 5 cell types in NGPL:
Level One Cell Type median_distance
0 immune cell 13.514280
1 unknown cell 9.311213
Bottom 5 cell types in NGPL:
Level One Cell Type median_distance
0 immune cell 13.514280
1 unknown cell 9.311213
Top 5 cell types in PBVN:
Level One Cell Type median_distance
0 immune cell 12.983258
1 unknown cell 11.783889
Bottom 5 cell types in PBVN:
Level One Cell Type median_distance
0 immune cell 12.983258
1 unknown cell 11.783889
Top 5 cell types in PKHL:
Level One Cell Type median_distance
1 unknown cell 16.105447
0 immune cell 14.330960
Bottom 5 cell types in PKHL:
Level One Cell Type median_distance
1 unknown cell 16.105447
0 immune cell 14.330960
Top 5 cell types in XXCD:
Level One Cell Type median_distance
0 immune cell 12.644964
1 unknown cell 12.308920
Bottom 5 cell types in XXCD:
Level One Cell Type median_distance
0 immune cell 12.644964
1 unknown cell 12.308920
# Calculate regional variability
def calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level):
    """Print distance variability per region and per cell type.

    For each ``Unique Region`` and for each value of ``cell_type_level`` the
    mean and standard deviation of ``Distance`` (rounded to 2 decimals) and
    the coefficient of variation ``std / mean * 100`` (rounded to 1 decimal)
    are printed. The cell type table is sorted by CV, largest first.

    Args:
        df_all_edges_with_cell_type_level: Edges DataFrame with ``Distance``,
            ``Unique Region`` and the ``cell_type_level`` column.
        cell_type_level: Name of the cell type column to group by.

    Returns:
        None. The results are only printed.
    """
    def _variability_by(group_key):
        """Return mean, std and CV (%) of Distance for each value of group_key."""
        stats = df_all_edges_with_cell_type_level.groupby(group_key)['Distance'].agg(
            mean='mean',
            std='std',
        )
        # Derive the CV from the unrounded statistics; computing it from the
        # 2-decimal values would add avoidable rounding error.
        cv_percent = (stats['std'] / stats['mean'] * 100).round(1)
        stats = stats.round(2)
        stats['CV (%)'] = cv_percent
        return stats

    regional_variability = _variability_by('Unique Region')

    print("\nRegional Variability Analysis:")
    print("Mean: Average distance in each region")
    print("Std: Standard deviation of distances")
    print("CV: Coefficient of Variation (std/mean * 100%)")
    print(regional_variability)

    # Calculate variability for each cell type
    cell_type_variability = _variability_by(cell_type_level)

    print("\nCell Type Variability Analysis (sorted by CV):")
    print(cell_type_variability.sort_values('CV (%)', ascending=False))
# Print the variability tables for the selected cell type level.
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
FSLD 11.93 7.39 61.9
KSFB 18.52 18.52 100.0
NGPL 19.65 21.17 107.7
PBVN 16.69 14.62 87.6
PKHL 22.57 22.69 100.5
XXCD 16.96 16.01 94.4
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level One Cell Type
immune cell 18.10 18.36 101.4
unknown cell 14.16 12.15 85.8
# Define the standard region sequence for plots (Get all unique values in Unique Region column as a list.)
regions = df_all_edges_with_cell_types['Unique Region'].unique().tolist()
print("\nRegions in the data:")
print(regions)
Regions in the data:
['FSLD', 'KSFB', 'NGPL', 'PBVN', 'PKHL', 'XXCD']
# Generate Violin Plot
def plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm='area'):
    """Draw one violin per cell type for the 'Distance' column and save it as PNG and SVG.

    Parameters
    ----------
    df_all_edges_with_cell_type_level : pandas.DataFrame
        Must contain a 'Distance' column and the column named by `cell_type_level`.
    cell_type_level : str
        Column used for the x axis; also embedded in the title and file names.
    output_dir : str
        Directory the figures are written to. It is created if missing.
    density_norm : str, default 'area'
        Forwarded to `seaborn.violinplot(density_norm=...)`.

    Notes
    -----
    File names are prefixed with the module-level `dataset_dir`.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 2})
    plt.figure(figsize=(10, 6))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    # `hue` mirrors `x` with the legend switched off: seaborn deprecates passing
    # `palette` without `hue` and documents this form as the equivalent.
    sns.violinplot(
        data=df_all_edges_with_cell_type_level,
        x=cell_type_level,
        y="Distance",
        hue=cell_type_level,
        legend=False,
        density_norm=density_norm,
        common_norm=True,
        cut=0,
        inner="box",
        split=False,
        palette='Spectral',
        alpha=.9,
    )

    # These calls update seaborn/matplotlib global settings after the violins
    # are drawn; they are kept in place so later plots see the same state.
    sns.set_theme(style="whitegrid")
    sns.set_context("paper")

    font_size = 10
    # NOTE(review): no labelled artists are expected here, so this legend is
    # probably empty - confirm whether the call can be dropped.
    plt.legend(fontsize=font_size)

    plt.title(f'Violin Plot of distances by {cell_type_level} (Density Normalization: {density_norm})', fontsize=font_size)

    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)

    # Use the same font size for all tick labels; x labels are vertical.
    plt.xticks(rotation=90, fontsize=font_size)
    plt.yticks(fontsize=font_size)

    plt.tight_layout()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_stem = os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype_{cell_type_level}')
    for extension in ('png', 'svg'):
        plt.savefig(f'{file_stem}.{extension}', dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw and save the violin plot for the currently selected cell type level.
plot_violin_cells_per_celltype(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
    density_norm='area',
)
# Boxplots of distribution of distances by cell type and region.
def plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Draw grouped box plots of 'Distance' per cell type, one box per region, and save as PNG and SVG.

    Parameters
    ----------
    df_all_edges_with_cell_type_level : pandas.DataFrame
        Must contain the columns 'Distance', 'Unique Region' and the column
        named by `cell_type_level`. The frame is not modified.
    cell_type_level : str
        Column used for the x axis; also embedded in the title and file names.
    output_dir : str
        Directory the figures are written to. It is created if missing.

    Notes
    -----
    Region order follows the module-level `regions` list; file names are
    prefixed with the module-level `dataset_dir`.
    """
    plt.figure(figsize=(16, 8))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path

    # Keep only the regions that exist in the data, in the order given by `regions`.
    # The unique values are computed once rather than once per candidate region.
    present_regions = set(df_all_edges_with_cell_type_level['Unique Region'].unique())
    available_regions = [r for r in regions if r in present_regions]

    # Build the ordered categorical on a copy so the caller's DataFrame keeps
    # its original 'Unique Region' column.
    plot_df = df_all_edges_with_cell_type_level.assign(**{
        'Unique Region': pd.Categorical(
            df_all_edges_with_cell_type_level['Unique Region'],
            categories=available_regions,
            ordered=True,
        )
    })

    # Make box plot.
    sns.boxplot(data=plot_df, x=cell_type_level, y='Distance', hue='Unique Region', showfliers=False, palette='Spectral')  # viridis or Spectral palette for better color distinction
    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)
    plt.title(f'Distribution of distances by {cell_type_level} and region', fontsize=font_size)
    plt.xlabel(f'{cell_type_level}', fontsize=font_size)
    plt.ylabel('Distance (\u03bcm)', fontsize=font_size)
    # Anchor the legend just outside the top-right corner of the axes.
    plt.legend(bbox_to_anchor=(1, 1), loc='upper left')

    plt.tight_layout()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_stem = os.path.join(output_dir, f'{dataset_dir}_distance_distribution_boxplots_by_region_{cell_type_level}')
    for extension in ('png', 'svg'):
        plt.savefig(f'{file_stem}.{extension}', dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw and save the per-region box plots for the currently selected cell type level.
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Heatmap of median distances by cell type and region.
def plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
    """Draw an annotated heatmap of the median 'Distance' per cell type and region, and save as PNG and SVG.

    Parameters
    ----------
    df_all_edges_with_cell_type_level : pandas.DataFrame
        Must contain the columns 'Distance', 'Unique Region' and the column
        named by `cell_type_level`.
    cell_type_level : str
        Column whose values become the heatmap rows; also embedded in the
        title and file names.
    output_dir : str
        Directory the figures are written to. It is created if missing.

    Notes
    -----
    File names are prefixed with the module-level `dataset_dir`.
    """
    # Rows: cell types, columns: regions, cells: median distance.
    pivot_data = df_all_edges_with_cell_type_level.pivot_table(
        values='Distance',
        index=cell_type_level,
        columns='Unique Region',
        aggfunc='median',
    )

    plt.figure(figsize=(15, 10))
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='Spectral')
    plt.title(f'Heatmap of median distances by {cell_type_level}', fontsize=12)

    font_size = 10
    plt.xticks(rotation=90, ha='right', fontsize=font_size)
    plt.yticks(fontsize=font_size)

    plt.xlabel('Unique Region', fontsize=font_size)
    plt.ylabel(f'{cell_type_level}', fontsize=font_size)

    plt.tight_layout()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_stem = os.path.join(output_dir, f'{dataset_dir}_distance_distribution_heatmap_{cell_type_level}')
    for extension in ('png', 'svg'):
        plt.savefig(f'{file_stem}.{extension}', dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# Draw and save the median-distance heatmap for the currently selected cell type level.
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Generate Violin Plot per unique region.
def plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm="area"):
    """Draw a column of violin plots, one subplot per region, and save it as PNG and SVG.

    Parameters
    ----------
    df_all_edges_with_cell_type_level : pandas.DataFrame
        Must contain the columns 'Distance', 'Unique Region' and the column
        named by `cell_type_level`.
    cell_type_level : str
        Column used for the x axis; also embedded in the title and file names.
    output_dir : str
        Directory the figures are written to. It is created if missing.
    density_norm : str, default "area"
        Forwarded to `seaborn.violinplot(density_norm=...)`.

    Notes
    -----
    Subplots follow the module-level `regions` list; the title and file names
    use the module-level `dataset_dir`.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook", rc={"grid.linewidth": 1})
    plt.rcParams["svg.fonttype"] = 'none'  # to store text as text, not as path
    font_size = 10

    # One row per region instead of a fixed 6 rows, so other region counts
    # neither overflow the axes array nor leave empty subplots. The height
    # scales so that six regions still give the original 10 x 20 figure.
    n_rows = max(len(regions), 1)
    fig, axs = plt.subplots(n_rows, 1, figsize=(10, 20 * n_rows / 6), squeeze=False)
    axs = axs[:, 0]  # squeeze=False always returns a 2-D array, even for one row
    fig.suptitle(f'Distance distribution per {cell_type_level} in `{dataset_dir}` (density normalization = {density_norm})', fontsize=font_size, y=1)

    # Keep the sequence of Cell Types consistent across plots.
    cell_types = sorted(df_all_edges_with_cell_type_level[cell_type_level].unique())

    # Create a color palette based on the number of unique classes
    color_palette = sns.color_palette("Spectral", n_colors=len(cell_types))

    # Create a dictionary mapping class to color
    class_color_dict = dict(zip(cell_types, color_palette))

    last_index = len(regions) - 1
    for i, (ax, region) in enumerate(zip(axs, regions)):
        data_reg = df_all_edges_with_cell_type_level[df_all_edges_with_cell_type_level['Unique Region'] == region]
        sns.violinplot(data=data_reg, x=cell_type_level, y="Distance", density_norm=density_norm, common_norm=True, cut=0, inner="box", split=False, palette=class_color_dict, alpha=.9, ax=ax, hue=cell_type_level, legend=False, order=cell_types, fill=True)
        ax.set_title(region, fontsize=font_size)
        ax.set_xlabel('', fontsize=font_size)
        ax.set_ylabel('Distance (\u03bcm)', fontsize=font_size)
        # only show xtick labels for the last subplot
        if i < last_index:
            ax.set_xticklabels([])
        else:
            # Labels line up with the violins because `order=cell_types` above.
            ax.set_xticklabels(cell_types, fontsize=font_size, rotation=90, ha='right')
        ax.tick_params(axis='both', labelsize=font_size)

    # Use fig.text for precise label positioning
    fig.text(0.5, -0.02, f'{cell_type_level}', ha='center', va='bottom', fontsize=font_size)

    plt.tight_layout()

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    file_stem = os.path.join(output_dir, f'{dataset_dir}_violin_plots_all_regions_{cell_type_level}')
    for extension in ('png', 'svg'):
        plt.savefig(f'{file_stem}.{extension}', dpi=300,
                    bbox_inches='tight',
                    pad_inches=0.5)

    plt.show()
# density_norm="count" or "area" can be used based on preference.
plot_violin_plots_all_regions(
    df_all_edges_with_cell_type_level,
    cell_type_level,
    os.path.join(basepath, figures_output_dir),
    density_norm="count",
)
Level Two Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Two Cell Type'

# Keep only the edges computed at this level for this level's anchor cell type.
is_selected_level = df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level
is_selected_anchor = df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level]
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[is_selected_level & is_selected_anchor]

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
# Bare expression so the notebook displays the table.
df_distance_stats_cell_type_level
Level Two Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | b cell | FSLD | 28.793129 | 18.193912 | 0.377000 | 174.062170 |
1 | b cell | KSFB | 66.936676 | 47.283075 | 0.533159 | 199.992015 |
2 | b cell | NGPL | 62.721335 | 50.108392 | 0.377000 | 199.946882 |
3 | b cell | PBVN | 43.296183 | 27.544230 | 0.377000 | 199.993437 |
4 | b cell | PKHL | 74.037735 | 63.134837 | 0.533159 | 199.996635 |
5 | b cell | XXCD | 60.047765 | 41.497409 | 0.533159 | 199.913826 |
6 | endothelial cell of lymphatic vessel | FSLD | 29.642101 | 22.399004 | 1.685995 | 170.821358 |
7 | endothelial cell of lymphatic vessel | KSFB | 61.887568 | 50.581903 | 3.243077 | 199.493211 |
8 | endothelial cell of lymphatic vessel | NGPL | 43.458134 | 39.208000 | 2.871146 | 198.593137 |
9 | endothelial cell of lymphatic vessel | PBVN | 32.893007 | 27.726806 | 3.039471 | 136.266075 |
10 | endothelial cell of lymphatic vessel | PKHL | 56.864136 | 48.143203 | 7.952824 | 183.699222 |
11 | endothelial cell of lymphatic vessel | XXCD | 36.088648 | 32.225229 | 2.639000 | 129.051896 |
12 | endothelial cell of vascular tree | FSLD | 21.913759 | 20.119229 | 0.377000 | 171.695254 |
13 | endothelial cell of vascular tree | KSFB | 29.505573 | 23.603934 | 1.131000 | 198.672919 |
14 | endothelial cell of vascular tree | NGPL | 26.593129 | 22.220624 | 0.533159 | 196.830469 |
15 | endothelial cell of vascular tree | PBVN | 23.699591 | 21.791122 | 0.533159 | 132.553972 |
16 | endothelial cell of vascular tree | PKHL | 25.871987 | 23.120276 | 2.665793 | 183.957844 |
17 | endothelial cell of vascular tree | XXCD | 25.129076 | 22.500300 | 2.871146 | 174.160943 |
18 | macrophage | FSLD | 12.964001 | 11.638253 | 0.533159 | 156.988703 |
19 | macrophage | KSFB | 19.576427 | 16.003639 | 0.377000 | 185.984841 |
20 | macrophage | NGPL | 19.867353 | 15.475379 | 0.377000 | 198.830958 |
21 | macrophage | PBVN | 16.594531 | 14.813750 | 0.377000 | 124.547586 |
22 | macrophage | PKHL | 17.784159 | 15.084712 | 0.377000 | 158.327882 |
23 | macrophage | XXCD | 16.401596 | 14.395280 | 0.377000 | 158.600096 |
24 | myeloid cell | FSLD | 12.219064 | 10.122992 | 0.377000 | 128.742602 |
25 | myeloid cell | KSFB | 19.470228 | 14.640032 | 0.377000 | 195.567455 |
26 | myeloid cell | NGPL | 18.322417 | 13.660724 | 0.377000 | 189.631000 |
27 | myeloid cell | PBVN | 14.973737 | 12.823543 | 0.377000 | 113.148371 |
28 | myeloid cell | PKHL | 17.663756 | 13.994776 | 0.377000 | 157.639204 |
29 | myeloid cell | XXCD | 15.418976 | 12.795804 | 0.377000 | 185.362891 |
30 | neutrophil | FSLD | 13.274293 | 10.616416 | 0.377000 | 140.427313 |
31 | neutrophil | KSFB | 17.949726 | 13.151844 | 0.377000 | 191.740362 |
32 | neutrophil | NGPL | 17.447778 | 12.069889 | 0.377000 | 187.014041 |
33 | neutrophil | PBVN | 16.360062 | 13.328963 | 0.377000 | 136.539595 |
34 | neutrophil | PKHL | 15.311728 | 12.492303 | 0.377000 | 151.875329 |
35 | neutrophil | XXCD | 14.615880 | 12.140337 | 0.377000 | 194.779158 |
36 | t cell | FSLD | 19.170687 | 13.723007 | 0.533159 | 173.112390 |
37 | t cell | KSFB | 37.426450 | 20.902262 | 0.377000 | 199.818891 |
38 | t cell | NGPL | 36.643007 | 21.725801 | 0.377000 | 199.150585 |
39 | t cell | PBVN | 26.348933 | 17.818984 | 0.533159 | 199.797551 |
40 | t cell | PKHL | 31.271041 | 17.670807 | 0.533159 | 199.732451 |
41 | t cell | XXCD | 19.165004 | 13.707463 | 0.377000 | 199.459011 |
42 | unknown cell | FSLD | 11.225060 | 8.328202 | 0.377000 | 160.251610 |
43 | unknown cell | KSFB | 20.793916 | 12.210378 | 0.377000 | 197.669193 |
44 | unknown cell | NGPL | 14.866453 | 9.960223 | 0.377000 | 194.954206 |
45 | unknown cell | PBVN | 20.411115 | 14.025211 | 0.377000 | 175.661774 |
46 | unknown cell | PKHL | 32.481525 | 19.752066 | 0.377000 | 199.245838 |
47 | unknown cell | XXCD | 20.205180 | 14.483867 | 0.377000 | 199.347818 |
# Get top and bottom cell types (via get_top_bottom_cell_types_by_mean) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the "top" table, element 1 as the "bottom" table.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level Two Cell Type mean_distance
1 endothelial cell of lymphatic vessel 29.642101
0 b cell 28.793129
2 endothelial cell of vascular tree 21.913759
6 t cell 19.170687
5 neutrophil 13.274293
Bottom 5 cell types in FSLD:
Level Two Cell Type mean_distance
6 t cell 19.170687
5 neutrophil 13.274293
3 macrophage 12.964001
4 myeloid cell 12.219064
7 unknown cell 11.225060
Top 5 cell types in KSFB:
Level Two Cell Type mean_distance
0 b cell 66.936676
1 endothelial cell of lymphatic vessel 61.887568
6 t cell 37.426450
2 endothelial cell of vascular tree 29.505573
7 unknown cell 20.793916
Bottom 5 cell types in KSFB:
Level Two Cell Type mean_distance
2 endothelial cell of vascular tree 29.505573
7 unknown cell 20.793916
3 macrophage 19.576427
4 myeloid cell 19.470228
5 neutrophil 17.949726
Top 5 cell types in NGPL:
Level Two Cell Type mean_distance
0 b cell 62.721335
1 endothelial cell of lymphatic vessel 43.458134
6 t cell 36.643007
2 endothelial cell of vascular tree 26.593129
3 macrophage 19.867353
Bottom 5 cell types in NGPL:
Level Two Cell Type mean_distance
2 endothelial cell of vascular tree 26.593129
3 macrophage 19.867353
4 myeloid cell 18.322417
5 neutrophil 17.447778
7 unknown cell 14.866453
Top 5 cell types in PBVN:
Level Two Cell Type mean_distance
0 b cell 43.296183
1 endothelial cell of lymphatic vessel 32.893007
6 t cell 26.348933
2 endothelial cell of vascular tree 23.699591
7 unknown cell 20.411115
Bottom 5 cell types in PBVN:
Level Two Cell Type mean_distance
2 endothelial cell of vascular tree 23.699591
7 unknown cell 20.411115
3 macrophage 16.594531
5 neutrophil 16.360062
4 myeloid cell 14.973737
Top 5 cell types in PKHL:
Level Two Cell Type mean_distance
0 b cell 74.037735
1 endothelial cell of lymphatic vessel 56.864136
7 unknown cell 32.481525
6 t cell 31.271041
2 endothelial cell of vascular tree 25.871987
Bottom 5 cell types in PKHL:
Level Two Cell Type mean_distance
6 t cell 31.271041
2 endothelial cell of vascular tree 25.871987
3 macrophage 17.784159
4 myeloid cell 17.663756
5 neutrophil 15.311728
Top 5 cell types in XXCD:
Level Two Cell Type mean_distance
0 b cell 60.047765
1 endothelial cell of lymphatic vessel 36.088648
2 endothelial cell of vascular tree 25.129076
7 unknown cell 20.205180
6 t cell 19.165004
Bottom 5 cell types in XXCD:
Level Two Cell Type mean_distance
7 unknown cell 20.205180
6 t cell 19.165004
3 macrophage 16.401596
4 myeloid cell 15.418976
5 neutrophil 14.615880
# Get top and bottom cell types (via get_top_bottom_cell_types_by_median) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the "top" table, element 1 as the "bottom" table.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level Two Cell Type median_distance
1 endothelial cell of lymphatic vessel 22.399004
2 endothelial cell of vascular tree 20.119229
0 b cell 18.193912
6 t cell 13.723007
3 macrophage 11.638253
Bottom 5 cell types in FSLD:
Level Two Cell Type median_distance
6 t cell 13.723007
3 macrophage 11.638253
5 neutrophil 10.616416
4 myeloid cell 10.122992
7 unknown cell 8.328202
Top 5 cell types in KSFB:
Level Two Cell Type median_distance
1 endothelial cell of lymphatic vessel 50.581903
0 b cell 47.283075
2 endothelial cell of vascular tree 23.603934
6 t cell 20.902262
3 macrophage 16.003639
Bottom 5 cell types in KSFB:
Level Two Cell Type median_distance
6 t cell 20.902262
3 macrophage 16.003639
4 myeloid cell 14.640032
5 neutrophil 13.151844
7 unknown cell 12.210378
Top 5 cell types in NGPL:
Level Two Cell Type median_distance
0 b cell 50.108392
1 endothelial cell of lymphatic vessel 39.208000
2 endothelial cell of vascular tree 22.220624
6 t cell 21.725801
3 macrophage 15.475379
Bottom 5 cell types in NGPL:
Level Two Cell Type median_distance
6 t cell 21.725801
3 macrophage 15.475379
4 myeloid cell 13.660724
5 neutrophil 12.069889
7 unknown cell 9.960223
Top 5 cell types in PBVN:
Level Two Cell Type median_distance
1 endothelial cell of lymphatic vessel 27.726806
0 b cell 27.544230
2 endothelial cell of vascular tree 21.791122
6 t cell 17.818984
3 macrophage 14.813750
Bottom 5 cell types in PBVN:
Level Two Cell Type median_distance
6 t cell 17.818984
3 macrophage 14.813750
7 unknown cell 14.025211
5 neutrophil 13.328963
4 myeloid cell 12.823543
Top 5 cell types in PKHL:
Level Two Cell Type median_distance
0 b cell 63.134837
1 endothelial cell of lymphatic vessel 48.143203
2 endothelial cell of vascular tree 23.120276
7 unknown cell 19.752066
6 t cell 17.670807
Bottom 5 cell types in PKHL:
Level Two Cell Type median_distance
7 unknown cell 19.752066
6 t cell 17.670807
3 macrophage 15.084712
4 myeloid cell 13.994776
5 neutrophil 12.492303
Top 5 cell types in XXCD:
Level Two Cell Type median_distance
0 b cell 41.497409
1 endothelial cell of lymphatic vessel 32.225229
2 endothelial cell of vascular tree 22.500300
7 unknown cell 14.483867
3 macrophage 14.395280
Bottom 5 cell types in XXCD:
Level Two Cell Type median_distance
7 unknown cell 14.483867
3 macrophage 14.395280
6 t cell 13.707463
4 myeloid cell 12.795804
5 neutrophil 12.140337
# Print the per-region and per-cell-type variability tables for the currently selected cell type level.
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
FSLD 17.76 17.69 99.6
KSFB 32.30 37.95 117.5
NGPL 32.17 35.55 110.5
PBVN 24.70 24.92 100.9
PKHL 32.27 36.56 113.3
XXCD 24.94 29.75 119.3
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Two Cell Type
unknown cell 19.98 22.82 114.2
t cell 27.79 30.32 109.1
b cell 54.90 48.74 88.8
neutrophil 15.81 13.10 82.9
myeloid cell 15.79 12.86 81.4
endothelial cell of lymphatic vessel 42.11 30.14 71.6
macrophage 16.86 11.93 70.8
endothelial cell of vascular tree 25.07 15.68 62.5
# Draw and save all four figures for the currently selected cell type level.
figures_dir = os.path.join(basepath, figures_output_dir)
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, figures_dir, density_norm='area')
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, figures_dir)
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, figures_dir)
# Or, density_norm="count" or "area" based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, figures_dir, density_norm="count")
Level Three Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all unique regions.
cell_type_level = 'Level Three Cell Type'

# Keep only the edges computed at this level for this level's anchor cell type.
is_selected_level = df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level
is_selected_anchor = df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level]
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[is_selected_level & is_selected_anchor]

df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Unique Region']).agg(
    mean_distance=('Distance', 'mean'),
    median_distance=('Distance', 'median'),
    min_distance=('Distance', 'min'),
    max_distance=('Distance', 'max')
).reset_index()
# Bare expression so the notebook displays the table.
df_distance_stats_cell_type_level
Level Three Cell Type | Unique Region | mean_distance | median_distance | min_distance | max_distance | |
---|---|---|---|---|---|---|
0 | b cell | FSLD | 15.017328 | 12.756869 | 0.377000 | 150.453233 |
1 | b cell | KSFB | 21.320057 | 16.424398 | 0.533159 | 191.699959 |
2 | b cell | NGPL | 21.343961 | 15.945808 | 0.377000 | 197.501949 |
3 | b cell | PBVN | 18.469944 | 15.530387 | 0.377000 | 167.593356 |
4 | b cell | PKHL | 18.087155 | 14.218947 | 0.533159 | 156.409572 |
5 | b cell | XXCD | 16.399071 | 13.592928 | 0.533159 | 147.880623 |
6 | b cell:follicular | FSLD | 44.741041 | 33.688278 | 0.533159 | 174.062170 |
7 | b cell:follicular | KSFB | 98.262190 | 94.780079 | 3.016000 | 199.992015 |
8 | b cell:follicular | NGPL | 86.597350 | 82.378598 | 0.533159 | 199.946882 |
9 | b cell:follicular | PBVN | 62.807603 | 52.209394 | 3.221089 | 199.993437 |
10 | b cell:follicular | PKHL | 83.720368 | 75.842618 | 2.744601 | 199.996635 |
11 | b cell:follicular | XXCD | 76.025086 | 66.886430 | 2.871146 | 199.913826 |
12 | cell:proliferating | FSLD | 18.250368 | 13.070542 | 2.639000 | 160.251610 |
13 | cell:proliferating | KSFB | 16.776125 | 12.308920 | 0.377000 | 194.953112 |
14 | cell:proliferating | NGPL | 16.035974 | 11.196329 | 0.377000 | 194.954206 |
15 | cell:proliferating | PBVN | 22.825014 | 15.856425 | 0.377000 | 175.661774 |
16 | cell:proliferating | PKHL | 22.113843 | 16.118679 | 0.377000 | 195.479862 |
17 | cell:proliferating | XXCD | 17.848101 | 14.508379 | 0.533159 | 199.347818 |
18 | endothelial cell of lymphatic vessel | FSLD | 29.642101 | 22.399004 | 1.685995 | 170.821358 |
19 | endothelial cell of lymphatic vessel | KSFB | 61.887568 | 50.581903 | 3.243077 | 199.493211 |
20 | endothelial cell of lymphatic vessel | NGPL | 43.458134 | 39.208000 | 2.871146 | 198.593137 |
21 | endothelial cell of lymphatic vessel | PBVN | 32.893007 | 27.726806 | 3.039471 | 136.266075 |
22 | endothelial cell of lymphatic vessel | PKHL | 56.864136 | 48.143203 | 7.952824 | 183.699222 |
23 | endothelial cell of lymphatic vessel | XXCD | 36.088648 | 32.225229 | 2.639000 | 129.051896 |
24 | endothelial cell of vascular tree | FSLD | 21.913759 | 20.119229 | 0.377000 | 171.695254 |
25 | endothelial cell of vascular tree | KSFB | 29.505573 | 23.603934 | 1.131000 | 198.672919 |
26 | endothelial cell of vascular tree | NGPL | 26.593129 | 22.220624 | 0.533159 | 196.830469 |
27 | endothelial cell of vascular tree | PBVN | 23.699591 | 21.791122 | 0.533159 | 132.553972 |
28 | endothelial cell of vascular tree | PKHL | 25.871987 | 23.120276 | 2.665793 | 183.957844 |
29 | endothelial cell of vascular tree | XXCD | 25.129076 | 22.500300 | 2.871146 | 174.160943 |
30 | macrophage | FSLD | 12.964001 | 11.638253 | 0.533159 | 156.988703 |
31 | macrophage | KSFB | 19.576427 | 16.003639 | 0.377000 | 185.984841 |
32 | macrophage | NGPL | 19.867353 | 15.475379 | 0.377000 | 198.830958 |
33 | macrophage | PBVN | 16.594531 | 14.813750 | 0.377000 | 124.547586 |
34 | macrophage | PKHL | 17.784159 | 15.084712 | 0.377000 | 158.327882 |
35 | macrophage | XXCD | 16.401596 | 14.395280 | 0.377000 | 158.600096 |
36 | myeloid cell | FSLD | 12.219064 | 10.122992 | 0.377000 | 128.742602 |
37 | myeloid cell | KSFB | 19.470228 | 14.640032 | 0.377000 | 195.567455 |
38 | myeloid cell | NGPL | 18.322417 | 13.660724 | 0.377000 | 189.631000 |
39 | myeloid cell | PBVN | 14.973737 | 12.823543 | 0.377000 | 113.148371 |
40 | myeloid cell | PKHL | 17.663756 | 13.994776 | 0.377000 | 157.639204 |
41 | myeloid cell | XXCD | 15.418976 | 12.795804 | 0.377000 | 185.362891 |
42 | neutrophil/monocyte | FSLD | 13.274293 | 10.616416 | 0.377000 | 140.427313 |
43 | neutrophil/monocyte | KSFB | 17.949726 | 13.151844 | 0.377000 | 191.740362 |
44 | neutrophil/monocyte | NGPL | 17.447778 | 12.069889 | 0.377000 | 187.014041 |
45 | neutrophil/monocyte | PBVN | 16.360062 | 13.328963 | 0.377000 | 136.539595 |
46 | neutrophil/monocyte | PKHL | 15.311728 | 12.492303 | 0.377000 | 151.875329 |
47 | neutrophil/monocyte | XXCD | 14.615880 | 12.140337 | 0.377000 | 194.779158 |
48 | t cell:cd4+ alpha-beta memory | FSLD | 30.214247 | 19.784420 | 0.842998 | 173.112390 |
49 | t cell:cd4+ alpha-beta memory | KSFB | 59.324035 | 41.748388 | 2.293201 | 199.818891 |
50 | t cell:cd4+ alpha-beta memory | NGPL | 56.807808 | 47.174738 | 2.744601 | 199.150585 |
51 | t cell:cd4+ alpha-beta memory | PBVN | 41.516702 | 27.747302 | 2.639000 | 199.797551 |
52 | t cell:cd4+ alpha-beta memory | PKHL | 56.975496 | 42.997011 | 1.922330 | 199.732451 |
53 | t cell:cd4+ alpha-beta memory | XXCD | 35.364842 | 20.820506 | 1.922330 | 199.459011 |
54 | t cell:cd8+ alpha-beta memory | FSLD | 13.932302 | 12.169569 | 0.533159 | 117.974499 |
55 | t cell:cd8+ alpha-beta memory | KSFB | 18.510546 | 15.197356 | 0.377000 | 183.973682 |
56 | t cell:cd8+ alpha-beta memory | NGPL | 18.357679 | 14.552395 | 0.377000 | 195.575450 |
57 | t cell:cd8+ alpha-beta memory | PBVN | 16.785163 | 14.871205 | 0.533159 | 127.954155 |
58 | t cell:cd8+ alpha-beta memory | PKHL | 17.167939 | 14.171389 | 0.533159 | 169.717009 |
59 | t cell:cd8+ alpha-beta memory | XXCD | 14.440700 | 12.549061 | 0.377000 | 155.671330 |
60 | unknown cell | FSLD | 9.064110 | 7.511672 | 0.377000 | 156.522210 |
61 | unknown cell | KSFB | 25.388738 | 12.022695 | 0.377000 | 197.669193 |
62 | unknown cell | NGPL | 13.683767 | 8.744450 | 0.377000 | 164.182093 |
63 | unknown cell | PBVN | 19.382652 | 13.086842 | 0.377000 | 160.660840 |
64 | unknown cell | PKHL | 39.160130 | 25.699678 | 0.377000 | 199.245838 |
65 | unknown cell | XXCD | 23.403814 | 14.417472 | 0.377000 | 199.051002 |
# Get top and bottom cell types (via get_top_bottom_cell_types_by_mean) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the "top" table, element 1 as the "bottom" table.
    top_bottom = get_top_bottom_cell_types_by_mean(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level Three Cell Type mean_distance
1 b cell:follicular 44.741041
8 t cell:cd4+ alpha-beta memory 30.214247
3 endothelial cell of lymphatic vessel 29.642101
4 endothelial cell of vascular tree 21.913759
2 cell:proliferating 18.250368
Bottom 5 cell types in FSLD:
Level Three Cell Type mean_distance
9 t cell:cd8+ alpha-beta memory 13.932302
7 neutrophil/monocyte 13.274293
5 macrophage 12.964001
6 myeloid cell 12.219064
10 unknown cell 9.064110
Top 5 cell types in KSFB:
Level Three Cell Type mean_distance
1 b cell:follicular 98.262190
3 endothelial cell of lymphatic vessel 61.887568
8 t cell:cd4+ alpha-beta memory 59.324035
4 endothelial cell of vascular tree 29.505573
10 unknown cell 25.388738
Bottom 5 cell types in KSFB:
Level Three Cell Type mean_distance
5 macrophage 19.576427
6 myeloid cell 19.470228
9 t cell:cd8+ alpha-beta memory 18.510546
7 neutrophil/monocyte 17.949726
2 cell:proliferating 16.776125
Top 5 cell types in NGPL:
Level Three Cell Type mean_distance
1 b cell:follicular 86.597350
8 t cell:cd4+ alpha-beta memory 56.807808
3 endothelial cell of lymphatic vessel 43.458134
4 endothelial cell of vascular tree 26.593129
0 b cell 21.343961
Bottom 5 cell types in NGPL:
Level Three Cell Type mean_distance
9 t cell:cd8+ alpha-beta memory 18.357679
6 myeloid cell 18.322417
7 neutrophil/monocyte 17.447778
2 cell:proliferating 16.035974
10 unknown cell 13.683767
Top 5 cell types in PBVN:
Level Three Cell Type mean_distance
1 b cell:follicular 62.807603
8 t cell:cd4+ alpha-beta memory 41.516702
3 endothelial cell of lymphatic vessel 32.893007
4 endothelial cell of vascular tree 23.699591
2 cell:proliferating 22.825014
Bottom 5 cell types in PBVN:
Level Three Cell Type mean_distance
0 b cell 18.469944
9 t cell:cd8+ alpha-beta memory 16.785163
5 macrophage 16.594531
7 neutrophil/monocyte 16.360062
6 myeloid cell 14.973737
Top 5 cell types in PKHL:
Level Three Cell Type mean_distance
1 b cell:follicular 83.720368
8 t cell:cd4+ alpha-beta memory 56.975496
3 endothelial cell of lymphatic vessel 56.864136
10 unknown cell 39.160130
4 endothelial cell of vascular tree 25.871987
Bottom 5 cell types in PKHL:
Level Three Cell Type mean_distance
0 b cell 18.087155
5 macrophage 17.784159
6 myeloid cell 17.663756
9 t cell:cd8+ alpha-beta memory 17.167939
7 neutrophil/monocyte 15.311728
Top 5 cell types in XXCD:
Level Three Cell Type mean_distance
1 b cell:follicular 76.025086
3 endothelial cell of lymphatic vessel 36.088648
8 t cell:cd4+ alpha-beta memory 35.364842
4 endothelial cell of vascular tree 25.129076
10 unknown cell 23.403814
Bottom 5 cell types in XXCD:
Level Three Cell Type mean_distance
5 macrophage 16.401596
0 b cell 16.399071
6 myeloid cell 15.418976
7 neutrophil/monocyte 14.615880
9 t cell:cd8+ alpha-beta memory 14.440700
# Get top and bottom cell types (via get_top_bottom_cell_types_by_median) for each unique region in the dataset.
unique_regions = df_all_edges_with_cell_type_level['Unique Region'].unique()
for region in unique_regions:
    # Element 0 is printed as the "top" table, element 1 as the "bottom" table.
    top_bottom = get_top_bottom_cell_types_by_median(df_all_edges_with_cell_type_level, cell_type_level, region)
    print(f"\nTop 5 cell types in {region}:")
    print(top_bottom[0])
    print(f"\nBottom 5 cell types in {region}:")
    print(top_bottom[1])
Top 5 cell types in FSLD:
Level Three Cell Type median_distance
1 b cell:follicular 33.688278
3 endothelial cell of lymphatic vessel 22.399004
4 endothelial cell of vascular tree 20.119229
8 t cell:cd4+ alpha-beta memory 19.784420
2 cell:proliferating 13.070542
Bottom 5 cell types in FSLD:
Level Three Cell Type median_distance
9 t cell:cd8+ alpha-beta memory 12.169569
5 macrophage 11.638253
7 neutrophil/monocyte 10.616416
6 myeloid cell 10.122992
10 unknown cell 7.511672
Top 5 cell types in KSFB:
Level Three Cell Type median_distance
1 b cell:follicular 94.780079
3 endothelial cell of lymphatic vessel 50.581903
8 t cell:cd4+ alpha-beta memory 41.748388
4 endothelial cell of vascular tree 23.603934
0 b cell 16.424398
Bottom 5 cell types in KSFB:
Level Three Cell Type median_distance
9 t cell:cd8+ alpha-beta memory 15.197356
6 myeloid cell 14.640032
7 neutrophil/monocyte 13.151844
2 cell:proliferating 12.308920
10 unknown cell 12.022695
Top 5 cell types in NGPL:
Level Three Cell Type median_distance
1 b cell:follicular 82.378598
8 t cell:cd4+ alpha-beta memory 47.174738
3 endothelial cell of lymphatic vessel 39.208000
4 endothelial cell of vascular tree 22.220624
0 b cell 15.945808
Bottom 5 cell types in NGPL:
Level Three Cell Type median_distance
9 t cell:cd8+ alpha-beta memory 14.552395
6 myeloid cell 13.660724
7 neutrophil/monocyte 12.069889
2 cell:proliferating 11.196329
10 unknown cell 8.744450
Top 5 cell types in PBVN:
Level Three Cell Type median_distance
1 b cell:follicular 52.209394
8 t cell:cd4+ alpha-beta memory 27.747302
3 endothelial cell of lymphatic vessel 27.726806
4 endothelial cell of vascular tree 21.791122
2 cell:proliferating 15.856425
Bottom 5 cell types in PBVN:
Level Three Cell Type median_distance
9 t cell:cd8+ alpha-beta memory 14.871205
5 macrophage 14.813750
7 neutrophil/monocyte 13.328963
10 unknown cell 13.086842
6 myeloid cell 12.823543
Top 5 cell types in PKHL:
Level Three Cell Type median_distance
1 b cell:follicular 75.842618
3 endothelial cell of lymphatic vessel 48.143203
8 t cell:cd4+ alpha-beta memory 42.997011
10 unknown cell 25.699678
4 endothelial cell of vascular tree 23.120276
Bottom 5 cell types in PKHL:
Level Three Cell Type median_distance
5 macrophage 15.084712
0 b cell 14.218947
9 t cell:cd8+ alpha-beta memory 14.171389
6 myeloid cell 13.994776
7 neutrophil/monocyte 12.492303
Top 5 cell types in XXCD:
Level Three Cell Type median_distance
1 b cell:follicular 66.886430
3 endothelial cell of lymphatic vessel 32.225229
4 endothelial cell of vascular tree 22.500300
8 t cell:cd4+ alpha-beta memory 20.820506
2 cell:proliferating 14.508379
Bottom 5 cell types in XXCD:
Level Three Cell Type median_distance
5 macrophage 14.395280
0 b cell 13.592928
6 myeloid cell 12.795804
9 t cell:cd8+ alpha-beta memory 12.549061
7 neutrophil/monocyte 12.140337
# Print the per-region and per-cell-type variability tables for the currently selected cell type level.
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
FSLD 17.76 17.69 99.6
KSFB 32.30 37.95 117.5
NGPL 32.17 35.55 110.5
PBVN 24.70 24.92 100.9
PKHL 32.27 36.56 113.3
XXCD 24.94 29.75 119.3
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Three Cell Type
unknown cell 21.03 25.66 122.0
cell:proliferating 18.51 18.05 97.5
t cell:cd4+ alpha-beta memory 47.75 40.36 84.5
neutrophil/monocyte 15.81 13.10 82.9
myeloid cell 15.79 12.86 81.4
b cell 18.27 14.68 80.4
t cell:cd8+ alpha-beta memory 16.14 11.70 72.5
endothelial cell of lymphatic vessel 42.11 30.14 71.6
macrophage 16.86 11.93 70.8
b cell:follicular 76.64 48.88 63.8
endothelial cell of vascular tree 25.07 15.68 62.5
# Draw and save all four figures for the currently selected cell type level.
figures_dir = os.path.join(basepath, figures_output_dir)
plot_violin_cells_per_celltype(df_all_edges_with_cell_type_level, cell_type_level, figures_dir, density_norm='area')
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, figures_dir)
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, figures_dir)
# Or, density_norm="count" or "area" based on preference.
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, figures_dir, density_norm="count")