import numpy as np
import pandas as pd
import os
import json
import requests
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import normalize
import plotly.express as px
from _cde_compute_edges_from_nodes import *
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# suppress warnings
import warnings
warnings.filterwarnings("ignore")Distance Analysis: codex-intestine-stanford
Analyze and visualize cell-to-nearest-endothelial-cell distance distributions for the
codex-intestine-stanford dataset.
basepath = "/u/yashjain/hra-cell-distance-analysis/data"
dataset_dir = "intestine-codex-stanford"
data_filedir = os.path.join("data-processed-nodes-with-harmonized-cell-types", dataset_dir)
output_edge_dir = os.path.join("data-processed-edges", dataset_dir)
figures_output_dir = "generated-figures"# Function to load your data
def load_data(path, edges=False):
if edges:
column_names = ['cell_id', 'x1', 'y1', 'z1', 'x2', 'y2', 'z2']
data = pd.read_csv(path, header=None, names=column_names)
else:
data = pd.read_csv(path)
return data# Function to read all files ending with "-nodes.csv" in the `data_filedir` directory into a single DataFrame.
# Another additional column `Dataset` is added to identify the dataset name which comes from the filename before the `-nodes.csv` suffix.
# Additionally, function reads all files ending with "-edges.csv" in the `output_edge_dir` directory into a single DataFrame.
# Three additional columns are added "Dataset", "Anchor Cell Type", and "Anchor Cell Type Level" to identify the dataset name, anchor cell type, and anchor cell type level respectively which come from the filename before the `.csv` suffix.
# The three additional columns are created by splitting the filename on the `-` character, and extracting the relevant parts.
# On splitting, the first part is the dataset name, second part is the anchor cell type level, and third part is the anchor cell type, and last part is the `edges` suffix.
# When reading files, check if the file has the correct format (i.e., ends with `-edges.csv`).
# Additionally, the function merges the edges DataFrame with the nodes DataFrame to get the cell type information for the anchor cells.
# This is done by reading the corresponding nodes file from the `data_filedir` directory for each edges file, and merging it with the edges DataFrame on the `cell_id` column.
# The merged DataFrame contains the edges with additional columns for the cell type information.
# The function returns three DataFrames:
# 1. `merged_nodes`: DataFrame containing all nodes with an additional column `Dataset`.
# 2. `merged_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, and `Anchor Cell Type Level`.
# 3. `merged_nodes_for_all_edges`: DataFrame containing all edges with additional columns `Dataset`, `Anchor Cell Type`, `Anchor Cell Type Level`, and the cell type information for cells.
def read_all_edge_datasets(basepath, data_filedir, output_edge_dir):
all_nodes_files = []
all_edges_files = []
all_nodes_edges_files = []
for file in os.listdir(os.path.join(basepath, output_edge_dir)):
if file.endswith("-edges.csv"):
file_path = os.path.join(basepath, output_edge_dir, file)
dataset_name, anchor_cell_type_level, anchor_cell_type = file.replace("-edges.csv", "").split('-')
edges_df = load_data(file_path, edges=False)
edges_df['Dataset'] = dataset_name
edges_df['Anchor Cell Type'] = anchor_cell_type
edges_df['Anchor Cell Type Level'] = anchor_cell_type_level
edges_df.rename(columns={"distance": "Distance"}, inplace=True) # Rename column "distance" to "Distance".
all_edges_files.append(edges_df)
# Read the corresponding nodes file from data_filedir to get the cell type information
nodes_file_path = os.path.join(basepath, data_filedir, f"{dataset_name}-nodes.csv")
nodes_df = load_data(nodes_file_path)
nodes_df['Dataset'] = dataset_name
all_nodes_files.append(nodes_df)
# Add a new 'cell_id' column to nodes_df
nodes_df['cell_id'] = range(len(nodes_df))
# Set 'cell_id' column as index for nodes_df
nodes_df.set_index('cell_id', inplace=True)
# Merge edges_df with nodes_df to get the cell type information for the anchor cells
edges_nodes_df = pd.merge(edges_df, nodes_df[['Level Three Cell Type', 'Level Two Cell Type', 'Level One Cell Type']], how='left', left_on='cell_id', right_index=True)
all_nodes_edges_files.append(edges_nodes_df)
merged_edges = pd.concat(all_edges_files, ignore_index=True)
merged_nodes = pd.concat(all_nodes_files, ignore_index=True)
merged_nodes_for_all_edges = pd.concat(all_nodes_edges_files, ignore_index=True)
return merged_nodes, merged_edges, merged_nodes_for_all_edgesdef create_directory(directory):
if not os.path.exists(directory):
os.makedirs(directory)
print(f"Directory '{directory}' created successfully.")
else:
print(f"Directory '{directory}' already exists.")Get initial statistics and identify endothelial cell categories for dataset.
df_all_nodes, df_all_edges, df_all_edges_with_cell_types = read_all_edge_datasets(basepath, data_filedir, output_edge_dir)df_all_nodes.head(5)| x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1503.64128 | 1278.32154 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 1 | 1958.05496 | 1553.46072 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 2 | 2290.93940 | 1187.36332 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 3 | 2863.48554 | 891.08862 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 4 | 2563.43664 | 1468.54122 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
# Print the total number of unique cell types per dataset. Compute separately for each cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type).
print("Total number of unique cell types per cell type annnotation level:")
unique_cell_types = {
'Original Cell Type': df_all_nodes['Original Cell Type'].nunique(),
'Level Three Cell Type': df_all_nodes['Level Three Cell Type'].nunique(),
'Level Two Cell Type': df_all_nodes['Level Two Cell Type'].nunique(),
'Level One Cell Type': df_all_nodes['Level One Cell Type'].nunique()
}
for cell_type, count in unique_cell_types.items():
print(f"{cell_type}: {count}")Total number of unique cell types per cell type annnotation level:
Original Cell Type: 25
Level Three Cell Type: 25
Level Two Cell Type: 17
Level One Cell Type: 5
# Save the unique cell types containing "endothelial" in name per cell type column (Level One Cell Type, Level Two Cell Type, Level Three Cell Type, Original Cell Type) to a dictionary where the key is the level and the value is a list of unique cell types.
endothelial_cell_types = {
'Original Cell Type': df_all_nodes[df_all_nodes['Original Cell Type'].str.contains("endothelial", case=False, na=False)]['Original Cell Type'].unique().tolist(),
'Level Three Cell Type': df_all_nodes[df_all_nodes['Level Three Cell Type'].str.contains("endothelial", case=False, na=False)]['Level Three Cell Type'].unique().tolist(),
'Level Two Cell Type': df_all_nodes[df_all_nodes['Level Two Cell Type'].str.contains("endothelial", case=False, na=False)]['Level Two Cell Type'].unique().tolist(),
'Level One Cell Type': df_all_nodes[df_all_nodes['Level One Cell Type'].str.contains("endothelial", case=False, na=False)]['Level One Cell Type'].unique().tolist()
}
print("\nEndothelial cell types per cell type annotation level:")
for level, cell_types in endothelial_cell_types.items():
print(f"\n{level}:")
for cell in cell_types:
print(f" - {cell}")
Endothelial cell types per cell type annotation level:
Original Cell Type:
- Endothelial
Level Three Cell Type:
- endothelial cell of lymphatic vessel
- endothelial cell
Level Two Cell Type:
- endothelial cell of lymphatic vessel
- endothelial cell
Level One Cell Type:
- endothelial cell
type_field_list = ["Level Three Cell Type", "Level Two Cell Type", "Level One Cell Type"] # Skipping Original Cell Type as it is not a hierarchical level.
# Define the anchor cell type (type of endothelial cell) for each level in type_field_list based on available categories in the previous cell. The distance analysis at all three levels will be limited to the specified anchor cell type.
anchor_cell_type_dict = {
'Level Three Cell Type': 'endothelial cell',
'Level Two Cell Type': 'endothelial cell',
'Level One Cell Type': 'endothelial cell'
}Process datasets to add region information to Nodes files.
# List of regions (based on filenames) in small intestine (si) and large intestine (li).
si = ['Duodenum', 'Ileum', 'Mid', 'ProximalJejunum', 'Midjejunum', 'Proximaljejunum']
li = ['Ascending', 'Descending', 'Transverse', 'Left', 'Right', 'Sigmoid', 'Trans']
# Create a dictionary to map si and li regions to correct region names.
region_map = {
'Duodenum': 'Duodenum',
'Ileum': 'Ileum',
'Mid': 'Mid Jejunum',
'ProximalJejunum': 'Proximal Jejunum',
'Midjejunum': 'Mid Jejunum',
'Proximaljejunum': 'Proximal Jejunum',
'Ascending': 'Ascending',
'Descending': 'Descending',
'Transverse': 'Transverse',
'Left': 'Descending',
'Right': 'Ascending',
'Sigmoid': 'Sigmoid',
'Trans': 'Transverse'
}df_all_nodes.head()| x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1503.64128 | 1278.32154 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 1 | 1958.05496 | 1553.46072 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 2 | 2290.93940 | 1187.36332 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 3 | 2863.48554 | 891.08862 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
| 4 | 2563.43664 | 1468.54122 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending |
# Iterate through the df_all_data dataframe to create new columns "Donor" and "Unique Region" based on the "Dataset" column.
# The "Donor" column is created by extracting the donor name from the "Dataset" column, for example B004 from B004_Duodenum.
# The "Unique Region" column is created by mapping the region names based on the full dataset name.
df_all_nodes['Donor'] = df_all_nodes['Dataset'].str.split('_').str[0]
df_all_nodes['Unique Region'] = df_all_nodes['Dataset'].str.split('_').str[1].map(region_map)
# Check if the new columns are created correctly.
df_all_nodes[['Dataset', 'Donor', 'Unique Region']].head(5)| Dataset | Donor | Unique Region | |
|---|---|---|---|
| 0 | B004_Ascending | B004 | Ascending |
| 1 | B004_Ascending | B004 | Ascending |
| 2 | B004_Ascending | B004 | Ascending |
| 3 | B004_Ascending | B004 | Ascending |
| 4 | B004_Ascending | B004 | Ascending |
# Iterate through the df_all_data dataframe to create a new column "Tissue Subtype" based on Dataset column value after "_".
df_all_nodes['Tissue Subtype'] = df_all_nodes['Dataset'].str.split('_').str[1].apply(lambda x: 'Small Intestine' if x in si else ('Large Intestine' if x in li else 'Unknown'))# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_nodes['Unique Region'].unique())
# Print all unique donors in the data.
print("\nUnique Donors in the data:")
print(df_all_nodes['Donor'].unique())
# Print unique values in Tissue Subtype.
print("\nUnique Tissue Subtypes in the data:")
print(df_all_nodes['Tissue Subtype'].unique())
# Print number of donors in small intestine and large intestine.
print("\nNumber of donors in Small Intestine:")
print(df_all_nodes[df_all_nodes['Tissue Subtype'] == 'Small Intestine']['Donor'].nunique())
print("\nNumber of donors in Large Intestine:")
print(df_all_nodes[df_all_nodes['Tissue Subtype'] == 'Large Intestine']['Donor'].nunique())
# Print the total number of unique donors and unique regions.
print(f"\nTotal number of unique donors: {df_all_nodes['Donor'].nunique()}")
print(f"\nTotal number of unique donors: {df_all_nodes['Donor'].nunique()}")
print(f"Total number of unique regions: {df_all_nodes['Unique Region'].nunique()}")
# Print number of unique datasets per small intestine and large intestine.
print(f"\nTotal number of unique datasets in Small Intestine: {df_all_nodes[df_all_nodes['Tissue Subtype'] == 'Small Intestine']['Dataset'].nunique()}")
print(f"Total number of unique datasets in Large Intestine: {df_all_nodes[df_all_nodes['Tissue Subtype'] == 'Large Intestine']['Dataset'].nunique()}")
Unique Regions in the data:
['Ascending' 'Descending' 'Duodenum' 'Ileum' 'Mid Jejunum'
'Proximal Jejunum' 'Transverse' 'Sigmoid']
Unique Donors in the data:
['B004' 'B005' 'B006' 'B009' 'B010' 'B011' 'B012' 'B008']
Unique Tissue Subtypes in the data:
['Large Intestine' 'Small Intestine']
Number of donors in Small Intestine:
8
Number of donors in Large Intestine:
8
Total number of unique donors: 8
Total number of unique donors: 8
Total number of unique regions: 8
Total number of unique datasets in Small Intestine: 32
Total number of unique datasets in Large Intestine: 32
Process datasets to add region information to Edges files.
df_all_edges.head(5)| cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1503.64128 | 1278.32154 | 0 | 1541.00586 | 1229.63436 | 0 | 61.372252 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type |
| 1 | 726 | 1428.15728 | 1210.38594 | 0 | 1404.75724 | 1149.62132 | 0 | 65.114522 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type |
| 2 | 727 | 1428.15728 | 1216.80208 | 0 | 1404.75724 | 1149.62132 | 0 | 71.139415 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type |
| 3 | 729 | 1433.06374 | 1202.83754 | 0 | 1404.75724 | 1149.62132 | 0 | 60.276231 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type |
| 4 | 730 | 1437.21536 | 1248.50536 | 0 | 1404.75724 | 1149.62132 | 0 | 104.074891 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type |
# Process the edge data to create new columns "Donor", "Unique Region" and Tissue Subtype based on the "Dataset" column, similar to how it was done for the node data.
df_all_edges['Donor'] = df_all_edges['Dataset'].str.split('_').str[0]
df_all_edges['Unique Region'] = df_all_edges['Dataset'].str.split('_').str[1].map(region_map)
df_all_edges['Tissue Subtype'] = df_all_edges['Dataset'].str.split('_').str[1].apply(lambda x: 'Small Intestine' if x in si else ('Large Intestine' if x in li else 'Unknown'))
# Check if the new columns are created correctly.
df_all_edges[['Dataset', 'Donor', 'Unique Region', 'Tissue Subtype']].head(5)| Dataset | Donor | Unique Region | Tissue Subtype | |
|---|---|---|---|---|
| 0 | B004_Ascending | B004 | Ascending | Large Intestine |
| 1 | B004_Ascending | B004 | Ascending | Large Intestine |
| 2 | B004_Ascending | B004 | Ascending | Large Intestine |
| 3 | B004_Ascending | B004 | Ascending | Large Intestine |
| 4 | B004_Ascending | B004 | Ascending | Large Intestine |
# Print all unique regions in the data.
print("\nUnique Regions in the data:")
print(df_all_edges['Unique Region'].unique())
# Print all unique donors in the data.
print("\nUnique Donors in the data:")
print(df_all_edges['Donor'].unique())
# Print unique values in Tissue Subtype.
print("\nUnique Tissue Subtypes in the data:")
print(df_all_edges['Tissue Subtype'].unique())
# Print number of donors in small intestine and large intestine.
print("\nNumber of donors in Small Intestine:")
print(df_all_edges[df_all_edges['Tissue Subtype'] == 'Small Intestine']['Donor'].nunique())
print("\nNumber of donors in Large Intestine:")
print(df_all_edges[df_all_edges['Tissue Subtype'] == 'Large Intestine']['Donor'].nunique())
# Print the total number of unique donors and unique regions.
print(f"\nTotal number of unique donors: {df_all_edges['Donor'].nunique()}")
print(f"\nTotal number of unique donors: {df_all_edges['Donor'].nunique()}")
print(f"Total number of unique regions: {df_all_edges['Unique Region'].nunique()}")
# Print number of unique datasets per small intestine and large intestine.
print(f"\nTotal number of unique datasets in Small Intestine: {df_all_edges[df_all_edges['Tissue Subtype'] == 'Small Intestine']['Dataset'].nunique()}")
print(f"Total number of unique datasets in Large Intestine: {df_all_edges[df_all_edges['Tissue Subtype'] == 'Large Intestine']['Dataset'].nunique()}")
Unique Regions in the data:
['Ascending' 'Descending' 'Duodenum' 'Ileum' 'Mid Jejunum'
'Proximal Jejunum' 'Transverse' 'Sigmoid']
Unique Donors in the data:
['B004' 'B005' 'B006' 'B009' 'B010' 'B011' 'B012' 'B008']
Unique Tissue Subtypes in the data:
['Large Intestine' 'Small Intestine']
Number of donors in Small Intestine:
8
Number of donors in Large Intestine:
8
Total number of unique donors: 8
Total number of unique donors: 8
Total number of unique regions: 8
Total number of unique datasets in Small Intestine: 32
Total number of unique datasets in Large Intestine: 32
df_all_edges_with_cell_types['Donor'] = df_all_edges_with_cell_types['Dataset'].str.split('_').str[0]
df_all_edges_with_cell_types['Unique Region'] = df_all_edges_with_cell_types['Dataset'].str.split('_').str[1].map(region_map)
df_all_edges_with_cell_types['Tissue Subtype'] = df_all_edges_with_cell_types['Dataset'].str.split('_').str[1].apply(lambda x: 'Small Intestine' if x in si else ('Large Intestine' if x in li else 'Unknown'))
# Check if the new columns are created correctly.
df_all_edges_with_cell_types[['Dataset', 'Donor', 'Unique Region', 'Tissue Subtype']].head(5)| Dataset | Donor | Unique Region | Tissue Subtype | |
|---|---|---|---|---|
| 0 | B004_Ascending | B004 | Ascending | Large Intestine |
| 1 | B004_Ascending | B004 | Ascending | Large Intestine |
| 2 | B004_Ascending | B004 | Ascending | Large Intestine |
| 3 | B004_Ascending | B004 | Ascending | Large Intestine |
| 4 | B004_Ascending | B004 | Ascending | Large Intestine |
df_all_nodes.head(1)| x | y | Original Cell Type | Level Three Cell Type | Level Three CL Label | Level Three CL ID | CL_Match/3 | Level Two Cell Type | Level Two CL Label | Level Two CL ID | CL_Match/2 | Level One Cell Type | Level One CL Label | Level One CL ID | CL_Match/1 | Dataset | Donor | Unique Region | Tissue Subtype | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1503.64128 | 1278.32154 | NK | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | natural killer cell | natural killer cell | CL:0000623 | skos:exactMatch | immune cell | leukocyte | CL:0000738 | skos:exactMatch | B004_Ascending | B004 | Ascending | Large Intestine |
df_all_edges.head(1)| cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Donor | Unique Region | Tissue Subtype | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1503.64128 | 1278.32154 | 0 | 1541.00586 | 1229.63436 | 0 | 61.372252 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type | B004 | Ascending | Large Intestine |
df_all_edges_with_cell_types.head(1)| cell_id | x1 | y1 | z1 | x2 | y2 | z2 | Distance | Dataset | Anchor Cell Type | Anchor Cell Type Level | Level Three Cell Type | Level Two Cell Type | Level One Cell Type | Donor | Unique Region | Tissue Subtype | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1503.64128 | 1278.32154 | 0 | 1541.00586 | 1229.63436 | 0 | 61.372252 | B004_Ascending | endothelial cell of lymphatic vessel | Level Three Cell Type | natural killer cell | natural killer cell | immune cell | B004 | Ascending | Large Intestine |
Node Analysis
# Plot number of cells per cell type in large intestine in the same plot. Color by cell type and unique region. Output figure saved in existing `figures_output_dir`.
def plot_cells_per_celltype(df, type_field, intestine_type, output_dir):
plt.figure(figsize=(14, 8))
plt.rcParams["svg.fonttype"] = 'none' # to store text as text, not as path
sns.countplot(data=df[df['Tissue Subtype'] == intestine_type], x=type_field, palette='Spectral', hue='Unique Region')
plt.title(f'Number of Cells per {type_field} in {intestine_type}')
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}_{intestine_type}.png'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_cells_per_celltype_{type_field}_{intestine_type}.svg'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.legend(title='Unique Region', bbox_to_anchor=(0.85, 1), loc='upper left')
plt.xlabel(type_field)
plt.ylabel('Number of Cells')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
# Show the plot
plt.show()
plt.close()
for type_field in type_field_list:
plot_cells_per_celltype(df_all_nodes, type_field, 'Large Intestine', os.path.join(basepath, figures_output_dir))
# Plot number of cells per cell type in small intestine in the same plot. Color by cell type and unique region. Output figure saved in existing `figures_output_dir`.
for type_field in type_field_list:
plot_cells_per_celltype(df_all_nodes, type_field, 'Small Intestine', os.path.join(basepath, figures_output_dir))





Distance Analysis
# Get mean, median, minimum, maximum distance per unique region in each tissue subtype per anchor cell type.
df_distance_stats = df_all_edges_with_cell_types.groupby(['Unique Region', 'Tissue Subtype', 'Anchor Cell Type', 'Anchor Cell Type Level']).agg(
mean_distance=('Distance', 'mean'),
median_distance=('Distance', 'median'),
min_distance=('Distance', 'min'),
max_distance=('Distance', 'max')
).reset_index()
# Print the first few rows of the distance statistics DataFrame.
df_distance_stats| Unique Region | Tissue Subtype | Anchor Cell Type | Anchor Cell Type Level | mean_distance | median_distance | min_distance | max_distance | |
|---|---|---|---|---|---|---|---|---|
| 0 | Ascending | Large Intestine | endothelial cell | Level One Cell Type | 32.465985 | 23.412212 | 0.377420 | 199.984527 |
| 1 | Ascending | Large Intestine | endothelial cell | Level Three Cell Type | 41.197999 | 31.965045 | 0.533752 | 199.984527 |
| 2 | Ascending | Large Intestine | endothelial cell | Level Two Cell Type | 41.197999 | 31.965045 | 0.533752 | 199.984527 |
| 3 | Ascending | Large Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 48.645392 | 36.122071 | 0.377420 | 199.984527 |
| 4 | Ascending | Large Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 48.645392 | 36.122071 | 0.377420 | 199.984527 |
| 5 | Descending | Large Intestine | endothelial cell | Level One Cell Type | 33.751636 | 24.981099 | 0.000000 | 199.998416 |
| 6 | Descending | Large Intestine | endothelial cell | Level Three Cell Type | 40.669880 | 31.271246 | 0.000000 | 199.998416 |
| 7 | Descending | Large Intestine | endothelial cell | Level Two Cell Type | 40.669880 | 31.271246 | 0.000000 | 199.998416 |
| 8 | Descending | Large Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 55.376327 | 44.605871 | 0.377420 | 199.985951 |
| 9 | Descending | Large Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 55.376327 | 44.605871 | 0.377420 | 199.985951 |
| 10 | Duodenum | Small Intestine | endothelial cell | Level One Cell Type | 32.457991 | 22.898541 | 0.533752 | 199.954608 |
| 11 | Duodenum | Small Intestine | endothelial cell | Level Three Cell Type | 36.243342 | 26.615470 | 0.533752 | 199.987020 |
| 12 | Duodenum | Small Intestine | endothelial cell | Level Two Cell Type | 36.243342 | 26.615470 | 0.533752 | 199.987020 |
| 13 | Duodenum | Small Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 57.516441 | 45.767525 | 0.843937 | 199.997703 |
| 14 | Duodenum | Small Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 57.516441 | 45.767525 | 0.843937 | 199.997703 |
| 15 | Ileum | Small Intestine | endothelial cell | Level One Cell Type | 32.578045 | 22.318897 | 0.000000 | 199.975979 |
| 16 | Ileum | Small Intestine | endothelial cell | Level Three Cell Type | 37.434302 | 27.216143 | 0.000000 | 199.977404 |
| 17 | Ileum | Small Intestine | endothelial cell | Level Two Cell Type | 37.434302 | 27.216143 | 0.000000 | 199.977404 |
| 18 | Ileum | Small Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 51.209250 | 39.180850 | 0.000000 | 199.977404 |
| 19 | Ileum | Small Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 51.209250 | 39.180850 | 0.000000 | 199.977404 |
| 20 | Mid Jejunum | Small Intestine | endothelial cell | Level One Cell Type | 37.485418 | 24.636595 | 0.000000 | 199.994498 |
| 21 | Mid Jejunum | Small Intestine | endothelial cell | Level Three Cell Type | 41.185966 | 29.137215 | 0.000000 | 199.987020 |
| 22 | Mid Jejunum | Small Intestine | endothelial cell | Level Two Cell Type | 41.185966 | 29.137215 | 0.000000 | 199.987020 |
| 23 | Mid Jejunum | Small Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 55.117326 | 41.727532 | 0.000000 | 199.997703 |
| 24 | Mid Jejunum | Small Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 55.117326 | 41.727532 | 0.000000 | 199.997703 |
| 25 | Proximal Jejunum | Small Intestine | endothelial cell | Level One Cell Type | 36.900594 | 24.532300 | 0.000000 | 199.983458 |
| 26 | Proximal Jejunum | Small Intestine | endothelial cell | Level Three Cell Type | 41.262498 | 29.392756 | 0.000000 | 199.974554 |
| 27 | Proximal Jejunum | Small Intestine | endothelial cell | Level Two Cell Type | 41.262498 | 29.392756 | 0.000000 | 199.974554 |
| 28 | Proximal Jejunum | Small Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 56.616775 | 42.250816 | 0.533752 | 199.987020 |
| 29 | Proximal Jejunum | Small Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 56.616775 | 42.250816 | 0.533752 | 199.987020 |
| 30 | Sigmoid | Large Intestine | endothelial cell | Level One Cell Type | 37.371448 | 28.286364 | 1.687874 | 199.975979 |
| 31 | Sigmoid | Large Intestine | endothelial cell | Level Three Cell Type | 48.924570 | 38.727412 | 1.687874 | 199.984527 |
| 32 | Sigmoid | Large Intestine | endothelial cell | Level Two Cell Type | 48.924570 | 38.727412 | 1.687874 | 199.984527 |
| 33 | Sigmoid | Large Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 59.624433 | 45.405053 | 2.416667 | 199.984527 |
| 34 | Sigmoid | Large Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 59.624433 | 45.405053 | 2.416667 | 199.984527 |
| 35 | Transverse | Large Intestine | endothelial cell | Level One Cell Type | 34.822264 | 25.800184 | 0.377420 | 199.977404 |
| 36 | Transverse | Large Intestine | endothelial cell | Level Three Cell Type | 44.694353 | 35.455390 | 0.843937 | 199.977404 |
| 37 | Transverse | Large Intestine | endothelial cell | Level Two Cell Type | 44.694353 | 35.455390 | 0.843937 | 199.977404 |
| 38 | Transverse | Large Intestine | endothelial cell of lymphatic vessel | Level Three Cell Type | 53.638178 | 39.751124 | 0.377420 | 199.997703 |
| 39 | Transverse | Large Intestine | endothelial cell of lymphatic vessel | Level Two Cell Type | 53.638178 | 39.751124 | 0.377420 | 199.997703 |
Level One Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all tissue subtypes.
cell_type_level = 'Level One Cell Type'
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[(df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level) & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])]
df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Tissue Subtype']).agg(
mean_distance=('Distance', 'mean'),
median_distance=('Distance', 'median'),
min_distance=('Distance', 'min'),
max_distance=('Distance', 'max')
).reset_index()
df_distance_stats_cell_type_level| Level One Cell Type | Tissue Subtype | mean_distance | median_distance | min_distance | max_distance | |
|---|---|---|---|---|---|---|
| 0 | epithelial cell | Large Intestine | 32.875946 | 26.085702 | 0.377420 | 199.984527 |
| 1 | epithelial cell | Small Intestine | 43.213764 | 29.083389 | 0.000000 | 199.994498 |
| 2 | immune cell | Large Intestine | 25.267128 | 18.151509 | 0.000000 | 199.947128 |
| 3 | immune cell | Small Intestine | 26.838213 | 17.229543 | 0.000000 | 199.987020 |
| 4 | mesenchymal cell | Large Intestine | 42.273309 | 32.458120 | 0.000000 | 199.998416 |
| 5 | mesenchymal cell | Small Intestine | 32.811427 | 24.166671 | 0.000000 | 199.793187 |
| 6 | neural cell | Large Intestine | 37.306252 | 28.396936 | 0.377420 | 198.398031 |
| 7 | neural cell | Small Intestine | 27.669692 | 20.812925 | 0.533752 | 198.517180 |
# Get top five and bottom five cell types with respect to mean distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate mean distance
df_grouped = df_filtered.groupby(cell_type_level).agg(mean_distance=('Distance', 'mean')).reset_index()
# Sort by mean distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='mean_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level One Cell Type mean_distance
0 epithelial cell 43.213764
2 mesenchymal cell 32.811427
3 neural cell 27.669692
1 immune cell 26.838213
Bottom 5 cell types in Small Intestine:
Level One Cell Type mean_distance
0 epithelial cell 43.213764
2 mesenchymal cell 32.811427
3 neural cell 27.669692
1 immune cell 26.838213
Top 5 cell types in Large Intestine:
Level One Cell Type mean_distance
2 mesenchymal cell 42.273309
3 neural cell 37.306252
0 epithelial cell 32.875946
1 immune cell 25.267128
Bottom 5 cell types in Large Intestine:
Level One Cell Type mean_distance
2 mesenchymal cell 42.273309
3 neural cell 37.306252
0 epithelial cell 32.875946
1 immune cell 25.267128
# Get top five and bottom five cell types with respect to median distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate median distance
df_grouped = df_filtered.groupby(cell_type_level).agg(median_distance=('Distance', 'median')).reset_index()
# Sort by median distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='median_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level One Cell Type median_distance
0 epithelial cell 29.083389
2 mesenchymal cell 24.166671
3 neural cell 20.812925
1 immune cell 17.229543
Bottom 5 cell types in Small Intestine:
Level One Cell Type median_distance
0 epithelial cell 29.083389
2 mesenchymal cell 24.166671
3 neural cell 20.812925
1 immune cell 17.229543
Top 5 cell types in Large Intestine:
Level One Cell Type median_distance
2 mesenchymal cell 32.458120
3 neural cell 28.396936
0 epithelial cell 26.085702
1 immune cell 18.151509
Bottom 5 cell types in Large Intestine:
Level One Cell Type median_distance
2 mesenchymal cell 32.458120
3 neural cell 28.396936
0 epithelial cell 26.085702
1 immune cell 18.151509
# Calculate regional variability
def calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level):
""" Calculate regional variability for distances in the given DataFrame.
"""
regional_variability = df_all_edges_with_cell_type_level.groupby('Unique Region')['Distance'].agg([
('mean', 'mean'),
('std', 'std')
]).round(2)
# Add CV as percentage
regional_variability['CV (%)'] = (regional_variability['std'] / regional_variability['mean'] * 100).round(1)
print("\nRegional Variability Analysis:")
print("Mean: Average distance in each region")
print("Std: Standard deviation of distances")
print("CV: Coefficient of Variation (std/mean * 100%)")
print(regional_variability)
# Calculate variability for each cell type
cell_type_variability = df_all_edges_with_cell_type_level.groupby(cell_type_level)['Distance'].agg([
('mean', 'mean'),
('std', 'std')
]).round(2)
# Add CV as percentage
cell_type_variability['CV (%)'] = (cell_type_variability['std'] / cell_type_variability['mean'] * 100).round(1)
print("\nCell Type Variability Analysis (sorted by CV):")
print(cell_type_variability.sort_values('CV (%)', ascending=False))
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
Ascending 32.47 29.00 89.3
Descending 33.75 28.99 85.9
Duodenum 32.46 31.37 96.6
Ileum 32.58 32.14 98.6
Mid Jejunum 37.49 37.58 100.2
Proximal Jejunum 36.90 35.99 97.5
Sigmoid 37.37 29.56 79.1
Transverse 34.82 29.29 84.1
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level One Cell Type
immune cell 26.29 27.85 105.9
epithelial cell 39.19 35.37 90.3
mesenchymal cell 37.66 31.52 83.7
neural cell 32.18 26.83 83.4
# Define the standard region sequence for plots
regions = ['Duodenum', 'Proximal Jejunum', 'Mid Jejunum', 'Ileum', 'Ascending', 'Transverse', 'Descending', 'Sigmoid']# Generate Violin Plot
def plot_violin_cells_per_celltype_small_vs_large_intestine(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
sns.set_style("whitegrid")
sns.set_context("notebook", rc={"grid.linewidth": 2})
plt.rcParams["svg.fonttype"] = 'none' # to store text as text, not as path
plt.figure(figsize=(10, 5))
sns.violinplot(data=df_all_edges_with_cell_type_level, x=cell_type_level, y="Distance", hue="Tissue Subtype", density_norm="area", common_norm=True, cut=0, inner="box", split=True, palette='Spectral', alpha=.9, hue_order=['Small Intestine', 'Large Intestine'])
sns.set_theme(style="whitegrid")
sns.set_context("paper")
font_size = 10
plt.legend(fontsize=font_size)
plt.xlabel('Cell Type', fontsize=font_size)
plt.ylabel('Distance (\u03bcm)', fontsize=font_size)
# Increase font size for all text in the figure
plt.xticks(fontsize=font_size)
plt.xticks(rotation=90)
plt.yticks(fontsize=font_size)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype__small_vs_large_intestine_{cell_type_level}.png'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_cells_per_celltype__small_vs_large_intestine_{cell_type_level}.svg'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.show()
plot_violin_cells_per_celltype_small_vs_large_intestine(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Boxplots of distribution of distances by cell type and region.
def plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
plt.figure(figsize=(20, 10))
plt.rcParams["svg.fonttype"] = 'none' # to store text as text, not as path
# Create categorical type with only the regions that exist in the data
available_regions = [r for r in regions if r in df_all_edges_with_cell_type_level['Unique Region'].unique()]
df_all_edges_with_cell_type_level['Unique Region'] = pd.Categorical(
df_all_edges_with_cell_type_level['Unique Region'],
categories=available_regions,
ordered=True
)
sns.boxplot(data=df_all_edges_with_cell_type_level, x=cell_type_level, y='Distance', hue='Unique Region', showfliers=False, palette='Spectral') # viridis or Spectral palette for better color distinction
plt.xticks(rotation=90, ha='right')
plt.title(f'Distribution of distances by {cell_type_level} and region')
plt.legend(bbox_to_anchor=(1, 1), loc='upper left')
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_boxplots_by_region_{cell_type_level}.png'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_boxplots_by_region_{cell_type_level}.svg'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.show()
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Boxplots of distribution of distances by cell type and region.
def plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, output_dir):
pivot_data = df_all_edges_with_cell_type_level.pivot_table(
values='Distance',
index=cell_type_level,
columns='Unique Region',
aggfunc='median'
)
plt.figure(figsize=(15, 15))
plt.rcParams["svg.fonttype"] = 'none' # to store text as text, not as path
sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='Spectral')
plt.title('Heatmap of median distances')
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_heatmap_{cell_type_level}.png'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_distance_distribution_heatmap_{cell_type_level}.svg'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.show()
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
# Generate Violin Plot per unique region in both small intestine and large intestine. Create for all 8 regions as 8 subplots.
def plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, output_dir, density_norm="area"):
sns.set_style("whitegrid")
sns.set_context("notebook", rc={"grid.linewidth": 1})
plt.rcParams["svg.fonttype"] = 'none' # to store text as text, not as path
fig, axs = plt.subplots(4, 2, figsize=(15, 18))
fig.suptitle(f'Distance distribution per {cell_type_level} in small and large intestine (density normalization = {density_norm})', fontsize=18)
# Keep the sequence of Cell Types consistent across plots.
cell_types = sorted(df_all_edges_with_cell_type_level[cell_type_level].unique())
# Create a color palette based on the number of unique classes
color_palette = sns.color_palette("Spectral", n_colors=len(cell_types))
# Create a dictionary mapping class to color
class_color_dict = dict(zip(cell_types, color_palette))
for i, region in enumerate(regions):
data_reg = df_all_edges_with_cell_type_level[df_all_edges_with_cell_type_level['Unique Region'] == region]
sns.violinplot(data=data_reg, x=cell_type_level, y="Distance", density_norm=density_norm, common_norm=True, cut=0, inner="box", split=False, palette=class_color_dict, alpha=.9, ax=axs[i//2, i%2], hue=cell_type_level, legend=False, order=cell_types, fill=True)
axs[i//2, i%2].set_title(region)
axs[i//2, i%2].set_xlabel('Cell Type', fontsize=13)
axs[i//2, i%2].set_ylabel('Distance (\u03bcm)', fontsize=13)
axs[i//2, i%2].tick_params(axis='x', labelrotation=90, labelsize=8)
axs[i//2, i%2].tick_params(axis='both', labelsize=8)
axs[i//2, i%2].set_ylim(0, 200)
plt.tight_layout()
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_plots_all_regions_{cell_type_level}.png'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.savefig(os.path.join(output_dir, f'{dataset_dir}_violin_plots_all_regions_{cell_type_level}.svg'), dpi=300,
bbox_inches='tight',
pad_inches=0.5)
plt.show()
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count") # Or, density_norm="count"
Level Two Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all tissue subtypes.
cell_type_level = 'Level Two Cell Type'
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[(df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level) & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])]
df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Tissue Subtype']).agg(
mean_distance=('Distance', 'mean'),
median_distance=('Distance', 'median'),
min_distance=('Distance', 'min'),
max_distance=('Distance', 'max')
).reset_index()
df_distance_stats_cell_type_level| Level Two Cell Type | Tissue Subtype | mean_distance | median_distance | min_distance | max_distance | |
|---|---|---|---|---|---|---|
| 0 | b cell | Large Intestine | 27.053305 | 21.526179 | 0.843937 | 197.546834 |
| 1 | b cell | Small Intestine | 21.818137 | 17.894644 | 0.000000 | 191.707070 |
| 2 | dendritic cell | Large Intestine | 28.327602 | 21.410062 | 0.843937 | 198.968722 |
| 3 | dendritic cell | Small Intestine | 22.322890 | 16.056992 | 0.377420 | 198.825486 |
| 4 | endocrine cell | Large Intestine | 36.294670 | 31.225661 | 1.601257 | 199.648045 |
| 5 | endocrine cell | Small Intestine | 40.786922 | 31.225661 | 1.193507 | 199.569903 |
| 6 | endothelial cell of lymphatic vessel | Large Intestine | 37.245417 | 27.113891 | 0.377420 | 199.705116 |
| 7 | endothelial cell of lymphatic vessel | Small Intestine | 26.096485 | 17.802866 | 0.377420 | 199.313497 |
| 8 | enterocyte | Large Intestine | 41.954515 | 34.853669 | 0.377420 | 199.984527 |
| 9 | enterocyte | Small Intestine | 49.309524 | 34.872056 | 0.000000 | 199.987020 |
| 10 | goblet cell | Large Intestine | 39.134738 | 33.146416 | 0.377420 | 199.371021 |
| 11 | goblet cell | Small Intestine | 42.001191 | 32.346017 | 0.000000 | 199.977404 |
| 12 | lymphoid cell | Large Intestine | 40.153142 | 32.768230 | 2.387014 | 189.225983 |
| 13 | lymphoid cell | Small Intestine | 63.133067 | 43.305553 | 0.000000 | 199.951759 |
| 14 | macrophage | Large Intestine | 41.718298 | 27.551660 | 0.533752 | 199.895115 |
| 15 | macrophage | Small Intestine | 28.724084 | 18.041308 | 0.533752 | 199.984527 |
| 16 | muscle cell | Large Intestine | 53.677561 | 45.674058 | 0.000000 | 199.998416 |
| 17 | muscle cell | Small Intestine | 35.881348 | 28.706259 | 0.000000 | 199.772153 |
| 18 | natural killer cell | Large Intestine | 29.557145 | 22.920304 | 3.042857 | 193.128436 |
| 19 | natural killer cell | Small Intestine | 36.501047 | 22.686050 | 0.000000 | 199.987020 |
| 20 | neurecto-epithelial cell | Large Intestine | 37.836789 | 26.159318 | 0.377420 | 199.877655 |
| 21 | neurecto-epithelial cell | Small Intestine | 27.136101 | 18.871000 | 0.843937 | 199.829545 |
| 22 | neuron | Large Intestine | 48.971272 | 39.629100 | 0.377420 | 199.878012 |
| 23 | neuron | Small Intestine | 34.567294 | 27.058671 | 0.533752 | 197.159960 |
| 24 | neutrophil | Large Intestine | 40.546310 | 24.832400 | 0.533752 | 199.580610 |
| 25 | neutrophil | Small Intestine | 32.473085 | 18.431857 | 0.377420 | 199.860551 |
| 26 | paneth cell | Large Intestine | 138.624783 | 154.596150 | 12.821175 | 199.554914 |
| 27 | paneth cell | Small Intestine | 32.606219 | 29.450854 | 0.533752 | 199.977404 |
| 28 | stromal cell | Large Intestine | 53.780477 | 41.228708 | 0.533752 | 199.984527 |
| 29 | stromal cell | Small Intestine | 45.676768 | 33.698344 | 0.000000 | 199.900103 |
| 30 | t cell | Large Intestine | 29.954248 | 23.765475 | 0.000000 | 199.947128 |
| 31 | t cell | Small Intestine | 34.966318 | 23.636257 | 0.000000 | 199.984527 |
# Get top five and bottom five cell types with respect to mean distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate mean distance
df_grouped = df_filtered.groupby(cell_type_level).agg(mean_distance=('Distance', 'mean')).reset_index()
# Sort by mean distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='mean_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level Two Cell Type mean_distance
6 lymphoid cell 63.133067
4 enterocyte 49.309524
14 stromal cell 45.676768
5 goblet cell 42.001191
2 endocrine cell 40.786922
Bottom 5 cell types in Small Intestine:
Level Two Cell Type mean_distance
7 macrophage 28.724084
10 neurecto-epithelial cell 27.136101
3 endothelial cell of lymphatic vessel 26.096485
1 dendritic cell 22.322890
0 b cell 21.818137
Top 5 cell types in Large Intestine:
Level Two Cell Type mean_distance
13 paneth cell 138.624783
14 stromal cell 53.780477
8 muscle cell 53.677561
11 neuron 48.971272
4 enterocyte 41.954515
Bottom 5 cell types in Large Intestine:
Level Two Cell Type mean_distance
2 endocrine cell 36.294670
15 t cell 29.954248
9 natural killer cell 29.557145
1 dendritic cell 28.327602
0 b cell 27.053305
# Get top five and bottom five cell types with respect to median distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate median distance
df_grouped = df_filtered.groupby(cell_type_level).agg(median_distance=('Distance', 'median')).reset_index()
# Sort by median distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='median_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level Two Cell Type median_distance
6 lymphoid cell 43.305553
4 enterocyte 34.872056
14 stromal cell 33.698344
5 goblet cell 32.346017
2 endocrine cell 31.225661
Bottom 5 cell types in Small Intestine:
Level Two Cell Type median_distance
12 neutrophil 18.431857
7 macrophage 18.041308
0 b cell 17.894644
3 endothelial cell of lymphatic vessel 17.802866
1 dendritic cell 16.056992
Top 5 cell types in Large Intestine:
Level Two Cell Type median_distance
13 paneth cell 154.596150
8 muscle cell 45.674058
14 stromal cell 41.228708
11 neuron 39.629100
4 enterocyte 34.853669
Bottom 5 cell types in Large Intestine:
Level Two Cell Type median_distance
12 neutrophil 24.832400
15 t cell 23.765475
9 natural killer cell 22.920304
0 b cell 21.526179
1 dendritic cell 21.410062
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
Ascending 41.20 32.11 77.9
Descending 40.67 32.26 79.3
Duodenum 36.24 32.45 89.5
Ileum 37.43 33.54 89.6
Mid Jejunum 41.19 37.64 91.4
Proximal Jejunum 41.26 36.84 89.3
Sigmoid 48.92 35.83 73.2
Transverse 44.69 33.58 75.1
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Two Cell Type
neutrophil 35.60 38.33 107.7
natural killer cell 33.64 33.72 100.2
macrophage 34.30 34.07 99.3
t cell 33.65 32.40 96.3
neurecto-epithelial cell 31.59 29.57 93.6
endothelial cell of lymphatic vessel 30.87 28.45 92.2
dendritic cell 25.43 22.42 88.2
lymphoid cell 57.05 47.42 83.1
stromal cell 49.58 40.53 81.7
enterocyte 46.48 36.86 79.3
neuron 41.30 32.40 78.5
endocrine cell 39.37 30.36 77.1
muscle cell 45.43 34.12 75.1
goblet cell 40.65 29.59 72.8
b cell 23.99 17.20 71.7
paneth cell 34.14 22.89 67.0
plot_violin_cells_per_celltype_small_vs_large_intestine(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count") # Or, density_norm="count"
Level Three Cell Type Analysis
# Get mean, median, minimum, maximum distance per cell type in all tissue subtypes.
cell_type_level = 'Level Three Cell Type'
df_all_edges_with_cell_type_level = df_all_edges_with_cell_types[(df_all_edges_with_cell_types['Anchor Cell Type Level'] == cell_type_level) & (df_all_edges_with_cell_types['Anchor Cell Type'] == anchor_cell_type_dict[cell_type_level])]
df_distance_stats_cell_type_level = df_all_edges_with_cell_type_level.groupby([cell_type_level, 'Tissue Subtype']).agg(
mean_distance=('Distance', 'mean'),
median_distance=('Distance', 'median'),
min_distance=('Distance', 'min'),
max_distance=('Distance', 'max')
).reset_index()
df_distance_stats_cell_type_level| Level Three Cell Type | Tissue Subtype | mean_distance | median_distance | min_distance | max_distance | |
|---|---|---|---|---|---|---|
| 0 | b cell | Large Intestine | 29.018398 | 22.347599 | 1.193507 | 195.356325 |
| 1 | b cell | Small Intestine | 27.518600 | 22.960665 | 0.377420 | 156.771564 |
| 2 | dendritic cell | Large Intestine | 28.327602 | 21.410062 | 0.843937 | 198.968722 |
| 3 | dendritic cell | Small Intestine | 22.322890 | 16.056992 | 0.377420 | 198.825486 |
| 4 | endothelial cell of lymphatic vessel | Large Intestine | 37.245417 | 27.113891 | 0.377420 | 199.705116 |
| 5 | endothelial cell of lymphatic vessel | Small Intestine | 26.096485 | 17.802866 | 0.377420 | 199.313497 |
| 6 | enterocyte | Large Intestine | 39.822866 | 33.086197 | 0.377420 | 199.984527 |
| 7 | enterocyte | Small Intestine | 50.623921 | 35.085853 | 0.000000 | 199.977404 |
| 8 | enterocyte:cd57+ | Large Intestine | 33.451914 | 30.388738 | 2.641940 | 156.161145 |
| 9 | enterocyte:cd57+ | Small Intestine | 31.409334 | 25.782228 | 3.717156 | 199.954608 |
| 10 | enterocyte:cd66+ | Large Intestine | 54.150643 | 44.593095 | 1.193507 | 199.899034 |
| 11 | enterocyte:cd66+ | Small Intestine | 77.146245 | 62.703991 | 0.533752 | 199.878724 |
| 12 | enterocyte:muc1+ | Large Intestine | 45.036560 | 40.120296 | 1.360807 | 198.518974 |
| 13 | enterocyte:muc1+ | Small Intestine | 32.640887 | 29.031916 | 1.132260 | 198.929343 |
| 14 | goblet cell | Large Intestine | 39.134738 | 33.146416 | 0.377420 | 199.371021 |
| 15 | goblet cell | Small Intestine | 42.001191 | 32.346017 | 0.000000 | 199.977404 |
| 16 | interstitial cell of cajal | Large Intestine | 37.836789 | 26.159318 | 0.377420 | 199.877655 |
| 17 | interstitial cell of cajal | Small Intestine | 27.136101 | 18.871000 | 0.843937 | 199.829545 |
| 18 | lymphocyte:cd7+ | Large Intestine | 40.153142 | 32.768230 | 2.387014 | 189.225983 |
| 19 | lymphocyte:cd7+ | Small Intestine | 63.133067 | 43.305553 | 0.000000 | 199.951759 |
| 20 | macrophage | Large Intestine | 44.336284 | 29.528141 | 0.533752 | 199.895115 |
| 21 | macrophage | Small Intestine | 30.807581 | 19.614950 | 0.843937 | 199.984527 |
| 22 | macrophage:inflammatory | Large Intestine | 27.235706 | 20.935756 | 1.360807 | 193.407037 |
| 23 | macrophage:inflammatory | Small Intestine | 18.758805 | 12.986758 | 0.533752 | 199.290626 |
| 24 | muscle cell:smooth | Large Intestine | 53.677561 | 45.674058 | 0.000000 | 199.998416 |
| 25 | muscle cell:smooth | Small Intestine | 35.881348 | 28.706259 | 0.000000 | 199.772153 |
| 26 | natural killer cell | Large Intestine | 29.557145 | 22.920304 | 3.042857 | 193.128436 |
| 27 | natural killer cell | Small Intestine | 36.501047 | 22.686050 | 0.000000 | 199.987020 |
| 28 | neuroendocrine cell | Large Intestine | 36.294670 | 31.225661 | 1.601257 | 199.648045 |
| 29 | neuroendocrine cell | Small Intestine | 40.786922 | 31.225661 | 1.193507 | 199.569903 |
| 30 | neuron | Large Intestine | 48.971272 | 39.629100 | 0.377420 | 199.878012 |
| 31 | neuron | Small Intestine | 34.567294 | 27.058671 | 0.533752 | 197.159960 |
| 32 | neutrophil | Large Intestine | 40.546310 | 24.832400 | 0.533752 | 199.580610 |
| 33 | neutrophil | Small Intestine | 32.473085 | 18.431857 | 0.377420 | 199.860551 |
| 34 | paneth cell | Large Intestine | 138.624783 | 154.596150 | 12.821175 | 199.554914 |
| 35 | paneth cell | Small Intestine | 32.606219 | 29.450854 | 0.533752 | 199.977404 |
| 36 | plasma cell | Large Intestine | 26.523197 | 21.330074 | 0.843937 | 197.546834 |
| 37 | plasma cell | Small Intestine | 20.214029 | 16.849172 | 0.000000 | 191.707070 |
| 38 | stromal cell | Large Intestine | 53.780477 | 41.228708 | 0.533752 | 199.984527 |
| 39 | stromal cell | Small Intestine | 45.676768 | 33.698344 | 0.000000 | 199.900103 |
| 40 | t cell:cd4+ | Large Intestine | 27.552400 | 21.516250 | 0.000000 | 199.626640 |
| 41 | t cell:cd4+ | Small Intestine | 23.445133 | 17.802866 | 0.377420 | 199.681220 |
| 42 | t cell:cd8+ alpha-beta | Large Intestine | 32.725200 | 26.289680 | 1.193507 | 199.947128 |
| 43 | t cell:cd8+ alpha-beta | Small Intestine | 40.672900 | 27.197819 | 0.000000 | 199.984527 |
| 44 | transit amplifying cell | Large Intestine | 40.889106 | 34.347294 | 0.754840 | 199.984527 |
| 45 | transit amplifying cell | Small Intestine | 47.585919 | 35.930304 | 0.377420 | 199.987020 |
| 46 | transit amplifying cell:proliferating | Large Intestine | 38.313899 | 33.266528 | 1.132260 | 199.946060 |
| 47 | transit amplifying cell:proliferating | Small Intestine | 39.587575 | 32.425189 | 0.843937 | 199.793187 |
# Get top five and bottom five cell types with respect to mean distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate mean distance
df_grouped = df_filtered.groupby(cell_type_level).agg(mean_distance=('Distance', 'mean')).reset_index()
# Sort by mean distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='mean_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level Three Cell Type mean_distance
5 enterocyte:cd66+ 77.146245
9 lymphocyte:cd7+ 63.133067
3 enterocyte 50.623921
22 transit amplifying cell 47.585919
19 stromal cell 45.676768
Bottom 5 cell types in Small Intestine:
Level Three Cell Type mean_distance
2 endothelial cell of lymphatic vessel 26.096485
20 t cell:cd4+ 23.445133
1 dendritic cell 22.322890
18 plasma cell 20.214029
11 macrophage:inflammatory 18.758805
Top 5 cell types in Large Intestine:
Level Three Cell Type mean_distance
17 paneth cell 138.624783
5 enterocyte:cd66+ 54.150643
19 stromal cell 53.780477
12 muscle cell:smooth 53.677561
15 neuron 48.971272
Bottom 5 cell types in Large Intestine:
Level Three Cell Type mean_distance
0 b cell 29.018398
1 dendritic cell 28.327602
20 t cell:cd4+ 27.552400
11 macrophage:inflammatory 27.235706
18 plasma cell 26.523197
# Get top five and bottom five cell types with respect to median distance in small intestine and large intestine separately.
def get_top_bottom_cell_types(df, cell_type_level, tissue_subtype, top_n=5):
# Filter the DataFrame for the specified tissue subtype and cell type level
df_filtered = df[df['Tissue Subtype'] == tissue_subtype]
# Group by the specified cell type level and calculate median distance
df_grouped = df_filtered.groupby(cell_type_level).agg(median_distance=('Distance', 'median')).reset_index()
# Sort by median distance to get top and bottom cell types
df_sorted = df_grouped.sort_values(by='median_distance', ascending=False)
# Get top N and bottom N cell types
top_cell_types = df_sorted.head(top_n)
bottom_cell_types = df_sorted.tail(top_n)
return top_cell_types, bottom_cell_types
# Get top and bottom cell types for small intestine
top_bottom_si = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Small Intestine')
print("\nTop 5 cell types in Small Intestine:")
print(top_bottom_si[0])
print("\nBottom 5 cell types in Small Intestine:")
print(top_bottom_si[1])
# Get top and bottom cell types for large intestine
top_bottom_li = get_top_bottom_cell_types(df_all_edges_with_cell_type_level, cell_type_level, 'Large Intestine')
print("\nTop 5 cell types in Large Intestine:")
print(top_bottom_li[0])
print("\nBottom 5 cell types in Large Intestine:")
print(top_bottom_li[1])
Top 5 cell types in Small Intestine:
Level Three Cell Type median_distance
5 enterocyte:cd66+ 62.703991
9 lymphocyte:cd7+ 43.305553
22 transit amplifying cell 35.930304
3 enterocyte 35.085853
19 stromal cell 33.698344
Bottom 5 cell types in Small Intestine:
Level Three Cell Type median_distance
2 endothelial cell of lymphatic vessel 17.802866
20 t cell:cd4+ 17.802866
18 plasma cell 16.849172
1 dendritic cell 16.056992
11 macrophage:inflammatory 12.986758
Top 5 cell types in Large Intestine:
Level Three Cell Type median_distance
17 paneth cell 154.596150
12 muscle cell:smooth 45.674058
5 enterocyte:cd66+ 44.593095
19 stromal cell 41.228708
6 enterocyte:muc1+ 40.120296
Bottom 5 cell types in Large Intestine:
Level Three Cell Type median_distance
0 b cell 22.347599
20 t cell:cd4+ 21.516250
1 dendritic cell 21.410062
18 plasma cell 21.330074
11 macrophage:inflammatory 20.935756
calculate_regional_variability(df_all_edges_with_cell_type_level, cell_type_level)
Regional Variability Analysis:
Mean: Average distance in each region
Std: Standard deviation of distances
CV: Coefficient of Variation (std/mean * 100%)
mean std CV (%)
Unique Region
Ascending 41.20 32.11 77.9
Descending 40.67 32.26 79.3
Duodenum 36.24 32.45 89.5
Ileum 37.43 33.54 89.6
Mid Jejunum 41.19 37.64 91.4
Proximal Jejunum 41.26 36.84 89.3
Sigmoid 48.92 35.83 73.2
Transverse 44.69 33.58 75.1
Cell Type Variability Analysis (sorted by CV):
mean std CV (%)
Level Three Cell Type
neutrophil 35.60 38.33 107.7
natural killer cell 33.64 33.72 100.2
macrophage 36.69 35.65 97.2
t cell:cd8+ alpha-beta 39.10 37.03 94.7
interstitial cell of cajal 31.59 29.57 93.6
macrophage:inflammatory 22.15 20.61 93.0
endothelial cell of lymphatic vessel 30.87 28.45 92.2
dendritic cell 25.43 22.42 88.2
lymphocyte:cd7+ 57.05 47.42 83.1
enterocyte 47.21 38.67 81.9
stromal cell 49.58 40.53 81.7
t cell:cd4+ 24.94 20.31 81.4
neuron 41.30 32.40 78.5
neuroendocrine cell 39.37 30.36 77.1
enterocyte:cd57+ 31.65 23.87 75.4
muscle cell:smooth 45.43 34.12 75.1
transit amplifying cell 44.71 32.94 73.7
goblet cell 40.65 29.59 72.8
plasma cell 22.84 16.21 71.0
b cell 28.13 19.82 70.5
enterocyte:cd66+ 58.73 39.94 68.0
transit amplifying cell:proliferating 38.97 26.29 67.5
paneth cell 34.14 22.89 67.0
enterocyte:muc1+ 38.46 24.06 62.6
plot_violin_cells_per_celltype_small_vs_large_intestine(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_boxplots_by_region(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_distance_distribution_heatmap(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir))
plot_violin_plots_all_regions(df_all_edges_with_cell_type_level, cell_type_level, os.path.join(basepath, figures_output_dir), density_norm="count") # Or, density_norm="count"