def convert_to_jpeg(image_path, quality=80):\n \"\"\"\n Convert a TIFF image to a JPEG image with a quality of score\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=80)\n Quality score for the JPEG image\n\n Returns\n -------\n new_image_path : str\n Path to the JPEG image file\n\n \"\"\"\n\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a JPEG with a quality of 80\n new_image_path = image_path.replace(\".tif\", \".jpeg\")\n image.jpegsave(new_image_path, Q=quality)\n\n return new_image_path\n
def convert_to_png(image_path):\n \"\"\"\n Convert a TIFF image to a JPEG image with a quality of score\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=80)\n Quality score for the JPEG image\n\n Returns\n -------\n new_image_path : str\n Path to the JPEG image file\n\n \"\"\"\n\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a JPEG with a quality of 80\n new_image_path = image_path.replace(\".tif\", \".png\")\n image.pngsave(new_image_path)\n\n return new_image_path\n
def convert_to_webp(image_path, quality=100):\n \"\"\"\n Convert a TIFF image to a WEBP image with a specified quality score.\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=100)\n Quality score for the WEBP image (higher is better quality)\n\n Returns\n -------\n new_image_path : str\n Path to the WEBP image file\n \"\"\"\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a WEBP with specified quality\n new_image_path = image_path.replace(\".tif\", \".webp\")\n image.webpsave(new_image_path, Q=quality)\n\n return new_image_path\n
Returns the maximum zoom level based on the highest-numbered directory in the specified path_image_pyramid.
Name Type Description Default path_image_pyramid str
The path to the directory containing zoom level directories.
Name Type Description max_pyramid_zoom int
The maximum zoom level, or None when the directory contains no numerically named sub-directories.
Source code in src/celldega/pre/
def get_max_zoom_level(path_image_pyramid):\n \"\"\"\n Returns the maximum zoom level based on the highest-numbered directory\n in the specified path_image_pyramid.\n\n Parameters:\n path_image_pyramid (str): The path to the directory containing zoom level directories.\n\n Returns:\n max_pyramid_zoom (int): The maximum zoom level.\n \"\"\"\n # List all entries in the path_image_pyramid that are directories and can be converted to integers\n zoom_levels = [\n entry\n for entry in os.listdir(path_image_pyramid)\n if os.path.isdir(os.path.join(path_image_pyramid, entry)) and entry.isdigit()\n ]\n\n # Convert to integer and find the maximum value\n max_pyramid_zoom = max(map(int, zoom_levels)) if zoom_levels else None\n\n return max_pyramid_zoom\n
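image_path : str Path to the JPEG image file output_path : str Directory in which the pyramid is written (created if it does not exist) pyramid_name : str Name of the pyramid, appended to output_path tile_size : int (default=512) Tile size for the DeepZoom pyramid overlap : int (default=0) Overlap size for the DeepZoom pyramid suffix : str (default='.jpeg') Suffix for the DeepZoom pyramid tiles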
def make_deepzoom_pyramid(\n image_path, output_path, pyramid_name, tile_size=512, overlap=0, suffix=\".jpeg\"\n):\n \"\"\"\n Create a DeepZoom image pyramid from a JPEG image\n\n Parameters\n ----------\n image_path : str\n Path to the JPEG image file\n tile_size : int (default=512)\n Tile size for the DeepZoom pyramid\n overlap : int (default=0)\n Overlap size for the DeepZoom pyramid\n suffix : str (default='jpeg')\n Suffix for the DeepZoom pyramid tiles\n\n Returns\n -------\n None\n\n \"\"\"\n\n # Define the output path\n output_path = Path(output_path)\n\n # Load the JPEG image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # check if the output path exists and create it if it does not\n output_path.mkdir(parents=True, exist_ok=True)\n\n # append the pyramid name to the output path\n output_path = output_path / pyramid_name\n\n # Save the image as a DeepZoom image pyramid\n image.dzsave(output_path, tile_size=tile_size, overlap=overlap, suffix=suffix)\n
technology : str The technology used to generate the data, Xenium and MERSCOPE are supported. path_transformation_matrix : str Path to the transformation matrix file path_meta_cell_micron : str Path to the meta cell file with coordinates in microns path_meta_cell_image : str Path to save the meta cell file with coordinates in pixels
technology : str The technology used to generate the data, Xenium and MERSCOPE are supported. path_cbg : str Path to the cell-by-gene matrix data (the data format can vary based on technology) path_output : str Path to save the meta gene file
def make_meta_gene(technology, path_cbg, path_output):\n \"\"\"\n Create a DataFrame with genes and their assigned colors\n\n Parameters\n ----------\n technology : str\n The technology used to generate the data, Xenium and MERSCOPE are supported.\n path_cbg : str\n Path to the cell-by-gene matrix data (the data format can vary based on technology)\n path_output : str\n Path to save the meta gene file\n\n Returns\n -------\n None\n\n Examples\n --------\n >>> make_meta_gene(\n ... technology='Xenium',\n ... path_cbg='data/',\n ... path_output='data/meta_gene.parquet'\n ... )\n \"\"\"\n\n if technology == \"MERSCOPE\":\n cbg = pd.read_csv(path_cbg, index_col=0)\n genes = cbg.columns.tolist()\n elif technology == \"Xenium\":\n # genes = pd.read_csv(path_cbg + 'features.tsv.gz', sep='\\t', header=None)[1].values.tolist()\n cbg = read_cbg_mtx(path_cbg)\n genes = cbg.columns.tolist()\n\n # Get all categorical color palettes from Matplotlib and flatten them into a single list of colors\n palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if \"tab\" in name]\n flat_colors = [color for palette in palettes for color in palette]\n\n # Convert RGB tuples to hex codes\n flat_colors_hex = [to_hex(color) for color in flat_colors]\n\n # Use modular arithmetic to assign a color to each gene, white for genes with \"Blank\"\n colors = [\n flat_colors_hex[i % len(flat_colors_hex)] if \"Blank\" not in gene else \"#FFFFFF\"\n for i, gene in enumerate(genes)\n ]\n\n # Create a DataFrame with genes and their assigned colors\n ser_color = pd.Series(colors, index=genes)\n\n # calculate gene expression metadata\n meta_gene = calc_meta_gene_data(cbg)\n meta_gene['color'] = ser_color\n\n meta_gene.to_parquet(path_output)\n
def make_trx_tiles(\n technology,\n path_trx,\n path_transformation_matrix,\n path_trx_tiles,\n tile_size=1000,\n chunk_size=1000000,\n verbose=False,\n image_scale = 0.5\n):\n \"\"\" \"\"\"\n\n tile_size_x = tile_size\n tile_size_y = tile_size\n\n transformation_matrix = pd.read_csv(\n path_transformation_matrix, header=None, sep=\" \"\n ).values\n\n if technology == \"MERSCOPE\":\n trx_ini = pd.read_csv(path_trx, usecols=[\"gene\", \"global_x\", \"global_y\"])\n\n trx_ini.columns = [x.replace(\"global_\", \"\") for x in trx_ini.columns.tolist()]\n trx_ini.rename(columns={\"gene\": \"name\"}, inplace=True)\n\n elif technology == \"Xenium\":\n trx_ini = pd.read_parquet(\n path_trx, columns=[\"feature_name\", \"x_location\", \"y_location\"]\n )\n\n # trx_ini['feature_name'] = trx_ini['feature_name'].apply(lambda x: x.decode('utf-8'))\n trx_ini.columns = [x.replace(\"_location\", \"\") for x in trx_ini.columns.tolist()]\n trx_ini.rename(columns={\"feature_name\": \"name\"}, inplace=True)\n\n trx = pd.DataFrame() # Initialize empty DataFrame for results\n\n for start_row in range(0, trx_ini.shape[0], chunk_size):\n # print(start_row/1e6)\n chunk = trx_ini.iloc[start_row : start_row + chunk_size].copy()\n points = np.hstack((chunk[[\"x\", \"y\"]], np.ones((chunk.shape[0], 1))))\n transformed_points =, transformation_matrix.T)[:, :2]\n chunk[[\"x\", \"y\"]] = (\n transformed_points # Update chunk with transformed coordinates\n )\n\n # add this as an argument that can be modified\n chunk[\"x\"] = chunk[\"x\"] * image_scale\n chunk[\"y\"] = chunk[\"y\"] * image_scale\n\n chunk[\"x\"] = chunk[\"x\"].round(2)\n chunk[\"y\"] = chunk[\"y\"].round(2)\n trx = pd.concat([trx, chunk], ignore_index=True)\n\n if not os.path.exists(path_trx_tiles):\n os.mkdir(path_trx_tiles)\n\n x_min = 0\n x_max = trx[\"x\"].max()\n y_min = 0\n y_max = trx[\"y\"].max()\n\n # Calculate the number of tiles needed\n n_tiles_x = int(np.ceil((x_max - x_min) / tile_size_x))\n n_tiles_y = 
int(np.ceil((y_max - y_min) / tile_size_y))\n\n for i in range(n_tiles_x):\n\n if i % 2 == 0 and verbose:\n print(\"row\", i)\n\n for j in range(n_tiles_y):\n # calculate polygon from these bounds\n tile_x_min = x_min + i * tile_size_x\n tile_x_max = tile_x_min + tile_size_x\n tile_y_min = y_min + j * tile_size_y\n tile_y_max = tile_y_min + tile_size_y\n\n # Filter trx to get only the data within the current tile's bounds\n # We need to make this more efficient\n # option 1: make a GeoDataFrame and filter using sindex and the tile polygon\n # option 2: remove transcripts that have been assigned to a tile from the DataFrame\n tile_trx = trx[\n (trx.x >= tile_x_min)\n & (trx.x < tile_x_max)\n & (trx.y >= tile_y_min)\n & (trx.y < tile_y_max)\n ].copy()\n\n # this actually slows things down - will try to move to Polars later\n # # drop trx that have been assigned to a tile from the original trx DataFrame\n # trx = trx[~trx.index.isin(tile_trx.index)]\n\n # make 'geometry' column\n tile_trx = tile_trx.assign(\n geometry=tile_trx.apply(lambda row: [row[\"x\"], row[\"y\"]], axis=1)\n )\n\n # add some logic to skip tiles where there are no transcripts\n\n # Define the filename based on the tile's coordinates\n filename = f\"{path_trx_tiles}/transcripts_tile_{i}_{j}.parquet\"\n\n # Save the filtered DataFrame to a Parquet file\n if tile_trx.shape[0] > 0:\n tile_trx[[\"name\", \"geometry\"]].to_parquet(filename)\n\n tile_bonds = {\n \"x_min\": x_min,\n \"x_max\": x_max,\n \"y_min\": y_min,\n \"y_max\": y_max,\n }\n\n return tile_bonds\n
cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows
Source code in src/celldega/pre/
def read_cbg_mtx(base_path):\n \"\"\"\n Read the cell-by-gene matrix from the mtx files\n\n Parameters\n ----------\n base_path : str\n The base path to the directory containing the mtx files\n\n Returns\n -------\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n\n \"\"\"\n\n print(\"read mtx file from \", base_path)\n\n # File paths\n barcodes_path = base_path + \"barcodes.tsv.gz\"\n features_path = base_path + \"features.tsv.gz\"\n matrix_path = base_path + \"matrix.mtx.gz\"\n\n # Read barcodes and features\n barcodes = pd.read_csv(barcodes_path, header=None, compression=\"gzip\")\n features = pd.read_csv(features_path, header=None, compression=\"gzip\", sep=\"\\t\")\n\n # Read the gene expression matrix and transpose it\n # Transpose and convert to CSC format for fast column slicing\n matrix = mmread(matrix_path).transpose().tocsc()\n\n # Create a sparse DataFrame with genes as columns and barcodes as rows\n cbg = pd.DataFrame.sparse.from_spmatrix(\n matrix, index=barcodes[0], columns=features[1]\n )\n\n return cbg\n
base_path : str The base path to the parent directory containing the landscape_files directory cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows verbose : bool Whether to print progress information
def save_cbg_gene_parquets(base_path, cbg, verbose=False):\n \"\"\"\n Save the cell-by-gene matrix as gene specific Parquet files\n\n Parameters\n ----------\n base_path : str\n The base path to the parent directory containing the landscape_files directory\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n verbose : bool\n Whether to print progress information\n\n Returns\n -------\n None\n\n \"\"\"\n\n output_dir = base_path + \"cbg/\"\n os.makedirs(output_dir, exist_ok=True)\n\n for index, gene in enumerate(cbg.columns):\n\n if verbose:\n if index % 100 == 0:\n print(index)\n\n # Extract the column as a DataFrame as a copy\n col_df = cbg[[gene]].copy()\n\n col_df = col_df.sparse.to_dense()\n col_df = col_df.astype(int)\n\n # necessary to prevent error in to_parquet\n inst_df = pd.DataFrame(\n col_df.values, columns=[gene], index=col_df.index.tolist()\n )\n\n inst_df.replace(0, pd.NA, inplace=True)\n inst_df.dropna(how=\"all\", inplace=True)\n\n if inst_df.shape[0] > 0:\n inst_df.to_parquet(os.path.join(output_dir, f\"{gene}.parquet\"))\n
A widget for visualizing a 'landscape' view of spatial omics data.
Name Type Description Default ini_x float
The initial x-coordinate of the view.
required ini_y float
The initial y-coordinate of the view.
required ini_zoom float
The initial zoom level of the view.
required bounce_time int
The time taken for the view to bounce back after panning.
required token str
The token traitlet.
required base_url str
The base URL for the widget.
required dataset_name str
The name of the dataset to visualize. This will show up in the user interface bar.
Name Type Description component str
The name of the component.
technology str The technology used.
base_url str The base URL for the widget.
token str The token traitlet.
ini_x float The initial x-coordinate of the view.
ini_y float The initial y-coordinate of the view.
ini_z float The initial z-coordinate of the view.
ini_zoom float The initial zoom level of the view.
dataset_name str The name of the dataset to visualize.
update_trigger dict The dictionary to trigger updates.
cell_clusters dict The dictionary containing cell cluster information.
Name Type Description Landscape
A widget for visualizing a 'landscape' view of spatial omics data.
Source code in src/celldega/viz/
class Landscape(anywidget.AnyWidget):\n \"\"\"\n A widget for visualizing a 'landscape' view of spatial omics data.\n\n Args:\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n bounce_time (int): The time taken for the view to bounce back after panning.\n token (str): The token traitlet.\n base_url (str): The base URL for the widget.\n dataset_name (str, optional): The name of the dataset to visualize. This will show up in the user interface bar.\n\n Attributes:\n component (str): The name of the component.\n technology (str): The technology used.\n base_url (str): The base URL for the widget.\n token (str): The token traitlet.\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_z (float): The initial z-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n dataset_name (str): The name of the dataset to visualize.\n update_trigger (dict): The dictionary to trigger updates.\n cell_clusters (dict): The dictionary containing cell cluster information.\n\n Returns:\n Landscape: A widget for visualizing a 'landscape' view of spatial omics data.\n \"\"\"\n\n _esm = pathlib.Path(__file__).parent / \"../static\" / \"widget.js\"\n _css = pathlib.Path(__file__).parent / \"../static\" / \"widget.css\"\n component = traitlets.Unicode(\"Landscape\").tag(sync=True)\n\n technology = traitlets.Unicode(\"sst\").tag(sync=True)\n base_url = traitlets.Unicode(\"\").tag(sync=True)\n token = traitlets.Unicode(\"\").tag(sync=True)\n ini_x = traitlets.Float(1000).tag(sync=True)\n ini_y = traitlets.Float(1000).tag(sync=True)\n ini_z = traitlets.Float(0).tag(sync=True)\n ini_zoom = traitlets.Float(0).tag(sync=True)\n dataset_name = traitlets.Unicode(\"\").tag(sync=True)\n\n update_trigger = traitlets.Dict().tag(sync=True)\n cell_clusters = traitlets.Dict().tag(sync=True)\n\n def 
trigger_update(self, new_value):\n # This method updates the update_trigger traitlet with a new value\n # You can pass any information necessary for the update, or just a timestamp\n self.update_trigger = new_value\n\n def update_cell_clusters(self, new_clusters):\n # Convert the new_clusters to a JSON serializable format if necessary\n self.cell_clusters = new_clusters\n
"},{"location":"python/pre/api/","title":"Pre Module API Reference","text":""},{"location":"python/pre/api/#landscape-functions","title":"Landscape Functions","text":""},{"location":"python/pre/api/#celldega.pre.landscape.calc_meta_gene_data","title":"calc_meta_gene_data(cbg)","text":"
Calculate gene metadata from the cell-by-gene matrix
cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows
Source code in src/celldega/pre/
def read_cbg_mtx(base_path):\n \"\"\"\n Read the cell-by-gene matrix from the mtx files\n\n Parameters\n ----------\n base_path : str\n The base path to the directory containing the mtx files\n\n Returns\n -------\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n\n \"\"\"\n\n print(\"read mtx file from \", base_path)\n\n # File paths\n barcodes_path = base_path + \"barcodes.tsv.gz\"\n features_path = base_path + \"features.tsv.gz\"\n matrix_path = base_path + \"matrix.mtx.gz\"\n\n # Read barcodes and features\n barcodes = pd.read_csv(barcodes_path, header=None, compression=\"gzip\")\n features = pd.read_csv(features_path, header=None, compression=\"gzip\", sep=\"\\t\")\n\n # Read the gene expression matrix and transpose it\n # Transpose and convert to CSC format for fast column slicing\n matrix = mmread(matrix_path).transpose().tocsc()\n\n # Create a sparse DataFrame with genes as columns and barcodes as rows\n cbg = pd.DataFrame.sparse.from_spmatrix(\n matrix, index=barcodes[0], columns=features[1]\n )\n\n return cbg\n
base_path : str The base path to the parent directory containing the landscape_files directory cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows verbose : bool Whether to print progress information
def save_cbg_gene_parquets(base_path, cbg, verbose=False):\n \"\"\"\n Save the cell-by-gene matrix as gene specific Parquet files\n\n Parameters\n ----------\n base_path : str\n The base path to the parent directory containing the landscape_files directory\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n verbose : bool\n Whether to print progress information\n\n Returns\n -------\n None\n\n \"\"\"\n\n output_dir = base_path + \"cbg/\"\n os.makedirs(output_dir, exist_ok=True)\n\n for index, gene in enumerate(cbg.columns):\n\n if verbose:\n if index % 100 == 0:\n print(index)\n\n # Extract the column as a DataFrame as a copy\n col_df = cbg[[gene]].copy()\n\n col_df = col_df.sparse.to_dense()\n col_df = col_df.astype(int)\n\n # necessary to prevent error in to_parquet\n inst_df = pd.DataFrame(\n col_df.values, columns=[gene], index=col_df.index.tolist()\n )\n\n inst_df.replace(0, pd.NA, inplace=True)\n inst_df.dropna(how=\"all\", inplace=True)\n\n if inst_df.shape[0] > 0:\n inst_df.to_parquet(os.path.join(output_dir, f\"{gene}.parquet\"))\n
landscape: Functions related to landscape operations.
init: Initialization functions and utilities.
"},{"location":"python/viz/api/","title":"Viz Module API Reference","text":""},{"location":"python/viz/api/#widget-classes","title":"Widget Classes","text":""},{"location":"python/viz/api/#celldega.viz.widget.Landscape","title":"Landscape","text":"
Bases: AnyWidget
A widget for visualizing a 'landscape' view of spatial omics data.
Name Type Description Default ini_x float
The initial x-coordinate of the view.
required ini_y float
The initial y-coordinate of the view.
required ini_zoom float
The initial zoom level of the view.
required bounce_time int
The time taken for the view to bounce back after panning.
required token str
The token traitlet.
required base_url str
The base URL for the widget.
required dataset_name str
The name of the dataset to visualize. This will show up in the user interface bar.
Name Type Description component str
The name of the component.
technology str The technology used.
base_url str The base URL for the widget.
token str The token traitlet.
ini_x float The initial x-coordinate of the view.
ini_y float The initial y-coordinate of the view.
ini_z float The initial z-coordinate of the view.
ini_zoom float The initial zoom level of the view.
dataset_name str The name of the dataset to visualize.
update_trigger dict The dictionary to trigger updates.
cell_clusters dict The dictionary containing cell cluster information.
Name Type Description Landscape
A widget for visualizing a 'landscape' view of spatial omics data.
Source code in src/celldega/viz/
class Landscape(anywidget.AnyWidget):\n \"\"\"\n A widget for visualizing a 'landscape' view of spatial omics data.\n\n Args:\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n bounce_time (int): The time taken for the view to bounce back after panning.\n token (str): The token traitlet.\n base_url (str): The base URL for the widget.\n dataset_name (str, optional): The name of the dataset to visualize. This will show up in the user interface bar.\n\n Attributes:\n component (str): The name of the component.\n technology (str): The technology used.\n base_url (str): The base URL for the widget.\n token (str): The token traitlet.\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_z (float): The initial z-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n dataset_name (str): The name of the dataset to visualize.\n update_trigger (dict): The dictionary to trigger updates.\n cell_clusters (dict): The dictionary containing cell cluster information.\n\n Returns:\n Landscape: A widget for visualizing a 'landscape' view of spatial omics data.\n \"\"\"\n\n _esm = pathlib.Path(__file__).parent / \"../static\" / \"widget.js\"\n _css = pathlib.Path(__file__).parent / \"../static\" / \"widget.css\"\n component = traitlets.Unicode(\"Landscape\").tag(sync=True)\n\n technology = traitlets.Unicode(\"sst\").tag(sync=True)\n base_url = traitlets.Unicode(\"\").tag(sync=True)\n token = traitlets.Unicode(\"\").tag(sync=True)\n ini_x = traitlets.Float(1000).tag(sync=True)\n ini_y = traitlets.Float(1000).tag(sync=True)\n ini_z = traitlets.Float(0).tag(sync=True)\n ini_zoom = traitlets.Float(0).tag(sync=True)\n dataset_name = traitlets.Unicode(\"\").tag(sync=True)\n\n update_trigger = traitlets.Dict().tag(sync=True)\n cell_clusters = traitlets.Dict().tag(sync=True)\n\n def 
trigger_update(self, new_value):\n # This method updates the update_trigger traitlet with a new value\n # You can pass any information necessary for the update, or just a timestamp\n self.update_trigger = new_value\n\n def update_cell_clusters(self, new_clusters):\n # Convert the new_clusters to a JSON serializable format if necessary\n self.cell_clusters = new_clusters\n
widget: Widgets for visualizing spatial omics data.
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome to Celldega's Documentation","text":"
This is the official documentation for the Celldega library.
"},{"location":"gallery_xenium/","title":"Celldega Xenium Gallery","text":""},{"location":"gallery_xenium/#xenium-prime-mouse-brain-coronal-ff","title":"Xenium Prime Mouse Brain Coronal FF","text":""},{"location":"gallery_xenium/#xenium-human-pancreas-ffpe","title":"Xenium Human Pancreas FFPE","text":""},{"location":"gallery_xenium/#bone-marrow","title":"Bone Marrow","text":""},{"location":"gallery_xenium_human_skin/","title":"Xenium Prime Human Skin FFPE outs","text":""},{"location":"gallery_xenium_mouse_brain/","title":"Xenium Prime Mouse Brain Coronal FF","text":""},{"location":"getting_started/","title":"Getting Started","text":"
def convert_to_jpeg(image_path, quality=80):\n \"\"\"\n Convert a TIFF image to a JPEG image with a quality of score\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=80)\n Quality score for the JPEG image\n\n Returns\n -------\n new_image_path : str\n Path to the JPEG image file\n\n \"\"\"\n\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a JPEG with a quality of 80\n new_image_path = image_path.replace(\".tif\", \".jpeg\")\n image.jpegsave(new_image_path, Q=quality)\n\n return new_image_path\n
def convert_to_png(image_path):\n \"\"\"\n Convert a TIFF image to a JPEG image with a quality of score\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=80)\n Quality score for the JPEG image\n\n Returns\n -------\n new_image_path : str\n Path to the JPEG image file\n\n \"\"\"\n\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a JPEG with a quality of 80\n new_image_path = image_path.replace(\".tif\", \".png\")\n image.pngsave(new_image_path)\n\n return new_image_path\n
def convert_to_webp(image_path, quality=100):\n \"\"\"\n Convert a TIFF image to a WEBP image with a specified quality score.\n\n Parameters\n ----------\n image_path : str\n Path to the image file\n quality : int (default=100)\n Quality score for the WEBP image (higher is better quality)\n\n Returns\n -------\n new_image_path : str\n Path to the WEBP image file\n \"\"\"\n # Load the TIFF image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # Save the image as a WEBP with specified quality\n new_image_path = image_path.replace(\".tif\", \".webp\")\n image.webpsave(new_image_path, Q=quality)\n\n return new_image_path\n
Returns the maximum zoom level based on the highest-numbered directory in the specified path_image_pyramid.
Name Type Description Default path_image_pyramid str
The path to the directory containing zoom level directories.
Name Type Description max_pyramid_zoom int
The maximum zoom level, or None when the directory contains no numerically named sub-directories.
Source code in src/celldega/pre/
def get_max_zoom_level(path_image_pyramid):\n \"\"\"\n Returns the maximum zoom level based on the highest-numbered directory\n in the specified path_image_pyramid.\n\n Parameters:\n path_image_pyramid (str): The path to the directory containing zoom level directories.\n\n Returns:\n max_pyramid_zoom (int): The maximum zoom level.\n \"\"\"\n # List all entries in the path_image_pyramid that are directories and can be converted to integers\n zoom_levels = [\n entry\n for entry in os.listdir(path_image_pyramid)\n if os.path.isdir(os.path.join(path_image_pyramid, entry)) and entry.isdigit()\n ]\n\n # Convert to integer and find the maximum value\n max_pyramid_zoom = max(map(int, zoom_levels)) if zoom_levels else None\n\n return max_pyramid_zoom\n
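image_path : str Path to the JPEG image file output_path : str Directory in which the pyramid is written (created if it does not exist) pyramid_name : str Name of the pyramid, appended to output_path tile_size : int (default=512) Tile size for the DeepZoom pyramid overlap : int (default=0) Overlap size for the DeepZoom pyramid suffix : str (default='.jpeg') Suffix for the DeepZoom pyramid tiles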
def make_deepzoom_pyramid(\n image_path, output_path, pyramid_name, tile_size=512, overlap=0, suffix=\".jpeg\"\n):\n \"\"\"\n Create a DeepZoom image pyramid from a JPEG image\n\n Parameters\n ----------\n image_path : str\n Path to the JPEG image file\n tile_size : int (default=512)\n Tile size for the DeepZoom pyramid\n overlap : int (default=0)\n Overlap size for the DeepZoom pyramid\n suffix : str (default='jpeg')\n Suffix for the DeepZoom pyramid tiles\n\n Returns\n -------\n None\n\n \"\"\"\n\n # Define the output path\n output_path = Path(output_path)\n\n # Load the JPEG image\n image = pyvips.Image.new_from_file(image_path, access=\"sequential\")\n\n # check if the output path exists and create it if it does not\n output_path.mkdir(parents=True, exist_ok=True)\n\n # append the pyramid name to the output path\n output_path = output_path / pyramid_name\n\n # Save the image as a DeepZoom image pyramid\n image.dzsave(output_path, tile_size=tile_size, overlap=overlap, suffix=suffix)\n
technology : str The technology used to generate the data, Xenium and MERSCOPE are supported. path_transformation_matrix : str Path to the transformation matrix file path_meta_cell_micron : str Path to the meta cell file with coordinates in microns path_meta_cell_image : str Path to save the meta cell file with coordinates in pixels
technology : str The technology used to generate the data, Xenium and MERSCOPE are supported. path_cbg : str Path to the cell-by-gene matrix data (the data format can vary based on technology) path_output : str Path to save the meta gene file
def make_meta_gene(technology, path_cbg, path_output):\n \"\"\"\n Create a DataFrame with genes and their assigned colors\n\n Parameters\n ----------\n technology : str\n The technology used to generate the data, Xenium and MERSCOPE are supported.\n path_cbg : str\n Path to the cell-by-gene matrix data (the data format can vary based on technology)\n path_output : str\n Path to save the meta gene file\n\n Returns\n -------\n None\n\n Examples\n --------\n >>> make_meta_gene(\n ... technology='Xenium',\n ... path_cbg='data/',\n ... path_output='data/meta_gene.parquet'\n ... )\n \"\"\"\n\n if technology == \"MERSCOPE\":\n cbg = pd.read_csv(path_cbg, index_col=0)\n genes = cbg.columns.tolist()\n elif technology == \"Xenium\":\n # genes = pd.read_csv(path_cbg + 'features.tsv.gz', sep='\\t', header=None)[1].values.tolist()\n cbg = read_cbg_mtx(path_cbg)\n genes = cbg.columns.tolist()\n\n # Get all categorical color palettes from Matplotlib and flatten them into a single list of colors\n palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if \"tab\" in name]\n flat_colors = [color for palette in palettes for color in palette]\n\n # Convert RGB tuples to hex codes\n flat_colors_hex = [to_hex(color) for color in flat_colors]\n\n # Use modular arithmetic to assign a color to each gene, white for genes with \"Blank\"\n colors = [\n flat_colors_hex[i % len(flat_colors_hex)] if \"Blank\" not in gene else \"#FFFFFF\"\n for i, gene in enumerate(genes)\n ]\n\n # Create a DataFrame with genes and their assigned colors\n ser_color = pd.Series(colors, index=genes)\n\n # calculate gene expression metadata\n meta_gene = calc_meta_gene_data(cbg)\n meta_gene['color'] = ser_color\n\n meta_gene.to_parquet(path_output)\n
def make_trx_tiles(\n technology,\n path_trx,\n path_transformation_matrix,\n path_trx_tiles,\n tile_size=1000,\n chunk_size=1000000,\n verbose=False,\n image_scale = 0.5\n):\n \"\"\" \"\"\"\n\n tile_size_x = tile_size\n tile_size_y = tile_size\n\n transformation_matrix = pd.read_csv(\n path_transformation_matrix, header=None, sep=\" \"\n ).values\n\n if technology == \"MERSCOPE\":\n trx_ini = pd.read_csv(path_trx, usecols=[\"gene\", \"global_x\", \"global_y\"])\n\n trx_ini.columns = [x.replace(\"global_\", \"\") for x in trx_ini.columns.tolist()]\n trx_ini.rename(columns={\"gene\": \"name\"}, inplace=True)\n\n elif technology == \"Xenium\":\n trx_ini = pd.read_parquet(\n path_trx, columns=[\"feature_name\", \"x_location\", \"y_location\"]\n )\n\n # trx_ini['feature_name'] = trx_ini['feature_name'].apply(lambda x: x.decode('utf-8'))\n trx_ini.columns = [x.replace(\"_location\", \"\") for x in trx_ini.columns.tolist()]\n trx_ini.rename(columns={\"feature_name\": \"name\"}, inplace=True)\n\n trx = pd.DataFrame() # Initialize empty DataFrame for results\n\n for start_row in range(0, trx_ini.shape[0], chunk_size):\n # print(start_row/1e6)\n chunk = trx_ini.iloc[start_row : start_row + chunk_size].copy()\n points = np.hstack((chunk[[\"x\", \"y\"]], np.ones((chunk.shape[0], 1))))\n transformed_points = np.dot(points, transformation_matrix.T)[:, :2]\n chunk[[\"x\", \"y\"]] = (\n transformed_points # Update chunk with transformed coordinates\n )\n\n # add this as an argument that can be modified\n chunk[\"x\"] = chunk[\"x\"] * image_scale\n chunk[\"y\"] = chunk[\"y\"] * image_scale\n\n chunk[\"x\"] = chunk[\"x\"].round(2)\n chunk[\"y\"] = chunk[\"y\"].round(2)\n trx = pd.concat([trx, chunk], ignore_index=True)\n\n if not os.path.exists(path_trx_tiles):\n os.mkdir(path_trx_tiles)\n\n x_min = 0\n x_max = trx[\"x\"].max()\n y_min = 0\n y_max = trx[\"y\"].max()\n\n # Calculate the number of tiles needed\n n_tiles_x = int(np.ceil((x_max - x_min) / tile_size_x))\n n_tiles_y = 
int(np.ceil((y_max - y_min) / tile_size_y))\n\n for i in range(n_tiles_x):\n\n if i % 2 == 0 and verbose:\n print(\"row\", i)\n\n for j in range(n_tiles_y):\n # calculate polygon from these bounds\n tile_x_min = x_min + i * tile_size_x\n tile_x_max = tile_x_min + tile_size_x\n tile_y_min = y_min + j * tile_size_y\n tile_y_max = tile_y_min + tile_size_y\n\n # Filter trx to get only the data within the current tile's bounds\n # We need to make this more efficient\n # option 1: make a GeoDataFrame and filter using sindex and the tile polygon\n # option 2: remove transcripts that have been assigned to a tile from the DataFrame\n tile_trx = trx[\n (trx.x >= tile_x_min)\n & (trx.x < tile_x_max)\n & (trx.y >= tile_y_min)\n & (trx.y < tile_y_max)\n ].copy()\n\n # this actually slows things down - will try to move to Polars later\n # # drop trx that have been assigned to a tile from the original trx DataFrame\n # trx = trx[~trx.index.isin(tile_trx.index)]\n\n # make 'geometry' column\n tile_trx = tile_trx.assign(\n geometry=tile_trx.apply(lambda row: [row[\"x\"], row[\"y\"]], axis=1)\n )\n\n # add some logic to skip tiles where there are no transcripts\n\n # Define the filename based on the tile's coordinates\n filename = f\"{path_trx_tiles}/transcripts_tile_{i}_{j}.parquet\"\n\n # Save the filtered DataFrame to a Parquet file\n if tile_trx.shape[0] > 0:\n tile_trx[[\"name\", \"geometry\"]].to_parquet(filename)\n\n tile_bonds = {\n \"x_min\": x_min,\n \"x_max\": x_max,\n \"y_min\": y_min,\n \"y_max\": y_max,\n }\n\n return tile_bonds\n
cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows
Source code in src/celldega/pre/
def read_cbg_mtx(base_path):\n \"\"\"\n Read the cell-by-gene matrix from the mtx files\n\n Parameters\n ----------\n base_path : str\n The base path to the directory containing the mtx files\n\n Returns\n -------\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n\n \"\"\"\n\n print(\"read mtx file from \", base_path)\n\n # File paths\n barcodes_path = base_path + \"barcodes.tsv.gz\"\n features_path = base_path + \"features.tsv.gz\"\n matrix_path = base_path + \"matrix.mtx.gz\"\n\n # Read barcodes and features\n barcodes = pd.read_csv(barcodes_path, header=None, compression=\"gzip\")\n features = pd.read_csv(features_path, header=None, compression=\"gzip\", sep=\"\\t\")\n\n # Read the gene expression matrix and transpose it\n # Transpose and convert to CSC format for fast column slicing\n matrix = mmread(matrix_path).transpose().tocsc()\n\n # Create a sparse DataFrame with genes as columns and barcodes as rows\n cbg = pd.DataFrame.sparse.from_spmatrix(\n matrix, index=barcodes[0], columns=features[1]\n )\n\n return cbg\n
base_path : str The base path to the parent directory containing the landscape_files directory cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows verbose : bool Whether to print progress information
def save_cbg_gene_parquets(base_path, cbg, verbose=False):\n \"\"\"\n Save the cell-by-gene matrix as gene specific Parquet files\n\n Parameters\n ----------\n base_path : str\n The base path to the parent directory containing the landscape_files directory\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n verbose : bool\n Whether to print progress information\n\n Returns\n -------\n None\n\n \"\"\"\n\n output_dir = base_path + \"cbg/\"\n os.makedirs(output_dir, exist_ok=True)\n\n for index, gene in enumerate(cbg.columns):\n\n if verbose:\n if index % 100 == 0:\n print(index)\n\n # Extract the column as a DataFrame as a copy\n col_df = cbg[[gene]].copy()\n\n col_df = col_df.sparse.to_dense()\n col_df = col_df.astype(int)\n\n # necessary to prevent error in to_parquet\n inst_df = pd.DataFrame(\n col_df.values, columns=[gene], index=col_df.index.tolist()\n )\n\n inst_df.replace(0, pd.NA, inplace=True)\n inst_df.dropna(how=\"all\", inplace=True)\n\n if inst_df.shape[0] > 0:\n inst_df.to_parquet(os.path.join(output_dir, f\"{gene}.parquet\"))\n
A widget for visualizing a 'landscape' view of spatial omics data.
Name Type Description Default ini_x float
The initial x-coordinate of the view.
required ini_y float
The initial y-coordinate of the view.
required ini_zoom float
The initial zoom level of the view.
required bounce_time int
The time taken for the view to bounce back after panning.
required token str
The token traitlet.
required base_url str
The base URL for the widget.
required dataset_name str
The name of the dataset to visualize. This will show up in the user interface bar.
Name Type Description component str
The name of the component.
The technology used.
The base URL for the widget.
The token traitlet.
The initial x-coordinate of the view.
The initial y-coordinate of the view.
The initial z-coordinate of the view.
The initial zoom level of the view.
The name of the dataset to visualize.
The dictionary to trigger updates.
The dictionary containing cell cluster information.
Name Type Description Landscape
A widget for visualizing a 'landscape' view of spatial omics data.
Source code in src/celldega/viz/
class Landscape(anywidget.AnyWidget):\n \"\"\"\n A widget for visualizing a 'landscape' view of spatial omics data.\n\n Args:\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n bounce_time (int): The time taken for the view to bounce back after panning.\n token (str): The token traitlet.\n base_url (str): The base URL for the widget.\n dataset_name (str, optional): The name of the dataset to visualize. This will show up in the user interface bar.\n\n Attributes:\n component (str): The name of the component.\n technology (str): The technology used.\n base_url (str): The base URL for the widget.\n token (str): The token traitlet.\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_z (float): The initial z-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n dataset_name (str): The name of the dataset to visualize.\n update_trigger (dict): The dictionary to trigger updates.\n cell_clusters (dict): The dictionary containing cell cluster information.\n\n Returns:\n Landscape: A widget for visualizing a 'landscape' view of spatial omics data.\n \"\"\"\n\n _esm = pathlib.Path(__file__).parent / \"../static\" / \"widget.js\"\n _css = pathlib.Path(__file__).parent / \"../static\" / \"widget.css\"\n component = traitlets.Unicode(\"Landscape\").tag(sync=True)\n\n technology = traitlets.Unicode(\"sst\").tag(sync=True)\n base_url = traitlets.Unicode(\"\").tag(sync=True)\n token = traitlets.Unicode(\"\").tag(sync=True)\n ini_x = traitlets.Float(1000).tag(sync=True)\n ini_y = traitlets.Float(1000).tag(sync=True)\n ini_z = traitlets.Float(0).tag(sync=True)\n ini_zoom = traitlets.Float(0).tag(sync=True)\n dataset_name = traitlets.Unicode(\"\").tag(sync=True)\n\n update_trigger = traitlets.Dict().tag(sync=True)\n cell_clusters = traitlets.Dict().tag(sync=True)\n\n def 
trigger_update(self, new_value):\n # This method updates the update_trigger traitlet with a new value\n # You can pass any information necessary for the update, or just a timestamp\n self.update_trigger = new_value\n\n def update_cell_clusters(self, new_clusters):\n # Convert the new_clusters to a JSON serializable format if necessary\n self.cell_clusters = new_clusters\n
"},{"location":"python/pre/api/","title":"Pre Module API Reference","text":""},{"location":"python/pre/api/#landscape-functions","title":"Landscape Functions","text":""},{"location":"python/pre/api/#celldega.pre.landscape.calc_meta_gene_data","title":"calc_meta_gene_data(cbg)","text":"
Calculate gene metadata from the cell-by-gene matrix
cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows
Source code in src/celldega/pre/
def read_cbg_mtx(base_path):\n \"\"\"\n Read the cell-by-gene matrix from the mtx files\n\n Parameters\n ----------\n base_path : str\n The base path to the directory containing the mtx files\n\n Returns\n -------\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n\n \"\"\"\n\n print(\"read mtx file from \", base_path)\n\n # File paths\n barcodes_path = base_path + \"barcodes.tsv.gz\"\n features_path = base_path + \"features.tsv.gz\"\n matrix_path = base_path + \"matrix.mtx.gz\"\n\n # Read barcodes and features\n barcodes = pd.read_csv(barcodes_path, header=None, compression=\"gzip\")\n features = pd.read_csv(features_path, header=None, compression=\"gzip\", sep=\"\\t\")\n\n # Read the gene expression matrix and transpose it\n # Transpose and convert to CSC format for fast column slicing\n matrix = mmread(matrix_path).transpose().tocsc()\n\n # Create a sparse DataFrame with genes as columns and barcodes as rows\n cbg = pd.DataFrame.sparse.from_spmatrix(\n matrix, index=barcodes[0], columns=features[1]\n )\n\n return cbg\n
base_path : str The base path to the parent directory containing the landscape_files directory cbg : pandas.DataFrame A sparse DataFrame with genes as columns and barcodes as rows verbose : bool Whether to print progress information
def save_cbg_gene_parquets(base_path, cbg, verbose=False):\n \"\"\"\n Save the cell-by-gene matrix as gene specific Parquet files\n\n Parameters\n ----------\n base_path : str\n The base path to the parent directory containing the landscape_files directory\n cbg : pandas.DataFrame\n A sparse DataFrame with genes as columns and barcodes as rows\n verbose : bool\n Whether to print progress information\n\n Returns\n -------\n None\n\n \"\"\"\n\n output_dir = base_path + \"cbg/\"\n os.makedirs(output_dir, exist_ok=True)\n\n for index, gene in enumerate(cbg.columns):\n\n if verbose:\n if index % 100 == 0:\n print(index)\n\n # Extract the column as a DataFrame as a copy\n col_df = cbg[[gene]].copy()\n\n col_df = col_df.sparse.to_dense()\n col_df = col_df.astype(int)\n\n # necessary to prevent error in to_parquet\n inst_df = pd.DataFrame(\n col_df.values, columns=[gene], index=col_df.index.tolist()\n )\n\n inst_df.replace(0, pd.NA, inplace=True)\n inst_df.dropna(how=\"all\", inplace=True)\n\n if inst_df.shape[0] > 0:\n inst_df.to_parquet(os.path.join(output_dir, f\"{gene}.parquet\"))\n
landscape: Functions related to landscape operations.
init: Initialization functions and utilities.
"},{"location":"python/viz/api/","title":"Viz Module API Reference","text":""},{"location":"python/viz/api/#widget-classes","title":"Widget Classes","text":""},{"location":"python/viz/api/#celldega.viz.widget.Landscape","title":"Landscape","text":"
Bases: AnyWidget
A widget for visualizing a 'landscape' view of spatial omics data.
Name Type Description Default ini_x float
The initial x-coordinate of the view.
required ini_y float
The initial y-coordinate of the view.
required ini_zoom float
The initial zoom level of the view.
required bounce_time int
The time taken for the view to bounce back after panning.
required token str
The token traitlet.
required base_url str
The base URL for the widget.
required dataset_name str
The name of the dataset to visualize. This will show up in the user interface bar.
Name Type Description component str
The name of the component.
The technology used.
The base URL for the widget.
The token traitlet.
The initial x-coordinate of the view.
The initial y-coordinate of the view.
The initial z-coordinate of the view.
The initial zoom level of the view.
The name of the dataset to visualize.
The dictionary to trigger updates.
The dictionary containing cell cluster information.
Name Type Description Landscape
A widget for visualizing a 'landscape' view of spatial omics data.
Source code in src/celldega/viz/
class Landscape(anywidget.AnyWidget):\n \"\"\"\n A widget for visualizing a 'landscape' view of spatial omics data.\n\n Args:\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n bounce_time (int): The time taken for the view to bounce back after panning.\n token (str): The token traitlet.\n base_url (str): The base URL for the widget.\n dataset_name (str, optional): The name of the dataset to visualize. This will show up in the user interface bar.\n\n Attributes:\n component (str): The name of the component.\n technology (str): The technology used.\n base_url (str): The base URL for the widget.\n token (str): The token traitlet.\n ini_x (float): The initial x-coordinate of the view.\n ini_y (float): The initial y-coordinate of the view.\n ini_z (float): The initial z-coordinate of the view.\n ini_zoom (float): The initial zoom level of the view.\n dataset_name (str): The name of the dataset to visualize.\n update_trigger (dict): The dictionary to trigger updates.\n cell_clusters (dict): The dictionary containing cell cluster information.\n\n Returns:\n Landscape: A widget for visualizing a 'landscape' view of spatial omics data.\n \"\"\"\n\n _esm = pathlib.Path(__file__).parent / \"../static\" / \"widget.js\"\n _css = pathlib.Path(__file__).parent / \"../static\" / \"widget.css\"\n component = traitlets.Unicode(\"Landscape\").tag(sync=True)\n\n technology = traitlets.Unicode(\"sst\").tag(sync=True)\n base_url = traitlets.Unicode(\"\").tag(sync=True)\n token = traitlets.Unicode(\"\").tag(sync=True)\n ini_x = traitlets.Float(1000).tag(sync=True)\n ini_y = traitlets.Float(1000).tag(sync=True)\n ini_z = traitlets.Float(0).tag(sync=True)\n ini_zoom = traitlets.Float(0).tag(sync=True)\n dataset_name = traitlets.Unicode(\"\").tag(sync=True)\n\n update_trigger = traitlets.Dict().tag(sync=True)\n cell_clusters = traitlets.Dict().tag(sync=True)\n\n def 
trigger_update(self, new_value):\n # This method updates the update_trigger traitlet with a new value\n # You can pass any information necessary for the update, or just a timestamp\n self.update_trigger = new_value\n\n def update_cell_clusters(self, new_clusters):\n # Convert the new_clusters to a JSON serializable format if necessary\n self.cell_clusters = new_clusters\n