Source code for clabtoolkit.morphometrytools

import os
from typing import Union, Tuple, Optional, Dict, List
import copy
import json
import warnings
import tempfile

import pandas as pd
import nibabel as nib
from nibabel.processing import resample_from_to
import numpy as np
from pathlib import Path

# Importing local modules
from . import misctools as cltmisc
from . import surfacetools as cltsurf
from . import parcellationtools as cltparc
from . import bidstools as cltbids
from . import freesurfertools as cltfree
from . import colorstools as cltcol


####################################################################################################
####################################################################################################
############                                                                            ############
############                                                                            ############
############      Section 1: Methods dedicated to compute metrics from surfaces         ############
############                                                                            ############
############                                                                            ############
####################################################################################################
####################################################################################################

[docs]
def compute_reg_val_fromannot(
    metric_file: Union[str, np.ndarray],
    parc_file: Union[str, cltfree.AnnotParcellation],
    hemi: str,
    output_table: str = None,
    nonzeros_only: bool = False,
    metric: str = "unknown",
    units: str = None,
    stats_list: Union[str, list] = ["value", "median", "std", "min", "max"],
    table_type: str = "metric",
    include_unknown: bool = False,
    include_global: bool = True,
    add_bids_entities: bool = True,
) -> Tuple[pd.DataFrame, np.ndarray, Optional[str]]:
    """
    Compute regional statistics from a surface metric file and an annotation file.

    This function extracts regional values by combining vertex-wise surface metrics with
    anatomical parcellation data. It supports various statistical measures and output formats.

    Parameters
    ----------
    metric_file : str or np.ndarray
        Path to the surface map file or array containing metric values for each vertex.

    parc_file : str or cltfree.AnnotParcellation
        Path to the annotation file or AnnotParcellation object defining regions.

    hemi : str
        Hemisphere identifier ('lh' or 'rh').

    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.

    nonzeros_only : bool, default=False
        Whether to compute statistics using only non-zero values within each region. If False, all values are used.

    metric : str, default="unknown"
        Name of the metric being analyzed. Used for naming columns in the output DataFrame.

    units : str, optional
        Units of the metric. If None, units are determined from the metric name.

    stats_list : str or list, default=["value", "median", "std", "min", "max"]
        Statistics to compute for each region. Note: "value" is equivalent to the mean.

    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific statistic for each region
        - "region": Each column represents a region, with rows for different statistics

    include_unknown : bool, default=False
        Whether to include non-anatomical regions (medialwall, unknown, corpuscallosum).

    include_global : bool, default=True
        Whether to include hemisphere-wide statistics in the output.

    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed regional statistics.

    metric_vect : np.ndarray
        Array of metric values.

    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> hemi = 'lh'
    >>> metric_name = 'thickness'
    >>> fs_dir = os.environ.get('FREESURFER_HOME')
    >>> metric_file = os.path.join(fs_dir, 'subjects', 'bert', 'surf', f'{hemi}.{metric_name}')
    >>> parc_file = os.path.join(fs_dir, 'subjects', 'bert', 'label', f'{hemi}.aparc.annot')
    >>> df_region, metric_values, _ = morpho.compute_reg_val_fromannot(
    ...     metric_file, parc_file, hemi, metric=metric_name, include_global=False
    ... )

    Using region format for output:

    >>> df_metric, _, _ = morpho.compute_reg_val_fromannot(
    ...     metric_file, parc_file, hemi, metric=metric_name,
    ...     include_global=False, table_type="region", add_bids_entities=True
    ... )

    Including hemisphere-wide statistics and saving to file:

    >>> output_path = '/path/to/output/regional_stats.csv'
    >>> df_global, _, saved_path = morpho.compute_reg_val_fromannot(
    ...     metric_file, parc_file, hemi, output_table=output_path,
    ...     metric=metric_name, include_global=True
    ... )
    >>> print(df_global.head())
    """

    # Input validation
    if isinstance(stats_list, str):
        stats_list = [stats_list]

    stats_list = [stat.lower() for stat in stats_list]

    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Process parcellation file
    if isinstance(parc_file, str):
        if not os.path.exists(parc_file):
            raise FileNotFoundError(f"Annotation file not found: {parc_file}")

        sparc_data = cltfree.AnnotParcellation()
        sparc_data.load_from_file(parc_file=parc_file)

    elif isinstance(parc_file, cltfree.AnnotParcellation):
        sparc_data = copy.deepcopy(parc_file)
    else:
        raise TypeError(
            f"parc_file must be a string or AnnotParcellation object, got {type(parc_file)}"
        )

    # Process metric file
    filename = ""
    if isinstance(metric_file, str):
        if not os.path.exists(metric_file):
            raise FileNotFoundError(f"Metric file not found: {metric_file}")

        metric_vect = nib.freesurfer.io.read_morph_data(metric_file)
        filename = metric_file
    elif isinstance(metric_file, np.ndarray):
        metric_vect = metric_file
    else:
        raise TypeError(
            f"metric_file must be a string or numpy array, got {type(metric_file)}"
        )

    # Filter unknown regions if needed
    if not include_unknown:
        tmp_names = sparc_data.regnames
        unk_indexes = cltmisc.get_indexes_by_substring(
            tmp_names, ["medialwall", "unknown", "corpuscallosum"]
        )

        if len(unk_indexes) > 0:
            unk_codes = sparc_data.regtable[unk_indexes, 4]
            unk_vert = np.isin(sparc_data.codes, unk_codes)

            sparc_data.codes[unk_vert] = 0
            sparc_data.regnames = np.delete(sparc_data.regnames, unk_indexes).tolist()
            sparc_data.regtable = np.delete(sparc_data.regtable, unk_indexes, axis=0)

    # Clean up codes that don't exist in the region table
    unique_codes = np.unique(sparc_data.codes)
    not_in_table = np.setdiff1d(unique_codes, sparc_data.regtable[:, 4])
    sparc_data.codes[np.isin(sparc_data.codes, not_in_table)] = 0

    # Get unique valid region codes
    sts = np.unique(sparc_data.codes)
    sts = sts[sts != 0]

    # Prepare data structures for results
    dict_of_cols = {}

    # Compute global hemisphere statistics if requested
    if include_global:
        valid_vertices = np.isin(sparc_data.codes, sparc_data.regtable[:, 4])
        global_stats = stats_from_vector(
            metric_vect[valid_vertices], stats_list, nonzeros_only=nonzeros_only
        )
        dict_of_cols[f"ctx-{hemi}-hemisphere"] = global_stats

    # Compute statistics for each region
    for regname in sparc_data.regnames:
        index = cltmisc.get_indexes_by_substring(
            sparc_data.regnames, regname, match_entire_word=True
        )

        if len(index):
            region_mask = sparc_data.codes == sparc_data.regtable[index, 4]
            region_stats = stats_from_vector(
                metric_vect[region_mask], stats_list, nonzeros_only=nonzeros_only
            )
            dict_of_cols[regname] = region_stats
        else:
            dict_of_cols[regname] = [0] * len(stats_list)

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Add column prefixes
    colnames = df.columns.tolist()
    colnames = cltmisc.correct_names(colnames, prefix=f"ctx-{hemi}-")
    df.columns = colnames

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = [stat_name.title() for stat_name in stats_list]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = [stat_name.title() for stat_name in stats_list]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)
        df.insert(0, "Supraregion", reg_names[0])
        df.insert(1, "Hemisphere", reg_names[1])

    # Add metadata columns
    nrows = df.shape[0]

    if units is None:
        units = get_units(metric)[0]

    df.insert(0, "Source", ["vertices"] * nrows)
    df.insert(1, "Metric", [metric] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(metric_file, str):
        ent_list = cltbids.entities4table()
        df_add = cltbids.entities_to_table(
            filepath=metric_file, entities_to_extract=ent_list
        )
        df = cltmisc.expand_and_concatenate(df_add, df)

    # Save table if requested
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)

    return df, metric_vect, output_table



####################################################################################################

[docs]
def compute_reg_area_fromsurf(
    surf_file: Union[str, cltsurf.Surface],
    parc_file: Union[str, cltfree.AnnotParcellation],
    hemi: str,
    table_type: str = "metric",
    surf_type: str = "",
    include_unknown: bool = False,
    include_global: bool = True,
    add_bids_entities: bool = True,
    output_table: str = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Compute surface area for each region defined in an annotation file.

    This function calculates the area for anatomical regions by combining
    surface mesh data with parcellation information. It supports different
    output formats and can include global hemisphere measurements.

    Parameters
    ----------
    surf_file : str or cltsurf.Surface
        Path to the surface file or Surface object containing mesh data.
    parc_file : str or cltfree.AnnotParcellation
        Path to the annotation file or AnnotParcellation object defining regions.
    hemi : str
        Hemisphere identifier ('lh' or 'rh').
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each row represents a region with area value in a column
        - "region": Each column represents a region with area values in rows
    surf_type : str, default=""
        Description of the surface type (e.g., "white", "pial"). Used for metadata.
    include_unknown : bool, default=False
        Whether to include non-anatomical regions (medialwall, unknown, corpuscallosum).
    include_global : bool, default=True
        Whether to include hemisphere-wide area calculations in the output.
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed regional area values.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> fs_dir = os.environ.get('FREESURFER_HOME')
    >>> surf_file = os.path.join(fs_dir, 'subjects', 'fsaverage', 'surf', 'lh.white')
    >>> parc_file = os.path.join(fs_dir, 'subjects', 'fsaverage', 'label', 'lh.aparc.annot')
    >>> df_area, _ = morpho.compute_reg_area_fromsurf(surf_file, parc_file, 'lh', surf_type="white")
    >>> print(df_area.head())

    Using region format for output:

    >>> df_region, _ = morpho.compute_reg_area_fromsurf(
    ...     surf_file, parc_file, 'lh',
    ...     table_type="region", surf_type="white", include_global=False
    ... )
    >>> print(df_region.head())

    Using Surface and AnnotParcellation objects:

    >>> import clabtoolkit.surfacetools as cltsurf
    >>> import clabtoolkit.freesurfertools as cltfree
    >>> surf = cltsurf.Surface(surface_file=surf_file)
    >>> annot = cltfree.AnnotParcellation(parc_file=parc_file)
    >>> df_obj, _ = morpho.compute_reg_area_fromsurf(surf, annot, 'lh', surf_type="white")
    >>> print(df_obj.head())

    Saving results to a file:

    >>> output_path = '/path/to/area_stats.tsv'
    >>> df_out, saved_path = morpho.compute_reg_area_fromsurf(
    ...     surf_file, parc_file, 'lh', output_table=output_path, surf_type="white"
    ... )
    >>> print(f"Table saved to: {saved_path}")
    """
    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Process parcellation file
    if isinstance(parc_file, str):
        if not os.path.exists(parc_file):
            raise FileNotFoundError(f"Annotation file not found: {parc_file}")

        sparc_data = cltfree.AnnotParcellation()
        sparc_data.load_from_file(parc_file=parc_file)

    elif isinstance(parc_file, cltfree.AnnotParcellation):
        sparc_data = copy.deepcopy(parc_file)
    else:
        raise TypeError(
            f"parc_file must be a string or AnnotParcellation object, got {type(parc_file)}"
        )

    # Process surface file
    filename = ""
    if isinstance(surf_file, str):
        if not os.path.exists(surf_file):
            raise FileNotFoundError(f"Surface file not found: {surf_file}")

        surf = cltsurf.Surface(surface_file=surf_file)
        filename = surf_file
    elif isinstance(surf_file, cltsurf.Surface):
        surf = copy.deepcopy(surf_file)
    else:
        raise TypeError(
            f"surf_file must be a string or Surface object, got {type(surf_file)}"
        )

    # Extract mesh data
    coords = surf.mesh.points
    faces = surf.get_faces()

    # Filter unknown regions if needed
    if not include_unknown:
        tmp_names = sparc_data.regnames
        unk_indexes = cltmisc.get_indexes_by_substring(
            tmp_names, ["medialwall", "unknown", "corpuscallosum"]
        )

        if len(unk_indexes) > 0:
            unk_codes = sparc_data.regtable[unk_indexes, 4]
            unk_vert = np.isin(sparc_data.codes, unk_codes)

            sparc_data.codes[unk_vert] = 0
            sparc_data.regnames = np.delete(sparc_data.regnames, unk_indexes).tolist()
            sparc_data.regtable = np.delete(sparc_data.regtable, unk_indexes, axis=0)

    # Clean up codes that don't exist in the region table
    unique_codes = np.unique(sparc_data.codes)
    not_in_table = np.setdiff1d(unique_codes, sparc_data.regtable[:, 4])
    sparc_data.codes[np.isin(sparc_data.codes, not_in_table)] = 0

    # Calculate area for each region
    dict_of_cols = {}

    for regname in sparc_data.regnames:
        # Get the index of the region in the color table
        index = cltmisc.get_indexes_by_substring(
            sparc_data.regnames, regname, match_entire_word=True
        )

        if len(index):
            # Find vertices belonging to this region
            ind = np.where(sparc_data.codes == sparc_data.regtable[index, 4])

            # Identify faces with different numbers of vertices in this region
            temp = np.isin(faces, ind).astype(int)
            nps = np.sum(temp, axis=1)

            # Group faces by how many vertices belong to the region
            reg_faces_3v = np.squeeze(
                faces[np.where(nps == 3), :]
            )  # All vertices in region
            reg_faces_2v = np.squeeze(
                faces[np.where(nps == 2), :]
            )  # Two vertices in region
            reg_faces_1v = np.squeeze(
                faces[np.where(nps == 1), :]
            )  # One vertex in region

            # Calculate area for each group
            temp_3v, _ = area_from_mesh(coords, reg_faces_3v)
            temp_2v, _ = area_from_mesh(coords, reg_faces_2v)
            temp_1v, _ = area_from_mesh(coords, reg_faces_1v)

            # Sum areas
            dict_of_cols[regname] = [temp_3v + temp_2v + temp_1v]
        else:
            dict_of_cols[regname] = [0]

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Add global area if requested
    if include_global:
        df.insert(0, f"ctx-{hemi}-hemisphere", df.sum(axis=1))

    # Add column prefixes
    colnames = df.columns.tolist()
    colnames = cltmisc.correct_names(colnames, prefix=f"ctx-{hemi}-")
    df.columns = colnames

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)
        df.insert(0, "Supraregion", reg_names[0])
        df.insert(1, "Hemisphere", reg_names[1])

    # Add metadata columns
    nrows = df.shape[0]
    units = get_units("area")[0]

    df.insert(0, "Source", [surf_type] * nrows)
    df.insert(1, "Metric", ["area"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(parc_file, str):
        ent_list = cltbids.entities4table()
        df_add = cltbids.entities_to_table(
            filepath=parc_file, entities_to_extract=ent_list
        )
        df = cltmisc.expand_and_concatenate(df_add, df)

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)

    return df, output_table



####################################################################################################

[docs]
def compute_reg_nvertices_fromsurf(
    surf_file: Union[str, cltsurf.Surface],
    parc_file: Union[str, cltfree.AnnotParcellation],
    hemi: str,
    table_type: str = "metric",
    surf_type: str = "",
    include_unknown: bool = False,
    include_global: bool = True,
    add_bids_entities: bool = True,
    output_table: str = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Compute the number of vertices for each region defined in an annotation file.

    This function calculates the number of vertices for anatomical regions by combining
    surface mesh data with parcellation information. It supports different output formats
    and can include global hemisphere measurements.

    Parameters
    ----------
    surf_file : str or cltsurf.Surface
        Path to the surface file or Surface object containing mesh data.

    parc_file : str or cltfree.AnnotParcellation
        Path to the annotation file or AnnotParcellation object defining regions.

    hemi : str
        Hemisphere identifier ('lh' or 'rh').

    table_type : str, default="metric"
        Output format specification:
        - "metric": Each row represents a region with area value in a column
        - "region": Each column represents a region with area values in rows

    surf_type : str, default=""
        Description of the surface type (e.g., "white", "pial"). Used for metadata.

    include_unknown : bool, default=False
        Whether to include non-anatomical regions (medialwall, unknown, corpuscallosum).

    include_global : bool, default=True
        Whether to include hemisphere-wide area calculations in the output.

    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.

    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed regional area values.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> fs_dir = os.environ.get('FREESURFER_HOME')
    >>> surf_file = os.path.join(fs_dir, 'subjects', 'fsaverage', 'surf', 'lh.white')
    >>> parc_file = os.path.join(fs_dir, 'subjects', 'fsaverage', 'label', 'lh.aparc.annot')
    >>> df_area, _ = morpho.compute_reg_area_fromsurf(surf_file, parc_file, 'lh', surf_type="white")
    >>> print(df_area.head())

    Using region format for output:

    >>> df_region, _ = morpho.compute_reg_area_fromsurf(
    ...     surf_file, parc_file, 'lh',
    ...     table_type="region", surf_type="white", include_global=False
    ... )
    >>> print(df_region.head())

    Using Surface and AnnotParcellation objects:

    >>> import clabtoolkit.surfacetools as cltsurf
    >>> import clabtoolkit.freesurfertools as cltfree
    >>> surf = cltsurf.Surface(surface_file=surf_file)
    >>> annot = cltfree.AnnotParcellation(parc_file=parc_file)
    >>> df_obj, _ = morpho.compute_reg_area_fromsurf(surf, annot, 'lh', surf_type="white")
    >>> print(df_obj.head())

    Saving results to a file:

    >>> output_path = '/path/to/area_stats.tsv'
    >>> df_out, saved_path = morpho.compute_reg_area_fromsurf(
    ...     surf_file, parc_file, 'lh', output_table=output_path, surf_type="white"
    ... )
    >>> print(f"Table saved to: {saved_path}")
    """
    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Process parcellation file
    if isinstance(parc_file, str):
        if not os.path.exists(parc_file):
            raise FileNotFoundError(f"Annotation file not found: {parc_file}")

        sparc_data = cltfree.AnnotParcellation()
        sparc_data.load_from_file(parc_file=parc_file)

    elif isinstance(parc_file, cltfree.AnnotParcellation):
        sparc_data = copy.deepcopy(parc_file)
    else:
        raise TypeError(
            f"parc_file must be a string or AnnotParcellation object, got {type(parc_file)}"
        )

    # Process surface file
    filename = ""
    if isinstance(surf_file, str):
        if not os.path.exists(surf_file):
            raise FileNotFoundError(f"Surface file not found: {surf_file}")

        surf = cltsurf.Surface(surface_file=surf_file)
        filename = surf_file
    elif isinstance(surf_file, cltsurf.Surface):
        surf = copy.deepcopy(surf_file)
    else:
        raise TypeError(
            f"surf_file must be a string or Surface object, got {type(surf_file)}"
        )

    # Filter unknown regions if needed
    if not include_unknown:
        tmp_names = sparc_data.regnames
        unk_indexes = cltmisc.get_indexes_by_substring(
            tmp_names, ["medialwall", "unknown", "corpuscallosum"]
        )

        if len(unk_indexes) > 0:
            unk_codes = sparc_data.regtable[unk_indexes, 4]
            unk_vert = np.isin(sparc_data.codes, unk_codes)

            sparc_data.codes[unk_vert] = 0
            sparc_data.regnames = np.delete(sparc_data.regnames, unk_indexes).tolist()
            sparc_data.regtable = np.delete(sparc_data.regtable, unk_indexes, axis=0)

    # Clean up codes that don't exist in the region table
    unique_codes = np.unique(sparc_data.codes)
    not_in_table = np.setdiff1d(unique_codes, sparc_data.regtable[:, 4])
    sparc_data.codes[np.isin(sparc_data.codes, not_in_table)] = 0

    # Calculate area for each region
    dict_of_cols = {}

    for regname in sparc_data.regnames:
        # Get the index of the region in the color table

        index = cltmisc.get_indexes_by_substring(
            sparc_data.regnames, regname, match_entire_word=True
        )

        if len(index):
            # Find vertices belonging to this region
            ind = np.where(sparc_data.codes == sparc_data.regtable[index, 4])
            nvert = len(ind[0])

            # Sum areas
            dict_of_cols[regname] = [nvert]
        else:
            dict_of_cols[regname] = [0]

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Add global area if requested
    if include_global:
        df.insert(0, f"ctx-{hemi}-hemisphere", df.sum(axis=1))

    # Add column prefixes
    colnames = df.columns.tolist()
    colnames = cltmisc.correct_names(colnames, prefix=f"ctx-{hemi}-")
    df.columns = colnames

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)
        df.insert(0, "Supraregion", reg_names[0])
        df.insert(1, "Hemisphere", reg_names[1])

    # Add metadata columns
    nrows = df.shape[0]
    units = get_units("nvertices")[0]

    df.insert(0, "Source", [surf_type] * nrows)
    df.insert(1, "Metric", ["nvertices"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(parc_file, str):
        ent_list = cltbids.entities4table()
        df_add = cltbids.entities_to_table(
            filepath=parc_file, entities_to_extract=ent_list
        )
        df = cltmisc.expand_and_concatenate(df_add, df)

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)

    return df, output_table



####################################################################################################

[docs]
def compute_euler_fromsurf(
    surf_file: Union[str, cltsurf.Surface],
    hemi: str,
    output_table: str = None,
    table_type: str = "metric",
    surf_type: str = "",
    add_bids_entities: bool = True,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Compute the Euler characteristic of a surface mesh.

    This function calculates the Euler characteristic (χ = V - E + F) of a surface mesh,
    which is a topological invariant that provides information about the surface's topology.

    Parameters
    ----------
    surf_file : str or cltsurf.Surface
        Path to the surface file or Surface object containing the mesh.
    hemi : str
        Hemisphere identifier ('lh' or 'rh').
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific metric for each region
        - "region": Each column represents a region, with rows for different metrics
    surf_type : str, default=""
        Type of surface (e.g., "white", "pial") for metadata. If empty, determined from filename.
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed Euler characteristic.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> fs_dir = os.environ.get('FREESURFER_HOME')
    >>> hemi = 'lh'
    >>> surf_file = os.path.join(fs_dir, 'subjects', 'bert', 'surf', f'{hemi}.white')
    >>> df, _ = morpho.compute_euler_fromsurf(surf_file, hemi)
    >>> print(df.head())

    Using region format for output:

    >>> df_region, _ = morpho.compute_euler_fromsurf(
    ...     surf_file, hemi, table_type="region", add_bids_entities=True
    ... )
    >>> print(df_region.head())

    Saving results to a file:

    >>> output_path = '/path/to/output/euler_stats.csv'
    >>> df_saved, saved_path = morpho.compute_euler_fromsurf(
    ...     surf_file, hemi, output_table=output_path
    ... )
    >>> print(f"Table saved to: {saved_path}")

    Notes
    -----
    The Euler characteristic (χ) is calculated as χ = V - E + F, where:
    - V is the number of vertices
    - E is the number of edges
    - F is the number of faces

    For a closed, orientable surface without boundaries, the Euler characteristic
    is related to the genus (g) by the formula: χ = 2 - 2g.
    """
    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Process surface file
    filename = ""
    if isinstance(surf_file, str):
        if not os.path.exists(surf_file):
            raise FileNotFoundError(f"Surface file not found: {surf_file}")

        surf = cltsurf.Surface(surface_file=surf_file)
        filename = surf_file

        # Extract surface type from filename if not provided
        if not surf_type and os.path.basename(surf_file).split(".")[-1] not in [
            "gii",
            "vtk",
        ]:
            surf_type = os.path.basename(surf_file).split(".")[-1]
    elif isinstance(surf_file, cltsurf.Surface):
        surf = copy.deepcopy(surf_file)
    else:
        raise TypeError(
            f"surf_file must be a string or Surface object, got {type(surf_file)}"
        )

    # Extract mesh components
    coords = surf.mesh.points
    faces = surf.get_faces()

    # Compute Euler characteristic
    euler = euler_from_mesh(coords, faces)

    # Create dictionary for DataFrame
    dict_of_cols = {}
    dict_of_cols[f"ctx-{hemi}-hemisphere"] = [euler]

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Add column prefixes
    colnames = df.columns.tolist()
    colnames = cltmisc.correct_names(colnames, prefix=f"ctx-{hemi}-")
    df.columns = colnames

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)
        df.insert(0, "Supraregion", reg_names[0])
        df.insert(1, "Hemisphere", reg_names[1])

    # Add metadata columns
    nrows = df.shape[0]
    units = (
        get_units("euler")[0]
        if isinstance(get_units("euler"), list)
        else get_units("euler")
    )

    df.insert(0, "Source", [surf_type] * nrows)
    df.insert(1, "Metric", ["euler"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(surf_file, str):
        ent_list = cltbids.entities4table()
        df_add = cltbids.entities_to_table(
            filepath=surf_file, entities_to_extract=ent_list
        )
        df = cltmisc.expand_and_concatenate(df_add, df)

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    output_path = None
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)
        output_path = output_table

    return df, output_path



####################################################################################################

[docs]
def area_from_mesh(coords: np.ndarray, faces: np.ndarray) -> Tuple[float, np.ndarray]:
    """
    Compute the total area and per-triangle areas of a mesh surface.

    This function calculates the area of each triangle in a mesh using Heron's formula
    and returns both the total surface area and individual triangle areas.

    Parameters
    ----------
    coords : np.ndarray
        Coordinates of the vertices of the mesh.
        Shape must be (n, 3) where n is the number of vertices.
        Each row contains the [x, y, z] coordinates of a vertex.

    faces : np.ndarray
        Triangular faces of the mesh defined by vertex indices.
        Shape must be (m, 3) where m is the number of faces.
        Each row contains three indices referring to vertices in the coords array.

    Returns
    -------
    face_area : float
        Total surface area of the mesh in square centimeters (cm²).

    tri_area : np.ndarray
        Array of areas for each triangle in the mesh in square centimeters (cm²).
        Shape is (m,) where m is the number of faces.

    Notes
    -----
    The function uses Heron's formula to calculate the area of each triangle:
        Area = √(s(s-a)(s-b)(s-c))
    where s is the semi-perimeter: s = (a + b + c)/2, and a, b, c are the side lengths.

    The resulting areas are converted to square centimeters (cm²) by dividing by 100
    (assuming the input coordinates are in millimeters).

    Examples
    --------
    Calculate area of a simple mesh with two triangles:

    >>> import numpy as np
    >>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]])
    >>> faces = np.array([[0, 1, 2], [1, 3, 2]])
    >>> total_area, triangle_areas = area_from_mesh(coords, faces)
    >>> print(f"Total area: {total_area:.4f} cm²")
    Total area: 1.0000 cm²
    >>> print(f"Triangle areas: {triangle_areas}")
    Triangle areas: [0.5 0.5]

    Calculate area of a pyramid:

    >>> coords = np.array([[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0], [0.5, 0.5, 1]])
    >>> faces = np.array([[0, 1, 4], [1, 2, 4], [2, 3, 4], [3, 0, 4], [0, 2, 1], [0, 3, 2]])
    >>> total_area, _ = area_from_mesh(coords, faces)
    >>> print(f"Total area: {total_area:.4f} cm²")
    Total area: ...
    """
    # Input validation
    if coords.ndim != 2 or coords.shape[1] != 3:
        raise ValueError(f"coords must have shape (n, 3), got {coords.shape}")

    if faces.ndim != 2 or faces.shape[1] != 3:
        raise ValueError(f"faces must have shape (m, 3), got {faces.shape}")

    if np.any(faces >= coords.shape[0]) or np.any(faces < 0):
        raise ValueError("faces contains invalid vertex indices")

    # Extract vertex coordinates for each face
    v1 = coords[faces[:, 0]]
    v2 = coords[faces[:, 1]]
    v3 = coords[faces[:, 2]]

    # Compute edge lengths using Euclidean distance
    d12 = np.sqrt(np.sum((v1 - v2) ** 2, axis=1))
    d23 = np.sqrt(np.sum((v2 - v3) ** 2, axis=1))
    d31 = np.sqrt(np.sum((v3 - v1) ** 2, axis=1))

    # Compute semi-perimeter for each triangle
    s = (d12 + d23 + d31) / 2

    # Compute area of each triangle using Heron's formula
    # Division by 100 converts from mm² to cm²
    tri_area = np.sqrt(np.maximum(0, s * (s - d12) * (s - d23) * (s - d31))) / 100

    # Compute total mesh area
    face_area = np.sum(tri_area)

    return face_area, tri_area



####################################################################################################

[docs]
def euler_from_mesh(coords: np.ndarray, faces: np.ndarray) -> int:
    """
    Compute the Euler characteristic of a mesh surface.

    The Euler characteristic (χ) is a topological invariant that describes the shape or
    structure of a topological space regardless of how it is bent or deformed. For a mesh,
    it is calculated as χ = V - E + F, where V is the number of vertices, E is the number
    of edges, and F is the number of faces.

    Parameters
    ----------
    coords : np.ndarray
        Coordinates of the vertices of the mesh.
        Shape must be (n, 3) where n is the number of vertices.
        Each row contains the [x, y, z] coordinates of a vertex.

    faces : np.ndarray
        Triangular faces of the mesh defined by vertex indices.
        Shape must be (m, 3) where m is the number of faces.
        Each row contains three indices referring to vertices in the coords array.

    Returns
    -------
    euler : int
        Euler characteristic of the mesh.
        For a closed manifold surface of genus g, χ = 2 - 2g.
        - Sphere: χ = 2 (genus 0)
        - Torus: χ = 0 (genus 1)
        - Double torus: χ = -2 (genus 2)

    Notes
    -----
    The Euler characteristic provides information about the topology of a mesh:
    - For closed, orientable surfaces: χ = 2 - 2g, where g is the genus (number of "holes")
    - For surfaces with boundaries (like cortical surfaces): χ = 2 - 2g - b, where b is the
    number of boundary components

    A change in the Euler characteristic can indicate topological defects in a surface.

    Examples
    --------
    Calculate Euler characteristic of a tetrahedron (a closed surface):

    >>> import numpy as np
    >>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> faces = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
    >>> euler = euler_from_mesh(coords, faces)
    >>> print(f"Euler characteristic: {euler}")
    Euler characteristic: 2

    Calculate Euler characteristic of a simple two-triangle surface:

    >>> coords = np.array([[0, 0, 0], [1, 0, 0], [0, 1, 0], [1, 1, 0]])
    >>> faces = np.array([[0, 1, 2], [1, 2, 3]])
    >>> euler = euler_from_mesh(coords, faces)
    >>> print(f"Euler characteristic: {euler}")
    Euler characteristic: 1
    """
    # Input validation
    if coords.ndim != 2 or coords.shape[1] != 3:
        raise ValueError(f"coords must have shape (n, 3), got {coords.shape}")

    if faces.ndim != 2 or faces.shape[1] != 3:
        raise ValueError(f"faces must have shape (m, 3), got {faces.shape}")

    if np.any(faces >= coords.shape[0]) or np.any(faces < 0):
        raise ValueError("faces contains invalid vertex indices")

    # Step 1: Count vertices
    V = coords.shape[0]

    # Step 2: Count faces
    F = faces.shape[0]

    # Step 3: Count unique edges
    # Create an array of all edges from faces
    edges = np.vstack(
        [
            faces[:, [0, 1]],  # First edge of each face
            faces[:, [1, 2]],  # Second edge of each face
            faces[:, [2, 0]],  # Third edge of each face
        ]
    )

    # Sort each edge to ensure (v1,v2) and (v2,v1) are treated as the same edge
    edges = np.sort(edges, axis=1)

    # Remove duplicate edges using unique
    unique_edges = np.unique(edges, axis=0)

    # Count edges
    E = len(unique_edges)

    # Calculate Euler characteristic
    euler = V - E + F

    return euler



####################################################################################################
####################################################################################################
############                                                                            ############
############                                                                            ############
############     Section 2: Methods dedicated to compute metrics from parcellations     ############
############                                                                            ############
############                                                                            ############
####################################################################################################
####################################################################################################

[docs]
def compute_reg_val_fromparcellation(
    metric_file: Union[str, np.ndarray],
    parc_file: Union[str, cltparc.Parcellation, np.ndarray],
    output_table: str = None,
    metric: str = "unknown",
    units: str = None,
    stats_list: Union[str, list] = ["value", "median", "std", "min", "max"],
    nonzeros_only: bool = False,
    table_type: str = "metric",
    exclude_by_code: Union[list, np.ndarray] = None,
    exclude_by_name: Union[list, str] = None,
    include_by_code: Union[list, np.ndarray] = None,
    include_by_name: Union[list, str] = None,
    include_global: bool = True,
    add_bids_entities: bool = True,
    region_prefix: str = "supra-side",
    interp_method: str = "linear",
) -> Tuple[pd.DataFrame, np.ndarray, Optional[str]]:
    """
    Compute regional statistics from a volumetric metric map and a parcellation.

    This function extracts regional values by combining voxel-wise volumetric metrics with
    parcellation data defining anatomical regions. It supports various statistical measures
    and output formats to facilitate regional analysis of volumetric neuroimaging data.

    If the metric and parcellation files have different resolutions, the metric data will be
    automatically resampled to match the parcellation's resolution.

    Parameters
    ----------
    metric_file : str or np.ndarray
        Path to the volumetric metric file or array containing metric values for each voxel.
        If array, it should have the same dimensions as the parcellation data.

    parc_file : str, cltparc.Parcellation, or np.ndarray
        Path to the parcellation file, Parcellation object, or numpy array defining regions.
        Each unique integer value in the array represents a different anatomical region.

    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.

    metric : str, default="unknown"
        Name of the metric being analyzed. Used for naming columns in the output DataFrame
        and determining appropriate units.

    units : str, optional
        Units of the metric being analyzed. If None, units are determined based on the metric.
        Supported units include "intensity", "thickness", "area", "euler", "volume", etc.
        If not specified, the function will attempt to infer units from the metric name.

    stats_list : str or list, default=["value", "median", "std", "min", "max"]
        Statistics to compute for each region. Note: "value" is equivalent to the mean.
        Supported statistics: "value", "median", "std", "min", "max", "count", "sum".

    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific statistic for each region (regions as rows)
        - "region": Each column represents a region, with rows for different statistics

    exclude_by_code : list or np.ndarray, optional
        Region codes to exclude from the analysis. If None, no regions are excluded by code.
        Useful for excluding regions like ventricles or non-brain tissue.

    exclude_by_name : list or str, optional
        Region names to exclude from the analysis. If None, no regions are excluded by name.
        Example: ["Ventricles", "White-Matter"] to focus only on gray matter regions.

    include_by_code : list or np.ndarray, optional
        Region codes to include in the analysis. If None, all regions are included by code.
        This parameter takes precedence over exclude_by_code.

    include_by_name : list or str, optional
        Region names to include in the analysis. If None, all regions are included by name.
        This parameter takes precedence over exclude_by_name.

    include_global : bool, default=True
        Whether to include global statistics in the output DataFrame.

    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
        This extracts subject, session, and other metadata from the filename.

    region_prefix : str, default="region-unknown-"
        Prefix to use for region names when they cannot be determined from the parcellation object.
        The prefix will be combined with the region index number.

    interp_method : str, default="linear"
        Interpolation method to use when resampling the metric data to match parcellation resolution.
        Options include: "linear", "nearest", "cubic". Use "nearest" for categorical data.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed regional statistics.

    metric_data : np.ndarray
        Array of metric values used in the calculation.

    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> # Define paths to sample data
    >>> metric_file = os.path.join('data', 'sub-01', 'anat', 'sub-01_T1w_intensity.nii.gz')
    >>> parc_file = os.path.join('data', 'sub-01', 'anat', 'sub-01_T1w_parcellation.nii.gz')
    >>> # Compute regional statistics
    >>> df, metric_values, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, metric='intensity'
    ... )
    >>> # Display the first few rows of results
    >>> print(f"Number of regions: {df.shape[0]}")
    >>> print(df[['Region', 'Value', 'Median', 'Std']].head())

    Using region format for output (regions as columns, statistics as rows):

    >>> df_region, _, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, metric='intensity',
    ...     table_type="region", add_bids_entities=True
    ... )
    >>> # View statistics across regions
    >>> print(df_region[['Statistics', 'brain-brain-wholebrain']].head())

    Working with images that have different resolutions:

    >>> # High-resolution functional metric with lower-resolution anatomical parcellation
    >>> metric_file = os.path.join('data', 'sub-01', 'func', 'sub-01_task-rest_bold.nii.gz')
    >>> parc_file = os.path.join('data', 'sub-01', 'anat', 'sub-01_T1w_parcellation.nii.gz')
    >>> # The function will automatically resample the metric to the parcellation's space
    >>> df, _, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, metric='bold',
    ...     interpolation='linear'  # Use linear interpolation for continuous data
    ... )
    >>> print(df.head())

    Computing only specific statistics for each region:

    >>> df_custom_stats, _, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, metric='FA',
    ...     stats_list=['median', 'std'], table_type="metric"
    ... )
    >>> # View only median and standard deviation
    >>> print(df_custom_stats[['Region', 'Median', 'Std']].head())

    Excluding specific regions from analysis:

    >>> # Exclude regions by name
    >>> exclude_names = ["brain-left-ventricle", "brain-right-ventricle"]
    >>> df_filtered, _, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, metric='thickness',
    ...     exclude_by_name=exclude_names
    ... )
    >>> # Check that ventricles are not in results
    >>> ventricle_count = sum(1 for r in df_filtered['Region'] if 'ventricle' in r.lower())
    >>> print(f"Ventricle regions in results: {ventricle_count}")

    Saving the results to a file:

    >>> output_path = os.path.join('results', 'sub-01_regional_intensity.tsv')
    >>> df_saved, _, saved_path = morpho.compute_reg_val_fromparcellation(
    ...     metric_file, parc_file, output_table=output_path,
    ...     metric='intensity'
    ... )
    >>> print(f"Table saved to: {saved_path}")
    >>> # You can load this table later with pandas
    >>> import pandas as pd
    >>> df_loaded = pd.read_csv(saved_path, sep='\t')

    Working with in-memory data instead of files:

    >>> import numpy as np
    >>> import nibabel as nib
    >>> # Load data into memory first
    >>> metric_obj = nib.load(metric_file)
    >>> metric_data = metric_obj.get_fdata()
    >>> parc_obj = nib.load(parc_file)
    >>> parc_data = parc_obj.get_fdata()
    >>> # Process the in-memory arrays
    >>> df_memory, _, _ = morpho.compute_reg_val_fromparcellation(
    ...     metric_data, parc_data, metric='intensity',
    ...     add_bids_entities=False  # No BIDS entities for in-memory data
    ... )
    >>> print(df_memory.head())

    Notes
    -----
    This function is designed for volumetric data, extracting statistics from voxel-wise
    metrics within each region defined by a parcellation. For surface-based metrics,
    consider using `compute_reg_val_fromannot` instead.

    The function handles both file paths and in-memory arrays, making it versatile for
    different workflows. When working with arrays directly, ensure the metric and
    parcellation arrays have the same dimensions.

    When metric and parcellation images have different resolutions, the metric data is
    automatically resampled to match the parcellation's resolution using the specified
    interpolation method. For continuous metrics (like intensity), linear or cubic
    interpolation is recommended. For categorical data, use 'nearest' interpolation.

    When working with BIDS-formatted data, setting `add_bids_entities=True` will extract
    subject, session, and other metadata from the filename to include in the output table.

    See Also
    --------
    compute_reg_val_fromannot : Similar function for surface-based metrics and annotations
    """

    # Input validation
    if isinstance(stats_list, str):
        stats_list = [stats_list]

    stats_list = [stat.lower() for stat in stats_list]

    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    if interp_method not in ["linear", "nearest", "cubic"]:
        raise ValueError(
            f"Invalid interpolation: '{interp_method}'. Expected 'linear', 'nearest', or 'cubic'."
        )

    # Process parcellation file
    parc_img = None
    if isinstance(parc_file, str):
        if not os.path.exists(parc_file):
            raise FileNotFoundError(f"Parcellation file not found: {parc_file}")

        parc_img = nib.load(parc_file)
        vparc_data = cltparc.Parcellation(parc_file=parc_file)
    elif isinstance(parc_file, cltparc.Parcellation):
        vparc_data = copy.deepcopy(parc_file)
        if hasattr(vparc_data, "img"):
            parc_img = vparc_data.img
    elif isinstance(parc_file, np.ndarray):
        vparc_data = cltparc.Parcellation(parc_file=parc_file)
    else:
        raise TypeError(
            f"parc_file must be a string, Parcellation object, or numpy array, got {type(parc_file)}"
        )

    # Process metric file
    metric_img = None
    filename = ""
    if isinstance(metric_file, str):
        if not os.path.exists(metric_file):
            raise FileNotFoundError(f"Metric file not found: {metric_file}")

        metric_img = nib.load(metric_file)
        metric_vol = metric_img.get_fdata()
        filename = metric_file
    elif isinstance(metric_file, np.ndarray):
        metric_vol = metric_file
    else:
        raise TypeError(
            f"metric_file must be a string or numpy array, got {type(metric_file)}"
        )

    # Handle resolution mismatch when we have both images
    temp_file = None
    if metric_img is not None and parc_img is not None:
        # Check if dimensions or affines don't match
        metric_shape = metric_img.shape
        parc_shape = parc_img.shape

        if (metric_shape != parc_shape) or not np.allclose(
            metric_img.affine, parc_img.affine
        ):
            warnings.warn(
                f"Metric image ({metric_shape}) and parcellation image ({parc_shape}) have different "
                f"dimensions or orientations. Resampling metric to match parcellation."
            )

            # Resample metric to match parcellation space
            resampled_metric_img = resample_from_to(
                metric_img,
                parc_img,
                order={"linear": 1, "nearest": 0, "cubic": 3}[interp_method],
            )

            # Save to temporary file if needed for other operations
            with tempfile.NamedTemporaryFile(suffix=".nii.gz", delete=False) as f:
                temp_file = f.name

            nib.save(resampled_metric_img, temp_file)

            # Update metric volume with resampled data
            metric_vol = resampled_metric_img.get_fdata()

    # Check that dimensions match with parcellation data
    if metric_vol.shape != vparc_data.data.shape:
        # If we already resampled and still don't match, there's a problem
        if temp_file:
            os.unlink(temp_file)  # Clean up temp file
            raise ValueError(
                f"Resampled metric data shape {metric_vol.shape} still does not match "
                f"parcellation data shape {vparc_data.data.shape}. Please check your inputs."
            )
        else:
            raise ValueError(
                f"Metric data shape {metric_vol.shape} does not match parcellation data shape "
                f"{vparc_data.data.shape}. Use file inputs instead of arrays for automatic resampling."
            )

    # Apply exclusions if specified
    if exclude_by_code is not None:
        vparc_data.remove_by_code(codes2remove=exclude_by_code)

    if exclude_by_name is not None:
        vparc_data.remove_by_name(names2remove=exclude_by_name)

    # Apply inclusion if specified
    if include_by_code is not None:
        vparc_data.keep_by_code(codes2keep=include_by_code)

    if include_by_name is not None:
        vparc_data.keep_by_name(names2keep=include_by_name)

    # Prepare data structures for results
    dict_of_cols = {}

    if include_global:
        # Compute global brain statistics (non-zero parcellation values)
        brain_mask = vparc_data.data != 0
        if np.any(brain_mask):  # Check if there are any non-zero values
            global_stats = stats_from_vector(
                metric_vol[brain_mask], stats_list, nonzeros_only=nonzeros_only
            )
            dict_of_cols["brain-brain-wholebrain"] = global_stats
        else:
            # Handle empty/invalid parcellation
            dict_of_cols["brain-brain-wholebrain"] = [0] * len(stats_list)

    # Compute statistics for each region
    # Use unique region indices from the data itself
    unique_indices = np.unique(vparc_data.data)
    unique_indices = unique_indices[unique_indices != 0]  # Exclude background

    for index in unique_indices:
        # Get region name from the parcellation object if available
        if hasattr(vparc_data, "name") and hasattr(vparc_data, "index"):
            idx_pos = np.where(np.array(vparc_data.index) == index)[0]
            if len(idx_pos) > 0:
                regname = vparc_data.name[idx_pos[0]]
            else:
                regname = cltmisc.create_names_from_indices(index, prefix=region_prefix)
        else:
            regname = cltmisc.create_names_from_indices(index, prefix=region_prefix)

        region_mask = vparc_data.data == index

        if np.any(region_mask):
            region_values = metric_vol[region_mask]
            if len(region_values) > 0:  # Check if there are any values
                region_stats = stats_from_vector(
                    region_values, stats_list, nonzeros_only=nonzeros_only
                )
                dict_of_cols[regname] = region_stats
            else:
                dict_of_cols[regname] = [0] * len(stats_list)
        else:
            dict_of_cols[regname] = [0] * len(stats_list)

    # Check if we found any regions
    if len(dict_of_cols) == 0:
        if temp_file:
            os.unlink(temp_file)  # Clean up temp file
        raise ValueError("No valid regions found in the parcellation data")

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = [stat_name.title() for stat_name in stats_list]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = [stat_name.title() for stat_name in stats_list]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)

        # Safely handle region names that might not have 3 components
        if reg_names.shape[1] >= 3:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", reg_names[1])
        elif reg_names.shape[1] == 2:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", "unknown")
        else:
            df.insert(0, "Supraregion", "unknown")
            df.insert(1, "Hemisphere", "unknown")

    # Add metadata columns
    nrows = df.shape[0]
    if units is None:
        units = get_units(metric)

    if isinstance(units, list) and len(units) > 0:
        units = units[0]
    elif units is None or (isinstance(units, list) and len(units) == 0):
        units = "unknown"

    df.insert(0, "Source", ["volume"] * nrows)
    df.insert(1, "Metric", [metric] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(metric_file, str):
        try:
            ent_list = cltbids.entities4table()
            df_add = cltbids.entities_to_table(
                filepath=metric_file, entities_to_extract=ent_list
            )
            df = cltmisc.expand_and_concatenate(df_add, df)
        except Exception as e:
            warnings.warn(f"Could not add BIDS entities: {str(e)}")

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    output_path = None
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)
        output_path = output_table

    # Clean up temporary file if it exists
    if temp_file and os.path.exists(temp_file):
        os.unlink(temp_file)

    return df, metric_vol, output_path



####################################################################################################

[docs]
def compute_reg_volume_fromparcellation(
    parc_file: Union[str, cltparc.Parcellation, np.ndarray],
    output_table: str = None,
    table_type: str = "metric",
    exclude_by_code: Union[list, np.ndarray] = None,
    exclude_by_name: Union[list, str] = None,
    include_by_code: Union[list, np.ndarray] = None,
    include_by_name: Union[list, str] = None,
    add_bids_entities: bool = True,
    region_prefix: str = "supra-side",
    include_global: bool = True,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Compute volume for all regions in a parcellation.

    This function calculates the volume of each region defined in a parcellation by counting
    the number of voxels in each region and multiplying by the voxel volume. It supports
    various output formats and can exclude specific regions from the analysis.

    Parameters
    ----------
    parc_file : str, cltparc.Parcellation, or np.ndarray
        Path to the parcellation file, Parcellation object, or numpy array defining regions.
        Each unique integer value in the array represents a different anatomical region.
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific statistic for each region (regions as rows)
        - "region": Each column represents a region, with rows for different statistics
    exclude_by_code : list or np.ndarray, optional
        Region codes to exclude from the analysis. If None, no regions are excluded by code.
        Useful for excluding regions like ventricles or non-brain tissue.
    exclude_by_name : list or str, optional
        Region names to exclude from the analysis. If None, no regions are excluded by name.
        Example: ["Ventricles", "White-Matter"] to focus only on gray matter regions.
    include_by_code : list or np.ndarray, optional
        Region codes to include in the analysis. If None, all regions are included.
        Useful for focusing on specific regions of interest.
    include_by_name : list or str, optional
        Region names to include in the analysis. If None, all regions are included.
        Example: ["Cortex", "Hippocampus"] to focus on specific structures.
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
        This extracts subject, session, and other metadata from the filename.
    region_prefix : str, default="supra-side"
        Prefix to use for region names when they cannot be determined from the parcellation object.
        The prefix will be combined with the region index number.
    include_global : bool, default=True
        Whether to include a the total volume in the output table.
        If True, adds a row for the total volume calculated from the parcellation.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the computed regional volumes.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> # Define path to sample data
    >>> parc_file = os.path.join('data', 'sub-01', 'anat', 'sub-01_T1w_parcellation.nii.gz')
    >>> # Compute regional volumes
    >>> df, _ = morpho.compute_reg_volume_fromparcellation(parc_file)
    >>> # Display the first few rows of results
    >>> print(f"Number of regions: {df.shape[0]}")
    >>> print(df[['Region', 'Value']].head())

    Using region format for output (regions as columns):

    >>> df_region, _ = morpho.compute_reg_volume_fromparcellation(
    ...     parc_file, table_type="region", add_bids_entities=True
    ... )
    >>> # View volumes across regions
    >>> print(df_region.head())

    Excluding specific regions from analysis:

    >>> # Exclude regions by name
    >>> exclude_names = ["brain-left-ventricle", "brain-right-ventricle"]
    >>> df_filtered, _ = morpho.compute_reg_volume_fromparcellation(
    ...     parc_file, exclude_by_name=exclude_names
    ... )
    >>> # Check that ventricles are not in results
    >>> ventricle_count = sum(1 for r in df_filtered['Region'] if 'ventricle' in r.lower())
    >>> print(f"Ventricle regions in results: {ventricle_count}")

    Saving the results to a file:

    >>> output_path = os.path.join('results', 'sub-01_regional_volumes.tsv')
    >>> df_saved, saved_path = morpho.compute_reg_volume_fromparcellation(
    ...     parc_file, output_table=output_path
    ... )
    >>> print(f"Table saved to: {saved_path}")
    >>> # You can load this table later with pandas
    >>> import pandas as pd
    >>> df_loaded = pd.read_csv(saved_path, sep='\t')

    Working with in-memory data instead of files:

    >>> import numpy as np
    >>> import nibabel as nib
    >>> # Load data into memory first
    >>> parc_obj = nib.load(parc_file)
    >>> parc_data = parc_obj.get_fdata()
    >>> # Create a custom affine matrix (example: 1mm isotropic voxels)
    >>> affine = np.eye(4)
    >>> # Process the in-memory array
    >>> df_memory, _ = morpho.compute_reg_volume_fromparcellation(
    ...     parc_data, add_bids_entities=False
    ... )
    >>> print(df_memory.head())

    Notes
    -----
    This function calculates volumes in milliliters (ml) by default. The voxel volume is
    calculated from the affine transformation matrix of the parcellation image. For arrays
    without an affine matrix, an identity matrix is assumed (1mm isotropic voxels).

    The regional volumes are calculated by counting the number of voxels in each region
    and multiplying by the voxel volume in cubic millimeters, then dividing by 1000 to
    convert to milliliters.

    When working with BIDS-formatted data, setting `add_bids_entities=True` will extract
    subject, session, and other metadata from the filename to include in the output table.

    See Also
    --------
    compute_reg_val_fromparcellation : Calculate statistics for metric values within parcellation regions
    """

    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Process parcellation file
    filename = ""
    if isinstance(parc_file, str):
        if not os.path.exists(parc_file):
            raise FileNotFoundError(f"Parcellation file not found: {parc_file}")

        vparc_data = cltparc.Parcellation(parc_file=parc_file)
        affine = vparc_data.affine
        filename = parc_file
    elif isinstance(parc_file, cltparc.Parcellation):
        vparc_data = copy.deepcopy(parc_file)
        affine = vparc_data.affine
        filename = vparc_data.parc_file
    elif isinstance(parc_file, np.ndarray):
        vparc_data = cltparc.Parcellation(parc_file=parc_file)
        affine = vparc_data.affine
        filename = "3darray"
    else:
        raise TypeError(
            f"parc_file must be a string, Parcellation object, or numpy array, got {type(parc_file)}"
        )

    # Apply exclusions if specified
    if exclude_by_code is not None:
        vparc_data.remove_by_code(codes2remove=exclude_by_code)

    if exclude_by_name is not None:
        vparc_data.remove_by_name(names2remove=exclude_by_name)

    # Apply inclusion if specified
    if include_by_code is not None:
        vparc_data.keep_by_code(codes2keep=include_by_code)

    if include_by_name is not None:
        vparc_data.keep_by_name(names2keep=include_by_name)

    # Computing the voxel volume (in cubic mm)
    vox_size = np.linalg.norm(affine[:3, :3], axis=1)
    vox_vol = np.prod(vox_size)

    # Prepare data structures for results
    dict_of_cols = {}

    # Compute global volume for the entire brain (convert to ml by dividing by 1000)
    if include_global:
        brain_mask = vparc_data.data != 0
        if np.any(brain_mask):  # Check if there are any non-zero values
            global_volume_ml = np.sum(brain_mask) * vox_vol / 1000
            dict_of_cols["brain-brain-wholebrain"] = [global_volume_ml]
        else:
            # Handle empty/invalid parcellation
            dict_of_cols["brain-brain-wholebrain"] = [0]

    # Compute volume for each region
    # Use unique region indices from the data itself
    unique_indices = np.unique(vparc_data.data)
    unique_indices = unique_indices[unique_indices != 0]  # Exclude background

    for index in unique_indices:
        # Get region name from the parcellation object if available
        if hasattr(vparc_data, "name") and hasattr(vparc_data, "index"):
            idx_pos = np.where(np.array(vparc_data.index) == index)[0]
            if len(idx_pos) > 0:
                regname = vparc_data.name[idx_pos[0]]
            else:
                regname = cltmisc.create_names_from_indices(index, prefix=region_prefix)
        else:
            regname = cltmisc.create_names_from_indices(index, prefix=region_prefix)

        region_mask = vparc_data.data == index
        region_volume_ml = np.sum(region_mask) * vox_vol / 1000

        if region_volume_ml > 0:
            dict_of_cols[regname] = [region_volume_ml]
        else:
            dict_of_cols[regname] = [0]

    # Check if we found any regions
    if len(dict_of_cols) == 0:
        raise ValueError("No valid regions found in the parcellation data")

    # Create DataFrame
    df = pd.DataFrame.from_dict(dict_of_cols)

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)

        # Safely handle region names that might not have 3 components
        if reg_names.shape[1] >= 3:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", reg_names[1])
        elif reg_names.shape[1] == 2:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", "unknown")
        else:
            df.insert(0, "Supraregion", "unknown")
            df.insert(1, "Hemisphere", "unknown")

    # Add metadata columns
    nrows = df.shape[0]
    units = get_units("volume")
    if isinstance(units, list) and len(units) > 0:
        units = units[0]
    elif units is None or (isinstance(units, list) and len(units) == 0):
        units = "ml"

    df.insert(0, "Source", ["parcellation"] * nrows)
    df.insert(1, "Metric", ["volume"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [filename] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities and isinstance(parc_file, str):
        try:
            ent_list = cltbids.entities4table()
            df_add = cltbids.entities_to_table(
                filepath=parc_file, entities_to_extract=ent_list
            )
            df = cltmisc.expand_and_concatenate(df_add, df)
        except Exception as e:
            warnings.warn(f"Could not add BIDS entities: {str(e)}")

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    output_path = None
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)
        output_path = output_table

    return df, output_path



####################################################################################################
####################################################################################################
############                                                                            ############
############                                                                            ############
############  Section 3: Methods dedicated to parse stats file from freesurfer results  ############
############                                                                            ############
############                                                                            ############
####################################################################################################
####################################################################################################

[docs]
def parse_freesurfer_global_fromaseg(
    stat_file: str,
    output_table: str = None,
    table_type: str = "metric",
    add_bids_entities: bool = True,
    include_missing: bool = True,
    config_json: str = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Parse global volume measurements from a FreeSurfer aseg.stats file.

    This function extracts key global volumetric measurements from FreeSurfer's aseg.stats file,
    including intracranial volume, brain volume, gray/white matter volumes, and ventricle
    volumes. It converts values to milliliters and organizes them into a structured DataFrame.

    Parameters
    ----------
    stat_file : str
        Path to the aseg.stats file generated by FreeSurfer.
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific statistic for each region (regions as rows)
        - "region": Each column represents a region, with rows for different statistics
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
        This extracts subject, session, and other metadata from the filename.
    include_missing : bool, default=True
        Whether to include missing values as zeros in the output. If False, missing values
        will be excluded from the DataFrame.
    config_json : str, optional
        Path to a JSON configuration file defining volume measurements to extract.
        If None, a default configuration will be used.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the extracted global volume measurements.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.
    """

    # Verify if the file exists
    if not os.path.isfile(stat_file):
        raise FileNotFoundError(f"Stats file not found: {stat_file}")

    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Load volume measurements from config file or use defaults
    if config_json and os.path.isfile(config_json):
        try:
            with open(config_json, "r") as f:
                config_data = json.load(f)
                # Check if the config has a "global" key
                if "global" in config_data:
                    volume_measurements = config_data["global"]
                else:
                    volume_measurements = config_data
        except Exception as e:
            warnings.warn(f"Error loading config file {config_json}: {e}")
            volume_measurements = get_stats_dictionary("global")
    else:
        volume_measurements = get_stats_dictionary("global")

    # Dictionary to store the extracted values
    extracted_values = {}

    # Read the stats file
    try:
        with open(stat_file, "r") as file:
            file_content = file.readlines()

            # Create dictionaries to store parsed data
            global_measures = {}  # For "# Measure" lines - global stats
            segmented_data = {}  # For tabular data - segmentation stats

            # First, parse all global measures (lines starting with "# Measure")
            for line in file_content:
                if line.startswith("# Measure"):
                    try:
                        parts = line.split(", ")
                        if len(parts) >= 4:
                            # Get description (which is parts[2]) and value (parts[3])
                            measure_description = parts[2].strip()
                            measure_value = float(parts[3].strip())
                            global_measures[measure_description] = measure_value

                            # Also store by the short name (parts[1]) for alternative lookup
                            if len(parts) >= 2:
                                short_name = parts[1].strip()
                                global_measures[short_name] = measure_value
                    except Exception as e:
                        warnings.warn(
                            f"Error parsing global measure line: {line.strip()}. Error: {e}"
                        )

            # Next, parse segmentation table (lines starting with a number)
            # First identify where the table starts
            table_start = False
            for i, line in enumerate(file_content):
                if line.startswith("# ColHeaders"):
                    table_start = True
                    continue

                if table_start and line.strip() and line.strip()[0].isdigit():
                    try:
                        parts = line.strip().split()
                        if len(parts) >= 5:
                            # Structure name is in position 4, volume is in position 3
                            seg_name = parts[4]
                            seg_volume = float(parts[3])
                            segmented_data[seg_name] = seg_volume
                    except Exception as e:
                        warnings.warn(
                            f"Error parsing table line: {line.strip()}. Error: {e}"
                        )

            # If we didn't find table data with the precise method, try a more general approach
            if not segmented_data:
                for line in file_content:
                    if line.strip() and line.strip()[0].isdigit():
                        try:
                            parts = line.strip().split()
                            if len(parts) >= 5:
                                seg_name = parts[4]
                                seg_volume = float(parts[3])
                                segmented_data[seg_name] = seg_volume
                        except Exception as e:
                            pass  # Silently skip lines that don't match expected format

            # Now extract the values based on the configuration
            for region_key, region_info in volume_measurements.items():
                value_found = False

                # Get configuration details
                key = region_info["key"]
                divisor = region_info.get("divisor", 1000)

                # STEP 1: Try to find in global measures
                if key in global_measures:
                    value = global_measures[key] / divisor
                    extracted_values[region_key] = [value]
                    value_found = True
                    continue

                # STEP 2: Try to find in segmented data
                if key in segmented_data:
                    value = segmented_data[key] / divisor
                    extracted_values[region_key] = [value]
                    value_found = True
                    continue

                # STEP 3: Try any alternate keys from config
                alt_keys = region_info.get("alternate_keys", [])
                for alt_key in alt_keys:
                    if alt_key in global_measures:
                        value = global_measures[alt_key] / divisor
                        extracted_values[region_key] = [value]
                        value_found = True
                        break
                    if alt_key in segmented_data:
                        value = segmented_data[alt_key] / divisor
                        extracted_values[region_key] = [value]
                        value_found = True
                        break

                if value_found:
                    continue

                # STEP 4: Fallback to line search (legacy method)
                if "index" in region_info:
                    for line in file_content:
                        if key in line:
                            try:
                                index = region_info["index"]
                                parts = line.split()

                                if index < 0:
                                    index = len(parts) + index

                                if 0 <= index < len(parts):
                                    value_str = parts[index].split(",")[0]
                                    value = float(value_str) / divisor
                                    extracted_values[region_key] = [value]
                                    value_found = True
                                    break
                            except (IndexError, ValueError) as e:
                                warnings.warn(
                                    f"Error parsing value for {region_key} using index: {e}"
                                )

                # If value not found and we're including missing values
                if not value_found and include_missing:
                    extracted_values[region_key] = [0.0]
                    warnings.warn(
                        f"Value for {region_key} (key: {key}) not found in {stat_file}"
                    )

    except Exception as e:
        raise RuntimeError(f"Error reading stats file {stat_file}: {e}")

    # Check if we found any values
    if not extracted_values:
        raise ValueError(f"No volume measurements found in {stat_file}")

    # Create a dataframe with the values
    df = pd.DataFrame.from_dict(extracted_values)

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]

        # Converting the row names to a new column called statistics
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)

        # Safely handle region names that might not have 3 components
        if reg_names.shape[1] >= 3:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", reg_names[1])
        elif reg_names.shape[1] == 2:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", "unknown")
        else:
            df.insert(0, "Supraregion", "unknown")
            df.insert(1, "Hemisphere", "unknown")

    # Add metadata columns
    nrows = df.shape[0]
    units = get_units("volume")
    if isinstance(units, list) and len(units) > 0:
        units = units[0]
    elif units is None or (isinstance(units, list) and len(units) == 0):
        units = "ml"

    df.insert(0, "Source", ["statsfile"] * nrows)
    df.insert(1, "Metric", ["volume"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [stat_file] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities:
        try:
            ent_list = cltbids.entities4table()
            df_add = cltbids.entities_to_table(
                filepath=stat_file, entities_to_extract=ent_list
            )
            df = cltmisc.expand_and_concatenate(df_add, df)
        except Exception as e:
            warnings.warn(f"Could not add BIDS entities: {str(e)}")

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    output_path = None
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)
        output_path = output_table

    return df, output_path



####################################################################################################

[docs]
def parse_freesurfer_stats_fromaseg(
    stat_file: str,
    output_table: str = None,
    table_type: str = "metric",
    add_bids_entities: bool = True,
    include_missing: bool = True,
    config_json: str = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Parse regional volume measurements from a FreeSurfer aseg.stats file.

    This function extracts volume measurements for specific brain regions from the tabular
    data section of FreeSurfer's aseg.stats file. It converts values to milliliters
    and organizes them into a structured DataFrame.

    Parameters
    ----------
    stat_file : str
        Path to the aseg.stats file generated by FreeSurfer.
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each column represents a specific statistic for each region (regions as rows)
        - "region": Each column represents a region, with rows for different statistics
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
        This extracts subject, session, and other metadata from the filename.
    include_missing : bool, default=True
        Whether to include missing values as zeros in the output. If False, missing values
        will be excluded from the DataFrame.
    config_json : str, optional
        Path to a JSON configuration file defining region measurements to extract.
        If None, a default configuration will be used.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the extracted region volume measurements.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> # Define path to FreeSurfer stats file
    >>> stats_file = os.path.join('freesurfer', 'sub-01', 'stats', 'aseg.stats')
    >>> # Parse the stats file
    >>> df, _ = morpho.parse_freesurfer_stats_fromaseg(stats_file)
    >>> # Display the first few rows of results
    >>> print(df[['Region', 'Value']].head())

    Using region format (regions as columns, statistics as rows):

    >>> df_region, _ = morpho.parse_freesurfer_stats_fromaseg(
    ...     stats_file, table_type="region"
    ... )
    >>> # View volumes across regions
    >>> print(df_region.head())

    Saving the results to a file:

    >>> output_path = os.path.join('results', 'sub-01_fs-regional-volumes.tsv')
    >>> df_saved, saved_path = morpho.parse_freesurfer_stats_fromaseg(
    ...     stats_file, output_table=output_path
    ... )
    >>> print(f"Table saved to: {saved_path}")

    Notes
    -----
    This function extracts regional volume measurements from the table section of the aseg.stats file.
    The aseg.stats file contains a table with measurements for different brain regions, with
    each row representing a different segmented region.

    All volumes are converted to milliliters (ml) by dividing the FreeSurfer values
    (typically in mm³) by 1000.

    The aseg.stats file is typically found in the `[subject]/stats/` directory of a
    FreeSurfer output directory.

    See Also
    --------
    parse_freesurfer_global_fromaseg : Extract global volume measurements from aseg.stats
    """

    # Verify if the file exists
    if not os.path.isfile(stat_file):
        raise FileNotFoundError(f"Stats file not found: {stat_file}")

    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Load region measurements from config file or use defaults
    if config_json and os.path.isfile(config_json):
        try:
            with open(config_json, "r") as f:
                config_data = json.load(f)
                # Check if the config has an "aseg" key
                if "aseg" in config_data:
                    region_measurements = config_data["aseg"]
                else:
                    region_measurements = config_data
        except Exception as e:
            warnings.warn(f"Error loading config file {config_json}: {e}")
            region_measurements = get_stats_dictionary("aseg")
    else:
        region_measurements = get_stats_dictionary("aseg")

    # Dictionary to store the extracted values
    extracted_values = {}

    # Read the stats file
    try:
        with open(stat_file, "r") as file:
            file_content = file.readlines()

            # Parse the tabular data from the aseg.stats file
            segmented_data = {}
            region_data = {}

            # First determine the column positions by looking for TableCol definitions
            column_indices = {}
            volume_index = 3  # Default volume index if not specified

            for line in file_content:
                if line.startswith("# TableCol"):
                    try:
                        parts = line.split()
                        if len(parts) >= 4 and "ColHeader" in line:
                            col_num = int(parts[2]) - 1  # Convert to 0-based index
                            col_name = parts[-1]
                            column_indices[col_name] = col_num
                            if col_name == "Volume_mm3":
                                volume_index = col_num
                    except (ValueError, IndexError) as e:
                        warnings.warn(
                            f"Error parsing TableCol line: {line.strip()}. Error: {e}"
                        )

            # Parse the table rows (lines starting with a number)
            for line in file_content:
                if line.strip() and line.strip()[0].isdigit():
                    try:
                        parts = line.strip().split()
                        if len(parts) >= 5:
                            # Structure name is typically in position 4
                            seg_name = parts[4]
                            # Use detected volume index or default to 3
                            vol_idx = (
                                volume_index if 0 <= volume_index < len(parts) else 3
                            )
                            seg_volume = float(parts[vol_idx])
                            segmented_data[seg_name] = seg_volume

                            # Also store additional information about the region
                            region_data[seg_name] = {
                                "SegId": int(parts[1]),
                                "NVoxels": int(parts[2]),
                                "Volume": seg_volume,
                            }
                    except Exception as e:
                        warnings.warn(
                            f"Error parsing table line: {line.strip()}. Error: {e}"
                        )

            # Extract values based on the configuration
            for region_key, region_info in region_measurements.items():
                value_found = False

                # Get configuration details
                key = region_info["key"]
                divisor = region_info.get("divisor", 1000)

                # Try to find in segmented data
                if key in segmented_data:
                    value = segmented_data[key] / divisor
                    extracted_values[region_key] = [value]
                    value_found = True
                    continue

                # Try any alternate keys from config
                alt_keys = region_info.get("alternate_keys", [])
                for alt_key in alt_keys:
                    if alt_key in segmented_data:
                        value = segmented_data[alt_key] / divisor
                        extracted_values[region_key] = [value]
                        value_found = True
                        break

                if value_found:
                    continue

                # Fallback to SegId lookup if provided
                seg_id = region_info.get("seg_id")
                if seg_id is not None:
                    for name, data in region_data.items():
                        if data["SegId"] == seg_id:
                            value = data["Volume"] / divisor
                            extracted_values[region_key] = [value]
                            value_found = True
                            break

                # If value not found and we're including missing values
                if not value_found and include_missing:
                    extracted_values[region_key] = [0.0]
                    warnings.warn(
                        f"Value for {region_key} (key: {key}) not found in {stat_file}"
                    )

    except Exception as e:
        raise RuntimeError(f"Error reading stats file {stat_file}: {e}")

    # Check if we found any values
    if not extracted_values:
        raise ValueError(f"No region measurements found in {stat_file}")

    # Create a dataframe with the values
    df = pd.DataFrame.from_dict(extracted_values)

    # Format table according to specified type
    if table_type == "region":
        # Create region-oriented table
        df.index = ["Value"]
        df = df.reset_index()
        df = df.rename(columns={"index": "Statistics"})
    else:
        # Create metric-oriented table
        df = df.T
        df.columns = ["Value"]

        # Converting the row names to a new column called statistics
        df = df.reset_index()
        df = df.rename(columns={"index": "Region"})

        # Split region names into components
        reg_names = df["Region"].str.split("-", expand=True)

        # Safely handle region names that might not have 3 components
        if reg_names.shape[1] >= 3:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", reg_names[1])
        elif reg_names.shape[1] == 2:
            df.insert(0, "Supraregion", reg_names[0])
            df.insert(1, "Hemisphere", "unknown")
        else:
            df.insert(0, "Supraregion", "unknown")
            df.insert(1, "Hemisphere", "unknown")

    # Add metadata columns
    nrows = df.shape[0]
    units = get_units("volume")
    if isinstance(units, list) and len(units) > 0:
        units = units[0]
    elif units is None or (isinstance(units, list) and len(units) == 0):
        units = "ml"

    df.insert(0, "Source", ["statsfile"] * nrows)
    df.insert(1, "Metric", ["volume"] * nrows)
    df.insert(2, "Units", [units] * nrows)
    df.insert(3, "MetricFile", [stat_file] * nrows)

    # Add BIDS entities if requested
    if add_bids_entities:
        try:
            ent_list = cltbids.entities4table()
            df_add = cltbids.entities_to_table(
                filepath=stat_file, entities_to_extract=ent_list
            )
            df = cltmisc.expand_and_concatenate(df_add, df)
        except Exception as e:
            warnings.warn(f"Could not add BIDS entities: {str(e)}")

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)

    # Save table if requested
    output_path = None
    if output_table is not None:
        output_dir = os.path.dirname(output_table)
        if output_dir and not os.path.exists(output_dir):
            raise FileNotFoundError(
                f"Directory does not exist: {output_dir}. Please create the directory before saving."
            )

        df.to_csv(output_table, sep="\t", index=False)
        output_path = output_table

    return df, output_path



####################################################################################################

[docs]
def parse_freesurfer_cortex_stats(
    stats_file: str,
    output_table: str = None,
    table_type: str = "metric",
    add_bids_entities: bool = True,
    hemi: str = None,
    config_json: str = None,
    include_metrics: list = None,
) -> Tuple[pd.DataFrame, Optional[str]]:
    """
    Parse cortical parcellation statistics from a FreeSurfer aparc.stats file.

    This function extracts regional measurements from FreeSurfer's aparc.stats files,
    including surface area, gray matter volume, cortical thickness, and curvature
    for each cortical region. It organizes the data into a structured DataFrame.

    Surface area is automatically converted from mm² to cm² (divided by 100)
    and volume is automatically converted from mm³ to cm³ (divided by 1000).

    Parameters
    ----------
    stats_file : str
        Path to the aparc.stats file generated by FreeSurfer (lh.aparc.stats or rh.aparc.stats).
    output_table : str, optional
        Path to save the resulting table. If None, the table is not saved.
    table_type : str, default="metric"
        Output format specification:
        - "metric": Each row represents a region, with columns for different metrics
        - "region": Each row represents a metric, with columns for different regions
    add_bids_entities : bool, default=True
        Whether to include BIDS entities as columns in the resulting DataFrame.
        This extracts subject, session, and other metadata from the filename.
    hemi : str, optional
        Hemisphere identifier ('lh' for left or 'rh' for right). If None, it will be
        automatically detected from the filename or the file content.
    config_json : str, optional
        Path to a JSON configuration file defining cortical metrics to extract.
        If None, a default configuration will be used.
    include_metrics : list, optional
        List of metrics to extract from the stats file. If provided, only these metrics
        will be extracted from the configuration. If None, all metrics in the configuration
        will be extracted.

    Returns
    -------
    df : pd.DataFrame
        DataFrame containing the extracted cortical measurements.
    output_path : str or None
        Path where the table was saved, or None if no table was saved.

    Examples
    --------
    Basic usage with default parameters:

    >>> import os
    >>> import clabtoolkit.morphometrytools as morpho
    >>> # Define path to FreeSurfer stats file
    >>> stats_file = os.path.join('freesurfer', 'sub-01', 'stats', 'lh.aparc.stats')
    >>> # Parse the stats file
    >>> df, _ = morpho.parse_freesurfer_cortex_stats(stats_file)
    >>> # Display the results for thickness
    >>> thickness_df = df[df['Metric'] == 'thickness']
    >>> print(thickness_df[['Region', 'Value', 'Std']].head())

    Using a custom configuration file:

    >>> config_file = os.path.join('config', 'stats_mapping.json')
    >>> df, _ = morpho.parse_freesurfer_cortex_stats(stats_file, config_json=config_file)

    Extract only specific metrics:

    >>> df_area_vol, _ = morpho.parse_freesurfer_cortex_stats(
    ...     stats_file, include_metrics=["area", "volume"]
    ... )
    >>> print(df_area_vol['Metric'].unique())

    Using region format (metrics as rows, regions as columns):

    >>> df_region, _ = morpho.parse_freesurfer_cortex_stats(
    ...     stats_file, table_type="region"
    ... )
    >>> # View thickness across regions
    >>> thickness_row = df_region[df_region['Statistics'] == 'thickness']
    >>> print(thickness_row.iloc[:, :5])  # Print first 5 columns for thickness

    Saving the results to a file:

    >>> output_path = os.path.join('results', 'sub-01_lh_cortical-metrics.tsv')
    >>> df_saved, saved_path = morpho.parse_freesurfer_cortex_stats(
    ...     stats_file, output_table=output_path
    ... )
    >>> print(f"Table saved to: {saved_path}")

    Notes
    -----
    This function extracts metrics from aparc.stats files based on the configuration.
    By default, these include:
    - Surface area (SurfArea column) in cm² (converted from mm² by dividing by 100)
    - Gray matter volume (GrayVol column) in cm³ (converted from mm³ by dividing by 1000)
    - Cortical thickness (ThickAvg column) in mm (unchanged)
    - Thickness standard deviation (ThickStd column) in mm (unchanged)
    - Mean curvature (MeanCurv column) in mm⁻¹ (unchanged)

    The function automatically detects the hemisphere from the filename or file content
    if not specified.

    Cortical parcellation stats files (lh.aparc.stats and rh.aparc.stats) are typically
    found in the `[subject]/stats/` directory of a FreeSurfer output directory.

    See Also
    --------
    parse_freesurfer_global_fromaseg : Parse global volumes from aseg.stats file
    parse_freesurfer_stats_fromaseg : Parse regional volumes from aseg.stats file
    """

    # Verify if the file exists
    if not os.path.isfile(stats_file):
        raise FileNotFoundError(f"Stats file not found: {stats_file}")

    # Input validation
    if table_type not in ["region", "metric"]:
        raise ValueError(
            f"Invalid table_type: '{table_type}'. Expected 'region' or 'metric'."
        )

    # Detect hemisphere from filename or file content if not provided
    if hemi is None:
        # Try to detect from filename
        basename = os.path.basename(stats_file)
        if "lh." in basename:
            hemi = "lh"
        elif "rh." in basename:
            hemi = "rh"
        else:
            # Try to detect from file content
            with open(stats_file, "r") as f:
                content = f.read()
                if "# hemi lh" in content:
                    hemi = "lh"
                elif "# hemi rh" in content:
                    hemi = "rh"
                else:
                    warnings.warn(
                        f"Could not determine hemisphere from file: {stats_file}. Using 'lh' as default."
                    )
                    hemi = "lh"

    # Load metrics configuration from file or use defaults
    if config_json and os.path.isfile(config_json):
        try:
            with open(config_json, "r") as f:
                config_data = json.load(f)
                # Check if the config has a "cortex" key
                if "cortex" in config_data:
                    metric_mapping = config_data["cortex"]
                else:
                    metric_mapping = config_data

                # Update unit information in the configuration
                if (
                    "area" in metric_mapping
                    and metric_mapping["area"].get("unit") == "mm²"
                ):
                    metric_mapping["area"]["unit"] = "cm²"

                if (
                    "volume" in metric_mapping
                    and metric_mapping["volume"].get("unit") == "mm³"
                ):
                    metric_mapping["volume"]["unit"] = "cm³"

        except Exception as e:
            warnings.warn(f"Error loading config file {config_json}: {e}")
            metric_mapping = get_stats_dictionary("cortex")
    else:
        metric_mapping = get_stats_dictionary("cortex")

    # Filter metrics if include_metrics is provided
    if include_metrics:
        include_metrics = [m.lower() for m in include_metrics]
        metric_mapping = {
            k: v for k, v in metric_mapping.items() if k.lower() in include_metrics
        }

        # Validate that requested metrics exist
        if not metric_mapping:
            raise ValueError(
                f"None of the requested metrics {include_metrics} found in configuration."
            )

    # Validate metrics after filtering
    valid_metrics = list(metric_mapping.keys())
    if not valid_metrics:
        raise ValueError("No valid metrics found in configuration.")

    # Read the stats file
    try:
        with open(stats_file, "r") as file:
            lines = file.readlines()

        # Find the data section by looking for column headers
        col_headers_line = None
        column_headers = []
        for i, line in enumerate(lines):
            if "# ColHeaders" in line:
                col_headers_line = i
                column_headers = line.replace("# ColHeaders", "").strip().split()
                break

        # If column headers not found, try to find the end of the comment section
        if not column_headers:
            for i, line in enumerate(lines):
                if not line.startswith("#") and line.strip():
                    # This might be the start of the data section
                    if i > 0 and "# TableCol" in lines[i - 1]:
                        # The previous line was a table column definition, this is likely the data
                        parts = line.strip().split()
                        if len(parts) >= 7:  # Ensure enough columns
                            # Assume a fixed structure based on the example file
                            column_headers = [
                                "StructName",
                                "NumVert",
                                "SurfArea",
                                "GrayVol",
                                "ThickAvg",
                                "ThickStd",
                                "MeanCurv",
                                "GausCurv",
                                "FoldInd",
                                "CurvInd",
                            ]
                            col_headers_line = i - 1
                            print(
                                f"Inferred column headers at line {i}: {column_headers}"
                            )
                            break

        if not column_headers:
            # Final fallback: manually parse the TableCol definitions
            table_cols = {}
            for line in lines:
                if "# TableCol" in line and "ColHeader" in line:
                    try:
                        parts = line.split()
                        col_num = int(parts[2])
                        col_name = parts[-1]
                        table_cols[col_num] = col_name
                    except (ValueError, IndexError):
                        continue

            if table_cols:
                # Sort by column number
                column_headers = [table_cols[i] for i in sorted(table_cols.keys())]
                print(
                    f"Extracted column headers from TableCol definitions: {column_headers}"
                )

        if not column_headers:
            raise ValueError(f"Could not find column headers in {stats_file}")

        # Create column index mapping
        column_indices = {name: idx for idx, name in enumerate(column_headers)}

        # Determine where to start reading data
        data_lines = []
        if col_headers_line is not None:
            # Start from the line after the column headers
            data_start = col_headers_line + 1
        else:
            # Try to find the first non-comment line
            data_start = 0
            for i, line in enumerate(lines):
                if not line.startswith("#") and line.strip():
                    data_start = i
                    break

        # Extract data lines (non-comment, non-empty lines)
        for i in range(data_start, len(lines)):
            line = lines[i]
            if not line.startswith("#") and line.strip():
                data_lines.append(line)

        # Now parse the data lines to extract region metrics
        regions_data = []

        for line in data_lines:
            parts = line.strip().split()
            if len(parts) < len(column_headers):
                print(
                    f"Warning: Line has fewer parts ({len(parts)}) than headers ({len(column_headers)}): {line[:50]}..."
                )
                continue

            # Extract region name (first field in most aparc.stats files)
            region_name = parts[0]

            # Process each metric
            for metric_name, metric_info in metric_mapping.items():
                column = metric_info.get("column")

                # Get the column index
                col_idx = None
                if column in column_indices:
                    col_idx = column_indices[column]
                elif "index" in metric_info:
                    # Fallback to index if provided
                    col_idx = int(metric_info["index"])
                    if col_idx >= len(parts):
                        print(
                            f"Warning: Index {col_idx} out of range for line with {len(parts)} parts"
                        )
                        continue
                else:
                    print(
                        f"Warning: Could not find column {column} for metric {metric_name}"
                    )
                    continue

                # Get metric value
                try:
                    value = float(parts[col_idx])

                    # Apply unit conversions
                    # Convert area from mm² to cm²
                    if metric_name.lower() == "area" or column == "SurfArea":
                        value = value / 100.0  # mm² to cm²
                        # Update the unit in metric_info to reflect conversion
                        if "unit" in metric_info and metric_info["unit"] == "mm²":
                            metric_info["unit"] = "cm²"
                        elif "unit" not in metric_info:
                            metric_info["unit"] = "cm²"

                    # Convert volume from mm³ to cm³
                    elif metric_name.lower() == "volume" or column == "GrayVol":
                        value = value / 1000.0  # mm³ to cm³
                        # Update the unit in metric_info to reflect conversion
                        if "unit" in metric_info and metric_info["unit"] == "mm³":
                            metric_info["unit"] = "cm³"
                        elif "unit" not in metric_info:
                            metric_info["unit"] = "cm³"

                    # Get standard deviation if applicable
                    std_value = None
                    std_column = metric_info.get("std_index")

                    if std_column is not None and std_column not in (
                        None,
                        "null",
                        "None",
                    ):
                        if isinstance(std_column, int) or (
                            isinstance(std_column, str) and std_column.isdigit()
                        ):
                            std_idx = int(std_column)
                            if 0 <= std_idx < len(parts):
                                std_value = float(parts[std_idx])
                        elif (
                            "ThickStd" in column_indices
                            and metric_name.lower() == "thickness"
                        ):
                            std_idx = column_indices["ThickStd"]
                            std_value = float(parts[std_idx])

                    # Add to results
                    region_data = {
                        "Region": f"ctx-{hemi}-{region_name}",
                        "Metric": metric_name,
                        "Value": value,
                        "Source": metric_info.get("source", "statsfile"),
                        "Units": metric_info.get("unit", ""),
                    }

                    if std_value is not None:
                        region_data["Std"] = std_value

                    regions_data.append(region_data)
                except (IndexError, ValueError) as e:
                    print(f"Error parsing {metric_name} for region {region_name}: {e}")

        # Check if we found any data
        if not regions_data:
            print(f"Warning: No data parsed from {len(data_lines)} data lines")
            # Print a sample of the first data line for debugging
            if data_lines:
                print(f"Sample data line: {data_lines[0]}")
            raise ValueError(f"No cortical parcellation data found in {stats_file}")

        # Create DataFrame
        df = pd.DataFrame(regions_data)

        # Split region names into components
        df["Hemisphere"] = hemi
        df["Supraregion"] = "ctx"

        # Add metadata column for the source file
        df["MetricFile"] = stats_file

        # Reorder columns
        column_order = [
            "Source",
            "Metric",
            "Units",
            "MetricFile",
            "Supraregion",
            "Hemisphere",
            "Region",
            "Value",
        ]

        # Add Std if it exists
        if "Std" in df.columns:
            column_order.append("Std")

        # Filter columns to those that exist
        column_order = [col for col in column_order if col in df.columns]
        df = df[column_order]

        # Format table according to specified type
        if table_type == "region":
            # Create region-oriented table (pivot)
            value_cols = ["Value"]
            if "Std" in df.columns:
                value_cols.append("Std")

            pivot_df = pd.pivot_table(
                df,
                values=value_cols,
                index=[
                    "Source",
                    "Metric",
                    "Units",
                    "MetricFile",
                    "Supraregion",
                    "Hemisphere",
                ],
                columns="Region",
            )

            # Flatten the multi-index columns
            if isinstance(pivot_df.columns, pd.MultiIndex):
                pivot_df.columns = [
                    f"{col[0]}_{col[1]}" if col[0] != "" else col[1]
                    for col in pivot_df.columns
                ]

            # Reset index and extract metric as Statistics
            pivot_df = pivot_df.reset_index()
            pivot_df = pivot_df.rename(columns={"Metric": "Statistics"})

            # Final DataFrame
            df = pivot_df

        # Add BIDS entities if requested
        if add_bids_entities:
            try:
                ent_list = cltbids.entities4table()
                df_add = cltbids.entities_to_table(
                    filepath=stats_file, entities_to_extract=ent_list
                )
                df = cltmisc.expand_and_concatenate(df_add, df)
            except Exception as e:
                warnings.warn(f"Could not add BIDS entities: {str(e)}")

        # Cleaning the dataframe to remove columns with all missing values if include_missing is False
        df = cltmisc.drop_empty_columns(df)

        # Save table if requested
        output_path = None
        if output_table is not None:
            output_dir = os.path.dirname(output_table)
            if output_dir and not os.path.exists(output_dir):
                raise FileNotFoundError(
                    f"Directory does not exist: {output_dir}. Please create the directory before saving."
                )

            df.to_csv(output_table, sep="\t", index=False)
            output_path = output_table

        return df, output_path

    except Exception as e:
        raise RuntimeError(f"Error parsing stats file {stats_file}: {e}")



####################################################################################################

[docs]
def get_stats_dictionary(region_level: str = "global"):
    """
    Return the default global volume measurements configuration for FreeSurfer aseg.stats files.

    Returns
    -------
    dict
        Dictionary containing configuration for extracting global volume measurements.
    """

    # Get the absolute of this file
    cwd = os.path.dirname(os.path.abspath(__file__))
    mapping_stats_json = os.path.join(cwd, "config", "stats_mapping.json")

    with open(mapping_stats_json, encoding="utf-8") as f:
        mapp_dict = json.load(f)

    return mapp_dict[region_level]



####################################################################################################
####################################################################################################
############                                                                            ############
############                                                                            ############
############   Section 5: Methods dedicated to extract metrics from connectivity        ############
############            matrices based on graph theory                                  ############
############                                                                            ############
############                                                                            ############
####################################################################################################
####################################################################################################

[docs]
def network_metrics_to_table(
    conn_mat: np.ndarray,
    lut_file: Union[str, Path, Dict] = None,
    cmat_met: Union[str, List[str]] = "weight",
) -> pd.DataFrame:
    """
    Compute network metrics from a connectivity matrix and return them in a DataFrame.
    This function calculates various graph theory metrics from a given connectivity
    matrix using the Brain Connectivity Toolbox (BCT) and organizes the results into
    a pandas DataFrame.

    Parameters
    ----------
    conn_mat : np.ndarray
        Square connectivity matrix (2D numpy array) representing the connections
        between brain regions.

    lut_file : str, Path, or dict, optional
        Path to a lookup table (LUT) file or a dictionary defining region codes and names.
        If provided, it is used to map region indices to names. If None, regions are
        named generically as "auto-roi-0001", "auto-roi-0002", etc.

    cmat_met : str or list of str, default="weight"
        Description of the connectivity matrix type. This string is used to label
        the metric in the output DataFrame. It can be a single string or a list of
        strings.

    Returns
    -------
    pd.DataFrame
        DataFrame containing the computed network metrics for each region and
        global metrics.

    Examples
    --------
    Basic usage with a connectivity matrix:
    >>> import numpy as np
    >>> import clabtoolkit.morphometrytools as morpho
    >>> # Create a sample connectivity matrix
    >>> conn_mat = np.array([[0, 1, 2],
    ...                      [1, 0, 3],
    ...                      [2, 3, 0]])
    >>> # Compute network metrics
    >>> df_metrics = morpho.network_metrics_to_table(conn_mat)
    >>> print(df_metrics)
    """

    import bctpy as bct

    if lut_file is not None:
        if isinstance(lut_file, (str, Path)):
            if os.path.exists(lut_file):

                col_dict = cltcol.ColorTableLoader.load_colortable(lut_file)

            else:
                raise ValueError("The lut file does not exist")

        elif isinstance(lut_file, dict):
            col_dict = copy.deepcopy(lut_file)
        else:
            raise TypeError("lut_file must be a file path or a dictionary")

        st_codes = col_dict["index"]
        st_names = col_dict["name"]

    else:
        st_codes = list(range(1, conn_mat.shape[0] + 1))
        st_names = cltmisc.create_names_from_indices(st_codes)

    net_metrics = [
        "degree",
        "strength",
        "clustering_coeff",
        "betw_centrality",
        "loc_efficiency",
        "glob_efficiency",
        "transitivity",
        "density_coeff",
    ]
    cmat_bin = conn_mat > 0
    deg_coeff = bct.degree.degrees_und(cmat_bin)
    str_coeff = bct.degree.strengths_und(cmat)
    clu_coeff = bct.clustering_coef_bu(cmat_bin)
    btw_cent = bct.betweenness_bin(cmat_bin)
    sho_path = bct.distance_bin(cmat_bin)
    loc_eff = bct.efficiency_bin(cmat_bin, local=True)

    trans_coeff_g = bct.transitivity_bu(cmat_bin)
    glob_eff_g = bct.efficiency_bin(cmat_bin)
    den_coeff_g = bct.density_und(cmat_bin)

    dict_of_cols = {}
    dict_of_cols["metric"] = ["conn_matrix_" + cmat_met] * len(net_metrics)
    dict_of_cols["value"] = net_metrics
    dict_of_cols["units"] = ["au"] * len(net_metrics)
    dict_of_cols["total_brain"] = [""] * (len(net_metrics) - 3) + [
        glob_eff_g,
        trans_coeff_g,
        den_coeff_g[0],
    ]

    # Values for each region in the annot
    nreg = len(st_codes)
    # outvals = []
    # outnames = []
    for i in range(1, nreg + 1):
        if i < len(conn_mat):
            outvals = [
                deg_coeff[i - 1],
                str_coeff[i - 1],
                clu_coeff[i - 1],
                btw_cent[i - 1],
                loc_eff[i - 1],
            ] + [""] * 3
        else:
            outvals = [""] * (len(net_metrics))  #

        dict_of_cols[st_names[i - 1]] = outvals

    df = pd.DataFrame.from_dict(dict_of_cols)

    # Cleaning the dataframe to remove columns with all missing values if include_missing is False
    df = cltmisc.drop_empty_columns(df)
    return df



####################################################################################################
####################################################################################################
############                                                                            ############
############                                                                            ############
############                        Section 6: Auxiliary methods                        ############
############                                                                            ############
############                                                                            ############
####################################################################################################
####################################################################################################

[docs]
def stats_from_vector(
    metric_vect,
    stats_list: Union[list, tuple] = ["value", "median", "std", "min", "max"],
    nonzeros_only: bool = True,
) -> list:
    """
    Computes specified statistics from a numeric vector.

    Parameters
    ----------
    metric_vect : array-like
        Vector with the values of the metric. Will be coerced to a
        float64 numpy array before processing.

    stats_list : list or tuple
        List of statistics to compute. Supported values (case-insensitive):
        'mean', 'value' (alias for 'mean'), 'median', 'std', 'min', 'max'.

    nonzeros_only : bool, optional
        If True (default), statistics are computed only on non-zero elements.
        - If the input is empty, returns NaN for all statistics regardless of
        this flag.
        - If all elements are zero and nonzeros_only is True, returns 0.0 for all
        statistics (since the result is unambiguously zero).

    Returns
    -------
    list
        List of computed statistics as Python floats, in the same order as
        requested.

    Raises
    ------
    TypeError
        If ``stats_list`` is not a list or tuple.
    ValueError
        If an unsupported statistic name is requested.

    Examples
    --------
    >>> import numpy as np
    >>> data = np.array([1, 2, 3, 4, 5])
    >>> stats_from_vector(data, ['mean', 'median', 'std'])
    [3.0, 3.0, 1.4142135623730951]

    >>> stats_from_vector(data, ['min', 'max'])
    [1.0, 5.0]

    >>> # Case-insensitive statistic names
    >>> stats_from_vector(data, ['MEAN', 'Mean', 'mean'])
    [3.0, 3.0, 3.0]

    >>> # 'value' is an alias for 'mean'
    >>> stats_from_vector(data, ['mean', 'value'])
    [3.0, 3.0]

    >>> # Empty arrays return NaN for all statistics
    >>> stats_from_vector(np.array([]), ['mean', 'median'])
    [nan, nan]

    >>> # All-zero vector with nonzeros_only=True returns 0.0 for all statistics
    >>> stats_from_vector(np.array([0, 0, 0]), ['mean', 'std', 'min'])
    [0.0, 0.0, 0.0]

    >>> # Unsupported statistic raises ValueError
    >>> stats_from_vector(data, ['mean', 'mode'])
    Traceback (most recent call last):
        ...
    ValueError: Unsupported statistics: mode
    """
    if not isinstance(stats_list, (list, tuple)):
        raise TypeError("stats_list must be a list or tuple")

    # Coerce to a float64 numpy array to handle lists, int arrays, etc.
    metric_vect = np.asarray(metric_vect, dtype=np.float64).ravel()

    # Truly empty input → NaN regardless of nonzeros_only flag
    if len(metric_vect) == 0:
        return [float("nan")] * len(stats_list)

    if nonzeros_only:
        metric_vect = metric_vect[metric_vect != 0]
        # All values were zero: result is unambiguously 0 for every statistic
        if metric_vect.size == 0:
            return [0.0] * len(stats_list)

    stats_map = {
        "mean": np.mean,
        "value": np.mean,
        "median": np.median,
        "std": np.std,
        "min": np.min,
        "max": np.max,
    }

    lowercase_stats = [s.lower() for s in stats_list]

    unsupported = [s for s in lowercase_stats if s not in stats_map]
    if unsupported:
        raise ValueError(f"Unsupported statistics: {', '.join(unsupported)}")

    return [float(stats_map[stat](metric_vect)) for stat in lowercase_stats]



####################################################################################################

[docs]
def get_units(
    metrics: Union[str, List[str]], metrics_json: Optional[Union[str, Dict]] = None
) -> List[str]:
    """
    Get the units associated with specified metrics.

    Retrieves the corresponding units for one or more metrics from either a provided
    JSON file, dictionary, or the default configuration.

    Parameters
    ----------
    metrics : str or list of str
        Name(s) of the metrics. Can be a single metric as a string or multiple metrics as a list.

    metrics_json : str or dict, optional
        Either:
        - Path to a JSON file containing the metrics units dictionary
        - Dictionary directly containing the metrics units mapping
        - None (default), which uses the package's built-in configuration

    Returns
    -------
    list of str
        Units corresponding to each requested metric. Returns "unknown" for any metric
        not found in the dictionary.

    Raises
    ------
    ValueError
        If the provided JSON file path is invalid or the metrics_json structure is incorrect.

    Examples
    --------
    >>> import clabtoolkit.morphometrytools as clmorphtools
    >>> # Get unit for a single metric
    >>> clmorphtools.get_units('thickness')
    ['mm']
    >>>
    >>> # Get units for multiple metrics
    >>> clmorphtools.get_units(['thickness', 'area', 'volume'])
    ['mm', 'cm²', 'cm³']
    >>>
    >>> # Using a custom metrics dictionary
    >>> custom_dict = {"metrics_units": {"custom_metric": "kg"}}
    >>> clmorphtools.get_units('custom_metric', metrics_json=custom_dict)
    ['kg']
    >>>
    >>> # Handling unknown metrics
    >>> clmorphtools.get_units(['thickness', 'unknown_metric'])
    ['mm', 'unknown']
    """
    # Convert single metric to list for uniform processing
    if isinstance(metrics, str):
        metrics = [metrics]

    # Get metrics dictionary from appropriate source
    if metrics_json is None:
        # Use default configuration
        config_path = os.path.join(os.path.dirname(__file__), "config", "config.json")
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                config_data = json.load(f)
            metric_dict = config_data.get("metrics_units", {})
        except (FileNotFoundError, json.JSONDecodeError) as e:
            raise ValueError(f"Error loading default configuration: {str(e)}")
    elif isinstance(metrics_json, str):
        # Load from provided JSON file path
        if not os.path.isfile(metrics_json):
            raise ValueError(f"Invalid JSON file path: {metrics_json}")
        try:
            with open(metrics_json, "r") as f:
                config_data = json.load(f)
            metric_dict = config_data.get("metrics_units", {})
            if not metric_dict:
                raise ValueError(
                    "Missing 'metrics_units' key in the provided JSON file"
                )
        except json.JSONDecodeError:
            raise ValueError(f"Invalid JSON format in file: {metrics_json}")
    elif isinstance(metrics_json, dict):
        # Use provided dictionary
        metric_dict = metrics_json.get("metrics_units", metrics_json)
    else:
        raise ValueError("metrics_json must be a file path, dictionary, or None")

    # Create a case-insensitive lookup dictionary (only once)
    lookup_dict = {k.lower(): v for k, v in metric_dict.items()}

    # Lookup units for each metric
    return [lookup_dict.get(metric.lower(), "unknown") for metric in metrics]



####################################################################################################