Source code for bio_compose.verifier

import os
import tempfile
from typing import *
from uuid import uuid4

import numpy as np
import requests
import seaborn as sns
import antimony
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
from requests_toolbelt.multipart.encoder import MultipartEncoder

from bio_compose.data_model import Api, RequestError, save_plot


# Public API of this module: the names exported by a star-import.
__all__ = [
    'Verifier',
    'VerificationResult'
]



[docs]
class Verifier(Api):
    """
    API for verifying (running and comparing) the results of multiple simulators for a given SBML model.
    """

    def __init__(self) -> None:
        """
        Create a new instance of the Verifier class; all set-up is delegated to the `Api` base class.

        **NOTE**: this may clash with your record keeping in a notebook, so it is highly recommended that users treat instances of this class as quasi-singletons, although this is not necessary for fundamental interaction.
        """
        super().__init__()

    # -- api calls

[docs]
    def verify_omex(
            self,
            omex_filepath: str,
            simulators: List[str] = None,
            include_outputs: bool = True,
            comparison_id: str = None,
            expected_results: str = None,
            selection_list: List[str] = None,
            rTol: float = None,
            aTol: float = None,
            _steady_state: bool = False
    ) -> Union[Dict[str, str], RequestError]:
        """
        Submit a new uniform time course comparison job for an OMEX archive to the service and return confirmation of job submission.

        Args:
            - **omex_filepath**: `str`: The path to the omex file to submit.
            - **simulators**: `List[str], optional`: The list of simulators to include in comparison. Defaults to `['amici', 'copasi', 'tellurium']`.
            - **include_outputs**: `bool, optional`: Whether to include the output data used to calculate comparison in the job results on result fetch. Defaults to True.
            - **comparison_id**: `str, optional`: The unique identifier for the comparison job. Defaults to None. If `None` is passed, a comparison id of `bio_check-request-<UUID>` is generated.
            - **expected_results**: `str, optional`: The path to the ground expected_results report file to include in comparison. Defaults to None.
            - **selection_list**: `List[str], optional`: The list of observables to include in comparison output. Defaults to None (all observables).
            - **rTol**: `float, optional`: The relative tolerance used to determine the relative distance in a pairwise comparison. Omitted from the request when `None`; an explicit `0.0` is sent.
            - **aTol**: `float, optional`: The absolute tolerance used to determine the absolute distance in a pairwise comparison. Omitted from the request when `None`; an explicit `0.0` is sent.
            - **_steady_state**: `bool, optional`: Whether to include the steady state analysis job. NOTE: This feature will currently throw an error as it is not yet implemented.

        Returns:
            A dictionary containing the job submission results (**Note**: the return status should read `PENDING`), or a `RequestError` describing the failure if the request or response handling raised.

        Raises:
            `NotImplementedError`: If `_steady_state` is anything other than `False`.
            `OSError`: If `omex_filepath` or `expected_results` cannot be opened.
        """
        if _steady_state is not False:
            raise NotImplementedError("The steady state analysis of model files is not yet implemented and currently under development.")

        endpoint = self._format_endpoint('verify-omex')

        # configure params
        _id = comparison_id or "bio_check-request-" + str(uuid4())
        sims = simulators or ['amici', 'copasi', 'tellurium']

        query_params = {
            'simulators': sims,
            'include_outputs': str(include_outputs).lower(),
            'comparison_id': _id,
        }

        if selection_list:
            query_params['selection_list'] = ','.join(selection_list)
        # compare against None so that a tolerance of exactly 0.0 is still forwarded
        if rTol is not None:
            query_params['rTol'] = str(rTol)
        if aTol is not None:
            query_params['aTol'] = str(aTol)

        # every handle opened for the upload is tracked here and closed in `finally`
        open_files = []
        try:
            omex_handle = open(omex_filepath, 'rb')
            open_files.append(omex_handle)
            _omex = (os.path.basename(omex_filepath), omex_handle, 'application/octet-stream')

            _report = None
            if expected_results:
                report_handle = open(expected_results, 'rb')
                open_files.append(report_handle)
                _report = (os.path.basename(expected_results), report_handle, 'application/octet-stream')

            # the `expected_results` field is always present in the form, with a `None` value when no report is given
            encoder_fields = {
                'uploaded_file': _omex,
                'expected_results': _report
            }

            multidata = MultipartEncoder(fields=encoder_fields)
            headers = {'Content-Type': multidata.content_type}

            try:
                response = requests.post(endpoint, headers=headers, data=multidata, params=query_params)
                response.raise_for_status()
                self._check_response(response)
                output = response.json()
                self.submitted_jobs.append(output)
                return output
            except Exception as e:
                return RequestError(error=str(e))
        finally:
            # the encoder reads from the handles during the POST, so they can only be closed afterwards
            for handle in open_files:
                handle.close()



[docs]
    def verify_sbml(
            self,
            entrypoint: str,
            start: int = 0,
            end: int = 10,
            steps: int = 10,
            simulators: List[str] = None,
            include_outputs: bool = True,
            comparison_id: str = None,
            expected_results: str = None,
            rTol: float = None,
            aTol: float = None,
            selection_list: List[str] = None,
            _steady_state: bool = False
    ) -> Union[Dict[str, str], RequestError]:
        """
        Submit a new uniform time course comparison job for an SBML model to the service and return confirmation of job submission.

        Args:
            - **entrypoint**: `str`: One of either: a path to an SBML file ending in `.xml`, OR an antimony model string. Any value that does not end in `.xml` is treated as antimony and converted to a temporary SBML file before upload.
            - **start**: `int`: The start time of the time course to include in comparison. Defaults to 0.
            - **end**: `int`: The end time of the time course to include in comparison. Defaults to 10.
            - **steps**: `int`: The number of steps in the comparison job. Defaults to 10.
            - **simulators**: `List[str], optional`: The list of simulators to include in comparison. Defaults to `['copasi', 'tellurium']`.
            - **include_outputs**: `bool, optional`: Whether to include the output data used to calculate comparison in the job results on result fetch. Defaults to True.
            - **comparison_id**: `str, optional`: The unique identifier for the comparison job. Defaults to None. If `None` is passed, a comparison id of `bio_check-request-<UUID>` is generated.
            - **expected_results**: `str, optional`: The path to the ground expected_results report file to include in comparison. Defaults to None.
            - **rTol**: `float, optional`: The relative tolerance used to determine the relative distance in a pairwise comparison. Omitted from the request when `None`; an explicit `0.0` is sent.
            - **aTol**: `float, optional`: The absolute tolerance used to determine the absolute distance in a pairwise comparison. Omitted from the request when `None`; an explicit `0.0` is sent.
            - **selection_list**: `List[str], optional`: Observables to include in the output. If passed, all observable names NOT in this list will be excluded. Defaults to `None` (all observables).
            - **_steady_state**: `bool, optional`: Whether to include the steady state analysis job. NOTE: This feature will currently throw an error as it is not yet implemented.

        Returns:
            A dictionary containing the job submission results (**Note**: the return status should read `PENDING`), or a `RequestError` describing the failure if the request or response handling raised.

        Raises:
            `NotImplementedError`: If `_steady_state` is anything other than `False`.
            `IOError`: If an antimony entrypoint cannot be converted to SBML.
            `OSError`: If the SBML file or `expected_results` cannot be opened.
        """
        if _steady_state is not False:
            raise NotImplementedError("The steady state analysis of model files is not yet implemented and currently under development.")

        endpoint = self._format_endpoint('verify-sbml')

        _id = comparison_id or "bio_check-request-" + str(uuid4())
        if simulators is None:
            simulators = ["copasi", "tellurium"]

        query_params = {
            'simulators': simulators,
            'include_outputs': str(include_outputs).lower(),
            'comparison_id': _id,
            'start': str(start),
            'end': str(end),
            'steps': str(steps)
        }

        if selection_list:
            query_params['selection_list'] = ','.join(selection_list)
        # compare against None so that a tolerance of exactly 0.0 is still forwarded
        if rTol is not None:
            query_params['rTol'] = str(rTol)
        if aTol is not None:
            query_params['aTol'] = str(aTol)

        tmp_dir = None   # scratch directory, only created for antimony entrypoints
        open_files = []  # every handle opened for the upload; closed in `finally`
        try:
            # handle entrypoint as antimony
            if not entrypoint.endswith('.xml'):
                tmp_dir = tempfile.TemporaryDirectory()
                entrypoint = self._write_antimony_to_sbml(entrypoint, tmp_dir.name)

            sbml_handle = open(entrypoint, 'rb')
            open_files.append(sbml_handle)
            sbml_fp = (os.path.basename(entrypoint), sbml_handle, 'application/octet-stream')

            _report = None
            if expected_results:
                report_handle = open(expected_results, 'rb')
                open_files.append(report_handle)
                _report = (os.path.basename(expected_results), report_handle, 'application/octet-stream')

            # the `expected_results` field is always present in the form, with a `None` value when no report is given
            encoder_fields = {
                'uploaded_file': sbml_fp,
                'expected_results': _report
            }

            multidata = MultipartEncoder(fields=encoder_fields)
            headers = {'Content-Type': multidata.content_type}

            try:
                response = requests.post(url=endpoint, headers=headers, data=multidata, params=query_params)
                response.raise_for_status()
                self._check_response(response)
                output = response.json()
                self.submitted_jobs.append(output)
                return output
            except Exception as e:
                return RequestError(error=str(e))
        finally:
            # close the handles first: the scratch directory cannot be removed on every platform while a file in it is open
            for handle in open_files:
                handle.close()
            if tmp_dir is not None:
                tmp_dir.cleanup()



[docs]
    def get_rmse(self, job_id: str) -> dict:
        """
        Get root-mean-square error scoring for all simulators involved in the verification job identified by `job_id`.

        Args:
            - **job_id**: `str`: The unique identifier for the verification job.

        Returns:
            The `rmse` entry of the job's results (an empty dictionary if the results carry no `rmse` entry). If fetching or reading the output fails, a dictionary with the single key `'error'` holding the formatted traceback is returned instead of raising.
        """
        try:
            output = self.get_output(job_id=job_id)
            return output['content'].get('results').get('rmse', {})
        except Exception:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt/SystemExit still propagate
            import traceback
            tb_str = traceback.format_exc()
            error_message = (
                f"Traceback:\n{tb_str}"
            )

            return {'error': error_message}



[docs]
    def get_compatible(self, file: str, versions: bool = False) -> Union[List[Union[str, Tuple[Any, ...]]], RequestError]:
        """
        Get all simulators and optionally their versions for a given file. The File is expected to be either an OMEX/COMBINE archive or SBML file.

        Args:
            - **file**: `str`: The path to the file to be checked.
            - **versions**: `bool`: Whether to return the compatible version of the given compatible simulator. Defaults to `False`.

        Returns:
            A list with one entry per compatible simulator: a `(name, version)` tuple when the response carries a version for that simulator, otherwise just the simulator name. A `RequestError` is returned if the request or the parsing of its response raised.

        Raises:
            `OSError`: If `file` cannot be opened.
        """
        endpoint = self._format_endpoint('get-compatible-for-verification')
        query_params = {'versions': str(versions).lower()}

        # keep the upload handle open only for the duration of the request
        with open(file, 'rb') as file_handle:
            fp = (os.path.basename(file), file_handle, 'application/octet-stream')
            multidata = MultipartEncoder(fields={'uploaded_file': fp})
            headers = {'Content-Type': multidata.content_type}

            try:
                response = requests.post(url=endpoint, headers=headers, data=multidata, params=query_params)
                # surface HTTP errors explicitly, as the verify_* methods do
                response.raise_for_status()
                self._check_response(response)
                payload = response.json()

                output = []
                for sim_data in payload['simulators']:
                    name = sim_data['name']
                    version = sim_data.get('version')
                    output.append(name if version is None else (name, version))

                return output
            except Exception as e:
                return RequestError(error=str(e))


    # -- visualizations

[docs]
    @save_plot
    def visualize_observables(self, job_id: str, hspace: float = 0.25, use_grid: bool = False, save_dest: str = None):
        """
        Visualize simulation output (observables) data, not comparison data, with one subplot per simulator and one line per observable.

        Args:
            - **job_id**: `str`: job id for the simulation observable output you wish to visualize.
            - **hspace**: `float`: vertical spacing between the stacked subplots (passed to `plt.subplots_adjust(hspace=...)`). Defaults to 0.25.
            - **use_grid**: `bool`: whether to use a grid for each subplot. Defaults to False.
            - **save_dest**: `str`: path to save the figure. Not read by this method's body; presumably consumed by the `save_plot` decorator.

        Returns:
            `Tuple[matplotlib.Figure, Dict]` of matplotlib Figure and simulation observables indexed by observable name, then by simulator.

        Raises:
            `IOError`: If the job output has no 'results' field, or the results contain no observable or no simulator output that can be plotted.
        """
        # grab output from job id
        output = self.get_output(job_id)

        species_data_content = output['content'].get('results')
        if species_data_content is None:
            raise IOError(f"The job for {job_id} is either not ready or has an error. Please check the output.")

        # bookkeeping and time keys that are not plottable observables
        excluded_observables = {'comparison_id', 'rmse', 'time', 'Time', 'data_generator_time', 'Time (dimensionless)'}
        observables = [key for key in species_data_content if key not in excluded_observables]
        if not observables:
            raise IOError(f"The job for {job_id} does not contain any observables to plot. Please check the output.")

        # the simulators are taken from the `output_data` of the first observable;
        # string values are error messages, so those simulators are left out.
        # (a new list is built rather than removing from the list being iterated, which skipped entries)
        first_output_data = species_data_content[observables[0]]['output_data']
        simulators = [sim for sim, data in first_output_data.items() if not isinstance(data, str)]
        if not simulators:
            raise IOError(f"The job for {job_id} does not contain any successful simulator output to plot. Please check the output.")

        n_simulators = len(simulators)

        # create subplots
        fig, axes = plt.subplots(nrows=n_simulators, ncols=1, figsize=(4, 3 * n_simulators))

        # if only one simulator, `axes` won't be an array, so make it a list
        if n_simulators == 1:
            axes = [axes]

        obs = {observable: {} for observable in observables}
        # one subplot per simulator, one line per observable
        for ax, simulator in zip(axes, simulators):
            for observable in observables:
                value_data = species_data_content[observable]['output_data'][simulator]
                if isinstance(value_data, str):
                    continue
                obs[observable][simulator] = value_data
                sns.lineplot(data=value_data, ax=ax, label=observable)

            # capitalize only the first character; str.replace would upper-case every occurrence of that letter
            sim = simulator[:1].upper() + simulator[1:]
            ax.set_title(f"{sim} Observable Results")
            ax.set_xlabel("Time")
            ax.set_ylabel("Value")
            ax.grid(use_grid)

            # hide the x-axis tick labels
            ax.set_xticks([])

        # adjust layout for better spacing
        plt.tight_layout()
        plt.subplots_adjust(hspace=hspace)
        plt.show()

        return fig, obs



[docs]
    @save_plot
    def visualize_rmse(self, job_id: str, fig_dimensions: tuple[int, int] = None, color_mapping: list[str] = None, save_dest: str = None):
        """
        Visualize the root-mean-squared error between simulator verification outputs as a heatmap.

        :param job_id: (`str`) verification job id. This value can be easily derived from either of `Verifier`'s `.verify_...` methods.
        :param fig_dimensions: (`Tuple[int, int], optional`) The value to use as the `figsize` parameter for a call to `matplotlib.pyplot.figure()`. If `None` is passed, default to (4, 3).
        :param color_mapping: (`List[str], optional`) value passed as `cmap` to `seaborn.heatmap`. If `None` is passed, the `"Blues"` colormap is used.
        :param save_dest: (`str`) destination at which to save figure. Not read by this method's body; presumably consumed by the `save_plot` decorator. Defaults to `None`.

        :return: matplotlib Figure and simulator RMSE scores (one list of scores per simulator, with missing scores as NaN)
        :rtype: `Tuple[matplotlib.Figure, Dict]`
        :raises ValueError: if the job has no RMSE scores, RMSE retrieval reported an error, or no simulator has a score mapping.
        """
        # extract data
        rmse_matrix = self.get_rmse(job_id)
        if not rmse_matrix or 'error' in rmse_matrix:
            raise ValueError(f"The job for {job_id} is either not ready or has an error in rmse scoring. Please check the output.")

        # build labels and rows together so they stay aligned when an entry is not a score mapping;
        # missing scores (None) become NaN
        simulators = []
        rmse_data = []
        for sim_name, scores in rmse_matrix.items():
            if not isinstance(scores, dict):
                continue
            simulators.append(sim_name)
            rmse_data.append([np.nan if score is None else score for score in scores.values()])

        if not rmse_data:
            raise ValueError(f"The job for {job_id} is either not ready or has an error in rmse scoring. Please check the output.")

        if color_mapping is None:
            color_mapping = "Blues"

        # set up figure
        dimensions = fig_dimensions or (4, 3)
        fig = plt.figure(figsize=dimensions)
        sns.heatmap(
            data=rmse_data,
            annot=True,
            xticklabels=simulators,
            yticklabels=simulators,
            cmap=color_mapping,
            vmin=0,
            vmax=1,
            linewidths=1
        )
        # set up plot annotations
        plt.title('Pairwise Root Mean Square Error Between Simulators')
        plt.tight_layout()
        plt.show()

        return fig, dict(zip(simulators, rmse_data))



[docs]
    def visualize_comparison(self, data: Dict, simulators: List[str], comparison_type='proximity', color_mapping: List[str] = None) -> Figure:
        """
        Draw one heatmap per entry of `data['content']['results']`, showing that entry's comparison matrix.

        Args:
            - **data**: `dict`: simulation output data; must contain `data['content']['results']`.
            - **simulators**: `list[str]`: simulator names used as the tick labels on both axes.
            - **comparison_type**: `str`: key of the comparison matrix to read from each entry. Defaults to `'proximity'`.
            - **color_mapping**: `list[str]`: two colors, the first for True and the second for False responses. Defaults to None (dark blue for True, dark orange for False).

        Returns:
            `matplotlib.pyplot.Figure` of a plot grid
        """
        results = data['content']['results']
        names = list(results.keys())
        n_rows = len(names)

        fig, axes = plt.subplots(nrows=n_rows, figsize=(15, 5 * n_rows))

        if color_mapping is None:
            # defaults: dark blue for True, dark orange for False
            true_color, false_color = '#1E3A8A', '#D97706'
        else:
            true_color = color_mapping[0]
            false_color = color_mapping[1]

        # with a single row, `plt.subplots` returns a bare Axes instead of an array
        if n_rows == 1:
            axes = [axes]

        palette = [false_color, true_color]
        for ax, name in zip(axes, names):
            columns = results[name][comparison_type].values()
            matrix = [list(column.values()) for column in list(columns)]
            sns.heatmap(
                data=matrix,
                ax=ax,
                annot=True,
                xticklabels=simulators,
                yticklabels=simulators,
                cmap=palette,
                linewidths=1
            )
            ax.set_title(f"{name} comparison matrix")

        plt.tight_layout()
        plt.show()

        return fig


    def _write_antimony_to_sbml(self, antimony_string: str, dest: str, model_name: str = None) -> str:
        """
        Load an antimony model string and write it out as an SBML file. Used by `verify_sbml()` when its entrypoint is not an `.xml` path.

        Args:
            - **antimony_string**: `str`: Antimony model to convert.
            - **dest**: `str`: Directory in which the SBML file is written.
            - **model_name**: `str`: File name to use for the converted model file. Defaults to `None` (a generic `'model.xml'`).

        Returns:
            Path to the written SBML file.

        Raises:
            `IOError`: If Antimony cannot load the string, or the SBML file cannot be written.
        """
        # a return value of -1 from the loader is treated as failure
        if antimony.loadAntimonyString(antimony_string) == -1:
            raise IOError(f"This antimony string cannot be converted to SBML by Antimony: {antimony_string}. Please check the model and try again.")

        filename = model_name or 'model.xml'
        file_path = os.path.join(dest, filename)

        # NOTE(review): the Antimony C API declares writeSBMLFile(filename, moduleName) — confirm the Python binding accepts this single keyword-argument call
        write_status = antimony.writeSBMLFile(filename=file_path)
        if not (write_status > 0):
            raise IOError(f"This SBML model: {filename} cannot be written to {file_path}. Please check your paths and try again.")

        return file_path




[docs]
class VerificationResult(dict):
    def __init__(self, data: dict):
        """
        Wrap the output of a verification job.

        Stores the raw `data`, copies its `'content'` entry into this dictionary, reads the job id from that content, and creates a `Verifier` used by the plotting methods.

        :param data: (`dict`) job output; its `'content'` entry is expected to be a mapping that may carry a `'job_id'`.
        """
        self.data = data
        content = data.get('content')
        self['content'] = content
        self.job_id = content.get('job_id')
        self.verifier = Verifier()


[docs]
    def get_comparison(self, save_dest: str = None, fig_dimensions: tuple[int, int] = None, color_mapping: list[str] = None):
        """
        Plot the root-mean-squared error between simulator outputs of this result's job as a heatmap, by delegating to `Verifier.visualize_rmse` with this result's job id.

        :param save_dest: (`str`) destination at which to save figure. Defaults to `None`.
        :param fig_dimensions: (`Tuple[int, int], optional`) figure size forwarded to `visualize_rmse`. Defaults to `None`, in which case `visualize_rmse` applies its own default.
        :param color_mapping: (`List[str], optional`) color mapping forwarded to `visualize_rmse`. Defaults to `None`.

        :return: whatever `visualize_rmse` returns (matplotlib Figure and simulator RMSE scores)
        :rtype: `Tuple[matplotlib.Figure, Dict]`
        """
        plot_options = {
            'save_dest': save_dest,
            'fig_dimensions': fig_dimensions,
            'color_mapping': color_mapping,
        }
        return self.verifier.visualize_rmse(job_id=self.job_id, **plot_options)



[docs]
    def get_output_observables(self, save_dest: str = None, hspace: float = 0.25, use_grid: bool = False):
        """
        Plot the simulation output (observables) data of this result's job, not comparison data, by delegating to `Verifier.visualize_observables` with this result's job id.

        :param save_dest: (`str`) path to save the figure. Defaults to `None`.
        :param hspace: (`float`) subplot spacing forwarded to `visualize_observables`. Defaults to 0.25.
        :param use_grid: (`bool`) whether to use a grid for each subplot. Defaults to False.

        :return: whatever `visualize_observables` returns (matplotlib Figure and simulation observables)
        :rtype: `Tuple[matplotlib.Figure, Dict]`
        """
        plot_options = {
            'save_dest': save_dest,
            'hspace': hspace,
            'use_grid': use_grid,
        }
        return self.verifier.visualize_observables(job_id=self.job_id, **plot_options)




# tests

def test_verifier():
    """Submit the example Repressilator SBML model for verification and print the submission response."""
    # TODO: replace this
    submission = Verifier().verify_sbml(
        entrypoint="../model-examples/sbml-core/Elowitz-Nature-2000-Repressilator/BIOMD0000000012_url.xml",
        steps=100,
        start=0,
        end=10,
        simulators=['copasi', 'tellurium'],
        comparison_id="notebook_test1",
    )
    print(submission)