Source code for bio_compose.verifier

import os
import tempfile
from contextlib import ExitStack
from typing import *
from uuid import uuid4

import antimony
import numpy as np
import requests
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
from requests_toolbelt.multipart.encoder import MultipartEncoder

from bio_compose.data_model import Api, RequestError, save_plot


# Public names exported by a star-import of this module.
__all__ = ['Verifier', 'VerificationResult']


class Verifier(Api):
    """
    API for verifying (running and comparing) the results of multiple simulators for a given SBML model.
    """

    def __init__(self):
        """
        A new instance of the Verifier class.

        **NOTE**: this may clash with your record keeping in a notebook, so it is highly recommended that users
        treat instances of this class as quasi-singletons, although not necessary for fundamental interaction.
        """
        super().__init__()

    # -- request helpers

    @staticmethod
    def _open_upload(stack: ExitStack, path: str) -> Tuple[str, Any, str]:
        """
        Open a file for upload and describe it as a multipart file field.

        Args:
            - **stack**: `ExitStack`: The stack that takes ownership of the opened handle. The handle is passed on to
              `MultipartEncoder`, so it has to stay open until the caller has finished the request; it is closed when
              `stack` exits.
            - **path**: `str`: The path of the file to upload.

        Returns:
            A `(filename, file handle, content type)` tuple.

        Raises:
            `OSError`: If `path` cannot be opened for reading.
        """
        handle = stack.enter_context(open(path, 'rb'))
        return os.path.basename(path), handle, 'application/octet-stream'

    @staticmethod
    def _optional_query_params(
            selection_list: List[str] = None,
            rTol: float = None,
            aTol: float = None
    ) -> Dict[str, str]:
        """
        Build the query parameters that are only sent when the caller specified them.

        Args:
            - **selection_list**: `List[str], optional`: Observable names, sent as a comma-joined string when non-empty.
            - **rTol**: `float, optional`: Relative tolerance, sent whenever it is not `None`.
            - **aTol**: `float, optional`: Absolute tolerance, sent whenever it is not `None`.

        Returns:
            A dictionary holding only the parameters that were specified.
        """
        params = {}
        if selection_list:
            params['selection_list'] = ','.join(selection_list)
        # compare against None so that an explicit tolerance of 0.0 is not silently dropped
        if rTol is not None:
            params['rTol'] = str(rTol)
        if aTol is not None:
            params['aTol'] = str(aTol)
        return params

    def _submit_comparison(
            self,
            endpoint: str,
            encoder_fields: Dict[str, Any],
            query_params: Dict[str, Any]
    ) -> Union[Dict[str, str], RequestError]:
        """
        POST a multipart verification request and record the submitted job.

        Args:
            - **endpoint**: `str`: The url to post to.
            - **encoder_fields**: `Dict`: The multipart fields handed to `MultipartEncoder`.
            - **query_params**: `Dict`: The query parameters of the request.

        Returns:
            The decoded json response, which is also appended to `self.submitted_jobs`, or a `RequestError`
            carrying the message of any exception raised while sending or decoding the request.
        """
        multidata = MultipartEncoder(fields=encoder_fields)
        headers = {'Content-Type': multidata.content_type}
        try:
            response = requests.post(url=endpoint, headers=headers, data=multidata, params=query_params)
            response.raise_for_status()
            self._check_response(response)
            output = response.json()
            self.submitted_jobs.append(output)
            return output
        except Exception as e:
            return RequestError(error=str(e))

    # -- api calls

    def verify_omex(
            self,
            omex_filepath: str,
            simulators: List[str] = None,
            include_outputs: bool = True,
            comparison_id: str = None,
            expected_results: str = None,
            selection_list: List[str] = None,
            rTol: float = None,
            aTol: float = None,
            _steady_state: bool = False
    ) -> Union[Dict[str, str], RequestError]:
        """
        Submit a new uniform time course comparison job to the service and return confirmation of job submission.

        Args:
            - **omex_filepath**: `str`: The path to the omex file to submit.
            - **simulators**: `List[str]`: The list of simulators to include in comparison. Defaults to all utc
              simulators (amici, copasi, tellurium); an empty list also selects this default.
            - **include_outputs**: `bool, optional`: Whether to include the output data used to calculate comparison
              in the job results on result fetch. Defaults to True.
            - **comparison_id**: `str, optional`: The unique identifier for the comparison job. Defaults to None.
              If `None` is passed, a comparison id of `bio_check-request-<UUID>` is generated.
            - **expected_results**: `str, optional`: The path to the ground expected_results report file to include
              in comparison. Defaults to None.
            - **selection_list**: `List[str], optional`: The list of observables to include in comparison output.
              Defaults to None (all observables).
            - **rTol**: `float, optional`: The relative tolerance used to determine the relative distance in a
              pairwise comparison.
            - **aTol**: `float, optional`: The absolute tolerance used to determine the absolute distance in a
              pairwise comparison.
            - **_steady_state**: `bool, optional`: Whether to include the steady state analysis job. NOTE: This
              feature will currently throw an error as it is not yet implemented.

        Returns:
            A dictionary containing the job submission results, or a `RequestError` if the request failed.
            **Note**: the return status should read `PENDING`.

        Raises:
            `NotImplementedError`: If `_steady_state` is anything other than `False`.
            `OSError`: If one of the given files cannot be opened.
        """
        if _steady_state is not False:
            raise NotImplementedError("The steady state analysis of model files is not yet implemented and currently under development.")

        endpoint = self._format_endpoint('verify-omex')

        # configure params
        _id = comparison_id or "bio_check-request-" + str(uuid4())
        sims = simulators or ['amici', 'copasi', 'tellurium']
        query_params = {
            'simulators': sims,
            'include_outputs': str(include_outputs).lower(),
            'comparison_id': _id,
        }
        query_params.update(self._optional_query_params(selection_list, rTol, aTol))

        # the uploads must stay open while the request is sent; the stack closes them afterwards
        with ExitStack() as stack:
            _omex = self._open_upload(stack, omex_filepath)
            _report = self._open_upload(stack, expected_results) if expected_results else None
            # NOTE(review): a missing report is still passed as a `None` field — confirm the service expects this
            encoder_fields = {
                'uploaded_file': _omex,
                'expected_results': _report
            }
            return self._submit_comparison(endpoint, encoder_fields, query_params)

    def verify_sbml(
            self,
            entrypoint: str,
            start: int = 0,
            end: int = 10,
            steps: int = 10,
            simulators: List[str] = None,
            include_outputs: bool = True,
            comparison_id: str = None,
            expected_results: str = None,
            rTol: float = None,
            aTol: float = None,
            selection_list: List[str] = None,
            _steady_state: bool = False
    ) -> Union[Dict[str, str], RequestError]:
        """
        Submit a new uniform time course comparison job to the service and return confirmation of job submission.

        Args:
            - **entrypoint**: `str`: One of either: a path to a sbml file (ending in `.xml`) OR an antimony
              model/string that can be converted to SBML. Any value not ending in `.xml` is treated as antimony
              and converted into a temporary directory that is removed once the job has been submitted.
            - **start**: `int`: The start time of the time course to include in comparison.
            - **end**: `int`: The end of the comparison job in seconds.
            - **steps**: `int`: The number of steps in the comparison job.
            - **simulators**: `List[str], optional`: The list of simulators to include in comparison. Defaults to
              `['copasi', 'tellurium']` when `None` is passed.
            - **include_outputs**: `bool, optional`: Whether to include the output data used to calculate comparison
              in the job results on result fetch. Defaults to True.
            - **comparison_id**: `str, optional`: The unique identifier for the comparison job. Defaults to None.
              If `None` is passed, a comparison id of `bio_check-request-<UUID>` is generated.
            - **expected_results**: `str, optional`: The path to the ground expected_results report file to include
              in comparison. Defaults to None.
            - **rTol**: `float, optional`: The relative tolerance used to determine the relative distance in a
              pairwise comparison.
            - **aTol**: `float, optional`: The absolute tolerance used to determine the absolute distance in a
              pairwise comparison.
            - **selection_list**: `List[str], optional`: Observables to include in the output. If passed, all
              observable names NOT in this list will be excluded. Defaults to `None` (all observables).
            - **_steady_state**: `bool, optional`: Whether to include the steady state analysis job. NOTE: This
              feature will currently throw an error as it is not yet implemented.

        Returns:
            A dictionary containing the job submission results, or a `RequestError` if the request failed.
            **Note**: the return status should read `PENDING`.

        Raises:
            `NotImplementedError`: If `_steady_state` is anything other than `False`.
            `IOError`: If an antimony entrypoint cannot be converted to SBML.
            `OSError`: If one of the given files cannot be opened.
        """
        if _steady_state is not False:
            raise NotImplementedError("The steady state analysis of model files is not yet implemented and currently under development.")

        endpoint = self._format_endpoint('verify-sbml')

        _id = comparison_id or "bio_check-request-" + str(uuid4())
        if simulators is None:
            simulators = ["copasi", "tellurium"]

        query_params = {
            'simulators': simulators,
            'include_outputs': str(include_outputs).lower(),
            'comparison_id': _id,
            'start': str(start),
            'end': str(end),
            'steps': str(steps)
        }
        query_params.update(self._optional_query_params(selection_list, rTol, aTol))

        with ExitStack() as stack:
            # handle entrypoint as antimony
            if not entrypoint.endswith('.xml'):
                # entered before the uploads are opened, so the directory is removed only after they are closed
                dest = stack.enter_context(tempfile.TemporaryDirectory())
                entrypoint = self._write_antimony_to_sbml(entrypoint, dest)

            sbml_fp = self._open_upload(stack, entrypoint)
            _report = self._open_upload(stack, expected_results) if expected_results else None

            # create encoder fields
            # NOTE(review): a missing report is still passed as a `None` field — confirm the service expects this
            encoder_fields = {
                'uploaded_file': sbml_fp,
                'expected_results': _report
            }
            return self._submit_comparison(endpoint, encoder_fields, query_params)

    def get_rmse(self, job_id: str) -> dict:
        """
        Get root-mean-square error scoring for all simulators involved in the given verification job.

        Args:
            - **job_id**: `str`: The unique identifier for the verification job.

        Returns:
            The `rmse` entry of the job results (an empty dictionary if the results carry no such entry).
            If the output cannot be fetched or read, a dictionary with a single `'error'` key holding the
            formatted traceback is returned instead of raising.
        """
        try:
            output = self.get_output(job_id=job_id)
            return output['content'].get('results').get('rmse', {})
        except Exception:
            # report the failure to the caller as data; `visualize_rmse` checks for the 'error' key
            import traceback
            tb_str = traceback.format_exc()
            error_message = (
                f"Traceback:\n{tb_str}"
            )
            return {'error': error_message}

    def get_compatible(self, file: str, versions: bool = False) -> Union[List[Union[str, Tuple[Any, ...]]], RequestError]:
        """
        Get all simulators and optionally their versions for a given file. The File is expected to be either an
        OMEX/COMBINE archive or SBML file.

        Args:
            - **file**: `str`: The path to the file to be checked.
            - **versions**: `bool`: Whether to return the compatible version of the given compatible simulator.
              Defaults to `False`.

        Returns:
            A list with one entry per simulator in the response: a `(name, version)` tuple when the response
            carries a version for that simulator, otherwise just the simulator name. A `RequestError` is returned
            if the request or the parsing of its response fails.

        Raises:
            `OSError`: If `file` cannot be opened.
        """
        endpoint = self._format_endpoint('get-compatible-for-verification')
        query_params = {'versions': str(versions).lower()}

        # the upload must stay open while the request is sent; the stack closes it afterwards
        with ExitStack() as stack:
            encoder_fields = {'uploaded_file': self._open_upload(stack, file)}
            multidata = MultipartEncoder(fields=encoder_fields)
            headers = {'Content-Type': multidata.content_type}
            try:
                response = requests.post(url=endpoint, headers=headers, data=multidata, params=query_params)
                self._check_response(response)
                payload = response.json()
                output = []
                for sim_data in payload['simulators']:
                    name = sim_data['name']
                    version = sim_data.get('version')
                    output.append((name, version) if version is not None else name)
                return output
            except Exception as e:
                return RequestError(error=str(e))

    # -- visualizations

    @save_plot
    def visualize_observables(self, job_id: str, hspace: float = 0.25, use_grid: bool = False, save_dest: str = None):
        """
        Visualize simulation output (observables) data, not comparison data, with one subplot per simulator in
        which every observable is drawn as a line.

        Args:
            - **job_id**: `str`: job id for the simulation observable output you wish to visualize.
            - **hspace**: `float`: vertical spacing between the stacked subplots, passed to
              `plt.subplots_adjust(hspace=...)`. Defaults to 0.25.
            - **use_grid**: `bool`: whether to use a grid for each subplot. Defaults to False.
            - **save_dest**: `str`: path to save the figure. Not read in this method body; presumably consumed by
              the `save_plot` decorator — verify against its definition.

        Returns:
            `Tuple[matplotlib.Figure, Dict]` of matplotlib Figure and simulation observables, indexed by
            observable name and then by simulator.

        Raises:
            `IOError`: If the job output does not contain a 'results' field, or if the results contain no
            observable or no simulator output that can be plotted.
        """
        # grab output from job id
        output = self.get_output(job_id)

        species_data_content = output['content'].get('results')
        if species_data_content is None:
            raise IOError(f"The job for {job_id} is either not ready or has an error. \nPlease check the output.")

        # keys of the results that are metadata rather than observables
        excluded_observables = ['comparison_id', 'rmse', 'time', 'Time', 'data_generator_time', 'Time (dimensionless)']
        observables = [key for key in species_data_content if key not in excluded_observables]
        if not observables:
            raise IOError(f"The job for {job_id} does not contain any observables to visualize.")

        # extract the list of simulators from the `output_data` for one observable, leaving out the ones
        # whose output is a string (an error message); built as a new list rather than removing while iterating
        first_observable = species_data_content[observables[0]]
        simulators = [
            sim for sim, data in first_observable['output_data'].items()
            if not isinstance(data, str)
        ]
        n_simulators = len(simulators)
        if n_simulators == 0:
            raise IOError(f"The job for {job_id} does not contain any simulator output to visualize.")

        # create subplots
        fig, axes = plt.subplots(nrows=n_simulators, ncols=1, figsize=(4, 3 * n_simulators))

        # if only one simulator, `axes` won't be an array, so make it a list
        if n_simulators == 1:
            axes = [axes]

        obs = {observable: {} for observable in observables}

        # iterate over simulators and plot each observable
        for ax, simulator in zip(axes, simulators):
            for observable in observables:
                value_data = species_data_content[observable]['output_data'].get(simulator)
                # skip observables this simulator did not report or reported as an error string
                if value_data is None or isinstance(value_data, str):
                    continue
                obs[observable][simulator] = value_data
                sns.lineplot(data=value_data, ax=ax, label=observable)

            # capitalise only the first character; str.replace would touch every occurrence of it
            sim = simulator[:1].upper() + simulator[1:]
            ax.set_title(f"{sim} Observable Results")
            ax.set_xlabel("Time")
            ax.set_ylabel("Value")
            ax.grid(use_grid)
            # hide the x-axis tick labels
            ax.set_xticks([])

        # adjust layout for better spacing
        plt.tight_layout()
        plt.subplots_adjust(hspace=hspace)
        plt.show()
        return fig, obs

    @save_plot
    def visualize_rmse(self, job_id: str, fig_dimensions: tuple[int, int] = None, color_mapping: list[str] = None, save_dest: str = None):
        """
        Visualize the root-mean-squared error between simulator verification outputs as a heatmap.

        Args:
            - **job_id**: `str`: verification job id. This value can be easily derived from either of
              `Verifier`'s `.verify_...` methods.
            - **fig_dimensions**: `Tuple[int, int], optional`: The value to use as the `figsize` parameter for a
              call to `matplotlib.pyplot.figure()`. If `None` is passed, default to (4, 3).
            - **color_mapping**: `optional`: The value passed as `cmap` to `seaborn.heatmap`. If `None` is
              passed, the `"Blues"` colormap is used.
            - **save_dest**: `str`: destination at which to save figure. Defaults to `None`. Not read in this
              method body; presumably consumed by the `save_plot` decorator — verify against its definition.

        Returns:
            `Tuple[matplotlib.Figure, Dict]` of matplotlib Figure and the plotted RMSE scores per simulator,
            with missing (`None`) scores replaced by `numpy.nan`.

        Raises:
            `ValueError`: If the rmse scores of the job are empty, contain an error, or hold no score rows.
        """
        error_message = f"The job for {job_id} is either not ready or has an error in rmse scoring. Please check the output."

        # extract data
        rmse_matrix = self.get_rmse(job_id)
        if not rmse_matrix or 'error' in rmse_matrix:
            raise ValueError(error_message)

        simulators = list(rmse_matrix.keys())

        # extract rmse data row by row and replace None with np.nan; the row labels are collected
        # alongside the rows so that they stay aligned even if an entry is not a score mapping
        row_labels = []
        rmse_data = []
        for sim_name, scores in rmse_matrix.items():
            if not isinstance(scores, dict):
                continue
            row_labels.append(sim_name)
            rmse_data.append([np.nan if value is None else value for value in scores.values()])

        if not rmse_data:
            raise ValueError(error_message)

        if color_mapping is None:
            color_mapping = "Blues"

        # set up figure
        dimensions = fig_dimensions or (4, 3)
        fig = plt.figure(figsize=dimensions)
        sns.heatmap(
            data=rmse_data,
            annot=True,
            xticklabels=simulators,
            yticklabels=row_labels,
            cmap=color_mapping,
            vmin=0,
            vmax=1,
            linewidths=1
        )

        # set up plot annotations
        plt.title('Pairwise Root Mean Square Error Between Simulators')
        plt.tight_layout()
        plt.show()
        return fig, dict(zip(row_labels, rmse_data))

    def visualize_comparison(self, data: Dict, simulators: List[str], comparison_type='proximity', color_mapping: List[str] = None) -> Figure:
        """
        Visualize simulation comparison matrix in the form of a heatmap, with one heatmap per species.

        Args:
            - **data**: `dict`: simulation output data; the species are read from `data['content']['results']`.
            - **simulators**: `list[str]`: list of simulators, used as the tick labels of both axes.
            - **comparison_type**: `str`: type of comparison. Defaults to `'proximity'`.
            - **color_mapping**: `list[str]`: list of colors to use for True and False responses, in that order.
              Defaults to None (dark blue for True, dark orange for False).

        Returns:
            `matplotlib.pyplot.Figure` of a plot grid

        Raises:
            `ValueError`: If no entry of the results carries a `comparison_type` matrix.
        """
        species_data_content = data['content']['results']

        # the results also hold entries such as 'rmse' or 'comparison_id' that carry no comparison matrix,
        # so only the entries that do are plotted
        species_names = [
            name for name, species_data in species_data_content.items()
            if isinstance(species_data, dict) and comparison_type in species_data
        ]
        num_species = len(species_names)
        if num_species == 0:
            raise ValueError(f"The given data does not contain any '{comparison_type}' comparison to visualize.")

        fig, axes = plt.subplots(nrows=num_species, figsize=(15, 5 * num_species))

        if color_mapping is not None:
            true_color = color_mapping[0]
            false_color = color_mapping[1]
        else:
            true_color = '#1E3A8A'  # dark blue
            false_color = '#D97706'  # dark orange

        # a single subplot is returned bare rather than in an array
        if num_species == 1:
            axes = [axes]

        for ax, species_name in zip(axes, species_names):
            species_data = species_data_content[species_name]
            comparison_data = [list(col.values()) for col in species_data[comparison_type].values()]
            sns.heatmap(
                data=comparison_data,
                ax=ax,
                annot=True,
                xticklabels=simulators,
                yticklabels=simulators,
                cmap=[false_color, true_color],
                linewidths=1
            )
            ax.set_title(f"{species_name} comparison matrix")

        plt.tight_layout()
        plt.show()
        return fig

    def _write_antimony_to_sbml(self, antimony_string: str, dest: str, model_name: str = None) -> str:
        """
        Convert an antimony model to SBML. To be used as an entrypoint validator for `Verifier().verify_sbml()`.

        Args:
            - **antimony_string**: `str`: Antimony model to convert.
            - **dest**: `str`: Destination directory in which to save the SBML file.
            - **model_name**: `str`: File name to use for the converted model file. Defaults to `None`
              (a generic `'model.xml'`).

        Returns:
            Path to the written SBML file.

        Raises:
            `IOError`: If Antimony cannot load the model string or cannot write the SBML file.
        """
        ant_ret = antimony.loadAntimonyString(antimony_string)
        if ant_ret == -1:
            raise IOError(f"This antimony string cannot be converted to SBML by Antimony: {antimony_string}. Please check the model and try again.")

        filename = model_name or 'model.xml'
        file_path = os.path.join(dest, filename)

        # Antimony's writeSBMLFile takes the module to export as its second argument;
        # the main module of the string that was just loaded is the model to write
        sbml_ret = antimony.writeSBMLFile(file_path, antimony.getMainModuleName())
        if sbml_ret > 0:
            return file_path
        raise IOError(f"This SBML model: {filename} cannot be written to {file_path}. Please check your paths and try again.")
class VerificationResult(dict):
    """
    Dictionary view of a verification job output that can plot itself.

    The `'content'` entry of the given data is stored on the dictionary, the job id is read from that content,
    and a dedicated `Verifier` instance is created to produce the visualizations.
    """

    def __init__(self, data: dict):
        self.data = data
        content = self.data.get('content')
        self['content'] = content
        self.job_id = content.get('job_id')
        self.verifier = Verifier()

    def get_comparison(self, save_dest: str = None, fig_dimensions: tuple[int, int] = None, color_mapping: list[str] = None):
        """
        Visualize the root-mean-squared error between simulator verification outputs as a heatmap.

        Delegates to `Verifier.visualize_rmse` for this result's job id.

        :param save_dest: (`str`) destination at which to save figure. Defaults to `None`.
        :param fig_dimensions: (`Tuple[int, int], optional`) The value to use as the `figsize` parameter for a call to `matplotlib.pyplot.figure()`. If `None` is passed, `Verifier.visualize_rmse` falls back to (4, 3).
        :param color_mapping: (`List[str], optional`) color mapping forwarded to the heatmap. Defaults to None.
        :return: matplotlib Figure and simulator RMSE scores
        :rtype: `Tuple[matplotlib.Figure, Dict]`
        """
        plot_options = {
            'save_dest': save_dest,
            'fig_dimensions': fig_dimensions,
            'color_mapping': color_mapping,
        }
        return self.verifier.visualize_rmse(job_id=self.job_id, **plot_options)

    def get_output_observables(self, save_dest: str = None, hspace: float = 0.25, use_grid: bool = False):
        """
        Visualize simulation output (observables) data, not comparison data.

        Delegates to `Verifier.visualize_observables` for this result's job id.

        :param save_dest: (`str`) path to save the figure.
        :param hspace: (`float`) spacing between subplots, forwarded as `hspace`. Defaults to 0.25.
        :param use_grid: (`bool`) whether to use a grid for each subplot. Defaults to False.
        :return: matplotlib Figure and simulation observables
        :rtype: `Tuple[matplotlib.Figure, Dict]`
        """
        plot_options = {
            'save_dest': save_dest,
            'hspace': hspace,
            'use_grid': use_grid,
        }
        return self.verifier.visualize_observables(job_id=self.job_id, **plot_options)
# tests

def test_verifier():
    """Submit the repressilator example model for verification and print the submission response."""
    # TODO: replace this
    model_path = "../model-examples/sbml-core/Elowitz-Nature-2000-Repressilator/BIOMD0000000012_url.xml"
    submission = Verifier().verify_sbml(
        entrypoint=model_path,
        steps=100,
        start=0,
        end=10,
        simulators=['copasi', 'tellurium'],
        comparison_id="notebook_test1",
    )
    print(submission)