# ms_autoqc.DatabaseFunctions
1import traceback 2import warnings 3warnings.simplefilter(action="ignore", category=FutureWarning) 4 5import os, io, shutil, time 6import hashlib, json, ast 7import pandas as pd 8import numpy as np 9import sqlalchemy as sa 10from pydrive2.auth import GoogleAuth 11from pydrive2.drive import GoogleDrive 12from sqlalchemy import INTEGER, REAL, TEXT 13import base64 14from email.message import EmailMessage 15import google.auth as google_auth 16from googleapiclient.discovery import build 17from googleapiclient.errors import HttpError 18 19# Set ms_autoqc/src as the working directory 20src_folder = os.path.dirname(os.path.realpath(__file__)) 21os.chdir(src_folder) 22 23# Initialize directories 24root_directory = os.getcwd() 25data_directory = os.path.join(root_directory, "data") 26methods_directory = os.path.join(data_directory, "methods") 27auth_directory = os.path.join(root_directory, "auth") 28 29# Location of settings SQLite database 30settings_database = "sqlite:///data/methods/Settings.db" 31settings_db_file = os.path.join(methods_directory, "Settings.db") 32 33# Google Drive authentication files 34credentials_file = os.path.join(auth_directory, "credentials.txt") 35alt_credentials = os.path.join(auth_directory, "email_credentials.txt") 36drive_settings_file = os.path.join(auth_directory, "settings.yaml") 37auth_container = [GoogleAuth(settings_file=drive_settings_file)] 38 39""" 40The functions defined below operate on two database types: 41 42- One storing instrument run metadata, sample QC results, and biological standard QC results 43- The other storing instrument metadata, workspace settings for workspace access, chromatography methods, 44biological standards, QC configurations, and MS-DIAL configurations 45 46In addition, this file also contains methods for syncing data and settings with Google Drive. 47To get an overview of all functions, please visit the documentation on https://czbiohub.github.io/MS-AutoQC. 
48""" 49 50def get_database_file(instrument_id, sqlite_conn=False, zip=False): 51 52 """ 53 Returns database file for a given instrument ID. 54 55 Args: 56 instrument_id (str): 57 Instrument ID that specifies which database file to retrieve 58 sqlite_conn (bool, default False): 59 Whether to receive the path for establishing a SQLite connection 60 zip (bool, default False): 61 Whether to receive the path of the database file in the local app directory 62 63 Returns: 64 str: Path for the database file 65 """ 66 67 if zip: 68 filename = instrument_id.replace(" ", "_") + ".zip" 69 else: 70 filename = instrument_id.replace(" ", "_") + ".db" 71 72 if sqlite_conn: 73 return "sqlite:///data/" + filename 74 else: 75 return os.path.join(data_directory, filename) 76 77 78def connect_to_database(name): 79 80 """ 81 Establishes a connection to a SQLite database of choice 82 83 Args: 84 name (str): 85 Name of the database, either "Settings" or an instrument ID 86 87 Returns: 88 sqlalchemy.MetaData: 89 A container object that consists of different features of a database being described 90 sqlalchemy.Connection: 91 An object that represents a single DBAPI connection, and always emits SQL statements within 92 the context of a transaction block 93 """ 94 95 if name == "Settings": 96 database_file = settings_database 97 else: 98 database_file = get_database_file(instrument_id=name, sqlite_conn=True) 99 100 engine = sa.create_engine(database_file) 101 db_metadata = sa.MetaData(bind=engine) 102 connection = engine.connect() 103 104 return db_metadata, connection 105 106 107def create_databases(instrument_id, new_instrument=False): 108 109 """ 110 Initializes SQLite databases for 1) instrument data and 2) workspace settings. 111 112 Creates the following tables in the instrument database: "runs", "bio_qc_results", "sample_qc_results". 
113 114 Creates the following tables in the settings database: "biological_standards", "chromatography_methods", 115 "email_notifications", "instruments", "gdrive_users", "internal_standards", "msdial_parameters", "qc_parameters", 116 "targeted_features", "workspace". 117 118 Args: 119 instrument_id (str): 120 Instrument ID to name the new database ("Thermo QE 1" becomes "Thermo_QE_1.db") 121 new_instrument (bool, default False): 122 Whether a new instrument database is being added to a workspace, or whether a new 123 instrument database AND settings database are being created for the first time 124 125 Returns: 126 None 127 """ 128 129 # Create tables for instrument database 130 instrument_database = get_database_file(instrument_id=instrument_id, sqlite_conn=True) 131 qc_db_engine = sa.create_engine(instrument_database) 132 qc_db_metadata = sa.MetaData() 133 134 bio_qc_results = sa.Table( 135 "bio_qc_results", qc_db_metadata, 136 sa.Column("id", INTEGER, primary_key=True), 137 sa.Column("sample_id", TEXT), 138 sa.Column("run_id", TEXT), 139 sa.Column("polarity", TEXT), 140 sa.Column("precursor_mz", TEXT), 141 sa.Column("retention_time", TEXT), 142 sa.Column("intensity", TEXT), 143 sa.Column("md5", TEXT), 144 sa.Column("qc_dataframe", TEXT), 145 sa.Column("qc_result", TEXT), 146 sa.Column("biological_standard", TEXT), 147 sa.Column("position", TEXT) 148 ) 149 150 runs = sa.Table( 151 "runs", qc_db_metadata, 152 sa.Column("id", INTEGER, primary_key=True), 153 sa.Column("run_id", TEXT), 154 sa.Column("chromatography", TEXT), 155 sa.Column("acquisition_path", TEXT), 156 sa.Column("sequence", TEXT), 157 sa.Column("metadata", TEXT), 158 sa.Column("status", TEXT), 159 sa.Column("samples", INTEGER), 160 sa.Column("completed", INTEGER), 161 sa.Column("passes", INTEGER), 162 sa.Column("fails", INTEGER), 163 sa.Column("latest_sample", TEXT), 164 sa.Column("qc_config_id", TEXT), 165 sa.Column("biological_standards", TEXT), 166 sa.Column("pid", INTEGER), 167 
sa.Column("drive_id", TEXT), 168 sa.Column("sample_status", TEXT), 169 sa.Column("job_type", TEXT) 170 ) 171 172 sample_qc_results = sa.Table( 173 "sample_qc_results", qc_db_metadata, 174 sa.Column("id", INTEGER, primary_key=True), 175 sa.Column("sample_id", TEXT), 176 sa.Column("run_id", TEXT), 177 sa.Column("polarity", TEXT), 178 sa.Column("position", TEXT), 179 sa.Column("md5", TEXT), 180 sa.Column("precursor_mz", TEXT), 181 sa.Column("retention_time", TEXT), 182 sa.Column("intensity", TEXT), 183 sa.Column("qc_dataframe", TEXT), 184 sa.Column("qc_result", TEXT) 185 ) 186 187 qc_db_metadata.create_all(qc_db_engine) 188 189 # If only creating instrument database, save and return here 190 if new_instrument: 191 set_device_identity(is_instrument_computer=True, instrument_id=instrument_id) 192 return None 193 194 # Create tables for Settings.db 195 settings_db_engine = sa.create_engine(settings_database) 196 settings_db_metadata = sa.MetaData() 197 198 instruments = sa.Table( 199 "instruments", settings_db_metadata, 200 sa.Column("id", INTEGER, primary_key=True), 201 sa.Column("name", TEXT), 202 sa.Column("vendor", TEXT), 203 sa.Column("drive_id", TEXT), 204 sa.Column("last_modified", TEXT) 205 ) 206 207 biological_standards = sa.Table( 208 "biological_standards", settings_db_metadata, 209 sa.Column("id", INTEGER, primary_key=True), 210 sa.Column("name", TEXT), 211 sa.Column("identifier", TEXT), 212 sa.Column("chromatography", TEXT), 213 sa.Column("num_pos_features", INTEGER), 214 sa.Column("num_neg_features", INTEGER), 215 sa.Column("pos_bio_msp_file", TEXT), 216 sa.Column("neg_bio_msp_file", TEXT), 217 sa.Column("pos_parameter_file", TEXT), 218 sa.Column("neg_parameter_file", TEXT), 219 sa.Column("msdial_config_id", TEXT) 220 ) 221 222 chromatography_methods = sa.Table( 223 "chromatography_methods", settings_db_metadata, 224 sa.Column("id", INTEGER, primary_key=True), 225 sa.Column("method_id", TEXT), 226 sa.Column("num_pos_standards", INTEGER), 227 
sa.Column("num_neg_standards", INTEGER), 228 sa.Column("pos_istd_msp_file", TEXT), 229 sa.Column("neg_istd_msp_file", TEXT), 230 sa.Column("pos_parameter_file", TEXT), 231 sa.Column("neg_parameter_file", TEXT), 232 sa.Column("msdial_config_id", TEXT) 233 ) 234 235 gdrive_users = sa.Table( 236 "gdrive_users", settings_db_metadata, 237 sa.Column("id", INTEGER, primary_key=True), 238 sa.Column("name", TEXT), 239 sa.Column("email_address", TEXT), 240 sa.Column("permission_id", TEXT), 241 ) 242 243 internal_standards = sa.Table( 244 "internal_standards", settings_db_metadata, 245 sa.Column("id", INTEGER, primary_key=True), 246 sa.Column("name", TEXT), 247 sa.Column("chromatography", TEXT), 248 sa.Column("polarity", TEXT), 249 sa.Column("precursor_mz", REAL), 250 sa.Column("retention_time", REAL), 251 sa.Column("ms2_spectrum", TEXT), 252 sa.Column("inchikey", TEXT) 253 ) 254 255 msdial_parameters = sa.Table( 256 "msdial_parameters", settings_db_metadata, 257 sa.Column("id", INTEGER, primary_key=True), 258 sa.Column("config_name", TEXT), 259 sa.Column("rt_begin", INTEGER), 260 sa.Column("rt_end", INTEGER), 261 sa.Column("mz_begin", INTEGER), 262 sa.Column("mz_end", INTEGER), 263 sa.Column("ms1_centroid_tolerance", REAL), 264 sa.Column("ms2_centroid_tolerance", REAL), 265 sa.Column("smoothing_method", TEXT), 266 sa.Column("smoothing_level", INTEGER), 267 sa.Column("min_peak_width", INTEGER), 268 sa.Column("min_peak_height", INTEGER), 269 sa.Column("mass_slice_width", REAL), 270 sa.Column("post_id_rt_tolerance", REAL), 271 sa.Column("post_id_mz_tolerance", REAL), 272 sa.Column("post_id_score_cutoff", REAL), 273 sa.Column("alignment_rt_tolerance", REAL), 274 sa.Column("alignment_mz_tolerance", REAL), 275 sa.Column("alignment_rt_factor", REAL), 276 sa.Column("alignment_mz_factor", REAL), 277 sa.Column("peak_count_filter", INTEGER), 278 sa.Column("qc_at_least_filter", TEXT) 279 ) 280 281 email_notifications = sa.Table( 282 "email_notifications", settings_db_metadata, 283 
sa.Column("id", INTEGER, primary_key=True), 284 sa.Column("email_address", TEXT), 285 ) 286 287 qc_parameters = sa.Table( 288 "qc_parameters", settings_db_metadata, 289 sa.Column("id", INTEGER, primary_key=True), 290 sa.Column("config_name", TEXT), 291 sa.Column("intensity_dropouts_cutoff", INTEGER), 292 sa.Column("library_rt_shift_cutoff", REAL), 293 sa.Column("in_run_rt_shift_cutoff", REAL), 294 sa.Column("library_mz_shift_cutoff", REAL), 295 sa.Column("intensity_enabled", INTEGER), 296 sa.Column("library_rt_enabled", INTEGER), 297 sa.Column("in_run_rt_enabled", INTEGER), 298 sa.Column("library_mz_enabled", INTEGER) 299 ) 300 301 targeted_features = sa.Table( 302 "targeted_features", settings_db_metadata, 303 sa.Column("id", INTEGER, primary_key=True), 304 sa.Column("name", TEXT), 305 sa.Column("chromatography", TEXT), 306 sa.Column("polarity", TEXT), 307 sa.Column("biological_standard", TEXT), 308 sa.Column("precursor_mz", REAL), 309 sa.Column("retention_time", REAL), 310 sa.Column("ms2_spectrum", TEXT), 311 sa.Column("inchikey", TEXT) 312 ) 313 314 workspace = sa.Table( 315 "workspace", settings_db_metadata, 316 sa.Column("id", INTEGER, primary_key=True), 317 sa.Column("slack_bot_token", TEXT), 318 sa.Column("slack_channel", TEXT), 319 sa.Column("slack_enabled", INTEGER), 320 sa.Column("gdrive_folder_id", TEXT), 321 sa.Column("methods_zip_file_id", TEXT), 322 sa.Column("methods_last_modified", TEXT), 323 sa.Column("msdial_directory", TEXT), 324 sa.Column("is_instrument_computer", INTEGER), 325 sa.Column("instrument_identity", TEXT) 326 ) 327 328 # Insert tables into database 329 settings_db_metadata.create_all(settings_db_engine) 330 331 # Insert default configurations for MS-DIAL and MS-AutoQC 332 add_msdial_configuration("Default") 333 add_qc_configuration("Default") 334 335 # Initialize workspace metadata 336 create_workspace_metadata() 337 338 # Save device identity based on setup values 339 set_device_identity(is_instrument_computer=True, 
instrument_id=instrument_id) 340 return None 341 342 343def execute_vacuum(database): 344 345 """ 346 Executes VACUUM command on the database of choice. 347 348 Args: 349 database (str): name of the database, either "Settings" or Instrument ID 350 351 Returns: 352 None 353 """ 354 355 db_metadata, connection = connect_to_database(database) 356 connection.execute("VACUUM") 357 connection.close() 358 359 360def get_drive_instance(): 361 362 """ 363 Returns user-authenticated Google Drive instance. 364 """ 365 366 return GoogleDrive(auth_container[0]) 367 368 369def launch_google_drive_authentication(): 370 371 """ 372 Launches Google Drive authentication flow and sets authentication instance. 373 """ 374 375 auth_container[0] = GoogleAuth(settings_file=drive_settings_file) 376 auth_container[0].LocalWebserverAuth() 377 378 379def save_google_drive_credentials(): 380 381 """ 382 Saves Google credentials to a credentials.txt file. 383 """ 384 385 auth_container[0].SaveCredentialsFile(credentials_file) 386 387 388def initialize_google_drive(): 389 390 """ 391 Initializes instance of Google Drive using credentials.txt and settings.yaml in /auth directory 392 393 Args: 394 None 395 396 Returns: 397 bool: Whether the Google client credentials file (in the "auth" directory) exists. 
398 """ 399 400 # Create Google Drive instance 401 auth_container[0] = GoogleAuth(settings_file=drive_settings_file) 402 gauth = auth_container[0] 403 404 # If no credentials file, make user authenticate 405 if not os.path.exists(credentials_file) and is_valid(): 406 gauth.LocalWebserverAuth() 407 408 # Try to load saved client credentials 409 gauth.LoadCredentialsFile(credentials_file) 410 411 # Initialize saved credentials 412 if gauth.credentials is not None: 413 414 # Refresh credentials if expired 415 if gauth.access_token_expired: 416 gauth.Refresh() 417 418 # Otherwise, authorize saved credentials 419 else: 420 gauth.Authorize() 421 422 # If no saved credentials, make user authenticate again 423 elif gauth.credentials is None: 424 gauth.LocalWebserverAuth() 425 426 if not os.path.exists(credentials_file) and is_valid(): 427 save_google_drive_credentials() 428 429 # Makes small modification for emails (for usage with Google's google.auth) 430 if not os.path.exists(alt_credentials): 431 data = None 432 with open(credentials_file, "r") as file: 433 data = json.load(file) 434 data["type"] = "authorized_user" 435 with open(alt_credentials, "w") as file: 436 json.dump(data, file) 437 438 return os.path.exists(credentials_file) 439 440 441def is_valid(instrument_id=None): 442 443 """ 444 Checks that all required tables in all databases (or a single database of choice) are present. 
445 446 Args: 447 instrument_id (str, default None): 448 Specified if validating a specific database 449 450 Returns: 451 None 452 """ 453 454 # Validate settings database 455 settings_db_required_tables = ["biological_standards", "chromatography_methods", "email_notifications", "instruments", 456 "gdrive_users", "internal_standards", "msdial_parameters", "qc_parameters", "targeted_features", "workspace"] 457 458 try: 459 settings_db_tables = sa.create_engine(settings_database).table_names() 460 if len(settings_db_tables) < len(settings_db_required_tables): 461 return False 462 except: 463 return False 464 465 # Validate instrument databases 466 instrument_db_required_tables = ["bio_qc_results", "runs", "sample_qc_results"] 467 468 # If given an instrument ID, only validate that instrument's database 469 try: 470 if instrument_id is not None: 471 database = get_database_file(instrument_id, sqlite_conn=True) 472 instrument_db_tables = sa.create_engine(database).table_names() 473 if len(instrument_db_tables) < len(instrument_db_required_tables): 474 return False 475 476 # Otherwise, validate all instrument databases 477 else: 478 database_files = [file.replace(".db", "") for file in os.listdir(data_directory) if ".db" in file and "journal.db" not in file] 479 databases = [get_database_file(f, sqlite_conn=True) for f in database_files] 480 481 for database in databases: 482 instrument_db_tables = sa.create_engine(database).table_names() 483 if len(instrument_db_tables) < len(instrument_db_required_tables): 484 return False 485 except: 486 return False 487 488 return True 489 490 491def sync_is_enabled(): 492 493 """ 494 Checks whether Google Drive sync is enabled simply by querying whether Google Drive ID's exist in the database. 495 496 Typically used for separating sync-specific functionality. 
497 498 Returns: 499 bool: Whether Google Drive sync is enabled or not 500 """ 501 502 if not is_valid(): 503 return False 504 505 df_workspace = get_table("Settings", "workspace") 506 gdrive_folder_id = df_workspace["gdrive_folder_id"].values[0] 507 methods_zip_file_id = df_workspace["methods_zip_file_id"].values[0] 508 509 if gdrive_folder_id is not None and methods_zip_file_id is not None: 510 if gdrive_folder_id != "None" and methods_zip_file_id != "None": 511 if gdrive_folder_id != "" and methods_zip_file_id != "": 512 return True 513 514 return False 515 516 517def email_notifications_are_enabled(): 518 519 """ 520 Checks whether email notifications are enabled. 521 522 Returns True if databases are valid, Google Drive sync is enabled, and if email addresses were 523 registered by user in Settings > General. Returns False if any condition is not met. 524 525 Returns: 526 bool: True if email notifications are enabled, False if not 527 """ 528 529 if not is_valid(): 530 return False 531 532 if not sync_is_enabled(): 533 return False 534 535 if len(get_table("Settings", "email_notifications")) > 0: 536 return True 537 538 return False 539 540 541def slack_notifications_are_enabled(): 542 543 """ 544 Checks whether Slack notifications are enabled. 545 546 Returns True if user enabled Slack notifications in Settings > General, and False if not. 547 548 Returns: 549 bool: True if Slack notifications are enabled, False if not 550 """ 551 552 if not is_valid(): 553 return False 554 555 try: 556 return bool(get_table("Settings", "workspace")["slack_enabled"].astype(int).tolist()[0]) 557 except: 558 return False 559 560 561def is_instrument_computer(): 562 563 """ 564 Checks whether user's device is the instrument computer. 565 566 This is specified during setup. If the user created a new instrument, or signed in as an instrument device, then 567 this will return True. If the user signed in to their workspace from a non-instrument device, this will return False. 
568 569 Typically used to organize / hide UI functions for instrument and non-instrument devices 570 that MS-AutoQC is installed on. 571 572 Returns: 573 True if device is instrument computer, False if not 574 """ 575 576 return bool(get_table("Settings", "workspace")["is_instrument_computer"].astype(int).tolist()[0]) 577 578 579def get_md5_for_settings_db(): 580 581 """ 582 Calculates and returns MD5 checksum for the settings database file. 583 584 Typically used for checking whether the user changed settings and prompting a Google Drive sync (if sync is enabled). 585 586 Returns: 587 An MD5 checksum of /data/methods/Settings.db 588 """ 589 590 hash_md5 = hashlib.md5() 591 592 with open(settings_db_file, "rb") as f: 593 for chunk in iter(lambda: f.read(4096), b""): 594 hash_md5.update(chunk) 595 596 return hash_md5.hexdigest() 597 598 599def settings_were_modified(md5_checksum): 600 601 """ 602 Checks whether settings database file has been modified. 603 604 This is done by comparing the checksum computed when Settings were opened (given as a parameter) 605 with the checksum computed when Settings were closed (in this function call). 606 607 Args: 608 md5_checksum (str): 609 An MD5 checksum of /data/methods/Settings.db that was computed when the user opened Settings in the app 610 611 Returns: 612 bool: True if checksums don't match, False if checksums match. 613 """ 614 615 if md5_checksum != get_md5_for_settings_db(): 616 return True 617 else: 618 return False 619 620 621def zip_database(instrument_id=None, filename=None): 622 623 """ 624 Compresses instrument database file into a ZIP archive in /data directory. 625 626 Used for fast downloads / uploads over network connections to Google Drive (if Google Drive sync is enabled). 627 628 The zip archive is accessible by filename and path in the /data directory. For example, zipping 629 the database for "Thermo QE 1" will generate a zip file with path "../data/Thermo_QE_1.zip". 
630 631 Args: 632 instrument_id (str, default None): 633 If specified, selects a database to zip by instrument ID (ex: "Thermo QE 1") 634 filename (str, default None): 635 If specified, selects a database to zip by filename (ex: "Thermo_QE_1.zip") 636 637 Returns: 638 None 639 """ 640 641 if instrument_id is None and filename is None: 642 return None 643 644 if filename is not None: 645 db_zip_file = os.path.join(data_directory, filename) 646 filename = filename.replace(".zip", ".db") 647 648 elif instrument_id is not None: 649 db_zip_file = get_database_file(instrument_id, zip=True) 650 filename = instrument_id.replace(" ", "_") + ".db" 651 652 file_without_extension = db_zip_file.replace(".zip", "") 653 shutil.make_archive(file_without_extension, "zip", data_directory, filename) 654 655 656def unzip_database(instrument_id=None, filename=None): 657 658 """ 659 Unzips ZIP archive containing instrument database file and deletes the archive when complete. 660 661 Args: 662 instrument_id (str, default None): 663 If specified, selects a database to zip by instrument ID (ex: "Thermo QE 1") 664 filename (str, default None): 665 If specified, selects a database to zip by filename (ex: "Thermo_QE_1.zip") 666 667 Returns: 668 None 669 """ 670 671 if instrument_id is None and filename is None: 672 return None 673 674 if instrument_id is not None: 675 db_zip_file = get_database_file(instrument_id, zip=True) 676 elif filename is not None: 677 db_zip_file = os.path.join(data_directory, filename) 678 679 shutil.unpack_archive(db_zip_file, data_directory, "zip") 680 os.remove(db_zip_file) 681 682 683def zip_methods(): 684 685 """ 686 Compresses methods directory into a ZIP archive in /data directory. 
687 688 Returns: 689 Path for zip archive of methods directory (ex: "../data/methods.zip") 690 """ 691 692 output_directory_and_name = os.path.join(data_directory, "methods.zip").replace(".zip", "") 693 shutil.make_archive(output_directory_and_name, "zip", methods_directory) 694 return output_directory_and_name + ".zip" 695 696 697def unzip_methods(): 698 699 """ 700 Unzips ZIP archive containing methods directory and deletes the archive when complete. 701 """ 702 703 input_zip = os.path.join(data_directory, "methods.zip") 704 shutil.unpack_archive(input_zip, methods_directory, "zip") 705 os.remove(input_zip) 706 707 708def zip_csv_files(input_directory, output_directory_and_name): 709 710 """ 711 Compresses CSV files into a ZIP archive in /data directory. 712 713 Used for fast upload of instrument run data to Google Drive during an active instrument run (if Google Drive sync is enabled). 714 715 Args: 716 input_directory (str): 717 The temporary directory for files pertaining to an instrument run, denoted as "Instrument_ID_Run_ID". 718 For example, a job with ID "BRDE001" created under instrument with ID "Thermo QE 1" would have its files 719 stored in "/data/Thermo_QE_1_BRDE001". 720 output_directory_and_name (str): 721 Essentially, the file path for the ZIP archive (ex: "/data/Instrument_ID_Run_ID"). 722 723 Returns: 724 Path for zip archive of CSV files with instrument run data (ex: "../data/Instrument_ID_Run_ID.zip") 725 """ 726 727 shutil.make_archive(output_directory_and_name, "zip", input_directory) 728 return output_directory_and_name + ".zip" 729 730 731def unzip_csv_files(input_zip, output_directory): 732 733 """ 734 Unzips ZIP archive of CSV files and deletes the archive upon completion. 735 """ 736 737 shutil.unpack_archive(input_zip, output_directory, "zip") 738 os.remove(input_zip) 739 740 741def get_table(database_name, table_name): 742 743 """ 744 Retrieves table from database as a pandas DataFrame object. 
745 746 TODO: Improve this function to accept column and record queries 747 748 Args: 749 database_name (str): 750 The database to query, using instrument ID or "Settings" 751 table_name (str): 752 The table to retrieve 753 754 Returns: 755 DataFrame of table. 756 """ 757 758 if database_name == "Settings": 759 database = settings_database 760 else: 761 database = get_database_file(database_name, sqlite_conn=True) 762 763 engine = sa.create_engine(database) 764 return pd.read_sql("SELECT * FROM " + table_name, engine) 765 766 767def generate_client_settings_yaml(client_id, client_secret): 768 769 """ 770 Generates a settings.yaml file for Google authentication in the /auth directory. 771 772 Client ID and client secret are generated and provided by the user in the Google Cloud Console. 773 774 See: https://docs.iterative.ai/PyDrive2/oauth/#automatic-and-custom-authentication-with-settings-yaml 775 776 Args: 777 client_id (str): 778 The Client ID of the MS-AutoQC application, generated and provided by the user 779 client_secret (str): 780 The Client Secret of the MS-AutoQC application, generated and provided by the user 781 Returns: 782 None 783 """ 784 785 auth_directory = os.path.join(os.getcwd(), "auth") 786 if not os.path.exists(auth_directory): 787 os.makedirs(auth_directory) 788 789 settings_yaml_file = os.path.join(auth_directory, "settings.yaml") 790 791 lines = [ 792 "client_config_backend: settings", 793 "client_config:", 794 " client_id: " + client_id, 795 " client_secret: " + client_secret, 796 "\n", 797 "save_credentials: True", 798 "save_credentials_backend: file", 799 "save_credentials_file: auth/credentials.txt", 800 "\n", 801 "get_refresh_token: True", 802 "\n", 803 "oauth_scope:", 804 " - https://www.googleapis.com/auth/drive", 805 " - https://www.googleapis.com/auth/gmail.send", 806 " - https://www.googleapis.com/auth/userinfo.email" 807 ] 808 809 with open(settings_yaml_file, "w") as file: 810 for line in lines: 811 file.write(line) 812 if line 
!= "\n" and line != lines[-1]: 813 file.write("\n") 814 815 816def insert_google_drive_ids(instrument_id, gdrive_folder_id, instrument_db_file_id, methods_zip_file_id): 817 818 """ 819 Inserts Google Drive ID's into corresponding tables to enable Google Drive sync. 820 821 This function is called when a user creates a new instrument in their workspace. 822 823 The ID's for the following files / folders in Google Drive are stored in the database: 824 1. MS-AutoQC folder 825 2. Instrument database zip file 826 3. Methods directory zip file 827 828 Args: 829 instrument_id (str): 830 Instrument ID 831 gdrive_folder_id (str): 832 Google Drive ID for the MS-AutoQC folder (found in the user's root directory in Drive) 833 instrument_db_file_id (str): 834 Google Drive ID for the instrument database ZIP file 835 methods_zip_file_id (str): 836 Google Drive ID for the methods directory ZIP file 837 838 Returns: 839 None 840 """ 841 842 db_metadata, connection = connect_to_database("Settings") 843 instruments_table = sa.Table("instruments", db_metadata, autoload=True) 844 workspace_table = sa.Table("workspace", db_metadata, autoload=True) 845 846 # Instruments database 847 connection.execute(( 848 sa.update(instruments_table) 849 .where((instruments_table.c.name == instrument_id)) 850 .values(drive_id=instrument_db_file_id) 851 )) 852 853 # MS-AutoQC folder and Methods folder 854 connection.execute(( 855 sa.update(workspace_table) 856 .where((workspace_table.c.id == 1)) 857 .values(gdrive_folder_id=gdrive_folder_id, 858 methods_zip_file_id=methods_zip_file_id) 859 )) 860 861 connection.close() 862 863 864def insert_new_instrument(name, vendor): 865 866 """ 867 Inserts a new instrument into the "instruments" table in the Settings database. 868 869 The name is the instrument ID, and the vendor is one of 5 options: Thermo Fisher, Agilent, Bruker, Sciex, and Waters. 
870 871 Args: 872 name (str): 873 Instrument ID 874 vendor (str): 875 Instrument vendor 876 877 Returns: 878 None 879 """ 880 881 # Connect to database 882 db_metadata, connection = connect_to_database("Settings") 883 884 # Get "instruments" table 885 instruments_table = sa.Table("instruments", db_metadata, autoload=True) 886 887 # Prepare insert of new instrument 888 insert_instrument = instruments_table.insert().values( 889 {"name": name, 890 "vendor": vendor} 891 ) 892 893 # Execute the insert, then close the connection 894 connection.execute(insert_instrument) 895 connection.close() 896 897 898def get_instruments_list(): 899 900 """ 901 Returns list of instruments in database. 902 """ 903 904 # Connect to SQLite database 905 engine = sa.create_engine(settings_database) 906 907 # Get instruments table as DataFrame 908 df_instruments = pd.read_sql("SELECT * FROM instruments", engine) 909 910 # Return list of instruments 911 return df_instruments["name"].astype(str).tolist() 912 913 914def get_instrument(instrument_id): 915 916 """ 917 Returns record from "instruments" table as a DataFrame for a given instrument 918 919 Args: 920 instrument_id (str): Instrument ID 921 922 Returns: 923 DataFrame containing the name, vendor, and drive_id for the given instrument 924 """ 925 926 engine = sa.create_engine(settings_database) 927 return pd.read_sql("SELECT * FROM instruments WHERE name = '" + instrument_id + "'", engine) 928 929 930def get_filenames_from_sequence(sequence, vendor="Thermo Fisher"): 931 932 """ 933 Filters preblanks, washes, and shutdown injections from sequence file, and simultaneously assigns 934 polariy to each sample based on presence of "Pos" or "Neg" in Instrument Method column. 935 936 This function is called upon starting a new QC job. 937 938 TODO: Adapt this function for other instrument vendors. 939 TODO: Check the method filename, not entire file path, for "Pos" and "Neg". 
940 A folder containing "Pos" or "Neg" will give incorrect polarity assignments. 941 942 Args: 943 sequence (str): 944 The acquisition sequence file, encoded as a JSON string in "split" format 945 vendor (str): 946 The instrument vendor (see to-do statements) 947 948 Returns: 949 DataFrame of acquisition sequence, with preblanks / washes / shutdowns filtered out and polarities assigned 950 """ 951 952 df_sequence = pd.read_json(sequence, orient="split") 953 954 # Filter out preblanks 955 df_sequence = df_sequence.loc[ 956 ~((df_sequence["File Name"].str.contains(r"_BK_", na=False)) & 957 (df_sequence["File Name"].str.contains(r"_pre_", na=False)))] 958 959 # Filter out wash and shutdown 960 df_sequence = df_sequence.loc[ 961 ~(df_sequence["File Name"].str.contains(r"_wash_", na=False)) & 962 ~(df_sequence["File Name"].str.contains(r"shutdown", na=False))] 963 964 # Derive polarity from instrument method filename 965 df_sequence.loc[df_sequence["Instrument Method"].str.contains(r"Pos", na=False), "Polarity"] = "Pos" 966 df_sequence.loc[df_sequence["Instrument Method"].str.contains(r"Neg", na=False), "Polarity"] = "Neg" 967 968 return df_sequence 969 970 971def get_polarity_for_sample(instrument_id, run_id, sample_id, status): 972 973 """ 974 Returns polarity for a given sample. 975 976 TODO: Loading hundreds of rows of data before querying for one sample is massively inefficient. 977 This function was written in haste and can be easily implemented in a much better way. 978 979 Args: 980 instrument_id (str): Instrument ID 981 run_id (str): Instrument run ID (job ID) 982 sample_id (str): Sample ID 983 status (str): Job status 984 985 Returns: 986 Polarity for the given sample, as either "Pos" or "Neg". 
987 """ 988 989 if get_device_identity() != instrument_id and sync_is_enabled(): 990 if status == "Complete": 991 df = get_samples_in_run(instrument_id, run_id, "Both") 992 elif status == "Active": 993 df = get_samples_from_csv(instrument_id, run_id, "Both") 994 else: 995 df = get_samples_in_run(instrument_id, run_id, "Both") 996 997 try: 998 polarity = df.loc[df["sample_id"] == sample_id]["polarity"].astype(str).values[0] 999 except: 1000 print("Could not find polarity for sample in database.") 1001 polarity = "Neg" if "Neg" in sample_id else "Pos" 1002 1003 return polarity 1004 1005 1006def insert_new_run(run_id, instrument_id, chromatography, bio_standards, path, sequence, metadata, qc_config_id, job_type): 1007 1008 """ 1009 Initializes sample records in database for a new QC job. 1010 1011 Performs the following functions: 1012 1. Inserts a record for the new instrument run into the "runs" table 1013 2. Inserts sample rows into the "sample_qc_results" table 1014 3. Inserts biological standard sample rows into the "bio_qc_results" table 1015 1016 Args: 1017 run_id (str): 1018 Instrument run ID (job ID) 1019 instrument_id (str): 1020 Instrument ID 1021 chromatography (str): 1022 Chromatography method 1023 bio_standards (str): 1024 Biological standards 1025 path (str): 1026 Data acquisition path 1027 sequence (str): 1028 Acquisition sequence table, as JSON string in "records" format 1029 metadata (str): 1030 Sample metadata table, as JSON string in "records" format 1031 qc_config_id (str): 1032 Name of QC configuration 1033 job_type (str): 1034 Either "completed" or "active" 1035 1036 Returns: 1037 None 1038 """ 1039 1040 # Get list of samples from sequence 1041 df_sequence = get_filenames_from_sequence(sequence) 1042 1043 samples = df_sequence["File Name"].astype(str).tolist() 1044 polarities = df_sequence["Polarity"].astype(str).tolist() 1045 positions = df_sequence["Position"].astype(str).tolist() 1046 1047 num_samples = len(samples) 1048 1049 # Connect to 
database 1050 db_metadata, connection = connect_to_database(instrument_id) 1051 1052 # Get relevant tables 1053 runs_table = sa.Table("runs", db_metadata, autoload=True) 1054 sample_qc_results_table = sa.Table("sample_qc_results", db_metadata, autoload=True) 1055 bio_qc_results_table = sa.Table("bio_qc_results", db_metadata, autoload=True) 1056 1057 # Get identifiers for biological standard (if any) 1058 identifiers = get_biological_standard_identifiers(bio_standards) 1059 1060 # Prepare insert of user-inputted run data 1061 insert_run = runs_table.insert().values( 1062 {"run_id": run_id, 1063 "chromatography": chromatography, 1064 "acquisition_path": path, 1065 "sequence": sequence, 1066 "metadata": metadata, 1067 "status": "Active", 1068 "samples": num_samples, 1069 "completed": 0, 1070 "passes": 0, 1071 "fails": 0, 1072 "qc_config_id": qc_config_id, 1073 "biological_standards": str(bio_standards), 1074 "job_type": job_type}) 1075 1076 insert_samples = [] 1077 1078 for index, sample in enumerate(samples): 1079 # Check if the biological standard identifier is in the sample name 1080 is_bio_standard = False 1081 1082 for identifier in identifiers.keys(): 1083 if identifier in sample: 1084 is_bio_standard = True 1085 break 1086 1087 # Prepare insert of the sample row into the "sample_qc_results" table 1088 if not is_bio_standard: 1089 insert_sample = sample_qc_results_table.insert().values( 1090 {"sample_id": sample, 1091 "run_id": run_id, 1092 "polarity": polarities[index], 1093 "position": positions[index]}) 1094 1095 # Prepare insert of the sample row into the "bio_qc_results" table 1096 else: 1097 insert_sample = bio_qc_results_table.insert().values( 1098 {"sample_id": sample, 1099 "run_id": run_id, 1100 "polarity": polarities[index], 1101 "biological_standard": identifiers[identifier], 1102 "position": positions[index]}) 1103 1104 # Add this INSERT query into the list of insert queries 1105 insert_samples.append(insert_sample) 1106 1107 # Execute INSERT to 
def get_instrument_run(instrument_id, run_id):

    """
    Returns DataFrame of given instrument run from "runs" table.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        DataFrame containing record for instrument run
    """

    database = get_database_file(instrument_id=instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # Bind run_id as a parameter rather than concatenating it into the SQL string,
    # so that quotes or other special characters in a run ID cannot alter the query
    query = sa.text("SELECT * FROM runs WHERE run_id = :run_id")
    return pd.read_sql(query, engine, params={"run_id": run_id})


def get_instrument_run_from_csv(instrument_id, run_id):

    """
    Returns DataFrame of selected instrument run from CSV files during active instrument runs.

    This function is called when a user views an active instrument run from an external device
    (to prevent downloading / uploading the database file with each sample acquisition).

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        DataFrame containing record for instrument run
    """

    # Run folder is named <instrument ID with underscores>_<run ID> inside the data directory
    run_folder = instrument_id.replace(" ", "_") + "_" + run_id
    run_csv_file = os.path.join(data_directory, run_folder, "csv", "run.csv")
    return pd.read_csv(run_csv_file, index_col=False)


def get_instrument_runs(instrument_id, as_list=False):

    """
    Returns DataFrame of all runs on a given instrument from "runs" table

    Args:
        instrument_id (str):
            Instrument ID
        as_list (bool, default False):
            If True, returns only a list of names of instrument runs (jobs)

    Returns:
        DataFrame containing records for instrument runs (QC jobs) for the given instrument,
        or a list of run ID strings if as_list is True
    """

    database = get_database_file(instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)
    df_runs = pd.read_sql("SELECT * FROM runs", engine)

    if as_list:
        return df_runs["run_id"].astype(str).tolist()

    return df_runs
def get_acquisition_path(instrument_id, run_id):

    """
    Retrieves acquisition path for a given instrument run.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        Acquisition path for the given instrument run
    """

    df_instrument_run = get_instrument_run(instrument_id, run_id)
    return df_instrument_run["acquisition_path"].astype(str).tolist()[0]


def get_md5(instrument_id, sample_id):

    """
    Returns MD5 checksum for a data file in "sample_qc_results" table.

    Used for comparing MD5 checksums during active instrument runs. Samples whose ID contains
    a biological standard identifier are looked up in the "bio_qc_results" table instead.

    TODO: This function will return incorrect results if two different instrument runs
    have samples with the same sample ID. It needs to include "run_id" in the SQL query.

    Args:
        instrument_id (str): Instrument ID
        sample_id (str): Sample ID

    Returns:
        MD5 checksum stored for the data file.
    """

    # Connect to database
    database = get_database_file(instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # Check if sample is a biological standard
    is_bio_standard = any(
        identifier in sample_id for identifier in get_biological_standard_identifiers().keys())
    table = "bio_qc_results" if is_bio_standard else "sample_qc_results"

    # Table name is one of the two literals above; the sample ID is bound as a parameter
    # rather than concatenated, so special characters in a filename cannot alter the query
    query = sa.text("SELECT * FROM " + table + " WHERE sample_id = :sample_id")
    df_sample_qc_results = pd.read_sql(query, engine, params={"sample_id": sample_id})

    return df_sample_qc_results["md5"].astype(str).values[0]


def update_md5_checksum(instrument_id, sample_id, md5_checksum):

    """
    Updates MD5 checksum for a data file during sample acquisition.

    TODO: This function will return incorrect results if two different instrument runs
    have samples with the same sample ID. It needs to include "run_id" in the SQL query.

    Args:
        instrument_id (str):
            Instrument ID
        sample_id (str):
            Sample ID (filename) of data file
        md5_checksum (str):
            MD5 checksum for the sample data file

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        # Check if sample is a biological standard and get relevant table
        qc_results_table = sa.Table("sample_qc_results", db_metadata, autoload=True)

        for identifier in get_biological_standard_identifiers().keys():
            if identifier in sample_id:
                qc_results_table = sa.Table("bio_qc_results", db_metadata, autoload=True)
                break

        # Prepare update of MD5 checksum at sample row
        update_md5 = (
            sa.update(qc_results_table)
                .where(qc_results_table.c.sample_id == sample_id)
                .values(md5=md5_checksum)
        )

        # Execute UPDATE into database
        connection.execute(update_md5)

    finally:
        # Close the connection even if reflection or the UPDATE fails
        connection.close()
write_qc_results(sample_id, instrument_id, run_id, json_mz, json_rt, json_intensity, qc_dataframe, qc_result, is_bio_standard): 1311 1312 """ 1313 Writes QC results (as dictionary records) to sample record upon MS-DIAL processing completion. 1314 1315 QC results consist of m/z, RT, and intensity data for internal standards (or targeted metabolites in biological standards), 1316 as well as a DataFrame containing delta m/z, delta RT, in-run delta RT, warnings, and fails (qc_dataframe) and overall QC result 1317 (which will be "Pass" or "Fail"). 1318 1319 The data is encoded as dictionary in "records" format: [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]. 1320 This dictionary is cast to a string before being passed to this function. 1321 1322 TODO: Update names of arguments from json_x to record_x, as the data is no longer encoded as JSON strings. 1323 The data is now encoded in "records" format as a string. 1324 1325 Args: 1326 sample_id (str): 1327 Sample ID 1328 instrument_id (str): 1329 Instrument ID 1330 run_id (str): 1331 Instrument run ID (Job ID) 1332 json_mz (str): 1333 String dict of internal standard m/z data in "records" format 1334 json_rt (str): 1335 String dict of internal standard RT data in "records" format 1336 json_intensity (str): 1337 String dict of internal standard intensity data in "records" format 1338 qc_dataframe (str): 1339 String dict of various QC data in "records" format 1340 qc_result (str): 1341 QC result for sample, either "Pass" or "Fail" 1342 is_bio_standard (bool): 1343 Whether the sample is a biological standard 1344 1345 Returns: 1346 None 1347 """ 1348 1349 # Connect to database 1350 db_metadata, connection = connect_to_database(instrument_id) 1351 1352 # Get "sample_qc_results" or "bio_qc_results" table 1353 if not is_bio_standard: 1354 qc_results_table = sa.Table("sample_qc_results", db_metadata, autoload=True) 1355 else: 1356 qc_results_table = sa.Table("bio_qc_results", db_metadata, autoload=True) 1357 1358 # Prepare 
def get_chromatography_methods():

    """
    Returns DataFrame of chromatography methods from the Settings database.
    """

    engine = sa.create_engine(settings_database)
    return pd.read_sql("SELECT * FROM chromatography_methods", engine)


def get_chromatography_methods_list():

    """
    Returns list of chromatography method ID's from the Settings database.
    """

    return get_chromatography_methods()["method_id"].astype(str).tolist()


def insert_chromatography_method(method_id):

    """
    Inserts new chromatography method in the "chromatography_methods" table of the Settings database.

    A row for the new method is also inserted into the "biological_standards" table
    for each distinct biological standard name returned by get_biological_standards().

    Args:
        method_id (str): Name of the chromatography method

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get "chromatography_methods" table and "biological_standards" table
        chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

        # Execute insert of chromatography method
        insert_method = chromatography_table.insert().values(
            {"method_id": method_id,
             "num_pos_standards": 0,
             "num_neg_standards": 0,
             "pos_istd_msp_file": "",
             "neg_istd_msp_file": "",
             "pos_parameter_file": "",
             "neg_parameter_file": "",
             "msdial_config_id": "Default"})

        connection.execute(insert_method)

        # De-duplicate on name while keeping each name's own identifier on the same row.
        # (Indexing the full identifier column by the position of a unique name can pair a
        # standard with another standard's identifier when names repeat across rows.)
        df_biological_standards = get_biological_standards()
        standards = (
            df_biological_standards[["name", "identifier"]]
                .astype(str)
                .drop_duplicates(subset="name")
        )

        # Execute insert of method for each biological standard
        for name, identifier in zip(standards["name"], standards["identifier"]):
            insert_method_for_bio_standard = biological_standards_table.insert().values({
                "name": name,
                "identifier": identifier,
                "chromatography": method_id,
                "num_pos_features": 0,
                "num_neg_features": 0,
                "msdial_config_id": "Default"})
            connection.execute(insert_method_for_bio_standard)

    finally:
        # Close the connection even if one of the inserts fails
        connection.close()
Removes associated internal standards from "internal_standards" table 1456 4. Removes associated targeted features from "targeted_features" table 1457 5. Deletes corresponding MSPs from folders 1458 6. Deletes corresponding MSPs from Google Drive (if sync is enabled) 1459 1460 Args: 1461 method_id (str): Name of the chromatography method 1462 1463 Returns: 1464 None 1465 """ 1466 1467 # Delete corresponding MSPs from "methods" directory 1468 df = get_table("Settings", "chromatography_methods") 1469 df = df.loc[df["method_id"] == method_id] 1470 1471 df2 = get_table("Settings", "biological_standards") 1472 df2 = df2.loc[df2["chromatography"] == method_id] 1473 1474 files_to_delete = df["pos_istd_msp_file"].astype(str).tolist() + df["neg_istd_msp_file"].astype(str).tolist() + \ 1475 df2["pos_bio_msp_file"].astype(str).tolist() + df2["neg_bio_msp_file"].astype(str).tolist() 1476 1477 for file in os.listdir(methods_directory): 1478 if file in files_to_delete: 1479 os.remove(os.path.join(methods_directory, file)) 1480 1481 # Connect to database and get relevant tables 1482 db_metadata, connection = connect_to_database("Settings") 1483 chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True) 1484 biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True) 1485 internal_standards_table = sa.Table("internal_standards", db_metadata, autoload=True) 1486 targeted_features_table = sa.Table("targeted_features", db_metadata, autoload=True) 1487 1488 delete_queries = [] 1489 1490 # Remove from "chromatography_methods" table 1491 delete_chromatography_method = ( 1492 sa.delete(chromatography_table) 1493 .where((chromatography_table.c.method_id == method_id)) 1494 ) 1495 1496 delete_queries.append(delete_chromatography_method) 1497 1498 # Remove all entries in other tables associated with chromatography 1499 for table in [biological_standards_table, internal_standards_table, targeted_features_table]: 1500 delete_from_table 
def update_msdial_config_for_internal_standards(chromatography, config_id):

    """
    Assigns an MS-DIAL configuration to a chromatography method.

    Sets the "msdial_config_id" column of the matching row in the "chromatography_methods"
    table of the Settings database. This MS-DIAL configuration will be used to generate
    a parameters file for processing samples run with this chromatography method.

    Args:
        chromatography (str):
            Chromatography method ID (name)
        config_id (str):
            MS-DIAL configuration ID (name)

    Returns:
        None
    """

    # Open the Settings database and reflect the methods table
    settings_metadata, settings_connection = connect_to_database("Settings")
    table = sa.Table("chromatography_methods", settings_metadata, autoload=True)

    # Build the UPDATE for the row belonging to this chromatography method
    is_selected_method = table.c.method_id == chromatography
    statement = sa.update(table).where(is_selected_method).values(msdial_config_id=config_id)

    # Run it, then release the connection
    settings_connection.execute(statement)
    settings_connection.close()
1555 1556 Args: 1557 msp_file (io.StringIO): 1558 In-memory text-stream file object 1559 chromatography (str): 1560 Chromatography method ID (name) 1561 polarity (str): 1562 Polarity for which MSP should be used for ("Positive Mode" or "Negative Mode") 1563 bio_standard (str, default None): 1564 Parses MSP and applies to biological standard within a chromatography-polarity combination 1565 1566 Returns: 1567 None 1568 """ 1569 1570 # Connect to database 1571 db_metadata, connection = connect_to_database("Settings") 1572 1573 # Write MSP file to folder, store file path in database (further down in function) 1574 if not os.path.exists(methods_directory): 1575 os.makedirs(methods_directory) 1576 1577 if bio_standard is not None: 1578 if polarity == "Positive Mode": 1579 filename = bio_standard.replace(" ", "_") + "_" + chromatography + "_Pos.msp" 1580 elif polarity == "Negative Mode": 1581 filename = bio_standard.replace(" ", "_") + "_" + chromatography + "_Neg.msp" 1582 else: 1583 if polarity == "Positive Mode": 1584 filename = chromatography + "_Pos.msp" 1585 elif polarity == "Negative Mode": 1586 filename = chromatography + "_Neg.msp" 1587 1588 msp_file_path = os.path.join(methods_directory, filename) 1589 1590 with open(msp_file_path, "w") as file: 1591 msp_file.seek(0) 1592 shutil.copyfileobj(msp_file, file) 1593 1594 # Read MSP file 1595 with open(msp_file_path, "r") as msp: 1596 1597 list_of_features = [] 1598 1599 # Split MSP into list of compounds 1600 data = msp.read().split("\n\n") 1601 data = [element.split("\n") for element in data] 1602 1603 # Add each line of each compound into a list 1604 for feature in data: 1605 if len(feature) != 1: 1606 list_of_features.append(feature) 1607 1608 features_dict = {} 1609 added_features = [] 1610 1611 # Iterate through features in MSP 1612 for feature_index, feature in enumerate(list_of_features): 1613 1614 features_dict[feature_index] = { 1615 "Name": None, 1616 "Precursor m/z": None, 1617 "Retention time": None, 
1618 "INCHIKEY": None, 1619 "MS2 spectrum": None 1620 } 1621 1622 # Iterate through each line of each feature in the MSP 1623 for data_index, feature_data in enumerate(feature): 1624 1625 # Capture, name, inchikey, m/z, and RT 1626 if "NAME" in feature_data.upper(): 1627 feature_name = feature_data.split(": ")[-1] 1628 if feature_name not in added_features: 1629 added_features.append(feature_name) 1630 features_dict[feature_index]["Name"] = feature_name 1631 continue 1632 else: 1633 break 1634 elif "PRECURSORMZ" in feature_data.upper(): 1635 features_dict[feature_index]["Precursor m/z"] = feature_data.split(": ")[-1] 1636 continue 1637 elif "INCHIKEY" in feature_data.upper(): 1638 features_dict[feature_index]["INCHIKEY"] = feature_data.split(": ")[-1] 1639 continue 1640 elif "RETENTIONTIME" in feature_data.upper(): 1641 features_dict[feature_index]["Retention time"] = feature_data.split(": ")[-1] 1642 continue 1643 1644 # Capture MS2 spectrum 1645 elif "Num Peaks" in feature_data: 1646 1647 # Get number of peaks in MS2 spectrum 1648 num_peaks = int(feature_data.split(": ")[-1]) 1649 1650 # Each line in the MSP corresponds to a peak 1651 start_index = data_index + 1 1652 end_index = data_index + num_peaks + 1 1653 1654 # Each peak is represented as a string e.g. 
"56.04977\t247187" 1655 peaks_in_spectrum = [] 1656 for peak in feature[start_index:end_index]: 1657 peaks_in_spectrum.append(peak.replace("\t", ":")) 1658 1659 features_dict[feature_index]["MS2 spectrum"] = str(peaks_in_spectrum) 1660 break 1661 1662 features_dict = { key:value for key, value in features_dict.items() if value["Name"] is not None } 1663 1664 # Adding MSP for biological standards 1665 if bio_standard is not None: 1666 1667 # Get "targeted_features" table 1668 targeted_features_table = sa.Table("targeted_features", db_metadata, autoload=True) 1669 1670 # Prepare DELETE of old targeted features 1671 delete_old_targeted_features = ( 1672 sa.delete(targeted_features_table) 1673 .where((targeted_features_table.c.chromatography == chromatography) 1674 & (targeted_features_table.c.polarity == polarity) 1675 & (targeted_features_table.c.biological_standard == bio_standard)) 1676 ) 1677 1678 # Execute DELETE 1679 connection.execute(delete_old_targeted_features) 1680 1681 # Execute INSERT of each targeted feature into targeted_features table 1682 for feature in features_dict: 1683 insert_feature = targeted_features_table.insert().values( 1684 {"name": features_dict[feature]["Name"], 1685 "chromatography": chromatography, 1686 "polarity": polarity, 1687 "biological_standard": bio_standard, 1688 "precursor_mz": features_dict[feature]["Precursor m/z"], 1689 "retention_time": features_dict[feature]["Retention time"], 1690 "ms2_spectrum": features_dict[feature]["MS2 spectrum"], 1691 "inchikey": features_dict[feature]["INCHIKEY"]}) 1692 connection.execute(insert_feature) 1693 1694 # Get "biological_standards" table 1695 biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True) 1696 1697 # Write location of msp file to respective cell 1698 if polarity == "Positive Mode": 1699 update_msp_file = ( 1700 sa.update(biological_standards_table) 1701 .where((biological_standards_table.c.chromatography == chromatography) 1702 & 
(biological_standards_table.c.name == bio_standard)) 1703 .values(num_pos_features=len(features_dict), 1704 pos_bio_msp_file=filename) 1705 ) 1706 elif polarity == "Negative Mode": 1707 update_msp_file = ( 1708 sa.update(biological_standards_table) 1709 .where((biological_standards_table.c.chromatography == chromatography) 1710 & (biological_standards_table.c.name == bio_standard)) 1711 .values(num_neg_features=len(features_dict), 1712 neg_bio_msp_file=filename) 1713 ) 1714 1715 # Execute UPDATE of MSP file location 1716 connection.execute(update_msp_file) 1717 1718 # Adding MSP for internal standards 1719 else: 1720 1721 # Get internal_standards table 1722 internal_standards_table = sa.Table("internal_standards", db_metadata, autoload=True) 1723 1724 # Prepare DELETE of old internal standards 1725 delete_old_internal_standards = ( 1726 sa.delete(internal_standards_table) 1727 .where((internal_standards_table.c.chromatography == chromatography) 1728 & (internal_standards_table.c.polarity == polarity)) 1729 ) 1730 1731 # Execute DELETE 1732 connection.execute(delete_old_internal_standards) 1733 1734 # Execute INSERT of each internal standard into internal_standards table 1735 for feature in features_dict: 1736 insert_feature = internal_standards_table.insert().values( 1737 {"name": features_dict[feature]["Name"], 1738 "chromatography": chromatography, 1739 "polarity": polarity, 1740 "precursor_mz": features_dict[feature]["Precursor m/z"], 1741 "retention_time": features_dict[feature]["Retention time"], 1742 "ms2_spectrum": features_dict[feature]["MS2 spectrum"], 1743 "inchikey": features_dict[feature]["INCHIKEY"]}) 1744 connection.execute(insert_feature) 1745 1746 # Get "chromatography" table 1747 chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True) 1748 1749 # Write location of msp file to respective cell 1750 if polarity == "Positive Mode": 1751 update_msp_file = ( 1752 sa.update(chromatography_table) 1753 
def add_csv_to_database(csv_file, chromatography, polarity):

    """
    Parses compounds from a CSV file into the Settings database.

    Parses compounds from a CSV into the "internal_standards" table, and stores
    the location of the pos/neg TXT files in "chromatography_methods" table.

    The CSV is read for the columns "Common Name", "MS1 m/z", and "RT (min)". It is also
    written as a tab-delimited TXT library in the methods directory, and any MSP library
    of the same name is deleted. Existing internal standards for the given
    chromatography-polarity combination are replaced.

    TODO: The MSP/TXT libraries have standardized names; there is no need to store the filename in the database.

    Args:
        csv_file (io.StringIO):
            In-memory text-stream file object
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity for which MSP should be used for ("Positive Mode" or "Negative Mode")

    Returns:
        None

    Raises:
        ValueError: If polarity is neither "Positive Mode" nor "Negative Mode"
    """

    # Filename suffix and "chromatography_methods" columns to update, by polarity
    polarity_fields = {
        "Positive Mode": ("_Pos.txt", "num_pos_standards", "pos_istd_msp_file"),
        "Negative Mode": ("_Neg.txt", "num_neg_standards", "neg_istd_msp_file"),
    }

    # Reject unsupported polarities before touching any files or tables
    if polarity not in polarity_fields:
        raise ValueError('Polarity must be "Positive Mode" or "Negative Mode", got: ' + repr(polarity))

    suffix, count_column, file_column = polarity_fields[polarity]

    # Convert CSV file into Python dictionary
    df_internal_standards = pd.read_csv(csv_file, index_col=False)
    internal_standards_dict = df_internal_standards.to_dict("index")

    # Create methods directory if it doesn't already exist
    if not os.path.exists(methods_directory):
        os.makedirs(methods_directory)

    # Name file accordingly
    filename = chromatography + suffix
    txt_file_path = os.path.join(methods_directory, filename)

    # Write CSV columns to tab-delimited text file
    df_internal_standards.to_csv(txt_file_path, sep="\t", index=False)

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get internal_standards table
        internal_standards_table = sa.Table("internal_standards", db_metadata, autoload=True)

        # Execute DELETE of old internal standards
        delete_old_internal_standards = (
            sa.delete(internal_standards_table)
                .where((internal_standards_table.c.chromatography == chromatography)
                       & (internal_standards_table.c.polarity == polarity))
        )
        connection.execute(delete_old_internal_standards)

        # Execute INSERT of each internal standard into internal_standards table
        for standard in internal_standards_dict.values():
            insert_standard = internal_standards_table.insert().values(
                {"name": standard["Common Name"],
                 "chromatography": chromatography,
                 "polarity": polarity,
                 "precursor_mz": standard["MS1 m/z"],
                 "retention_time": standard["RT (min)"]})
            connection.execute(insert_standard)

        # Get "chromatography" table
        chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)

        # Write number of standards and location of TXT file to respective cells
        update_msp_file = (
            sa.update(chromatography_table)
                .where(chromatography_table.c.method_id == chromatography)
                .values(**{count_column: len(internal_standards_dict), file_column: filename})
        )
        connection.execute(update_msp_file)

        # If the corresponding MSP library existed, delete it
        msp_library = os.path.join(methods_directory, filename.replace(".txt", ".msp"))
        if os.path.exists(msp_library):
            os.remove(msp_library)

    finally:
        # Close the connection even if a statement above fails
        connection.close()


def get_msdial_configurations():

    """
    Returns list of user configurations of MS-DIAL parameters from Settings database.
    """

    engine = sa.create_engine(settings_database)
    df_msdial_configurations = pd.read_sql("SELECT * FROM msdial_parameters", engine)
    return df_msdial_configurations["config_name"].astype(str).tolist()
To allow changes during a QC job, 1890 this function should be called every time the user makes a configuration save in Settings > MS-DIAL Configurations. 1891 1892 Args: 1893 chromatography (str): 1894 Chromatography method ID (name) 1895 polarity (str): 1896 Polarity ("Positive" or "Negative") 1897 msp_file_path (str): 1898 MSP library file path 1899 bio_standard (str, default None): 1900 Specifies that the parameters file is for a biological standard 1901 1902 Returns: 1903 None 1904 """ 1905 1906 # Get parameters of selected configuration 1907 if bio_standard is not None: 1908 df_bio_standards = get_biological_standards() 1909 df_bio_standards = df_bio_standards.loc[ 1910 (df_bio_standards["chromatography"] == chromatography) & (df_bio_standards["name"] == bio_standard)] 1911 config_name = df_bio_standards["msdial_config_id"].astype(str).values[0] 1912 else: 1913 df_methods = get_chromatography_methods() 1914 df_methods = df_methods.loc[df_methods["method_id"] == chromatography] 1915 config_name = df_methods["msdial_config_id"].astype(str).values[0] 1916 1917 parameters = get_msdial_configuration_parameters(config_name) 1918 1919 # Create "methods" directory if it does not exist 1920 if not os.path.exists(methods_directory): 1921 os.makedirs(methods_directory) 1922 1923 # Name parameters file accordingly 1924 if bio_standard is not None: 1925 if polarity == "Positive": 1926 filename = bio_standard.replace(" ", "_") + "_" + config_name.replace(" ", "_") + "_Parameters_Pos.txt" 1927 elif polarity == "Negative": 1928 filename = bio_standard.replace(" ", "_") + "_" + config_name.replace(" ", "_") + "_Parameters_Neg.txt" 1929 else: 1930 if polarity == "Positive": 1931 filename = chromatography.replace(" ", "_") + "_" + config_name.replace(" ", "_") + "_Parameters_Pos.txt" 1932 elif polarity == "Negative": 1933 filename = chromatography.replace(" ", "_") + "_" + config_name.replace(" ", "_") + "_Parameters_Neg.txt" 1934 1935 parameters_file = 
os.path.join(methods_directory, filename) 1936 1937 # Some specifications based on polarity / file type for the parameters 1938 if polarity == "Positive": 1939 adduct_type = "[M+H]+" 1940 elif polarity == "Negative": 1941 adduct_type = "[M-H]-" 1942 1943 if msp_file_path.endswith(".msp"): 1944 filepath = "MSP file: " + msp_file_path 1945 elif msp_file_path.endswith(".txt"): 1946 filepath = "Text file: " + msp_file_path 1947 1948 # Text file contents 1949 lines = [ 1950 "#Data type", 1951 "MS1 data type: Centroid", 1952 "MS2 data type: Centroid", 1953 "Ion mode: " + polarity, 1954 "DIA file:", "\n" 1955 1956 "#Data collection parameters", 1957 "Retention time begin: " + str(parameters[0]), 1958 "Retention time end: " + str(parameters[1]), 1959 "Mass range begin: " + str(parameters[2]), 1960 "Mass range end: " + str(parameters[3]), "\n", 1961 1962 "#Centroid parameters", 1963 "MS1 tolerance for centroid: " + str(parameters[4]), 1964 "MS2 tolerance for centroid: " + str(parameters[5]), "\n", 1965 1966 "#Peak detection parameters", 1967 "Smoothing method: " + str(parameters[6]), 1968 "Smoothing level: " + str(parameters[7]), 1969 "Minimum peak width: " + str(parameters[8]), 1970 "Minimum peak height: " + str(parameters[9]), 1971 "Mass slice width: " + str(parameters[10]), "\n", 1972 1973 "#Deconvolution parameters", 1974 "Sigma window value: 0.5", 1975 "Amplitude cut off: 0", "\n", 1976 1977 "#Adduct list", 1978 "Adduct list: " + adduct_type, "\n", 1979 1980 "#Text file and post identification (retention time and accurate mass based) setting", 1981 filepath, 1982 "Retention time tolerance for post identification: " + str(parameters[11]), 1983 "Accurate ms1 tolerance for post identification: " + str(parameters[12]), 1984 "Post identification score cut off: " + str(parameters[13]), "\n", 1985 1986 "#Alignment parameters setting", 1987 "Retention time tolerance for alignment: " + str(parameters[14]), 1988 "MS1 tolerance for alignment: " + str(parameters[15]), 1989 
"Retention time factor for alignment: " + str(parameters[16]), 1990 "MS1 factor for alignment: " + str(parameters[17]), 1991 "Peak count filter: " + str(parameters[18]), 1992 "QC at least filter: " + str(parameters[19]), 1993 ] 1994 1995 # Write parameters to a text file 1996 with open(parameters_file, "w") as file: 1997 for line in lines: 1998 file.write(line) 1999 if line != "\n": 2000 file.write("\n") 2001 2002 # Write path of parameters text file to chromatography method in database 2003 db_metadata, connection = connect_to_database("Settings") 2004 chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True) 2005 biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True) 2006 2007 # For processing biological standard samples 2008 if bio_standard is not None: 2009 if polarity == "Positive": 2010 update_parameter_file = ( 2011 sa.update(biological_standards_table) 2012 .where((biological_standards_table.c.chromatography == chromatography) 2013 & (biological_standards_table.c.name == bio_standard)) 2014 .values(pos_parameter_file=parameters_file) 2015 ) 2016 elif polarity == "Negative": 2017 update_parameter_file = ( 2018 sa.update(biological_standards_table) 2019 .where((biological_standards_table.c.chromatography == chromatography) 2020 & (biological_standards_table.c.name == bio_standard)) 2021 .values(neg_parameter_file=parameters_file) 2022 ) 2023 # For processing samples with internal standards 2024 else: 2025 if polarity == "Positive": 2026 update_parameter_file = ( 2027 sa.update(chromatography_table) 2028 .where(chromatography_table.c.method_id == chromatography) 2029 .values(pos_parameter_file=parameters_file) 2030 ) 2031 elif polarity == "Negative": 2032 update_parameter_file = ( 2033 sa.update(chromatography_table) 2034 .where(chromatography_table.c.method_id == chromatography) 2035 .values(neg_parameter_file=parameters_file) 2036 ) 2037 2038 connection.execute(update_parameter_file) 2039 
def add_msdial_configuration(msdial_config_name):

    """
    Inserts a new MS-DIAL configuration, pre-filled with default parameter values,
    into the "msdial_parameters" table of the Settings database.

    Args:
        msdial_config_name (str): MS-DIAL configuration ID

    Returns:
        None
    """

    # Default value for every parameter column of a newly created configuration
    default_parameters = {
        "config_name": msdial_config_name,
        "rt_begin": 0,
        "rt_end": 100,
        "mz_begin": 0,
        "mz_end": 2000,
        "ms1_centroid_tolerance": 0.008,
        "ms2_centroid_tolerance": 0.01,
        "smoothing_method": "LinearWeightedMovingAverage",
        "smoothing_level": 3,
        "min_peak_width": 3,
        "min_peak_height": 35000,
        "mass_slice_width": 0.1,
        "post_id_rt_tolerance": 0.3,
        "post_id_mz_tolerance": 0.008,
        "post_id_score_cutoff": 85,
        "alignment_rt_tolerance": 0.05,
        "alignment_mz_tolerance": 0.008,
        "alignment_rt_factor": 0.5,
        "alignment_mz_factor": 0.5,
        "peak_count_filter": 0,
        "qc_at_least_filter": "True",
    }

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get MS-DIAL parameters table and execute the INSERT
        msdial_parameters_table = sa.Table("msdial_parameters", db_metadata, autoload=True)
        connection.execute(msdial_parameters_table.insert().values(default_parameters))
    finally:
        # Release the connection even if the table lookup or the INSERT fails
        connection.close()
def get_msdial_configuration_parameters(msdial_config_name, parameter=None):

    """
    Returns tuple of parameters defined for a selected MS-DIAL configuration.

    The "id" and "config_name" columns are removed, so the tuple holds only the parameter
    values, in the column order of the "msdial_parameters" table.

    TODO: The MS-DIAL configuration is returned as a tuple for a concise implementation of
    get_msdial_parameters_for_config() in the DashWebApp module. While convenient there, this
    function is not optimal for maintainability. Should return the entire DataFrame record instead.

    See update_msdial_configuration() for details on parameters.

    Args:
        msdial_config_name (str):
            MS-DIAL configuration ID
        parameter (str, default None):
            If specified, returns only the value for the given parameter (column name)

    Returns:
        Tuple of parameters for the given MS-DIAL configuration, or single parameter value.

    Raises:
        IndexError: If no configuration named msdial_config_name exists.
    """

    # Get "msdial_parameters" table from database as a DataFrame
    engine = sa.create_engine(settings_database)
    df_configurations = pd.read_sql("SELECT * FROM msdial_parameters", engine)

    # Get selected configuration. drop() returns a new DataFrame here, rather than
    # modifying the filtered slice in place.
    selected_config = df_configurations.loc[
        df_configurations["config_name"] == msdial_config_name
    ].drop(columns=["id", "config_name"])

    if parameter is not None:
        return selected_config[parameter].values[0]

    return tuple(selected_config.to_records(index=False)[0])
2163 2164 For details on MS-DIAL parameters, see: https://mtbinfo-team.github.io/mtbinfo.github.io/MS-DIAL/tutorial#section-2-3 2165 2166 Args: 2167 config_name (str): 2168 Name / ID of MS-DIAL configuration 2169 rt_begin (int): 2170 Minimum retention time in RT range for analysis range 2171 rt_end (int): 2172 Maximum retention time in RT range for analysis 2173 mz_begin (float): 2174 Minimum precursor mass in m/z range for analysis range 2175 mz_end (float): 2176 Maximum precursor mass in m/z range for analysis range 2177 ms1_centroid_tolerance (float): 2178 MS1 centroid tolerance 2179 ms2_centroid_tolerance (float): 2180 MS2 centroid tolerance 2181 smoothing_method (str): 2182 Peak smoothing method for peak detection 2183 smoothing_level (int): 2184 Peak smoothing level 2185 mass_slice_width (float): 2186 Mass slice width 2187 min_peak_width (int): 2188 Minimum peak width threshold 2189 min_peak_height (int): 2190 Minimum peak height threshold 2191 post_id_rt_tolerance (float): 2192 Post-identification retention time tolerance 2193 post_id_mz_tolerance (float): 2194 Post-identification precursor m/z tolerance 2195 post_id_score_cutoff (int): 2196 Similarity score cutoff after peak identification 2197 alignment_rt_tolerance (float): 2198 Post-alignment retention time tolerance 2199 alignment_mz_tolerance (float): 2200 Post-alignment precursor m/z tolerance 2201 alignment_rt_factor (float): 2202 Post-alignment retention time factor 2203 alignment_mz_factor (float): 2204 Post-alignment precursor m/z tolerance 2205 peak_count_filter (int): 2206 Peak count filter 2207 qc_at_least_filter (str): 2208 QC at least filter 2209 2210 Returns: 2211 None 2212 """ 2213 2214 # Connect to database 2215 db_metadata, connection = connect_to_database("Settings") 2216 2217 # Get MS-DIAL parameters table 2218 msdial_parameters_table = sa.Table("msdial_parameters", db_metadata, autoload=True) 2219 2220 # Prepare insert of user-inputted MS-DIAL parameters 2221 update_parameters = ( 2222 
def get_msp_file_path(chromatography, polarity, bio_standard=None):

    """
    Returns file path of the MSP / TXT library for a selected chromatography and polarity
    (stored in the methods folder upon user upload) for MS-DIAL parameter file generation.

    The file name is read from the "biological_standards" table when bio_standard is given,
    otherwise from the "chromatography_methods" table, and joined onto the methods directory.

    TODO: Once added to workspace, MSP / TXT library file names are standardized. No need to
    store / retrieve from database. Get the file path using the filename
    e.g. return directory + chromatography + "_" + polarity + ".msp".

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity, either "Positive" or "Negative"
        bio_standard (str, default None):
            Name of biological standard

    Returns:
        MSP / TXT library file path.

    Raises:
        ValueError: If polarity is neither "Positive" nor "Negative".
        IndexError: If no matching row exists in the database.
    """

    if polarity not in ("Positive", "Negative"):
        raise ValueError('polarity must be "Positive" or "Negative", got: ' + repr(polarity))

    # Connect to database
    engine = sa.create_engine(settings_database)

    # Values are passed as bound parameters instead of being concatenated into the SQL text
    if bio_standard is not None:
        query = sa.text(
            "SELECT * FROM biological_standards "
            "WHERE name = :name AND chromatography = :chromatography")
        params = {"name": bio_standard, "chromatography": chromatography}
        column = "pos_bio_msp_file" if polarity == "Positive" else "neg_bio_msp_file"
    else:
        query = sa.text("SELECT * FROM chromatography_methods WHERE method_id = :method_id")
        params = {"method_id": chromatography}
        column = "pos_istd_msp_file" if polarity == "Positive" else "neg_istd_msp_file"

    df_library = pd.read_sql(query, engine, params=params)

    # Get file name of MSP in requested polarity and resolve it against the methods directory
    msp_file_name = df_library[column].astype(str).values[0]
    return os.path.join(methods_directory, msp_file_name)


def get_parameter_file_path(chromatography, polarity, biological_standard=None):

    """
    Returns file path of the MS-DIAL parameters file stored in the Settings database.

    The path is read from the "biological_standards" table when biological_standard is given,
    otherwise from the "chromatography_methods" table.

    TODO: Once generated, MS-DIAL parameter filenames are standardized. No need to store / retrieve
    from database. Get the file path using the filename
    e.g. return directory + chromatography + "_" + polarity + "_Parameters.txt".

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity, either "Pos" or "Neg" ("Positive" and "Negative" are accepted as well)
        biological_standard (str, default None):
            Name of biological standard

    Returns:
        File path for MS-DIAL parameters.txt file.

    Raises:
        ValueError: If polarity is not one of the accepted values.
        IndexError: If no matching row exists in the database.
    """

    if polarity in ("Pos", "Positive"):
        column = "pos_parameter_file"
    elif polarity in ("Neg", "Negative"):
        column = "neg_parameter_file"
    else:
        raise ValueError('polarity must be "Pos" or "Neg", got: ' + repr(polarity))

    engine = sa.create_engine(settings_database)

    # Values are passed as bound parameters instead of being concatenated into the SQL text
    if biological_standard is not None:
        query = sa.text(
            "SELECT * FROM biological_standards "
            "WHERE chromatography = :chromatography AND name = :name")
        params = {"chromatography": chromatography, "name": biological_standard}
    else:
        query = sa.text("SELECT * FROM chromatography_methods WHERE method_id = :method_id")
        params = {"method_id": chromatography}

    df = pd.read_sql(query, engine, params=params)

    return df[column].astype(str).values[0]


def get_msdial_directory():

    """
    Returns location of MS-DIAL directory, as stored in the "msdial_directory" column
    of the "workspace" table in the Settings database.
    """

    return get_table("Settings", "workspace")["msdial_directory"].astype(str).values[0]


def get_msconvert_directory():

    """
    Returns location of MSConvert directory.

    This function uses the MS-DIAL directory path to retrieve user ID (the third component
    of the path), which it then uses to retrieve the path for MSConvert.exe in
    C:/Users/<username>/AppData/Local/Apps.

    TODO: There is probably a better way to implement this.

    Returns:
        Location of MSConvert directory in C:/Users/<username>/AppData/Local/Apps/ProteoWizard.

    Raises:
        IndexError: If no directory with "ProteoWizard" in its name is found.
    """

    user = get_msdial_directory().replace("\\", "/").split("/")[2]
    apps_directory = "C:/Users/" + user + "/AppData/Local/Apps/"

    # The context manager closes the directory iterator once the scan is done
    with os.scandir(apps_directory) as entries:
        proteowizard_folders = [
            entry.path for entry in entries
            if entry.is_dir() and "ProteoWizard" in entry.name
        ]

    return proteowizard_folders[0]
def get_internal_standards_dict(chromatography, value_type):

    """
    Returns dictionary of internal standard keys mapped to either m/z or RT values.

    This function is used to establish a y-axis range for internal standard retention time plots.
    See load_istd_rt_plot() in the PlotGeneration module.

    TODO: This function needs to filter for polarity!

    Args:
        chromatography (str):
            Chromatography method to retrieve internal standards for
        value_type (str):
            Name of the column to read values from ("precursor_mz", "retention_time",
            "ms2_spectrum"); the values are cast to float

    Returns:
        Dictionary with key-value pairs of { internal_standard: value_type }
    """

    engine = sa.create_engine(settings_database)

    # The chromatography value is bound as a parameter rather than concatenated into the SQL
    query = sa.text("SELECT * FROM internal_standards WHERE chromatography = :chromatography")
    df_internal_standards = pd.read_sql(query, engine, params={"chromatography": chromatography})

    names = df_internal_standards["name"].astype(str).tolist()
    values = df_internal_standards[value_type].astype(float).tolist()

    # If a name occurs more than once, the last row wins
    return dict(zip(names, values))


def get_internal_standards(chromatography, polarity):

    """
    Returns DataFrame of internal standards for a given chromatography method and polarity.

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity (either "Pos" or "Neg"); any other value is used for filtering as given

    Returns:
        DataFrame of "internal_standards" table from Settings database, filtered by
        chromatography and polarity.
    """

    # The table stores polarity as "Positive Mode" / "Negative Mode"
    polarity = {"Pos": "Positive Mode", "Neg": "Negative Mode"}.get(polarity, polarity)

    engine = sa.create_engine(settings_database)

    # Values are bound as parameters rather than concatenated into the SQL
    query = sa.text(
        "SELECT * FROM internal_standards "
        "WHERE chromatography = :chromatography AND polarity = :polarity")

    return pd.read_sql(query, engine, params={"chromatography": chromatography, "polarity": polarity})


def get_targeted_features(biological_standard, chromatography, polarity):

    """
    Returns DataFrame of metabolite targets for a given biological standard, chromatography, and polarity.

    Args:
        biological_standard (str):
            Name of biological standard
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity (either "Pos" or "Neg"); any other value is used for filtering as given

    Returns:
        DataFrame of "targeted_features" table from Settings database, filtered by
        biological standard, chromatography and polarity.
    """

    # The table stores polarity as "Positive Mode" / "Negative Mode"
    polarity = {"Pos": "Positive Mode", "Neg": "Negative Mode"}.get(polarity, polarity)

    engine = sa.create_engine(settings_database)

    # Values are bound as parameters rather than concatenated into the SQL
    query = sa.text(
        "SELECT * FROM targeted_features "
        "WHERE chromatography = :chromatography "
        "AND polarity = :polarity "
        "AND biological_standard = :biological_standard")

    params = {
        "chromatography": chromatography,
        "polarity": polarity,
        "biological_standard": biological_standard,
    }

    return pd.read_sql(query, engine, params=params)
def get_biological_standards_list():

    """
    Returns list of unique biological standard names from the Settings database.
    """

    df_biological_standards = get_biological_standards()
    return df_biological_standards["name"].astype(str).unique().tolist()


def add_biological_standard(name, identifier):

    """
    Creates new biological standard with name and identifier.

    The biological standard identifier is a text substring used to distinguish between sample and
    biological standard. MS-AutoQC checks filenames in the sequence for this identifier to process
    samples accordingly.

    One row is inserted per existing chromatography method, each with zero positive / negative
    features and the "Default" MS-DIAL configuration.

    Args:
        name (str):
            Name of biological standard
        identifier (str):
            String identifier in filename for biological standard

    Returns:
        None
    """

    # Get list of chromatography methods
    chromatography_methods = get_chromatography_methods()["method_id"].tolist()

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

        # Insert a biological standard row for each chromatography
        for method in chromatography_methods:
            insert = biological_standards_table.insert().values({
                "name": name,
                "identifier": identifier,
                "chromatography": method,
                "num_pos_features": 0,
                "num_neg_features": 0,
                "msdial_config_id": "Default"
            })
            connection.execute(insert)
    finally:
        # Close the connection even if an INSERT fails
        connection.close()


def remove_biological_standard(name):

    """
    Deletes biological standard and corresponding MSPs from Settings database.

    The MSP files recorded for the standard ("pos_bio_msp_file" / "neg_bio_msp_file") are removed
    from the "methods" directory, then the standard's rows are deleted from the
    "biological_standards" and "targeted_features" tables.

    Args:
        name (str): Name of the biological standard

    Returns:
        None
    """

    # Collect the MSP file names recorded for this biological standard
    df = get_table("Settings", "biological_standards")
    df = df.loc[df["name"] == name]
    files_to_delete = set(
        df["pos_bio_msp_file"].astype(str).tolist() + df["neg_bio_msp_file"].astype(str).tolist())

    # Delete corresponding MSPs from "methods" directory. Each file in the directory is
    # matched by its own name, so only this standard's library files are removed.
    for file in os.listdir(methods_directory):
        if file in files_to_delete:
            os.remove(os.path.join(methods_directory, file))

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get relevant tables
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)
        targeted_features_table = sa.Table("targeted_features", db_metadata, autoload=True)

        # Remove biological standard
        delete_biological_standard = (
            sa.delete(biological_standards_table)
                .where(biological_standards_table.c.name == name)
        )
        connection.execute(delete_biological_standard)

        # Remove targeted features for that biological standard
        delete_targeted_features = (
            sa.delete(targeted_features_table)
                .where(targeted_features_table.c.biological_standard == name)
        )
        connection.execute(delete_targeted_features)
    finally:
        # Close the connection even if a DELETE fails
        connection.close()
def get_biological_standard_identifiers(bio_standards=None):

    """
    Returns dictionary of identifiers for a given list of biological standards.

    If no list is provided, returns dict of identifiers for all biological standards.
    If an empty list is provided, returns an empty dict.

    Args:
        bio_standards (list, default None): List of biological standards

    Returns:
        Dictionary with key-value pairs of { identifier: biological_standard }

    Raises:
        IndexError: If a name in bio_standards does not exist in the "biological_standards" table.
    """

    df_bio_standards = get_biological_standards()

    identifiers = {}

    if bio_standards is not None:
        for bio_standard in bio_standards:
            df = df_bio_standards.loc[df_bio_standards["name"] == bio_standard]
            identifier = df["identifier"].astype(str).unique().tolist()[0]
            identifiers[identifier] = bio_standard
    else:
        # De-duplicate (name, identifier) pairs row-wise so that every identifier stays
        # attached to the name it was stored with
        pairs = df_bio_standards[["name", "identifier"]].astype(str).drop_duplicates()
        for name, identifier in zip(pairs["name"], pairs["identifier"]):
            identifiers[identifier] = name

    return identifiers


def get_qc_configurations():

    """
    Returns DataFrame of "qc_parameters" table from Settings database.
    """

    engine = sa.create_engine(settings_database)
    return pd.read_sql("SELECT * FROM qc_parameters", engine)


def get_qc_configurations_list():

    """
    Returns list of names of QC configurations from Settings database.
    """

    return get_qc_configurations()["config_name"].astype(str).tolist()
def remove_qc_configuration(qc_config_name):

    """
    Deletes QC configuration from the "qc_parameters" table in the Settings database.

    Args:
        qc_config_name (str): Name of the QC configuration

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get QC parameters table
        qc_parameters_table = sa.Table("qc_parameters", db_metadata, autoload=True)

        # Prepare and execute DELETE of QC configuration
        delete_config = (
            sa.delete(qc_parameters_table)
                .where(qc_parameters_table.c.config_name == qc_config_name)
        )
        connection.execute(delete_config)
    finally:
        # Close the connection even if the DELETE fails
        connection.close()


def get_qc_configuration_parameters(config_name=None, instrument_id=None, run_id=None):

    """
    Returns DataFrame of parameters for a selected QC configuration.

    The DataFrame has columns for each parameter, as well as for whether the parameter is enabled.
    The configuration is selected either directly by name, or through the "qc_config_id" recorded
    for an instrument run. If config_name is given, instrument_id and run_id are ignored.

    Args:
        config_name (str, default None):
            Name of QC configuration
        instrument_id (str, default None):
            Instrument ID (name)
        run_id (str, default None):
            Instrument run ID (job ID)

    Returns:
        DataFrame of parameters for QC configuration.

    Raises:
        ValueError: If neither config_name nor both instrument_id and run_id are provided.
        IndexError: If the given run does not exist in the "runs" table.
    """

    # Resolve the configuration name from the instrument run if it was not given directly
    if config_name is None:
        if instrument_id is None or run_id is None:
            raise ValueError("Provide either config_name, or both instrument_id and run_id.")
        df_runs = get_table(instrument_id, "runs")
        config_name = df_runs.loc[df_runs["run_id"] == run_id]["qc_config_id"].values[0]

    df_configurations = get_table("Settings", "qc_parameters")

    # Get selected configuration as an independent copy, so the edits below
    # are not applied to a slice of the full table
    selected_config = df_configurations.loc[
        df_configurations["config_name"] == config_name
    ].drop(columns=["id", "config_name"]).copy()

    # Booleans are stored as integers (1 / 0); convert them back to True / False
    for column in ["intensity_enabled", "library_rt_enabled", "in_run_rt_enabled", "library_mz_enabled"]:
        selected_config.loc[selected_config[column] == 1, column] = True
        selected_config.loc[selected_config[column] == 0, column] = False

    # Return parameters of selected configuration as a DataFrame
    return selected_config
def get_samples_in_run(instrument_id, run_id, sample_type="Both"):

    """
    Returns DataFrame of samples for a given instrument run from instrument database.

    For "Both", the "biological_standard" column is dropped from the biological standard results,
    which are then placed before the sample results in the combined DataFrame.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        sample_type (str):
            Sample type, either "Sample" or "Biological Standard" or "Both"

    Returns:
        DataFrame of sample tables for a given instrument run.

    Raises:
        ValueError: If sample_type is not one of the three accepted values.
    """

    if sample_type == "Sample":
        df = get_table(instrument_id, "sample_qc_results")

    elif sample_type == "Biological Standard":
        df = get_table(instrument_id, "bio_qc_results")

    elif sample_type == "Both":
        df_samples = get_table(instrument_id, "sample_qc_results")
        df_bio_standards = get_table(instrument_id, "bio_qc_results").drop(columns=["biological_standard"])
        # pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
        df = pd.concat([df_bio_standards, df_samples], ignore_index=True)

    else:
        raise ValueError(
            'sample_type must be "Sample", "Biological Standard" or "Both", got: ' + repr(sample_type))

    return df.loc[df["run_id"] == run_id]


def get_samples_from_csv(instrument_id, run_id, sample_type="Both"):

    """
    Returns DataFrame of samples in a given run using CSV files from Google Drive.

    CSV files of the run metadata, samples, and biological standards tables are stored
    in the ../data/Instrument_ID_Run_ID/csv directory, and removed on job completion.

    The "id" column is removed from the result if it is present.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        sample_type (str):
            Sample type, either "Sample" or "Biological Standard" or "Both"

    Returns:
        DataFrame of samples for a given instrument run.

    Raises:
        ValueError: If sample_type is not one of the three accepted values.
    """

    job_id = instrument_id.replace(" ", "_") + "_" + run_id
    csv_directory = os.path.join(data_directory, job_id, "csv")

    samples_csv = os.path.join(csv_directory, "samples.csv")
    bio_standards_csv = os.path.join(csv_directory, "bio_standards.csv")

    if sample_type == "Sample":
        df = pd.read_csv(samples_csv, index_col=False)

    elif sample_type == "Biological Standard":
        df = pd.read_csv(bio_standards_csv, index_col=False)

    elif sample_type == "Both":
        df_samples = pd.read_csv(samples_csv, index_col=False)
        df_bio_standards = pd.read_csv(bio_standards_csv, index_col=False).drop(columns=["biological_standard"])
        # pd.concat replaces DataFrame.append, which newer pandas releases no longer provide
        df = pd.concat([df_bio_standards, df_samples], ignore_index=True)

    else:
        raise ValueError(
            'sample_type must be "Sample", "Biological Standard" or "Both", got: ' + repr(sample_type))

    df = df.loc[df["run_id"] == run_id]

    # Drop the "id" column only if it exists; any other error is allowed to propagate
    return df.drop(columns=["id"], errors="ignore")


def get_next_sample(sample_id, instrument_id, run_id):

    """
    Returns sample following the given sample, or None if last sample.

    Args:
        sample_id (str):
            Sample ID
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        str: The next sample in the instrument run after the given sample ID, or None if last sample.

    Raises:
        ValueError: If sample_id is not part of the given instrument run.
    """

    # Get list of samples in run
    samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist()

    # Position of the sample that follows the given one
    next_sample_index = samples.index(sample_id) + 1

    # Return next sample, if there is one
    if next_sample_index < len(samples):
        return samples[next_sample_index]

    return None
2958 2959 TODO: This function should just return the samples with null values in the "qc_result" column. 2960 The "latest_sample" value in the "runs" table may be unreliable. 2961 2962 Args: 2963 instrument_id (str): 2964 Instrument ID 2965 run_id (str): 2966 Instrument run ID (job ID) 2967 2968 Returns: 2969 list: List of samples remaining in a QC job. 2970 """ 2971 2972 # Get last processed sample in run 2973 df_run = get_instrument_run(instrument_id, run_id) 2974 latest_sample = df_run["latest_sample"].astype(str).values[0] 2975 2976 # Get list of samples in run 2977 samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist() 2978 2979 # Return all samples if beginning of run 2980 if latest_sample == "None": 2981 return samples 2982 2983 # Get index of latest sample 2984 latest_sample_index = samples.index(latest_sample) 2985 2986 # Return list of samples starting at latest sample 2987 return samples[latest_sample_index:len(samples)] 2988 2989 2990def get_unprocessed_samples(instrument_id, run_id): 2991 2992 """ 2993 For an active run, returns 1) a list of samples that were not processed due to error / runtime termination, 2994 and 2) the current sample being monitored / processed. 2995 2996 Args: 2997 instrument_id (str): 2998 Instrument ID 2999 run_id (str): 3000 Instrument run ID (job ID) 3001 3002 Returns: 3003 tuple: List of unprocessed samples for the given instrument run, and current sample being monitored / processed. 
def get_current_sample(instrument_id, run_id):

    """
    Returns the current sample being monitored / processed.

    TODO: The "latest_sample" is the last sample to be processed. Nomenclature needs to be updated in many places.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        str: The second sample of the run if no sample has been processed yet
        (None if the run has fewer than two samples); otherwise None.
    """

    # Get latest sample in run
    df_run = get_instrument_run(instrument_id, run_id)
    latest_sample = df_run["latest_sample"].astype(str).values[0]

    # Return second sample if beginning of run
    if latest_sample == "None":
        # The sample list was previously referenced without ever being loaded (NameError)
        samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist()
        return samples[1] if len(samples) > 1 else None

    # NOTE(review): the original implementation fell through here and implicitly returned
    # None once a sample had been processed; the intended value is not evident from this
    # function, so that behavior is kept and made explicit -- confirm against callers.
    return None
def parse_biological_standard_data(instrument_id, run_id, result_type, polarity, biological_standard, load_from, as_json=True):

    """
    Parses biological standard data into JSON-ified DataFrame of targeted features (as columns) vs. instrument runs (as rows).

    The bio_qc_results table in the instrument database is first filtered by biological standard, chromatography, and polarity.
    Then, the sample name is replaced with the instrument run it was associated with.

    Data is stored in a column (for example, "intensity") as a single-record string dict with the following structure:

    | Name                | Metabolite 1 | Metabolite 2 | ... |
    | ------------------- | ------------ | ------------ | ... |
    | INSTRUMENT_RUN_001  | 13597340     | 53024853     | ... |

    These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat().

    | Name                | Metabolite 1 | Metabolite 2 | ... |
    | ------------------- | ------------ | ------------ | ... |
    | INSTRUMENT_RUN_001  | 13597340     | 53024853     | ... |
    | INSTRUMENT_RUN_002  | 23543246     | 102030406    | ... |
    | ...                 | ...          | ...          | ... |

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        result_type (str):
            Column in bio_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity")
        polarity (str):
            Polarity ("Pos" or "Neg")
        biological_standard (str):
            Name of biological standard
        load_from (str):
            "database" or "processing" to read the instrument database, or "csv" to read the
            bio_standards.csv file (during Google Drive sync of active run)
        as_json (bool, default True):
            Whether to return table as JSON string or as DataFrame

    Returns:
        JSON-ified DataFrame of targeted features for a biological standard (columns) vs. instrument runs (rows).

    Raises:
        ValueError: If load_from is not "database", "processing", or "csv".
    """

    # Get relevant QC results table; "processing" is accepted for parity with the
    # internal standard parsers, which treat it the same as "database"
    if load_from == "database" or load_from == "processing":
        df_samples = get_table(instrument_id, "bio_qc_results")
    elif load_from == "csv":
        run_folder = instrument_id.replace(" ", "_") + "_" + run_id
        bio_standards_csv = os.path.join(data_directory, run_folder, "csv", "bio_standards.csv")
        df_samples = pd.read_csv(bio_standards_csv, index_col=False)
    else:
        raise ValueError('load_from must be "database", "processing", or "csv", not ' + repr(load_from))

    # Filter by biological standard type
    df_samples = df_samples.loc[df_samples["biological_standard"] == biological_standard]

    # Filter by polarity
    df_samples = df_samples.loc[df_samples["polarity"] == polarity]

    # Look up the chromatography method of the given run
    df_runs = get_table(instrument_id, "runs")
    chromatography = df_runs.loc[df_runs["run_id"] == run_id]["chromatography"].values[0]

    # Keep only biological standards from runs that used the same chromatography
    run_ids = df_runs.loc[df_runs["chromatography"] == chromatography]["run_id"].astype(str).tolist()
    df_samples = df_samples.loc[df_samples["run_id"].isin(run_ids)]
    run_ids = df_samples["run_id"].astype(str).tolist()

    # Initialize DataFrame with individual records of sample data
    results = df_samples[result_type].fillna('{}').tolist()
    results = [ast.literal_eval(result) if result not in ("None", "nan") else {} for result in results]
    df_results = pd.DataFrame(results)
    df_results["Name"] = run_ids

    # Return DataFrame as JSON string
    if as_json:
        return df_results.to_json(orient="records")
    else:
        return df_results
3206 3207 The QC DataFrame is stored in the "qc_dataframe" column as a single-record string dict with the following structure: 3208 3209 | Sample | Delta m/z | Delta RT | In-run delta RT | Warnings | Fails | 3210 | ---------- | --------- | -------- | --------------- | -------- | ----- | 3211 | SAMPLE_001 | 0.000001 | 0.001 | 0.00001 | None | None | 3212 3213 These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat(). 3214 3215 Args: 3216 instrument_id (str): 3217 Instrument ID 3218 run_id (str): 3219 Instrument run ID (job ID) 3220 polarity (str): 3221 Polarity ("Pos" or "Neg") 3222 result_type (str): 3223 Column in sample_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity") 3224 load_from (str): 3225 Specifies whether to load data from CSV file (during Google Drive sync of active run) or instrument database 3226 as_json (bool, default True): 3227 Whether to return table as JSON string or as DataFrame 3228 3229 Returns: 3230 JSON-ified DataFrame of QC data for samples (as rows) vs. internal standards (as columns). 
def get_workspace_users_list():

    """
    Returns the email addresses stored in the "gdrive_users" table of the Settings database, as a list of strings.
    """

    df_users = get_table("Settings", "gdrive_users")
    email_addresses = df_users["email_address"].astype(str)
    return email_addresses.tolist()
3287 3288 Returns: 3289 None 3290 """ 3291 3292 if email_address in get_workspace_users_list(): 3293 return "User already exists" 3294 3295 # Get Google Drive instance 3296 drive = get_drive_instance() 3297 3298 # Get ID of MS-AutoQC folder in Google Drive 3299 gdrive_folder_id = get_drive_folder_id() 3300 3301 if gdrive_folder_id is not None: 3302 # Add user access by updating permissions 3303 folder = drive.CreateFile({"id": gdrive_folder_id}) 3304 permission = folder.InsertPermission({ 3305 "type": "user", 3306 "role": "writer", 3307 "value": email_address}) 3308 3309 # Insert user email address in "gdrive_users" table 3310 db_metadata, connection = connect_to_database("Settings") 3311 gdrive_users_table = sa.Table("gdrive_users", db_metadata, autoload=True) 3312 3313 insert_user_email = gdrive_users_table.insert().values( 3314 {"name": permission["name"], 3315 "email_address": email_address, 3316 "permission_id": permission["id"]}) 3317 3318 connection.execute(insert_user_email) 3319 connection.close() 3320 3321 else: 3322 return "Error" 3323 3324 3325def delete_user_from_workspace(email_address): 3326 3327 """ 3328 Removes user access to workspace in Google Drive and deletes email from database. 3329 3330 Args: 3331 email_address (str): Email address for Google account whose access will to be revoked. 
def get_qc_results(instrument_id, sample_list, is_bio_standard=False):

    """
    Returns DataFrame of QC results for a given sample list.

    TODO: This function will break if samples in different runs have the same sample ID. Add run ID filter.

    Args:
        instrument_id (str):
            Instrument ID
        sample_list (list):
            List of samples to query
        is_bio_standard (bool, default False):
            Whether the list is biological standards (True) or samples (False)

    Returns:
        DataFrame with "sample_id" and "qc_result" columns for the given sample list,
        or an empty DataFrame if the sample list is empty.
    """

    if len(sample_list) == 0:
        return pd.DataFrame()

    database = get_database_file(instrument_id=instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # The table name comes from a fixed pair of literals, never from caller input
    table_name = "bio_qc_results" if is_bio_standard else "sample_qc_results"

    # Sample IDs are bound as parameters instead of being spliced into the SQL string,
    # so IDs containing quotes or brackets can neither break nor alter the query
    query = sa.text(
        "SELECT sample_id, qc_result FROM " + table_name + " WHERE sample_id IN :sample_ids"
    ).bindparams(sa.bindparam("sample_ids", expanding=True))

    try:
        return pd.read_sql(query, engine, params={"sample_ids": list(sample_list)})
    finally:
        # Release the connection pool created for this one-off query
        engine.dispose()
def run_is_on_instrument_pc(instrument_id, run_id):

    """
    Checks whether the instrument recorded for a run matches the identity of the current device.

    TODO: Use this function in PlotGeneration and DashWebApp module.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID

    Returns:
        True if the run's "instrument_id" equals the "instrument_identity" stored in the
        "workspace" table of the Settings database, and False if not.
    """

    # Instrument recorded for the run
    df_run = get_instrument_run(instrument_id, run_id)
    run_instrument = df_run["instrument_id"].astype(str).tolist()[0]

    # Identity registered for this device
    df_workspace = get_table("Settings", "workspace")
    device_identity = df_workspace["instrument_identity"].astype(str).tolist()[0]

    return run_instrument == device_identity
def update_slack_channel(slack_channel, notifications_enabled):

    """
    Stores the Slack channel and the notification toggle in the "workspace" table of the Settings database.

    Any "#" characters are removed from the channel name before it is stored.

    Args:
        slack_channel (str):
            Slack channel to post messages to
        notifications_enabled (bool):
            Whether to send Slack notifications for QC warnings and fails

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")
    workspace = sa.Table("workspace", db_metadata, autoload=True)

    new_values = {
        "slack_channel": slack_channel.replace("#", ""),
        "slack_enabled": notifications_enabled,
    }

    # The workspace record is the row with id 1
    statement = sa.update(workspace).where(workspace.c.id == 1).values(**new_values)

    connection.execute(statement)
    connection.close()
def get_email_notifications_list(as_string=False):

    """
    Returns list of emails registered for email notifications for QC warnings and fails.

    Args:
        as_string (bool, default False):
            Whether to return the list as a string (for Gmail API) or as list object (for display in Settings page)

    Returns:
        List of emails registered for QC warning/fail notifications, or a single
        comma-separated string of those emails if as_string is True.
    """

    email_list = get_table("Settings", "email_notifications")["email_address"].astype(str).tolist()

    if as_string:
        # str.join places a separator between every pair of entries; the previous
        # hand-written loop omitted it after any entry equal to the last one
        return ",".join(email_list)

    return email_list
def get_completed_samples_count(instrument_id, run_id, status):

    """
    Returns the number of completed samples and the total number of samples for an instrument run.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        status (str):
            Instrument run (QC job) status, either "Active" or "Complete"

    Returns:
        Tuple with number of completed samples and total samples for a given instrument run.
    """

    # The CSV copy of the run is read only for an active run, with sync enabled,
    # on a device whose identity differs from the given instrument ID
    read_from_csv = (
        status == "Active"
        and sync_is_enabled()
        and get_device_identity() != instrument_id
    )

    if read_from_csv:
        df_instrument_run = get_instrument_run_from_csv(instrument_id, run_id)
    else:
        df_instrument_run = get_instrument_run(instrument_id, run_id)

    completed_count = df_instrument_run["completed"].astype(int).tolist()[0]
    sample_count = df_instrument_run["samples"].astype(int).tolist()[0]

    return (completed_count, sample_count)
def update_sample_counters_for_run(instrument_id, run_id, latest_sample):

    """
    Updates the "completed", "passes", "fails", and "latest_sample" values of a run in the "runs" table.

    Counts are recomputed from the "qc_result" column of all samples and biological standards in the run.

    TODO: The "latest_sample" is the last sample to be processed / completed.
    Nomenclature should be updated for clarity.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        latest_sample (str):
            Last sample to be processed

    Returns:
        None
    """

    df = get_samples_in_run(instrument_id, run_id, "Both")

    # Count each QC result once; results that never occur default to zero
    if "qc_result" in df.columns:
        result_counts = df["qc_result"].value_counts()
    else:
        result_counts = pd.Series(dtype=int)

    passes = int(result_counts.get("Pass", 0))
    fails = int(result_counts.get("Fail", 0))

    # NOTE(review): samples with a "Warning" result are not included in "completed",
    # exactly as before -- confirm whether they should count toward run progress.
    completed = passes + fails

    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        update_status = (
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(
                    completed=completed,
                    passes=passes,
                    fails=fails,
                    latest_sample=latest_sample
            )
        )

        connection.execute(update_status)
    finally:
        # Close the connection even if the update fails
        connection.close()
def skip_sample(instrument_id, run_id):

    """
    Skips sample by setting "latest_sample" value for instrument run to the next sample.

    This function was used after restarting the acquisition listener when MS-DIAL got stuck processing a corrupted file.
    Now that MS-DIAL runs in the background, it is deprecated and should be removed.

    If fewer than two samples remain in the run, there is no next sample and nothing is updated.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    # Get next sample (the second entry of the remaining samples)
    samples = get_remaining_samples(instrument_id, run_id)
    if len(samples) < 2:
        # Previously raised IndexError here; there is no sample to skip ahead to
        return None

    next_sample = samples[1]

    # Set latest sample to next sample
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        connection.execute((
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(latest_sample=next_sample)
        ))
    finally:
        # Close the connection even if the update fails
        connection.close()
def get_pid(instrument_id, run_id):

    """
    Retrieves acquisition listener process ID stored in the "pid" column of the instrument run record.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        int: Process ID for the run, or None if it could not be retrieved or converted to an integer.
    """

    try:
        return get_instrument_run(instrument_id, run_id)["pid"].astype(int).tolist()[0]
    except Exception:
        # Best-effort lookup: a missing run, missing column, or empty PID all yield None.
        # Unlike a bare except, this lets KeyboardInterrupt and SystemExit propagate.
        return None
def upload_qc_results(instrument_id, run_id):

    """
    Uploads QC results for a given instrument run to Google Drive as CSV files.

    The run record, the run's samples, and the biological standards table are written to
    CSV files in the run's "csv" directory, compressed into a ZIP archive, and uploaded.
    If the run already has a Drive ID, the existing archive is updated; otherwise the archive
    is uploaded for the first time and its Drive ID is stored in the "runs" table.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    run_folder_name = instrument_id.replace(" ", "_") + "_" + run_id

    # Get Google Drive instance
    drive = get_drive_instance()

    # Define file names and file paths
    run_filename = "run.csv"
    samples_csv_filename = "samples.csv"
    bio_standards_csv_filename = "bio_standards.csv"

    # Creates the run directory and its "csv" subdirectory in one step, without
    # failing if either already exists
    run_directory = os.path.join(data_directory, run_folder_name)
    csv_directory = os.path.join(run_directory, "csv")
    os.makedirs(csv_directory, exist_ok=True)

    run_csv_path = os.path.join(csv_directory, run_filename)
    samples_csv_path = os.path.join(csv_directory, samples_csv_filename)
    bio_standards_csv_path = os.path.join(csv_directory, bio_standards_csv_filename)

    # Convert sample and biological standard QC results from database into CSV files
    df_run = get_instrument_run(instrument_id, run_id)
    df_run.to_csv(run_csv_path, index=False)

    df_samples = get_samples_in_run(instrument_id=instrument_id, run_id=run_id, sample_type="Sample")
    if len(df_samples) > 0:
        df_samples.to_csv(samples_csv_path, index=False)

    # The biological standards table is exported in full (not filtered by run)
    df_bio_standards = get_table(instrument_id, "bio_qc_results")
    if len(df_bio_standards) > 0:
        df_bio_standards.to_csv(bio_standards_csv_path, index=False)

    # Compress CSV files into a ZIP archive for faster upload
    zip_filename = run_folder_name + ".zip"
    zip_file_path = zip_csv_files(
        input_directory=csv_directory, output_directory_and_name=os.path.join(run_directory, run_folder_name))

    zip_file = {zip_filename: zip_file_path}

    # Get Google Drive ID for the CSV files ZIP archive from the run record loaded above
    zip_file_drive_id = df_run["drive_id"].tolist()[0]

    # A missing ID may surface as None, NaN, an empty string, or the string "None"
    has_drive_id = not pd.isna(zip_file_drive_id) and str(zip_file_drive_id) not in ("", "None")

    # Update existing ZIP archive in Google Drive
    if has_drive_id:

        file = drive.CreateFile({
            "id": zip_file_drive_id,
            "title": zip_filename,
        })

        # Execute upload
        file.SetContentFile(zip_file_path)
        file.Upload()

    # If zip file Drive ID does not exist,
    else:

        # Upload CSV files ZIP archive to Google Drive for first time
        drive_id = upload_to_google_drive(zip_file)[zip_filename]

        # Store Drive ID of ZIP file in local database
        db_metadata, connection = connect_to_database(instrument_id)

        try:
            runs_table = sa.Table("runs", db_metadata, autoload=True)

            connection.execute((
                sa.update(runs_table)
                    .where(runs_table.c.run_id == run_id)
                    .values(drive_id=drive_id)
            ))
        finally:
            # Close the connection even if the update fails
            connection.close()
4019 """ 4020 4021 id = instrument_id.replace(" ", "_") + "_" + run_id 4022 4023 # Get Google Drive instance 4024 drive = get_drive_instance() 4025 4026 # Initialize directories 4027 run_directory = os.path.join(data_directory, id) 4028 if not os.path.exists(run_directory): 4029 os.makedirs(run_directory) 4030 4031 csv_directory = os.path.join(run_directory, "csv") 4032 if not os.path.exists(csv_directory): 4033 os.makedirs(csv_directory) 4034 4035 # Zip file 4036 zip_filename = id + ".zip" 4037 zip_file_path = os.path.join(run_directory, zip_filename) 4038 4039 # Get Google Drive folder ID 4040 gdrive_folder_id = get_drive_folder_id() 4041 4042 # Find and download ZIP archive of CSV files from Google Drive 4043 for file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList(): 4044 if file["title"] == zip_filename: 4045 os.chdir(run_directory) 4046 file.GetContentFile(file["title"]) 4047 os.chdir(root_directory) 4048 break 4049 4050 # Unzip archive 4051 unzip_csv_files(zip_file_path, csv_directory) 4052 4053 # Define and return file paths 4054 run_csv = os.path.join(csv_directory, "run.csv") 4055 samples_csv = os.path.join(csv_directory, "samples.csv") 4056 bio_standards_csv_file = os.path.join(csv_directory, "bio_standards.csv") 4057 4058 return (run_csv, samples_csv, bio_standards_csv_file) 4059 4060 4061def get_drive_folder_id(): 4062 4063 """ 4064 Returns Google Drive ID for the MS-AutoQC folder (found in user's root Drive directory). 4065 """ 4066 4067 return get_table("Settings", "workspace")["gdrive_folder_id"].values[0] 4068 4069 4070def get_database_drive_id(instrument_id): 4071 4072 """ 4073 Returns Google Drive ID for a given instrument's database. 4074 4075 Args: 4076 instrument_id (str): Instrument ID 4077 4078 Returns: 4079 str: Google Drive ID for the instrument database ZIP archive. 
4080 """ 4081 4082 df = get_table("Settings", "instruments") 4083 return df.loc[df["name"] == instrument_id]["drive_id"].values[0] 4084 4085 4086def upload_database(instrument_id, sync_settings=False): 4087 4088 """ 4089 Uploads database file and methods directory to Google Drive as ZIP archives. 4090 4091 Args: 4092 instrument_id (str): 4093 Instrument ID for the instrument database to upload 4094 sync_settings (bool, default False): 4095 Whether to upload methods directory as well 4096 4097 Returns: 4098 str: Timestamp upon upload completion. 4099 """ 4100 4101 # Get Google Drive ID's for the MS-AutoQC folder and database file 4102 gdrive_folder_id = get_drive_folder_id() 4103 instrument_db_file_id = get_database_drive_id(instrument_id) 4104 4105 # Get Google Drive instance 4106 drive = get_drive_instance() 4107 4108 # Vacuum database to optimize size 4109 execute_vacuum(instrument_id) 4110 4111 # Upload methods directory to Google Drive 4112 if sync_settings == True: 4113 upload_methods() 4114 4115 # Upload database to Google Drive 4116 if gdrive_folder_id is not None and instrument_db_file_id is not None: 4117 4118 # Upload zipped database 4119 zip_database(instrument_id=instrument_id) 4120 file = drive.CreateFile( 4121 {"id": instrument_db_file_id, "title": instrument_id.replace(" ", "_") + ".zip"}) 4122 file.SetContentFile(get_database_file(instrument_id, zip=True)) 4123 file.Upload() 4124 4125 # Save modifiedDate of database file 4126 remember_last_modified(database=instrument_id, modified_date=file["modifiedDate"]) 4127 4128 else: 4129 return None 4130 4131 return time.strftime("%H:%M:%S") 4132 4133 4134def download_database(instrument_id, sync_settings=False): 4135 4136 """ 4137 Downloads instrument database ZIP file from Google Drive. 4138 4139 This function is called when accessing an instrument database from a device other than the given instrument. 
4140 4141 Args: 4142 instrument_id (str): 4143 Instrument ID for the instrument database to download 4144 sync_settings (bool, default False): 4145 Whether to download methods directory as well 4146 4147 Returns: 4148 str: Timestamp upon download completion. 4149 """ 4150 4151 db_zip_file = instrument_id.replace(" ", "_") + ".zip" 4152 4153 # If the database was not modified by another instrument, skip download (for instruments only) 4154 if not database_was_modified(instrument_id): 4155 return None 4156 4157 # Get Google Drive instance 4158 drive = get_drive_instance() 4159 4160 # Get Google Drive ID's for the MS-AutoQC folder and database file 4161 gdrive_folder_id = get_drive_folder_id() 4162 instrument_db_file_id = get_instrument(instrument_id)["drive_id"].values[0] 4163 4164 # If Google Drive folder is found, look for database next 4165 if gdrive_folder_id is not None and instrument_db_file_id is not None: 4166 4167 # Download newly added / modified MSP files in MS-AutoQC > methods 4168 if sync_settings == True: 4169 download_methods(skip_check=True) 4170 4171 try: 4172 for file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList(): 4173 if file["title"] == db_zip_file: 4174 4175 # Download and unzip database 4176 os.chdir(data_directory) # Change to data directory 4177 file.GetContentFile(file["title"]) # Download database and get file ID 4178 os.chdir(root_directory) # Return to root directory 4179 unzip_database(instrument_id=instrument_id) # Unzip database 4180 4181 # Save modifiedDate of database file 4182 remember_last_modified(database=instrument_id, modified_date=file["modifiedDate"]) 4183 4184 except Exception as error: 4185 print("Error downloading database from Google Drive:", error) 4186 return None 4187 else: 4188 return None 4189 4190 return time.strftime("%H:%M:%S") 4191 4192 4193def upload_methods(): 4194 4195 """ 4196 Uploads methods directory ZIP archive to Google Drive. 
4197 """ 4198 4199 df_workspace = get_table("Settings", "workspace") 4200 methods_zip_file_id = df_workspace["methods_zip_file_id"].values[0] 4201 4202 # Vacuum database to optimize size 4203 execute_vacuum("Settings") 4204 4205 # Get Google Drive instance 4206 drive = get_drive_instance() 4207 4208 # Upload methods ZIP archive to Google Drive 4209 if methods_zip_file_id is not None: 4210 4211 # Upload zipped database 4212 methods_zip_file = zip_methods() 4213 file = drive.CreateFile({"id": methods_zip_file_id, "title": "methods.zip"}) 4214 file.SetContentFile(methods_zip_file) 4215 file.Upload() 4216 4217 # Save modifiedDate of methods ZIP file 4218 remember_last_modified(database="Settings", modified_date=file["modifiedDate"]) 4219 4220 else: 4221 return None 4222 4223 4224def download_methods(skip_check=False): 4225 4226 """ 4227 Downloads methods directory ZIP archive from Google Drive. 4228 4229 Args: 4230 skip_check (bool, default False): If True, skips checking whether database was modified 4231 4232 Returns: 4233 None 4234 """ 4235 4236 # If the database was not modified by another instrument, skip download (for instruments only) 4237 if not skip_check: 4238 if not database_was_modified("Settings"): 4239 return None 4240 4241 # Get device identity 4242 instrument_bool = is_instrument_computer() 4243 device_identity = get_device_identity() 4244 4245 # Get MS-DIAL directory 4246 try: 4247 msdial_directory = get_msdial_directory() 4248 except: 4249 msdial_directory = None 4250 4251 # Get Google Drive instance 4252 drive = get_drive_instance() 4253 4254 # Get Google Drive folder ID 4255 gdrive_folder_id = get_drive_folder_id() 4256 4257 try: 4258 # Download and unzip methods directory 4259 for file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList(): 4260 if file["title"] == "methods.zip": 4261 os.chdir(data_directory) # Change to data directory 4262 file.GetContentFile(file["title"]) # Download methods ZIP archive 4263 
os.chdir(root_directory) # Return to root directory 4264 unzip_methods() # Unzip methods directory 4265 4266 # Save modifiedDate of methods directory 4267 remember_last_modified(database="Settings", modified_date=file["modifiedDate"]) 4268 4269 except Exception as error: 4270 print("Error downloading methods from Google Drive:", error) 4271 return None 4272 4273 # Update MS-DIAL directory 4274 update_msdial_directory(msdial_directory) 4275 4276 # Update user device identity 4277 set_device_identity(is_instrument_computer=instrument_bool, instrument_id=device_identity) 4278 return time.strftime("%H:%M:%S") 4279 4280 4281def remember_last_modified(database, modified_date): 4282 4283 """ 4284 Stores last modified time of database file in Google Drive. 4285 4286 This function is called after file upload, and used for comparison before download. 4287 4288 Args: 4289 database (str): 4290 Name of database (either Instrument ID or "Settings") 4291 modified_date (str): 4292 Modified date of file uploaded to Google Drive 4293 4294 Returns: 4295 None 4296 """ 4297 4298 db_metadata, connection = connect_to_database("Settings") 4299 instruments_table = sa.Table("instruments", db_metadata, autoload=True) 4300 workspace_table = sa.Table("workspace", db_metadata, autoload=True) 4301 4302 if database == "Settings": 4303 connection.execute(( 4304 sa.update(workspace_table) 4305 .where((workspace_table.c.id == 1)) 4306 .values(methods_last_modified=modified_date) 4307 )) 4308 else: 4309 connection.execute(( 4310 sa.update(instruments_table) 4311 .where((instruments_table.c.name == database)) 4312 .values(last_modified=modified_date) 4313 )) 4314 4315 connection.close() 4316 4317 4318def database_was_modified(database_name): 4319 4320 """ 4321 Returns True if workspace file was modified by another instrument PC in Google Drive, and False if not. 
4322 4323 Args: 4324 database_name (str): Name of database 4325 4326 Returns: 4327 Returns True if workspace file was modified by another instrument PC in Google Drive, and False if not. 4328 """ 4329 4330 # Get Google Drive folder ID from database 4331 gdrive_folder_id = get_drive_folder_id() 4332 4333 # Compare "last modified" values 4334 if database_name == "Settings": 4335 local_last_modified = get_table("Settings", "workspace")["methods_last_modified"].values[0] 4336 filename = "methods.zip" 4337 else: 4338 local_last_modified = get_instrument(database_name)["last_modified"].values[0] 4339 filename = database_name.replace(" ", "_") + ".zip" 4340 4341 # Get Google Drive instance 4342 drive = get_drive_instance() 4343 4344 drive_last_modified = None 4345 for file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList(): 4346 if file["title"] == filename: 4347 drive_last_modified = file["modifiedDate"] 4348 break 4349 4350 if local_last_modified == drive_last_modified: 4351 return False 4352 else: 4353 return True 4354 4355 4356def send_sync_signal(folder_id): 4357 4358 """ 4359 Uploads empty file to signal that an instrument PC is syncing to Google Drive. 4360 4361 TODO: This method is deprecated. Please remove if no plans for usage. 4362 4363 Args: 4364 folder_id (str): Google Drive folder ID 4365 4366 Returns: 4367 bool: True if sync signal was sent, False if not. 4368 """ 4369 4370 # Get Google Drive instance 4371 drive = get_drive_instance() 4372 4373 try: 4374 drive.CreateFile(metadata={"title": "Syncing", "parents": [{"id": folder_id}]}).Upload() 4375 return True 4376 except: 4377 return False 4378 4379 4380def safe_to_upload(folder_id): 4381 4382 """ 4383 Returns False if another device is currently uploading to Google Drive, else True. 4384 4385 TODO: This method is deprecated. Please remove if no plans for usage. 
4386 4387 Args: 4388 folder_id (str): Google Drive folder ID 4389 4390 Returns: 4391 bool: False if another device is currently uploading to Google Drive, True if not. 4392 """ 4393 4394 # Get Google Drive instance 4395 drive = get_drive_instance() 4396 4397 for file in drive.ListFile({"q": "'" + folder_id + "' in parents and trashed=false"}).GetList(): 4398 if file["title"] == "Syncing": 4399 return False 4400 4401 return True 4402 4403 4404def remove_sync_signal(folder_id): 4405 4406 """ 4407 Removes empty signal file to signal that an instrument PC has completed syncing to Google Drive. 4408 4409 TODO: This method is deprecated. Please remove if no plans for usage. 4410 4411 Args: 4412 folder_id (str): Google Drive folder ID 4413 4414 Returns: 4415 bool: True if sync signal was removed, False if not. 4416 """ 4417 4418 # Get Google Drive instance 4419 drive = get_drive_instance() 4420 4421 try: 4422 for file in drive.ListFile({"q": "'" + folder_id + "' in parents and trashed=false"}).GetList(): 4423 if file["title"] == "Syncing": 4424 file.Delete() 4425 return True 4426 except: 4427 return False 4428 4429 4430def delete_active_run_csv_files(instrument_id, run_id): 4431 4432 """ 4433 Checks for and deletes CSV files from Google Drive at the end of an active instrument run. 
4434 4435 Args: 4436 instrument_id (str): 4437 Instrument ID 4438 run_id (str): 4439 Instrument run ID (job ID) 4440 4441 Returns: 4442 None 4443 """ 4444 4445 id = instrument_id.replace(" ", "_") + "_" + run_id 4446 4447 # Find zip archive of CSV files in Google Drive and delete it 4448 drive = get_drive_instance() 4449 gdrive_folder_id = get_drive_folder_id() 4450 4451 if gdrive_folder_id is not None: 4452 drive_file_list = drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList() 4453 for file in drive_file_list: 4454 if file["title"] == id + ".zip": 4455 file.Delete() 4456 break 4457 4458 # Delete Drive ID from database 4459 db_metadata, connection = connect_to_database(instrument_id) 4460 runs_table = sa.Table("runs", db_metadata, autoload=True) 4461 4462 connection.execute(( 4463 sa.update(runs_table) 4464 .where(runs_table.c.run_id == run_id) 4465 .values(drive_id=None) 4466 )) 4467 4468 connection.close() 4469 4470 4471def sync_on_run_completion(instrument_id, run_id): 4472 4473 """ 4474 Syncs database with Google Drive at the end of an active instrument run. 4475 4476 Performs the following actions: 4477 1. Upload database to Google Drive 4478 2. 
Delete active run CSV files 4479 4480 Args: 4481 instrument_id (str): 4482 Instrument ID 4483 run_id (str): 4484 Instrument run ID (job ID) 4485 4486 Returns: 4487 None 4488 """ 4489 4490 # Get Google Drive instance and folder ID 4491 drive = get_drive_instance() 4492 gdrive_folder_id = get_drive_folder_id() 4493 4494 # Upload database to Google Drive 4495 try: 4496 upload_database(instrument_id) 4497 except Exception as error: 4498 print("sync_on_run_completion() – Error uploading database during sync", error) 4499 return None 4500 4501 # Delete active run CSV files 4502 try: 4503 delete_active_run_csv_files(instrument_id, run_id) 4504 except Exception as error: 4505 print("sync_on_run_completion() – Error deleting CSV files after sync", error) 4506 return None 4507 4508 4509def get_data_file_type(instrument_id): 4510 4511 """ 4512 Returns expected data file extension based on instrument vendor type. 4513 4514 TODO: Modify this function as needed when adding support for other instrument vendors. 4515 4516 Args: 4517 instrument_id (str): Instrument ID 4518 4519 Returns: 4520 Data file extension for instrument vendor. 4521 """ 4522 4523 engine = sa.create_engine(settings_database) 4524 df_instruments = pd.read_sql("SELECT * FROM instruments WHERE name='" + instrument_id + "'", engine) 4525 vendor = df_instruments["vendor"].astype(str).tolist()[0] 4526 4527 if vendor == "Thermo Fisher": 4528 return "raw" 4529 elif vendor == "Agilent": 4530 return "d" 4531 elif vendor == "Bruker": 4532 return "baf" 4533 elif vendor == "Waters": 4534 return "raw" 4535 elif vendor == "Sciex": 4536 return "wiff2" 4537 4538 4539def is_completed_run(instrument_id, run_id): 4540 4541 """ 4542 Returns True if the given QC job is for a completed run, and False if for an active run. 
4543 4544 Args: 4545 instrument_id (str): 4546 Instrument ID 4547 run_id (str): 4548 Instrument run ID (job ID) 4549 4550 Returns: 4551 bool: True if the job is for a completed run, and False if job is for an active run. 4552 """ 4553 4554 try: 4555 job_type = get_instrument_run(instrument_id, run_id)["job_type"].astype(str).values[0] 4556 if job_type == "completed": 4557 return True 4558 else: 4559 return False 4560 except: 4561 print("Could not get MS-AutoQC job type.") 4562 traceback.print_exc() 4563 return False 4564 4565 4566def delete_temp_directory(instrument_id, run_id): 4567 4568 """ 4569 Deletes temporary data file directory in local app directory. 4570 4571 This function is called at the end of an instrument run (QC job). 4572 4573 Args: 4574 instrument_id (str): 4575 Instrument ID 4576 run_id (str): 4577 Instrument run ID (job ID) 4578 4579 Returns: 4580 None 4581 """ 4582 4583 # Delete temporary data file directory 4584 try: 4585 id = instrument_id.replace(" ", "_") + "_" + run_id 4586 temp_directory = os.path.join(data_directory, id) 4587 if os.path.exists(temp_directory): 4588 shutil.rmtree(temp_directory) 4589 except: 4590 print("Could not delete temporary data directory.") 4591 4592 4593def pipeline_valid(module=None): 4594 4595 """ 4596 Validates that MSConvert and MS-DIAL dependencies are installed. 4597 4598 This function is called during job setup validation. 4599 4600 Args: 4601 module (str, default None): If specified, only validates given module. 4602 4603 Returns: 4604 bool: Whether MSConvert.exe and MsdialConsoleApp.exe exist. 
4605 """ 4606 4607 try: 4608 msconvert_installed = os.path.exists(os.path.join(get_msconvert_directory(), "msconvert.exe")) 4609 except: 4610 msconvert_installed = False 4611 4612 try: 4613 msdial_installed = os.path.exists(os.path.join(get_msdial_directory(), "MsdialConsoleApp.exe")) 4614 except: 4615 msdial_installed = False 4616 4617 if module == "msdial": 4618 return msdial_installed 4619 elif module == "msconvert": 4620 return msconvert_installed 4621 else: 4622 return msconvert_installed and msdial_installed 4623 4624 4625def send_email(subject, message_body): 4626 4627 """ 4628 Sends email using Google authenticated credentials. 4629 4630 This function is called for QC warnings and fails if: 4631 1. Google Drive sync is enabled 4632 2. Email addresses are registered for notifications 4633 4634 Args: 4635 subject (str): 4636 Subject of email 4637 message_body (str): 4638 Body of email 4639 4640 Returns: 4641 On success, an email.message.EmailMessage object. 4642 """ 4643 4644 try: 4645 credentials = google_auth.load_credentials_from_file(alt_credentials)[0] 4646 4647 service = build("gmail", "v1", credentials=credentials) 4648 message = EmailMessage() 4649 4650 message.set_content(message_body) 4651 4652 message["Subject"] = subject 4653 message["To"] = get_email_notifications_list(as_string=True) 4654 4655 encoded_message = base64.urlsafe_b64encode(message.as_bytes()).decode() 4656 create_message = { "raw": encoded_message } 4657 4658 send_message = (service.users().messages().send(userId="me", body=create_message).execute()) 4659 4660 except Exception as error: 4661 traceback.print_exc() 4662 send_message = None 4663 4664 return send_message
The functions defined below operate on two database types:
- One storing instrument run metadata, sample QC results, and biological standard QC results
- The other storing instrument metadata, workspace settings for workspace access, chromatography methods, biological standards, QC configurations, and MS-DIAL configurations
This file also contains functions for syncing data and settings with Google Drive. For an overview of all functions, please visit the documentation at https://czbiohub.github.io/MS-AutoQC.
def get_database_file(instrument_id, sqlite_conn=False, zip=False):

    """
    Builds the location of an instrument's database file or of its ZIP archive.

    The file name is the instrument ID with spaces replaced by underscores.

    Args:
        instrument_id (str):
            Instrument ID that specifies which database file to retrieve
        sqlite_conn (bool, default False):
            If True, returns a relative SQLite connection URL ("sqlite:///data/<file>")
            instead of a file system path
        zip (bool, default False):
            If True, the file name ends in ".zip" instead of ".db"

    Returns:
        str: SQLite connection URL if sqlite_conn is True, otherwise a path inside the data directory
    """

    extension = ".zip" if zip else ".db"
    filename = instrument_id.replace(" ", "_") + extension

    # Plain file system path (the default)
    if not sqlite_conn:
        return os.path.join(data_directory, filename)

    # Connection URL, relative to the working directory
    return "sqlite:///data/" + filename
Returns database file for a given instrument ID.
Arguments:
- instrument_id (str): Instrument ID that specifies which database file to retrieve
- sqlite_conn (bool, default False): Whether to receive the path for establishing a SQLite connection
- zip (bool, default False): Whether to receive the path of the database ZIP archive (".zip") instead of the database file (".db")
Returns:
str: Path for the database file
def connect_to_database(name):

    """
    Opens a connection to either the settings database or an instrument database.

    Args:
        name (str):
            Name of the database, either "Settings" or an instrument ID

    Returns:
        sqlalchemy.MetaData:
            Metadata container bound to the engine created for the database
        sqlalchemy.Connection:
            Open connection obtained from that same engine
    """

    # Anything other than "Settings" is treated as an instrument ID
    if name != "Settings":
        database_url = get_database_file(instrument_id=name, sqlite_conn=True)
    else:
        database_url = settings_database

    db_engine = sa.create_engine(database_url)

    return sa.MetaData(bind=db_engine), db_engine.connect()
Establishes a connection to a SQLite database of choice.
Arguments:
- name (str): Name of the database, either "Settings" or an instrument ID
Returns:
sqlalchemy.MetaData: A container object that consists of different features of a database being described; sqlalchemy.Connection: An object that represents a single DBAPI connection, and always emits SQL statements within the context of a transaction block
def create_databases(instrument_id, new_instrument=False):

    """
    Creates the SQLite instrument database and, on first-time setup, the workspace settings database.

    Instrument database tables: "bio_qc_results", "runs", "sample_qc_results".

    Settings database tables: "instruments", "biological_standards", "chromatography_methods",
    "gdrive_users", "internal_standards", "msdial_parameters", "email_notifications", "qc_parameters",
    "targeted_features", "workspace".

    Every table starts with an integer primary key column named "id". When the settings database
    is created, the "Default" MS-DIAL and QC configurations are added and the workspace metadata
    is initialized. In both cases the device identity is saved for the given instrument.

    Args:
        instrument_id (str):
            Instrument ID to name the new database ("Thermo QE 1" becomes "Thermo_QE_1.db")
        new_instrument (bool, default False):
            If True, only the instrument database is created (a new instrument is being added to a
            workspace); if False, the settings database is created as well

    Returns:
        None
    """

    def define_tables(metadata, schema):
        # Register one table per schema entry; each gets the shared "id" primary key first
        for table_name, columns in schema.items():
            sa.Table(
                table_name, metadata,
                sa.Column("id", INTEGER, primary_key=True),
                *[sa.Column(column_name, column_type) for column_name, column_type in columns]
            )

    # Schema of the instrument database
    instrument_schema = {
        "bio_qc_results": [
            ("sample_id", TEXT),
            ("run_id", TEXT),
            ("polarity", TEXT),
            ("precursor_mz", TEXT),
            ("retention_time", TEXT),
            ("intensity", TEXT),
            ("md5", TEXT),
            ("qc_dataframe", TEXT),
            ("qc_result", TEXT),
            ("biological_standard", TEXT),
            ("position", TEXT),
        ],
        "runs": [
            ("run_id", TEXT),
            ("chromatography", TEXT),
            ("acquisition_path", TEXT),
            ("sequence", TEXT),
            ("metadata", TEXT),
            ("status", TEXT),
            ("samples", INTEGER),
            ("completed", INTEGER),
            ("passes", INTEGER),
            ("fails", INTEGER),
            ("latest_sample", TEXT),
            ("qc_config_id", TEXT),
            ("biological_standards", TEXT),
            ("pid", INTEGER),
            ("drive_id", TEXT),
            ("sample_status", TEXT),
            ("job_type", TEXT),
        ],
        "sample_qc_results": [
            ("sample_id", TEXT),
            ("run_id", TEXT),
            ("polarity", TEXT),
            ("position", TEXT),
            ("md5", TEXT),
            ("precursor_mz", TEXT),
            ("retention_time", TEXT),
            ("intensity", TEXT),
            ("qc_dataframe", TEXT),
            ("qc_result", TEXT),
        ],
    }

    # Create tables for instrument database
    qc_db_engine = sa.create_engine(get_database_file(instrument_id=instrument_id, sqlite_conn=True))
    qc_db_metadata = sa.MetaData()
    define_tables(qc_db_metadata, instrument_schema)
    qc_db_metadata.create_all(qc_db_engine)

    # First-time setup also needs Settings.db; adding an instrument to a workspace does not
    if not new_instrument:

        # Schema of Settings.db
        settings_schema = {
            "instruments": [
                ("name", TEXT),
                ("vendor", TEXT),
                ("drive_id", TEXT),
                ("last_modified", TEXT),
            ],
            "biological_standards": [
                ("name", TEXT),
                ("identifier", TEXT),
                ("chromatography", TEXT),
                ("num_pos_features", INTEGER),
                ("num_neg_features", INTEGER),
                ("pos_bio_msp_file", TEXT),
                ("neg_bio_msp_file", TEXT),
                ("pos_parameter_file", TEXT),
                ("neg_parameter_file", TEXT),
                ("msdial_config_id", TEXT),
            ],
            "chromatography_methods": [
                ("method_id", TEXT),
                ("num_pos_standards", INTEGER),
                ("num_neg_standards", INTEGER),
                ("pos_istd_msp_file", TEXT),
                ("neg_istd_msp_file", TEXT),
                ("pos_parameter_file", TEXT),
                ("neg_parameter_file", TEXT),
                ("msdial_config_id", TEXT),
            ],
            "gdrive_users": [
                ("name", TEXT),
                ("email_address", TEXT),
                ("permission_id", TEXT),
            ],
            "internal_standards": [
                ("name", TEXT),
                ("chromatography", TEXT),
                ("polarity", TEXT),
                ("precursor_mz", REAL),
                ("retention_time", REAL),
                ("ms2_spectrum", TEXT),
                ("inchikey", TEXT),
            ],
            "msdial_parameters": [
                ("config_name", TEXT),
                ("rt_begin", INTEGER),
                ("rt_end", INTEGER),
                ("mz_begin", INTEGER),
                ("mz_end", INTEGER),
                ("ms1_centroid_tolerance", REAL),
                ("ms2_centroid_tolerance", REAL),
                ("smoothing_method", TEXT),
                ("smoothing_level", INTEGER),
                ("min_peak_width", INTEGER),
                ("min_peak_height", INTEGER),
                ("mass_slice_width", REAL),
                ("post_id_rt_tolerance", REAL),
                ("post_id_mz_tolerance", REAL),
                ("post_id_score_cutoff", REAL),
                ("alignment_rt_tolerance", REAL),
                ("alignment_mz_tolerance", REAL),
                ("alignment_rt_factor", REAL),
                ("alignment_mz_factor", REAL),
                ("peak_count_filter", INTEGER),
                ("qc_at_least_filter", TEXT),
            ],
            "email_notifications": [
                ("email_address", TEXT),
            ],
            "qc_parameters": [
                ("config_name", TEXT),
                ("intensity_dropouts_cutoff", INTEGER),
                ("library_rt_shift_cutoff", REAL),
                ("in_run_rt_shift_cutoff", REAL),
                ("library_mz_shift_cutoff", REAL),
                ("intensity_enabled", INTEGER),
                ("library_rt_enabled", INTEGER),
                ("in_run_rt_enabled", INTEGER),
                ("library_mz_enabled", INTEGER),
            ],
            "targeted_features": [
                ("name", TEXT),
                ("chromatography", TEXT),
                ("polarity", TEXT),
                ("biological_standard", TEXT),
                ("precursor_mz", REAL),
                ("retention_time", REAL),
                ("ms2_spectrum", TEXT),
                ("inchikey", TEXT),
            ],
            "workspace": [
                ("slack_bot_token", TEXT),
                ("slack_channel", TEXT),
                ("slack_enabled", INTEGER),
                ("gdrive_folder_id", TEXT),
                ("methods_zip_file_id", TEXT),
                ("methods_last_modified", TEXT),
                ("msdial_directory", TEXT),
                ("is_instrument_computer", INTEGER),
                ("instrument_identity", TEXT),
            ],
        }

        # Create tables for Settings.db
        settings_db_engine = sa.create_engine(settings_database)
        settings_db_metadata = sa.MetaData()
        define_tables(settings_db_metadata, settings_schema)
        settings_db_metadata.create_all(settings_db_engine)

        # Insert default configurations for MS-DIAL and MS-AutoQC
        add_msdial_configuration("Default")
        add_qc_configuration("Default")

        # Initialize workspace metadata
        create_workspace_metadata()

    # Save device identity based on setup values
    set_device_identity(is_instrument_computer=True, instrument_id=instrument_id)
    return None
Initializes SQLite databases for 1) instrument data and 2) workspace settings.
Creates the following tables in the instrument database: "runs", "bio_qc_results", "sample_qc_results".
Creates the following tables in the settings database: "biological_standards", "chromatography_methods", "email_notifications", "instruments", "gdrive_users", "internal_standards", "msdial_parameters", "qc_parameters", "targeted_features", "workspace".
Arguments:
- instrument_id (str): Instrument ID to name the new database ("Thermo QE 1" becomes "Thermo_QE_1.db")
- new_instrument (bool, default False): Whether a new instrument database is being added to a workspace, or whether a new instrument database AND settings database are being created for the first time
Returns:
None
def execute_vacuum(database):

    """
    Executes the VACUUM command on the database of choice.

    Args:
        database (str):
            Name of the database, either "Settings" or an instrument ID

    Returns:
        None
    """

    # Only the connection is needed here; the metadata object is discarded
    _, connection = connect_to_database(database)

    # Always release the connection, even if the VACUUM statement raises
    try:
        connection.execute("VACUUM")
    finally:
        connection.close()
Executes VACUUM command on the database of choice.
Arguments:
- database (str): name of the database, either "Settings" or Instrument ID
Returns:
None
def get_drive_instance():

    """
    Builds a Google Drive instance from the module's current authentication object.

    Returns:
        GoogleDrive object wrapping the GoogleAuth instance stored in auth_container
    """

    current_auth = auth_container[0]
    return GoogleDrive(current_auth)
Returns user-authenticated Google Drive instance.
def launch_google_drive_authentication():

    """
    Creates a fresh GoogleAuth object from settings.yaml, stores it as the module's
    authentication instance, and starts the local web server authentication flow.
    """

    gauth = GoogleAuth(settings_file=drive_settings_file)
    auth_container[0] = gauth
    gauth.LocalWebserverAuth()
Launches Google Drive authentication flow and sets authentication instance.
def save_google_drive_credentials():

    """
    Writes the credentials held by the current authentication instance to the credentials.txt file.
    """

    gauth = auth_container[0]
    gauth.SaveCredentialsFile(credentials_file)
Saves Google credentials to a credentials.txt file.
def initialize_google_drive():

    """
    Initializes the Google Drive authentication instance using credentials.txt and settings.yaml
    in the /auth directory, prompting the user to authenticate when no usable credentials exist.

    Also writes email_credentials.txt (a copy of the credentials with "type" set to
    "authorized_user") if that file does not exist yet.

    Returns:
        bool: Whether the Google client credentials file (in the "auth" directory) exists.
    """

    # Replace the module-wide authentication instance with a fresh one
    gauth = GoogleAuth(settings_file=drive_settings_file)
    auth_container[0] = gauth

    # No credentials file on disk: make the user authenticate (only when databases are valid)
    credentials_missing = not os.path.exists(credentials_file)
    if credentials_missing and is_valid():
        gauth.LocalWebserverAuth()

    # Try to load saved client credentials
    gauth.LoadCredentialsFile(credentials_file)

    if gauth.credentials is None:
        # Nothing could be loaded, so the user must authenticate again
        gauth.LocalWebserverAuth()
    elif gauth.access_token_expired:
        # Saved credentials exist but have expired
        gauth.Refresh()
    else:
        # Saved credentials are still usable
        gauth.Authorize()

    # Existence is checked again here instead of reusing the earlier result
    if not os.path.exists(credentials_file) and is_valid():
        save_google_drive_credentials()

    # Small modification for emails (for usage with Google's google.auth)
    if not os.path.exists(alt_credentials):
        with open(credentials_file, "r") as source:
            email_credentials = json.load(source)
        email_credentials["type"] = "authorized_user"
        with open(alt_credentials, "w") as target:
            json.dump(email_credentials, target)

    return os.path.exists(credentials_file)
Initializes instance of Google Drive using credentials.txt and settings.yaml in /auth directory
Arguments:
- None
Returns:
bool: Whether the Google client credentials file (in the "auth" directory) exists.
def is_valid(instrument_id=None):

    """
    Checks that the settings database and the instrument database(s) hold at least the
    required number of tables.

    Note that only the table count of each database is compared against the number of
    required tables; table names are not matched individually.

    Args:
        instrument_id (str, default None):
            Specified if validating a specific instrument database. If None, every ".db" file
            found in the data directory is validated.

    Returns:
        bool: True if every checked database passes, False if any database has too few tables
        or could not be read.
    """

    settings_db_required_tables = ["biological_standards", "chromatography_methods", "email_notifications", "instruments",
        "gdrive_users", "internal_standards", "msdial_parameters", "qc_parameters", "targeted_features", "workspace"]

    instrument_db_required_tables = ["bio_qc_results", "runs", "sample_qc_results"]

    try:
        # Validate settings database
        settings_db_tables = sa.create_engine(settings_database).table_names()
        if len(settings_db_tables) < len(settings_db_required_tables):
            return False

        # If given an instrument ID, only validate that instrument's database
        if instrument_id is not None:
            databases = [get_database_file(instrument_id, sqlite_conn=True)]

        # Otherwise, validate all instrument databases
        else:
            # Match on the ".db" suffix only: a substring test would also pick up SQLite's
            # temporary "<name>.db-journal" files and treat them as instrument databases
            database_files = [
                file[:-len(".db")] for file in os.listdir(data_directory)
                if file.endswith(".db") and "journal.db" not in file
            ]
            databases = [get_database_file(f, sqlite_conn=True) for f in database_files]

        for database in databases:
            instrument_db_tables = sa.create_engine(database).table_names()
            if len(instrument_db_tables) < len(instrument_db_required_tables):
                return False

    # Any failure to open or inspect a database counts as invalid, but interpreter-level
    # signals (KeyboardInterrupt, SystemExit) are no longer swallowed
    except Exception:
        return False

    return True
Checks that all required tables in all databases (or a single database of choice) are present.
Arguments:
- instrument_id (str, default None): Specified if validating a specific database
Returns:
bool: True if every checked database passes validation, False otherwise
def sync_is_enabled():

    """
    Reports whether Google Drive sync is enabled, judged by whether the Google Drive folder ID
    and methods ZIP file ID are populated in the "workspace" table of the Settings database.

    Typically used for separating sync-specific functionality.

    Returns:
        bool: True if the databases are valid and both ID's are set; False otherwise
    """

    if not is_valid():
        return False

    workspace = get_table("Settings", "workspace")
    drive_ids = (
        workspace["gdrive_folder_id"].values[0],
        workspace["methods_zip_file_id"].values[0],
    )

    # An ID counts as unset when it is None, the string "None", or an empty string
    for drive_id in drive_ids:
        if drive_id is None or drive_id == "None" or drive_id == "":
            return False

    return True
Checks whether Google Drive sync is enabled simply by querying whether Google Drive ID's exist in the database.
Typically used for separating sync-specific functionality.
Returns:
bool: Whether Google Drive sync is enabled or not
def email_notifications_are_enabled():

    """
    Reports whether email notifications are enabled.

    All of the following must hold: the databases are valid, Google Drive sync is enabled,
    and the "email_notifications" table in the Settings database has at least one record.

    Returns:
        bool: True if email notifications are enabled, False if not
    """

    if not (is_valid() and sync_is_enabled()):
        return False

    registered_emails = get_table("Settings", "email_notifications")
    return len(registered_emails) > 0
Checks whether email notifications are enabled.
Returns True if databases are valid, Google Drive sync is enabled, and if email addresses were registered by user in Settings > General. Returns False if any condition is not met.
Returns:
bool: True if email notifications are enabled, False if not
def slack_notifications_are_enabled():

    """
    Checks whether Slack notifications are enabled.

    Reads the "slack_enabled" value of the first record in the "workspace" table of the
    Settings database and interprets it as a boolean.

    Returns:
        bool: True if Slack notifications are enabled; False if not, if the databases are
        invalid, or if the value could not be read
    """

    if not is_valid():
        return False

    try:
        workspace = get_table("Settings", "workspace")
        return bool(workspace["slack_enabled"].astype(int).tolist()[0])

    # Best-effort read: a missing or non-numeric value means "disabled". Catch Exception
    # rather than everything so KeyboardInterrupt / SystemExit still propagate.
    except Exception:
        return False
Checks whether Slack notifications are enabled.
Returns True if user enabled Slack notifications in Settings > General, and False if not.
Returns:
bool: True if Slack notifications are enabled, False if not
def is_instrument_computer():

    """
    Reports whether the user's device is registered as the instrument computer.

    Reads the "is_instrument_computer" value of the first record in the "workspace" table
    of the Settings database.

    Typically used to organize / hide UI functions for instrument and non-instrument devices
    that MS-AutoQC is installed on.

    Returns:
        True if device is instrument computer, False if not
    """

    workspace = get_table("Settings", "workspace")
    flags = workspace["is_instrument_computer"].astype(int).tolist()
    return bool(flags[0])
Checks whether user's device is the instrument computer.
This is specified during setup. If the user created a new instrument, or signed in as an instrument device, then this will return True. If the user signed in to their workspace from a non-instrument device, this will return False.
Typically used to organize / hide UI functions for instrument and non-instrument devices that MS-AutoQC is installed on.
Returns:
True if device is instrument computer, False if not
def get_md5_for_settings_db():

    """
    Computes the MD5 checksum of the settings database file.

    Typically used for checking whether the user changed settings and prompting a
    Google Drive sync (if sync is enabled).

    Returns:
        Hexadecimal MD5 digest (str) of the settings database file
    """

    digest = hashlib.md5()

    # Hash the file in 4096-byte blocks rather than reading it all at once
    with open(settings_db_file, "rb") as db_file:
        while True:
            block = db_file.read(4096)
            if block == b"":
                break
            digest.update(block)

    return digest.hexdigest()
Calculates and returns MD5 checksum for the settings database file.
Typically used for checking whether the user changed settings and prompting a Google Drive sync (if sync is enabled).
Returns:
An MD5 checksum of /data/methods/Settings.db
def settings_were_modified(md5_checksum):

    """
    Reports whether the settings database file differs from an earlier snapshot.

    The checksum passed in (computed when Settings were opened) is compared against a
    checksum computed at the time of this call.

    Args:
        md5_checksum (str):
            An MD5 checksum of the settings database that was computed earlier

    Returns:
        bool: True if checksums don't match, False if checksums match.
    """

    current_checksum = get_md5_for_settings_db()
    return md5_checksum != current_checksum
Checks whether settings database file has been modified.
This is done by comparing the checksum computed when Settings were opened (given as a parameter) with the checksum computed when Settings were closed (in this function call).
Arguments:
- md5_checksum (str): An MD5 checksum of /data/methods/Settings.db that was computed when the user opened Settings in the app
Returns:
bool: True if checksums don't match, False if checksums match.
def zip_database(instrument_id=None, filename=None):

    """
    Compresses an instrument database file into a ZIP archive in the data directory.

    Used for fast downloads / uploads over network connections to Google Drive (if Google Drive sync is enabled).

    For example, zipping the database for "Thermo QE 1" archives "Thermo_QE_1.db" as "Thermo_QE_1.zip".
    If both arguments are given, the filename takes precedence.

    Args:
        instrument_id (str, default None):
            If specified, selects a database to zip by instrument ID (ex: "Thermo QE 1")
        filename (str, default None):
            If specified, selects a database to zip by archive filename (ex: "Thermo_QE_1.zip")

    Returns:
        None
    """

    # Nothing to do without a way to identify the database
    if instrument_id is None and filename is None:
        return None

    if filename is not None:
        archive_path = os.path.join(data_directory, filename)
        db_filename = filename.replace(".zip", ".db")
    else:
        archive_path = get_database_file(instrument_id, zip=True)
        db_filename = instrument_id.replace(" ", "_") + ".db"

    # make_archive appends the ".zip" extension itself
    archive_base = archive_path.replace(".zip", "")
    shutil.make_archive(archive_base, "zip", data_directory, db_filename)
Compresses instrument database file into a ZIP archive in /data directory.
Used for fast downloads / uploads over network connections to Google Drive (if Google Drive sync is enabled).
The zip archive is accessible by filename and path in the /data directory. For example, zipping the database for "Thermo QE 1" will generate a zip file with path "../data/Thermo_QE_1.zip".
Arguments:
- instrument_id (str, default None): If specified, selects a database to zip by instrument ID (ex: "Thermo QE 1")
- filename (str, default None): If specified, selects a database to zip by filename (ex: "Thermo_QE_1.zip")
Returns:
None
def unzip_database(instrument_id=None, filename=None):

    """
    Extracts a ZIP archive containing an instrument database file into the data directory,
    then deletes the archive.

    If both arguments are given, the instrument ID takes precedence.

    Args:
        instrument_id (str, default None):
            If specified, selects a database to unzip by instrument ID (ex: "Thermo QE 1")
        filename (str, default None):
            If specified, selects a database to unzip by filename (ex: "Thermo_QE_1.zip")

    Returns:
        None
    """

    # Nothing to do without a way to identify the archive
    if instrument_id is None and filename is None:
        return None

    if instrument_id is None:
        archive_path = os.path.join(data_directory, filename)
    else:
        archive_path = get_database_file(instrument_id, zip=True)

    shutil.unpack_archive(archive_path, data_directory, "zip")
    os.remove(archive_path)
Unzips ZIP archive containing instrument database file and deletes the archive when complete.
Arguments:
- instrument_id (str, default None): If specified, selects a database to unzip by instrument ID (ex: "Thermo QE 1")
- filename (str, default None): If specified, selects a database to unzip by filename (ex: "Thermo_QE_1.zip")
Returns:
None
def zip_methods():

    """
    Compresses the methods directory into a "methods.zip" archive in the data directory.

    Returns:
        Path for zip archive of methods directory (ex: "../data/methods.zip")
    """

    archive_path = os.path.join(data_directory, "methods.zip")

    # make_archive appends the ".zip" extension itself
    archive_base = archive_path.replace(".zip", "")
    shutil.make_archive(archive_base, "zip", methods_directory)

    return archive_base + ".zip"
Compresses methods directory into a ZIP archive in /data directory.
Returns:
Path for zip archive of methods directory (ex: "../data/methods.zip")
def unzip_methods():

    """
    Extracts "methods.zip" from the data directory into the methods directory,
    then deletes the archive.
    """

    archive_path = os.path.join(data_directory, "methods.zip")
    shutil.unpack_archive(filename=archive_path, extract_dir=methods_directory, format="zip")
    os.remove(archive_path)
Unzips ZIP archive containing methods directory and deletes the archive when complete.
def zip_csv_files(input_directory, output_directory_and_name):

    """
    Compresses the contents of a directory into a ZIP archive.

    Used for fast upload of instrument run data to Google Drive during an active instrument run
    (if Google Drive sync is enabled).

    Args:
        input_directory (str):
            The directory whose contents are archived, e.g. the temporary directory for an
            instrument run ("/data/Thermo_QE_1_BRDE001")
        output_directory_and_name (str):
            The file path for the ZIP archive, without the ".zip" extension
            (ex: "/data/Instrument_ID_Run_ID")

    Returns:
        The given output path with ".zip" appended (ex: "../data/Instrument_ID_Run_ID.zip")
    """

    shutil.make_archive(
        base_name=output_directory_and_name,
        format="zip",
        root_dir=input_directory,
    )

    archive_path = output_directory_and_name + ".zip"
    return archive_path
Compresses CSV files into a ZIP archive in /data directory.
Used for fast upload of instrument run data to Google Drive during an active instrument run (if Google Drive sync is enabled).
Arguments:
- input_directory (str): The temporary directory for files pertaining to an instrument run, denoted as "Instrument_ID_Run_ID". For example, a job with ID "BRDE001" created under instrument with ID "Thermo QE 1" would have its files stored in "/data/Thermo_QE_1_BRDE001".
- output_directory_and_name (str): Essentially, the file path for the ZIP archive (ex: "/data/Instrument_ID_Run_ID").
Returns:
Path for zip archive of CSV files with instrument run data (ex: "../data/Instrument_ID_Run_ID.zip")
def unzip_csv_files(input_zip, output_directory):

    """
    Extracts a ZIP archive into the given directory, then deletes the archive.

    Args:
        input_zip (str):
            Path of the ZIP archive to extract
        output_directory (str):
            Directory that the archive contents are extracted into

    Returns:
        None
    """

    shutil.unpack_archive(filename=input_zip, extract_dir=output_directory, format="zip")
    os.remove(input_zip)
Unzips ZIP archive of CSV files and deletes the archive upon completion.
def get_table(database_name, table_name):

    """
    Loads every row of a database table into a pandas DataFrame.

    TODO: Improve this function to accept column and record queries

    Args:
        database_name (str):
            The database to query, using instrument ID or "Settings"
        table_name (str):
            The table to retrieve

    Returns:
        DataFrame of table.
    """

    # "Settings" refers to the settings database; anything else is treated as an instrument ID
    is_settings = database_name == "Settings"
    connection_string = settings_database if is_settings else get_database_file(database_name, sqlite_conn=True)

    query = "SELECT * FROM " + table_name
    return pd.read_sql(query, sa.create_engine(connection_string))
Retrieves table from database as a pandas DataFrame object.
TODO: Improve this function to accept column and record queries
Arguments:
- database_name (str): The database to query, using instrument ID or "Settings"
- table_name (str): The table to retrieve
Returns:
DataFrame of table.
def generate_client_settings_yaml(client_id, client_secret):

    """
    Writes a settings.yaml file for Google authentication into the "auth" folder of the
    current working directory, creating the folder if it does not exist.

    Client ID and client secret are generated and provided by the user in the Google Cloud Console.

    See: https://docs.iterative.ai/PyDrive2/oauth/#automatic-and-custom-authentication-with-settings-yaml

    Args:
        client_id (str):
            The Client ID of the MS-AutoQC application, generated and provided by the user
        client_secret (str):
            The Client Secret of the MS-AutoQC application, generated and provided by the user

    Returns:
        None
    """

    target_directory = os.path.join(os.getcwd(), "auth")
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)

    yaml_entries = [
        "client_config_backend: settings",
        "client_config:",
        " client_id: " + client_id,
        " client_secret: " + client_secret,
        "\n",
        "save_credentials: True",
        "save_credentials_backend: file",
        "save_credentials_file: auth/credentials.txt",
        "\n",
        "get_refresh_token: True",
        "\n",
        "oauth_scope:",
        " - https://www.googleapis.com/auth/drive",
        " - https://www.googleapis.com/auth/gmail.send",
        " - https://www.googleapis.com/auth/userinfo.email"
    ]

    # Blank-line entries ("\n") and the final entry are written as-is;
    # every other entry is terminated with a newline
    final_entry = yaml_entries[-1]
    pieces = []
    for entry in yaml_entries:
        pieces.append(entry)
        if entry != "\n" and entry != final_entry:
            pieces.append("\n")

    with open(os.path.join(target_directory, "settings.yaml"), "w") as yaml_file:
        for piece in pieces:
            yaml_file.write(piece)
Generates a settings.yaml file for Google authentication in the /auth directory.
Client ID and client secret are generated and provided by the user in the Google Cloud Console.
See: https://docs.iterative.ai/PyDrive2/oauth/#automatic-and-custom-authentication-with-settings-yaml
Arguments:
- client_id (str): The Client ID of the MS-AutoQC application, generated and provided by the user
- client_secret (str): The Client Secret of the MS-AutoQC application, generated and provided by the user
Returns:
None
def insert_google_drive_ids(instrument_id, gdrive_folder_id, instrument_db_file_id, methods_zip_file_id):

    """
    Inserts Google Drive ID's into corresponding tables to enable Google Drive sync.

    This function is called when a user creates a new instrument in their workspace.

    The ID's for the following files / folders in Google Drive are stored in the Settings database:
        1. MS-AutoQC folder ("workspace" table, record with id 1)
        2. Instrument database zip file ("instruments" table, record matching the instrument ID)
        3. Methods directory zip file ("workspace" table, record with id 1)

    Args:
        instrument_id (str):
            Instrument ID
        gdrive_folder_id (str):
            Google Drive ID for the MS-AutoQC folder (found in the user's root directory in Drive)
        instrument_db_file_id (str):
            Google Drive ID for the instrument database ZIP file
        methods_zip_file_id (str):
            Google Drive ID for the methods directory ZIP file

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    # Release the connection even if table reflection or an UPDATE fails
    try:
        instruments_table = sa.Table("instruments", db_metadata, autoload=True)
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)

        # Instruments database
        update_instrument = (
            sa.update(instruments_table)
                .where(instruments_table.c.name == instrument_id)
                .values(drive_id=instrument_db_file_id)
        )
        connection.execute(update_instrument)

        # MS-AutoQC folder and Methods folder
        update_workspace = (
            sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(gdrive_folder_id=gdrive_folder_id,
                        methods_zip_file_id=methods_zip_file_id)
        )
        connection.execute(update_workspace)

    finally:
        connection.close()
Inserts Google Drive ID's into corresponding tables to enable Google Drive sync.
This function is called when a user creates a new instrument in their workspace.
The ID's for the following files / folders in Google Drive are stored in the database:
- MS-AutoQC folder
- Instrument database zip file
- Methods directory zip file
Arguments:
- instrument_id (str): Instrument ID
- gdrive_folder_id (str): Google Drive ID for the MS-AutoQC folder (found in the user's root directory in Drive)
- instrument_db_file_id (str): Google Drive ID for the instrument database ZIP file
- methods_zip_file_id (str): Google Drive ID for the methods directory ZIP file
Returns:
None
def insert_new_instrument(name, vendor):

    """
    Inserts a new instrument into the "instruments" table in the Settings database.

    The name is the instrument ID, and the vendor is one of 5 options: Thermo Fisher, Agilent, Bruker, Sciex, and Waters.

    Args:
        name (str):
            Instrument ID
        vendor (str):
            Instrument vendor

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    # Release the connection even if table reflection or the INSERT fails
    try:
        # Get "instruments" table
        instruments_table = sa.Table("instruments", db_metadata, autoload=True)

        # Prepare and execute insert of new instrument
        insert_instrument = instruments_table.insert().values(
            {"name": name,
             "vendor": vendor}
        )
        connection.execute(insert_instrument)

    finally:
        connection.close()
Inserts a new instrument into the "instruments" table in the Settings database.
The name is the instrument ID, and the vendor is one of 5 options: Thermo Fisher, Agilent, Bruker, Sciex, and Waters.
Arguments:
- name (str): Instrument ID
- vendor (str): Instrument vendor
Returns:
None
def get_instruments_list():

    """
    Lists the names of all instruments in the "instruments" table of the Settings database.

    Returns:
        List of instrument names (str)
    """

    instruments = pd.read_sql("SELECT * FROM instruments", sa.create_engine(settings_database))
    names = instruments["name"].astype(str)
    return names.tolist()
Returns list of instruments in database.
def get_instrument(instrument_id):

    """
    Returns record from "instruments" table as a DataFrame for a given instrument

    Args:
        instrument_id (str): Instrument ID

    Returns:
        DataFrame containing the record(s) of the "instruments" table whose name matches the given instrument ID
    """

    engine = sa.create_engine(settings_database)

    # Bind the instrument ID as a parameter instead of splicing it into the SQL text,
    # so ID's containing quotes (ex: an apostrophe) cannot break or alter the query
    query = sa.text("SELECT * FROM instruments WHERE name = :name")
    return pd.read_sql(query, engine, params={"name": instrument_id})
Returns record from "instruments" table as a DataFrame for a given instrument
Arguments:
- instrument_id (str): Instrument ID
Returns:
DataFrame containing the name, vendor, and drive_id for the given instrument
def get_filenames_from_sequence(sequence, vendor="Thermo Fisher"):

    """
    Filters preblanks, washes, and shutdown injections from sequence file, and simultaneously assigns
    polarity to each sample based on presence of "Pos" or "Neg" in Instrument Method column.

    Rows are dropped when "File Name" contains both "_BK_" and "_pre_" (preblanks), contains "_wash_",
    or contains "shutdown". Rows whose "Instrument Method" contains neither "Pos" nor "Neg" are left
    without a polarity value.

    This function is called upon starting a new QC job.

    TODO: Adapt this function for other instrument vendors.
    TODO: Check the method filename, not entire file path, for "Pos" and "Neg".
        A folder containing "Pos" or "Neg" will give incorrect polarity assignments.

    Args:
        sequence (str):
            The acquisition sequence file, encoded as a JSON string in "split" format
        vendor (str):
            The instrument vendor (currently unused; see to-do statements)

    Returns:
        DataFrame of acquisition sequence, with preblanks / washes / shutdowns filtered out and polarities assigned
    """

    # Wrap the JSON text in a file-like object: passing a literal JSON string
    # directly to read_json is deprecated in newer pandas releases
    df_sequence = pd.read_json(io.StringIO(sequence), orient="split")

    file_names = df_sequence["File Name"]

    # Preblanks carry both markers in their filename
    is_preblank = file_names.str.contains(r"_BK_", na=False) & file_names.str.contains(r"_pre_", na=False)
    is_wash = file_names.str.contains(r"_wash_", na=False)
    is_shutdown = file_names.str.contains(r"shutdown", na=False)

    # Copy the filtered rows so the polarity assignment below writes to an independent frame
    df_sequence = df_sequence.loc[~(is_preblank | is_wash | is_shutdown)].copy()

    # Derive polarity from instrument method; "Neg" is applied last and wins if both match
    df_sequence.loc[df_sequence["Instrument Method"].str.contains(r"Pos", na=False), "Polarity"] = "Pos"
    df_sequence.loc[df_sequence["Instrument Method"].str.contains(r"Neg", na=False), "Polarity"] = "Neg"

    return df_sequence
Filters preblanks, washes, and shutdown injections from the sequence file, and simultaneously assigns polarity to each sample based on the presence of "Pos" or "Neg" in the Instrument Method column.
This function is called upon starting a new QC job.
TODO: Adapt this function for other instrument vendors. TODO: Check the method filename, not entire file path, for "Pos" and "Neg". A folder containing "Pos" or "Neg" will give incorrect polarity assignments.
Arguments:
- sequence (str): The acquisition sequence file, encoded as a JSON string in "split" format
- vendor (str): The instrument vendor (see to-do statements)
Returns:
DataFrame of acquisition sequence, with preblanks / washes / shutdowns filtered out and polarities assigned
def get_polarity_for_sample(instrument_id, run_id, sample_id, status):

    """
    Returns polarity for a given sample.

    The sample table is read from CSV files when this device is not the given instrument, sync is
    enabled, and the job is "Active"; otherwise it is read from the database. If the sample's
    polarity cannot be looked up, it is guessed from the sample ID ("Neg" if the ID contains "Neg",
    else "Pos").

    TODO: Loading hundreds of rows of data before querying for one sample is massively inefficient.
        This function was written in haste and can be easily implemented in a much better way.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Instrument run ID (job ID)
        sample_id (str): Sample ID
        status (str): Job status

    Returns:
        Polarity for the given sample, as either "Pos" or "Neg".
    """

    viewing_remotely = get_device_identity() != instrument_id and sync_is_enabled()

    # On a synced remote device only "Complete" and "Active" jobs have a known sample source;
    # any other status leaves df as None and falls through to the name-based guess below
    df = None
    if not viewing_remotely or status == "Complete":
        df = get_samples_in_run(instrument_id, run_id, "Both")
    elif status == "Active":
        df = get_samples_from_csv(instrument_id, run_id, "Both")

    polarity = None
    if df is not None:
        try:
            polarity = df.loc[df["sample_id"] == sample_id]["polarity"].astype(str).values[0]
        # Best-effort lookup: a missing column or missing sample triggers the fallback,
        # while KeyboardInterrupt / SystemExit are no longer swallowed
        except Exception:
            polarity = None

    if polarity is None:
        print("Could not find polarity for sample in database.")
        polarity = "Neg" if "Neg" in sample_id else "Pos"

    return polarity
Returns polarity for a given sample.
TODO: Loading hundreds of rows of data before querying for one sample is massively inefficient. This function was written in haste and can be easily implemented in a much better way.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- sample_id (str): Sample ID
- status (str): Job status
Returns:
Polarity for the given sample, as either "Pos" or "Neg".
def insert_new_run(run_id, instrument_id, chromatography, bio_standards, path, sequence, metadata, qc_config_id, job_type):

    """
    Initializes sample records in database for a new QC job.

    Performs the following functions:
        1. Inserts a record for the new instrument run into the "runs" table
        2. Inserts sample rows into the "sample_qc_results" table
        3. Inserts biological standard sample rows into the "bio_qc_results" table

    A sample is treated as a biological standard when its filename contains one of the
    biological standard identifiers. All inserts are executed in a single transaction, so a
    failure leaves no partially-initialized run behind.

    Args:
        run_id (str):
            Instrument run ID (job ID)
        instrument_id (str):
            Instrument ID
        chromatography (str):
            Chromatography method
        bio_standards (str):
            Biological standards
        path (str):
            Data acquisition path
        sequence (str):
            Acquisition sequence table, as a JSON string readable by get_filenames_from_sequence()
        metadata (str):
            Sample metadata table, as JSON string
        qc_config_id (str):
            Name of QC configuration
        job_type (str):
            Either "completed" or "active"

    Returns:
        None
    """

    # Get list of samples from sequence
    df_sequence = get_filenames_from_sequence(sequence)

    samples = df_sequence["File Name"].astype(str).tolist()
    polarities = df_sequence["Polarity"].astype(str).tolist()
    positions = df_sequence["Position"].astype(str).tolist()

    # Connect to database
    db_metadata, connection = connect_to_database(instrument_id)

    # Release the connection even if reflection or an INSERT fails
    try:
        # Get relevant tables
        runs_table = sa.Table("runs", db_metadata, autoload=True)
        sample_qc_results_table = sa.Table("sample_qc_results", db_metadata, autoload=True)
        bio_qc_results_table = sa.Table("bio_qc_results", db_metadata, autoload=True)

        # Get identifiers for biological standard (if any)
        identifiers = get_biological_standard_identifiers(bio_standards)

        # Prepare insert of user-inputted run data
        insert_run = runs_table.insert().values(
            {"run_id": run_id,
             "chromatography": chromatography,
             "acquisition_path": path,
             "sequence": sequence,
             "metadata": metadata,
             "status": "Active",
             "samples": len(samples),
             "completed": 0,
             "passes": 0,
             "fails": 0,
             "qc_config_id": qc_config_id,
             "biological_standards": str(bio_standards),
             "job_type": job_type})

        insert_samples = []

        for sample, polarity, position in zip(samples, polarities, positions):

            # First biological standard identifier found in the sample name (None if no match)
            matched_identifier = next(
                (identifier for identifier in identifiers.keys() if identifier in sample), None)

            # Prepare insert of the sample row into the "sample_qc_results" table
            if matched_identifier is None:
                insert_sample = sample_qc_results_table.insert().values(
                    {"sample_id": sample,
                     "run_id": run_id,
                     "polarity": polarity,
                     "position": position})

            # Prepare insert of the sample row into the "bio_qc_results" table
            else:
                insert_sample = bio_qc_results_table.insert().values(
                    {"sample_id": sample,
                     "run_id": run_id,
                     "polarity": polarity,
                     "biological_standard": identifiers[matched_identifier],
                     "position": position})

            insert_samples.append(insert_sample)

        # Execute all INSERTs atomically: committed together, or rolled back on error
        with connection.begin():
            connection.execute(insert_run)
            for insert_sample in insert_samples:
                connection.execute(insert_sample)

    finally:
        connection.close()
Initializes sample records in database for a new QC job.
Performs the following functions:
- Inserts a record for the new instrument run into the "runs" table
- Inserts sample rows into the "sample_qc_results" table
- Inserts biological standard sample rows into the "bio_qc_results" table
Arguments:
- run_id (str): Instrument run ID (job ID)
- instrument_id (str): Instrument ID
- chromatography (str): Chromatography method
- bio_standards (str): Biological standards
- path (str): Data acquisition path
- sequence (str): Acquisition sequence table, as JSON string in "records" format
- metadata (str): Sample metadata table, as JSON string in "records" format
- qc_config_id (str): Name of QC configuration
- job_type (str): Either "completed" or "active"
Returns:
None
def get_instrument_run(instrument_id, run_id):

    """
    Returns DataFrame of given instrument run from "runs" table.

    The run ID is sent to the database as a bound parameter instead of being
    concatenated into the SQL text, so run IDs containing quotes (or any other
    SQL syntax) are matched literally.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        DataFrame containing record for instrument run
    """

    database = get_database_file(instrument_id=instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # ":run_id" is a named placeholder; the value is supplied through "params"
    query = sa.text("SELECT * FROM runs WHERE run_id = :run_id")
    df_instrument_run = pd.read_sql(query, engine, params={"run_id": run_id})
    return df_instrument_run
Returns DataFrame of given instrument run from "runs" table.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Run ID
Returns:
DataFrame containing record for instrument run
def get_instrument_run_from_csv(instrument_id, run_id):

    """
    Loads the record of an instrument run from the CSV file written during active instrument runs.

    This function is called when a user views an active instrument run from an external device
    (to prevent downloading / uploading the database file with each sample acquisition).

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        DataFrame containing record for instrument run
    """

    # The run folder is named "<instrument ID with underscores>_<run ID>"
    run_folder = "_".join([instrument_id.replace(" ", "_"), run_id])
    csv_path = os.path.join(data_directory, run_folder, "csv", "run.csv")

    df_run = pd.read_csv(csv_path, index_col=False)
    return df_run
Returns DataFrame of selected instrument run from CSV files during active instrument runs.
This function is called when a user views an active instrument run from an external device (to prevent downloading / uploading the database file with each sample acquisition).
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Run ID
Returns:
DataFrame containing record for instrument run
def get_instrument_runs(instrument_id, as_list=False):

    """
    Fetches every run recorded for an instrument from the "runs" table.

    Args:
        instrument_id (str):
            Instrument ID
        as_list (bool, default False):
            If True, returns only a list of names of instrument runs (jobs)

    Returns:
        DataFrame containing records for instrument runs (QC jobs) for the given instrument,
        or a list of run IDs (as strings) if as_list is True
    """

    engine = sa.create_engine(get_database_file(instrument_id, sqlite_conn=True))
    df_runs = pd.read_sql("SELECT * FROM runs", engine)

    # Default: hand back the full table
    if not as_list:
        return df_runs

    return df_runs["run_id"].astype(str).tolist()
Returns DataFrame of all runs on a given instrument from "runs" table
Arguments:
- instrument_id (str): Instrument ID
- as_list (bool, default False): If True, returns only a list of names of instrument runs (jobs)
Returns:
DataFrame containing records for instrument runs (QC jobs) for the given instrument
def delete_instrument_run(instrument_id, run_id):

    """
    Deletes all records for an instrument run (QC job) from the database.

    Rows matching the run ID are removed from the "runs", "sample_qc_results",
    and "bio_qc_results" tables of the instrument database.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        # Get relevant tables (each one stores rows keyed by run ID)
        tables = [
            sa.Table(table_name, db_metadata, autoload=True)
            for table_name in ("runs", "sample_qc_results", "bio_qc_results")
        ]

        # Delete from each table
        for table in tables:
            connection.execute(sa.delete(table).where(table.c.run_id == run_id))

    finally:
        # Always release the connection, even if reflection or a DELETE fails
        connection.close()
Deletes all records for an instrument run (QC job) from the database.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Run ID
Returns:
None
def get_acquisition_path(instrument_id, run_id):

    """
    Looks up the data acquisition path stored for a given instrument run.

    Args:
        instrument_id (str): Instrument ID
        run_id (str): Run ID

    Returns:
        Acquisition path for the given instrument run
    """

    df_run = get_instrument_run(instrument_id, run_id)
    acquisition_paths = df_run["acquisition_path"].astype(str).tolist()

    # First matching record is used; an IndexError is raised if the run does not exist
    return acquisition_paths[0]
Retrieves acquisition path for a given instrument run.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Run ID
Returns:
Acquisition path for the given instrument run
def get_md5(instrument_id, sample_id, run_id=None):

    """
    Returns MD5 checksum for a data file in the "sample_qc_results" or "bio_qc_results" table.

    Used for comparing MD5 checksums during active instrument runs.

    The sample is looked up in "bio_qc_results" if its ID contains a biological standard
    identifier, and in "sample_qc_results" otherwise. Values are passed to the database
    as bound parameters, so sample IDs containing quotes are matched literally.

    NOTE: Sample IDs are not unique across instrument runs. If run_id is omitted and two runs
    have samples with the same sample ID, the first matching record is returned. Callers
    should pass run_id whenever it is known.

    Args:
        instrument_id (str): Instrument ID
        sample_id (str): Sample ID
        run_id (str, default None): If given, restricts the lookup to this instrument run

    Returns:
        MD5 checksum stored for the data file.
    """

    # Connect to database
    database = get_database_file(instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # Check if sample is a biological standard
    table = "sample_qc_results"

    for identifier in get_biological_standard_identifiers().keys():
        if identifier in sample_id:
            table = "bio_qc_results"
            break

    # The table name is one of the two literals above, so it is safe to splice in;
    # user-supplied values go through named placeholders
    query = "SELECT * FROM " + table + " WHERE sample_id = :sample_id"
    params = {"sample_id": sample_id}

    if run_id is not None:
        query += " AND run_id = :run_id"
        params["run_id"] = run_id

    # Get sample from correct table
    df_sample_qc_results = pd.read_sql(sa.text(query), engine, params=params)

    return df_sample_qc_results["md5"].astype(str).values[0]
Returns MD5 checksum stored for a data file in the "sample_qc_results" table (or the "bio_qc_results" table, if the sample is a biological standard).
Used for comparing MD5 checksums during active instrument runs.
TODO: This function will return incorrect results if two different instrument runs have samples with the same sample ID. It needs to include "run_id" in the SQL query.
Arguments:
- instrument_id (str): Instrument ID
- sample_id (str): Sample ID
Returns:
MD5 checksum stored for the data file.
def update_md5_checksum(instrument_id, sample_id, md5_checksum, run_id=None):

    """
    Updates MD5 checksum for a data file during sample acquisition.

    The checksum is written to "bio_qc_results" if the sample ID contains a biological
    standard identifier, and to "sample_qc_results" otherwise.

    NOTE: Sample IDs are not unique across instrument runs. If run_id is omitted, every
    record with this sample ID is updated, regardless of which run it belongs to. Callers
    should pass run_id whenever it is known.

    Args:
        instrument_id (str):
            Instrument ID
        sample_id (str):
            Sample ID (filename) of data file
        md5_checksum (str):
            MD5 checksum for the sample data file
        run_id (str, default None):
            If given, restricts the update to this instrument run

    Returns:
        None
    """

    # Check if sample is a biological standard to pick the relevant table
    table_name = "sample_qc_results"

    for identifier in get_biological_standard_identifiers().keys():
        if identifier in sample_id:
            table_name = "bio_qc_results"
            break

    # Connect to database
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        qc_results_table = sa.Table(table_name, db_metadata, autoload=True)

        # Match on sample ID, narrowed to a single run when the caller provides one
        condition = qc_results_table.c.sample_id == sample_id
        if run_id is not None:
            condition = condition & (qc_results_table.c.run_id == run_id)

        # Prepare update of MD5 checksum at sample row
        update_md5 = (
            sa.update(qc_results_table)
                .where(condition)
                .values(md5=md5_checksum)
        )

        # Execute UPDATE into database
        connection.execute(update_md5)

    finally:
        # Always release the connection, even if the UPDATE fails
        connection.close()
Updates MD5 checksum for a data file during sample acquisition.
TODO: This function will return incorrect results if two different instrument runs have samples with the same sample ID. It needs to include "run_id" in the SQL query.
Arguments:
- instrument_id (str): Instrument ID
- sample_id (str): Sample ID (filename) of data file
- md5_checksum (str): MD5 checksum for the sample data file
Returns:
None
def write_qc_results(sample_id, instrument_id, run_id, json_mz, json_rt, json_intensity, qc_dataframe, qc_result, is_bio_standard):

    """
    Writes QC results (as dictionary records) to sample record upon MS-DIAL processing completion.

    QC results consist of m/z, RT, and intensity data for internal standards (or targeted metabolites in biological standards),
    as well as a DataFrame containing delta m/z, delta RT, in-run delta RT, warnings, and fails (qc_dataframe) and overall QC result
    (which will be "Pass" or "Fail").

    The data is encoded as dictionary in "records" format: [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}].
    This dictionary is cast to a string before being passed to this function.

    The record to update is identified by both sample ID and run ID.

    TODO: Update names of arguments from json_x to record_x, as the data is no longer encoded as JSON strings.
    The data is now encoded in "records" format as a string.

    Args:
        sample_id (str):
            Sample ID
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (Job ID)
        json_mz (str):
            String dict of internal standard m/z data in "records" format
        json_rt (str):
            String dict of internal standard RT data in "records" format
        json_intensity (str):
            String dict of internal standard intensity data in "records" format
        qc_dataframe (str):
            String dict of various QC data in "records" format
        qc_result (str):
            QC result for sample, either "Pass" or "Fail"
        is_bio_standard (bool):
            Whether the sample is a biological standard

    Returns:
        None
    """

    # Biological standards and regular samples are stored in separate tables
    table_name = "bio_qc_results" if is_bio_standard else "sample_qc_results"

    # Connect to database
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        qc_results_table = sa.Table(table_name, db_metadata, autoload=True)

        # Prepare update (insert) of QC results to correct sample row
        update_qc_results = (
            sa.update(qc_results_table)
                .where((qc_results_table.c.sample_id == sample_id)
                       & (qc_results_table.c.run_id == run_id))
                .values(precursor_mz=json_mz,
                        retention_time=json_rt,
                        intensity=json_intensity,
                        qc_dataframe=qc_dataframe,
                        qc_result=qc_result)
        )

        # Execute UPDATE into database
        connection.execute(update_qc_results)

    finally:
        # Always release the connection, even if the UPDATE fails
        connection.close()
Writes QC results (as dictionary records) to sample record upon MS-DIAL processing completion.
QC results consist of m/z, RT, and intensity data for internal standards (or targeted metabolites in biological standards), as well as a DataFrame containing delta m/z, delta RT, in-run delta RT, warnings, and fails (qc_dataframe) and overall QC result (which will be "Pass" or "Fail").
The data is encoded as dictionary in "records" format: [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]. This dictionary is cast to a string before being passed to this function.
TODO: Update names of arguments from json_x to record_x, as the data is no longer encoded as JSON strings. The data is now encoded in "records" format as a string.
Arguments:
- sample_id (str): Sample ID
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (Job ID)
- json_mz (str): String dict of internal standard m/z data in "records" format
- json_rt (str): String dict of internal standard RT data in "records" format
- json_intensity (str): String dict of internal standard intensity data in "records" format
- qc_dataframe (str): String dict of various QC data in "records" format
- qc_result (str): QC result for sample, either "Pass" or "Fail"
- is_bio_standard (bool): Whether the sample is a biological standard
Returns:
None
def get_chromatography_methods():

    """
    Reads the full "chromatography_methods" table from the Settings database.

    Returns:
        DataFrame of chromatography methods
    """

    settings_engine = sa.create_engine(settings_database)
    return pd.read_sql("SELECT * FROM chromatography_methods", settings_engine)
Returns DataFrame of chromatography methods from the Settings database.
def get_chromatography_methods_list():

    """
    Lists the chromatography method ID's stored in the Settings database.

    Returns:
        List of method ID's, as strings
    """

    method_ids = get_chromatography_methods()["method_id"]
    return method_ids.astype(str).tolist()
Returns list of chromatography method ID's from the Settings database.
def insert_chromatography_method(method_id):

    """
    Inserts new chromatography method in the "chromatography_methods" table of the Settings database.

    A row for the new method is also inserted into the "biological_standards" table
    for each distinct biological standard, so that every standard has an entry for
    every chromatography method.

    Args:
        method_id (str): Name of the chromatography method

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get "chromatography_methods" table and "biological_standards" table
        chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

        # Execute insert of chromatography method
        insert_method = chromatography_table.insert().values(
            {"method_id": method_id,
             "num_pos_standards": 0,
             "num_neg_standards": 0,
             "pos_istd_msp_file": "",
             "neg_istd_msp_file": "",
             "pos_parameter_file": "",
             "neg_parameter_file": "",
             "msdial_config_id": "Default"})

        connection.execute(insert_method)

        # The "biological_standards" table can hold several rows per standard (one per
        # chromatography method), so names and identifiers must be de-duplicated together.
        # De-duplicating only the names would misalign them with the identifier column.
        df_biological_standards = get_biological_standards()
        df_unique_standards = (
            df_biological_standards[["name", "identifier"]]
                .astype(str)
                .drop_duplicates(subset="name")
        )

        # Execute insert of method for each biological standard
        for name, identifier in zip(df_unique_standards["name"], df_unique_standards["identifier"]):
            insert_method_for_bio_standard = biological_standards_table.insert().values({
                "name": name,
                "identifier": identifier,
                "chromatography": method_id,
                "num_pos_features": 0,
                "num_neg_features": 0,
                "msdial_config_id": "Default"})
            connection.execute(insert_method_for_bio_standard)

    finally:
        # Always release the connection, even if an INSERT fails
        connection.close()
Inserts new chromatography method in the "chromatography_methods" table of the Settings database.
Arguments:
- method_id (str): Name of the chromatography method
Returns:
None
def remove_chromatography_method(method_id):

    """
    Deletes chromatography method and all associated records from the Settings database.

    Details:
    1. Deletes the method's internal standard and biological standard library files
       (as named in the database) from the "methods" directory
    2. Removes chromatography method in "chromatography_methods" table
    3. Removes method from "biological_standards" table
    4. Removes associated internal standards from "internal_standards" table
    5. Removes associated targeted features from "targeted_features" table

    NOTE(review): This function does not itself delete anything from Google Drive;
    presumably that is handled by the sync logic elsewhere -- confirm against callers.

    Args:
        method_id (str): Name of the chromatography method

    Returns:
        None
    """

    # Collect library filenames recorded for this method
    df = get_table("Settings", "chromatography_methods")
    df = df.loc[df["method_id"] == method_id]

    df2 = get_table("Settings", "biological_standards")
    df2 = df2.loc[df2["chromatography"] == method_id]

    files_to_delete = set(
        df["pos_istd_msp_file"].astype(str).tolist() + df["neg_istd_msp_file"].astype(str).tolist() +
        df2["pos_bio_msp_file"].astype(str).tolist() + df2["neg_bio_msp_file"].astype(str).tolist())

    # Delete corresponding MSPs from "methods" directory (only files that actually exist there)
    for file in os.listdir(methods_directory):
        if file in files_to_delete:
            os.remove(os.path.join(methods_directory, file))

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Remove from "chromatography_methods" table
        chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)
        delete_queries = [
            sa.delete(chromatography_table)
                .where((chromatography_table.c.method_id == method_id))
        ]

        # Remove all entries in other tables associated with chromatography
        for table_name in ("biological_standards", "internal_standards", "targeted_features"):
            table = sa.Table(table_name, db_metadata, autoload=True)
            delete_queries.append(
                sa.delete(table)
                    .where((table.c.chromatography == method_id))
            )

        # Execute all deletes
        for query in delete_queries:
            connection.execute(query)

    finally:
        # Always release the connection, even if a DELETE fails
        connection.close()
Deletes chromatography method and all associated records from the Settings database.
Details:
- Removes chromatography method in "chromatography_methods" table
- Removes method from "biological_standards" table
- Removes associated internal standards from "internal_standards" table
- Removes associated targeted features from "targeted_features" table
- Deletes corresponding MSPs from folders
- Deletes corresponding MSPs from Google Drive (if sync is enabled)
Arguments:
- method_id (str): Name of the chromatography method
Returns:
None
def update_msdial_config_for_internal_standards(chromatography, config_id):

    """
    Updates MS-DIAL configuration for a given chromatography method.

    This MS-DIAL configuration will be used to generate a parameters file
    for processing samples run with this chromatography method.

    Args:
        chromatography (str):
            Chromatography method ID (name)
        config_id (str):
            MS-DIAL configuration ID (name)

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get relevant table
        methods_table = sa.Table("chromatography_methods", db_metadata, autoload=True)

        # Update MS-DIAL configuration for chromatography method
        update_msdial_config = (
            sa.update(methods_table)
                .where(methods_table.c.method_id == chromatography)
                .values(msdial_config_id=config_id)
        )

        connection.execute(update_msdial_config)

    finally:
        # Always release the connection, even if the UPDATE fails
        connection.close()
Updates MS-DIAL configuration for a given chromatography method.
This MS-DIAL configuration will be used to generate a parameters file for processing samples run with this chromatography method.
Arguments:
- chromatography (str): Chromatography method ID (name)
- config_id (str): MS-DIAL configuration ID (name)
Returns:
None
def _parse_msp_features(msp_text):

    """
    Parses the text of an MSP library into a dictionary of features.

    Compounds are separated by blank lines. For each compound, the name, precursor m/z,
    retention time, InChIKey, and MS2 spectrum are captured. Parsing of a compound stops
    after its "Num Peaks" section. Compounds without a name line, and compounds whose name
    was already seen earlier in the file, are dropped.

    Args:
        msp_text (str): Full text of the MSP file

    Returns:
        dict: Maps the compound's position in the file to a dict with keys
        "Name", "Precursor m/z", "Retention time", "INCHIKEY", and "MS2 spectrum"
        (fields that were not found are None)
    """

    # Split MSP into compounds, then split each compound into its lines
    data = [element.split("\n") for element in msp_text.split("\n\n")]

    # Single-line chunks (e.g. leftovers from extra blank lines) are not compounds
    list_of_features = [feature for feature in data if len(feature) != 1]

    features_dict = {}
    added_features = set()

    # Iterate through features in MSP
    for feature_index, feature in enumerate(list_of_features):

        features_dict[feature_index] = {
            "Name": None,
            "Precursor m/z": None,
            "Retention time": None,
            "INCHIKEY": None,
            "MS2 spectrum": None
        }

        # Iterate through each line of each feature in the MSP
        for data_index, feature_data in enumerate(feature):

            upper_line = feature_data.upper()

            # Capture name, inchikey, m/z, and RT
            if "NAME" in upper_line:
                feature_name = feature_data.split(": ")[-1]

                # Duplicate compound: abandon it (its "Name" stays None and it is filtered out below)
                if feature_name in added_features:
                    break

                added_features.add(feature_name)
                features_dict[feature_index]["Name"] = feature_name

            elif "PRECURSORMZ" in upper_line:
                features_dict[feature_index]["Precursor m/z"] = feature_data.split(": ")[-1]

            elif "INCHIKEY" in upper_line:
                features_dict[feature_index]["INCHIKEY"] = feature_data.split(": ")[-1]

            elif "RETENTIONTIME" in upper_line:
                features_dict[feature_index]["Retention time"] = feature_data.split(": ")[-1]

            # Capture MS2 spectrum
            elif "Num Peaks" in feature_data:

                # Get number of peaks in MS2 spectrum
                num_peaks = int(feature_data.split(": ")[-1])

                # Each line in the MSP corresponds to a peak
                start_index = data_index + 1
                end_index = data_index + num_peaks + 1

                # Each peak is represented as a string e.g. "56.04977\t247187"
                peaks_in_spectrum = [
                    peak.replace("\t", ":") for peak in feature[start_index:end_index]]

                features_dict[feature_index]["MS2 spectrum"] = str(peaks_in_spectrum)

                # The peak list is the last section of a compound
                break

    return {key: value for key, value in features_dict.items() if value["Name"] is not None}


def add_msp_to_database(msp_file, chromatography, polarity, bio_standard=None):

    """
    Parses compounds from MSP into the Settings database.

    This function writes the MSP file into the "methods" directory, then writes its features
    into the "internal_standards" table (or the "targeted_features" table, if a biological
    standard is given), replacing any features previously stored for the same
    chromatography-polarity (and biological standard) combination. The MSP filename and the
    number of features are stored in the "chromatography_methods" table (or the
    "biological_standards" table). If a TXT library with the same base name exists in the
    "methods" directory, it is deleted.

    TODO: The MSP/TXT libraries have standardized names; there is no need to store the filename in the database.

    Args:
        msp_file (io.StringIO):
            In-memory text-stream file object
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity the MSP applies to ("Positive Mode" or "Negative Mode")
        bio_standard (str, default None):
            Parses MSP and applies to biological standard within a chromatography-polarity combination

    Returns:
        None

    Raises:
        ValueError: If polarity is neither "Positive Mode" nor "Negative Mode"
    """

    # Fail fast on an unsupported polarity, before touching the filesystem or database
    polarity_suffixes = {"Positive Mode": "Pos", "Negative Mode": "Neg"}

    if polarity not in polarity_suffixes:
        raise ValueError(
            'polarity must be "Positive Mode" or "Negative Mode", got ' + repr(polarity))

    # Name the library after the chromatography and polarity (prefixed by the biological standard, if any)
    filename = chromatography + "_" + polarity_suffixes[polarity] + ".msp"

    if bio_standard is not None:
        filename = bio_standard.replace(" ", "_") + "_" + filename

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Write MSP file to folder, store file path in database (further down in function)
        if not os.path.exists(methods_directory):
            os.makedirs(methods_directory)

        msp_file_path = os.path.join(methods_directory, filename)

        with open(msp_file_path, "w") as file:
            msp_file.seek(0)
            shutil.copyfileobj(msp_file, file)

        # Read MSP file
        with open(msp_file_path, "r") as msp:
            features_dict = _parse_msp_features(msp.read())

        # Adding MSP for biological standards
        if bio_standard is not None:

            # Get "targeted_features" table
            targeted_features_table = sa.Table("targeted_features", db_metadata, autoload=True)

            # Prepare DELETE of old targeted features
            delete_old_targeted_features = (
                sa.delete(targeted_features_table)
                    .where((targeted_features_table.c.chromatography == chromatography)
                           & (targeted_features_table.c.polarity == polarity)
                           & (targeted_features_table.c.biological_standard == bio_standard))
            )

            # Execute DELETE
            connection.execute(delete_old_targeted_features)

            # Execute INSERT of each targeted feature into targeted_features table
            for feature in features_dict.values():
                insert_feature = targeted_features_table.insert().values(
                    {"name": feature["Name"],
                     "chromatography": chromatography,
                     "polarity": polarity,
                     "biological_standard": bio_standard,
                     "precursor_mz": feature["Precursor m/z"],
                     "retention_time": feature["Retention time"],
                     "ms2_spectrum": feature["MS2 spectrum"],
                     "inchikey": feature["INCHIKEY"]})
                connection.execute(insert_feature)

            # Get "biological_standards" table
            biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

            # Write location of msp file to respective cell
            if polarity == "Positive Mode":
                update_msp_file = (
                    sa.update(biological_standards_table)
                        .where((biological_standards_table.c.chromatography == chromatography)
                               & (biological_standards_table.c.name == bio_standard))
                        .values(num_pos_features=len(features_dict),
                                pos_bio_msp_file=filename)
                )
            else:
                update_msp_file = (
                    sa.update(biological_standards_table)
                        .where((biological_standards_table.c.chromatography == chromatography)
                               & (biological_standards_table.c.name == bio_standard))
                        .values(num_neg_features=len(features_dict),
                                neg_bio_msp_file=filename)
                )

            # Execute UPDATE of MSP file location
            connection.execute(update_msp_file)

        # Adding MSP for internal standards
        else:

            # Get internal_standards table
            internal_standards_table = sa.Table("internal_standards", db_metadata, autoload=True)

            # Prepare DELETE of old internal standards
            delete_old_internal_standards = (
                sa.delete(internal_standards_table)
                    .where((internal_standards_table.c.chromatography == chromatography)
                           & (internal_standards_table.c.polarity == polarity))
            )

            # Execute DELETE
            connection.execute(delete_old_internal_standards)

            # Execute INSERT of each internal standard into internal_standards table
            for feature in features_dict.values():
                insert_feature = internal_standards_table.insert().values(
                    {"name": feature["Name"],
                     "chromatography": chromatography,
                     "polarity": polarity,
                     "precursor_mz": feature["Precursor m/z"],
                     "retention_time": feature["Retention time"],
                     "ms2_spectrum": feature["MS2 spectrum"],
                     "inchikey": feature["INCHIKEY"]})
                connection.execute(insert_feature)

            # Get "chromatography" table
            chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)

            # Write location of msp file to respective cell
            if polarity == "Positive Mode":
                update_msp_file = (
                    sa.update(chromatography_table)
                        .where(chromatography_table.c.method_id == chromatography)
                        .values(num_pos_standards=len(features_dict),
                                pos_istd_msp_file=filename)
                )
            else:
                update_msp_file = (
                    sa.update(chromatography_table)
                        .where(chromatography_table.c.method_id == chromatography)
                        .values(num_neg_standards=len(features_dict),
                                neg_istd_msp_file=filename)
                )

            # Execute UPDATE of MSP file location
            connection.execute(update_msp_file)

        # If the corresponding TXT library existed, delete it
        txt_library = os.path.join(methods_directory, filename.replace(".msp", ".txt"))

        if os.path.exists(txt_library):
            os.remove(txt_library)

    finally:
        # Always release the connection, even if parsing or a query fails
        connection.close()
Parses compounds from MSP into the Settings database.
This function writes features from an MSP file into the "internal_standards" or "targeted_features" table, and inserts location of pos/neg MSP files into "chromatography_methods" table.
TODO: The MSP/TXT libraries have standardized names; there is no need to store the filename in the database.
Arguments:
- msp_file (io.StringIO): In-memory text-stream file object
- chromatography (str): Chromatography method ID (name)
- polarity (str): Polarity the MSP library applies to ("Positive Mode" or "Negative Mode")
- bio_standard (str, default None): Parses MSP and applies to biological standard within a chromatography-polarity combination
Returns:
None
def add_csv_to_database(csv_file, chromatography, polarity):

    """
    Parses compounds from a CSV file into the Settings database.

    Parses compounds from a CSV into the "internal_standards" table (replacing any internal
    standards previously stored for the same chromatography-polarity combination), writes
    the CSV contents as a tab-delimited TXT file into the "methods" directory, and stores
    the name of the pos/neg TXT file and the number of standards in the
    "chromatography_methods" table. If an MSP library with the same base name exists in the
    "methods" directory, it is deleted.

    The CSV must contain the columns "Common Name", "MS1 m/z", and "RT (min)".

    TODO: The MSP/TXT libraries have standardized names; there is no need to store the filename in the database.

    Args:
        csv_file (io.StringIO):
            In-memory text-stream file object
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity the library applies to ("Positive Mode" or "Negative Mode")

    Returns:
        None

    Raises:
        ValueError: If polarity is neither "Positive Mode" nor "Negative Mode"
    """

    # Fail fast on an unsupported polarity, before touching the filesystem or database
    polarity_suffixes = {"Positive Mode": "Pos", "Negative Mode": "Neg"}

    if polarity not in polarity_suffixes:
        raise ValueError(
            'polarity must be "Positive Mode" or "Negative Mode", got ' + repr(polarity))

    # Convert CSV file into Python dictionary (one entry per row)
    df_internal_standards = pd.read_csv(csv_file, index_col=False)
    internal_standards_dict = df_internal_standards.to_dict("index")

    # Create methods directory if it doesn't already exist
    if not os.path.exists(methods_directory):
        os.makedirs(methods_directory)

    # Name file accordingly
    filename = chromatography + "_" + polarity_suffixes[polarity] + ".txt"
    txt_file_path = os.path.join(methods_directory, filename)

    # Write CSV columns to tab-delimited text file
    df_internal_standards.to_csv(txt_file_path, sep="\t", index=False)

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get internal_standards table
        internal_standards_table = sa.Table("internal_standards", db_metadata, autoload=True)

        # Prepare DELETE of old internal standards
        delete_old_internal_standards = (
            sa.delete(internal_standards_table)
                .where((internal_standards_table.c.chromatography == chromatography)
                       & (internal_standards_table.c.polarity == polarity))
        )

        # Execute DELETE
        connection.execute(delete_old_internal_standards)

        # Execute INSERT of each internal standard into internal_standards table
        for standard in internal_standards_dict.values():
            insert_standard = internal_standards_table.insert().values(
                {"name": standard["Common Name"],
                 "chromatography": chromatography,
                 "polarity": polarity,
                 "precursor_mz": standard["MS1 m/z"],
                 "retention_time": standard["RT (min)"]})
            connection.execute(insert_standard)

        # Get "chromatography" table
        chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)

        # Write location of TXT file to respective cell
        if polarity == "Positive Mode":
            update_msp_file = (
                sa.update(chromatography_table)
                    .where(chromatography_table.c.method_id == chromatography)
                    .values(num_pos_standards=len(internal_standards_dict),
                            pos_istd_msp_file=filename)
            )
        else:
            update_msp_file = (
                sa.update(chromatography_table)
                    .where(chromatography_table.c.method_id == chromatography)
                    .values(num_neg_standards=len(internal_standards_dict),
                            neg_istd_msp_file=filename)
            )

        # Execute UPDATE of TXT file location
        connection.execute(update_msp_file)

        # If the corresponding MSP library existed, delete it
        msp_library = os.path.join(methods_directory, filename.replace(".txt", ".msp"))

        if os.path.exists(msp_library):
            os.remove(msp_library)

    finally:
        # Always release the connection, even if a query fails
        connection.close()
Parses compounds from a CSV file into the Settings database.
Parses compounds from a CSV into the "internal_standards" table, and stores the location of the pos/neg TXT files in "chromatography_methods" table.
TODO: The MSP/TXT libraries have standardized names; there is no need to store the filename in the database.
Arguments:
- csv_file (io.StringIO): In-memory text-stream file object
- chromatography (str): Chromatography method ID (name)
- polarity (str): Polarity the library applies to ("Positive Mode" or "Negative Mode")
Returns:
None
def get_msdial_configurations():

    """
    Returns the names of all MS-DIAL configurations stored in the Settings database.

    Reads the "msdial_parameters" table and returns its "config_name" column as a list of strings.
    """

    settings_engine = sa.create_engine(settings_database)
    config_names = pd.read_sql("SELECT * FROM msdial_parameters", settings_engine)["config_name"]
    config_names = config_names.astype(str)
    return config_names.tolist()
Returns list of user configurations of MS-DIAL parameters from Settings database.
def generate_msdial_parameters_file(chromatography, polarity, msp_file_path, bio_standard=None):

    """
    Uses parameters from user-curated MS-DIAL configuration to create a parameters.txt file for MS-DIAL.

    The parameters file is written to the methods directory, and its path is stored in the "biological_standards"
    table (if a biological standard is given) or in the "chromatography_methods" table (otherwise).

    TODO: Currently, this function is only called upon a new job setup. To allow changes during a QC job,
    this function should be called every time the user makes a configuration save in Settings > MS-DIAL Configurations.

    Args:
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity ("Positive" or "Negative")
        msp_file_path (str):
            Library file path, ending in ".msp" or ".txt"
        bio_standard (str, default None):
            Specifies that the parameters file is for a biological standard

    Returns:
        None

    Raises:
        ValueError: If polarity is not "Positive" / "Negative", or the library is not a .msp / .txt file
    """

    # Polarity-dependent settings: (filename suffix, adduct type, database column for the file path).
    # Validating here replaces the UnboundLocalError that an unknown polarity used to cause.
    polarity_settings = {
        "Positive": ("_Parameters_Pos.txt", "[M+H]+", "pos_parameter_file"),
        "Negative": ("_Parameters_Neg.txt", "[M-H]-", "neg_parameter_file"),
    }
    if polarity not in polarity_settings:
        raise ValueError('polarity must be "Positive" or "Negative", got ' + repr(polarity))
    filename_suffix, adduct_type, parameter_file_column = polarity_settings[polarity]

    # MS-DIAL labels the library line differently depending on the file type
    if msp_file_path.endswith(".msp"):
        filepath = "MSP file: " + msp_file_path
    elif msp_file_path.endswith(".txt"):
        filepath = "Text file: " + msp_file_path
    else:
        raise ValueError("msp_file_path must end with .msp or .txt, got " + repr(msp_file_path))

    # Get parameters of selected configuration
    if bio_standard is not None:
        df_bio_standards = get_biological_standards()
        df_bio_standards = df_bio_standards.loc[
            (df_bio_standards["chromatography"] == chromatography) & (df_bio_standards["name"] == bio_standard)]
        config_name = df_bio_standards["msdial_config_id"].astype(str).values[0]
    else:
        df_methods = get_chromatography_methods()
        df_methods = df_methods.loc[df_methods["method_id"] == chromatography]
        config_name = df_methods["msdial_config_id"].astype(str).values[0]

    # Tuple of configuration values, indexed positionally below
    parameters = get_msdial_configuration_parameters(config_name)

    # Create "methods" directory if it does not exist
    os.makedirs(methods_directory, exist_ok=True)

    # Name parameters file after the biological standard (if given) or the chromatography method
    file_prefix = bio_standard if bio_standard is not None else chromatography
    filename = file_prefix.replace(" ", "_") + "_" + config_name.replace(" ", "_") + filename_suffix
    parameters_file = os.path.join(methods_directory, filename)

    # Text file contents; a bare "\n" entry produces a blank line between sections
    lines = [
        "#Data type",
        "MS1 data type: Centroid",
        "MS2 data type: Centroid",
        "Ion mode: " + polarity,
        "DIA file:", "\n",

        "#Data collection parameters",
        "Retention time begin: " + str(parameters[0]),
        "Retention time end: " + str(parameters[1]),
        "Mass range begin: " + str(parameters[2]),
        "Mass range end: " + str(parameters[3]), "\n",

        "#Centroid parameters",
        "MS1 tolerance for centroid: " + str(parameters[4]),
        "MS2 tolerance for centroid: " + str(parameters[5]), "\n",

        "#Peak detection parameters",
        "Smoothing method: " + str(parameters[6]),
        "Smoothing level: " + str(parameters[7]),
        "Minimum peak width: " + str(parameters[8]),
        "Minimum peak height: " + str(parameters[9]),
        "Mass slice width: " + str(parameters[10]), "\n",

        "#Deconvolution parameters",
        "Sigma window value: 0.5",
        "Amplitude cut off: 0", "\n",

        "#Adduct list",
        "Adduct list: " + adduct_type, "\n",

        "#Text file and post identification (retention time and accurate mass based) setting",
        filepath,
        "Retention time tolerance for post identification: " + str(parameters[11]),
        "Accurate ms1 tolerance for post identification: " + str(parameters[12]),
        "Post identification score cut off: " + str(parameters[13]), "\n",

        "#Alignment parameters setting",
        "Retention time tolerance for alignment: " + str(parameters[14]),
        "MS1 tolerance for alignment: " + str(parameters[15]),
        "Retention time factor for alignment: " + str(parameters[16]),
        "MS1 factor for alignment: " + str(parameters[17]),
        "Peak count filter: " + str(parameters[18]),
        "QC at least filter: " + str(parameters[19]),
    ]

    # Write parameters to a text file
    with open(parameters_file, "w") as file:
        for line in lines:
            file.write(line)
            if line != "\n":
                file.write("\n")

    # Write path of parameters text file to the database
    db_metadata, connection = connect_to_database("Settings")

    try:
        if bio_standard is not None:
            # For processing biological standard samples
            biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)
            update_parameter_file = (
                sa.update(biological_standards_table)
                    .where((biological_standards_table.c.chromatography == chromatography)
                           & (biological_standards_table.c.name == bio_standard))
                    .values({parameter_file_column: parameters_file})
            )
        else:
            # For processing samples with internal standards
            chromatography_table = sa.Table("chromatography_methods", db_metadata, autoload=True)
            update_parameter_file = (
                sa.update(chromatography_table)
                    .where(chromatography_table.c.method_id == chromatography)
                    .values({parameter_file_column: parameters_file})
            )

        connection.execute(update_parameter_file)

    finally:
        # Close the connection even if the UPDATE failed
        connection.close()
Uses parameters from user-curated MS-DIAL configuration to create a parameters.txt file for MS-DIAL.
TODO: Currently, this function is only called upon a new job setup. To allow changes during a QC job, this function should be called every time the user makes a configuration save in Settings > MS-DIAL Configurations.
Arguments:
- chromatography (str): Chromatography method ID (name)
- polarity (str): Polarity ("Positive" or "Negative")
- msp_file_path (str): MSP library file path
- bio_standard (str, default None): Specifies that the parameters file is for a biological standard
Returns:
None
def add_msdial_configuration(msdial_config_name):

    """
    Inserts new user configuration of MS-DIAL parameters into the "msdial_parameters" table in Settings database.

    The new configuration is created with the default parameter values listed in the function body.

    Args:
        msdial_config_name (str): MS-DIAL configuration ID

    Returns:
        None
    """

    # Default parameter values for a newly created configuration
    default_parameters = {
        "config_name": msdial_config_name,
        "rt_begin": 0,
        "rt_end": 100,
        "mz_begin": 0,
        "mz_end": 2000,
        "ms1_centroid_tolerance": 0.008,
        "ms2_centroid_tolerance": 0.01,
        "smoothing_method": "LinearWeightedMovingAverage",
        "smoothing_level": 3,
        "min_peak_width": 3,
        "min_peak_height": 35000,
        "mass_slice_width": 0.1,
        "post_id_rt_tolerance": 0.3,
        "post_id_mz_tolerance": 0.008,
        "post_id_score_cutoff": 85,
        "alignment_rt_tolerance": 0.05,
        "alignment_mz_tolerance": 0.008,
        "alignment_rt_factor": 0.5,
        "alignment_mz_factor": 0.5,
        "peak_count_filter": 0,
        "qc_at_least_filter": "True",
    }

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get MS-DIAL parameters table and INSERT the new configuration
        msdial_parameters_table = sa.Table("msdial_parameters", db_metadata, autoload=True)
        connection.execute(msdial_parameters_table.insert().values(default_parameters))
    finally:
        # Close the connection even if the INSERT failed
        connection.close()
Inserts new user configuration of MS-DIAL parameters into the "msdial_parameters" table in Settings database.
Arguments:
- msdial_config_name (str): MS-DIAL configuration ID
Returns:
None
def remove_msdial_configuration(msdial_config_name):

    """
    Deletes user configuration of MS-DIAL parameters from the "msdial_parameters" table.

    Args:
        msdial_config_name (str): MS-DIAL configuration ID

    Returns:
        None
    """

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get MS-DIAL parameters table
        msdial_parameters_table = sa.Table("msdial_parameters", db_metadata, autoload=True)

        # DELETE every row whose config_name matches
        delete_config = (
            sa.delete(msdial_parameters_table)
                .where(msdial_parameters_table.c.config_name == msdial_config_name)
        )
        connection.execute(delete_config)
    finally:
        # Close the connection even if the DELETE failed
        connection.close()
Deletes user configuration of MS-DIAL parameters from the "msdial_parameters" table.
Arguments:
- msdial_config_name (str): MS-DIAL configuration ID
Returns:
None
def get_msdial_configuration_parameters(msdial_config_name, parameter=None):

    """
    Returns tuple of parameters defined for a selected MS-DIAL configuration.

    The tuple contains every column of the "msdial_parameters" table except "id" and "config_name",
    in the column order of the table.

    TODO: The MS-DIAL configuration is returned as a tuple for a concise implementation of get_msdial_parameters_for_config()
    in the DashWebApp module. While convenient there, this function is not optimal for maintainability. Should return
    the entire DataFrame record instead.

    See update_msdial_configuration() for details on parameters.

    Args:
        msdial_config_name (str):
            MS-DIAL configuration ID
        parameter (str, default None):
            If specified, returns only the value for the given parameter

    Returns:
        Tuple of parameters for the given MS-DIAL configuration, or single parameter value.

    Raises:
        IndexError: If no configuration with the given name exists
    """

    # Get "msdial_parameters" table from database as a DataFrame
    engine = sa.create_engine(settings_database)
    df_configurations = pd.read_sql("SELECT * FROM msdial_parameters", engine)

    # Get selected configuration
    selected_config = df_configurations.loc[
        df_configurations["config_name"] == msdial_config_name]

    # Drop into a new DataFrame; an in-place drop on a filtered slice raises SettingWithCopyWarning
    selected_config = selected_config.drop(columns=["id", "config_name"])

    if parameter is not None:
        return selected_config[parameter].values[0]

    return tuple(selected_config.to_records(index=False)[0])
Returns tuple of parameters defined for a selected MS-DIAL configuration.
TODO: The MS-DIAL configuration is returned as a tuple for a concise implementation of get_msdial_parameters_for_config() in the DashWebApp module. While convenient there, this function is not optimal for maintainability. Should return the entire DataFrame record instead.
See update_msdial_configuration() for details on parameters.
Arguments:
- msdial_config_name (str): MS-DIAL configuration ID
- parameter (str, default None): If specified, returns only the value for the given parameter
Returns:
Tuple of parameters for the given MS-DIAL configuration, or single parameter value.
def update_msdial_configuration(config_name, rt_begin, rt_end, mz_begin, mz_end, ms1_centroid_tolerance,
    ms2_centroid_tolerance, smoothing_method, smoothing_level, mass_slice_width, min_peak_width, min_peak_height,
    post_id_rt_tolerance, post_id_mz_tolerance, post_id_score_cutoff, alignment_rt_tolerance, alignment_mz_tolerance,
    alignment_rt_factor, alignment_mz_factor, peak_count_filter, qc_at_least_filter):

    """
    Updates and saves changes of all parameters for a selected MS-DIAL configuration.

    For details on MS-DIAL parameters, see: https://mtbinfo-team.github.io/mtbinfo.github.io/MS-DIAL/tutorial#section-2-3

    Args:
        config_name (str):
            Name / ID of MS-DIAL configuration
        rt_begin (int):
            Minimum retention time in RT range for analysis
        rt_end (int):
            Maximum retention time in RT range for analysis
        mz_begin (float):
            Minimum precursor mass in m/z range for analysis
        mz_end (float):
            Maximum precursor mass in m/z range for analysis
        ms1_centroid_tolerance (float):
            MS1 centroid tolerance
        ms2_centroid_tolerance (float):
            MS2 centroid tolerance
        smoothing_method (str):
            Peak smoothing method for peak detection
        smoothing_level (int):
            Peak smoothing level
        mass_slice_width (float):
            Mass slice width
        min_peak_width (int):
            Minimum peak width threshold
        min_peak_height (int):
            Minimum peak height threshold
        post_id_rt_tolerance (float):
            Post-identification retention time tolerance
        post_id_mz_tolerance (float):
            Post-identification precursor m/z tolerance
        post_id_score_cutoff (int):
            Similarity score cutoff after peak identification
        alignment_rt_tolerance (float):
            Post-alignment retention time tolerance
        alignment_mz_tolerance (float):
            Post-alignment precursor m/z tolerance
        alignment_rt_factor (float):
            Post-alignment retention time factor
        alignment_mz_factor (float):
            Post-alignment precursor m/z factor
        peak_count_filter (int):
            Peak count filter
        qc_at_least_filter (str):
            QC at least filter

    Returns:
        None
    """

    # Column values to write, keyed by column name of the "msdial_parameters" table
    new_values = {
        "rt_begin": rt_begin,
        "rt_end": rt_end,
        "mz_begin": mz_begin,
        "mz_end": mz_end,
        "ms1_centroid_tolerance": ms1_centroid_tolerance,
        "ms2_centroid_tolerance": ms2_centroid_tolerance,
        "smoothing_method": smoothing_method,
        "smoothing_level": smoothing_level,
        "min_peak_width": min_peak_width,
        "min_peak_height": min_peak_height,
        "mass_slice_width": mass_slice_width,
        "post_id_rt_tolerance": post_id_rt_tolerance,
        "post_id_mz_tolerance": post_id_mz_tolerance,
        "post_id_score_cutoff": post_id_score_cutoff,
        "alignment_rt_tolerance": alignment_rt_tolerance,
        "alignment_mz_tolerance": alignment_mz_tolerance,
        "alignment_rt_factor": alignment_rt_factor,
        "alignment_mz_factor": alignment_mz_factor,
        "peak_count_filter": peak_count_filter,
        "qc_at_least_filter": qc_at_least_filter,
    }

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        # Get MS-DIAL parameters table
        msdial_parameters_table = sa.Table("msdial_parameters", db_metadata, autoload=True)

        # UPDATE the row(s) of the selected configuration with the user-inputted parameters
        update_parameters = (
            sa.update(msdial_parameters_table)
                .where(msdial_parameters_table.c.config_name == config_name)
                .values(new_values)
        )
        connection.execute(update_parameters)
    finally:
        # Close the connection even if the UPDATE failed
        connection.close()
Updates and saves changes of all parameters for a selected MS-DIAL configuration.
For details on MS-DIAL parameters, see: https://mtbinfo-team.github.io/mtbinfo.github.io/MS-DIAL/tutorial#section-2-3
Arguments:
- config_name (str): Name / ID of MS-DIAL configuration
- rt_begin (int): Minimum retention time in RT range for analysis range
- rt_end (int): Maximum retention time in RT range for analysis
- mz_begin (float): Minimum precursor mass in m/z range for analysis range
- mz_end (float): Maximum precursor mass in m/z range for analysis range
- ms1_centroid_tolerance (float): MS1 centroid tolerance
- ms2_centroid_tolerance (float): MS2 centroid tolerance
- smoothing_method (str): Peak smoothing method for peak detection
- smoothing_level (int): Peak smoothing level
- mass_slice_width (float): Mass slice width
- min_peak_width (int): Minimum peak width threshold
- min_peak_height (int): Minimum peak height threshold
- post_id_rt_tolerance (float): Post-identification retention time tolerance
- post_id_mz_tolerance (float): Post-identification precursor m/z tolerance
- post_id_score_cutoff (int): Similarity score cutoff after peak identification
- alignment_rt_tolerance (float): Post-alignment retention time tolerance
- alignment_mz_tolerance (float): Post-alignment precursor m/z tolerance
- alignment_rt_factor (float): Post-alignment retention time factor
- alignment_mz_factor (float): Post-alignment precursor m/z factor
- peak_count_filter (int): Peak count filter
- qc_at_least_filter (str): QC at least filter
Returns:
None
def get_msp_file_path(chromatography, polarity, bio_standard=None):

    """
    Returns file paths of MSPs for a selected chromatography / polarity (both stored
    in the methods folder upon user upload) for MS-DIAL parameter file generation.

    The library filename is read from the "biological_standards" table when a biological standard
    is given, and from the "chromatography_methods" table otherwise, then joined with the methods directory.

    TODO: Once added to workspace, MSP / TXT library file names are standardized. No need to store / retrieve from database.
    Get the file path using the filename e.g. return directory + chromatography + "_" + polarity + ".msp".

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity, either "Positive" or "Negative"
        bio_standard (str, default None):
            Name of biological standard

    Returns:
        MSP / TXT library file path.

    Raises:
        ValueError: If polarity is neither "Positive" nor "Negative"
        IndexError: If no matching row exists in the database
    """

    # An unknown polarity used to surface as an UnboundLocalError further down
    if polarity not in ("Positive", "Negative"):
        raise ValueError('polarity must be "Positive" or "Negative", got ' + repr(polarity))

    is_positive = polarity == "Positive"

    # Bound parameters are used instead of string concatenation, so that names containing
    # quotes cannot break (or alter) the query
    if bio_standard is not None:
        query = sa.text(
            "SELECT * FROM biological_standards WHERE name = :name AND chromatography = :chromatography")
        query_parameters = {"name": bio_standard, "chromatography": chromatography}
        msp_column = "pos_bio_msp_file" if is_positive else "neg_bio_msp_file"
    else:
        query = sa.text("SELECT * FROM chromatography_methods WHERE method_id = :method_id")
        query_parameters = {"method_id": chromatography}
        msp_column = "pos_istd_msp_file" if is_positive else "neg_istd_msp_file"

    # Connect to database and fetch the matching row(s)
    engine = sa.create_engine(settings_database)
    df_libraries = pd.read_sql(query, engine, params=query_parameters)

    # Get library of the first matching row in the requested polarity
    msp_file_name = df_libraries[msp_column].astype(str).values[0]

    # Return file path (os.path.join leaves an already-absolute path unchanged)
    return os.path.join(methods_directory, msp_file_name)
Returns file paths of MSPs for a selected chromatography / polarity (both stored in the methods folder upon user upload) for MS-DIAL parameter file generation.
TODO: Once added to workspace, MSP / TXT library file names are standardized. No need to store / retrieve from database. Get the file path using the filename e.g. return directory + chromatography + "_" + polarity + ".msp".
Arguments:
- chromatography (str): Chromatography method ID
- polarity (str): Polarity, either "Positive" or "Negative"
- bio_standard (str, default None): Name of biological standard
Returns:
MSP / TXT library file path.
def get_parameter_file_path(chromatography, polarity, biological_standard=None):

    """
    Returns file path of parameters file stored in database.

    The path is read from the "biological_standards" table when a biological standard is given,
    and from the "chromatography_methods" table otherwise.

    TODO: Once generated, MS-DIAL parameter filenames are standardized. No need to store / retrieve from database.
    Get the file path using the filename e.g. return directory + chromatography + "_" + polarity + "_Parameters.txt".

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity, either "Pos" or "Neg" (the long forms "Positive" and "Negative" are accepted as well)
        biological_standard (str, default None):
            Name of biological standard

    Returns:
        File path for MS-DIAL parameters.txt file.

    Raises:
        ValueError: If polarity is not one of the accepted values
        IndexError: If no matching row exists in the database
    """

    # Map polarity onto the column holding the parameters file path; an unknown
    # polarity used to surface as an UnboundLocalError
    parameter_file_columns = {
        "Pos": "pos_parameter_file",
        "Positive": "pos_parameter_file",
        "Neg": "neg_parameter_file",
        "Negative": "neg_parameter_file",
    }
    if polarity not in parameter_file_columns:
        raise ValueError('polarity must be "Pos" or "Neg", got ' + repr(polarity))

    # Bound parameters are used instead of string concatenation, so that names containing
    # quotes cannot break (or alter) the query
    if biological_standard is not None:
        query = sa.text(
            "SELECT * FROM biological_standards WHERE chromatography = :chromatography AND name = :name")
        query_parameters = {"chromatography": chromatography, "name": biological_standard}
    else:
        query = sa.text("SELECT * FROM chromatography_methods WHERE method_id = :method_id")
        query_parameters = {"method_id": chromatography}

    engine = sa.create_engine(settings_database)
    df = pd.read_sql(query, engine, params=query_parameters)

    return df[parameter_file_columns[polarity]].astype(str).values[0]
Returns file path of parameters file stored in database.
TODO: Once generated, MS-DIAL parameter filenames are standardized. No need to store / retrieve from database. Get the file path using the filename e.g. return directory + chromatography + "_" + polarity + "_Parameters.txt".
Arguments:
- chromatography (str): Chromatography method ID
- polarity (str): Polarity, either "Positive" or "Negative"
- biological_standard (str, default None): Name of biological standard
Returns:
File path for MS-DIAL parameters.txt file.
def get_msdial_directory():

    """
    Returns location of MS-DIAL directory.

    The value is read from the "msdial_directory" column of the first row
    of the "workspace" table in the Settings database.
    """

    workspace = get_table("Settings", "workspace")
    msdial_directories = workspace["msdial_directory"].astype(str)
    return msdial_directories.values[0]
Returns location of MS-DIAL directory.
def get_msconvert_directory():

    """
    Returns location of MSConvert directory.

    This function takes the third path component of the MS-DIAL directory as the user ID, then returns
    the first folder in C:/Users/<username>/AppData/Local/Apps whose name contains "ProteoWizard".

    TODO: There is probably a better way to implement this.

    Returns:
        Location of MSConvert directory in C:/Users/<username>/AppData/Local/Apps/ProteoWizard.
    """

    # e.g. "C:/Users/<username>/..." splits into ["C:", "Users", "<username>", ...]
    msdial_path_parts = get_msdial_directory().replace("\\", "/").split("/")
    user = msdial_path_parts[2]

    apps_directory = "C:/Users/" + user + "/AppData/Local/Apps/"

    # Collect every ProteoWizard folder, then return the first one found
    proteowizard_folders = []
    for entry in os.scandir(apps_directory):
        if entry.is_dir() and "ProteoWizard" in entry.name:
            proteowizard_folders.append(entry.path)

    return proteowizard_folders[0]
Returns location of MSConvert directory.
This function uses the MS-DIAL directory path to retrieve user ID, which it then uses to
retrieve the path for MSConvert.exe in C:/Users/<username>/AppData/Local/Apps.
TODO: There is probably a better way to implement this.
Returns:
Location of MSConvert directory in C:/Users/<username>/AppData/Local/Apps/ProteoWizard.
def update_msdial_directory(msdial_directory):

    """
    Updates location of MS-DIAL directory, stored in "workspace" table of the Settings database.

    Args:
        msdial_directory (str): New MS-DIAL directory location

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)

        # The row with id 1 of the workspace table is the one updated
        # (the statement is no longer named after the function, which shadowed it)
        update_directory = (
            sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(msdial_directory=msdial_directory)
        )
        connection.execute(update_directory)
    finally:
        # Close the connection even if the UPDATE failed
        connection.close()
Updates location of MS-DIAL directory, stored in "workspace" table of the Settings database.
Arguments:
- msdial_directory (str): New MS-DIAL directory location
Returns:
None
def get_internal_standards_dict(chromatography, value_type, polarity=None):

    """
    Returns dictionary of internal standard keys mapped to either m/z or RT values.

    This function is used to establish a y-axis range for internal standard retention time plots.
    See load_istd_rt_plot() in the PlotGeneration module.

    If no polarity is given, internal standards of both polarities are returned; when the same
    standard name occurs more than once, the value of the last matching row is kept.

    Args:
        chromatography (str):
            Chromatography method to retrieve internal standards for
        value_type (str):
            Column to use for the values, e.g. "precursor_mz" or "retention_time" (values are cast to float)
        polarity (str, default None):
            If specified, only internal standards of this polarity are returned. Accepts "Pos" / "Neg"
            (translated to "Positive Mode" / "Negative Mode"); any other value is matched as given

    Returns:
        Dictionary with key-value pairs of { internal_standard: value_type }
    """

    # Bound parameters are used instead of string concatenation, so that names containing
    # quotes cannot break (or alter) the query
    query = "SELECT * FROM internal_standards WHERE chromatography = :chromatography"
    query_parameters = {"chromatography": chromatography}

    if polarity is not None:
        # Same translation as get_internal_standards(): the table stores the long polarity names
        polarity = {"Pos": "Positive Mode", "Neg": "Negative Mode"}.get(polarity, polarity)
        query += " AND polarity = :polarity"
        query_parameters["polarity"] = polarity

    engine = sa.create_engine(settings_database)
    df_internal_standards = pd.read_sql(sa.text(query), engine, params=query_parameters)

    keys = df_internal_standards["name"].astype(str).tolist()
    values = df_internal_standards[value_type].astype(float).tolist()

    return dict(zip(keys, values))
Returns dictionary of internal standard keys mapped to either m/z or RT values.
This function is used to establish a y-axis range for internal standard retention time plots. See load_istd_rt_plot() in the PlotGeneration module.
TODO: This function needs to filter for polarity!
Arguments:
- chromatography (str): Chromatography method to retrieve internal standards for
- value_type (str): Data type ("precursor_mz", "retention_time", "ms2_spectrum")
Returns:
Dictionary with key-value pairs of { internal_standard: value_type }
def get_internal_standards(chromatography, polarity):

    """
    Returns DataFrame of internal standards for a given chromatography method and polarity.

    Args:
        chromatography (str):
            Chromatography method ID
        polarity (str):
            Polarity, either "Pos" or "Neg" (translated to "Positive Mode" / "Negative Mode");
            any other value is matched against the "polarity" column as given

    Returns:
        DataFrame of "internal_standards" table from Settings database, filtered by chromatography and polarity.
    """

    # The table stores the long polarity names
    polarity = {"Pos": "Positive Mode", "Neg": "Negative Mode"}.get(polarity, polarity)

    # Bound parameters are used instead of string concatenation, so that names containing
    # quotes cannot break (or alter) the query
    query = sa.text(
        "SELECT * FROM internal_standards WHERE chromatography = :chromatography AND polarity = :polarity")
    query_parameters = {"chromatography": chromatography, "polarity": polarity}

    engine = sa.create_engine(settings_database)
    return pd.read_sql(query, engine, params=query_parameters)
Returns DataFrame of internal standards for a given chromatography method and polarity.
Arguments:
- chromatography (str): Chromatography method ID
- polarity (str): Polarity (either "Pos" or "Neg")
Returns:
DataFrame of "internal_standards" table from Settings database, filtered by chromatography and polarity.
def get_targeted_features(biological_standard, chromatography, polarity):

    """
    Returns DataFrame of metabolite targets for a given biological standard, chromatography, and polarity.

    Args:
        biological_standard (str):
            Name of biological standard
        chromatography (str):
            Chromatography method ID (name)
        polarity (str):
            Polarity, either "Pos" or "Neg" (translated to "Positive Mode" / "Negative Mode");
            any other value is matched against the "polarity" column as given

    Returns:
        DataFrame of "targeted_features" table from Settings database, filtered by biological standard,
        chromatography and polarity.
    """

    # The table is queried with the long polarity names
    polarity = {"Pos": "Positive Mode", "Neg": "Negative Mode"}.get(polarity, polarity)

    # Bound parameters are used instead of string concatenation, so that names containing
    # quotes cannot break (or alter) the query
    query = sa.text(
        "SELECT * FROM targeted_features "
        "WHERE chromatography = :chromatography "
        "AND polarity = :polarity "
        "AND biological_standard = :biological_standard")
    query_parameters = {
        "chromatography": chromatography,
        "polarity": polarity,
        "biological_standard": biological_standard,
    }

    engine = sa.create_engine(settings_database)
    return pd.read_sql(query, engine, params=query_parameters)
Returns DataFrame of metabolite targets for a given biological standard, chromatography, and polarity.
Arguments:
- biological_standard (str): Name of biological standard
- chromatography (str): Chromatography method ID (name)
- polarity (str): Polarity (either "Pos" or "Neg")
Returns:
DataFrame of "targeted_features" table from Settings database, filtered by biological standard, chromatography and polarity.
def get_biological_standards():

    """
    Returns DataFrame of the "biological_standards" table from the Settings database.
    """

    # Read the whole table into a DataFrame
    settings_engine = sa.create_engine(settings_database)
    return pd.read_sql("SELECT * FROM biological_standards", settings_engine)
Returns DataFrame of the "biological_standards" table from the Settings database.
def get_biological_standards_list():

    """
    Returns list of unique biological standard names from the Settings database.
    """

    standard_names = get_biological_standards()["name"].astype(str)
    unique_names = standard_names.unique()
    return unique_names.tolist()
Returns list of biological standards from the Settings database.
def add_biological_standard(name, identifier):

    """
    Creates new biological standard with name and identifier.

    The biological standard identifier is a text substring used to distinguish between sample and biological standard.
    MS-AutoQC checks filenames in the sequence for this identifier to process samples accordingly.

    One row is inserted into the "biological_standards" table for each existing chromatography method,
    with zero features and the "Default" MS-DIAL configuration.

    Args:
        name (str):
            Name of biological standard
        identifier (str):
            String identifier in filename for biological standard

    Returns:
        None
    """

    # Get list of chromatography methods
    chromatography_methods = get_chromatography_methods()["method_id"].tolist()

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

        # Insert a biological standard row for each chromatography
        for method in chromatography_methods:
            insert = biological_standards_table.insert().values({
                "name": name,
                "identifier": identifier,
                "chromatography": method,
                "num_pos_features": 0,
                "num_neg_features": 0,
                "msdial_config_id": "Default"
            })
            connection.execute(insert)
    finally:
        # Close the connection even if one of the INSERTs failed
        connection.close()
Creates new biological standard with name and identifier.
The biological standard identifier is a text substring used to distinguish between sample and biological standard. MS-AutoQC checks filenames in the sequence for this identifier to process samples accordingly.
Arguments:
- name (str): Name of biological standard
- identifier (str): String identifier in filename for biological standard
Returns:
None
def remove_biological_standard(name):

    """
    Deletes biological standard and corresponding MSPs from Settings database.

    Removes the MSP files registered for the biological standard from the methods directory, then deletes
    its rows from the "biological_standards" table and its targets from the "targeted_features" table.

    Args:
        name (str): Name of the biological standard

    Returns:
        None
    """

    # Collect the MSP files registered for this biological standard (unset cells are skipped)
    df = get_table("Settings", "biological_standards")
    df = df.loc[df["name"] == name]
    registered_files = df["pos_bio_msp_file"].dropna().astype(str).tolist() \
        + df["neg_bio_msp_file"].dropna().astype(str).tolist()

    # Compare by filename, so that entries stored as full paths match as well
    files_to_delete = {os.path.basename(file) for file in registered_files}

    # Delete corresponding MSPs from "methods" directory. The previous check tested the biological
    # standard *name* against the list, so it either deleted nothing or every file in the directory.
    for file in os.listdir(methods_directory):
        if file in files_to_delete:
            os.remove(os.path.join(methods_directory, file))

    # Connect to database
    db_metadata, connection = connect_to_database("Settings")

    try:
        biological_standards_table = sa.Table("biological_standards", db_metadata, autoload=True)
        targeted_features_table = sa.Table("targeted_features", db_metadata, autoload=True)

        # Remove biological standard
        delete_biological_standard = (
            sa.delete(biological_standards_table)
                .where(biological_standards_table.c.name == name)
        )
        connection.execute(delete_biological_standard)

        # Remove targeted features for that biological standard
        delete_targeted_features = (
            sa.delete(targeted_features_table)
                .where(targeted_features_table.c.biological_standard == name)
        )
        connection.execute(delete_targeted_features)
    finally:
        # Close the connection even if one of the DELETEs failed
        connection.close()
Deletes biological standard and corresponding MSPs from Settings database.
Arguments:
- name (str): Name of the biological standard
Returns:
None
def update_msdial_config_for_bio_standard(biological_standard, chromatography, config_id):

    """
    Assigns an MS-DIAL configuration to a biological standard / chromatography method pair.

    Sets "msdial_config_id" on the rows of the "biological_standards" table whose name and
    chromatography both match the given values.

    Args:
        biological_standard (str):
            Name of the biological standard
        chromatography (str):
            Chromatography method
        config_id (str):
            Name of MS-DIAL configuration to set for this biological standard - chromatography combination

    Returns:
        None
    """

    # Open the Settings database and reflect the table to update
    db_metadata, connection = connect_to_database("Settings")
    standards_table = sa.Table("biological_standards", db_metadata, autoload=True)

    # Row filter: both the standard name and the chromatography method must match
    is_standard = standards_table.c.name == biological_standard
    is_chromatography = standards_table.c.chromatography == chromatography

    statement = (
        sa.update(standards_table)
            .where(is_standard & is_chromatography)
            .values(msdial_config_id=config_id)
    )

    connection.execute(statement)
    connection.close()
Updates MS-DIAL configuration for given biological standard and chromatography method combination.
Arguments:
- biological_standard (str): Name of the biological standard
- chromatography (str): Chromatography method
- config_id (str): Name of MS-DIAL configuration to set for this biological standard - chromatography combination
Returns:
None
def get_biological_standard_identifiers(bio_standards=None):

    """
    Returns dictionary of identifiers for a given list of biological standards.

    If no list is provided (None), returns dict of identifiers for all biological standards.
    An empty list yields an empty dict.

    Args:
        bio_standards (list, default None): List of biological standards

    Returns:
        Dictionary with key-value pairs of { identifier: biological_standard }

    Raises:
        IndexError: If a requested biological standard has no row in the table.
    """

    df_bio_standards = get_biological_standards()

    identifiers = {}

    if bio_standards is not None:
        for bio_standard in bio_standards:
            df = df_bio_standards.loc[df_bio_standards["name"] == bio_standard]
            identifier = df["identifier"].astype(str).unique().tolist()[0]
            identifiers[identifier] = bio_standard
    else:
        # Keep each name paired with its own identifier. De-duplicating the two columns
        # independently and zipping by position misaligns them when identifiers repeat.
        pairs = df_bio_standards[["name", "identifier"]].astype(str).drop_duplicates(subset="name")
        for name, identifier in zip(pairs["name"], pairs["identifier"]):
            identifiers[identifier] = name

    return identifiers
Returns dictionary of identifiers for a given list of biological standards.
If no list is provided, returns dict of identifiers for all biological standards.
Arguments:
- bio_standards (list, default None): List of biological standards
Returns:
Dictionary with key-value pairs of { identifier: biological_standard }
def get_qc_configurations():

    """
    Loads the full "qc_parameters" table from the Settings database as a DataFrame.
    """

    query = "SELECT * FROM qc_parameters"
    settings_engine = sa.create_engine(settings_database)
    return pd.read_sql(query, settings_engine)
Returns DataFrame of "qc_parameters" table from Settings database.
def get_qc_configurations_list():

    """
    Lists the names ("config_name" column) of all QC configurations in the Settings database.
    """

    config_names = get_qc_configurations()["config_name"]
    return config_names.astype(str).tolist()
Returns list of names of QC configurations from Settings database.
def add_qc_configuration(qc_config_name):

    """
    Inserts a new QC configuration, with default parameters, into the "qc_parameters" table
    of the Settings database.

    All four QC checks start enabled, with the cutoff values hard-coded below.

    Args:
        qc_config_name (str): Name of the QC configuration

    Returns:
        None
    """

    # Starting values for a freshly created configuration
    new_config = {
        "config_name": qc_config_name,
        "intensity_dropouts_cutoff": 4,
        "library_rt_shift_cutoff": 0.1,
        "in_run_rt_shift_cutoff": 0.05,
        "library_mz_shift_cutoff": 0.005,
        "intensity_enabled": True,
        "library_rt_enabled": True,
        "in_run_rt_enabled": True,
        "library_mz_enabled": True,
    }

    # Open the Settings database and reflect the QC parameters table
    db_metadata, connection = connect_to_database("Settings")
    parameters_table = sa.Table("qc_parameters", db_metadata, autoload=True)

    # Run the INSERT, then release the connection
    connection.execute(parameters_table.insert().values(new_config))
    connection.close()
Adds a new QC configuration to the "qc_parameters" table in the Settings database.
Arguments:
- qc_config_name (str): Name of the QC configuration
Returns:
None
def remove_qc_configuration(qc_config_name):

    """
    Removes a QC configuration from the "qc_parameters" table in the Settings database.

    Args:
        qc_config_name (str): Name of the QC configuration

    Returns:
        None
    """

    # Open the Settings database and reflect the QC parameters table
    db_metadata, connection = connect_to_database("Settings")
    parameters_table = sa.Table("qc_parameters", db_metadata, autoload=True)

    # DELETE every row whose config_name matches
    matches_config = parameters_table.c.config_name == qc_config_name
    statement = sa.delete(parameters_table).where(matches_config)

    connection.execute(statement)
    connection.close()
Deletes QC configuration from the "qc_parameters" table in the Settings database.
Arguments:
- qc_config_name (str): Name of the QC configuration
Returns:
None
def get_qc_configuration_parameters(config_name=None, instrument_id=None, run_id=None):

    """
    Returns DataFrame of parameters for a selected QC configuration.

    The configuration is selected either directly by name, or indirectly through the
    "qc_config_id" recorded for an instrument run. The DataFrame has columns for each
    parameter, as well as for whether the parameter is enabled (converted from 0 / 1
    to False / True).

    Args:
        config_name (str, default None):
            Name of QC configuration
        instrument_id (str, default None):
            Instrument ID (name); used together with run_id when config_name is not given
        run_id (str, default None):
            Instrument run ID (job ID); used together with instrument_id when config_name is not given

    Returns:
        DataFrame of parameters for QC configuration.

    Raises:
        ValueError: If neither config_name nor both instrument_id and run_id are provided.
    """

    df_configurations = get_table("Settings", "qc_parameters")

    # Resolve the configuration name from the run when it was not passed directly
    if config_name is None:
        if instrument_id is None or run_id is None:
            raise ValueError("Provide either config_name, or both instrument_id and run_id")
        df_runs = get_table(instrument_id, "runs")
        config_name = df_runs.loc[df_runs["run_id"] == run_id]["qc_config_id"].values[0]

    # Work on an independent copy so later writes never target a slice of the full table
    selected_config = df_configurations.loc[df_configurations["config_name"] == config_name].copy()
    selected_config = selected_config.drop(columns=["id", "config_name"])

    # Booleans are stored as integers in the database; convert 1 / 0 back to True / False
    for column in ["intensity_enabled", "library_rt_enabled", "in_run_rt_enabled", "library_mz_enabled"]:
        # Object dtype lets the column hold Python booleans without a dtype conflict
        selected_config[column] = selected_config[column].astype(object)
        selected_config.loc[selected_config[column] == 1, column] = True
        selected_config.loc[selected_config[column] == 0, column] = False

    # Return parameters of selected configuration as a DataFrame
    return selected_config
Returns DataFrame of parameters for a selected QC configuration.
The DataFrame has columns for each parameter, as well as for whether the parameter is enabled.
Arguments:
- config_name (str, default None): Name of QC configuration
- instrument_id (str, default None): Instrument ID (name)
- run_id (str, default None): Instrument run ID (job ID)
Returns:
DataFrame of parameters for QC configuration.
def update_qc_configuration(config_name, intensity_dropouts_cutoff, library_rt_shift_cutoff, in_run_rt_shift_cutoff,
    library_mz_shift_cutoff, intensity_enabled, library_rt_enabled, in_run_rt_enabled, library_mz_enabled):

    """
    Overwrites the cutoffs and enabled flags of an existing QC configuration.

    The database schema stores booleans as integers (0 for False, 1 for True);
    get_qc_configuration_parameters() converts them back to booleans on read.

    Args:
        config_name (str):
            Name of QC configuration
        intensity_dropouts_cutoff (int):
            Minimum number of internal standard intensity dropouts to constitute a QC fail
        library_rt_shift_cutoff (float):
            Maximum shift from library RT values to constitute a QC fail
        in_run_rt_shift_cutoff (float):
            Maximum shift from in-run RT values to constitute a QC fail
        library_mz_shift_cutoff (float):
            Maximum shift from library m/z values to constitute a QC fail
        intensity_enabled (bool):
            Enables / disables QC check for intensity dropout cutoffs
        library_rt_enabled (bool):
            Enables / disables QC check for library RT shifts
        in_run_rt_enabled (bool):
            Enables / disables QC check for in-run RT shifts
        library_mz_enabled (bool):
            Enables / disables QC check for library m/z shifts

    Returns:
        None
    """

    # Column -> new value mapping for the UPDATE
    new_values = {
        "intensity_dropouts_cutoff": intensity_dropouts_cutoff,
        "library_rt_shift_cutoff": library_rt_shift_cutoff,
        "in_run_rt_shift_cutoff": in_run_rt_shift_cutoff,
        "library_mz_shift_cutoff": library_mz_shift_cutoff,
        "intensity_enabled": intensity_enabled,
        "library_rt_enabled": library_rt_enabled,
        "in_run_rt_enabled": in_run_rt_enabled,
        "library_mz_enabled": library_mz_enabled,
    }

    # Open the Settings database and reflect the QC parameters table
    db_metadata, connection = connect_to_database("Settings")
    parameters_table = sa.Table("qc_parameters", db_metadata, autoload=True)

    # UPDATE only the rows of the named configuration
    matches_config = parameters_table.c.config_name == config_name
    statement = sa.update(parameters_table).where(matches_config).values(**new_values)

    connection.execute(statement)
    connection.close()
Updates parameters for the given QC configuration.
Due to the database schema, booleans are stored as integers: 0 for False and 1 for True. They need to be cast back to booleans in get_qc_configuration_parameters(). A schema change would remove the bloat.
Arguments:
- config_name (str): Name of QC configuration
- intensity_dropouts_cutoff (int): Minimum number of internal standard intensity dropouts to constitute a QC fail
- library_rt_shift_cutoff (float): Maximum shift from library RT values to constitute a QC fail
- in_run_rt_shift_cutoff (float): Maximum shift from in-run RT values to constitute a QC fail
- library_mz_shift_cutoff (float): Maximum shift from library m/z values to constitute a QC fail
- intensity_enabled (bool): Enables / disables QC check for intensity dropout cutoffs
- library_rt_enabled (bool): Enables / disables QC check for library RT shifts
- in_run_rt_enabled (bool): Enables / disables QC check for in-run RT shifts
- library_mz_enabled (bool): Enables / disables QC check for library m/z shifts
Returns:
None
def get_samples_in_run(instrument_id, run_id, sample_type="Both"):

    """
    Returns DataFrame of samples for a given instrument run from instrument database.

    For "Both", biological standard rows (minus their "biological_standard" column) come
    first, followed by the sample rows, with a fresh index.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        sample_type (str):
            Sample type, either "Sample" or "Biological Standard" or "Both"

    Returns:
        DataFrame of sample tables for a given instrument run.

    Raises:
        ValueError: If sample_type is not one of the three supported values.
    """

    if sample_type == "Sample":
        df = get_table(instrument_id, "sample_qc_results")

    elif sample_type == "Biological Standard":
        df = get_table(instrument_id, "bio_qc_results")

    elif sample_type == "Both":
        df_samples = get_table(instrument_id, "sample_qc_results")
        df_bio_standards = get_table(instrument_id, "bio_qc_results")
        df_bio_standards = df_bio_standards.drop(columns=["biological_standard"])
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        df = pd.concat([df_bio_standards, df_samples], ignore_index=True)

    else:
        raise ValueError('sample_type must be "Sample", "Biological Standard", or "Both"')

    return df.loc[df["run_id"] == run_id]
Returns DataFrame of samples for a given instrument run from instrument database.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- sample_type (str): Sample type, either "Sample" or "Biological Standard" or "Both"
Returns:
DataFrame of sample tables for a given instrument run.
def get_samples_from_csv(instrument_id, run_id, sample_type="Both"):

    """
    Returns DataFrame of samples in a given run using CSV files from Google Drive.

    CSV files of the run metadata, samples, and biological standards tables are stored
    in the ../data/Instrument_ID_Run_ID/csv directory, and removed on job completion.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        sample_type (str):
            Sample type, either "Sample" or "Biological Standard" or "Both"

    Returns:
        DataFrame of samples for a given instrument run, without the "id" column if one was present.

    Raises:
        ValueError: If sample_type is not one of the three supported values.
    """

    # Folder name is "<instrument id with underscores>_<run id>"
    run_folder = instrument_id.replace(" ", "_") + "_" + run_id
    csv_directory = os.path.join(data_directory, run_folder, "csv")

    samples_csv = os.path.join(csv_directory, "samples.csv")
    bio_standards_csv = os.path.join(csv_directory, "bio_standards.csv")

    if sample_type == "Sample":
        df = pd.read_csv(samples_csv, index_col=False)

    elif sample_type == "Biological Standard":
        df = pd.read_csv(bio_standards_csv, index_col=False)

    elif sample_type == "Both":
        df_samples = pd.read_csv(samples_csv, index_col=False)
        df_bio_standards = pd.read_csv(bio_standards_csv, index_col=False)
        df_bio_standards = df_bio_standards.drop(columns=["biological_standard"])
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        df = pd.concat([df_bio_standards, df_samples], ignore_index=True)

    else:
        raise ValueError('sample_type must be "Sample", "Biological Standard", or "Both"')

    df = df.loc[df["run_id"] == run_id]

    # The "id" column is optional; errors="ignore" skips it when absent without
    # hiding unrelated exceptions the way a `return` inside `finally` does
    return df.drop(columns=["id"], errors="ignore")
Returns DataFrame of samples in a given run using CSV files from Google Drive.
CSV files of the run metadata, samples, and biological standards tables are stored in the ../data/Instrument_ID_Run_ID/csv directory, and removed on job completion.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- sample_type (str): Sample type, either "Sample" or "Biological Standard" or "Both"
Returns:
DataFrame of samples for a given instrument run.
def get_next_sample(sample_id, instrument_id, run_id):

    """
    Looks up the sample that comes right after the given one in an instrument run.

    Args:
        sample_id (str):
            Sample ID
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        str: The next sample in the instrument run after the given sample ID, or None if last sample.
    """

    # Ordered sample IDs (biological standards and samples) for this run
    run_samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist()

    # Position of the sample that follows the given one
    following_index = run_samples.index(sample_id) + 1

    # Nothing follows the last sample
    if following_index == len(run_samples):
        return None

    return run_samples[following_index]
Returns sample following the given sample, or None if last sample.
Arguments:
- sample_id (str): Sample ID
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
str: The next sample in the instrument run after the given sample ID, or None if last sample.
def get_remaining_samples(instrument_id, run_id):

    """
    Lists the samples remaining in a given instrument run (QC job).

    The list starts at the run's "latest_sample" (inclusive) and runs to the end. If no
    latest sample has been recorded yet, every sample in the run is returned.

    TODO: This function should just return the samples with null values in the "qc_result" column.
    The "latest_sample" value in the "runs" table may be unreliable.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        list: List of samples remaining in a QC job.
    """

    # Last processed sample recorded for the run (the string "None" when unset)
    run_record = get_instrument_run(instrument_id, run_id)
    last_processed = run_record["latest_sample"].astype(str).values[0]

    # Ordered sample IDs (biological standards and samples) for this run
    run_samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist()

    # Beginning of run: everything is still remaining
    if last_processed == "None":
        return run_samples

    # Slice from the latest sample through the end of the run
    start = run_samples.index(last_processed)
    return run_samples[start:]
Returns list of samples remaining in a given instrument run (QC job).
TODO: This function should just return the samples with null values in the "qc_result" column. The "latest_sample" value in the "runs" table may be unreliable.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
list: List of samples remaining in a QC job.
def get_unprocessed_samples(instrument_id, run_id):

    """
    For an active run, returns 1) a list of samples that were not processed due to error / runtime termination,
    and 2) the current sample being monitored / processed.

    A sample counts as unprocessed when its "qc_result" is null and a file with the same base name
    (text before the first ".") exists in the run's acquisition path. The last such sample, in run
    order, is reported as the current sample and excluded from the list.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        tuple: List of unprocessed samples for the given instrument run, and current sample being
        monitored / processed (None if there are no unprocessed samples with acquired data).
    """

    # Samples in the run that have no QC result yet, in run order
    df_samples = get_samples_in_run(instrument_id, run_id, "Both")
    pending_samples = df_samples.loc[df_samples["qc_result"].isnull(), "sample_id"].astype(str).tolist()

    # Base names of the files present in the acquisition path
    acquisition_path = get_acquisition_path(instrument_id, run_id)
    acquired_names = {file.split(".")[0] for file in os.listdir(acquisition_path)}

    # Keep only pending samples whose data file has been acquired; plain filtering avoids
    # writing a marker column onto a filtered slice of the samples table
    unprocessed_samples = [sample for sample in pending_samples if sample in acquired_names]

    # The last acquired-but-unprocessed sample is the one currently being monitored
    if unprocessed_samples:
        current_sample = unprocessed_samples.pop()
    else:
        current_sample = None

    # Return as tuple
    return unprocessed_samples, current_sample
For an active run, returns 1) a list of samples that were not processed due to error / runtime termination, and 2) the current sample being monitored / processed.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
tuple: List of unprocessed samples for the given instrument run, and current sample being monitored / processed.
def get_current_sample(instrument_id, run_id):

    """
    Returns the current sample being monitored / processed.

    TODO: The "latest_sample" is the last sample to be processed. Nomenclature needs to be updated in many places.

    NOTE(review): only the beginning-of-run case is implemented. Once a latest sample has been
    recorded for the run, this function returns None.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        str: Current sample being monitored / processed, or None if it cannot be determined.
    """

    # Get latest sample in run (the string "None" when nothing has been recorded yet)
    df_run = get_instrument_run(instrument_id, run_id)
    latest_sample = df_run["latest_sample"].astype(str).values[0]

    # Return second sample if beginning of run
    if latest_sample == "None":
        # The sample list was referenced here without being defined (NameError); load it first
        samples = get_samples_in_run(instrument_id, run_id, "Both")["sample_id"].astype(str).tolist()
        return samples[1] if len(samples) > 1 else None

    return None
Returns the current sample being monitored / processed.
TODO: The "latest_sample" is the last sample to be processed. Nomenclature needs to be updated in many places.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
str: Current sample being monitored / processed.
def parse_internal_standard_data(instrument_id, run_id, result_type, polarity, load_from, as_json=True):

    """
    Parses data from database into JSON-ified DataFrame for samples (as rows) vs. internal standards (as columns).

    Data is stored in a column (for example, "retention_time") as a single-record string dict with the following structure:

    | Sample     | iSTD 1 | iSTD 2 | ... |
    | ---------- | ------ | ------ | ... |
    | SAMPLE_001 | 1.207  | 1.934  | ... |

    These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat().

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        result_type (str):
            Column in sample_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity")
        polarity (str):
            Polarity ("Pos" or "Neg")
        load_from (str):
            Where to load data from: "database" or "processing" (instrument database), or "csv"
            (CSV file, during Google Drive sync of active run)
        as_json (bool, default True):
            Whether to return table as JSON string or as DataFrame

    Returns:
        DataFrame of samples (rows) vs. internal standards (columns), as a JSON string when as_json is True.
        Returns None when load_from is "processing" and no sample has a result yet.

    Raises:
        ValueError: If load_from is not "database", "processing", or "csv".
    """

    # Get relevant QC results table from database
    if load_from == "database" or load_from == "processing":
        df_samples = get_samples_in_run(instrument_id, run_id, "Sample")
    elif load_from == "csv":
        df_samples = get_samples_from_csv(instrument_id, run_id, "Sample")
    else:
        raise ValueError('load_from must be "database", "processing", or "csv"')

    # Filter by polarity
    df_samples = df_samples.loc[df_samples["polarity"] == polarity]
    sample_ids = df_samples["sample_id"].astype(str).tolist()

    # Return None if results are None
    if load_from == "processing":
        if len(df_samples[result_type].dropna()) == 0:
            return None

    # Initialize DataFrame with individual records of sample data;
    # literal_eval only parses Python literals, and missing results become empty records
    results = df_samples[result_type].astype(str).tolist()
    results = [ast.literal_eval(result) if result != "None" and result != "nan" else {} for result in results]
    df_results = pd.DataFrame(results)

    # "Name" is absent when every record is empty, so it must not be required
    df_results = df_results.drop(columns=["Name"], errors="ignore")
    df_results["Sample"] = sample_ids

    # Return DataFrame as JSON string
    if as_json:
        return df_results.to_json(orient="records")
    else:
        return df_results
Parses data from database into JSON-ified DataFrame for samples (as rows) vs. internal standards (as columns).
Data is stored in a column (for example, "retention_time") as a single-record string dict with the following structure:
| Sample     | iSTD 1 | iSTD 2 | ... |
| ---------- | ------ | ------ | ... |
| SAMPLE_001 | 1.207  | 1.934  | ... |
These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat().
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- result_type (str): Column in sample_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity")
- polarity (str): Polarity ("Pos" or "Neg")
- load_from (str): Specifies whether to load data from CSV file (during Google Drive sync of active run) or instrument database
- as_json (bool, default True): Whether to return table as JSON string or as DataFrame
Returns:
DataFrame of samples (rows) vs. internal standards (columns) as JSON string.
def parse_biological_standard_data(instrument_id, run_id, result_type, polarity, biological_standard, load_from, as_json=True):

    """
    Builds a table of targeted features (columns) vs. instrument runs (rows) for a biological standard.

    Rows of the biological standard QC results are filtered by biological standard, polarity, and
    by the chromatography method of the given run. The "Name" of each record is then set to the
    instrument run the result belongs to.

    Each result is stored in a column (for example, "intensity") as a single-record string dict:

    | Name                | Metabolite 1 | Metabolite 2 | ... |
    | ------------------- | ------------ | ------------ | ... |
    | INSTRUMENT_RUN_001  | 13597340     | 53024853     | ... |

    The records are combined in a single pd.DataFrame() call:

    | Name                | Metabolite 1 | Metabolite 2 | ... |
    | ------------------- | ------------ | ------------ | ... |
    | INSTRUMENT_RUN_001  | 13597340     | 53024853     | ... |
    | INSTRUMENT_RUN_002  | 23543246     | 102030406    | ... |
    | ...                 | ...          | ...          | ... |

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        result_type (str):
            Column in bio_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity")
        polarity (str):
            Polarity ("Pos" or "Neg")
        biological_standard (str):
            Name of biological standard
        load_from (str):
            Where to load data from: "database" (instrument database) or "csv" (CSV file, during
            Google Drive sync of active run)
        as_json (bool, default True):
            Whether to return table as JSON string or as DataFrame

    Returns:
        JSON-ified DataFrame of targeted features for a biological standard (columns) vs. instrument runs (rows).
    """

    # Load the biological standard QC results from the requested source
    if load_from == "database":
        df_bio = get_table(instrument_id, "bio_qc_results")
    elif load_from == "csv":
        run_folder = instrument_id.replace(" ", "_") + "_" + run_id
        csv_path = os.path.join(data_directory, run_folder, "csv", "bio_standards.csv")
        df_bio = pd.read_csv(csv_path, index_col=False)

    # Keep rows for the requested biological standard, then for the requested polarity
    df_bio = df_bio.loc[df_bio["biological_standard"] == biological_standard]
    df_bio = df_bio.loc[df_bio["polarity"] == polarity]

    # Chromatography method used by the given run
    df_runs = get_table(instrument_id, "runs")
    run_chromatography = df_runs.loc[df_runs["run_id"] == run_id]["chromatography"].values[0]

    # Keep results from every run that used the same chromatography method
    same_method_runs = df_runs.loc[df_runs["chromatography"] == run_chromatography]["run_id"].astype(str).tolist()
    df_bio = df_bio.loc[df_bio["run_id"].isin(same_method_runs)]
    row_run_ids = df_bio["run_id"].astype(str).tolist()

    # Parse each stored record; missing results become empty records
    records = []
    for raw in df_bio[result_type].fillna('{}').tolist():
        if raw != "None" and raw != "nan":
            records.append(ast.literal_eval(raw))
        else:
            records.append({})

    df_results = pd.DataFrame(records)
    df_results["Name"] = row_run_ids

    if not as_json:
        return df_results

    return df_results.to_json(orient="records")
Parses biological standard data into JSON-ified DataFrame of targeted features (as columns) vs. instrument runs (as rows).
The bio_qc_results table in the instrument database is first filtered by biological standard, chromatography, and polarity. Then, the sample name is replaced with the instrument run it was associated with.
Data is stored in a column (for example, "intensity") as a single-record string dict with the following structure:
| Name               | Metabolite 1 | Metabolite 2 | ... |
| ------------------ | ------------ | ------------ | ... |
| INSTRUMENT_RUN_001 | 13597340     | 53024853     | ... |
These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat().
| Name               | Metabolite 1 | Metabolite 2 | ... |
| ------------------ | ------------ | ------------ | ... |
| INSTRUMENT_RUN_001 | 13597340     | 53024853     | ... |
| INSTRUMENT_RUN_002 | 23543246     | 102030406    | ... |
| ...                | ...          | ...          | ... |
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- result_type (str): Column in bio_qc_results table to parse (either "retention_time" or "precursor_mz" or "intensity")
- polarity (str): Polarity ("Pos" or "Neg")
- biological_standard (str): Name of biological standard
- load_from (str): Specifies whether to load data from CSV file (during Google Drive sync of active run) or instrument database
- as_json (bool, default True): Whether to return table as JSON string or as DataFrame
Returns:
JSON-ified DataFrame of targeted features for a biological standard (columns) vs. instrument runs (rows).
def parse_internal_standard_qc_data(instrument_id, run_id, polarity, result_type, load_from, as_json=True):

    """
    Parses QC data into JSON-ified DataFrame for samples (as rows) vs. internal standards (as columns).

    The "qc_dataframe" column holds, per sample, a string-encoded list of six records, one per
    result type, in this order: "Delta m/z", "Delta RT", "In-run delta RT", "Intensity dropout",
    "Warnings", "Fails". The record for the requested result type is taken from every sample,
    and the records are combined in a single pd.DataFrame() call.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        polarity (str):
            Polarity ("Pos" or "Neg")
        result_type (str):
            QC result to parse (one of "Delta m/z", "Delta RT", "In-run delta RT",
            "Intensity dropout", "Warnings", or "Fails")
        load_from (str):
            Where to load data from: "database" or "processing" (instrument database), or "csv"
            (CSV file, during Google Drive sync of active run)
        as_json (bool, default True):
            Whether to return table as JSON string or as DataFrame

    Returns:
        JSON-ified DataFrame of QC data for samples (as rows) vs. internal standards (as columns).

    Raises:
        ValueError: If load_from is not "database", "processing", or "csv".
        KeyError: If result_type is not one of the supported result types.
    """

    # Get relevant QC results table from database
    if load_from == "database" or load_from == "processing":
        df_samples = get_samples_in_run(instrument_id, run_id, "Sample")
    elif load_from == "csv":
        df_samples = get_samples_from_csv(instrument_id, run_id, "Sample")
    else:
        raise ValueError('load_from must be "database", "processing", or "csv"')

    # Filter by polarity
    df_samples = df_samples.loc[df_samples["polarity"] == polarity]

    # For results DataFrame, each index corresponds to the result type
    get_result_index = {
        "Delta m/z": 0,
        "Delta RT": 1,
        "In-run delta RT": 2,
        "Intensity dropout": 3,
        "Warnings": 4,
        "Fails": 5
    }

    # Get list of results using result type; samples without QC data get six empty records
    sample_ids = df_samples["sample_id"].astype(str).tolist()
    results = df_samples["qc_dataframe"].fillna('[{}, {}, {}, {}, {}, {}]').astype(str).tolist()

    type_index = get_result_index[result_type]
    results = [ast.literal_eval(result)[type_index] for result in results]
    df_results = pd.DataFrame(results)

    # "Name" is absent when every record is empty, so it must not be required
    df_results = df_results.drop(columns=["Name"], errors="ignore")
    df_results["Sample"] = sample_ids

    # Return DataFrame as JSON string
    if as_json:
        return df_results.to_json(orient="records")
    else:
        return df_results
Parses QC data into JSON-ified DataFrame for samples (as rows) vs. internal standards (as columns).
The QC DataFrame is stored in the "qc_dataframe" column as a single-record string dict with the following structure:
| Sample     | Delta m/z | Delta RT | In-run delta RT | Warnings | Fails |
| ---------- | --------- | -------- | --------------- | -------- | ----- |
| SAMPLE_001 | 0.000001  | 0.001    | 0.00001         | None     | None  |
These records are concatenated together with this function using pd.DataFrame(), which is 100x faster than pd.concat().
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- polarity (str): Polarity ("Pos" or "Neg")
- result_type (str): Result type to extract from each sample's "qc_dataframe" record (one of "Delta m/z", "Delta RT", "In-run delta RT", "Intensity dropout", "Warnings", or "Fails")
- load_from (str): Specifies whether to load data from CSV file (during Google Drive sync of active run) or instrument database
- as_json (bool, default True): Whether to return table as JSON string or as DataFrame
Returns:
JSON-ified DataFrame of QC data for samples (as rows) vs. internal standards (as columns).
def get_workspace_users_list():

    """
    Returns the email addresses of all users that have access to the MS-AutoQC workspace.

    Returns:
        list: Values of the "email_address" column in the "gdrive_users" table of the
        Settings database, converted to strings.
    """

    df_gdrive_users = get_table("Settings", "gdrive_users")
    email_addresses = df_gdrive_users["email_address"].astype(str)
    return email_addresses.tolist()
Returns a list of users that have access to the MS-AutoQC workspace.
def add_user_to_workspace(email_address):

    """
    Gives user access to workspace in Google Drive and stores email address in database.

    Access is granted by sharing the MS-AutoQC folder in Google Drive with the user's Google account
    (as a "writer"). The resulting permission's name and ID are stored with the email address in the
    "gdrive_users" table of the Settings database.

    Args:
        email_address (str): Email address for Google account to grant access to workspace.

    Returns:
        None on success; "User already exists" if the email address is already in the
        "gdrive_users" table; "Error" if no Google Drive folder ID is stored.
    """

    if email_address in get_workspace_users_list():
        return "User already exists"

    # Get Google Drive instance
    drive = get_drive_instance()

    # Get ID of MS-AutoQC folder in Google Drive
    gdrive_folder_id = get_drive_folder_id()

    if gdrive_folder_id is None:
        return "Error"

    # Add user access by updating permissions
    folder = drive.CreateFile({"id": gdrive_folder_id})
    permission = folder.InsertPermission({
        "type": "user",
        "role": "writer",
        "value": email_address})

    # Insert user email address in "gdrive_users" table
    db_metadata, connection = connect_to_database("Settings")

    try:
        gdrive_users_table = sa.Table("gdrive_users", db_metadata, autoload=True)

        insert_user_email = gdrive_users_table.insert().values(
            {"name": permission["name"],
             "email_address": email_address,
             "permission_id": permission["id"]})

        connection.execute(insert_user_email)
    finally:
        # Always release the connection, even if reflection or the insert fails
        connection.close()
Gives user access to workspace in Google Drive and stores email address in database.
Access is granted by sharing the MS-AutoQC folder in Google Drive with the user's Google account.
Arguments:
- email_address (str): Email address for Google account to grant access to workspace.
Returns:
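None on success; "User already exists" if the email address is already registered, or "Error" if no Google Drive folder ID is available.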
def delete_user_from_workspace(email_address):

    """
    Removes user access to workspace in Google Drive and deletes email from database.

    The user's permission ID is looked up in the "gdrive_users" table of the Settings database,
    the permission is deleted from the MS-AutoQC Google Drive folder, and the user's row is removed.

    Args:
        email_address (str): Email address for Google account whose access will be revoked.

    Returns:
        None on success; "User does not exist" if the email address is not in the
        "gdrive_users" table; "Error" if no Google Drive folder ID is stored.
    """

    if email_address not in get_workspace_users_list():
        return "User does not exist"

    # Get Google Drive instance
    drive = get_drive_instance()

    # Get ID of MS-AutoQC folder in Google Drive
    gdrive_folder_id = get_drive_folder_id()

    if gdrive_folder_id is None:
        return "Error"

    # Get permission ID of user from database
    folder = drive.CreateFile({"id": gdrive_folder_id})
    df_gdrive_users = get_table("Settings", "gdrive_users")
    df_gdrive_users = df_gdrive_users.loc[df_gdrive_users["email_address"] == email_address]
    permission_id = df_gdrive_users["permission_id"].astype(str).values[0]

    # Delete user access by updating permissions
    folder.DeletePermission(permission_id)

    # Delete user email address in "gdrive_users" table
    db_metadata, connection = connect_to_database("Settings")

    try:
        gdrive_users_table = sa.Table("gdrive_users", db_metadata, autoload=True)

        delete_user_email = (
            sa.delete(gdrive_users_table)
                .where((gdrive_users_table.c.email_address == email_address))
        )

        connection.execute(delete_user_email)
    finally:
        # Always release the connection, even if reflection or the delete fails
        connection.close()
Removes user access to workspace in Google Drive and deletes email from database.
Arguments:
- email_address (str): Email address for Google account whose access will be revoked.
Returns:
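None on success; "User does not exist" if the email address is not registered, or "Error" if no Google Drive folder ID is available.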
def get_qc_results(instrument_id, sample_list, is_bio_standard=False):

    """
    Returns DataFrame of QC results for a given sample list.

    TODO: This function will break if samples in different runs have the same sample ID. Add run ID filter.

    Args:
        instrument_id (str):
            Instrument ID
        sample_list (list):
            List of samples to query
        is_bio_standard (bool, default False):
            Whether the list is biological standards (True) or samples (False)

    Returns:
        DataFrame with "sample_id" and "qc_result" columns for the given sample list,
        or an empty DataFrame if sample_list is empty.
    """

    if len(sample_list) == 0:
        return pd.DataFrame()

    database = get_database_file(instrument_id=instrument_id, sqlite_conn=True)
    engine = sa.create_engine(database)

    # Table name comes from a fixed pair of constants, never from caller input
    table_name = "bio_qc_results" if is_bio_standard else "sample_qc_results"

    # Bind the sample IDs as parameters instead of splicing str(list) into the SQL text.
    # The old approach rewrote "[" / "]" inside sample IDs and broke on IDs containing quotes.
    query = sa.text(
        "SELECT sample_id, qc_result FROM " + table_name + " WHERE sample_id IN :sample_ids"
    ).bindparams(sa.bindparam("sample_ids", expanding=True))

    return pd.read_sql(query, engine, params={"sample_ids": list(sample_list)})
Returns DataFrame of QC results for a given sample list.
TODO: This function will break if samples in different runs have the same sample ID. Add run ID filter.
Arguments:
- instrument_id (str): Instrument ID
- sample_list (list): List of samples to query
- is_bio_standard (bool, default False): Whether the list is biological standards (True) or samples (False)
Returns:
DataFrame of QC results for a given sample list.
def create_workspace_metadata():

    """
    Creates record in "workspace" table to store various metadata.

    Inserts a single row with id 1 into the "workspace" table of the Settings database.

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)
        connection.execute(workspace_table.insert().values({"id": 1}))
    finally:
        # Always release the connection, even if reflection or the insert fails
        connection.close()
Creates record in "workspace" table to store various metadata.
def get_device_identity():

    """
    Returns device identity (either an Instrument ID or "Shared user").

    Returns:
        str: First value of the "instrument_identity" column in the "workspace" table
        of the Settings database.
    """

    df_workspace = get_table("Settings", "workspace")
    identities = df_workspace["instrument_identity"].astype(str).tolist()
    return identities[0]
Returns device identity (either an Instrument ID or "Shared user").
def set_device_identity(is_instrument_computer, instrument_id):

    """
    Indicates whether the user's device is the instrument PC or not.

    Updates the "is_instrument_computer" and "instrument_identity" columns of the
    workspace record (id 1) in the Settings database.

    Args:
        is_instrument_computer (bool):
            Whether the device is an instrument computer or not
        instrument_id (str):
            Instrument ID (ignored and stored as "Shared user" when is_instrument_computer is falsy)

    Returns:
        None
    """

    if not is_instrument_computer:
        instrument_id = "Shared user"

    db_metadata, connection = connect_to_database("Settings")

    try:
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)

        update_identity = (
            sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(
                    is_instrument_computer=is_instrument_computer,
                    instrument_identity=instrument_id
            )
        )

        connection.execute(update_identity)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Indicates whether the user's device is the instrument PC or not.
Arguments:
- is_instrument_computer (bool): Whether the device is an instrument computer or not
- instrument_id (str): Instrument ID (stored as "Shared user" when is_instrument_computer is False)
Returns:
None
def run_is_on_instrument_pc(instrument_id, run_id):

    """
    Validates that the current device is the instrument PC on which the run was started.

    Compares the "instrument_id" recorded for the run against the "instrument_identity"
    stored in the "workspace" table of the Settings database.

    TODO: Use this function in PlotGeneration and DashWebApp module.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID

    Returns:
        True if instrument run was started on the current device, and False if not.
    """

    # Instrument recorded for the run
    df_run = get_instrument_run(instrument_id, run_id)
    run_instrument = df_run["instrument_id"].astype(str).tolist()[0]

    # Identity of the device this code is running on
    df_workspace = get_table("Settings", "workspace")
    device_identity = df_workspace["instrument_identity"].astype(str).tolist()[0]

    return run_instrument == device_identity
Validates that the current device is the instrument PC on which the run was started.
TODO: Use this function in PlotGeneration and DashWebApp module.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID
Returns:
True if instrument run was started on the current device, and False if not.
def update_slack_bot_token(slack_bot_token):

    """
    Updates Slack bot user OAuth 2.0 token in "workspace" table of Settings database.

    For details on the Slack API, see: https://slack.dev/python-slack-sdk/

    Args:
        slack_bot_token (str): Slack bot user OAuth token

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)

        # Named so that it does not shadow this function
        update_token = (
            sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(slack_bot_token=slack_bot_token)
        )

        connection.execute(update_token)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Updates Slack bot user OAuth 2.0 token in "workspace" table of Settings database.
For details on the Slack API, see: https://slack.dev/python-slack-sdk/
Arguments:
- slack_bot_token (str): Slack bot user OAuth token
Returns:
None
def get_slack_bot_token():

    """
    Returns Slack bot token stored in "workspace" table of Settings database.

    Returns:
        str: First value of the "slack_bot_token" column, converted to a string.
    """

    df_workspace = get_table("Settings", "workspace")
    tokens = df_workspace["slack_bot_token"].astype(str)
    return tokens.values[0]
Returns Slack bot token stored in "workspace" table of Settings database.
def update_slack_channel(slack_channel, notifications_enabled):

    """
    Updates Slack channel registered for notifications in "workspace" table of Settings database.

    Any "#" characters are stripped from the channel name before it is stored.

    Args:
        slack_channel (str):
            Slack channel to post messages to
        notifications_enabled (bool):
            Whether to send Slack notifications for QC warnings and fails

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        workspace_table = sa.Table("workspace", db_metadata, autoload=True)

        # Named so that it does not shadow this function
        update_channel = (
            sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(
                    slack_channel=slack_channel.replace("#", ""),
                    slack_enabled=notifications_enabled)
        )

        connection.execute(update_channel)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Updates Slack channel registered for notifications in "workspace" table of Settings database.
Arguments:
- slack_channel (str): Slack channel to post messages to
- notifications_enabled (bool): Whether to send Slack notifications for QC warnings and fails
Returns:
None
def get_slack_channel():

    """
    Returns Slack channel registered for notifications.

    Returns:
        str: First value of the "slack_channel" column in the "workspace" table
        of the Settings database, converted to a string.
    """

    df_workspace = get_table("Settings", "workspace")
    channels = df_workspace["slack_channel"].astype(str)
    return channels.values[0]
Returns Slack channel registered for notifications.
def get_slack_notifications_toggled():

    """
    Returns Slack notification toggle setting.

    Returns:
        int: First value of the "slack_enabled" column in the "workspace" table of the
        Settings database, or None if the value cannot be read or converted to an integer.
    """

    try:
        return get_table("Settings", "workspace")["slack_enabled"].astype(int).tolist()[0]
    except Exception:
        # Best-effort lookup: a missing column, empty table, or null value all mean "not set".
        # Catching Exception (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
        return None
Returns Slack notification toggle setting.
def get_email_notifications_list(as_string=False):

    """
    Returns list of emails registered for email notifications for QC warnings and fails.

    Args:
        as_string (bool, default False):
            Whether to return the list as a comma-separated string (for Gmail API)
            or as list object (for display in Settings page)

    Returns:
        Emails registered for QC warning/fail notifications, read from the "email_notifications"
        table of the Settings database: a list of strings, or a single comma-separated string
        (empty if no emails are registered) when as_string is True.
    """

    email_list = get_table("Settings", "email_notifications")["email_address"].astype(str).tolist()

    if as_string:
        # str.join puts a separator between every pair of addresses. The previous loop compared
        # each address to the last one by value, so it dropped the comma after any earlier
        # address that happened to equal the last.
        return ",".join(email_list)

    return email_list
Returns list of emails registered for email notifications for QC warnings and fails.
Arguments:
- as_string (bool, default False): Whether to return the list as a string (for Gmail API) or as list object (for display in Settings page)
Returns:
List of emails registered for QC warning/fail notifications.
def register_email_for_notifications(email_address):

    """
    Inserts email address into "email_notifications" table in Settings database.

    Args:
        email_address (str): Email address to register for notifications.

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        email_notifications_table = sa.Table("email_notifications", db_metadata, autoload=True)

        insert_email_address = email_notifications_table.insert().values({
            "email_address": email_address
        })

        connection.execute(insert_email_address)
    finally:
        # Always release the connection, even if reflection or the insert fails
        connection.close()
Inserts email address into "email_notifications" table in Settings database.
Arguments:
- email_address (str): Email address to register for notifications.
Returns:
None
def delete_email_from_notifications(email_address):

    """
    Deletes email address from "email_notifications" table in Settings database.

    Args:
        email_address (str): Email address to unsubscribe from notifications.

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        email_notifications_table = sa.Table("email_notifications", db_metadata, autoload=True)

        delete_email_address = (
            sa.delete(email_notifications_table)
                .where((email_notifications_table.c.email_address == email_address))
        )

        connection.execute(delete_email_address)
    finally:
        # Always release the connection, even if reflection or the delete fails
        connection.close()
Deletes email address from "email_notifications" table in Settings database.
Arguments:
- email_address (str): Email address to unsubscribe from notifications.
Returns:
None
def get_completed_samples_count(instrument_id, run_id, status):

    """
    Returns the number of completed samples and the total number of samples in an instrument run.

    The run record is read from CSV only when the run is "Active", sync is enabled, and this
    device's identity differs from the given instrument ID; otherwise it is read from the
    instrument database.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        status (str):
            Instrument run (QC job) status, either "Active" or "Complete"

    Returns:
        tuple: (completed samples, total samples) for the given instrument run, as integers.
    """

    # Checks short-circuit in the same order as before: status, then sync, then device identity
    read_from_csv = (
        status == "Active"
        and sync_is_enabled()
        and get_device_identity() != instrument_id
    )

    if read_from_csv:
        df_instrument_run = get_instrument_run_from_csv(instrument_id, run_id)
    else:
        df_instrument_run = get_instrument_run(instrument_id, run_id)

    completed_counts = df_instrument_run["completed"].astype(int).tolist()
    sample_counts = df_instrument_run["samples"].astype(int).tolist()
    return (completed_counts[0], sample_counts[0])
Returns tuple containing count for completed samples and total samples in a given instrument run.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- status (str): Instrument run (QC job) status, either "Active" or "Complete"
Returns:
Tuple with number of completed samples and total samples for a given instrument run.
def get_run_progress(instrument_id, run_id, status):

    """
    Returns progress of instrument run as a percentage of samples completed.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        status (str):
            Instrument run (QC job) status, either "Active" or "Complete"

    Returns:
        float: Percent of samples processed for the given instrument run, rounded to one
        decimal place (0.0 if the run has no samples).
    """

    completed, total_samples = get_completed_samples_count(instrument_id, run_id, status)

    # A run with zero total samples has no meaningful ratio; report no progress
    # rather than raising ZeroDivisionError
    if total_samples == 0:
        return 0.0

    percent_complete = (completed / total_samples) * 100
    return round(percent_complete, 1)
Returns progress of instrument run as a percentage of samples completed.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- status (str): Instrument run (QC job) status, either "Active" or "Complete"
Returns:
float: Percent of samples processed for the given instrument run.
def update_sample_counters_for_run(instrument_id, run_id, latest_sample):

    """
    Recomputes the "completed", "passes", and "fails" counts for a run and stores the latest sample.

    Counts are derived from the "qc_result" values of all samples and biological standards
    in the run, then written to the run's record in the "runs" table of the instrument database.

    TODO: The "latest_sample" is the last sample to be processed / completed.
    Nomenclature should be updated for clarity.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        latest_sample (str):
            Last sample to be processed

    Returns:
        None
    """

    df = get_samples_in_run(instrument_id, run_id, "Both")

    # Tally QC results once; a missing column or unusable table counts as zero results
    try:
        result_counts = df["qc_result"].value_counts()
    except (KeyError, TypeError, AttributeError):
        result_counts = {}

    passes = int(result_counts.get("Pass", 0))
    fails = int(result_counts.get("Fail", 0))

    # NOTE(review): samples with a "Warning" result are not included in "completed";
    # this matches the previous behavior -- confirm it is intended.
    completed = passes + fails

    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        update_status = (
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(
                    completed=completed,
                    passes=passes,
                    fails=fails,
                    latest_sample=latest_sample
            )
        )

        connection.execute(update_status)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Increments "completed" count, as well as "pass" and "fail" counts accordingly.
TODO: The "latest_sample" is the last sample to be processed / completed. Nomenclature should be updated for clarity.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- latest_sample (str): Last sample to be processed
Returns:
None
def mark_run_as_completed(instrument_id, run_id):

    """
    Marks instrument run status as completed.

    Sets the "status" column to "Complete" for the run in the "runs" table of the instrument database.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        update_status = (
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(status="Complete")
        )

        connection.execute(update_status)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Marks instrument run status as completed.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def skip_sample(instrument_id, run_id):

    """
    Skips sample by setting "latest_sample" value for instrument run to the next sample.

    This function was used after restarting the acquisition listener when MS-DIAL got stuck processing a corrupted file.
    Now that MS-DIAL runs in the background, it is deprecated and should be removed.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    # Get next sample: the entry at index 1 of the remaining samples
    # (presumably index 0 is the sample being skipped -- verify against get_remaining_samples)
    samples = get_remaining_samples(instrument_id, run_id)
    next_sample = samples[1]

    # Set latest sample to next sample
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        connection.execute((
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(latest_sample=next_sample)
        ))
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Skips sample by setting "latest_sample" value for instrument run to the next sample.
This function was used after restarting the acquisition listener when MS-DIAL got stuck processing a corrupted file. Now that MS-DIAL runs in the background, it is deprecated and should be removed.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def store_pid(instrument_id, run_id, pid):

    """
    Stores acquisition listener subprocess ID to allow for checkup and termination.

    The process ID is written to the "pid" column of the run's record in the "runs" table
    of the instrument database.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)
        pid (str):
            Process ID for acquisition listener subprocess

    Returns:
        None
    """

    db_metadata, connection = connect_to_database(instrument_id)

    try:
        instrument_runs_table = sa.Table("runs", db_metadata, autoload=True)

        update_pid = (
            sa.update(instrument_runs_table)
                .where(instrument_runs_table.c.run_id == run_id)
                .values(pid=pid)
        )

        connection.execute(update_pid)
    finally:
        # Always release the connection, even if reflection or the update fails
        connection.close()
Stores acquisition listener subprocess ID to allow for checkup and termination.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
- pid (str): Process ID for acquisition listener subprocess
Returns:
None
def get_pid(instrument_id, run_id):

    """
    Retrieves acquisition listener process ID stored for an instrument run.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        int: Process ID from the run's "pid" value, or None if it cannot be read
        or converted to an integer.
    """

    try:
        return get_instrument_run(instrument_id, run_id)["pid"].astype(int).tolist()[0]
    except Exception:
        # Best-effort lookup: a missing run or null pid means there is no process to report.
        # Catching Exception (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
        return None
Retrieves acquisition listener process ID from "runs" table in the instrument database.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
int: Process ID of the acquisition listener subprocess, or None if it could not be retrieved.
def upload_to_google_drive(file_dict):

    """
    Uploads files to MS-AutoQC folder in Google Drive.

    Files whose local path does not exist are skipped. Nothing is uploaded when the stored
    Google Drive folder ID is None, "None", or an empty string.

    Args:
        file_dict (dict):
            Dictionary with key-value structure { filename : file path }

    Returns:
        dict: Dictionary with key-value structure { filename : Google Drive ID }
    """

    # Get Google Drive instance, then the Drive ID of the MS-AutoQC folder
    drive = get_drive_instance()
    folder_id = get_drive_folder_id()

    # Drive ID's of uploaded file(s)
    drive_ids = {}

    # Without a usable folder ID there is nowhere to upload to
    if folder_id is None or folder_id == "None" or folder_id == "":
        return drive_ids

    for filename, file_path in file_dict.items():
        if not os.path.exists(file_path):
            continue

        drive_file = drive.CreateFile(metadata={
            "title": filename,
            "parents": [{"id": folder_id}],
        })
        drive_file.SetContentFile(file_path)
        drive_file.Upload()

        drive_ids[drive_file["title"]] = drive_file["id"]

    return drive_ids
Uploads files to MS-AutoQC folder in Google Drive.
Arguments:
- file_dict (dict): Dictionary with key-value structure { filename : file path }
Returns:
dict: Dictionary with key-value structure { filename : Google Drive ID }
def upload_qc_results(instrument_id, run_id):

    """
    Uploads QC results for a given instrument run to Google Drive as CSV files.

    The run record, sample QC results, and biological standard QC results are written to
    run.csv, samples.csv, and bio_standards.csv in data/<instrument>_<run_id>/csv, zipped,
    and uploaded. If the run already has a Drive ID, the existing archive is updated;
    otherwise the archive is uploaded for the first time and its Drive ID is stored in the
    "runs" table of the instrument database.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    # Label shared by the run directory and the ZIP archive
    run_label = instrument_id.replace(" ", "_") + "_" + run_id

    # Get Google Drive instance
    drive = get_drive_instance()

    # Define directories; exist_ok avoids a race between checking for and creating them,
    # and creating the "csv" subdirectory also creates the run directory
    run_directory = os.path.join(data_directory, run_label)
    csv_directory = os.path.join(run_directory, "csv")
    os.makedirs(csv_directory, exist_ok=True)

    run_csv_path = os.path.join(csv_directory, "run.csv")
    samples_csv_path = os.path.join(csv_directory, "samples.csv")
    bio_standards_csv_path = os.path.join(csv_directory, "bio_standards.csv")

    # Convert run, sample, and biological standard QC results from database into CSV files
    df_run = get_instrument_run(instrument_id, run_id)
    df_run.to_csv(run_csv_path, index=False)

    df_samples = get_samples_in_run(instrument_id=instrument_id, run_id=run_id, sample_type="Sample")
    if len(df_samples) > 0:
        df_samples.to_csv(samples_csv_path, index=False)

    df_bio_standards = get_table(instrument_id, "bio_qc_results")
    if len(df_bio_standards) > 0:
        df_bio_standards.to_csv(bio_standards_csv_path, index=False)

    # Compress CSV files into a ZIP archive for faster upload
    zip_filename = run_label + ".zip"
    zip_file_path = zip_csv_files(
        input_directory=csv_directory, output_directory_and_name=os.path.join(run_directory, run_label))

    zip_file = {zip_filename: zip_file_path}

    # Get Google Drive ID for the CSV files ZIP archive
    zip_file_drive_id = get_instrument_run(instrument_id, run_id)["drive_id"].tolist()[0]

    # Update existing ZIP archive in Google Drive
    if zip_file_drive_id is not None:

        drive_file = drive.CreateFile({
            "id": zip_file_drive_id,
            "title": zip_filename,
        })

        # Execute upload
        drive_file.SetContentFile(zip_file_path)
        drive_file.Upload()

    # If zip file Drive ID does not exist,
    else:

        # Upload CSV files ZIP archive to Google Drive for first time
        drive_id = upload_to_google_drive(zip_file)[zip_filename]

        # Store Drive ID of ZIP file in local database
        db_metadata, connection = connect_to_database(instrument_id)

        try:
            runs_table = sa.Table("runs", db_metadata, autoload=True)

            connection.execute((
                sa.update(runs_table)
                    .where(runs_table.c.run_id == run_id)
                    .values(drive_id=drive_id)
            ))
        finally:
            # Always release the connection, even if reflection or the update fails
            connection.close()
Uploads QC results for a given instrument run to Google Drive as CSV files.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def download_qc_results(instrument_id, run_id):

    """
    Downloads CSV files of QC results from Google Drive and stores in /data directory.

    The ZIP archive named <instrument>_<run_id>.zip is looked up in the MS-AutoQC Google Drive
    folder, downloaded into data/<instrument>_<run_id>, and extracted into its "csv" subdirectory.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        tuple: Paths of run.csv, samples.csv, and bio_standards.csv, respectively.
    """

    # Label shared by the run directory and the ZIP archive
    run_label = instrument_id.replace(" ", "_") + "_" + run_id

    # Get Google Drive instance
    drive = get_drive_instance()

    # Initialize directories (creating the "csv" subdirectory also creates the run directory)
    run_directory = os.path.join(data_directory, run_label)
    csv_directory = os.path.join(run_directory, "csv")
    os.makedirs(csv_directory, exist_ok=True)

    # Zip file
    zip_filename = run_label + ".zip"
    zip_file_path = os.path.join(run_directory, zip_filename)

    # Get Google Drive folder ID
    gdrive_folder_id = get_drive_folder_id()

    # Find and download ZIP archive of CSV files from Google Drive
    for drive_file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList():
        if drive_file["title"] == zip_filename:
            # Download straight to the absolute destination path. The previous os.chdir()
            # round-trip left the process in the run directory if the download raised,
            # which breaks the module's relative database paths.
            drive_file.GetContentFile(zip_file_path)
            break

    # Unzip archive
    unzip_csv_files(zip_file_path, csv_directory)

    # Define and return file paths
    run_csv = os.path.join(csv_directory, "run.csv")
    samples_csv = os.path.join(csv_directory, "samples.csv")
    bio_standards_csv_file = os.path.join(csv_directory, "bio_standards.csv")

    return (run_csv, samples_csv, bio_standards_csv_file)
Downloads CSV files of QC results from Google Drive and stores in /data directory.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
tuple: Paths of run.csv, samples.csv, and bio_standards.csv, respectively.
def get_drive_folder_id():

    """
    Returns Google Drive ID for the MS-AutoQC folder (found in user's root Drive directory).

    Returns:
        First value of the "gdrive_folder_id" column in the "workspace" table of the Settings database.
    """

    df_workspace = get_table("Settings", "workspace")
    folder_ids = df_workspace["gdrive_folder_id"].values
    return folder_ids[0]
Returns Google Drive ID for the MS-AutoQC folder (found in user's root Drive directory).
def get_database_drive_id(instrument_id):

    """
    Returns Google Drive ID for a given instrument's database.

    Args:
        instrument_id (str): Instrument ID

    Returns:
        str: Google Drive ID for the instrument database ZIP archive, taken from the "drive_id"
        column of the matching row in the "instruments" table of the Settings database.
    """

    df_instruments = get_table("Settings", "instruments")

    # Keep only the row(s) for the requested instrument
    is_requested_instrument = df_instruments["name"] == instrument_id
    df_matches = df_instruments.loc[is_requested_instrument]

    return df_matches["drive_id"].values[0]
Returns Google Drive ID for a given instrument's database.
Arguments:
- instrument_id (str): Instrument ID
Returns:
str: Google Drive ID for the instrument database ZIP archive.
def upload_database(instrument_id, sync_settings=False):

    """
    Uploads database file and methods directory to Google Drive as ZIP archives.

    The instrument database is vacuumed first. The methods directory is uploaded when
    sync_settings is True. The zipped database is then uploaded over the existing Drive file,
    and the file's modified date is remembered.

    Args:
        instrument_id (str):
            Instrument ID for the instrument database to upload
        sync_settings (bool, default False):
            Whether to upload methods directory as well

    Returns:
        str: Timestamp (HH:MM:SS) upon upload completion, or None if the Google Drive folder ID
        or the database's Drive ID is missing.
    """

    # Google Drive ID's for the MS-AutoQC folder and the database file
    folder_id = get_drive_folder_id()
    database_drive_id = get_database_drive_id(instrument_id)

    # Get Google Drive instance
    drive = get_drive_instance()

    # Vacuum database to optimize size
    execute_vacuum(instrument_id)

    # Upload methods directory to Google Drive
    if sync_settings == True:
        upload_methods()

    # Nothing to upload to without both Drive ID's
    if folder_id is None or database_drive_id is None:
        return None

    # Zip the database and upload it over the existing Drive file
    zip_database(instrument_id=instrument_id)
    zip_title = instrument_id.replace(" ", "_") + ".zip"
    drive_file = drive.CreateFile({"id": database_drive_id, "title": zip_title})
    drive_file.SetContentFile(get_database_file(instrument_id, zip=True))
    drive_file.Upload()

    # Save modifiedDate of database file
    remember_last_modified(database=instrument_id, modified_date=drive_file["modifiedDate"])

    return time.strftime("%H:%M:%S")
Uploads database file and methods directory to Google Drive as ZIP archives.
Arguments:
- instrument_id (str): Instrument ID for the instrument database to upload
- sync_settings (bool, default False): Whether to upload methods directory as well
Returns:
str: Timestamp upon upload completion.
def download_database(instrument_id, sync_settings=False):

    """
    Downloads instrument database ZIP file from Google Drive.

    This function is called when accessing an instrument database from a device other than the given instrument.

    Args:
        instrument_id (str):
            Instrument ID for the instrument database to download
        sync_settings (bool, default False):
            Whether to download methods directory as well

    Returns:
        str: Timestamp (HH:MM:SS) upon download completion, or None if the database was not
        modified, a required Google Drive ID is missing, or the download failed.
    """

    db_zip_file = instrument_id.replace(" ", "_") + ".zip"

    # If the database was not modified by another instrument, skip download (for instruments only)
    if not database_was_modified(instrument_id):
        return None

    # Get Google Drive instance
    drive = get_drive_instance()

    # Get Google Drive ID's for the MS-AutoQC folder and database file
    gdrive_folder_id = get_drive_folder_id()
    instrument_db_file_id = get_instrument(instrument_id)["drive_id"].values[0]

    # Without both Drive ID's there is nothing to download
    if gdrive_folder_id is None or instrument_db_file_id is None:
        return None

    # Download newly added / modified MSP files in MS-AutoQC > methods
    if sync_settings == True:
        download_methods(skip_check=True)

    try:
        for drive_file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList():
            if drive_file["title"] == db_zip_file:

                # Download straight into the data directory using an absolute path.
                # The previous os.chdir() round-trip left the process in the data directory
                # when the download raised (the error is caught below), which broke the
                # relative "sqlite:///data/..." database URLs for the rest of the session.
                drive_file.GetContentFile(os.path.join(data_directory, drive_file["title"]))
                unzip_database(instrument_id=instrument_id)

                # Save modifiedDate of database file
                remember_last_modified(database=instrument_id, modified_date=drive_file["modifiedDate"])

    except Exception as error:
        print("Error downloading database from Google Drive:", error)
        return None

    return time.strftime("%H:%M:%S")
Downloads instrument database ZIP file from Google Drive.
This function is called when accessing an instrument database from a device other than the given instrument.
Arguments:
- instrument_id (str): Instrument ID for the instrument database to download
- sync_settings (bool, default False): Whether to download methods directory as well
Returns:
str: Timestamp upon download completion, or None if the download was skipped or failed.
def upload_methods():

    """
    Pushes a ZIP archive of the methods directory to Google Drive.

    The Drive file ID of the archive is read from the "workspace" table of the settings
    database. If no ID is stored, nothing is uploaded.

    Returns:
        None
    """

    # Drive file ID of the methods archive, as recorded in the workspace table
    archive_drive_id = get_table("Settings", "workspace")["methods_zip_file_id"].values[0]

    # Vacuum the settings database to optimize its size
    execute_vacuum("Settings")

    gdrive = get_drive_instance()

    # No recorded archive ID means there is no Drive file to overwrite
    if archive_drive_id is None:
        return None

    # Zip the methods directory and upload it under the stored file ID
    archive_path = zip_methods()
    drive_file = gdrive.CreateFile({"id": archive_drive_id, "title": "methods.zip"})
    drive_file.SetContentFile(archive_path)
    drive_file.Upload()

    # Record the modifiedDate reported for the uploaded archive
    remember_last_modified(database="Settings", modified_date=drive_file["modifiedDate"])
Uploads methods directory ZIP archive to Google Drive.
def download_methods(skip_check=False):

    """
    Downloads methods directory ZIP archive from Google Drive.

    The device identity and MS-DIAL directory are read before the download and written back
    afterwards (presumably because unzipping the archive replaces the local settings
    database that stores them; confirm against unzip_methods()).

    The archive is written directly into the data directory using an absolute path, so the
    process working directory is never changed and cannot be left pointing at the data
    directory if the download fails part-way.

    Args:
        skip_check (bool, default False): If True, skips checking whether database was modified

    Returns:
        str: Timestamp (HH:MM:SS) upon completion, or None if the check found no
        modification or the download raised an error.
    """

    # If the database was not modified by another instrument, skip download (for instruments only)
    if not skip_check and not database_was_modified("Settings"):
        return None

    # Get device identity
    instrument_bool = is_instrument_computer()
    device_identity = get_device_identity()

    # Get MS-DIAL directory (best effort: fall back to None if it cannot be read)
    try:
        msdial_directory = get_msdial_directory()
    except Exception:
        msdial_directory = None

    # Get Google Drive instance
    drive = get_drive_instance()

    # Get Google Drive folder ID
    gdrive_folder_id = get_drive_folder_id()

    try:
        # Download and unzip methods directory
        for file in drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList():
            if file["title"] == "methods.zip":

                # Download into the data directory via an absolute path (no os.chdir round trip)
                file.GetContentFile(os.path.join(data_directory, file["title"]))

                # Unzip methods directory
                unzip_methods()

                # Save modifiedDate of methods directory
                remember_last_modified(database="Settings", modified_date=file["modifiedDate"])

    except Exception as error:
        print("Error downloading methods from Google Drive:", error)
        return None

    # Re-apply the MS-DIAL directory captured before the download
    update_msdial_directory(msdial_directory)

    # Re-apply the device identity captured before the download
    set_device_identity(is_instrument_computer=instrument_bool, instrument_id=device_identity)
    return time.strftime("%H:%M:%S")
Downloads methods directory ZIP archive from Google Drive.
Arguments:
- skip_check (bool, default False): If True, skips checking whether database was modified
Returns:
str: Timestamp upon download completion, or None if the download was skipped or failed.
def remember_last_modified(database, modified_date):

    """
    Stores last modified time of database file in Google Drive.

    This function is called after file upload, and used for comparison before download.

    For "Settings", the value is written to the methods_last_modified column of the workspace
    row with id 1. For any other name, it is written to the last_modified column of the
    instruments row whose name matches. Only the table being updated is reflected, and the
    connection is closed even if the update fails.

    Args:
        database (str):
            Name of database (either Instrument ID or "Settings")
        modified_date (str):
            Modified date of file uploaded to Google Drive

    Returns:
        None
    """

    db_metadata, connection = connect_to_database("Settings")

    try:
        if database == "Settings":
            workspace_table = sa.Table("workspace", db_metadata, autoload=True)
            statement = (
                sa.update(workspace_table)
                .where(workspace_table.c.id == 1)
                .values(methods_last_modified=modified_date)
            )
        else:
            instruments_table = sa.Table("instruments", db_metadata, autoload=True)
            statement = (
                sa.update(instruments_table)
                .where(instruments_table.c.name == database)
                .values(last_modified=modified_date)
            )

        connection.execute(statement)

    finally:
        # Always release the connection, including when reflection or the UPDATE raises
        connection.close()
Stores last modified time of database file in Google Drive.
This function is called after file upload, and used for comparison before download.
Arguments:
- database (str): Name of database (either Instrument ID or "Settings")
- modified_date (str): Modified date of file uploaded to Google Drive
Returns:
None
def database_was_modified(database_name):

    """
    Reports whether the archive in Google Drive differs from the last one this device saw.

    The modified date stored locally (workspace table for "Settings", instruments table
    otherwise) is compared with the modifiedDate of the matching ZIP file in the Google Drive
    folder. If no matching file is found, the Drive-side value is treated as None.

    Args:
        database_name (str): Name of database ("Settings" or an instrument ID)

    Returns:
        bool: False if the local and Google Drive modified dates are equal, True otherwise.
    """

    # Google Drive folder ID, as stored in the database
    folder_id = get_drive_folder_id()

    # Locally stored "last modified" value and the archive name to look for
    if database_name == "Settings":
        stored_date = get_table("Settings", "workspace")["methods_last_modified"].values[0]
        archive_name = "methods.zip"
    else:
        stored_date = get_instrument(database_name)["last_modified"].values[0]
        archive_name = database_name.replace(" ", "_") + ".zip"

    gdrive = get_drive_instance()
    folder_contents = gdrive.ListFile({"q": "'" + folder_id + "' in parents and trashed=false"}).GetList()

    # modifiedDate of the first file with a matching title, or None when there is none
    remote_date = next(
        (entry["modifiedDate"] for entry in folder_contents if entry["title"] == archive_name),
        None,
    )

    return not (stored_date == remote_date)
Returns True if workspace file was modified by another instrument PC in Google Drive, and False if not.
Arguments:
- database_name (str): Name of database
Returns:
Returns True if workspace file was modified by another instrument PC in Google Drive, and False if not.
def send_sync_signal(folder_id):

    """
    Uploads empty file to signal that an instrument PC is syncing to Google Drive.

    TODO: This method is deprecated. Please remove if no plans for usage.

    Args:
        folder_id (str): Google Drive folder ID

    Returns:
        bool: True if sync signal was sent, False if not.
    """

    # Get Google Drive instance
    drive = get_drive_instance()

    try:
        # A file titled "Syncing" with no content, placed in the given folder
        drive.CreateFile(metadata={"title": "Syncing", "parents": [{"id": folder_id}]}).Upload()
        return True
    except Exception:
        # Report failure to the caller; KeyboardInterrupt / SystemExit are not swallowed
        return False
Uploads empty file to signal that an instrument PC is syncing to Google Drive.
TODO: This method is deprecated. Please remove if no plans for usage.
Arguments:
- folder_id (str): Google Drive folder ID
Returns:
bool: True if sync signal was sent, False if not.
def safe_to_upload(folder_id):

    """
    Tells whether the Google Drive folder is free of a "Syncing" signal file.

    TODO: This method is deprecated. Please remove if no plans for usage.

    Args:
        folder_id (str): Google Drive folder ID

    Returns:
        bool: False if a file titled "Syncing" is present in the folder, True if not.
    """

    gdrive = get_drive_instance()

    # Non-trashed files that sit directly inside the folder
    folder_contents = gdrive.ListFile({"q": "'" + folder_id + "' in parents and trashed=false"}).GetList()

    # Uploading is considered safe only when no signal file is present
    return not any(entry["title"] == "Syncing" for entry in folder_contents)
Returns False if another device is currently uploading to Google Drive, else True.
TODO: This method is deprecated. Please remove if no plans for usage.
Arguments:
- folder_id (str): Google Drive folder ID
Returns:
bool: False if another device is currently uploading to Google Drive, True if not.
def remove_sync_signal(folder_id):

    """
    Removes empty signal file to signal that an instrument PC has completed syncing to Google Drive.

    TODO: This method is deprecated. Please remove if no plans for usage.

    Args:
        folder_id (str): Google Drive folder ID

    Returns:
        bool: True if sync signal was removed, False if not (including when no signal
        file exists in the folder or the request raised an error).
    """

    # Get Google Drive instance
    drive = get_drive_instance()

    try:
        for file in drive.ListFile({"q": "'" + folder_id + "' in parents and trashed=false"}).GetList():
            if file["title"] == "Syncing":
                file.Delete()
                return True
    except Exception:
        return False

    # No signal file was found, so nothing was removed
    return False
Removes empty signal file to signal that an instrument PC has completed syncing to Google Drive.
TODO: This method is deprecated. Please remove if no plans for usage.
Arguments:
- folder_id (str): Google Drive folder ID
Returns:
bool: True if sync signal was removed, False if not.
def delete_active_run_csv_files(instrument_id, run_id):

    """
    Checks for and deletes CSV files from Google Drive at the end of an active instrument run.

    The ZIP archive titled "<instrument_id with underscores>_<run_id>.zip" is deleted from
    the Google Drive folder (if the folder ID is known and the archive exists). The drive_id
    column of the matching row in the "runs" table is then set to NULL.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    archive_title = instrument_id.replace(" ", "_") + "_" + run_id + ".zip"

    # Find zip archive of CSV files in Google Drive and delete it
    drive = get_drive_instance()
    gdrive_folder_id = get_drive_folder_id()

    if gdrive_folder_id is not None:
        drive_file_list = drive.ListFile({"q": "'" + gdrive_folder_id + "' in parents and trashed=false"}).GetList()
        for file in drive_file_list:
            if file["title"] == archive_title:
                file.Delete()
                break

    # Delete Drive ID from database
    db_metadata, connection = connect_to_database(instrument_id)

    try:
        runs_table = sa.Table("runs", db_metadata, autoload=True)

        connection.execute((
            sa.update(runs_table)
                .where(runs_table.c.run_id == run_id)
                .values(drive_id=None)
        ))

    finally:
        # Always release the connection, including when reflection or the UPDATE raises
        connection.close()
Checks for and deletes CSV files from Google Drive at the end of an active instrument run.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def sync_on_run_completion(instrument_id, run_id):

    """
    Brings Google Drive up to date once an active instrument run has finished.

    Steps, in order:
        1. Upload the instrument database to Google Drive
        2. Delete the active run's CSV files

    If step 1 raises, the error is printed and step 2 is skipped. If step 2 raises, the
    error is printed. No exception from either step reaches the caller.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    # NOTE(review): neither result is used below; the calls are kept in case they have
    # side effects (e.g. authentication). Confirm before removing.
    get_drive_instance()
    get_drive_folder_id()

    # Step 1: upload database
    try:
        upload_database(instrument_id)
    except Exception as upload_error:
        print("sync_on_run_completion() – Error uploading database during sync", upload_error)
        return None

    # Step 2: delete active run CSV files
    try:
        delete_active_run_csv_files(instrument_id, run_id)
    except Exception as cleanup_error:
        print("sync_on_run_completion() – Error deleting CSV files after sync", cleanup_error)

    return None
Syncs database with Google Drive at the end of an active instrument run.
Performs the following actions:
- Upload database to Google Drive
- Delete active run CSV files
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def get_data_file_type(instrument_id):

    """
    Returns expected data file extension based on instrument vendor type.

    The instrument is looked up by name using a bound query parameter, so instrument IDs
    containing quotes or other SQL metacharacters are handled safely.

    TODO: Modify this function as needed when adding support for other instrument vendors.

    Args:
        instrument_id (str): Instrument ID

    Returns:
        str: Data file extension for instrument vendor (without a leading dot), or None if
        the vendor is not one of the supported vendors.

    Raises:
        IndexError: If no instrument with the given name exists in the settings database.
    """

    # Vendor name -> data file extension
    extensions_by_vendor = {
        "Thermo Fisher": "raw",
        "Agilent": "d",
        "Bruker": "baf",
        "Waters": "raw",
        "Sciex": "wiff2",
    }

    engine = sa.create_engine(settings_database)
    try:
        df_instruments = pd.read_sql(
            sa.text("SELECT * FROM instruments WHERE name = :name"),
            engine,
            params={"name": instrument_id},
        )
    finally:
        # The engine is only needed for this one query
        engine.dispose()

    vendor = df_instruments["vendor"].astype(str).tolist()[0]
    return extensions_by_vendor.get(vendor)
Returns expected data file extension based on instrument vendor type.
TODO: Modify this function as needed when adding support for other instrument vendors.
Arguments:
- instrument_id (str): Instrument ID
Returns:
Data file extension for instrument vendor.
def is_completed_run(instrument_id, run_id):

    """
    Returns True if the given QC job is for a completed run, and False if for an active run.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        bool: True if the job type stored for the run is "completed". False otherwise,
        including when the job type could not be retrieved (the traceback is printed).
    """

    try:
        job_type = get_instrument_run(instrument_id, run_id)["job_type"].astype(str).values[0]
        return bool(job_type == "completed")
    except Exception:
        # Any lookup failure is reported and treated as "not a completed run"
        print("Could not get MS-AutoQC job type.")
        traceback.print_exc()
        return False
Returns True if the given QC job is for a completed run, and False if for an active run.
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
bool: True if the job is for a completed run, and False if job is for an active run.
def delete_temp_directory(instrument_id, run_id):

    """
    Deletes temporary data file directory in local app directory.

    This function is called at the end of an instrument run (QC job).

    The directory is "<instrument_id with underscores>_<run_id>" inside the data directory.
    Nothing happens if it does not exist. Failures are reported with a printed message
    rather than raised.

    Args:
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID)

    Returns:
        None
    """

    # Delete temporary data file directory
    try:
        directory_name = instrument_id.replace(" ", "_") + "_" + run_id
        temp_directory = os.path.join(data_directory, directory_name)
        if os.path.exists(temp_directory):
            shutil.rmtree(temp_directory)
    except Exception:
        # Best-effort cleanup: report and continue, but let KeyboardInterrupt / SystemExit through
        print("Could not delete temporary data directory.")
Deletes temporary data file directory in local app directory.
This function is called at the end of an instrument run (QC job).
Arguments:
- instrument_id (str): Instrument ID
- run_id (str): Instrument run ID (job ID)
Returns:
None
def pipeline_valid(module=None):

    """
    Validates that MSConvert and MS-DIAL dependencies are installed.

    This function is called during job setup validation.

    A dependency counts as installed when its executable ("msconvert.exe" or
    "MsdialConsoleApp.exe") exists inside the directory returned by the corresponding
    lookup function. If the lookup raises, the dependency is treated as not installed.

    Args:
        module (str, default None): If "msdial" or "msconvert", only validates that module.
            Any other value validates both.

    Returns:
        bool: Whether MSConvert.exe and MsdialConsoleApp.exe exist (or only the requested one).
    """

    try:
        msconvert_installed = os.path.exists(os.path.join(get_msconvert_directory(), "msconvert.exe"))
    except Exception:
        msconvert_installed = False

    try:
        msdial_installed = os.path.exists(os.path.join(get_msdial_directory(), "MsdialConsoleApp.exe"))
    except Exception:
        msdial_installed = False

    if module == "msdial":
        return msdial_installed
    if module == "msconvert":
        return msconvert_installed
    return msconvert_installed and msdial_installed
Validates that MSConvert and MS-DIAL dependencies are installed.
This function is called during job setup validation.
Arguments:
- module (str, default None): If specified, only validates given module.
Returns:
bool: Whether MSConvert.exe and MsdialConsoleApp.exe exist.
def send_email(subject, message_body):

    """
    Sends a plain-text email through the Gmail API.

    Credentials are loaded from the email credentials file. Recipients come from
    get_email_notifications_list(as_string=True). The message is sent as user "me".

    Args:
        subject (str):
            Subject of email
        message_body (str):
            Body of email

    Returns:
        The value returned by executing the Gmail API send request, or None if any step
        raised an exception (the traceback is printed).
    """

    try:
        # load_credentials_from_file returns a sequence; the first element is the credentials object
        gmail_credentials = google_auth.load_credentials_from_file(alt_credentials)[0]
        gmail_service = build("gmail", "v1", credentials=gmail_credentials)

        # Compose the message
        email = EmailMessage()
        email.set_content(message_body)
        email["Subject"] = subject
        email["To"] = get_email_notifications_list(as_string=True)

        # The send request takes the message as a URL-safe base64 string under the "raw" key
        payload = {"raw": base64.urlsafe_b64encode(email.as_bytes()).decode()}

        return gmail_service.users().messages().send(userId="me", body=payload).execute()

    except Exception:
        traceback.print_exc()
        return None
Sends email using Google authenticated credentials.
This function is called when a sample receives a QC warning or QC fail, provided that:
- Google Drive sync is enabled
- Email addresses are registered for notifications
Arguments:
- subject (str): Subject of email
- message_body (str): Body of email
Returns:
On success, the response returned by the Gmail API send request; None if sending failed.