ms_autoqc.PlotGeneration

  1import os, json, ast, traceback, time
  2import plotly.express as px
  3import pandas as pd
  4import numpy as np
  5import ms_autoqc.DatabaseFunctions as db
  6
  7# Bootstrap color dictionary
  8bootstrap_colors = {
  9    "blue": "rgb(0, 123, 255)",
 10    "red": "rgb(220, 53, 69)",
 11    "green": "rgb(40, 167, 69)",
 12    "yellow": "rgb(255, 193, 7)",
 13    "blue-low-opacity": "rgba(0, 123, 255, 0.4)",
 14    "red-low-opacity": "rgba(220, 53, 69, 0.4)",
 15    "green-low-opacity": "rgba(40, 167, 69, 0.4)",
 16    "yellow-low-opacity": "rgba(255, 193, 7, 0.4)"
 17}
 18
 19def get_qc_results(instrument_id, run_id, status="Complete", biological_standard=None, biological_standards_only=False, for_benchmark_plot=False):
 20
 21    """
 22    Loads and parses QC results (for samples and biological standards) into Plotly graphs.
 23
 24    This function will return whatever tables it can in a tuple, and fill None for the tables that throw errors in parsing.
 25    This is so that an error in retrieving one table will not prevent retrieving other tables.
 26
 27    Depending on whether Google Drive sync is enabled, this function will load data from either CSV files
 28    (for active instrument runs) or the local instrument database (for completed runs).
 29
 30    Regardless of whether Google Drive sync is enabled, the instrument computer (on which the run was started) will
 31    always load data from its local SQLite database.
 32
 33    Args:
 34        instrument_id (str):
 35            Instrument ID
 36        run_id (str):
 37            Instrument run ID (Job ID)
 38        status (str):
 39            QC job status, either "Active" or "Complete"
 40        biological_standard (str, default None):
 41            If specified, returns QC results for given biological standard associated with job
 42        biological_standards_only (bool, default False):
 43            If specified, returns QC results for biological standards only
 44        for_benchmark_plot (bool, default False):
 45            If specified, returns QC results specifically for biological standard benchmark plot
 46
 47    Returns:
 48        tuple: Tuple containing tables of various sample data in JSON "records" format. Order is as follows:
 49            1. df_rt_pos: Retention times for internal standards in positive mode
 50            2. df_rt_neg: Retention times for internal standards in negative mode
 51            3. df_intensity_pos: Intensities for internal standards in positive mode
 52            4. df_intensity_neg: Intensities for internal standards in negative mode
 53            5. df_mz_pos: Precursor masses for internal standards in positive mode
 54            6. df_mz_neg: Precursor masses for internal standards in negative mode
 55            7. df_sequence: Acquisition sequence table
 56            8. df_metadata: Sample metadata table
 57            9. df_bio_rt_pos: Retention times for targeted features in biological standard sample in positive mode
 58            10. df_bio_rt_neg: Retention times for targeted features in biological standard sample in negative mode
 59            11. df_bio_intensity_pos: Intensities for targeted features in biological standard sample in positive mode
 60            12. df_bio_intensity_neg: Intensities for targeted features in biological standard sample in negative mode
 61            13. df_bio_mz_pos: Precursor masses for targeted features in biological standard sample in positive mode
 62            14. df_bio_mz_neg: Precursor masses for targeted features in biological standard sample in negative mode
 63            15. resources: Metadata for instrument run
 64            16. df_samples: Table containing sample names, polarities, autosampler positions, and QC results
 65            17. pos_internal_standards: List of positive mode internal standards
 66            18. neg_internal_standards: List of negative mode internal standards
 67            19. df_delta_rt_pos: Delta RT's for internal standards in positive mode
 68            20. df_delta_rt_neg: Delta RT's for internal standards in negative mode
 69            21. df_in_run_delta_rt_pos: In-run delta RT's for internal standards in positive mode
 70            22. df_in_run_delta_rt_neg: In-run delta RT's for internal standards in negative mode
 71            23. df_delta_mz_pos: Delta m/z's for internal standards in positive mode
 72            24. df_delta_mz_neg: Delta m/z's for internal standards in negative mode
 73            25. df_warnings_pos: QC warnings for internal standards in positive mode
 74            26. df_warnings_neg: QC warnings for internal standards in negative mode
 75            27. df_fails_pos: QC fails for internal standards in positive mode
 76            28. df_fails_neg: QC fails for internal standards in negative mode
 77    """
 78
 79    # Get run information / metadata
 80    if db.get_device_identity() != instrument_id and db.sync_is_enabled():
 81        if status == "Complete":
 82            load_from = "database"
 83        elif status == "Active":
 84            load_from = "csv"
 85    else:
 86        load_from = "database"
 87
 88    if load_from == "database":
 89        df_run = db.get_instrument_run(instrument_id, run_id)
 90    elif load_from == "csv":
 91        db.download_qc_results(instrument_id, run_id)
 92        df_run = db.get_instrument_run_from_csv(instrument_id, run_id)
 93
 94    chromatography = df_run["chromatography"].values[0]
 95    df_sequence = df_run["sequence"].values[0]
 96    df_metadata = df_run["metadata"].values[0]
 97    completed = df_run["completed"].astype(int).tolist()[0]
 98
 99    biological_standards = df_run["biological_standards"].values[0]
100    if biological_standards is not None:
101        biological_standards = ast.literal_eval(biological_standards)
102
103    # Get internal standards in chromatography method
104    precursor_mz_dict = db.get_internal_standards_dict(chromatography, "precursor_mz")
105    retention_times_dict = db.get_internal_standards_dict(chromatography, "retention_time")
106
107    resources = {
108        "instrument": instrument_id,
109        "run_id": run_id,
110        "status": status,
111        "chromatography": chromatography,
112        "precursor_mass_dict": precursor_mz_dict,
113        "retention_times_dict": retention_times_dict,
114        "samples_completed": completed,
115        "biological_standards": json.dumps(biological_standards)
116    }
117
118    # Parse m/z, RT, and intensity data for biological standards into DataFrames
119    if biological_standards is not None:
120
121        if biological_standard is None:
122            biological_standard = biological_standards[0]
123
124        try:
125            df_bio_mz_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
126                result_type="precursor_mz", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
127        except Exception as error:
128            print("Error loading positive (–) mode biological standard precursor m/z data:", error)
129            df_bio_mz_pos = None
130
131        try:
132            df_bio_rt_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
133                result_type="retention_time", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
134        except Exception as error:
135            print("Error loading positive (–) mode biological standard precursor m/z data:", error)
136            df_bio_rt_pos = None
137
138        try:
139            df_bio_intensity_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
140                result_type="intensity", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
141        except Exception as error:
142            print("Error loading positive (–) mode biological standard retention time data:", error)
143            df_bio_intensity_pos = None
144
145        try:
146            df_bio_mz_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
147                result_type="precursor_mz", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
148        except Exception as error:
149            print("Error loading negative (–) mode biological standard precursor m/z data:", error)
150            df_bio_mz_neg = None
151
152        try:
153            df_bio_rt_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
154                result_type="retention_time", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
155        except Exception as error:
156            print("Error loading positive (–) mode biological standard retention time data:", error)
157            df_bio_rt_neg = None
158
159        try:
160            df_bio_intensity_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
161                result_type="intensity", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
162        except Exception as error:
163            print("Error loading negative (–) mode biological standard intensity data:", error)
164            df_bio_intensity_neg = None
165
166    else:
167        df_bio_mz_pos = None
168        df_bio_rt_pos = None
169        df_bio_intensity_pos = None
170        df_bio_mz_neg = None
171        df_bio_rt_neg = None
172        df_bio_intensity_neg = None
173
174    if biological_standards_only:
175        return df_bio_rt_pos, df_bio_rt_neg, df_bio_intensity_pos, df_bio_intensity_neg, df_bio_mz_pos, df_bio_mz_neg
176    elif for_benchmark_plot:
177        return df_bio_intensity_pos, df_bio_intensity_neg
178
179    # Parse m/z, RT, and intensity data for internal standards into DataFrames
180    try:
181        df_mz_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
182            run_id=run_id, result_type="precursor_mz", polarity="Pos", load_from=load_from)
183    except Exception as error:
184        print("Error loading positive (+) mode precursor m/z data:", error)
185        df_mz_pos = None
186
187    try:
188        df_rt_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
189            run_id=run_id, result_type="retention_time", polarity="Pos", load_from=load_from)
190    except Exception as error:
191        print("Error loading positive (+) mode retention time data:", error)
192        df_rt_pos = None
193
194    try:
195        df_intensity_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
196            run_id=run_id, result_type="intensity", polarity="Pos", load_from=load_from)
197    except Exception as error:
198        print("Error loading positive (+) mode intensity data:", error)
199        df_intensity_pos = None
200
201    try:
202        df_mz_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
203            run_id=run_id, result_type="precursor_mz", polarity="Neg", load_from=load_from)
204    except Exception as error:
205        print("Error loading negative (–) mode precursor m/z data:", error)
206        df_mz_neg = None
207
208    try:
209        df_rt_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
210            run_id=run_id, result_type="retention_time", polarity="Neg", load_from=load_from)
211    except Exception as error:
212        print("Error loading negative (–) mode retention time data:", error)
213        df_rt_neg = None
214
215    try:
216        df_intensity_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
217            run_id=run_id, result_type="intensity", polarity="Neg", load_from=load_from)
218    except Exception as error:
219        print("Error loading negative (–) mode intensity data:", error)
220        df_intensity_neg = None
221
222    try:
223        df_delta_rt_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
224            run_id=run_id, result_type="Delta RT", polarity="Pos", load_from=load_from)
225    except Exception as error:
226        print("Error loading positive (+) mode delta RT data:", error)
227        df_delta_rt_pos = None
228
229    try:
230        df_delta_rt_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
231            run_id=run_id, result_type="Delta RT", polarity="Neg", load_from=load_from)
232    except Exception as error:
233        print("Error loading negative (–) mode delta RT data:", error)
234        df_delta_rt_neg = None
235
236    try:
237        df_in_run_delta_rt_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
238            run_id=run_id, result_type="In-run delta RT", polarity="Pos", load_from=load_from)
239    except Exception as error:
240        print("Error loading positive (+) mode in-run delta RT data:", error)
241        df_in_run_delta_rt_pos = None
242
243    try:
244        df_in_run_delta_rt_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
245            run_id=run_id, result_type="In-run delta RT", polarity="Neg", load_from=load_from)
246    except Exception as error:
247        print("Error loading negative (–) mode in-run delta RT data:", error)
248        df_in_run_delta_rt_neg = None
249
250    try:
251        df_delta_mz_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
252            run_id=run_id, result_type="Delta m/z", polarity="Pos", load_from=load_from)
253    except Exception as error:
254        print("Error loading positive (+) mode delta m/z data:", error)
255        df_delta_mz_pos = None
256
257    try:
258        df_delta_mz_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
259            run_id=run_id, result_type="Delta m/z", polarity="Neg", load_from=load_from)
260    except Exception as error:
261        print("Error loading negative (–) mode delta m/z data:", error)
262        df_delta_mz_neg = None
263
264    try:
265        df_warnings_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
266            run_id=run_id, result_type="Warnings", polarity="Pos", load_from=load_from)
267    except Exception as error:
268        print("Error loading positive (+) mode QC warnings data:", error)
269        df_warnings_pos = None
270
271    try:
272        df_warnings_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
273            run_id=run_id, result_type="Warnings", polarity="Neg", load_from=load_from)
274    except Exception as error:
275        print("Error loading negative (–) mode QC warnings data:", error)
276        df_warnings_neg = None
277
278    try:
279        df_fails_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
280            run_id=run_id, result_type="Fails", polarity="Pos", load_from=load_from)
281    except Exception as error:
282        print("Error loading positive (+) mode QC fails data:", error)
283        df_fails_pos = None
284
285    try:
286        df_fails_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
287            run_id=run_id, result_type="Fails", polarity="Neg", load_from=load_from)
288    except Exception as error:
289        print("Error loading negative (+) mode QC fails data:", error)
290        df_fails_neg = None
291
292    # Generate DataFrame for sample table
293    try:
294        if load_from == "database":
295            df_samples = db.get_samples_in_run(instrument_id, run_id, "Both")
296        elif load_from == "csv":
297            df_samples = db.get_samples_from_csv(instrument_id, run_id, "Both")
298
299        df_samples = df_samples[["sample_id", "position", "qc_result", "polarity"]]
300        df_samples = df_samples.rename(
301            columns={
302                "sample_id": "Sample",
303                "position": "Position",
304                "qc_result": "QC",
305                "polarity": "Polarity"})
306        df_samples = df_samples.to_json(orient="records")
307
308    except Exception as error:
309        print("Error loading samples from database:", error)
310        traceback.print_exc()
311        df_samples = ""
312
313    # Get internal standards from data
314    if df_rt_pos is not None:
315        pos_internal_standards = pd.read_json(df_rt_pos, orient="records").columns.tolist()
316        pos_internal_standards.remove("Sample")
317    else:
318        pos_internal_standards = []
319
320    if df_rt_neg is not None:
321        neg_internal_standards = pd.read_json(df_rt_neg, orient="records").columns.tolist()
322        neg_internal_standards.remove("Sample")
323    else:
324        neg_internal_standards = []
325
326    return (df_rt_pos, df_rt_neg, df_intensity_pos, df_intensity_neg, df_mz_pos, df_mz_neg, df_sequence, df_metadata,
327        df_bio_rt_pos, df_bio_rt_neg, df_bio_intensity_pos, df_bio_intensity_neg, df_bio_mz_pos, df_bio_mz_neg,
328        json.dumps(resources), df_samples, json.dumps(pos_internal_standards), json.dumps(neg_internal_standards),
329        df_delta_rt_pos, df_delta_rt_neg, df_in_run_delta_rt_pos, df_in_run_delta_rt_neg, df_delta_mz_pos, df_delta_mz_neg,
330        df_warnings_pos, df_warnings_neg, df_fails_pos, df_fails_neg)
331
332
def _select_sample_values(df, sample, columns, blank_to_nan=False):

    """
    Returns the values of the first row matching a sample, ordered like columns (without "Sample").

    The input DataFrame is never modified.

    Args:
        df (DataFrame):
            Table with a "Sample" column and one column per internal standard
        sample (str):
            Sample ID to select
        columns (list):
            Column order to apply, including "Sample"
        blank_to_nan (bool, default False):
            If True, cells holding a single space (" ") are replaced with NaN

    Returns:
        Series: Values for the sample, indexed by internal standard.

    Raises:
        IndexError: If no row matches the sample.
    """

    selected = df.loc[df["Sample"] == sample, columns].drop(columns=["Sample"])

    if blank_to_nan:
        selected = selected.replace(" ", np.nan)

    return selected.iloc[0]


def generate_sample_metadata_dataframe(sample, df_rt, df_mz, df_intensity, df_delta_rt, df_in_run_delta_rt,
    df_delta_mz, df_warnings, df_fails, df_sequence, df_metadata):

    """
    Aggregates tables of relevant data from the acquisition sequence, metadata file, and QC results for a selected sample.

    Returns two DataFrames by aggregating the following information:
        1. Internal standard m/z, RT, intensity, delta m/z, delta RT, in-run delta RT, warnings, and fails
        2. Acquisition sequence and sample metadata information

    None of the DataFrames passed in are modified.

    Args:
        sample (str):
            Sample ID
        df_rt (DataFrame):
            Retention times for internal standards (columns) across samples (rows)
        df_mz (DataFrame):
            Precursor masses for internal standards (columns) across samples (rows)
        df_intensity (DataFrame):
            Intensities for internal standards (columns) across samples (rows)
        df_delta_rt (DataFrame):
            Delta RT's from library values for internal standards (columns) across samples (rows)
        df_in_run_delta_rt (DataFrame):
            Delta RT's from in-run values for internal standards (columns) across samples (rows)
        df_delta_mz (DataFrame):
            Delta m/z's from library values for internal standards (columns) across samples (rows)
        df_warnings (DataFrame):
            QC warnings for internal standards (columns) across samples (rows)
        df_fails (DataFrame):
            QC fails for internal standards (columns) across samples (rows)
        df_sequence (DataFrame):
            Acquisition sequence table
        df_metadata (DataFrame):
            Sample metadata table

    Returns:
        Tuple containing two DataFrames, the first storing internal standard data and the second storing sample metadata.
        The second DataFrame has two columns, both named "Sample Information": field names, then field values.

    Raises:
        IndexError: If the sample is missing from one of the QC tables, or if no sequence / metadata row was found for it.
    """

    df_sample_istd = pd.DataFrame()
    df_sample_info = pd.DataFrame()

    # The column order of df_rt is applied to every other table so that all values line up
    columns = df_rt.columns.tolist()
    df_sample_istd["Internal Standard"] = [column for column in columns if column != "Sample"]

    # Precursor m/z
    mz_values = _select_sample_values(df_mz, sample, columns)
    df_sample_istd["m/z"] = mz_values.astype(float).values.tolist()

    # Retention times
    rt_values = _select_sample_values(df_rt, sample, columns)
    df_sample_istd["RT"] = rt_values.astype(float).round(2).values.tolist()

    # Intensities (missing values are reported as zero)
    intensity_values = _select_sample_values(df_intensity, sample, columns)
    intensities = intensity_values.fillna(0).astype(float).values.tolist()
    df_sample_istd["Intensity"] = ["{:.2e}".format(x) for x in intensities]

    # Delta m/z
    delta_mz_values = _select_sample_values(df_delta_mz, sample, columns, blank_to_nan=True)
    df_sample_istd["Delta m/z"] = delta_mz_values.astype(float).round(6).values.tolist()

    # Delta RT
    delta_rt_values = _select_sample_values(df_delta_rt, sample, columns, blank_to_nan=True)
    df_sample_istd["Delta RT"] = delta_rt_values.astype(float).round(3).values.tolist()

    # In-run delta RT
    in_run_delta_rt_values = _select_sample_values(df_in_run_delta_rt, sample, columns, blank_to_nan=True)
    df_sample_istd["In-Run Delta RT"] = in_run_delta_rt_values.astype(float).round(3).values.tolist()

    # Warnings
    warning_values = _select_sample_values(df_warnings, sample, columns, blank_to_nan=True)
    df_sample_istd["Warnings"] = warning_values.astype(str).values.tolist()

    # Fails
    fail_values = _select_sample_values(df_fails, sample, columns, blank_to_nan=True)
    df_sample_istd["Fails"] = fail_values.astype(str).values.tolist()

    if len(df_sequence) > 0:
        df_sequence = df_sequence.loc[df_sequence["File Name"].astype(str) == sample]
        df_sample_info["Sample ID"] = df_sequence["L1 Study"].astype(str).values
        df_sample_info["Position"] = df_sequence["Position"].astype(str).values
        df_sample_info["Injection Volume"] = (df_sequence["Inj Vol"].astype(str) + " uL").values
        df_sample_info["Instrument Method"] = df_sequence["Instrument Method"].astype(str).values

    if len(df_metadata) > 0:
        df_metadata = df_metadata.loc[df_metadata["Filename"].astype(str) == sample]
        if len(df_metadata) > 0:
            df_sample_info["Species"] = df_metadata["Species"].astype(str).values
            df_sample_info["Matrix"] = df_metadata["Matrix"].astype(str).values
            df_sample_info["Growth-Harvest Conditions"] = df_metadata["Growth-Harvest Conditions"].astype(str).values
            df_sample_info["Treatment"] = df_metadata["Treatment"].astype(str).values

    # Reshape into a two-column table (field name, field value): duplicate the first row, overwrite the
    # original first row with the field names, then transpose.
    # pd.concat is used because DataFrame.append is no longer available in pandas 2.0+
    df_sample_info = pd.concat([df_sample_info, df_sample_info.iloc[[0]]])
    df_sample_info.iloc[0] = df_sample_info.columns.tolist()
    df_sample_info = df_sample_info.rename(index={0: "Sample Information"})
    df_sample_info = df_sample_info.transpose()

    return df_sample_istd, df_sample_info
447
448
def generate_bio_standard_dataframe(clicked_sample, instrument_id, run_id, df_rt, df_mz, df_intensity):

    """
    Aggregates data for a selected biological standard.

    TODO: More metrics could be added to sample information cards for biological standards here.

    Aggregates and returns 2 DataFrames for a selected sample:
        1. Targeted metabolite m/z, RT, and intensity
        2. Sample ID and QC result

    Args:
        clicked_sample (str):
            Sample ID
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID); currently not used by this function
        df_rt (DataFrame):
            Retention times of targeted metabolites in the biological standard
        df_mz (DataFrame):
            Precursor masses of targeted metabolites in the biological standard
        df_intensity (DataFrame):
            Intensities of targeted metabolites in the biological standard

    Returns:
        Tuple containing two DataFrames, the first storing targeted metabolites data and the second storing sample metadata.
        The second DataFrame has two columns, both named "Sample Information": field names, then field values.
    """

    # Every column except "Name" is a targeted metabolite
    metabolites = df_mz.columns.tolist()
    metabolites.remove("Name")

    # NOTE(review): values are read from the FIRST row of each table without filtering on "Name";
    # presumably the caller passes tables already narrowed to the clicked sample - confirm.
    df_sample_features = pd.DataFrame()
    df_sample_features["Metabolite name"] = metabolites
    df_sample_features["Precursor m/z"] = df_mz[metabolites].iloc[0].astype(float).values
    df_sample_features["Retention time (min)"] = df_rt[metabolites].iloc[0].astype(float).round(3).values
    intensities = df_intensity[metabolites].iloc[0].fillna(0).astype(float).values.tolist()
    df_sample_features["Intensity"] = ["{:.2e}".format(x) for x in intensities]

    df_sample_info = pd.DataFrame()
    df_sample_info["Sample ID"] = [clicked_sample]
    qc_result = db.get_qc_results(
        instrument_id=instrument_id, sample_list=[clicked_sample], is_bio_standard=True)["qc_result"].values[0]
    df_sample_info["QC Result"] = [qc_result]

    # Reshape into a two-column table (field name, field value): duplicate the first row, overwrite the
    # original first row with the field names, then transpose.
    # pd.concat is used because DataFrame.append is no longer available in pandas 2.0+
    df_sample_info = pd.concat([df_sample_info, df_sample_info.iloc[[0]]])
    df_sample_info.iloc[0] = df_sample_info.columns.tolist()
    df_sample_info = df_sample_info.rename(index={0: "Sample Information"})
    df_sample_info = df_sample_info.transpose()

    return df_sample_features, df_sample_info
500
501
def load_istd_rt_plot(dataframe, samples, internal_standard, retention_times):

    """
    Returns line plot figure of retention times (for a selected internal standard) across samples.

    Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

    Args:
        dataframe (DataFrame):
            Table of retention times for internal standards (columns) across samples (rows)
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard
        retention_times (dict):
            Dictionary with key-value pairs of type { internal_standard: retention_time }

    Returns:
        plotly.express.line object: Plotly line plot of retention times (for the selected internal standard) across samples.
    """

    # Work on a copy so that rounding neither touches the caller's table nor triggers a chained-assignment warning
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)].copy()
    df_filtered_by_samples[internal_standard] = df_filtered_by_samples[internal_standard].astype(float).round(3)

    # The y-axis spans 0.1 min on either side of the expected retention time
    expected_rt = retention_times[internal_standard]
    y_min = expected_rt - 0.1
    y_max = expected_rt + 0.1

    # x and hover_name are taken from the filtered table's own "Sample" column, so that every point is
    # labelled with the sample its value belongs to even if `samples` is ordered differently or
    # contains samples that are absent from the table
    fig = px.line(df_filtered_by_samples,
        title="Retention Time vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        height=600,
        markers=True,
        hover_name="Sample",
        labels={"variable": "Internal Standard",
              "index": "Sample",
              "value": "Retention Time"},
        log_x=False)
    fig.update_layout(
        transition_duration=500,
        clickmode="event",
        showlegend=False,
        legend_title_text="Internal Standards",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")
    fig.update_yaxes(title="Retention Time (min)", range=[y_min, y_max])

    # Dashed reference line at the expected retention time
    fig.add_hline(y=expected_rt, line_width=2, line_dash="dash")
    fig.update_traces(hovertemplate="Sample: %{x} <br>Retention Time: %{y} min<br>")

    return fig
552
553
def load_istd_intensity_plot(dataframe, samples, internal_standard, treatments):

    """
    Returns bar plot figure of peak intensities (for a selected internal standard) across samples.

    Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

    Args:
        dataframe (DataFrame):
            Table of intensities for internal standards (columns) across samples (rows)
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard
        treatments (DataFrame):
            DataFrame with sample treatments (from the metadata file) mapped to sample ID's.
            Read through its "Filename" and "Treatment" columns; may be empty.

    Returns:
        plotly.express.bar object: Plotly bar plot of intensities (for the selected internal standard) across samples.
    """

    # Work on a copy so that adding the "Treatment" column neither touches the caller's table
    # nor triggers a chained-assignment warning
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)].copy()

    if len(treatments) > 0:
        # Map treatments to sample names. Values without a matching "Filename" are left as-is by
        # replace(), so such samples end up with their own name in the "Treatment" column.
        sample_to_treatment = treatments.set_index("Filename")["Treatment"]
        df_filtered_by_samples["Treatment"] = df_filtered_by_samples["Sample"].replace(sample_to_treatment).astype(str)
    else:
        df_filtered_by_samples["Treatment"] = " "

    fig = px.bar(df_filtered_by_samples,
        title="Intensity vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        text="Sample",
        color="Treatment",
        height=600)
    fig.update_layout(
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        xaxis=dict(rangeslider=dict(visible=True), autorange=True),
        legend=dict(font=dict(size=10)),
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")
    fig.update_yaxes(title="Intensity")
    fig.update_traces(textposition="outside", hovertemplate="Sample: %{x}<br>Intensity: %{y:.2e}<br>")

    return fig
606
607
def load_istd_delta_mz_plot(dataframe, samples, internal_standard):

    """
    Returns line plot figure of delta m/z (for a selected internal standard) across samples.

    Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

    Args:
        dataframe (DataFrame):
            Table of delta m/z's for internal standards (columns) across samples (rows)
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard

    Returns:
        plotly.express.line object: Plotly line plot of delta m/z (for the selected internal standard) across samples.
    """

    # Get delta m/z results for selected samples
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)]

    # x and hover_name are taken from the filtered table's own "Sample" column, so that every point is
    # labelled with the sample its value belongs to even if `samples` is ordered differently or
    # contains samples that are absent from the table
    fig = px.line(df_filtered_by_samples,
        title="Delta m/z vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        height=600,
        markers=True,
        hover_name="Sample",
        labels={"variable": "Internal Standard",
              "index": "Sample",
              "value": "Delta m/z"},
        log_x=False)
    fig.update_layout(
        transition_duration=500,
        clickmode="event",
        showlegend=False,
        legend_title_text="Internal Standards",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")

    # Fixed y-axis window of +/- 0.01 around zero
    fig.update_yaxes(title="delta m/z", range=[-0.01, 0.01])
    fig.update_traces(hovertemplate="Sample: %{x} <br>Delta m/z: %{y}<br>")

    return fig
652
653
def load_bio_feature_plot(run_id, df_rt, df_mz, df_intensity):

    """
    Returns scatter plot figure of precursor m/z vs. retention time for targeted features in the biological standard.

    To further clarify:
        x-axis: retention times
        y-axis: precursor masses
        colorscale: percent change in intensity for each feature, compared to the average intensity across
            the runs listed before this one in df_intensity (missing intensities count as 0)

    Percent changes of +/- infinity (previous average of 0) are clamped to +/- 100, and undefined
    percent changes (0 / 0) are shown as 0. If there are no previous runs, every percent change is 0.

    Documentation on Plotly scatter plots: https://plotly.com/python-api-reference/generated/plotly.express.scatter.html

    Args:
        run_id (str):
            Run ID to query the biological standard from
        df_rt (DataFrame):
            Table of retention times for targeted features (columns) across instrument runs (rows)
        df_mz (DataFrame):
            Table of precursor masses for targeted features (columns) across instrument runs (rows)
        df_intensity (DataFrame):
            Table of intensities for targeted features (columns) across instrument runs (rows)

    Returns:
        plotly.express.scatter object: m/z - RT scatter plot for targeted metabolites in the biological standard

    Raises:
        IndexError: If run_id has no row (matched on the "Name" column) in one of the tables.
    """

    # Get metabolites (every column after the first one; presumably the first column is "Name" -- verify against caller)
    metabolites = df_mz.columns.tolist()
    del metabolites[0]

    def values_for_run(df):
        # Feature values (as floats) from the first row whose "Name" matches the selected run
        return df.loc[df["Name"] == run_id][metabolites].iloc[0].astype(float).values

    # Construct new DataFrame
    bio_df = pd.DataFrame()
    bio_df["Metabolite name"] = metabolites
    bio_df["Precursor m/z"] = values_for_run(df_mz)
    bio_df["Retention time (min)"] = values_for_run(df_rt)
    bio_df["Intensity"] = values_for_run(df_intensity)

    # Get percent change of feature intensities (only for runs previous to this one)
    df_intensity = df_intensity.fillna(0)

    # Locate the run by row position (not by index label), so the slice is correct for any index
    position_of_run = int(np.flatnonzero((df_intensity["Name"] == run_id).to_numpy())[0])
    df_intensity = df_intensity.iloc[:position_of_run + 1]

    feature_intensity_from_study = values_for_run(df_intensity)

    if len(df_intensity) > 1:
        average_intensity_in_studies = df_intensity.loc[
            df_intensity["Name"] != run_id][metabolites].astype(float).mean().values

        # Division by a zero average is expected here; the resulting inf / NaN values are handled below
        with np.errstate(divide="ignore", invalid="ignore"):
            percent_change = ((feature_intensity_from_study - average_intensity_in_studies)
                / average_intensity_in_studies) * 100

        # Clamp infinite changes to the limits of the colorscale (only in this column)
        bio_df["% Change"] = percent_change
        bio_df["% Change"] = bio_df["% Change"].replace([np.inf, -np.inf], [100, -100])
    else:
        bio_df["% Change"] = 0

    # Plot readiness
    bio_df["Retention time (min)"] = bio_df["Retention time (min)"].round(2)
    bio_df["% Change"] = bio_df["% Change"].round(1).fillna(0)

    labels = {"Retention time (min)": "Retention time (min)",
              "Precursor m/z": "Precursor m/z",
              "Intensity": "Intensity",
              "Metabolite name": "Metabolite name"}

    # Colorscale (reversed so that the list runs from the red shades to the blue shades)
    diverging_colorscale = ["#1a88ff", "#3395ff", "#4da3ff", "#a186ca", "#e7727d", "#e35d6a", "#e04958"]
    diverging_colorscale.reverse()

    fig = px.scatter(bio_df,
        title="Biological Standard – Targeted Metabolites",
        x="Retention time (min)",
        y="Precursor m/z",
        height=600,
        hover_name="Metabolite name",
        color="% Change",
        color_continuous_scale=diverging_colorscale,
        labels=labels,
        log_x=False,
        range_color=[-100, 100])
    fig.update_layout(
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(title="Retention time (min)")
    fig.update_yaxes(title="Precursor m/z")
    fig.update_traces(marker={"size": 30})

    return fig
742
743
def load_bio_benchmark_plot(dataframe, metabolite_name):

    """
    Builds a bar plot of one targeted metabolite's intensity in the biological standard, with one bar per
    instrument run.

    Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

    Args:
        dataframe (DataFrame):
            Table of intensities for targeted metabolites (columns) across instrument runs (rows).
            Runs are read from the "Name" column.
        metabolite_name (str):
            The targeted metabolite (column) to plot

    Returns:
        plotly.express.bar object: Plotly bar plot of intensities (for the selected targeted metabolite) across instrument runs.
    """

    # One bar per instrument run, labelled with the run name
    run_names = dataframe["Name"].astype(str).tolist()

    # Height of each bar: the metabolite's intensity in that run
    bar_heights = dataframe[metabolite_name].values.tolist()
    if not bar_heights:
        bar_heights = [0] * len(run_names)

    # Text drawn above each bar: the intensity in scientific notation
    bar_labels = ["{:.2e}".format(height) for height in bar_heights]

    hover_text = f"{metabolite_name}" + "<br>Study: %{x} <br>Intensity: %{text}<br>"

    fig = px.bar(x=run_names, y=bar_heights, text=bar_labels, height=600)

    fig.update_layout(
        title="Biological Standard Benchmark",
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        xaxis=dict(rangeslider=dict(visible=True), autorange=True),
        legend=dict(font=dict(size=10)),
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(title="Study")
    fig.update_yaxes(title="Intensity")
    fig.update_traces(textposition="outside", hovertemplate=hover_text)

    return fig
794
795
def get_internal_standard_index(previous, next, max):

    """
    Turns the click counts of the "previous" and "next" buttons into a position in a list of internal standards.

    The result is used to index a list of internal standards in the populate_istd_rt_plot(),
    populate_istd_intensity_plot(), and populate_istd_mz_plot() callback functions of the DashWebApp module.

    This function relies on the previous button's n_clicks to be reset to None on every click.

    Note: the parameter names `next` and `max` shadow Python builtins inside this function. They are part of
    the function's interface, so they are left unchanged.

    Args:
        previous (int):
            n_clicks for the "previous" button (None, unless previous button is clicked)
        next (int):
            n_clicks for the "next" button
        max (int):
            Number of internal standards (maximum index for list of internal standards)

    Returns:
        Integer index for a list of internal standards. Stepping back before "next" was ever clicked wraps to
        the last position (max - 1); any position outside of 0 to max - 1 is reset to 0.
    """

    stepped_back = previous is not None

    # Going back from the very first internal standard wraps around to the last one
    if stepped_back and next in (None, 0):
        return max - 1

    if stepped_back:
        position = next - previous
    elif next is None:
        position = 0
    else:
        position = next

    # Anything that ran off either end of the list starts over at the beginning
    return position if 0 <= position < max else 0
def get_qc_results( instrument_id, run_id, status='Complete', biological_standard=None, biological_standards_only=False, for_benchmark_plot=False):
 20def get_qc_results(instrument_id, run_id, status="Complete", biological_standard=None, biological_standards_only=False, for_benchmark_plot=False):
 21
 22    """
 23    Loads and parses QC results (for samples and biological standards) into Plotly graphs.
 24
 25    This function will return whatever tables it can in a tuple, and fill None for the tables that throw errors in parsing.
 26    This is so that an error in retrieving one table will not prevent retrieving other tables.
 27
 28    Depending on whether Google Drive sync is enabled, this function will load data from either CSV files
 29    (for active instrument runs) or the local instrument database (for completed runs).
 30
 31    Regardless of whether Google Drive sync is enabled, the instrument computer (on which the run was started) will
 32    always load data from its local SQLite database.
 33
 34    Args:
 35        instrument_id (str):
 36            Instrument ID
 37        run_id (str):
 38            Instrument run ID (Job ID)
 39        status (str):
 40            QC job status, either "Active" or "Complete"
 41        biological_standard (str, default None):
 42            If specified, returns QC results for given biological standard associated with job
 43        biological_standards_only (bool, default False):
 44            If specified, returns QC results for biological standards only
 45        for_benchmark_plot (bool, default False):
 46            If specified, returns QC results specifically for biological standard benchmark plot
 47
 48    Returns:
 49        tuple: Tuple containing tables of various sample data in JSON "records" format. Order is as follows:
 50            1. df_rt_pos: Retention times for internal standards in positive mode
 51            2. df_rt_neg: Retention times for internal standards in negative mode
 52            3. df_intensity_pos: Intensities for internal standards in positive mode
 53            4. df_intensity_neg: Intensities for internal standards in negative mode
 54            5. df_mz_pos: Precursor masses for internal standards in positive mode
 55            6. df_mz_neg: Precursor masses for internal standards in negative mode
 56            7. df_sequence: Acquisition sequence table
 57            8. df_metadata: Sample metadata table
 58            9. df_bio_rt_pos: Retention times for targeted features in biological standard sample in positive mode
 59            10. df_bio_rt_neg: Retention times for targeted features in biological standard sample in negative mode
 60            11. df_bio_intensity_pos: Intensities for targeted features in biological standard sample in positive mode
 61            12. df_bio_intensity_neg: Intensities for targeted features in biological standard sample in negative mode
 62            13. df_bio_mz_pos: Precursor masses for targeted features in biological standard sample in positive mode
 63            14. df_bio_mz_neg: Precursor masses for targeted features in biological standard sample in negative mode
 64            15. resources: Metadata for instrument run
 65            16. df_samples: Table containing sample names, polarities, autosampler positions, and QC results
 66            17. pos_internal_standards: List of positive mode internal standards
 67            18. neg_internal_standards: List of negative mode internal standards
 68            19. df_delta_rt_pos: Delta RT's for internal standards in positive mode
 69            20. df_delta_rt_neg: Delta RT's for internal standards in negative mode
 70            21. df_in_run_delta_rt_pos: In-run delta RT's for internal standards in positive mode
 71            22. df_in_run_delta_rt_neg: In-run delta RT's for internal standards in negative mode
 72            23. df_delta_mz_pos: Delta m/z's for internal standards in positive mode
 73            24. df_delta_mz_neg: Delta m/z's for internal standards in negative mode
 74            25. df_warnings_pos: QC warnings for internal standards in positive mode
 75            26. df_warnings_neg: QC warnings for internal standards in negative mode
 76            27. df_fails_pos: QC fails for internal standards in positive mode
 77            28. df_fails_neg: QC fails for internal standards in negative mode
 78    """
 79
 80    # Get run information / metadata
 81    if db.get_device_identity() != instrument_id and db.sync_is_enabled():
 82        if status == "Complete":
 83            load_from = "database"
 84        elif status == "Active":
 85            load_from = "csv"
 86    else:
 87        load_from = "database"
 88
 89    if load_from == "database":
 90        df_run = db.get_instrument_run(instrument_id, run_id)
 91    elif load_from == "csv":
 92        db.download_qc_results(instrument_id, run_id)
 93        df_run = db.get_instrument_run_from_csv(instrument_id, run_id)
 94
 95    chromatography = df_run["chromatography"].values[0]
 96    df_sequence = df_run["sequence"].values[0]
 97    df_metadata = df_run["metadata"].values[0]
 98    completed = df_run["completed"].astype(int).tolist()[0]
 99
100    biological_standards = df_run["biological_standards"].values[0]
101    if biological_standards is not None:
102        biological_standards = ast.literal_eval(biological_standards)
103
104    # Get internal standards in chromatography method
105    precursor_mz_dict = db.get_internal_standards_dict(chromatography, "precursor_mz")
106    retention_times_dict = db.get_internal_standards_dict(chromatography, "retention_time")
107
108    resources = {
109        "instrument": instrument_id,
110        "run_id": run_id,
111        "status": status,
112        "chromatography": chromatography,
113        "precursor_mass_dict": precursor_mz_dict,
114        "retention_times_dict": retention_times_dict,
115        "samples_completed": completed,
116        "biological_standards": json.dumps(biological_standards)
117    }
118
119    # Parse m/z, RT, and intensity data for biological standards into DataFrames
120    if biological_standards is not None:
121
122        if biological_standard is None:
123            biological_standard = biological_standards[0]
124
125        try:
126            df_bio_mz_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
127                result_type="precursor_mz", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
128        except Exception as error:
129            print("Error loading positive (–) mode biological standard precursor m/z data:", error)
130            df_bio_mz_pos = None
131
132        try:
133            df_bio_rt_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
134                result_type="retention_time", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
135        except Exception as error:
136            print("Error loading positive (–) mode biological standard precursor m/z data:", error)
137            df_bio_rt_pos = None
138
139        try:
140            df_bio_intensity_pos = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
141                result_type="intensity", polarity="Pos", biological_standard=biological_standard, load_from=load_from)
142        except Exception as error:
143            print("Error loading positive (–) mode biological standard retention time data:", error)
144            df_bio_intensity_pos = None
145
146        try:
147            df_bio_mz_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
148                result_type="precursor_mz", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
149        except Exception as error:
150            print("Error loading negative (–) mode biological standard precursor m/z data:", error)
151            df_bio_mz_neg = None
152
153        try:
154            df_bio_rt_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
155                result_type="retention_time", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
156        except Exception as error:
157            print("Error loading positive (–) mode biological standard retention time data:", error)
158            df_bio_rt_neg = None
159
160        try:
161            df_bio_intensity_neg = db.parse_biological_standard_data(instrument_id=instrument_id, run_id=run_id,
162                result_type="intensity", polarity="Neg", biological_standard=biological_standard, load_from=load_from)
163        except Exception as error:
164            print("Error loading negative (–) mode biological standard intensity data:", error)
165            df_bio_intensity_neg = None
166
167    else:
168        df_bio_mz_pos = None
169        df_bio_rt_pos = None
170        df_bio_intensity_pos = None
171        df_bio_mz_neg = None
172        df_bio_rt_neg = None
173        df_bio_intensity_neg = None
174
175    if biological_standards_only:
176        return df_bio_rt_pos, df_bio_rt_neg, df_bio_intensity_pos, df_bio_intensity_neg, df_bio_mz_pos, df_bio_mz_neg
177    elif for_benchmark_plot:
178        return df_bio_intensity_pos, df_bio_intensity_neg
179
180    # Parse m/z, RT, and intensity data for internal standards into DataFrames
181    try:
182        df_mz_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
183            run_id=run_id, result_type="precursor_mz", polarity="Pos", load_from=load_from)
184    except Exception as error:
185        print("Error loading positive (+) mode precursor m/z data:", error)
186        df_mz_pos = None
187
188    try:
189        df_rt_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
190            run_id=run_id, result_type="retention_time", polarity="Pos", load_from=load_from)
191    except Exception as error:
192        print("Error loading positive (+) mode retention time data:", error)
193        df_rt_pos = None
194
195    try:
196        df_intensity_pos = db.parse_internal_standard_data(instrument_id=instrument_id,
197            run_id=run_id, result_type="intensity", polarity="Pos", load_from=load_from)
198    except Exception as error:
199        print("Error loading positive (+) mode intensity data:", error)
200        df_intensity_pos = None
201
202    try:
203        df_mz_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
204            run_id=run_id, result_type="precursor_mz", polarity="Neg", load_from=load_from)
205    except Exception as error:
206        print("Error loading negative (–) mode precursor m/z data:", error)
207        df_mz_neg = None
208
209    try:
210        df_rt_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
211            run_id=run_id, result_type="retention_time", polarity="Neg", load_from=load_from)
212    except Exception as error:
213        print("Error loading negative (–) mode retention time data:", error)
214        df_rt_neg = None
215
216    try:
217        df_intensity_neg = db.parse_internal_standard_data(instrument_id=instrument_id,
218            run_id=run_id, result_type="intensity", polarity="Neg", load_from=load_from)
219    except Exception as error:
220        print("Error loading negative (–) mode intensity data:", error)
221        df_intensity_neg = None
222
223    try:
224        df_delta_rt_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
225            run_id=run_id, result_type="Delta RT", polarity="Pos", load_from=load_from)
226    except Exception as error:
227        print("Error loading positive (+) mode delta RT data:", error)
228        df_delta_rt_pos = None
229
230    try:
231        df_delta_rt_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
232            run_id=run_id, result_type="Delta RT", polarity="Neg", load_from=load_from)
233    except Exception as error:
234        print("Error loading negative (–) mode delta RT data:", error)
235        df_delta_rt_neg = None
236
237    try:
238        df_in_run_delta_rt_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
239            run_id=run_id, result_type="In-run delta RT", polarity="Pos", load_from=load_from)
240    except Exception as error:
241        print("Error loading positive (+) mode in-run delta RT data:", error)
242        df_in_run_delta_rt_pos = None
243
244    try:
245        df_in_run_delta_rt_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
246            run_id=run_id, result_type="In-run delta RT", polarity="Neg", load_from=load_from)
247    except Exception as error:
248        print("Error loading negative (–) mode in-run delta RT data:", error)
249        df_in_run_delta_rt_neg = None
250
251    try:
252        df_delta_mz_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
253            run_id=run_id, result_type="Delta m/z", polarity="Pos", load_from=load_from)
254    except Exception as error:
255        print("Error loading positive (+) mode delta m/z data:", error)
256        df_delta_mz_pos = None
257
258    try:
259        df_delta_mz_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
260            run_id=run_id, result_type="Delta m/z", polarity="Neg", load_from=load_from)
261    except Exception as error:
262        print("Error loading negative (–) mode delta m/z data:", error)
263        df_delta_mz_neg = None
264
265    try:
266        df_warnings_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
267            run_id=run_id, result_type="Warnings", polarity="Pos", load_from=load_from)
268    except Exception as error:
269        print("Error loading positive (+) mode QC warnings data:", error)
270        df_warnings_pos = None
271
272    try:
273        df_warnings_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
274            run_id=run_id, result_type="Warnings", polarity="Neg", load_from=load_from)
275    except Exception as error:
276        print("Error loading negative (–) mode QC warnings data:", error)
277        df_warnings_neg = None
278
279    try:
280        df_fails_pos = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
281            run_id=run_id, result_type="Fails", polarity="Pos", load_from=load_from)
282    except Exception as error:
283        print("Error loading positive (+) mode QC fails data:", error)
284        df_fails_pos = None
285
286    try:
287        df_fails_neg = db.parse_internal_standard_qc_data(instrument_id=instrument_id,
288            run_id=run_id, result_type="Fails", polarity="Neg", load_from=load_from)
289    except Exception as error:
290        print("Error loading negative (+) mode QC fails data:", error)
291        df_fails_neg = None
292
293    # Generate DataFrame for sample table
294    try:
295        if load_from == "database":
296            df_samples = db.get_samples_in_run(instrument_id, run_id, "Both")
297        elif load_from == "csv":
298            df_samples = db.get_samples_from_csv(instrument_id, run_id, "Both")
299
300        df_samples = df_samples[["sample_id", "position", "qc_result", "polarity"]]
301        df_samples = df_samples.rename(
302            columns={
303                "sample_id": "Sample",
304                "position": "Position",
305                "qc_result": "QC",
306                "polarity": "Polarity"})
307        df_samples = df_samples.to_json(orient="records")
308
309    except Exception as error:
310        print("Error loading samples from database:", error)
311        traceback.print_exc()
312        df_samples = ""
313
314    # Get internal standards from data
315    if df_rt_pos is not None:
316        pos_internal_standards = pd.read_json(df_rt_pos, orient="records").columns.tolist()
317        pos_internal_standards.remove("Sample")
318    else:
319        pos_internal_standards = []
320
321    if df_rt_neg is not None:
322        neg_internal_standards = pd.read_json(df_rt_neg, orient="records").columns.tolist()
323        neg_internal_standards.remove("Sample")
324    else:
325        neg_internal_standards = []
326
327    return (df_rt_pos, df_rt_neg, df_intensity_pos, df_intensity_neg, df_mz_pos, df_mz_neg, df_sequence, df_metadata,
328        df_bio_rt_pos, df_bio_rt_neg, df_bio_intensity_pos, df_bio_intensity_neg, df_bio_mz_pos, df_bio_mz_neg,
329        json.dumps(resources), df_samples, json.dumps(pos_internal_standards), json.dumps(neg_internal_standards),
330        df_delta_rt_pos, df_delta_rt_neg, df_in_run_delta_rt_pos, df_in_run_delta_rt_neg, df_delta_mz_pos, df_delta_mz_neg,
331        df_warnings_pos, df_warnings_neg, df_fails_pos, df_fails_neg)

Loads and parses QC results (for samples and biological standards) into Plotly graphs.

This function will return whatever tables it can in a tuple, and fill None for the tables that throw errors in parsing. This is so that an error in retrieving one table will not prevent retrieving other tables.

Depending on whether Google Drive sync is enabled, this function will load data from either CSV files (for active instrument runs) or the local instrument database (for completed runs).

Regardless of whether Google Drive sync is enabled, the instrument computer (on which the run was started) will always load data from its local SQLite database.

Arguments:
  • instrument_id (str): Instrument ID
  • run_id (str): Instrument run ID (Job ID)
  • status (str): QC job status, either "Active" or "Complete"
  • biological_standard (str, default None): If specified, returns QC results for given biological standard associated with job
  • biological_standards_only (bool, default False): If specified, returns QC results for biological standards only
  • for_benchmark_plot (bool, default False): If specified, returns QC results specifically for biological standard benchmark plot
Returns:

tuple: Tuple containing tables of various sample data in JSON "records" format. Order is as follows:
  1. df_rt_pos: Retention times for internal standards in positive mode
  2. df_rt_neg: Retention times for internal standards in negative mode
  3. df_intensity_pos: Intensities for internal standards in positive mode
  4. df_intensity_neg: Intensities for internal standards in negative mode
  5. df_mz_pos: Precursor masses for internal standards in positive mode
  6. df_mz_neg: Precursor masses for internal standards in negative mode
  7. df_sequence: Acquisition sequence table
  8. df_metadata: Sample metadata table
  9. df_bio_rt_pos: Retention times for targeted features in biological standard sample in positive mode
  10. df_bio_rt_neg: Retention times for targeted features in biological standard sample in negative mode
  11. df_bio_intensity_pos: Intensities for targeted features in biological standard sample in positive mode
  12. df_bio_intensity_neg: Intensities for targeted features in biological standard sample in negative mode
  13. df_bio_mz_pos: Precursor masses for targeted features in biological standard sample in positive mode
  14. df_bio_mz_neg: Precursor masses for targeted features in biological standard sample in negative mode
  15. resources: Metadata for instrument run
  16. df_samples: Table containing sample names, polarities, autosampler positions, and QC results
  17. pos_internal_standards: List of positive mode internal standards
  18. neg_internal_standards: List of negative mode internal standards
  19. df_delta_rt_pos: Delta RT's for internal standards in positive mode
  20. df_delta_rt_neg: Delta RT's for internal standards in negative mode
  21. df_in_run_delta_rt_pos: In-run delta RT's for internal standards in positive mode
  22. df_in_run_delta_rt_neg: In-run delta RT's for internal standards in negative mode
  23. df_delta_mz_pos: Delta m/z's for internal standards in positive mode
  24. df_delta_mz_neg: Delta m/z's for internal standards in negative mode
  25. df_warnings_pos: QC warnings for internal standards in positive mode
  26. df_warnings_neg: QC warnings for internal standards in negative mode
  27. df_fails_pos: QC fails for internal standards in positive mode
  28. df_fails_neg: QC fails for internal standards in negative mode

def generate_sample_metadata_dataframe( sample, df_rt, df_mz, df_intensity, df_delta_rt, df_in_run_delta_rt, df_delta_mz, df_warnings, df_fails, df_sequence, df_metadata):
def generate_sample_metadata_dataframe(sample, df_rt, df_mz, df_intensity, df_delta_rt, df_in_run_delta_rt,
    df_delta_mz, df_warnings, df_fails, df_sequence, df_metadata):

    """
    Aggregates tables of relevant data from the acquisition sequence, metadata file, and QC results for a selected sample.

    Returns two DataFrames, in this order:
        1. Internal standard m/z, RT, intensity, delta m/z, delta RT, in-run delta RT, warnings, and fails
        2. Acquisition sequence and sample metadata information

    None of the DataFrames passed in are modified.

    Args:
        sample (str):
            Sample ID
        df_rt (DataFrame):
            Retention times for internal standards (columns) across samples (rows)
        df_mz (DataFrame):
            Precursor masses for internal standards (columns) across samples (rows)
        df_intensity (DataFrame):
            Intensities for internal standards (columns) across samples (rows)
        df_delta_rt (DataFrame):
            Delta RT's from library values for internal standards (columns) across samples (rows)
        df_in_run_delta_rt (DataFrame):
            Delta RT's from in-run values for internal standards (columns) across samples (rows)
        df_delta_mz (DataFrame):
            Delta m/z's from library values for internal standards (columns) across samples (rows)
        df_warnings (DataFrame):
            QC warnings for internal standards (columns) across samples (rows)
        df_fails (DataFrame):
            QC fails for internal standards (columns) across samples (rows)
        df_sequence (DataFrame):
            Acquisition sequence table
        df_metadata (DataFrame):
            Sample metadata table

    Returns:
        Tuple containing two DataFrames, the first storing internal standard data and the second storing sample metadata.
        The second DataFrame has one row per field and two columns, both labelled "Sample Information": the field
        name and its value.

    Raises:
        IndexError: If the sample has no row in one of the QC tables, or if no sample information could be
            collected from the acquisition sequence and metadata tables.
    """

    df_sample_istd = pd.DataFrame()
    df_sample_info = pd.DataFrame()

    # Index the selected sample, then make sure all columns in all dataframes are in the same order
    columns = df_rt.columns.tolist()
    internal_standards = list(columns)
    internal_standards.remove("Sample")
    df_sample_istd["Internal Standard"] = internal_standards

    def sample_row(df, blank_to_nan=False):
        # Row of the selected sample, with internal standards in the column order of df_rt.
        # Blank (" ") cells are optionally turned into NaN on a copy, leaving the caller's table untouched.
        if blank_to_nan:
            df = df.replace(" ", np.nan)
        return df.loc[df["Sample"] == sample][columns].drop(columns=["Sample"]).iloc[0]

    # Precursor m/z
    df_sample_istd["m/z"] = sample_row(df_mz).astype(float).values.tolist()

    # Retention times
    df_sample_istd["RT"] = sample_row(df_rt).astype(float).round(2).values.tolist()

    # Intensities (missing values are shown as 0, formatted in scientific notation)
    intensities = sample_row(df_intensity).fillna(0).astype(float).values.tolist()
    df_sample_istd["Intensity"] = ["{:.2e}".format(x) for x in intensities]

    # Delta m/z
    df_sample_istd["Delta m/z"] = sample_row(df_delta_mz, blank_to_nan=True).astype(float).round(6).values.tolist()

    # Delta RT
    df_sample_istd["Delta RT"] = sample_row(df_delta_rt, blank_to_nan=True).astype(float).round(3).values.tolist()

    # In-run delta RT
    df_sample_istd["In-Run Delta RT"] = sample_row(
        df_in_run_delta_rt, blank_to_nan=True).astype(float).round(3).values.tolist()

    # Warnings
    df_sample_istd["Warnings"] = sample_row(df_warnings, blank_to_nan=True).astype(str).values.tolist()

    # Fails
    df_sample_istd["Fails"] = sample_row(df_fails, blank_to_nan=True).astype(str).values.tolist()

    if len(df_sequence) > 0:
        df_sequence = df_sequence.loc[df_sequence["File Name"].astype(str) == sample]
        df_sample_info["Sample ID"] = df_sequence["L1 Study"].astype(str).values
        df_sample_info["Position"] = df_sequence["Position"].astype(str).values
        df_sample_info["Injection Volume"] = df_sequence["Inj Vol"].astype(str).values + " uL"
        df_sample_info["Instrument Method"] = df_sequence["Instrument Method"].astype(str).values

    if len(df_metadata) > 0:
        df_metadata = df_metadata.loc[df_metadata["Filename"].astype(str) == sample]
        if len(df_metadata) > 0:
            df_sample_info["Species"] = df_metadata["Species"].astype(str).values
            df_sample_info["Matrix"] = df_metadata["Matrix"].astype(str).values
            df_sample_info["Growth-Harvest Conditions"] = df_metadata["Growth-Harvest Conditions"].astype(str).values
            df_sample_info["Treatment"] = df_metadata["Treatment"].astype(str).values

    # Reshape into a vertical table: duplicate the first row at the end, overwrite the first row with the
    # field names, then transpose so that each field becomes a (name, value) row.
    # pd.concat is used because DataFrame.append was removed in pandas 2.0.
    df_sample_info = pd.concat([df_sample_info, df_sample_info.iloc[[0]]])
    df_sample_info.iloc[0] = df_sample_info.columns.tolist()
    df_sample_info = df_sample_info.rename(index={0: "Sample Information"})
    df_sample_info = df_sample_info.transpose()

    return df_sample_istd, df_sample_info

Aggregates tables of relevant data from the acquisition sequence, metadata file, and QC results for a selected sample.

Returns two DataFrames by aggregating the following information:
  1. Acquisition sequence and sample metadata information
  2. Internal standard m/z, RT, intensity, delta m/z, delta RT, in-run delta RT, warnings, and fails
Arguments:
  • sample (str): Sample ID
  • df_rt (DataFrame): Retention times for internal standards (columns) across samples (rows)
  • df_mz (DataFrame): Precursor masses for internal standards (columns) across samples (rows)
  • df_intensity (DataFrame): Intensities for internal standards (columns) across samples (rows)
  • df_delta_rt (DataFrame): Delta RT's from library values for internal standards (columns) across samples (rows)
  • df_in_run_delta_rt (DataFrame): Delta RT's from in-run values for internal standards (columns) across samples (rows)
  • df_delta_mz (DataFrame): Delta m/z's from library values for internal standards (columns) across samples (rows)
  • df_warnings (DataFrame): QC warnings for internal standards (columns) across samples (rows)
  • df_fails (DataFrame): QC fails for internal standards (columns) across samples (rows)
  • df_sequence (DataFrame): Acquisition sequence table
  • df_metadata (DataFrame): Sample metadata table
Returns:

Tuple containing two DataFrames, the first storing internal standard data and the second storing sample metadata.

def generate_bio_standard_dataframe(clicked_sample, instrument_id, run_id, df_rt, df_mz, df_intensity):
def generate_bio_standard_dataframe(clicked_sample, instrument_id, run_id, df_rt, df_mz, df_intensity):

    """
    Aggregates data for a selected biological standard.

    TODO: More metrics could be added to sample information cards for biological standards here.

    Aggregates and returns 2 DataFrames for a selected sample:
        1. Targeted metabolite m/z, RT, and intensity
        2. Sample information (sample ID and QC result)

    Args:
        clicked_sample (str):
            Sample ID
        instrument_id (str):
            Instrument ID
        run_id (str):
            Instrument run ID (job ID); currently not used by this function
        df_rt (DataFrame):
            Retention times of targeted metabolites in the biological standard
        df_mz (DataFrame):
            Precursor masses of targeted metabolites in the biological standard
        df_intensity (DataFrame):
            Intensities of targeted metabolites in the biological standard

    Returns:
        Tuple containing two DataFrames, the first storing targeted metabolites data and the second storing
        sample information. The second DataFrame has two columns, both labeled "Sample Information": the first
        holds the field names and the second holds the corresponding values.
    """

    # Every column except "Name" is treated as a targeted metabolite
    metabolites = df_mz.columns.tolist()
    metabolites.remove("Name")

    # Only the first row of each table is read
    df_sample_features = pd.DataFrame()
    df_sample_features["Metabolite name"] = metabolites
    df_sample_features["Precursor m/z"] = df_mz[metabolites].iloc[0].astype(float).values
    df_sample_features["Retention time (min)"] = df_rt[metabolites].iloc[0].astype(float).round(3).values
    intensities = df_intensity[metabolites].iloc[0].fillna(0).astype(float).values.tolist()
    df_sample_features["Intensity"] = ["{:.2e}".format(x) for x in intensities]

    qc_result = db.get_qc_results(
        instrument_id=instrument_id, sample_list=[clicked_sample], is_bio_standard=True)["qc_result"].values[0]

    # Build the (field name, value) table directly. This yields the same layout the previous
    # append / rename / transpose sequence produced, without DataFrame.append (removed in pandas 2.0).
    fields = ["Sample ID", "QC Result"]
    values = [clicked_sample, qc_result]
    df_sample_info = pd.DataFrame(
        list(zip(fields, values)),
        index=fields,
        columns=["Sample Information", "Sample Information"])

    return df_sample_features, df_sample_info

Aggregates data for a selected biological standard.

TODO: More metrics could be added to sample information cards for biological standards here.

Aggregates and returns 2 DataFrames for a selected sample: (1) QC result and causes; (2) targeted metabolite m/z, RT, intensity, delta RT, and percent change.

Arguments:
  • clicked_sample (str): Sample ID
  • instrument_id (str): Instrument ID
  • run_id (str): Instrument run ID (job ID)
  • df_rt (DataFrame): Retention times of targeted metabolites in the biological standard
  • df_mz (DataFrame): Precursor masses of targeted metabolites in the biological standard
  • df_intensity (DataFrame): Intensities of targeted metabolites in the biological standard
Returns:

Tuple containing two DataFrames, the first storing targeted metabolites data and the second storing sample metadata.

def load_istd_rt_plot(dataframe, samples, internal_standard, retention_times):
def load_istd_rt_plot(dataframe, samples, internal_standard, retention_times):

    """
    Returns line plot figure of retention times (for a selected internal standard) across samples.

    Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

    Args:
        dataframe (DataFrame):
            Table of retention times for internal standards (columns) across samples (rows);
            must contain a "Sample" column
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard
        retention_times (dict):
            Dictionary with key-value pairs of type { internal_standard: retention_time }

    Returns:
        plotly.express.line object: Plotly line plot of retention times (for the selected internal standard) across samples.
    """

    # Work on a copy so that rounding does not write into a slice of the caller's DataFrame
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)].copy()
    df_filtered_by_samples[internal_standard] = df_filtered_by_samples[internal_standard].astype(float).round(3)

    # Y-axis window of +/- 0.1 min around the reference retention time
    reference_rt = retention_times[internal_standard]
    y_min = reference_rt - 0.1
    y_max = reference_rt + 0.1

    # Take sample names from the filtered rows themselves so each point is labeled with its own
    # sample, even if `samples` is ordered differently or lists samples missing from the table
    fig = px.line(df_filtered_by_samples,
        title="Retention Time vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        height=600,
        markers=True,
        hover_name="Sample",
        labels={"variable": "Internal Standard",
              "index": "Sample",
              "value": "Retention Time"},
        log_x=False)
    fig.update_layout(
        transition_duration=500,
        clickmode="event",
        showlegend=False,
        legend_title_text="Internal Standards",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")
    fig.update_yaxes(title="Retention Time (min)", range=[y_min, y_max])

    # Dashed horizontal line marking the reference retention time
    fig.add_hline(y=reference_rt, line_width=2, line_dash="dash")
    fig.update_traces(hovertemplate="Sample: %{x} <br>Retention Time: %{y} min<br>")

    return fig

Returns line plot figure of retention times (for a selected internal standard) across samples.

Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

Arguments:
  • dataframe (DataFrame): Table of retention times for internal standards (columns) across samples (rows)
  • samples (list): Samples to query from the DataFrame
  • internal_standard (str): The selected internal standard
  • retention_times (dict): Dictionary with key-value pairs of type { internal_standard: retention_time }
Returns:

plotly.express.line object: Plotly line plot of retention times (for the selected internal standard) across samples.

def load_istd_intensity_plot(dataframe, samples, internal_standard, treatments):
def load_istd_intensity_plot(dataframe, samples, internal_standard, treatments):

    """
    Returns bar plot figure of peak intensities (for a selected internal standard) across samples.

    Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

    Args:
        dataframe (DataFrame):
            Table of intensities for internal standards (columns) across samples (rows);
            must contain a "Sample" column
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard
        treatments (DataFrame):
            DataFrame with sample treatments (from the metadata file) mapped to sample ID's,
            using the columns "Filename" and "Treatment"; may be empty

    Returns:
        plotly.express.bar object: Plotly bar plot of intensities (for the selected internal standard) across samples.
    """

    # Work on a copy so that adding the "Treatment" column does not write into a slice of the caller's DataFrame
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)].copy()

    if len(treatments) > 0:
        # Map treatments to sample names. Because this is a replace (not a lookup), a sample
        # with no matching "Filename" keeps its own name as its treatment label.
        treatment_by_filename = treatments.set_index("Filename")["Treatment"]
        df_filtered_by_samples["Treatment"] = df_filtered_by_samples["Sample"].replace(
            treatment_by_filename).astype(str)
    else:
        df_filtered_by_samples["Treatment"] = " "

    fig = px.bar(df_filtered_by_samples,
        title="Intensity vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        text="Sample",
        color="Treatment",
        height=600)
    fig.update_layout(
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        xaxis=dict(rangeslider=dict(visible=True), autorange=True),
        legend=dict(font=dict(size=10)),
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")
    fig.update_yaxes(title="Intensity")
    fig.update_traces(textposition="outside", hovertemplate="Sample: %{x}<br>Intensity: %{y:.2e}<br>")

    return fig

Returns bar plot figure of peak intensities (for a selected internal standard) across samples.

Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

Arguments:
  • dataframe (DataFrame): Table of intensities for internal standards (columns) across samples (rows)
  • samples (list): Samples to query from the DataFrame
  • internal_standard (str): The selected internal standard
  • treatments (DataFrame): DataFrame with sample treatments (from the metadata file) mapped to sample ID's
Returns:

plotly.express.bar object: Plotly bar plot of intensities (for the selected internal standard) across samples.

def load_istd_delta_mz_plot(dataframe, samples, internal_standard):
def load_istd_delta_mz_plot(dataframe, samples, internal_standard):

    """
    Returns line plot figure of delta m/z (for a selected internal standard) across samples.

    Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

    Args:
        dataframe (DataFrame):
            Table of delta m/z's for internal standards (columns) across samples (rows);
            must contain a "Sample" column
        samples (list):
            Samples to query from the DataFrame
        internal_standard (str):
            The selected internal standard

    Returns:
        plotly.express.line object: Plotly line plot of delta m/z (for the selected internal standard) across samples.
    """

    # Get delta m/z results for selected samples
    df_filtered_by_samples = dataframe.loc[dataframe["Sample"].isin(samples)]

    # Take sample names from the filtered rows themselves so each point is labeled with its own
    # sample, even if `samples` is ordered differently or lists samples missing from the table
    fig = px.line(df_filtered_by_samples,
        title="Delta m/z vs. Samples – " + internal_standard,
        x="Sample",
        y=internal_standard,
        height=600,
        markers=True,
        hover_name="Sample",
        labels={"variable": "Internal Standard",
              "index": "Sample",
              "value": "Delta m/z"},
        log_x=False)
    fig.update_layout(
        transition_duration=500,
        clickmode="event",
        showlegend=False,
        legend_title_text="Internal Standards",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(showticklabels=False, title="Sample")

    # Fixed y-axis window of +/- 0.01
    fig.update_yaxes(title="delta m/z", range=[-0.01, 0.01])
    fig.update_traces(hovertemplate="Sample: %{x} <br>Delta m/z: %{y}<br>")

    return fig

Returns line plot figure of delta m/z (for a selected internal standard) across samples.

Documentation on Plotly line plots: https://plotly.com/python-api-reference/generated/plotly.express.line.html

Arguments:
  • dataframe (DataFrame): Table of delta m/z's for internal standards (columns) across samples (rows)
  • samples (list): Samples to query from the DataFrame
  • internal_standard (str): The selected internal standard
Returns:

plotly.express.line object: Plotly line plot of delta m/z (for the selected internal standard) across samples.

def load_bio_feature_plot(run_id, df_rt, df_mz, df_intensity):
def load_bio_feature_plot(run_id, df_rt, df_mz, df_intensity):

    """
    Returns scatter plot figure of precursor m/z vs. retention time for targeted features in the biological standard.

    To further clarify:
        x-axis: retention times
        y-axis: precursor masses
        colorscale: percent change in intensity for each feature, compared to its average intensity
            across the runs that precede this one in df_intensity

    Documentation on Plotly scatter plots: https://plotly.com/python-api-reference/generated/plotly.express.scatter.html

    Args:
        run_id (str):
            Run ID to query the biological standard from
        df_rt (DataFrame):
            Table of retention times for targeted features (columns) across instrument runs (rows)
        df_mz (DataFrame):
            Table of precursor masses for targeted features (columns) across instrument runs (rows)
        df_intensity (DataFrame):
            Table of intensities for targeted features (columns) across instrument runs (rows)

    Returns:
        plotly.express.scatter object: m/z - RT scatter plot for targeted metabolites in the biological standard

    Raises:
        IndexError: If run_id is not present in the "Name" column of one of the tables.
    """

    # Get metabolites: every column except the "Name" column that identifies the run
    metabolites = [column for column in df_mz.columns.tolist() if column != "Name"]

    # Construct new DataFrame from the first row matching this run in each table
    bio_df = pd.DataFrame()
    bio_df["Metabolite name"] = metabolites
    bio_df["Precursor m/z"] = df_mz.loc[df_mz["Name"] == run_id][metabolites].iloc[0].astype(float).values
    bio_df["Retention time (min)"] = df_rt.loc[df_rt["Name"] == run_id][metabolites].iloc[0].astype(float).values
    bio_df["Intensity"] = df_intensity.loc[df_intensity["Name"] == run_id][metabolites].iloc[0].astype(float).values

    # Get percent change of feature intensities (only for runs previous to this one)
    df_intensity = df_intensity.fillna(0)

    # Truncate by row position rather than by index label, so the cut is correct even when the
    # table does not carry a default 0..n-1 index. A match is guaranteed here because the
    # "Intensity" lookup above would already have raised otherwise.
    position_of_run = np.flatnonzero((df_intensity["Name"] == run_id).values)[0]
    df_intensity = df_intensity.iloc[:position_of_run + 1]

    feature_intensity_from_study = df_intensity.loc[df_intensity["Name"] == run_id][metabolites].iloc[0].astype(float).values

    if len(df_intensity) > 1:
        average_intensity_in_studies = df_intensity.loc[df_intensity["Name"] != run_id][metabolites].astype(float).mean().values
        bio_df["% Change"] = ((feature_intensity_from_study - average_intensity_in_studies) / average_intensity_in_studies) * 100

        # Division by a zero average yields +/- infinity; clamp to the ends of the color range
        bio_df.replace(np.inf, 100, inplace=True)
        bio_df.replace(-np.inf, -100, inplace=True)
    else:
        # No earlier runs to compare against
        bio_df["% Change"] = 0

    # Plot readiness (0 / 0 yields NaN, which is shown as no change)
    bio_df["Retention time (min)"] = bio_df["Retention time (min)"].round(2)
    bio_df["% Change"] = bio_df["% Change"].round(1).fillna(0)

    labels = {"Retention time (min)": "Retention time (min)",
              "Precursor m/z": "Precursor m/z",
              "Intensity": "Intensity",
              "Metabolite name": "Metabolite name"}

    # Colorscale (listed blue-to-red, then reversed before use)
    diverging_colorscale = ["#1a88ff", "#3395ff", "#4da3ff", "#a186ca", "#e7727d", "#e35d6a", "#e04958"]
    diverging_colorscale.reverse()

    fig = px.scatter(bio_df,
        title="Biological Standard – Targeted Metabolites",
        x="Retention time (min)",
        y="Precursor m/z",
        height=600,
        hover_name="Metabolite name",
        color="% Change",
        color_continuous_scale=diverging_colorscale,
        labels=labels,
        log_x=False,
        range_color=[-100, 100])
    fig.update_layout(
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(title="Retention time (min)")
    fig.update_yaxes(title="Precursor m/z")
    fig.update_traces(marker={"size": 30})

    return fig

Returns scatter plot figure of precursor m/z vs. retention time for targeted features in the biological standard.

To further clarify:

x-axis: retention times; y-axis: precursor masses; colorscale: percent change in intensity for each feature compared to the average intensity across all runs

Documentation on Plotly scatter plots: https://plotly.com/python-api-reference/generated/plotly.express.scatter.html

Arguments:
  • run_id (str): Run ID to query the biological standard from
  • df_rt (DataFrame): Table of retention times for targeted features (columns) across instrument runs (rows)
  • df_mz (DataFrame): Table of precursor masses for targeted features (columns) across instrument runs (rows)
  • df_intensity (DataFrame): Table of intensities for targeted features (columns) across instrument runs (rows)
Returns:

plotly.express.scatter object: m/z - RT scatter plot for targeted metabolites in the biological standard

def load_bio_benchmark_plot(dataframe, metabolite_name):
def load_bio_benchmark_plot(dataframe, metabolite_name):

    """
    Returns bar plot figure of intensities for a targeted metabolite in a biological standard across instrument runs.

    Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

    Args:
        dataframe (DataFrame):
            Table of intensities for targeted metabolites (columns) across instrument runs (rows);
            must contain a "Name" column identifying each run
        metabolite_name (str):
            The targeted metabolite to query from the DataFrame

    Returns:
        plotly.express.bar object: Plotly bar plot of intensities (for the selected targeted metabolite) across instrument runs.
    """

    # Get list of runs
    instrument_runs = dataframe["Name"].astype(str).tolist()

    # Get targeted metabolite intensities for each run. Coercing to numeric turns None and
    # non-numeric entries into NaN, so the scientific-notation formatting below cannot raise.
    intensities = pd.to_numeric(dataframe[metabolite_name], errors="coerce").tolist()

    # Get intensities in scientific notation for labeling bar plot
    intensities_text = ["{:.2e}".format(value) for value in intensities]

    fig = px.bar(
        x=instrument_runs,
        y=intensities,
        text=intensities_text,
        height=600)
    fig.update_layout(
        title="Biological Standard Benchmark",
        showlegend=False,
        transition_duration=500,
        clickmode="event",
        xaxis=dict(rangeslider=dict(visible=True), autorange=True),
        legend=dict(font=dict(size=10)),
        margin=dict(t=75, b=75, l=0, r=0))
    fig.update_xaxes(title="Study")
    fig.update_yaxes(title="Intensity")
    fig.update_traces(textposition="outside",
                      hovertemplate=f"{metabolite_name}" + "<br>Study: %{x} <br>Intensity: %{text}<br>")

    return fig

Returns bar plot figure of intensities for a targeted metabolite in a biological standard across instrument runs.

Documentation on Plotly bar plots: https://plotly.com/python-api-reference/generated/plotly.express.bar.html

Arguments:
  • dataframe (DataFrame): Table of intensities for targeted metabolites (columns) across instrument runs (rows)
  • metabolite_name (str): The targeted metabolite to query from the DataFrame
Returns:

plotly.express.bar object: Plotly bar plot of intensities (for the selected targeted metabolite) across instrument runs.

def get_internal_standard_index(previous, next, max):
def get_internal_standard_index(previous, next, max):

    """
    Converts "previous" / "next" button click counts into a position within the list of internal standards.

    The resulting index is used by the populate_istd_rt_plot(), populate_istd_intensity_plot(), and
    populate_istd_mz_plot() callback functions of the DashWebApp module to pick an internal standard.

    This function relies on the previous button's n_clicks to be reset to None on every click.

    Args:
        previous (int):
            n_clicks for the "previous" button (None, unless previous button is clicked)
        next (int):
            n_clicks for the "next" button
        max (int):
            Number of internal standards (maximum index for list of internal standards)

    Returns:
        Integer index for a list of internal standards.
    """

    if previous is None:
        # Only "next" has been used (or nothing at all): its click count is the position
        candidate = 0 if next is None else next
    elif next is None or next == 0:
        # "Previous" was clicked before any "next" click: jump to the last internal standard
        return max - 1
    else:
        # Both buttons have been used: the position is the net number of forward clicks
        candidate = next - previous

    # Anything outside of [0, max) falls back to the first internal standard
    return candidate if 0 <= candidate < max else 0

Button functionality for seeking through internal standards.

Uses n_clicks from the previous and next buttons to generate an index, which is used to index a list of internal standards in the populate_istd_rt_plot(), populate_istd_intensity_plot(), and populate_istd_mz_plot() callback functions of the DashWebApp module.

This function relies on the previous button's n_clicks to be reset to None on every click.

Arguments:
  • previous (int): n_clicks for the "previous" button (None, unless previous button is clicked)
  • next (int): n_clicks for the "next" button
  • max (int): Number of internal standards (maximum index for list of internal standards)
Returns:

Integer index for a list of internal standards.