"""
Functions for reading mzML format data files.
"""
################################################################################
# #
# PyMassSpec software for processing of mass-spectrometry data #
# Copyright (C) 2005-2012 Vladimir Likic #
# Copyright (C) 2019-2020 Dominic Davis-Foster #
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2 as #
# published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #
# #
################################################################################
# 3rd party
import pymzml # type: ignore[import]
from domdf_python_tools.typing import PathLike
try:
# 3rd party
from mpi4py import MPI # type: ignore[import]
except ModuleNotFoundError:
pass
# this package
from pyms.Base import is_path
from pyms.GCMS.Class import GCMS_data
from pyms.Spectrum import Scan
__all__ = ["mzML_reader"]
[docs]def mzML_reader(file_name: PathLike) -> GCMS_data:
"""
A reader for mzML files.
:param file_name: The name of the mzML file.
:return: GC-MS data object.
:authors: Sean O'Callaghan, Dominic Davis-Foster (pathlib support)
"""
if not is_path(file_name):
raise TypeError("'file_name' must be a string or a PathLike object")
mzml_file = pymzml.run.Reader(str(file_name))
try: # avoid printing from each rank
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
if rank == 0:
file_names = []
for i in range(1, size):
recv_buffer = ''
file_n = comm.recv(recv_buffer, i)
file_names.append(file_n)
print(" -> Reading mzML files:")
print(file_name)
for file_n in file_names:
print(file_n)
else:
comm.send(file_name, dest=0)
# TODO: Find specific error
except Exception as e:
print(e)
print(f" -> Reading mzML file '{file_name}'")
scan_list = []
time_list = []
for spectrum in mzml_file:
mass_list = []
intensity_list = []
for mz, i in spectrum.peaks:
mass_list.append(mz)
intensity_list.append(i)
# scan_list.append(Scan(mass_list, intensity_list))
for element in spectrum.xmlTree:
# For some reason there are spectra with no time value,
# Ignore these????????????
if element.get("accession") == "MS:1000016": # time value
# We need time in seconds not minutes
time_list.append(60 * float(element.get("value")))
scan_list.append(Scan(mass_list, intensity_list))
# print("time:", len(time_list))
# print("scan:", len(scan_list))
data = GCMS_data(time_list, scan_list)
return data