56

Download Source

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""proc.py
"""

from pyms.GCMS.IO.ANDI import ANDI_reader
from pyms.IntensityMatrix import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.TopHat import tophat

from pyms.BillerBiemann import BillerBiemann, rel_threshold, num_ions_threshold

from pyms.Peak.Function import peak_top_ion_areas

# read the raw data as a GCMS_data object
andi_file = "data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)

im = build_intensity_matrix_i(data)

n_scan, n_mz = im.size

print("Intensity matrix size (scans, masses):", (n_scan, n_mz))

# noise filter and baseline correct
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic_smooth = savitzky_golay(ic)
    ic_bc = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_bc)

# Use Biller and Biemann technique to find apexing ions at a scan.
peak_list = BillerBiemann(im, points=9, scans=2)

# percentage ratio of ion intensity to max ion intensity
r = 2
# minimum number of ions, n
n = 3
# greater than or equal to threshold, t
t = 10000

# trim by relative intensity
pl = rel_threshold(peak_list, r)

# trim by threshold
new_peak_list = num_ions_threshold(pl, n, t)

print("Number of filtered peaks: ", len(new_peak_list))

# find and set areas
print("Top 5 most abundant ions for each peak ")

for peak in new_peak_list:
    rt = peak.rt
    # Only test interesting sub-set from 29.5 to 32.5 minutes
    if rt >= 29.5*60.0 and rt <= 32.5*60.0:
        # determine and set ion areas, use default num of ions =5
        areas_dict = peak_top_ion_areas(im, peak)
        peak.ion_areas = areas_dict

        area_dict = peak.ion_areas
        # print the top 5 ions for each peak
        print(area_dict.keys())