Skip to content

Commit 14afbf6

Browse files
committed
Unified output path resolution
1 parent 3b497a9 commit 14afbf6

4 files changed

Lines changed: 47 additions & 33 deletions

File tree

MainClass.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,9 @@ private static void RegularParametersParsing(string[] args)
768768
if (parseInput.OutputFormat == OutputFormat.IndexMzML) parseInput.OutputFormat = OutputFormat.MzML;
769769
}
770770

771+
// Switch off gzip compression for Parquet
772+
if (parseInput.OutputFormat == OutputFormat.Parquet) parseInput.Gzip = false;
773+
771774
parseInput.MaxLevel = parseInput.MsLevel.Max();
772775

773776
if (parseInput.S3Url != null && parseInput.S3AccessKeyId != null &&

ThermoRawFileParserTest/WriterTests.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,5 +281,30 @@ public void TestMzML_MS2()
281281

282282
Assert.That(testMzMl.run.chromatogramList.chromatogram[0].defaultArrayLength, Is.EqualTo(95));
283283
}
284+
285+
[Test]
public void TestParquet()
{
    // Write the converted output into the system temp directory
    var tempFilePath = Path.GetTempPath();

    var testRawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");
    var parseInput = new ParseInput(testRawFile, null, tempFilePath, OutputFormat.Parquet);

    // Expected output location: <output directory>/<RAW file name without extension>.mzparquet
    // NOTE(review): assumes the writer keeps the ".mzparquet" extension unchanged - confirm
    var outputFile = Path.Combine(tempFilePath, "small.mzparquet");

    // Remove leftovers from earlier runs so the assertion cannot pass on a stale file
    if (File.Exists(outputFile))
    {
        File.Delete(outputFile);
    }

    RawFileParser.Parse(parseInput);

    // Actual test: the conversion must have produced the Parquet output file
    // TODO: read the file back and verify the number of scans/rows
    Assert.That(File.Exists(outputFile), Is.True);
}
284309
}
285310
}

Writer/ParquetSpectrumWriter.cs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.IO;
43
using System.Reflection;
54
using log4net;
65
using Parquet.Serialization;
@@ -43,17 +42,18 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
4342
throw new RawFileParserException("No MS data in RAW file, no output will be produced");
4443
}
4544

45+
//TODO: Correct iterator based on MS-level filter
4646
var enumerator = raw.GetFilteredScanEnumerator(" ");
4747

48-
// NB: replace with more robust strategy?
49-
var output = ParseInput.OutputDirectory + "//" + Path.GetFileNameWithoutExtension(ParseInput.RawFilePath) + ".mzparquet";
50-
48+
ConfigureWriter(".mzparquet");
49+
5150
ParquetSerializerOptions opts = new ParquetSerializerOptions();
5251
opts.CompressionLevel = System.IO.Compression.CompressionLevel.Fastest;
5352
opts.CompressionMethod = Parquet.CompressionMethod.Zstd;
5453

5554
var data = new List<MzParquet>();
5655

56+
//TODO Precursor tree
5757
// map last (msOrder - 1) -> scan number (e.g. mapping precursors)
5858
// note, this assumes time dependence of MS1 -> MS2 -> MSN
5959
var last_scans = new Dictionary<int, uint>();
@@ -63,6 +63,7 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
6363
{
6464
var scanFilter = raw.GetFilterForScanNumber(scanNumber);
6565

66+
//TODO Centroiding if centroidStream is not available
6667
CentroidStream centroidStream = new CentroidStream();
6768

6869
// Pull out m/z and intensity values
@@ -170,7 +171,7 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
170171
// present in the same row group (critical property of mzparquet)
171172
if (data.Count >= 1_048_576)
172173
{
173-
var task = ParquetSerializer.SerializeAsync(data, output, opts);
174+
var task = ParquetSerializer.SerializeAsync(data, Writer.BaseStream, opts);
174175
task.Wait();
175176
opts.Append = true;
176177
data.Clear();
@@ -182,7 +183,7 @@ public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNu
182183
// serialize any remaining ions into the final row group
183184
if (data.Count > 0)
184185
{
185-
var task = ParquetSerializer.SerializeAsync(data, output, opts);
186+
var task = ParquetSerializer.SerializeAsync(data, Writer.BaseStream, opts);
186187
task.Wait();
187188
Log.Info("writing row group");
188189
}

Writer/SpectrumWriter.cs

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -68,42 +68,27 @@ protected void ConfigureWriter(string extension)
6868
return;
6969
}
7070

71-
if (ParseInput.OutputFile == null)
71+
var fileName = NormalizeFileName(ParseInput.OutputFile, extension, ParseInput.Gzip);
72+
if (ParseInput.OutputFormat == OutputFormat.Parquet)
7273
{
73-
var fullExtension = ParseInput.Gzip ? extension + ".gz" : extension;
74-
if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
75-
{
76-
Writer = File.CreateText(ParseInput.OutputDirectory + "//" +
77-
ParseInput.RawFileNameWithoutExtension +
78-
extension);
79-
}
80-
else
81-
{
82-
var fileStream = File.Create(ParseInput.OutputDirectory + "//" +
83-
ParseInput.RawFileNameWithoutExtension + fullExtension);
84-
var compress = new GZipStream(fileStream, CompressionMode.Compress);
85-
Writer = new StreamWriter(compress);
86-
}
74+
Writer = new StreamWriter(File.Create(fileName));
75+
}
76+
else if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
77+
{
78+
Writer = File.CreateText(fileName);
8779
}
8880
else
8981
{
90-
var fileName = NormalizeFileName(ParseInput.OutputFile, extension, ParseInput.Gzip);
91-
if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
92-
{
93-
Writer = File.CreateText(fileName);
94-
}
95-
else
96-
{
97-
var fileStream = File.Create(fileName);
98-
var compress = new GZipStream(fileStream, CompressionMode.Compress);
99-
Writer = new StreamWriter(compress);
100-
}
82+
var fileStream = File.Create(fileName);
83+
var compress = new GZipStream(fileStream, CompressionMode.Compress);
84+
Writer = new StreamWriter(compress);
10185
}
86+
10287
}
10388

10489
private string NormalizeFileName(string outputFile, string extension, bool gzip)
10590
{
106-
string result = outputFile;
91+
string result = outputFile == null ? Path.Combine(ParseInput.OutputDirectory, ParseInput.RawFileNameWithoutExtension) : outputFile;
10792
string tail = "";
10893

10994
string[] extensions;

0 commit comments

Comments
 (0)