33#include < seqan/bam_io.h>
44#include < stdio.h>
55#include < stdlib.h>
6- #include < string.h >
6+ #include < string>
77#include < cmath>
88#include < cstdlib>
99#include < stdint.h>
10+ #include < vector>
1011#include < seqan/arg_parse.h>
1112
1213
@@ -21,14 +22,13 @@ hasFlagSupplementary(seqan::BamAlignmentRecord const & record)
2122}
2223*/
2324struct Baminfo {
24- char version[1024 ];
25- char bindir[1024 ];
26- seqan::CharString bamFile;
25+ std::vector<std::string> bamfiles;
2726 bool debug;
2827 bool noReadNames;
29- bool noQuality;
28+ bool noQuality;
29+ bool paired;
3030
31- Baminfo () : debug(false ), noReadNames(false ), noQuality(false ) {}
31+ Baminfo () : debug(false ), noReadNames(false ), noQuality(false ), paired( true ) {}
3232
3333};
3434
@@ -40,9 +40,9 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) {
4040
4141 setShortDescription (parser, " Checksum of a bam file" ); // TODO change description
4242 setVersion (parser, BAMHASH_VERSION);
43- setDate (parser, " Feb 2015" );
43+ setDate (parser, " May 2015" );
4444
45- addUsageLine (parser, " [\\ fIOPTIONS\\ fP] \\ fI<in.bam>\\ fP" );
45+ addUsageLine (parser, " [\\ fIOPTIONS\\ fP] \\ fI<in.bam> <in2.bam> ... \\ fP" );
4646 addDescription (parser, " Program for checksum of sequence reads. " );
4747
4848 addArgument (parser, seqan::ArgParseArgument (seqan::ArgParseArgument::INPUTFILE," bamfile" , " False" , 1 ));
@@ -55,6 +55,7 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) {
5555 addOption (parser, seqan::ArgParseOption (" d" , " debug" , " Debug mode. Prints full hex for each read to stdout" ));
5656 addOption (parser, seqan::ArgParseOption (" R" , " no-readnames" , " Do not use read names as part of checksum" ));
5757 addOption (parser, seqan::ArgParseOption (" Q" , " no-quality" , " Do not use read quality as part of checksum" ));
58+ addOption (parser, seqan::ArgParseOption (" P" , " no-paired" , " Bam files were not generated with paired-end reads" ));
5859
5960 // Parse command line.
6061 seqan::ArgumentParser::ParseResult res = seqan::parse (parser, argc, argv);
@@ -65,7 +66,9 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) {
6566 options.debug = isSet (parser, " debug" );
6667 options.noReadNames = isSet (parser, " no-readnames" );
6768 options.noQuality = isSet (parser, " no-quality" );
68- getArgumentValue (options.bamFile , parser, 0 );
69+ options.paired = !isSet (parser, " no-paired" );
70+
71+ options.bamfiles = getArgumentValues (parser, 0 );
6972
7073 return seqan::ArgumentParser::PARSE_OK;
7174}
@@ -80,84 +83,105 @@ int main(int argc, char const **argv) {
8083 return res == seqan::ArgumentParser::PARSE_ERROR;
8184 }
8285
83- // Open BGZF Stream for reading.
84- seqan::Stream<seqan::Bgzf> inStream;
85- if (!open (inStream, toCString (info.bamFile ), " r" )) {
86- std::cerr << " ERROR: Could not open " << info.bamFile << " for reading.\n " ;
87- return 1 ;
88- }
89-
90- // Setup name store, cache, and BAM I/O context.
91- typedef seqan::StringSet<seqan::CharString> TNameStore;
92- typedef seqan::NameStoreCache<TNameStore> TNameStoreCache;
93- typedef seqan::BamIOContext<TNameStore> TBamIOContext;
94- TNameStore nameStore;
95- TNameStoreCache nameStoreCache (nameStore);
96- TBamIOContext context (nameStore, nameStoreCache);
97-
98- // Read header.
99- seqan::BamHeader header;
100- if (readRecord (header, context, inStream, seqan::Bam ()) != 0 ) {
101- std::cerr << " ERROR: Could not read header from BAM file " << info.bamFile << " \n " ;
102- return 1 ;
103- }
104- seqan::clear (header);
105-
106- // Define:
107- seqan::BamAlignmentRecord record;
10886 uint64_t sum = 0 ;
10987 uint64_t count = 0 ;
110- seqan::CharString string2hash;
111- // char hexCstr[33];
88+ bool pairedWarning = false ;
89+
90+
11291
113- // Read record
114- while (!atEnd (inStream)) {
115- if (readRecord (record, context, inStream, seqan::Bam ()) != 0 ) {
116- std::cerr << " ERROR: Could not read record from BAM File " << info.bamFile << " \n " ;
92+ for (int i = 0 ; i < info.bamfiles .size (); i++) {
93+ // Open BGZF Stream for reading.
94+ seqan::Stream<seqan::Bgzf> inStream;
95+
96+ const char * bamfile = info.bamfiles [i].c_str ();
97+
98+ if (!open (inStream, bamfile, " r" )) {
99+ std::cerr << " ERROR: Could not open " << bamfile << " for reading.\n " ;
117100 return 1 ;
118101 }
119- // Check if flag: reverse complement and change record accordingly
120- if (hasFlagRC (record)) {
121- reverseComplement (record.seq );
122- reverse (record.qual );
102+
103+ // Setup name store, cache, and BAM I/O context.
104+ typedef seqan::StringSet<seqan::CharString> TNameStore;
105+ typedef seqan::NameStoreCache<TNameStore> TNameStoreCache;
106+ typedef seqan::BamIOContext<TNameStore> TBamIOContext;
107+ TNameStore nameStore;
108+ TNameStoreCache nameStoreCache (nameStore);
109+ TBamIOContext context (nameStore, nameStoreCache);
110+
111+ // Read header.
112+ seqan::BamHeader header;
113+ if (readRecord (header, context, inStream, seqan::Bam ()) != 0 ) {
114+ std::cerr << " ERROR: Could not read header from BAM file " << bamfile << " \n " ;
115+ return 1 ;
123116 }
124- // Check if flag: supplementary and exclude those
125- if (!hasFlagSupplementary (record) && !hasFlagSecondary (record)) {
126- count +=1 ;
127- // Construct one string from record
128- if (!info.noReadNames ) {
129- seqan::append (string2hash, record.qName );
130- if (hasFlagLast (record)) {
131- seqan::append (string2hash, " /2" );
132- } else {
133- seqan::append (string2hash, " /1" );
134- }
117+ seqan::clear (header);
118+
119+ // Define:
120+ seqan::BamAlignmentRecord record;
121+ seqan::CharString string2hash;
122+ // char hexCstr[33];
123+
124+ // Read record
125+ while (!atEnd (inStream)) {
126+ if (readRecord (record, context, inStream, seqan::Bam ()) != 0 ) {
127+ std::cerr << " ERROR: Could not read record from BAM File " << bamfile << " \n " ;
128+ return 1 ;
135129 }
136-
137- seqan::append (string2hash, record. seq );
138- if (!info. noQuality ) {
139- seqan::append (string2hash, record.qual );
130+ // Check if flag: reverse complement and change record accordingly
131+ if ( hasFlagRC ( record)) {
132+ reverseComplement (record. seq );
133+ reverse ( record.qual );
140134 }
141- seqan::clear (record);
135+ // Check if flag: supplementary and exclude those
136+ if (!hasFlagSupplementary (record) && !hasFlagSecondary (record)) {
137+ count +=1 ;
138+ // Construct one string from record
139+ if (!info.noReadNames ) {
140+ seqan::append (string2hash, record.qName );
141+ if (hasFlagLast (record)) {
142+ if (info.paired ) {
143+ seqan::append (string2hash, " /2" );
144+ } else {
145+ if (!pairedWarning) {
146+ std::cerr << " WARNING: BamHash was run with --no-paired mode, but BAM file has reads marked as second pair" << std::endl;
147+ pairedWarning = true ;
148+ }
149+ seqan::append (string2hash, " /1" );
150+ }
151+ } else {
152+ seqan::append (string2hash, " /1" );
153+ }
154+ }
155+
156+ seqan::append (string2hash, record.seq );
157+ if (!info.noQuality ) {
158+ seqan::append (string2hash, record.qual );
159+ }
160+ seqan::clear (record);
161+
162+ // Get MD5 hash
163+ hash_t hex = str2md5 (toCString (string2hash), length (string2hash));
142164
143- // Get MD5 hash
144- hash_t hex = str2md5 (toCString (string2hash), length (string2hash));
145- seqan::clear (string2hash);
146165
147- if (info.debug ) {
148- std::cout << std::hex << hex.p .low << " \n " ;
149- } else {
150- hexSum (hex, sum);
166+ if (info.debug ) {
167+ std::cout << string2hash << " " << std::hex << hex.p .low << " \n " ;
168+ } else {
169+ hexSum (hex, sum);
170+ }
171+
172+ seqan::clear (string2hash);
151173 }
152174 }
175+
176+ // print result
177+
153178 }
154179
155- // print result
156180 if (!info.debug ) {
157181 std::cout << std::hex << sum << " \t " ;
158182 std::cout << std::dec << count << " \n " ;
159183 }
160-
184+
161185 return 0 ;
162186}
163187
0 commit comments