Skip to content

Commit 95c8a40

Browse files
committed
Update ntRootAncestryPredictor.pl
-Add support for streaming .vcf.gz / .vcf.bgz inputs via gunzip/unpigz (no new dependencies) -Ensure proper filehandle cleanup (close on input/output) -Initialize $max to avoid undefined comparison warnings -Minor robustness improvements to input handling and stability
1 parent 8e2a84c commit 95c8a40

1 file changed

Lines changed: 29 additions & 11 deletions

File tree

ntRootAncestryPredictor.pl

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,27 @@ sub usage_page {
6060
if ($tile_resolution) {
6161
open(IN,$fai) || die "Can't read $fai --fatal (is the file in your working directory?)\n";
6262
while(<IN>){
63-
chomp;
64-
my @a=split(/\t/);
65-
$chr->{$a[0]}=$a[1];
63+
chomp;
64+
my @a=split(/\t/);
65+
$chr->{$a[0]}=$a[1];
6666
}
67+
close IN; ### filehandle hygiene
68+
}
69+
70+
###below support for compressed vcf
71+
my $IN;
72+
73+
if ($f =~ /\.(gz|bgz)$/) {### add support for gz/bgz
74+
my $cmd = "gunzip -c"; # safe default
75+
$cmd = "unpigz -c" if system("command -v unpigz >/dev/null 2>&1") == 0;
76+
77+
#open($IN, "$cmd $f |") or die "can't read compressed $f -- fatal.\n";
78+
open($IN, "-|", split(" ", $cmd), $f) or die "can't read compressed $f -- fatal.\n";###safer open
79+
} else {
80+
open($IN, "<", $f) or die "can't read $f -- fatal.\n";
6781
}
6882

6983

70-
open(IN, $f) || die "can't read $f -- fatal.\n";
7184

7285
my $xr=0;
7386
my $s;
@@ -77,10 +90,10 @@ sub usage_page {
7790

7891
print "Inferring ancestry using SNVs (single nucleotide variants)...\n\n";
7992

80-
while(<IN>){
93+
while(<$IN>){
8194
chomp;
8295
my @a=split(/\t/);
83-
my $max;
96+
my $max = -1;###never previously initialized
8497
my $maxpop;
8598

8699
if(/_AF/){
@@ -113,23 +126,25 @@ sub usage_page {
113126
$z->{$a[0]}{$wn}{$pop}{'sum'}+=$afallele;
114127
$y->{$a[0]}{$wn}{'ct'}++;
115128

116-
117129
if($afallele){
118130
$s->{$d[0]}{'ct'}++;
119131
$z->{$a[0]}{$wn}{$pop}{'nzct'}++;
120132
if($a[1]>$max){
121133
$max=$a[1];
122134
$maxpop=$pop;
123135
}
124-
} else{
125-
$afallele=1;
136+
#} else{### Remove dead $afallele=1 line (not used)
137+
# $afallele=1;
126138
}
127139
}
128140
}
129141
}
130142
}
131143
}
132144
}
145+
146+
close $IN;
147+
133148
###calculate metric per tile
134149
my $top;
135150
my $total;
@@ -149,11 +164,13 @@ sub usage_page {
149164
my $wnl=$z->{$el};
150165
foreach my $wnum(sort {$a<=>$b} keys %$wnl){
151166
my $pl = $wnl->{$wnum};
152-
my $winmax;
167+
my $winmax = -1;###was never initialized
153168
my $winpop;
154169
my $window_population_metric;
155170
print "WARNING: chr$el tile$wnum has $y->{$el}{$wnum}{'ct'} only total SNVs -- you may need to increase the tile size (currently set at $dw)\n" if($y->{$el}{$wnum}{'ct'}<100);
156171
foreach my $pp(keys %$pl){
172+
my $ct = $y->{$el}{$wnum}{'ct'} || 0;###guards div by zero
173+
next if $ct == 0; ###guards div by zero
157174
my $rate = $pl->{$pp}{'sum'}/$y->{$el}{$wnum}{'ct'};
158175
my $metric = ($pl->{$pp}{'nzct'}/$y->{$el}{$wnum}{'ct'}) * $rate;
159176
$window_population_metric->{$pp} = $metric;
@@ -182,7 +199,6 @@ sub usage_page {
182199
}
183200
}
184201

185-
close IN;
186202
if ($tile_resolution) {
187203
close BEST;
188204
}
@@ -240,6 +256,8 @@ sub usage_page {
240256
}
241257
}
242258

259+
close OUT;### filehandle hygiene
260+
243261
print "\nGAI score: Average SNV allele frequency * rate of SNVs with non-zero allele frequency\n";
244262
print "Populations are ranked based on the LAI fraction\n";
245263
print "\nAbbreviations:\n\tAF: Allele Frequency";

0 commit comments

Comments
 (0)