Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 59 additions & 41 deletions Main/bin/runGeneCNVAndPloidyQuery
Original file line number Diff line number Diff line change
Expand Up @@ -7,43 +7,30 @@ use GUS::ObjRelP::DbiDatabase;
use GUS::Supported::GusConfig;
use CBIL::Util::PropertySet;

my ($gusConfigFile,$organismAbbrev,$geneSourceIdOrthologFile,$chrsForCalcsFile);
&GetOptions("organismAbbrev=s" => \$organismAbbrev,
my ($gusConfigFile,$taxonId,$orthoGroupFile,$geneSourceIdOrthologFile,$chrsForCalcsFile);
&GetOptions("taxonId=s" => \$taxonId,
"orthoGroupFile=s" => \$orthoGroupFile,
"geneSourceIdOrthologFile=s" => \$geneSourceIdOrthologFile,
"chrsForCalcsFile=s" => \$chrsForCalcsFile);
my $ploidy = 2;

my $geneSourceSql = "with sequence as (
select gf.source_id as gene_source_id
, gf.na_feature_id
, ns.source_id as contig_source_id
, ns.source_id as sequence_source_id
, ns.TAXON_ID
from dots.genefeature gf
, DOTS.NASEQUENCE ns
, SRES.ONTOLOGYTERM ot
where gf.na_sequence_id = ns.na_sequence_id
and ot.name = 'chromosome'
and ns.SEQUENCE_ONTOLOGY_ID = ot.ONTOLOGY_TERM_ID
and ns.taxon_id = (select taxon_id from apidb.organism where abbrev = '$organismAbbrev')
), orthologs as (
select gf.na_feature_id, sg.name
from dots.genefeature gf
, dots.SequenceSequenceGroup ssg
, dots.SequenceGroup sg
, core.TableInfo ti
where gf.na_feature_id = ssg.sequence_id
and ssg.sequence_group_id = sg.sequence_group_id
and ssg.source_table_id = ti.table_id
and ti.name = 'GeneFeature'
)
select s.gene_source_id
, o.name
from sequence s
, orthologs o
where s.na_feature_id = o.na_feature_id";

my $chrsForCalcsSql = "select ns.source_id from dots.nasequence ns, sres.ontologyterm ot where ot.name = 'chromosome' and ot.ontology_term_id = ns.sequence_ontology_id and ns.taxon_id = (select taxon_id from apidb.organism where abbrev = '$organismAbbrev')";

my $proteinToGeneSql = "
SELECT aas.source_id AS protein_source_id,
gf.source_id AS gene_source_id
FROM dots.AASequence aas
JOIN dots.TranslatedAASequence tas ON aas.aa_sequence_id = tas.aa_sequence_id
JOIN dots.TranslatedAAFeature taf ON taf.aa_sequence_id = tas.aa_sequence_id
JOIN dots.Transcript t ON taf.na_feature_id = t.na_feature_id
JOIN dots.GeneFeature gf ON t.parent_id = gf.na_feature_id
WHERE aas.subclass_view = 'TranslatedAASequence'
AND aas.taxon_id = $taxonId
AND aas.taxon_id IN (
SELECT taxon_id
FROM apidb.organism
WHERE is_annotated_genome = 1
)
";

my $chrsForCalcsSql = "select ns.source_id from dots.nasequence ns, sres.ontologyterm ot where ot.name = 'chromosome' and ot.ontology_term_id = ns.sequence_ontology_id and ns.taxon_id = $taxonId";

$gusConfigFile = $ENV{GUS_HOME}."/config/gus.config";
die "Config file $gusConfigFile does not exist" unless -e $gusConfigFile;
Expand All @@ -59,19 +46,50 @@ my $db = GUS::ObjRelP::DbiDatabase-> new($gusConfig->{props}->{dbiDsn},

my $dbh = $db->getQueryHandle();

my $orthoMclStmt = $dbh->prepare($geneSourceSql);
$orthoMclStmt->execute();
my $proteinToGeneStmt = $dbh->prepare($proteinToGeneSql);
$proteinToGeneStmt->execute();

my %proteinToGene;
while (my @row = $proteinToGeneStmt->fetchrow_array()){
$proteinToGene{$row[0]} = $row[1];
}

open(GENE,">$geneSourceIdOrthologFile");
while (my @row = $orthoMclStmt->fetchrow_array()){
print GENE "$row[0]\t$row[1]\n";
my %proteinToGroup;
open(GROUPS, "<$orthoGroupFile") or die "Cannot open $orthoGroupFile: $!";
while (my $line = <GROUPS>) {
chomp $line;
my ($groupId, $proteinList) = split(/:\s*/, $line, 2);
next unless defined $proteinList;
foreach my $protein (split(/\s+/, $proteinList)) {
$proteinToGroup{$protein} = $groupId;
}
}
close GROUPS;

my @proteinsWithNoGroup;
open(GENE, ">$geneSourceIdOrthologFile") or die "Cannot open $geneSourceIdOrthologFile: $!";
while (my ($protein, $gene) = each %proteinToGene) {
my $group = $proteinToGroup{$protein};
unless ($group) {
(my $altProtein = $protein) =~ s/:/\_/g;
$group = $proteinToGroup{$altProtein};
}
if ($group) {
print GENE "$gene\t$group\n";
} else {
push @proteinsWithNoGroup, $protein;
}
}
close GENE;

if (@proteinsWithNoGroup) {
print STDERR "The following proteins have no group assignment in $orthoGroupFile:\n" . join("\n", @proteinsWithNoGroup) . "\n";
}

my $chrsForCalcs = $dbh->prepare($chrsForCalcsSql);
$chrsForCalcs->execute();

open(CHRS,">$chrsForCalcsFile");
open(CHRS, ">$chrsForCalcsFile") or die "Cannot open $chrsForCalcsFile: $!";
while (my @row = $chrsForCalcs->fetchrow_array()){
print CHRS "$row[0]\t\n";
}
Expand Down
39 changes: 39 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeDnaSeqLoadNextflowConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package ApiCommonWorkflow::Main::WorkflowSteps::MakeDnaSeqLoadNextflowConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

sub run {
my ($self, $test, $undo) = @_;

my $indelDir = $self->getParamValue("indelDir");
my $extDbRlsSpec = $self->getParamValue("extDbRlsSpec");
my $genomeExtDbRlsSpec = $self->getParamValue("genomeExtDbRlsSpec");

my $configPath = $self->getWorkflowDataDir() . "/" . $self->getParamValue("nextflowConfigFile");

if ($undo) {
$self->runCmd(0, "rm -rf $configPath");
} else {
open(F, ">", $configPath) or die "$! :Can't open config file '$configPath' for writing";
print F
"
params {
indelDir = \"$indelDir\"
extDbRlsSpec = '\"$extDbRlsSpec\"'
genomeExtDbRlsSpec = '\"$genomeExtDbRlsSpec\"'
}

singularity {
enabled = true
autoMounts = true
}
";
close(F);
}
}

1;
76 changes: 76 additions & 0 deletions Main/lib/perl/WorkflowSteps/MakeDnaSeqNextflowConfig.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ApiCommonWorkflow::Main::WorkflowSteps::MakeDnaSeqNextflowConfig;

@ISA = (ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep);

use strict;
use warnings;
use ApiCommonWorkflow::Main::WorkflowSteps::WorkflowStep;

sub run {
my ($self, $test, $undo) = @_;

my $workingDirRelativePath = $self->getParamValue("workingDirRelativePath");

my $sampleSheetFile = $self->getParamValue("sampleSheetFile");
my $genomeFile = $self->getParamValue("genomeFile");
my $gtfFile = $self->getParamValue("gtfFile");
my $footprintFile = $self->getParamValue("footprintFile");
my $ploidy = $self->getParamValue("ploidy");
my $resultsDirectory = $self->getParamValue("resultsDirectory");
my $geneSourceIdOrthologFile = $self->getParamValue("geneSourceIdOrthologFile");
my $chrsForCalcFile = $self->getParamValue("chrsForCalcFile");

my $nextflowConfigFile = $self->getWorkflowDataDir() . "/" . $self->getParamValue("nextflowConfigFile");

# Translate local paths to cluster-side paths
my $digestedSampleSheet = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $sampleSheetFile);
my $digestedGenomeFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $genomeFile);
my $digestedGtfFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $gtfFile);
my $digestedFootprintFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $footprintFile);
my $digestedResultsDir = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $resultsDirectory);
my $digestedOrthologFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $geneSourceIdOrthologFile);
my $digestedChrsForCalcFile = $self->relativePathToNextflowClusterPath($workingDirRelativePath, $chrsForCalcFile);

# Workflow config values
my $minCoverage = $self->getConfig("minCoverage");
my $winLen = $self->getConfig("winLen");
my $bwaThreads = $self->getConfig("bwaThreads");

my $executor = $self->getClusterExecutor();
my $queue = $self->getClusterQueue();

if ($undo) {
$self->runCmd(0, "rm -rf $nextflowConfigFile");
} else {
open(F, ">", $nextflowConfigFile) or die "$! :Can't open config file '$nextflowConfigFile' for writing";
print F
"
params {
samplesheet = \"$digestedSampleSheet\"
bwaThreads = $bwaThreads
minCoverage = $minCoverage
genomeFastaFile = \"$digestedGenomeFile\"
gtfFile = \"$digestedGtfFile\"
footprintFile = \"$digestedFootprintFile\"
winLen = $winLen
ploidy = $ploidy
outputDir = \"$digestedResultsDir\"
geneSourceIdOrthologFile = \"$digestedOrthologFile\"
chrsForCalcFile = \"$digestedChrsForCalcFile\"
}

process {
executor = '$executor'
queue = '$queue'
}

singularity {
enabled = true
autoMounts = true
}
";
close(F);
}
}

1;
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,8 @@ sub run {
my $varscanPValue = $self->getConfig("varscanPValue");
my $varscanMinVarFreqSnp = $self->getConfig("varscanMinVarFreqSnp");
my $varscanMinVarFreqCons = $self->getConfig("varscanMinVarFreqCons");
my $maxNumberOfReads = $self->getConfig("maxNumberOfReads");
my $hisat2Index = $self->getConfig("hisat2Index");
my $createIndex = $self->getConfig("createIndex");
my $trimmomaticAdaptorsFile = $self->getConfig("trimmomaticAdaptorsFile");
my $ebiFtpUser = $self->getConfig("ebiFtpUser");
my $ebiFtpPassword = $self->getConfig("ebiFtpPassword");

Expand Down Expand Up @@ -93,11 +91,9 @@ params {
hisat2Index = $hisat2Index
createIndex = $createIndex
outputDir = \"$clusterResultDir\"
trimmomaticAdaptorsFile = $trimmomaticAdaptorsFile
varscanPValue = $varscanPValue
varscanMinVarFreqSnp = $varscanMinVarFreqSnp
varscanMinVarFreqCons = $varscanMinVarFreqCons
maxNumberOfReads = $maxNumberOfReads
taxonId = \"$taxonId\"
geneSourceIdOrthologFile = \"$geneSourceIdOrthologFile\"
chrsForCalcFile = \"$chrsForCalcFile\"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ sub run {
my $organismAbbrev = $self->getParamValue('organismAbbrev');
my $geneSourceIdOrthologFile = join("/", $self->getWorkflowDataDir(), $self->getParamValue("geneSourceIdOrthologFile"));
my $chrsForCalcsFile = join("/", $self->getWorkflowDataDir(), $self->getParamValue('chrsForCalcsFile'));

my $gusConfigFile = $self->getParamValue('gusConfigFile');
$gusConfigFile = $self->getWorkflowDataDir() . "/$gusConfigFile";
my $fullOrthoGroupsFile = $self->getSharedConfig("fullOrthoGroupsFile");

my $taxonId = $self->getOrganismInfo($test, $organismAbbrev, $gusConfigFile)->getTaxonId();

if ($undo) {
$self->runCmd(0, "rm -f $geneSourceIdOrthologFile");
$self->runCmd(0, "rm -f $chrsForCalcsFile");
Expand All @@ -19,7 +24,7 @@ sub run {
$self->runCmd($test,"echo test > $geneSourceIdOrthologFile");
$self->runCmd($test,"echo test > $chrsForCalcsFile");
} else {
$self->runCmd($test,"runGeneCNVAndPloidyQuery --organismAbbrev $organismAbbrev --geneSourceIdOrthologFile $geneSourceIdOrthologFile --chrsForCalcsFile $chrsForCalcsFile");
$self->runCmd($test,"runGeneCNVAndPloidyQuery --taxonId $taxonId --geneSourceIdOrthologFile $geneSourceIdOrthologFile --chrsForCalcsFile $chrsForCalcsFile --orthoGroupFile $fullGroupsFile");
}
}
}
Expand Down
Loading