Skip to content

Commit

Permalink
Merge pull request #429 from tcezard/EVA3496_merge_split_only_job
Browse files Browse the repository at this point in the history
EVA-3496 - Add new job for running only the MERGE and SPLIT
  • Loading branch information
tcezard authored Feb 9, 2024
2 parents e31c574 + a2d2182 commit 4943fe3
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public class BeanNames {
public static final String PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB = "PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB";

public static final String CLUSTER_UNCLUSTERED_VARIANTS_JOB = "CLUSTER_UNCLUSTERED_VARIANTS_JOB";
public static final String RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB = "RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB";

public static final String BACK_PROPAGATE_NEW_RS_JOB = "BACK_PROPAGATE_NEW_RS_JOB";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
public class ClusterUnclusteredVariantsJobConfiguration {
// Should be run after split or merge candidates have been processed in the step @see PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB
// In this step, proceed to cluster as-yet unclustered variants in a given assembly
// This job should NOT be run in parallel for multiple species due to EVA-2178 but the previous step can be run in parallel.
// This job should only be run in parallel across instances
@Bean(CLUSTER_UNCLUSTERED_VARIANTS_JOB)
public Job clusteringFromMongoJob(
@Qualifier(PROCESS_RS_MERGE_CANDIDATES_STEP) Step processRSMergeCandidatesStep,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright 2020 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.batch.core.launch.support.RunIdIncrementer;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.CLEAR_RS_MERGE_AND_SPLIT_CANDIDATES_STEP;
import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB;
import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.PROCESS_RS_MERGE_CANDIDATES_STEP;
import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.PROCESS_RS_SPLIT_CANDIDATES_STEP;

@Configuration
@EnableBatchProcessing
public class ResolveMergeThenSplitCandidatesJobConfiguration {
// Should be run after split or merge candidates have been processed in the step @see PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB
// This job should only be run in parallel across instances
@Bean(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB)
public Job clusteringFromMongoJob(
@Qualifier(PROCESS_RS_MERGE_CANDIDATES_STEP) Step processRSMergeCandidatesStep,
@Qualifier(PROCESS_RS_SPLIT_CANDIDATES_STEP) Step processRSSplitCandidatesStep,
@Qualifier(CLEAR_RS_MERGE_AND_SPLIT_CANDIDATES_STEP) Step clearRSMergeAndSplitCandidatesStep,
JobBuilderFactory jobBuilderFactory) {
return jobBuilderFactory.get(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB)
.incrementer(new RunIdIncrementer())
.start(processRSMergeCandidatesStep)
.next(processRSSplitCandidatesStep)
.next(clearRSMergeAndSplitCandidatesStep)
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,15 @@ public void runClusterUnclusteredVariantsJobWithNoErrors() throws JobExecutionEx
assertEquals(ClusteringCommandLineRunner.EXIT_WITHOUT_ERRORS, runner.getExitCode());
}

@Test
@UsingDataSet(locations = {"/test-data/clusteredVariantEntityForVcfJob.json"})
@DirtiesContext
public void runResolveMergeThenSplitCandidatesJobWithNoErrors() throws JobExecutionException {
runner.setJobNames(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB);
runner.run();
assertEquals(ClusteringCommandLineRunner.EXIT_WITHOUT_ERRORS, runner.getExitCode());
}

@Test
@DirtiesContext
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,7 @@
import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.RSMergeAndSplitCandidatesReaderConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.RSMergeAndSplitWriterConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.VcfReaderConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.BackPropagateRSJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusterUnclusteredVariantsJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusteringFromMongoJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusteringFromVcfJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ProcessRemappedVariantsWithRSJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.StudyClusteringJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.*;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.qc.NewClusteredVariantsQCJobConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.listeners.ListenersConfiguration;
import uk.ac.ebi.eva.accession.clustering.configuration.batch.policies.ChunkSizeCompletionPolicyConfiguration;
Expand All @@ -57,6 +52,7 @@

@EnableAutoConfiguration
@Import({ClusteringFromVcfJobConfiguration.class,
ResolveMergeThenSplitCandidatesJobConfiguration.class,
ClusteringFromMongoJobConfiguration.class,
StudyClusteringJobConfiguration.class,
NewClusteredVariantsQCJobConfiguration.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"submittedVariantOperationEntity": [
{
"_id" : "RSMC_GCA_000000001.1_HASH",
"eventType" : "RS_MERGE_CANDIDATES",
"accession" : NumberLong("2222222222"),
"reason" : "RS mismatch with 1111111111",
"inactiveObjects" : [
{
"seq" : "GCA_000000001.1",
"tax" : 3000,
"study" : "PRJEB00001",
"contig" : "CM000001.2",
"start" : NumberLong(3000),
"ref" : "C",
"alt" : "T",
"rs" : NumberLong("2222222222"),
"evidence" : true,
"asmMatch" : true,
"allelesMatch" : true,
"validated" : false,
"hashedMessage" : "45BD2A41D4C295DF0A85A54D6CB72C2BBABB0C7D",
"accession" : NumberLong(1000000000),
"version" : 1,
"createdDate" : ISODate("2022-04-26T08:33:26.562Z")
},
{
"seq" : "GCA_000000001.1",
"tax" : 3000,
"study" : "PRJEB00002",
"contig" : "CM000001.2",
"start" : NumberLong(3000),
"ref" : "C",
"alt" : "T",
"rs" : NumberLong("2222222222"),
"remappedFrom" : "GCA_000003205.6",
"remappedDate" : ISODate("2021-08-12T02:53:48.966Z"),
"remappingId" : "E076D09482EF2504D3168AABB13BC94172A92C27",
"hashedMessage" : "C2C9770AE529AFD445CC15458CBA2C71F7A82DB8",
"accession" : NumberLong(1000000001),
"version" : 1,
"createdDate" : ISODate("2014-11-10T14:58:00Z")
},
{
"seq" : "GCA_000000001.1",
"tax" : 3000,
"study" : "PRJEB38336",
"contig" : "CM000001.2",
"start" : NumberLong(3000),
"ref" : "C",
"alt" : "T",
"rs" : NumberLong("1111111111"),
"evidence" : true,
"asmMatch" : true,
"allelesMatch" : true,
"validated" : false,
"remappedFrom" : "GCA_000000000.1",
"remappedDate" : ISODate("2021-08-11T22:03:44.689Z"),
"remappingId" : "778BEEA595920286BA9CC9ECF13442A69F9DA73B",
"hashedMessage" : "581F717670A07D8D032E234212E0D74599943D44",
"accession" : NumberLong(1000000002),
"version" : 1,
"createdDate" : ISODate("2020-11-26T22:56:39.129Z")
}
],
"createdDate" : ISODate("2023-05-07T04:52:59.110Z")
}
]
}

0 comments on commit 4943fe3

Please sign in to comment.