diff --git a/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/BeanNames.java b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/BeanNames.java index 132d12b14..26c704e7e 100644 --- a/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/BeanNames.java +++ b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/BeanNames.java @@ -82,6 +82,7 @@ public class BeanNames { public static final String PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB = "PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB"; public static final String CLUSTER_UNCLUSTERED_VARIANTS_JOB = "CLUSTER_UNCLUSTERED_VARIANTS_JOB"; + public static final String RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB = "RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB"; public static final String BACK_PROPAGATE_NEW_RS_JOB = "BACK_PROPAGATE_NEW_RS_JOB"; diff --git a/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ClusterUnclusteredVariantsJobConfiguration.java b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ClusterUnclusteredVariantsJobConfiguration.java index 479c46378..df7c621c3 100644 --- a/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ClusterUnclusteredVariantsJobConfiguration.java +++ b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ClusterUnclusteredVariantsJobConfiguration.java @@ -35,7 +35,7 @@ public class ClusterUnclusteredVariantsJobConfiguration { // Should be run after split or merge candidates have been processed in the step @see PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB // In this step, proceed to cluster as-yet unclustered variants in a given assembly - // This job should NOT be run in parallel for multiple species due to EVA-2178 but the previous step can be run in parallel. + // This job should only be run in parallel across instances @Bean(CLUSTER_UNCLUSTERED_VARIANTS_JOB) public Job clusteringFromMongoJob( @Qualifier(PROCESS_RS_MERGE_CANDIDATES_STEP) Step processRSMergeCandidatesStep, diff --git a/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ResolveMergeThenSplitCandidatesJobConfiguration.java b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ResolveMergeThenSplitCandidatesJobConfiguration.java new file mode 100644 index 000000000..5126ec07d --- /dev/null +++ b/eva-accession-clustering/src/main/java/uk/ac/ebi/eva/accession/clustering/configuration/batch/jobs/ResolveMergeThenSplitCandidatesJobConfiguration.java @@ -0,0 +1,50 @@ +/* + * Copyright 2020 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs; + +import org.springframework.batch.core.Job; +import org.springframework.batch.core.Step; +import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing; +import org.springframework.batch.core.configuration.annotation.JobBuilderFactory; +import org.springframework.batch.core.launch.support.RunIdIncrementer; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.CLEAR_RS_MERGE_AND_SPLIT_CANDIDATES_STEP; +import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB; +import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.PROCESS_RS_MERGE_CANDIDATES_STEP; +import static uk.ac.ebi.eva.accession.clustering.configuration.BeanNames.PROCESS_RS_SPLIT_CANDIDATES_STEP; + +@Configuration +@EnableBatchProcessing +public class ResolveMergeThenSplitCandidatesJobConfiguration { + // Should be run after split or merge candidates have been processed in the step @see PROCESS_REMAPPED_VARIANTS_WITH_RS_JOB + // This job should only be run in parallel across instances + @Bean(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB) + public Job clusteringFromMongoJob( + @Qualifier(PROCESS_RS_MERGE_CANDIDATES_STEP) Step processRSMergeCandidatesStep, + @Qualifier(PROCESS_RS_SPLIT_CANDIDATES_STEP) Step processRSSplitCandidatesStep, + @Qualifier(CLEAR_RS_MERGE_AND_SPLIT_CANDIDATES_STEP) Step clearRSMergeAndSplitCandidatesStep, + JobBuilderFactory jobBuilderFactory) { + return jobBuilderFactory.get(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB) + .incrementer(new RunIdIncrementer()) + .start(processRSMergeCandidatesStep) + .next(processRSSplitCandidatesStep) + .next(clearRSMergeAndSplitCandidatesStep) + .build(); + } +} diff --git a/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/runner/ClusteringCommandLineRunnerTest.java b/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/runner/ClusteringCommandLineRunnerTest.java index d749dceef..2d2603128 100644 --- a/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/runner/ClusteringCommandLineRunnerTest.java +++ b/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/runner/ClusteringCommandLineRunnerTest.java @@ -518,6 +518,15 @@ public void runClusterUnclusteredVariantsJobWithNoErrors() throws JobExecutionEx assertEquals(ClusteringCommandLineRunner.EXIT_WITHOUT_ERRORS, runner.getExitCode()); } + @Test + @UsingDataSet(locations = {"/test-data/clusteredVariantEntityForVcfJob.json"}) + @DirtiesContext + public void runResolveMergeThenSplitCandidatesJobWithNoErrors() throws JobExecutionException { + runner.setJobNames(RESOLVE_MERGE_THEN_SPLIT_CANDIDATE_JOB); + runner.run(); + assertEquals(ClusteringCommandLineRunner.EXIT_WITHOUT_ERRORS, runner.getExitCode()); + } + @Test @DirtiesContext /* diff --git a/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/test/configuration/BatchTestConfiguration.java b/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/test/configuration/BatchTestConfiguration.java index c6e71531a..d9c75a056 100644 --- a/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/test/configuration/BatchTestConfiguration.java +++ b/eva-accession-clustering/src/test/java/uk/ac/ebi/eva/accession/clustering/test/configuration/BatchTestConfiguration.java @@ -35,12 +35,7 @@ import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.RSMergeAndSplitCandidatesReaderConfiguration; import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.RSMergeAndSplitWriterConfiguration; import uk.ac.ebi.eva.accession.clustering.configuration.batch.io.VcfReaderConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.BackPropagateRSJobConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusterUnclusteredVariantsJobConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusteringFromMongoJobConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ClusteringFromVcfJobConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.ProcessRemappedVariantsWithRSJobConfiguration; -import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.StudyClusteringJobConfiguration; +import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.*; import uk.ac.ebi.eva.accession.clustering.configuration.batch.jobs.qc.NewClusteredVariantsQCJobConfiguration; import uk.ac.ebi.eva.accession.clustering.configuration.batch.listeners.ListenersConfiguration; import uk.ac.ebi.eva.accession.clustering.configuration.batch.policies.ChunkSizeCompletionPolicyConfiguration; @@ -57,6 +52,7 @@ @EnableAutoConfiguration @Import({ClusteringFromVcfJobConfiguration.class, + ResolveMergeThenSplitCandidatesJobConfiguration.class, ClusteringFromMongoJobConfiguration.class, StudyClusteringJobConfiguration.class, NewClusteredVariantsQCJobConfiguration.class, diff --git a/eva-accession-clustering/src/test/resources/test-data/submittedVariantOperationEntityMongoReader.json b/eva-accession-clustering/src/test/resources/test-data/submittedVariantOperationEntityMongoReader.json new file mode 100644 index 000000000..cf24bc3a6 --- /dev/null +++ b/eva-accession-clustering/src/test/resources/test-data/submittedVariantOperationEntityMongoReader.json @@ -0,0 +1,69 @@ +{ + "submittedVariantOperationEntity": [ + { + "_id" : "RSMC_GCA_000000001.1_HASH", + "eventType" : "RS_MERGE_CANDIDATES", + "accession" : NumberLong("2222222222"), + "reason" : "RS mismatch with 1111111111", + "inactiveObjects" : [ + { + "seq" : "GCA_000000001.1", + "tax" : 3000, + "study" : "PRJEB00001", + "contig" : "CM000001.2", + "start" : NumberLong(3000), + "ref" : "C", + "alt" : "T", + "rs" : NumberLong("2222222222"), + "evidence" : true, + "asmMatch" : true, + "allelesMatch" : true, + "validated" : false, + "hashedMessage" : "45BD2A41D4C295DF0A85A54D6CB72C2BBABB0C7D", + "accession" : NumberLong(1000000000), + "version" : 1, + "createdDate" : ISODate("2022-04-26T08:33:26.562Z") + }, + { + "seq" : "GCA_000000001.1", + "tax" : 3000, + "study" : "PRJEB00002", + "contig" : "CM000001.2", + "start" : NumberLong(3000), + "ref" : "C", + "alt" : "T", + "rs" : NumberLong("2222222222"), + "remappedFrom" : "GCA_000003205.6", + "remappedDate" : ISODate("2021-08-12T02:53:48.966Z"), + "remappingId" : "E076D09482EF2504D3168AABB13BC94172A92C27", + "hashedMessage" : "C2C9770AE529AFD445CC15458CBA2C71F7A82DB8", + "accession" : NumberLong(1000000001), + "version" : 1, + "createdDate" : ISODate("2014-11-10T14:58:00Z") + }, + { + "seq" : "GCA_000000001.1", + "tax" : 3000, + "study" : "PRJEB38336", + "contig" : "CM000001.2", + "start" : NumberLong(3000), + "ref" : "C", + "alt" : "T", + "rs" : NumberLong("1111111111"), + "evidence" : true, + "asmMatch" : true, + "allelesMatch" : true, + "validated" : false, + "remappedFrom" : "GCA_000000000.1", + "remappedDate" : ISODate("2021-08-11T22:03:44.689Z"), + "remappingId" : "778BEEA595920286BA9CC9ECF13442A69F9DA73B", + "hashedMessage" : "581F717670A07D8D032E234212E0D74599943D44", + "accession" : NumberLong(1000000002), + "version" : 1, + "createdDate" : ISODate("2020-11-26T22:56:39.129Z") + } + ], + "createdDate" : ISODate("2023-05-07T04:52:59.110Z") + } + ] +}