Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3494 Ingest assembly in batches #125

Merged
merged 13 commits into from
Feb 12, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.hateoas.config.EnableHypermediaSupport;
import org.springframework.retry.annotation.EnableRetry;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;

@EnableScheduling
@EnableAsync
@SpringBootApplication
@EnableRetry
@EnableTransactionManagement
@EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL)
public class ContigAliasApplication extends SpringBootServletInitializer {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package uk.ac.ebi.eva.contigalias.conf;

import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

/**
 * Keeps a static reference to the Spring {@link ApplicationContext} so that it can be
 * looked up through {@link #getApplicationContext()} without injection.
 *
 * <p>The reference is written by {@link #setApplicationContext(ApplicationContext)}, the
 * {@link ApplicationContextAware} callback. Until that callback has been invoked the
 * accessor returns {@code null}.
 */
@Component
public class ApplicationContextHolder implements ApplicationContextAware {

    // Single shared reference; it has no initializer, so it is null until the callback runs.
    private static ApplicationContext applicationContext;

    /**
     * @return the context most recently passed to {@link #setApplicationContext(ApplicationContext)},
     *         or {@code null} if none has been set yet
     */
    public static ApplicationContext getApplicationContext() {
        return applicationContext;
    }

    /**
     * Stores the supplied context in the static holder field.
     *
     * @param applicationContext the context to remember
     * @throws BeansException declared by the {@link ApplicationContextAware} contract; never thrown here
     */
    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        remember(applicationContext);
    }

    // Static helper so the write to the static field does not happen directly in an instance method.
    private static void remember(ApplicationContext context) {
        ApplicationContextHolder.applicationContext = context;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Optional;

@RequestMapping("/v1/admin")
@RestController
Expand Down Expand Up @@ -63,6 +63,9 @@ public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
"GCA_000001405.10") String asmAccession) throws IOException {
try {
handler.fetchAndInsertAssemblyByAccession(asmAccession);
// submit jobs for updating ena sequence name and md5 checksum for assembly
handler.retrieveAndInsertENASequenceNameForAssembly(asmAccession);
handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
} catch (IllegalArgumentException e) {
return new ResponseEntity<>(e.getMessage(), HttpStatus.BAD_REQUEST);
}
Expand All @@ -82,43 +85,124 @@ public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
"parallel manner.")
@PutMapping(value = "assemblies")
public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
@RequestBody(required = false) @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
@RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
"Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
if (accessions == null || accessions.size() <= 0) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
Map<String, List<String>> accessionResult = handler.fetchAndInsertAssemblyByAccession(accessions);
// submit jobs for updating ena sequence names and md5 checksum for all successfully inserted assemblies
if (accessionResult.get("SUCCESS").size() > 0) {
handler.retrieveAndInsertENASequenceNameForAssembly(accessionResult.get("SUCCESS"));
handler.retrieveAndInsertMd5ChecksumForAssembly(accessionResult.get("SUCCESS"));
}
return new ResponseEntity<>("Accession Processing Result : " + accessionResult, HttpStatus.MULTI_STATUS);
}

@ApiOperation(value = "Given an assembly accession, retrieve MD5 checksum for all chromosomes belonging to assembly and update")
@PutMapping(value = "assemblies/{accession}/md5checksum")
@PutMapping(value = "assemblies/md5checksum/{accession}")
public ResponseEntity<String> retrieveAndInsertMd5ChecksumForAssembly(@PathVariable(name = "accession")
@ApiParam(value = "INSDC or RefSeq assembly accession. Eg: " +
"GCA_000001405.10") String asmAccession) {
try {
handler.getAssemblyByAccession(asmAccession);
handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
return ResponseEntity.ok("A task has been submitted for updating md5checksum for all chromosomes " +
"in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " +
"this might take some time to complete");
} catch (AssemblyNotFoundException e) {
Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(asmAccession);
if (assemblyOpt.isPresent()) {
handler.retrieveAndInsertMd5ChecksumForAssembly(assemblyOpt.get().getInsdcAccession());
return ResponseEntity.ok("A task has been submitted for updating md5checksum for assembly " + asmAccession
+ "\nDepending upon the size of assembly and other scheduled jobs, this might take some time to complete");
} else {
return ResponseEntity.ok("Could not find assembly " + asmAccession +
". Please insert the assembly first (md5checksum will be updated as part of the insertion process");
". Please insert the assembly first. MD5 checksum will be updated as part of the insertion process");
}
}

/**
 * Submits an MD5 checksum update for every requested assembly that is already stored.
 *
 * <p>Each accession is looked up through the handler. Assemblies that are found are submitted
 * using their INSDC accession; the ones that are missing are only reported back to the caller.
 * Nothing is submitted (and no "task has been submitted" message is returned) when none of the
 * requested assemblies exist. The request list itself is left unmodified.
 *
 * @param accessions INSDC or RefSeq assembly accessions taken from the request body
 * @return {@code 400 Bad Request} with no body when the list is null or empty; otherwise
 *         {@code 200 OK} with a text summary of the submitted and the missing assemblies
 */
@ApiOperation(value = "Given a list of assembly accessions, retrieve MD5 checksum for all chromosomes belonging to all the assemblies and update")
@PutMapping(value = "assemblies/md5checksum")
public ResponseEntity<String> retrieveAndInsertMd5ChecksumForAssembly(
        @RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
                "Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
    if (accessions == null || accessions.isEmpty()) {
        return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
    }

    List<String> asmInsdcAccessionsList = new ArrayList<>();
    // Accessions exactly as the caller supplied them, split by whether the assembly exists.
    List<String> asmPresent = new ArrayList<>();
    List<String> asmNotPresent = new ArrayList<>();
    for (String accession : accessions) {
        Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(accession);
        if (assemblyOpt.isPresent()) {
            asmInsdcAccessionsList.add(assemblyOpt.get().getInsdcAccession());
            asmPresent.add(accession);
        } else {
            asmNotPresent.add(accession);
        }
    }

    List<String> responseLines = new ArrayList<>();
    // Only submit (and only claim to have submitted) when there is something to update.
    if (!asmInsdcAccessionsList.isEmpty()) {
        handler.retrieveAndInsertMd5ChecksumForAssembly(asmInsdcAccessionsList);
        responseLines.add("A task has been submitted for updating MD5 checksum for assemblies: " + asmPresent + ".");
        responseLines.add("Depending upon other scheduled jobs and the size of assembly, this might take some time to complete");
    }
    if (!asmNotPresent.isEmpty()) {
        responseLines.add("The following assemblies are not present: " + asmNotPresent + ".");
        responseLines.add("Please insert the assembly first, MD5 Checksum will be updated as part of the insertion process");
    }

    return ResponseEntity.ok(String.join("\n", responseLines));
}

/**
 * Submits an ENA sequence name update for a single, already stored assembly.
 *
 * <p>The assembly is looked up by the given accession and, when found, the update is requested
 * using the assembly's INSDC accession. Both outcomes are answered with {@code 200 OK}; the
 * body text tells the caller whether the task was submitted or the assembly is unknown.
 *
 * @param asmAccession INSDC or RefSeq assembly accession taken from the request path
 * @return {@code 200 OK} with a plain-text description of the outcome
 */
@ApiOperation(value = "Given an assembly accession, retrieve ENA sequence name for all chromosomes belonging to assembly and update")
@PutMapping(value = "assemblies/ena-sequence-name/{accession}")
public ResponseEntity<String> retrieveAndInsertENASequenceNameForAssembly(
        @PathVariable(name = "accession")
        @ApiParam(value = "INSDC or RefSeq assembly accession. " +
                "Eg: GCA_000001405.10") String asmAccession) {
    Optional<AssemblyEntity> lookup = handler.getAssemblyByAccession(asmAccession);

    // Unknown assembly: nothing to submit, just tell the caller what to do first.
    if (!lookup.isPresent()) {
        String notFoundMessage = "Could not find assembly " + asmAccession +
                ". Please insert the assembly first. ENA sequence name will be updated as part of the insertion process";
        return ResponseEntity.ok(notFoundMessage);
    }

    // The update is always requested with the INSDC accession, whichever accession the caller used.
    String insdcAccession = lookup.get().getInsdcAccession();
    handler.retrieveAndInsertENASequenceNameForAssembly(insdcAccession);

    String submittedMessage = "A task has been submitted for updating ENA Sequence Name for assembly " + asmAccession
            + "\nDepending upon the size of assembly and other scheduled jobs, this might take some time to complete";
    return ResponseEntity.ok(submittedMessage);
}

/**
 * Submits an ENA sequence name update for every requested assembly that is already stored.
 *
 * <p>Each accession is looked up through the handler. Assemblies that are found are submitted
 * using their INSDC accession; the ones that are missing are only reported back to the caller.
 * Nothing is submitted (and no "task has been submitted" message is returned) when none of the
 * requested assemblies exist. The request list itself is left unmodified.
 *
 * @param accessions INSDC or RefSeq assembly accessions taken from the request body
 * @return {@code 400 Bad Request} with no body when the list is null or empty; otherwise
 *         {@code 200 OK} with a text summary of the submitted and the missing assemblies
 */
@ApiOperation(value = "Given a list of assembly accessions, retrieve ENA sequence name for all chromosomes belonging to all the assemblies and update")
@PutMapping(value = "assemblies/ena-sequence-name")
public ResponseEntity<String> retrieveAndInsertENASequenceNameForAssembly(
        @RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
                "Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
    if (accessions == null || accessions.isEmpty()) {
        return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
    }

    List<String> asmInsdcAccessionsList = new ArrayList<>();
    // Accessions exactly as the caller supplied them, split by whether the assembly exists.
    List<String> asmPresent = new ArrayList<>();
    List<String> asmNotPresent = new ArrayList<>();
    for (String accession : accessions) {
        Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(accession);
        if (assemblyOpt.isPresent()) {
            asmInsdcAccessionsList.add(assemblyOpt.get().getInsdcAccession());
            asmPresent.add(accession);
        } else {
            asmNotPresent.add(accession);
        }
    }

    List<String> responseLines = new ArrayList<>();
    // Only submit (and only claim to have submitted) when there is something to update.
    if (!asmInsdcAccessionsList.isEmpty()) {
        handler.retrieveAndInsertENASequenceNameForAssembly(asmInsdcAccessionsList);
        responseLines.add("A task has been submitted for updating ENA Sequence Name for assemblies: " + asmPresent);
        responseLines.add("Depending upon other scheduled jobs and the size of assembly, this might take some time to complete");
    }
    if (!asmNotPresent.isEmpty()) {
        responseLines.add("The following assemblies are not present: " + asmNotPresent + ".");
        responseLines.add("Please insert the assembly first, ENA Sequence Name will be updated as part of the insertion process");
    }

    return ResponseEntity.ok(String.join("\n", responseLines));
}


@ApiOperation(value = "Retrieve list of assemblies for which MD5 Checksum updates are running/going-to-run ")
nitin-ebi marked this conversation as resolved.
Show resolved Hide resolved
@GetMapping(value = "assemblies/md5checksum/status")
public ResponseEntity<String> getMD5ChecksumUpdateTaskStatus() {
Map<String, Set<String>> md5ChecksumUpdateTasks = handler.getMD5ChecksumUpdateTaskStatus();
Set<String> runningTasks = md5ChecksumUpdateTasks.get("running");
Set<String> scheduledTasks = md5ChecksumUpdateTasks.get("scheduled");
String runningTaskRes = runningTasks == null || runningTasks.isEmpty() ? "No running MD5 checksum update tasks" :
runningTasks.stream().collect(Collectors.joining(","));
String scheduledTaskRes = scheduledTasks == null || scheduledTasks.isEmpty() ? "No scheduled MD5 checksum update tasks" :
scheduledTasks.stream().collect(Collectors.joining(","));
return ResponseEntity.ok("running: " + runningTaskRes + "\nscheduled: " + scheduledTaskRes);
@GetMapping(value = "assemblies/scheduled-jobs")
public ResponseEntity<List<String>> getMD5ChecksumUpdateTaskStatus() {
List<String> scheduledJobStatus = handler.getScheduledJobStatus();
return ResponseEntity.ok(scheduledJobStatus);
}

// This endpoint can be enabled in the future when checksums for assemblies are added to the project.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,13 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.stereotype.Service;

import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.service.AssemblyService;
import uk.ac.ebi.eva.contigalias.service.ChromosomeService;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

@Service
public class AdminHandler {
Expand All @@ -52,7 +49,7 @@ public Optional<AssemblyEntity> getAssemblyByAccession(String accession) {
return assemblyService.getAssemblyByAccession(accession);
}

public void fetchAndInsertAssemblyByAccession(String accession) throws IOException {
public void fetchAndInsertAssemblyByAccession(String accession) {
assemblyService.fetchAndInsertAssembly(accession);
}

Expand All @@ -64,8 +61,20 @@ public void retrieveAndInsertMd5ChecksumForAssembly(String accession) {
assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accession);
}

public Map<String, Set<String>> getMD5ChecksumUpdateTaskStatus() {
return assemblyService.getMD5ChecksumUpdateTaskStatus();
/**
 * Passes a list of assembly accessions to
 * {@code assemblyService.retrieveAndInsertMd5ChecksumForAssembly(List)}.
 *
 * <p>NOTE(review): whether the service runs the update synchronously or schedules it is not
 * visible here - confirm against AssemblyService.
 *
 * @param accessions assembly accessions forwarded unchanged to the service
 */
public void retrieveAndInsertMd5ChecksumForAssembly(List<String> accessions) {
assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accessions);
}

/**
 * Passes a single assembly accession to
 * {@code assemblyService.retrieveAndInsertENASequenceNameForAssembly(String)}.
 *
 * <p>NOTE(review): whether the service runs the update synchronously or schedules it is not
 * visible here - confirm against AssemblyService.
 *
 * @param accession assembly accession forwarded unchanged to the service
 */
public void retrieveAndInsertENASequenceNameForAssembly(String accession) {
assemblyService.retrieveAndInsertENASequenceNameForAssembly(accession);
}

/**
 * Passes a list of assembly accessions to
 * {@code assemblyService.retrieveAndInsertENASequenceNameForAssembly(List)}.
 *
 * <p>NOTE(review): whether the service runs the update synchronously or schedules it is not
 * visible here - confirm against AssemblyService.
 *
 * @param accessions assembly accessions forwarded unchanged to the service
 */
public void retrieveAndInsertENASequenceNameForAssembly(List<String> accessions) {
assemblyService.retrieveAndInsertENASequenceNameForAssembly(accessions);
}

/**
 * Returns the result of {@code assemblyService.getScheduledJobStatus()} unchanged.
 *
 * <p>NOTE(review): the content and format of the individual strings are defined by the
 * service and cannot be determined from this class.
 *
 * @return the job status entries reported by the assembly service
 */
public List<String> getScheduledJobStatus() {
return assemblyService.getScheduledJobStatus();
}

public void deleteAssemblyByAccession(String accession) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.hateoas.EntityModel;
Expand Down Expand Up @@ -83,6 +82,7 @@ public PagedModel<EntityModel<AssemblyEntity>> getAssemblyByRefseq(String refseq

/**
 * Fetches one page of assemblies for the given taxonomy id and converts it to a paged model.
 *
 * <p>The chromosomes of every assembly on the page are set to {@code null} before the model is
 * built (presumably so they are left out of the response - confirm with the API contract).
 *
 * @param taxid   taxonomy id used to look up assemblies
 * @param request paging information passed on to the service
 * @return the paged model generated from the page of assemblies
 */
public PagedModel<EntityModel<AssemblyEntity>> getAssembliesByTaxid(long taxid, Pageable request) {
    Page<AssemblyEntity> assembliesPage = assemblyService.getAssembliesByTaxid(taxid, request);
    for (AssemblyEntity assembly : assembliesPage) {
        assembly.setChromosomes(null);
    }
    return generatePagedModelFromPage(assembliesPage, assemblyAssembler);
}

Expand Down

This file was deleted.

Loading
Loading