Skip to content

Commit

Permalink
Change properties file generation for the new accessioning/clustering (#56)
Browse files Browse the repository at this point in the history
* Remove the instance ID from the accessioning/clustering properties and add
a dummy `recovery.cutoff.days` value
Co-authored-by: nitin-ebi <79518737+nitin-ebi@users.noreply.github.com>
  • Loading branch information
tcezard authored May 15, 2024
1 parent a8ba6d1 commit 86cee88
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 16 deletions.
20 changes: 10 additions & 10 deletions ebi_eva_internal_pyutils/spring_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,9 @@ def _common_accessioning_properties(self, assembly_accession, read_preference, c
merge = {**self._common_properties(read_preference=read_preference, chunk_size=chunk_size), **props}
return merge

def _common_accessioning_clustering_properties(self, *, instance='', assembly_accession, read_preference, chunk_size):
def _common_accessioning_clustering_properties(self, *, assembly_accession, read_preference, chunk_size):
"""Properties common to accessioning and clustering pipelines."""
props = {
'accessioning.instanceId': self._format_str('instance-{0}', instance),
'accessioning.submitted.categoryId': 'ss',
'accessioning.clustered.categoryId': 'rs',
'accessioning.monotonic.ss.blockSize': 100000,
Expand All @@ -129,16 +128,18 @@ def _common_accessioning_clustering_properties(self, *, instance='', assembly_ac
'accessioning.monotonic.rs.blockSize': 100000,
'accessioning.monotonic.rs.blockStartValue': 3000000000,
'accessioning.monotonic.rs.nextBlockInterval': 1000000000,
# This value is not used but is required to create beans in Java
'recovery.cutoff.days': 9999999
}
merge = {**self._common_accessioning_properties(assembly_accession, read_preference, chunk_size), **props}
return merge

def get_accessioning_properties(self, *, instance=None, target_assembly=None, fasta=None, assembly_report=None,
def get_accessioning_properties(self, *, target_assembly=None, fasta=None, assembly_report=None,
project_accession=None, aggregation='BASIC', taxonomy_accession=None,
vcf_file='', output_vcf='', chunk_size=100):
"""Properties for accessioning pipeline."""
return self._format(
self._common_accessioning_clustering_properties(instance=instance, assembly_accession=target_assembly,
self._common_accessioning_clustering_properties(assembly_accession=target_assembly,
read_preference='secondaryPreferred', chunk_size=chunk_size),
{
'spring.batch.job.names': 'CREATE_SUBSNP_ACCESSION_JOB',
Expand All @@ -154,21 +155,20 @@ def get_accessioning_properties(self, *, instance=None, target_assembly=None, fa
},
)

def get_clustering_properties(self, *, instance=None, read_preference='primary',
job_name=None, source_assembly='', target_assembly='', rs_report_path='', projects='',
def get_clustering_properties(self, *, read_preference='primary', job_name=None, source_assembly='',
target_assembly='', rs_report_path='', projects='',
project_accession='', vcf=''):
"""Properties common to all clustering pipelines, though not all are always used."""
return self._format(
self._common_accessioning_clustering_properties(instance=instance, assembly_accession=target_assembly,
read_preference=read_preference, chunk_size=100,
),
self._common_accessioning_clustering_properties(assembly_accession=target_assembly,
read_preference=read_preference, chunk_size=100),
{
'spring.batch.job.names': job_name,
'parameters.remappedFrom': source_assembly,
'parameters.projects': projects,
'parameters.projectAccession': project_accession,
'parameters.vcf': vcf,
'parameters.rsReportPath': rs_report_path
'parameters.rsReportPath': rs_report_path,
}
)

Expand Down
15 changes: 9 additions & 6 deletions tests/internal/test_spring_prop.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def test_get_clustering_properties(self):
parameters.vcf=
parameters.rsReportPath=/path/to/rs_report.txt
accessioning.instanceId=instance-1
accessioning.submitted.categoryId=ss
accessioning.clustered.categoryId=rs
accessioning.monotonic.ss.blockSize=100000
Expand All @@ -132,9 +131,11 @@ def test_get_clustering_properties(self):
accessioning.monotonic.rs.blockSize=100000
accessioning.monotonic.rs.blockStartValue=3000000000
accessioning.monotonic.rs.nextBlockInterval=1000000000
recovery.cutoff.days=9999999
'''
assert self.prop.get_clustering_properties(
instance=1, job_name='CLUSTERING_RSID', target_assembly='GCA_00000002.1',
job_name='CLUSTERING_RSID', target_assembly='GCA_00000002.1',
rs_report_path='/path/to/rs_report.txt') == expected

def test_get_accessioning_properties(self):
Expand Down Expand Up @@ -175,7 +176,6 @@ def test_get_accessioning_properties(self):
parameters.vcf=/path/to/vcf_file.vcf
parameters.outputVcf=
accessioning.instanceId=instance-1
accessioning.submitted.categoryId=ss
accessioning.clustered.categoryId=rs
accessioning.monotonic.ss.blockSize=100000
Expand All @@ -184,9 +184,11 @@ def test_get_accessioning_properties(self):
accessioning.monotonic.rs.blockSize=100000
accessioning.monotonic.rs.blockStartValue=3000000000
accessioning.monotonic.rs.nextBlockInterval=1000000000
recovery.cutoff.days=9999999
'''
assert self.prop.get_accessioning_properties(
instance=1, target_assembly='GCA_00000001.1', fasta='/path/to/fasta.fa',
target_assembly='GCA_00000001.1', fasta='/path/to/fasta.fa',
assembly_report='/path/to/assembly_report.txt', project_accession='PRJEB0001', aggregation='BASIC',
taxonomy_accession='9906', vcf_file='/path/to/vcf_file.vcf') == expected

Expand Down Expand Up @@ -405,7 +407,6 @@ def test_get_accessioning_properties_with_none(self):
parameters.vcf=/path/to/vcf_file.vcf
parameters.outputVcf=
accessioning.instanceId=instance-1
accessioning.submitted.categoryId=ss
accessioning.clustered.categoryId=rs
accessioning.monotonic.ss.blockSize=100000
Expand All @@ -414,9 +415,11 @@ def test_get_accessioning_properties_with_none(self):
accessioning.monotonic.rs.blockSize=100000
accessioning.monotonic.rs.blockStartValue=3000000000
accessioning.monotonic.rs.nextBlockInterval=1000000000
recovery.cutoff.days=9999999
'''
assert self.prop.get_accessioning_properties(
instance=1, target_assembly='GCA_00000001.1', fasta=None,
target_assembly='GCA_00000001.1', fasta=None,
assembly_report=None, project_accession='PRJEB0001', aggregation='BASIC',
taxonomy_accession='9906', vcf_file='/path/to/vcf_file.vcf') == expected

Expand Down

0 comments on commit 86cee88

Please sign in to comment.