gisaid/clade-counts #415
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name shown in the GitHub Actions UI.
name: GISAID SARS-CoV-2 clade counts

on:
  # API-driven trigger: a repository_dispatch event of either type starts a run.
  repository_dispatch:
    types:
      - clade-counts
      - gisaid/clade-counts
  # Manual trigger; both inputs are optional.
  workflow_dispatch:
    inputs:
      slack_channel:
        description: 'Slack channel to push update alerts. Default channel is nextstrain-counts-updates.'
        required: false
      trial_name:
        description: 'Short name for a trial run. WARNING: without this we will overwrite files in s3://nextstrain-data/files/workflows/forecasts-ncov/gisaid'
        required: false
jobs:
  # Downloads the metadata table, reduces it to the needed columns, summarizes
  # sequence counts four ways (global/US x clade/lineage) and uploads the results.
  gisaid_clade_counts:
    runs-on: ubuntu-latest
    env:
      SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
      # The input is only set on workflow_dispatch runs; otherwise fall back to
      # the default channel.
      SLACK_CHANNELS: ${{ github.event.inputs.slack_channel || 'nextstrain-counts-updates' }}
    defaults:
      run:
        # Login shell is required to include changes by conda init bash.
        shell: bash -l -eo pipefail {0}
    steps:
      # v4 runs on a supported Node runtime; v2 targeted a retired one.
      - uses: actions/checkout@v4

      # Mambaforge installers are no longer published by conda-forge; Miniforge3
      # ships mamba, so the `mamba install` step below keeps working.
      - uses: conda-incubator/setup-miniconda@v3
        with:
          python-version: "3.9"
          miniforge-variant: Miniforge3
          miniforge-version: latest
          channels: conda-forge,bioconda

      - name: setup
        run: mamba install "pandas>=1.0.0" "tsv-utils" --freeze-installed

      - name: download metadata
        run: ./ingest/bin/download-from-s3 s3://nextstrain-ncov-private/metadata.tsv.gz metadata.tsv
        env:
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

      # Keep only the columns the summarize steps read, then replace the
      # original file with the pruned one.
      - name: subset metadata columns
        run: |
          tsv-select -H \
            -f strain,date,country,division,QC_overall_status,Nextstrain_clade,Nextclade_pango \
            metadata.tsv > metadata_pruned.tsv
          mv metadata_pruned.tsv metadata.tsv

      # The four summarize steps differ only in the clade column, the optional
      # location column, the filter, and the output file.
      - name: summarize global clade counts
        run: |
          ./ingest/bin/summarize-clade-sequence-counts \
            --metadata metadata.tsv \
            --clade-column Nextstrain_clade \
            --filter-columns QC_overall_status \
            --filter-query "QC_overall_status != 'bad'" \
            --output global_clade_counts.tsv

      - name: summarize global lineage counts
        run: |
          ./ingest/bin/summarize-clade-sequence-counts \
            --metadata metadata.tsv \
            --clade-column Nextclade_pango \
            --filter-columns QC_overall_status \
            --filter-query "QC_overall_status != 'bad'" \
            --output global_lineage_counts.tsv

      - name: summarize us clade counts
        run: |
          ./ingest/bin/summarize-clade-sequence-counts \
            --metadata metadata.tsv \
            --location-column division \
            --clade-column Nextstrain_clade \
            --filter-columns QC_overall_status country \
            --filter-query "QC_overall_status != 'bad' & country == 'USA'" \
            --output us_clade_counts.tsv

      - name: summarize us lineage counts
        run: |
          ./ingest/bin/summarize-clade-sequence-counts \
            --metadata metadata.tsv \
            --location-column division \
            --clade-column Nextclade_pango \
            --filter-columns QC_overall_status country \
            --filter-query "QC_overall_status != 'bad' & country == 'USA'" \
            --output us_lineage_counts.tsv

      # With a trial name set, uploads go under <S3_DST>/trial/<name> instead of
      # overwriting the main destination.
      - name: upload clade counts
        run: |
          S3_DST=s3://nextstrain-data/files/workflows/forecasts-ncov/gisaid
          CLOUDFRONT_DOMAIN="data.nextstrain.org"
          if [[ "$TRIAL_NAME" ]]; then
            S3_DST+=/trial/"$TRIAL_NAME"
          fi
          ./ingest/bin/upload-to-s3 global_clade_counts.tsv "$S3_DST"/nextstrain_clades/global.tsv.gz "$CLOUDFRONT_DOMAIN"
          ./ingest/bin/upload-to-s3 global_lineage_counts.tsv "$S3_DST"/pango_lineages/global.tsv.gz "$CLOUDFRONT_DOMAIN"
          ./ingest/bin/upload-to-s3 us_clade_counts.tsv "$S3_DST"/nextstrain_clades/usa.tsv.gz "$CLOUDFRONT_DOMAIN"
          ./ingest/bin/upload-to-s3 us_lineage_counts.tsv "$S3_DST"/pango_lineages/usa.tsv.gz "$CLOUDFRONT_DOMAIN"
        env:
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          # Passed through the environment rather than interpolated into the
          # script text.
          TRIAL_NAME: ${{ github.event.inputs.trial_name }}

      # Runs only when an earlier step in this job has failed.
      - name: notify_pipeline_failed
        if: ${{ failure() }}
        run: ./ingest/bin/notify-on-job-fail

  # Starts the model-run workflow in nextstrain/forecasts-ncov once the counts
  # job has succeeded; skipped when a trial name was given.
  trigger_model_runs:
    needs: [gisaid_clade_counts]
    if: ${{ !github.event.inputs.trial_name }}
    runs-on: ubuntu-latest
    steps:
      - run: gh workflow run run-models.yaml --repo nextstrain/forecasts-ncov -f data_provenance=gisaid
        env:
          GITHUB_TOKEN: ${{ secrets.GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH }}