Skip to content

Commit

Permalink
Merge pull request #569 from RoanKanninga/master
Browse files Browse the repository at this point in the history
putting rawdata logs in the per rawdata folder
  • Loading branch information
Gerbenvandervries authored Jul 8, 2016
2 parents 76625d9 + 853cf95 commit 38ca862
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 94 deletions.
19 changes: 8 additions & 11 deletions compute5/NGS_Automated/copyProjectDataToPrm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ echo "Logfiles will be written to $LOGDIR"
for line in ${ARR[@]}
do
projectName=${line}
LOGGER=${LOGDIR}/${projectName}.copyProjectDataToPrm.logger
LOGGER=${LOGDIR}/${projectName}/${projectName}.copyProjectDataToPrm.logger

FINISHED="no"

Expand All @@ -49,7 +49,7 @@ do
makeProjectDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${PROJECTSDIRPRM}/checkProjectData.sh ${PROJECTSDIRPRM} ${projectName}")

copyProjectDataDiagnosticsClusterToPrm="${PROJECTSDIR}/${projectName}/* ${groupname}-dm@calculon.hpc.rug.nl:${PROJECTSDIRPRM}/${projectName}"
if [[ -f $LOGDIR/${projectName}/${projectName}.pipeline.finished && ! -f $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm ]]
if [[ -d $LOGDIR/${projectName}/ && ! -f $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm ]]
then
countFilesProjectDataDirTmp=$(ls -R ${PROJECTSDIR}/${projectName}/*/results/ | wc -l)
module load hashdeep/4.4-foss-2015b
Expand All @@ -76,19 +76,16 @@ do
then
echo "md5sum check failed, the copying will start again" >> ${LOGGER}
rsync -r -av --exclude rawdata/ ${copyProjectDataDiagnosticsClusterToPrm} >> $LOGGER 2>&1
echo "copy failed" >> $LOGDIR/${projectName}.copyProjectDataToPrm.failed
echo "copy failed" >> $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed
elif [[ "${COPIEDTOPRM}" == *"PASS"* ]]
then
touch $LOGDIR/${projectName}.projectDataCopiedToPrm
touch $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm
echo "finished copying project data to calculon" >> ${LOGGER}
printf "De project data voor project ${projectName} is gekopieerd naar ${PROJECTSDIRPRM}" | mail -s "project data for project ${projectName} is copied to permanent storage" ${ONTVANGER}

mv $LOGDIR/${projectName}.projectDataCopiedToPrm $LOGDIR/${projectName}/
mv $LOGDIR/${projectName}.copyProjectDataToPrm.logger $LOGDIR/${projectName}/

if [ -f $LOGDIR/${projectName}.copyProjectDataToPrm.failed ]
if [ -f $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed ]
then
rm $LOGDIR/${projectName}.copyProjectDataToPrm.failed
rm $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed
fi
fi
else
Expand All @@ -98,9 +95,9 @@ do
fi
fi

if [ -f $LOGDIR/${projectName}.copyProjectDataToPrm.failed ]
if [ -f $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed ]
then
COUNT=$(cat $LOGDIR/${projectName}.copyProjectDataToPrm.failed | wc -l)
COUNT=$(cat $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed | wc -l)
if [ $COUNT == 10 ]
then
HOSTNA=$(hostname)
Expand Down
27 changes: 13 additions & 14 deletions compute5/NGS_Automated/copyRawDataToDiagnosticsCluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,18 @@ for line in ${gattacaSamplesheets[@]}
do
csvFile=$(basename $line)
filePrefix="${csvFile%.*}"
LOGGER=${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.logger
LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.logger

if [ -d ${LOGDIR}/${filePrefix}/ ]
if [ ! -d ${LOGDIR}/${filePrefix}/ ]
then
echo "everything is finished of ${filePrefix}"
continue
mkdir ${LOGDIR}/${filePrefix}/
fi

# Cleanup handler, installed right after this definition with
# `trap finish HUP INT QUIT TERM EXIT ERR`: if the .copyToDiagnosticsCluster.locked
# file for the current ${filePrefix} exists, print "TRAPPED" and remove that lock file.
# Reads the globals LOGDIR and filePrefix as they are set when the trap fires.
# NOTE(review): this listing is a diff rendered without +/- markers, so the `if` test and
# the `rm` each appear twice — presumably the removed line (lock directly under ${LOGDIR})
# followed by the added line (lock under ${LOGDIR}/${filePrefix}/). Confirm against the commit.
function finish {
if [ -f ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked ]
if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ]
then
echo "TRAPPED"
rm ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked
rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked
fi
}
trap finish HUP INT QUIT TERM EXIT ERR
Expand All @@ -62,24 +61,24 @@ do
continue;
fi

if [ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster ]
if [ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ]
then
continue;
fi

if [ -f ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked ]
if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ]
then
exit 0
fi
touch ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked
touch ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked

## Check if samplesheet is copied
copyRawGatToDiagnosticsCluster="umcg-ateambot@${gattacaAddress}:${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}* ${RAWDATADIR}/$filePrefix"

if [[ ! -f ${SAMPLESHEETSDIR}/$csvFile || ! -f $LOGDIR/${filePrefix}.SampleSheetCopied ]]
if [[ ! -f ${SAMPLESHEETSDIR}/$csvFile || ! -f $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied ]]
then
scp umcg-ateambot@${gattacaAddress}:${GATTACA}/Samplesheets/${csvFile} ${SAMPLESHEETSDIR}
touch $LOGDIR/${filePrefix}.SampleSheetCopied
touch $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied
fi
## Check if data is already copied to DiagnosticsCluster

Expand All @@ -91,7 +90,7 @@ do
fi


if [[ -d ${RAWDATADIR}/$filePrefix && ! -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster ]]
if [[ -d ${RAWDATADIR}/$filePrefix && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ]]
then
## Compare how many files are in the directory on both servers
countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l)
Expand All @@ -110,7 +109,7 @@ do
then
echo "data copied to DiagnosticsCluster" >> $LOGGER
printf ".. done \n" >> $LOGGER
touch $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster
touch $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster
touch ${filePrefix}.md5sums.checked
else
echo "md5sum check failed, the copying will start again" >> $LOGGER
Expand All @@ -124,7 +123,7 @@ do
echo "data copied to DiagnosticsCluster" >> $LOGGER
fi
fi
rm ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked
rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked
done

trap - EXIT
Expand Down
39 changes: 13 additions & 26 deletions compute5/NGS_Automated/copyRawDataToPrm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ for line in ${ARR[@]}
do
csvFile=$(basename $line)
filePrefix="${csvFile%.*}"
LOGGER=${LOGDIR}/${filePrefix}.copyToPrm.logger
LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger

FINISHED="no"
OLDIFS=$IFS
Expand All @@ -45,11 +45,6 @@ do
run=$3
IFS=$OLDIFS

if [ -d ${LOGDIR}/${filePrefix}/ ]
then
continue
fi

if [ -f ${LOGDIR}/copyDataToPrm.sh.locked ]
then
echo "copyToPrm is locked"
Expand All @@ -61,12 +56,6 @@ do
## Get the header to decide later which column holds the project
HEADER=$(head -1 ${line})

if [ -d ${LOGDIR}/${filePrefix}/ ]
then
echo "(startPipeline) everything is finished of ${filePrefix}"
continue
fi

##Remove header, only want to keep samples
sed '1d' $line > ${LOGDIR}/TMP/${filePrefix}.utmp
OLDIFS=$IFS
Expand Down Expand Up @@ -96,7 +85,7 @@ do
copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}"
makeRawDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}")

if [[ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}.dataCopiedToPrm ]]
if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]]
then
countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l)
if [ "${makeRawDataDir}" == "f" ]
Expand All @@ -115,22 +104,20 @@ do
then
echo "md5sum check failed, the copying will start again" >> ${LOGGER}
rsync -r -av ${copyRawDiagnosticsClusterToPrm} >> $LOGGER 2>&1
echo "copy failed" >> $LOGDIR/${filePrefix}.failed
echo "copy failed" >> $LOGDIR/${filePrefix}/${filePrefix}.failed
elif [[ "${COPIEDTOPRM}" == *"PASS"* ]]
then
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}/
scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${SAMPLESHEETSPRMDIR}
echo "finished copying data to calculon" >> ${LOGGER}

mkdir $LOGDIR/${filePrefix}/
echo "Moving ${filePrefix} logfiles to $LOGDIR/${filePrefix}/ and removing tmp finished files" >> $LOGGER

rm $LOGDIR/${filePrefix}.SampleSheetCopied
rm $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster
mv $LOGDIR/${filePrefix}.copyToDiagnosticsCluster.logger $LOGDIR/${filePrefix}/
mv $LOGDIR/${filePrefix}.copyToPrm.logger $LOGDIR/${filePrefix}/
mv ${LOGDIR}/TMP/${filePrefix}.unique.projects $LOGDIR/${filePrefix}/projects.txt
#rm $LOGDIR/${filePrefix}.SampleSheetCopied
#rm $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster
#mv $LOGDIR/${filePrefix}.copyToDiagnosticsCluster.logger $LOGDIR/${filePrefix}/
#mv $LOGDIR/${filePrefix}.copyToPrm.logger $LOGDIR/${filePrefix}/
#mv ${LOGDIR}/TMP/${filePrefix}.unique.projects $LOGDIR/${filePrefix}/projects.txt
echo "finished with rawdata" >> ${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger

if ls ${RAWDATADIR}/${filePrefix}/${filePrefix}*.log 1> /dev/null 2>&1
then
logFileStatistics=$(cat ${RAWDATADIR}/${filePrefix}/${filePrefix}*.log)
Expand All @@ -140,9 +127,9 @@ do
fi
echo -e "De data voor project ${filePrefix} is gekopieerd naar ${RAWDATADIRPRM}" | mail -s "${filePrefix} copied to permanent storage" ${ONTVANGER}
fi
if [ -f $LOGDIR/${filePrefix}.failed ]
if [ -f $LOGDIR/${filePrefix}/${filePrefix}.failed ]
then
rm $LOGDIR/${filePrefix}.failed
rm $LOGDIR/${filePrefix}/${filePrefix}.failed
fi
fi
else
Expand All @@ -153,9 +140,9 @@ do
fi
fi

if [ -f $LOGDIR/${filePrefix}.failed ]
if [ -f $LOGDIR/${filePrefix}/${filePrefix}.failed ]
then
COUNT=$(cat $LOGDIR/${filePrefix}.failed | wc -l)
COUNT=$(cat $LOGDIR/${filePrefix}/${filePrefix}.failed | wc -l)
if [ $COUNT == 10 ]
then
HOSTNA=$(hostname)
Expand Down
37 changes: 9 additions & 28 deletions compute5/NGS_Automated/pipelineFinished.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,47 +22,28 @@ for i in ${ALLFINISHED[@]}
do
filename=$(basename $i)
projectName="${filename%%.*}"
if [ ! -d ${LOGDIR}/${projectName}/ ]
then
mkdir -p ${LOGDIR}/${projectName}/
for i in $(ls ${PROJECTSDIR}/${projectName}/*/rawdata/ngs/*); do if [ -L $i ];then readlink $i > ${LOGDIR}/${projectName}.rawdatalink ; fi;done
for i in $(ls ${PROJECTSDIR}/${projectName}/*/rawdata/ngs/*); do if [ -L $i ];then readlink $i > ${LOGDIR}/${projectName}/${projectName}.rawdatalink ; fi;done

while read line ; do dirname $line > ${LOGDIR}/${projectName}.rawdatalinkDirName; done<${LOGDIR}/${projectName}.rawdatalink
while read line ; do dirname $line > ${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName; done<${LOGDIR}/${projectName}/${projectName}.rawdatalink

rawDataName=$(while read line ; do basename $line ; done<${LOGDIR}/${projectName}.rawdatalinkDirName)

if [ -f ${LOGDIR}/${rawDataName}.pipeline.logger ]
then
mv ${LOGDIR}/${rawDataName}.pipeline.logger ${LOGDIR}/${rawDataName}/
fi
if [ -f ${LOGDIR}/${rawDataName}.scriptsGenerated ]
then
mv ${LOGDIR}/${rawDataName}.scriptsGenerated ${LOGDIR}/${rawDataName}/
fi
rawDataName=$(while read line ; do basename $line ; done<${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName)

echo "moving ${projectName} files to ${LOGDIR}/${projectName}/ and removing tmp finished files"
if [[ -f ${LOGDIR}/${projectName}.pipeline.logger && -f ${LOGDIR}/${projectName}.pipeline.started && -f ${LOGDIR}/${projectName}.rawdatalink && -f ${LOGDIR}/${projectName}.rawdatalinkDirName ]]
if [[ -f ${LOGDIR}/${projectName}/${projectName}.pipeline.logger && -f ${LOGDIR}/${projectName}/${projectName}.pipeline.started && -f ${LOGDIR}/${projectName}/${projectName}.rawdatalink && -f ${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName ]]
then
mv ${LOGDIR}/${projectName}.pipeline.logger ${LOGDIR}/${projectName}/
rm ${LOGDIR}/${projectName}.pipeline.started
rm ${LOGDIR}/${projectName}.rawdatalink
rm ${LOGDIR}/${projectName}.rawdatalinkDirName
touch ${LOGDIR}/${projectName}/${rawDataName}
mv ${LOGDIR}/${projectName}.pipeline.finished ${LOGDIR}/${projectName}/

else
echo "there is/are missing some files:${projectName}.pipeline.logger or ${projectName}.pipeline.started or ${projectName}/${projectName}.rawdatalink or ${projectName}.rawdatalinkDirName"
echo "there is/are missing some files:${projectName}.pipeline.logger or ${projectName}.pipeline.started or ${projectName}/${projectName}.rawdatalink or ${projectName}.rawdatalinkDirName" >> ${LOGDIR}/${projectName}/${projectName}.pipeline.logger
fi
if [ -f ${LOGDIR}/${projectName}.pipeline.failed ]
then
mv ${LOGDIR}/${projectName}.pipeline.failed ${LOGDIR}/${projectName}/
fi
fi
if [ ! -f ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed ]
then
printf "The results can be found: ${PROJECTSDIR}/${projectName} \n\nCheers from the GCC :)"| mail -s "NGS_DNA pipeline is finished for project ${projectName} on `date +%d/%m/%Y` `date +%H:%M`" ${ONTVANGER}
touch ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed
rm ${LOGDIR}/${projectName}.pipeline.finished

fi



done

29 changes: 14 additions & 15 deletions compute5/NGS_Automated/startPipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@ do
##get header to decide later which column is project
HEADER=$(head -1 ${i})

if [ -d ${LOGDIR}/${filePrefix}/ ]
then
echo "(startPipeline) everything is finished of ${filePrefix}"
continue
fi

##Remove header, only want to keep samples
sed '1d' $i > ${LOGDIR}/TMP/${filePrefix}.tmp
OLDIFS=$IFS
Expand Down Expand Up @@ -77,7 +71,7 @@ do
miSeqRun="no"
while read line
do
if [[ "${line}" == *"CARDIO"* || "${line}" == *"DER_v1"* || "${line}" == *"DYS_v3"* || "${line}" == *"EPI_v3"* || "${line}" == *"LEVER_v1"* || "${line}" == *"NEURO_v1"* || "${line}" == *"ONCO_v1"* || "${line}" == *"PCS_v1"* ]]
if [[ "${line}" == *"CARDIO_v"* || "${line}" == *"DER_v"* || "${line}" == *"DYS_v"* || "${line}" == *"EPI_v"* || "${line}" == *"LEVER_v"* || "${line}" == *"NEURO_v"* || "${line}" == *"ONCO_v"* || "${line}" == *"PCS_v"* ]]
then
miSeqRun="yes"
break
Expand All @@ -90,13 +84,13 @@ do
sequencer=$2
run=$3
IFS=$OLDIFS
LOGGER=${LOGDIR}/${filePrefix}.pipeline.logger
LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.logger

####
### Decide if the scripts should be created (per Samplesheet)
##
#
if [[ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}.scriptsGenerated ]]
if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ]]
then
### Step 4: Does the pipeline need to run?
if [ "${pipeline}" == "RNA-Lexogen-reverse" ]
Expand Down Expand Up @@ -149,28 +143,33 @@ do
cd scripts

sh submit.sh
touch $LOGDIR/${filePrefix}.scriptsGenerated
touch $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated
fi
fi

####
### If the scripts have already been generated, enter this part to submit the jobs (per project)
##
#
if [ -f $LOGDIR/${filePrefix}.scriptsGenerated ]
if [ -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ]
then
for PROJECT in ${PROJECTARRAY[@]}
do
if [ ! -d ${LOGDIR}/${PROJECT} ]
then
mkdir ${LOGDIR}/${PROJECT}
fi

WHOAMI=$(whoami)
HOSTN=$(hostname)
LOGGER=${LOGDIR}/${PROJECT}.pipeline.logger
if [ ! -f ${LOGDIR}/${PROJECT}.pipeline.started ]
LOGGER=${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.logger
if [ ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started ]
then
cd ${PROJECTSDIR}/${PROJECT}/run01/jobs/
sh submit.sh

touch ${LOGDIR}/${PROJECT}.pipeline.started
echo "${LOGDIR}/${PROJECT} started" >> $LOGGER
touch ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started
echo "${PROJECT} started" >> $LOGGER

printf "Pipeline: ${pipeline}\nStarttime:`date +%d/%m/%Y` `date +%H:%M`\nProject: $PROJECT\nStarted by: $WHOAMI\nHost: ${HOSTN}\n\nProgress can be followed via the command squeue -u $WHOAMI on $HOSTN.\nYou will receive an email when the pipeline is finished!\n\nCheers from the GCC :)" | mail -s "NGS_DNA pipeline is started for project $PROJECT on `date +%d/%m/%Y` `date +%H:%M`" ${ONTVANGER}
sleep 40
Expand Down

0 comments on commit 38ca862

Please sign in to comment.