Merge pull request #269 from RoanKanninga/master

added small comments
molgenis · Jun 12, 2024 · c1ae398 · c1ae398
2 parents 12a3172 + e1acfe4
commit c1ae398
Show file tree

Hide file tree

Showing 4 changed files with 90 additions and 11 deletions.
diff --git a/bin/concordanceCheck-ArrayvsArray.sh b/bin/concordanceCheck-ArrayvsArray.sh
@@ -40,7 +40,8 @@ function showHelp() {
 	#
 	cat <<EOH
 ===============================================================================================================
-Script to do (many-to-many) ConcordanceChecks manually. 
+Script to do ConcordanceChecks for array vs array manually. (see instructions:
+https://github.com/molgenis/analysis-team-documents/blob/master/sops/GH-09-ConcordanceCheckArrayVsArray.md)
 Usage:
 	$(basename $0) OPTIONS
 Options:

diff --git a/bin/create_per_base_bed.pl b/bin/create_per_base_bed.pl
@@ -0,0 +1,76 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+use diagnostics;
+use Getopt::Long;
+use List::Util qw(first);
+use POSIX;
+
+# Expand every region of a BED file into one row per base position and write
+# the rows to <outputfolder>/<output>.per_base.bed.
+my ($help, $inputbed, $output, $outputfolder);
+
+#### get options
+GetOptions(
+                "h"                             => \$help,
+                "input=s"                       => \$inputbed,
+                "output=s"                      => \$output,
+                "outputfolder=s"                => \$outputfolder,
+          );
+usage() and exit(1) if $help;
+# mandatory args
+usage() and exit(1) unless $inputbed;
+usage() and exit(1) unless $output;
+usage() and exit(1) unless $outputfolder;
+chomp($inputbed, $output, $outputfolder);
+
+#Step between emitted positions: 1 gives one row per base
+my $binSize = 1;
+my $outputfile = "$outputfolder/$output.per_base.bed";
+
+#Open input and output files once, with lexical handles. The output is
+#truncated instead of appended to, so rows left behind by an earlier
+#(possibly aborted) run with the same prefix are not duplicated.
+open (my $in, "<", $inputbed) or die "Cannot read '$inputbed': $!";
+open (my $out, ">", $outputfile) or die "Cannot write '$outputfile': $!";
+
+#Read bed file
+while (my $line = <$in>) {
+        chomp $line;
+        #Lines starting with "track" followed by more text are skipped
+        next if $line =~ m/^track./;
+
+        #Remove chr before chrNumber and substitute M with MT
+        $line =~ s/^chr//i;
+        $line =~ s/^M\t/MT\t/i;
+
+        #Split line; the fourth column is copied to the output as-is
+        my ($chr, $start, $stop, $gene) = split("\t", $line);
+        #Blank lines and lines without a stop column cannot yield rows
+        next unless defined $stop;
+        #A missing fourth column becomes an empty field instead of a warning
+        $gene = '' unless defined $gene;
+
+        #Iterate over region and create bins: positions start .. stop-1, with
+        #the position in both coordinate columns and a literal + in column 4
+        for (my $i = $start; $i < $stop; $i += $binSize) {
+                print {$out} "$chr\t$i\t$i\t+\t$gene\n";
+        }
+}
+close($in);
+#Buffered write errors only surface at close, so check it
+close($out) or die "Cannot close '$outputfile': $!";
+# Print the command-line help to STDOUT; the result of print is returned, so callers can chain "usage() and exit(1)".
+sub usage {
+        print <<EOF;
+#########################################################################################
+This script expands every region of a bed file into one output line per base position.
+#########################################################################################
+Usage: ./create_per_base_bed.pl [-h] -input <bed> -output <prefix> -outputfolder <dir>
+\t-input\t\t\tInput bed file.
+\t-output\t\t\tOutput prefix
+\t-outputfolder\t\tOutputfolder
+Example usage: perl create_per_base_bed.pl -input target_exons.bed -output exonIntervals -outputfolder /tmp
+#########################################################################################
+EOF
+}
diff --git a/bin/gvcf2bed2.py b/bin/gvcf2bed2.py
@@ -23,7 +23,7 @@ def get_format_value(record, format_field, sample_idx):
             return record.QUAL
         return None
     if record.QUAL is not None and gq_arr is not None:
-        return min(int(gq_arr[sample_idx][0]), record.QUAL)
+        return int(gq_arr[sample_idx][0])
     elif gq_arr is not None:
         return gq_arr[sample_idx][0]
     elif record.QUAL is not None:

diff --git a/bin/prepare_NGS_Bedfiles.sh b/bin/prepare_NGS_Bedfiles.sh
@@ -181,16 +181,18 @@ fi
 
 if [[ "${COVPERBASE}" == "true" ]]
 then
-	if [ ! -f "${baits}.uniq.per_base.intervals" ]
+	if [ ! -f "${baits}.uniq.per_base.bed" ]
 	then
-		echo "starting to create_per_base_intervals, this may take a while"
-		create_per_base_intervals.pl -input "${baits}.merged.bed" -output "${NAME}" -outputfolder "${TMP}"
-		wc -l "${TMP}/${NAME}.per_base.intervals"
-
-		sort -V -k1 -k2 -k3 "${TMP}/${NAME}.per_base.intervals" | uniq > "${baits}.uniq.per_base.intervals.tmp"
-		head -n 86 "${baits}.interval_list" > "${baits}.uniq.per_base.interval_list"
-		sort -V "${baits}.uniq.per_base.intervals.tmp" >> "${baits}.uniq.per_base.interval_list"
-		tail -n+87 "${baits}.uniq.per_base.interval_list" |  awk '{print $1"\t"$2"\t"($3+1)"\t"$5}' > "${baits}.uniq.per_base.bed"
+		echo "starting to create_per_base_bed, this may take a while"
+		create_per_base_bed.pl -input "${baits}.merged.bed" -output "${NAME}" -outputfolder "${TMP}"
+		awk '{print $1"\t"$2"\t"($3+1)"\t"$5}' "${TMP}/${NAME}.per_base.bed" > "${baits}.uniq.per_base.bed"
+		wc -l "${TMP}/${NAME}.per_base.bed"
+		#
+
+		#sort -V -k1 -k2 -k3 "${TMP}/${NAME}.per_base.intervals" | uniq > "${baits}.uniq.per_base.intervals.tmp"
+		#head -n 86 "${baits}.interval_list" > "${baits}.uniq.per_base.interval_list"
+		#sort -V "${baits}.uniq.per_base.intervals.tmp" >> "${baits}.uniq.per_base.interval_list"
+		#tail -n+87 "${baits}.uniq.per_base.interval_list" |  awk '{print $1"\t"$2"\t"($3+1)"\t"$5}' > "${baits}.uniq.per_base.bed"
 	fi
 fi