-
Notifications
You must be signed in to change notification settings - Fork 1
/
Snakefile
88 lines (71 loc) · 3.15 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import os
import shlex

# Command prefix for anything that has to run inside the R container.
# The host root is bind-mounted at /home/user and the container's working
# directory is set to the host's current directory under that mount, so paths
# relative to the directory Snakemake was started from resolve inside the
# container as well.
#   --rm        removes the container when the command exits, so repeated
#               workflow runs do not pile up stopped containers.
#   shlex.quote keeps a working directory containing spaces or shell
#               metacharacters as a single argument.
# The trailing space is kept so callers can append the command directly.
R_CONTAINER_CMD = (
    "docker run --rm -v /:/home/user -w "
    + shlex.quote("/home/user/" + os.getcwd())
    + " fgajardoe/r-custom:latest "
)
def get_input(wildcards):
    """Return the ``speciesInsertions`` config entry for ``wildcards.specie``.

    Raises ``KeyError`` when the species is not listed in the configuration.
    """
    insertions_by_species = config["speciesInsertions"]
    return insertions_by_species[wildcards.specie]
def get_input_asm(wildcards):
    """Return the ``speciesAssemblies`` config entry for ``wildcards.specie``.

    Used as the input function of ``calcGenomeSize``. Raises ``KeyError`` when
    the species is not listed in the configuration.
    """
    assemblies_by_species = config["speciesAssemblies"]
    return assemblies_by_species[wildcards.specie]
rule all:
    """
    Aggregate target: requests the Kimura-distribution and
    genome-size-contribution PDFs at both TE-superfamily and TE-order level,
    all named after config["prefix"].
    """
    input:
        [
            config["prefix"] + suffix
            for suffix in (
                ".TEsuperfam.kimuraDistrib.pdf",
                ".TEsuperfam.genomeSizeContrib.pdf",
                ".TEorder.kimuraDistrib.pdf",
                ".TEorder.genomeSizeContrib.pdf",
            )
        ]
rule calcGenomeSize:
    """
    Write one "<id><TAB><length>" line per sequence of the species' assembly.

    The awk program treats every line starting with ">" as a header and takes
    the first whitespace-delimited word after the ">" as the sequence id; the
    length of every other line is added to the running total of the current
    id. Totals are printed in awk's `for (id in s)` iteration order, which is
    not necessarily the order of the input.
    """
    input:
        get_input_asm
    output:
        "{specie}.genomeSize.tab"
    shell:
        # Doubled braces are literal awk braces; single braces are Snakemake placeholders.
        """cat {input} |awk '{{if(/^>/){{defline=substr($0,2); split(defline,a," "); id=a[1]}}else{{s[id]+=length($0)}}}}END{{for(id in s){{print id"\t"s[id]}}}}' > {output}"""
rule generateTableForPlotting:
    """
    Build <prefix>.tab, the tab-separated table consumed by the plotting rules.

    One line per species listed in config["speciesInsertions"], with three
    columns: species name, path of its insertions BED file under
    config["build_rmgr_path"], and path of its "<specie>.genomeSize.tab" file.

    The table is written directly from Python instead of through an
    echo/perl/paste pipeline, so no scratch files (tmpSpecies,
    tmpInsertionsBed, tmpGenomeSize) are left in the working directory and
    nothing depends on how a list parameter is rendered in a shell command.
    """
    input:
        bed=expand(config["build_rmgr_path"]+"/{specie}.bed",specie=config["speciesInsertions"]),
        #bed=config["speciesInsertions"],
        genomesize=expand("{specie}.genomeSize.tab",specie=config["speciesInsertions"])
    params:
        species=expand("{specie}",specie=config["speciesInsertions"])
    output:
        config["prefix"]+".tab"
    run:
        # The three sequences are expanded from the same
        # config["speciesInsertions"] iteration, so they are index-aligned.
        rows = zip(params.species, input.bed, input.genomesize)
        with open(output[0], "w") as table:
            for species, bed_path, genome_size_path in rows:
                table.write("\t".join((str(species), str(bed_path), str(genome_size_path))) + "\n")
rule plotKimura_TEsuperfams:
    """
    Run plotKimuraDistance.R inside the R container at TE-superfamily level.

    The script receives, in this order: the species table (<prefix>.tab), the
    TE classification file, x and y axis maxima, the plot title, the
    superfamilies to show in the Kimura plot, the species order, the TE groups
    to show in the genome-contribution plot, the TE level ("TEsuperfamily"),
    the normalizeByTotal setting, the species palette, and the output prefix
    "<prefix>.TEsuperfam".

    The two declared PDFs are presumably derived by the script from that
    output prefix; confirm against plotKimuraDistance.R.
    """
    input:
        config["prefix"]+".tab",
        # config["BedFile_Specie_GenomeSize_equivalence"],
        config["TEclassification"]
    params:
        x=config["xmax_TEsuperfam"],
        y=config["ymax_TEsuperfam"],
        t=config["title"],
        TEordersKimura=config["TEsuperfams_to_show_kimura"],
        speciesOrder=config["speciesOrder"],
        # NOTE(review): this superfamily-level rule reads the *TEorders* genome-contribution
        # key, same as plotKimura_TEorders; confirm that this is intentional.
        TEordersGenomeContrib=config["TEorders_to_show_genomeContrib"],
        TElevel="TEsuperfamily",
        normalizeByTotal=config["normalizeByTotal"],
        pal=config["speciesPalette"],
    output:
        config["prefix"]+".TEsuperfam.kimuraDistrib.pdf",
        config["prefix"]+".TEsuperfam.genomeSizeContrib.pdf",
        #config["prefix"]+".RData"
    shell:
        # {params.t:q} lets Snakemake shell-quote the title, so titles containing
        # apostrophes or other shell metacharacters stay a single argument.
        R_CONTAINER_CMD+" Rscript plotKimuraDistance.R {input} {params.x} {params.y} {params.t:q} {params.TEordersKimura} {params.speciesOrder} {params.TEordersGenomeContrib} {params.TElevel} {params.normalizeByTotal} {params.pal} "+config["prefix"]+".TEsuperfam"
rule plotKimura_TEorders:
    """
    Run plotKimuraDistance.R inside the R container at TE-order level.

    The script receives, in this order: the species table (<prefix>.tab), the
    TE classification file, x and y axis maxima, the plot title, the TE orders
    to show in the Kimura plot, the species order, the TE orders to show in
    the genome-contribution plot, the TE level ("TEorder"), the
    normalizeByTotal setting, the species palette, and the output prefix
    "<prefix>.TEorder".

    The two declared PDFs are presumably derived by the script from that
    output prefix; confirm against plotKimuraDistance.R.
    """
    input:
        config["prefix"]+".tab",
        # config["BedFile_Specie_GenomeSize_equivalence"],
        config["TEclassification"]
    params:
        x=config["xmax_TEorder"],
        y=config["ymax_TEorder"],
        t=config["title"],
        TEordersKimura=config["TEorders_to_show_kimura"],
        speciesOrder=config["speciesOrder"],
        TEordersGenomeContrib=config["TEorders_to_show_genomeContrib"],
        TElevel="TEorder",
        normalizeByTotal=config["normalizeByTotal"],
        pal=config["speciesPalette"],
    output:
        config["prefix"]+".TEorder.kimuraDistrib.pdf",
        config["prefix"]+".TEorder.genomeSizeContrib.pdf",
        #config["prefix"]+".RData"
    shell:
        # {params.t:q} lets Snakemake shell-quote the title, so titles containing
        # apostrophes or other shell metacharacters stay a single argument.
        R_CONTAINER_CMD+" Rscript plotKimuraDistance.R {input} {params.x} {params.y} {params.t:q} {params.TEordersKimura} {params.speciesOrder} {params.TEordersGenomeContrib} {params.TElevel} {params.normalizeByTotal} {params.pal} "+config["prefix"]+".TEorder"