-
Notifications
You must be signed in to change notification settings - Fork 0
/
Fig5.COG_analysis.R
73 lines (62 loc) · 3.3 KB
/
Fig5.COG_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
library(tidyverse)
library(dplyr)
library(ggplot2)
library(reshape2)
## Here, the COG analysis of E. coli fragmentation groups are compared (based on the core gene representatives generated by using Roary with 100% CG threshold).
## Load COG analysis summary for E. coli datasets.
EC_rad1_ori <- read.table(file = "R_code/Fig5.COG_analysis/EC_ori.txt", header = FALSE)
EC_rad1_50cut <- read.table(file = "R_code/Fig5.COG_analysis/EC_50cut.txt", header = FALSE)
EC_rad1_100cut <- read.table(file = "R_code/Fig5.COG_analysis/EC_100cut.txt", header = FALSE)
EC_rad1_200cut <- read.table(file = "R_code/Fig5.COG_analysis/EC_200cut.txt", header = FALSE)
EC_rad1_300cut <- read.table(file = "R_code/Fig5.COG_analysis/EC_300cut.txt", header = FALSE)
EC_rad1_400cut <- read.table(file = "R_code/Fig5.COG_analysis/EC_400cut.txt", header = FALSE)
## Reformat and combined the data.
group <- rep("ori", length(EC_rad1_ori$V1))
count <- EC_rad1_ori$V1
Cog <- EC_rad1_ori$V2
count_pct <- (count/sum(count))*100
EC_rad1_ori_COG <- data.frame(group,Cog,count,count_pct)
group <- rep("50cut", length(EC_rad1_50cut$V1))
count <- EC_rad1_50cut$V1
Cog <- EC_rad1_50cut$V2
count_pct <- (count/sum(count))*100
EC_rad1_50cut_COG <- data.frame(group,Cog,count,count_pct)
group <- rep("100cut", length(EC_rad1_100cut$V1))
count <- EC_rad1_100cut$V1
Cog <- EC_rad1_100cut$V2
count_pct <- (count/sum(count))*100
EC_rad1_100cut_COG <- data.frame(group,Cog,count,count_pct)
group <- rep("200cut", length(EC_rad1_200cut$V1))
count <- EC_rad1_200cut$V1
Cog <- EC_rad1_200cut$V2
count_pct <- (count/sum(count))*100
EC_rad1_200cut_COG <- data.frame(group,Cog,count,count_pct)
group <- rep("300cut", length(EC_rad1_300cut$V1))
count <- EC_rad1_300cut$V1
Cog <- EC_rad1_300cut$V2
count_pct <- (count/sum(count))*100
EC_rad1_300cut_COG <- data.frame(group,Cog,count,count_pct)
group <- rep("400cut", length(EC_rad1_400cut$V1))
count <- EC_rad1_400cut$V1
Cog <- EC_rad1_400cut$V2
count_pct <- (count/sum(count))*100
EC_rad1_400cut_COG <- data.frame(group,Cog,count,count_pct)
EC_rad1 <- rbind(EC_rad1_ori_COG,EC_rad1_50cut_COG,EC_rad1_100cut_COG,EC_rad1_200cut_COG,EC_rad1_300cut_COG,EC_rad1_400cut_COG)
EC_rad1$group <- factor(EC_rad1$group , levels=c("ori", "50cut", "100cut", "200cut", "300cut", "400cut"))
## Make plot to show the number of core gene representatives in different COG categories.
EC_frag_COG_plot <- ggplot(data = EC_rad1, aes(x = Cog, y = count, color = Cog, fill=group)) +
geom_bar(stat = 'identity', position=position_dodge(),colour="black")+
scale_fill_brewer(palette="Accent")+
scale_x_discrete(name = "COG categories")+
scale_y_continuous(name = "Number of core gene representatives")+
guides(fill=guide_legend("Groups"))+
theme(legend.position="bottom",
legend.text = element_text(colour="black", size=28, face= "bold"),
legend.title = element_text(colour="black", size=30, face= "bold"),
axis.text.x = element_text(color="black", size=28, face= "bold"),
axis.text.y = element_text(color="black", size=28, face= "bold"),
axis.title.x = element_text(size=30, face= "bold"),
axis.title.y = element_text(size=30, face= "bold"))+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
EC_frag_COG_plot