forked from jingliu9/mtcp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
README-libos
201 lines (179 loc) · 9.5 KB
/
README-libos
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
mtcp-libos
===
epserver.c/main()
-> core.c/mtcp_init(conf_file)
-> LoadConfiguration(config_file)
-> setRoutingTable()
-> loadARPTable()
-> signal(SIGINT, HandleSignal)
-> signal(SIGUSR1, HandleSignal)
-> current_iomodule_func->load_module(); // load the I/O module (dpdk_module) through its function pointer
-> dpdk_module.c/dpdk_load_module
-> setting the rss key
-> create the mbuf pools
-> DPDK/rte_mempool_create()
-> init each port: (for each port)
-> DPDK/rte_eth_dev_configure()
-> DPDK/rte_eth_rx_queue_setup() // per cpu per rx queue
-> DPDK/rte_eth_tx_queue_setup() // per cpu per tx queue
-> DPDK/rte_eth_dev_start(portid) // start device
-> DPDK/rte_eth_promiscuous_enable(portid) // rte_ethdev.h:enable receipt in promiscuous mode for an Ethernet device
-> DPDK/rte_eth_dev_flow_ctrl_get(portid, &fc_conf); // retrieve current flow control settings per port
-- fc_conf.mode = RTE_FC_NONE; // disable flow control
-> DPDK/rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
-> mtcp_getconf(&mcfg);
-- if backlog is not specified, set it to 4K (Q:what is backlog?)
-> core.c/mtcp_register_signal(SIGINT, SignalHandler);
=============================================================================
== application initialization finished
=============================================================================
-- for each core, create the thread to run the application (bind to corresponding core)
-> pthread_create(&app_thread[i], NULL, RunServerThread, (void *)&cores[i])
-- epserver.c/RunServerThread()
-> InitializeServerThread(int core)
-- affinitize application thread to a cpu core
-> ctx->mctx = core.c/mtcp_create_context(core) // create mtcp context
-> sem_init(&g_init_sem[cpu])
-- wake up mtcp threads (wake up i/o threads)
-- if master-core == cpu:
-> pthread_create(,NULL, MTCPRunThread, (void*) mctx)
-- else:
-> rte_eal_remote_launch(MTCPDPDKRunThread, mctx, cpu);
-> MTCPRunThread
-- core.c/MTCPRunThread(void *arg)
-> affinitize the thread to this core
-> mtcp->iom = current_iomodule_func
-> pthread_mutex_init(smap_lock)
-> pthread_mutex_init(flow_pool_lock)
-> pthread_mutex_init(socket_pool_lock)
-> tcp_stream_queue.h/SQ_LOCK_INIT (seems to do nothing)
-> sys/mman.h/mlockall(): lock all of the calling process's virtual address space into RAM, preventing that memory from being paged out to the swap area (locking is done in units of whole pages)
-> AttachDevice(ctx)
-> sem_post(&g_init_sem[ctx->cpu])
/***************************************/
// the main loop of the dpdk thread (one per core; it performs network processing only)
-> RunMainLoop(ctx)
-- while loop:
-> mtcp->iom->recv_pkts(ctx, rx_inf)
-- will call dpdk_module->recv_pkts(ctx, rx_inf)
-> recv_cnt = dpdk_module/dpdk_recv_pkts
-> rte_ethdev.h/rte_eth_rx_burst(portid, cpu, dpc->pkts_burst, MAX_PKT_BURST)
// Retrieve a burst of input packets from a receive queue of an Ethernet device,
// up to *nb_pkts* packets, one for each completed RX descriptor in the ring;
// returns the number of packets actually retrieved
-- for recv_cnt: (for each packet, read, and process)
-> dpdk_module.c/get_rptr() // retrieve next pkt for application for packet read, return ptr to pkt buffer
-> eth_in.c/ProcessPacket()
-> ProcessIPv4Packet()
-> tcp_in.c/ProcessTCPPacket()
-> tcp_stream = StreamHTSearch([struct hashtable *ht]mtcp->tcp_flow_table, &s_stream)
-- if not found, create hash table (flow table) entry for this tcp stream
-- validate sequence
-- update receive window size
-- call the TCP handler function that matches cur_stream->state
-- static inline void Handle_TCP_ST_SYN_RCVD
-> StreamEnqueue(listener->acceptq, cur_stream)
-> if tcp_timeout>0: AddToTimeoutList
-> if MTCP_EPOLLIN: AddEpollEvent(mtcp->ep, MTCP_EVENT_QUEUE, listener->socket, MTCP_EPOLLIN)
-> ProcessICMPPacket()
-- // interaction with application
-- if mtcp->flow_cnt > 0: // decides whether to interact with the application or not
-> CheckRtmTimeout()
-> CheckTimewaitExpire()
-> CheckConnectionTimeout()
-> if (mtcp->ep) FlushEpollEvents(mtcp, ts)
-> if (mtcp->flow_cnt > 0) core.c/HandleApplicationCalls(mtcp, ts)
-> core.c/WritePacketsToChunks(mtcp, ts)
-- // send packets from write buffer
-> @@ mtcp->iom->send_pkts(ctx, tx_inf); // will call dpdk_send_pkts
-- if there are packets in the queue ... flush them to the wire
-> if(dpc->wmbufs[ifidx].len > 0){
-> rte_eth_tx_burst(portid, ctxt->cpu, pkts, cnt)
}
-> mtcp->iom->select(ctx) // only works under #ifdef RX_IDLE_ENABLE
/**************************************/
-> mtcp_free_context(&m)
-> DestroyHashtable(g_mtcp[cpu]->tcp_flow_table)
-> DestroyHashtable(g_mtcp[cpu]->listeners)
-> ctx->ep = mtcp_epoll_create(ctx->mctx, MAX_EVENTS); // create epoll descriptor
-> ctx->svars
-- return ctx
############################################################################
// this struct serves as the io_private_context when the io_module is dpdk
dpdk_module.c:struct dpdk_private_context {
struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
/**
** struct mbuf_table{
** unsigned len; // length of queued packets
** struct rte_mbuf *m_table[MAX_PKT_BURST];
** }
**/
struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
struct rte_mempool *pktmbuf_pool;
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
// ...
}
/**
* mtcp.h
*
**/
struct mtcp_sender {
}
struct mtcp_manager {
mem_pool_t flow_pool;
mem_pool_t rv_pool;
mem_pool_t sv_pool;
mem_pool_t mv_pool;
sb_manager_t rbm_snd;
rb_manager_t rbm_rcv;
struct hashtable *tcp_flow_table;
socket_map_t smap;
struct mtcp_thread_context* ctx;
// variables related to events
struct mtcp_epoll ep;
uint32_t ts_last_event;
struct hashtable *listeners;
stream_queue_t connectq; // streams need to connect
stream_queue_t sendq;
stream_queue_t ackq;
stream_queue_t closeq; // streams to close
stream_queue_int *closeq_int; // internally maintained closeq
stream_queue_t resetq;
stream_queue_int *resetq_int;
stream_queue_t destroyq;
struct mtcp_sender *g_sender;
struct mtcp_sender *n_sender[ETH_NUM];
// ...
}
/**
* tcp_stream.h
*
**/
typedef struct tcp_stream {
} tcp_stream;
struct tcp_send_vars {
//...
struct tcp_send_buffer *sndbuf;
//...
}
############################################################################
# how to process the writing
core.c/WritePacketsToChunks
-> tcp_out.c/WriteTCPDataList
-> tcp_out.c/FlushTCPSendingBuffer()
-> SBUF_LOCK(&sndvar->write_lock)
-> tcp_out.c/SendTcpPacket()
-> tcph = ip_out.c/IPOutput()
-> eth_out.c/EthernetOutput()
-> mtcp->iom->get_wptr() // retrieve the next empty packet buffer for the application to write a packet into; returns a pointer to the packet buffer
-> dpdk_get_wptr()
-- // return about dpc->wmbufs[ifidx].m_table[len_of_mbuf]
-> if payload exist, will call memcpy
-> SBUF_UNLOCK(&sndvar->write_lock)
api.c/mtcp_write()
-> SBUF_LOCK(&sndvar->write_lock)
-> CopyFromUser()
-> SBPut(mtcp->rbm_snd, sndvar->sndbuf, buf, sndlen)
-- sndvar->sndbuf is (struct tcp_send_buffer)
-> memcpy(sndvar->sndbuf, buf)
-> SBUF_UNLOCK(&sndvar->write_lock)