Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

KNI内存泄露:kni out of memory #508

Open
Gongwenn opened this issue Dec 11, 2019 · 6 comments
Open

KNI内存泄露:kni out of memory #508

Gongwenn opened this issue Dec 11, 2019 · 6 comments

Comments

@Gongwenn
Copy link

Gongwenn commented Dec 11, 2019

测试环境中,我们用双网卡做bond,kni口会存在内存泄露问题,跑一段时间后会报错:KNI: Out of memory,即使kni口只跑BGP路由公告流量,也会造成泄露,只是泄露时间比较长。如果健康检查走kni口的话,泄露速度比较快,一两个小时就会报内存不足错误。

dpvs.log一直打印 KNI: Out of memory,之后kni口的IP就无法通讯了,原因是kni口的所有报文(rx、tx)全部丢包。
image
image

dpdk版本:stable-17.11.2
dpvs版本:1.7.8
单臂模式,双网卡bond
image

@Gongwenn Gongwenn changed the title from "KNI内存泄露:KNI Out of" to "KNI内存泄露:kni out of memory" Dec 11, 2019
@ywc689
Copy link
Collaborator

ywc689 commented Dec 11, 2019

Could you please give the outputs of the following commands for the problem?

  • ip link show
  • ip addr show
  • ip route show
  • dpip link show
  • dpip addr show
  • dpip route show

@Gongwenn
Copy link
Author

Could you please give the outputs of the following commands for the problem?

  • ip link show
    image
  • ip addr show
    image
  • ip route show
    image
  • dpip link show
    image
  • dpip addr show
    image
  • dpip route show
    image

@Gongwenn
Copy link
Author

Gongwenn commented Dec 12, 2019

+问题环境dpvs配置:
`
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! This is dpvs default configuration file.
!
! The attribute "<init>" denotes a configuration item used at the initialization stage. An item of
! this type is configured one-shot and is not reloadable. If an invalid value is configured in the
! file, dpvs will use its default value.
!
! Note that dpvs configuration file supports the following comment type:
! * line comment: using '#' or '!'
! * inline range comment: using '<' and '>', put comment in between
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! global config
global_defs {
log_level DEBUG
log_file /data/var/log/dpvs.log
}
! netif config
netif_defs {
pktpool_size 4194296
pktpool_cache 512
device dpdk0 {
rx {
queue_number 16
descriptor_number 2048
rss all
}
tx {
queue_number 16
descriptor_number 2048
}
fdir {
mode perfect
pballoc 128k
status matched
}
! promisc_mode
! kni_name dpdk0.kni
}
device dpdk1 {
rx {
queue_number 16
descriptor_number 2048
rss all
}
tx {
queue_number 16
descriptor_number 2048
}
fdir {
mode perfect
pballoc 128k
status matched
}
! promisc_mode
! kni_name dpdk1.kni
}
bonding bond0 {
mode 4
slave dpdk0
slave dpdk1
primary dpdk0
kni_name bond0.kni
}
}
! worker config (lcores)
worker_defs {
worker cpu0 {
type master
cpu_id 0
}
worker cpu1 {
type slave
cpu_id 1
port bond0 {
rx_queue_ids 0
tx_queue_ids 0
! isol_rx_cpu_ids 9
! isol_rxq_ring_sz 1048576
}
}
worker cpu2 {
type slave
cpu_id 2
port bond0 {
rx_queue_ids 1
tx_queue_ids 1
! isol_rx_cpu_ids 10
! isol_rxq_ring_sz 1048576
}
}
worker cpu3 {
type slave
cpu_id 3
port bond0 {
rx_queue_ids 2
tx_queue_ids 2
! isol_rx_cpu_ids 11
! isol_rxq_ring_sz 1048576
}
}
worker cpu4 {
type slave
cpu_id 4
port bond0 {
rx_queue_ids 3
tx_queue_ids 3
! isol_rx_cpu_ids 12
! isol_rxq_ring_sz 1048576
}
}
worker cpu5 {
type slave
cpu_id 5
port bond0 {
rx_queue_ids 4
tx_queue_ids 4
! isol_rx_cpu_ids 13
! isol_rxq_ring_sz 1048576
}
}
worker cpu6 {
type slave
cpu_id 6
port bond0 {
rx_queue_ids 5
tx_queue_ids 5
! isol_rx_cpu_ids 14
! isol_rxq_ring_sz 1048576
}
}
worker cpu7 {
type slave
cpu_id 7
port bond0 {
rx_queue_ids 6
tx_queue_ids 6
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
worker cpu8 {
type slave
cpu_id 8
port bond0 {
rx_queue_ids 7
tx_queue_ids 7
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu9 {
type slave
cpu_id 9
port bond0 {
rx_queue_ids 8
tx_queue_ids 8
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu10 {
type slave
cpu_id 10
port bond0 {
rx_queue_ids 9
tx_queue_ids 9
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu11 {
type slave
cpu_id 11
port bond0 {
rx_queue_ids 10
tx_queue_ids 10
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu12 {
type slave
cpu_id 12
port bond0 {
rx_queue_ids 11
tx_queue_ids 11
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu13 {
type slave
cpu_id 13
port bond0 {
rx_queue_ids 12
tx_queue_ids 12
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu14 {
type slave
cpu_id 14
port bond0 {
rx_queue_ids 13
tx_queue_ids 13
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu15 {
type slave
cpu_id 15
port bond0 {
rx_queue_ids 14
tx_queue_ids 14
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu16 {
type slave
cpu_id 16
port bond0 {
rx_queue_ids 15
tx_queue_ids 15
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
}
! timer config
timer_defs {
# cpu job loops to schedule dpdk timer management
schedule_interval 500
}
! dpvs neighbor config
neigh_defs {
unres_queue_length 128
timeout 60
}
! dpvs ipv4 config
ipv4_defs {
forwarding off
default_ttl 64
fragment {
bucket_number 4096
bucket_entries 16
max_entries 4096
ttl 1
}
}
! dpvs ipv6 config
ipv6_defs {
disable off
forwarding off
route6 {
method "hlist"
recycle_time 10
}
}
! control plane config
ctrl_defs {
lcore_msg {
ring_size 4096
multicast_queue_length 256
sync_msg_timeout_us 20000
}
ipc_msg {
unix_domain /var/run/dpvs_ctrl
}
}
! ipvs config
ipvs_defs {
conn {
conn_pool_size 33554432
conn_pool_cache 512
conn_init_timeout 3
! expire_quiescent_template
! fast_xmit_close
! redirect off
}
udp {
! defence_udp_drop
uoa_mode ipo <opp for private protocol by default, or ipo for IP-option mode>
uoa_max_trail 0
timeout {
normal 300
last 3
}
}
tcp {
! defence_tcp_drop
timeout {
none 2
established 7200
syn_sent 3
syn_recv 30
fin_wait 7
time_wait 7
close 3
close_wait 7
last_ack 7
listen 120
synack 30
last 2
}
synproxy {
synack_options {
mss 1380
ttl 63
sack
! wscale
! timestamp
}
! defer_rs_syn
rs_syn_max_retry 3
ack_storm_thresh 10
max_ack_saved 3
conn_reuse_state {
close
time_wait
! fin_wait
! close_wait
! last_ack
}
}
}
}
! sa_pool config
sa_pool {
pool_hash_size 8

`

@haidfs
Copy link

haidfs commented Jan 16, 2020

请教下这问题解决了吗?我也遇到了类似的问题

@AssassinOdyssey
Copy link

请问,使用的是什么版本DPDK出现的KNI OOM?问题是否已经解决?

@sjaliang
Copy link

可以看下这个帖子:https://bugs.dpdk.org/show_bug.cgi?id=213 ,另外还需要调大 kni memory pool 的大小

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

5 participants