Building a Highly Available k8s + Kube-OVN Environment

Posted by 暗痛 on 2024-10-25

1. Prepare the environment

Role              IP
master1, node1    10.167.47.12
master2, node2    10.167.47.24
master3, node3    10.167.47.25
VIP (virtual IP)  10.167.47.86
# Add hosts entries on every master
cat >> /etc/hosts << EOF
10.167.47.12 master1
10.167.47.24 master2
10.167.47.25 master3
EOF
# Disable the firewall
systemctl stop firewalld && systemctl disable firewalld
# Disable SELinux
sed -i 's/enforcing/disabled/' /etc/selinux/config # permanent
setenforce 0 # temporary
# Disable swap
swapoff -a # temporary
sed -ri 's/.*swap.*/#&/' /etc/fstab # permanent
# Set the hostname according to the plan above
hostnamectl set-hostname <hostname>
# Time synchronization
cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
yum clean all && yum makecache
yum install ntpdate -y && timedatectl set-timezone Asia/Shanghai && ntpdate time2.aliyun.com
# Add a daily sync to crontab
crontab -e
0 5 * * * /usr/sbin/ntpdate time2.aliyun.com
# Also sync at boot via /etc/rc.local
vi /etc/rc.local
ntpdate time2.aliyun.com
# ulimit -a shows all current limits; ulimit -n shows the current maximum number of open files.
# A fresh Linux install defaults to 1024, which easily triggers "error: too many open files" on a busy server, so raise it.
# ulimit -n 65535 changes it immediately but does not survive a reboot. (Note: ulimit -SHn 65535 is equivalent; -S sets the soft limit, -H the hard limit.)
# Temporary setting, lost after reboot
ulimit -SHn 65535
# Resource limits, permanent setting
vi /etc/security/limits.conf
# Append the following at the end
* soft nofile 65536
* hard nofile 65536
* soft nproc 65536
* hard nproc 65536
* soft memlock unlimited
* hard memlock unlimited
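# The new limits apply to new login sessions; after logging in again you can confirm them with:
ulimit -n
ulimit -u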
# Tune kernel parameters
# (the net.bridge.* keys below require the br_netfilter module; on a fresh install it is usually not loaded yet)
modprobe br_netfilter
echo br_netfilter > /etc/modules-load.d/br_netfilter.conf
cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward=1
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
fs.may_detach_mounts=1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720
net.ipv4.tcp_keepalive_time=600
net.ipv4.tcp_keepalive_probes=3
net.ipv4.tcp_keepalive_intvl=15
net.ipv4.tcp_max_tw_buckets=36000
net.ipv4.tcp_tw_reuse=1
net.ipv4.tcp_max_orphans=327680
net.ipv4.tcp_orphan_retries=3
net.ipv4.tcp_syncookies=1
net.ipv4.tcp_max_syn_backlog=16384
net.ipv4.ip_conntrack_max=65536
net.ipv4.tcp_timestamps=0
net.core.somaxconn=16384
EOF
sysctl --system # apply
# After a reboot you can check that the required modules are loaded
lsmod | grep --color=auto -e ip_vs -e nf_conntrack
# Reboot
reboot
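# After the reboot, spot-check that the sysctl values survived (keys taken from the k8s.conf above):
sysctl net.ipv4.ip_forward net.bridge.bridge-nf-call-iptables net.core.somaxconn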

2. Deploy keepalived on all master nodes

1. Install required packages and keepalived

# Install and enable IPVS
# Install the packages
yum install ipvsadm ipset sysstat conntrack libseccomp -y
# Load the modules now (takes effect immediately, not persistent)
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
# Make them load at boot
cat <<EOF > /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_lc
ip_vs_wlc
ip_vs_rr
ip_vs_wrr
ip_vs_lblc
ip_vs_lblcr
ip_vs_dh
ip_vs_sh
ip_vs_nq
ip_vs_sed
ip_vs_ftp
nf_conntrack
ip_tables
ip_set
xt_set
ipt_set
ipt_rpfilter
ipt_REJECT
ipip
EOF
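# To load the whole module list without rebooting, restart systemd-modules-load and re-check
# (on the stock CentOS 7 kernel the conntrack module may show up as nf_conntrack_ipv4):
systemctl restart systemd-modules-load.service
lsmod | grep -e ip_vs -e nf_conntrack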
# Install haproxy
yum install -y haproxy
cat > /etc/haproxy/haproxy.cfg << EOF
global
    log         127.0.0.1 local0
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats
defaults
    mode                tcp
    log                 global
    option              tcplog
    option              dontlognull
    option              redispatch
    retries             3
    timeout queue       1m
    timeout connect     10s
    timeout client      1m
    timeout server      1m
    timeout check       10s
    maxconn             3000
# name of the listener
listen k8s_master
    # port exposed on the virtual IP
    bind 0.0.0.0:16443
    mode tcp
    option tcplog
    balance roundrobin
    # the load-balanced kube-apiserver masters
    server master1 10.167.47.12:6443 check inter 10000 fall 2 rise 2 weight 1
    server master2 10.167.47.24:6443 check inter 10000 fall 2 rise 2 weight 1
    server master3 10.167.47.25:6443 check inter 10000 fall 2 rise 2 weight 1
EOF
# Enable on boot
systemctl enable haproxy
# Start haproxy
systemctl start haproxy
# Check its status
systemctl status haproxy
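# A quick sanity check that haproxy is actually listening on the load-balancer port:
ss -lntp | grep 16443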
# Create the health-check script (quote EOF so the command substitutions are written literally instead of being expanded now)
cat > /etc/keepalived/check_haproxy.sh << 'EOF'
#!/bin/bash
if [ $(ps -C haproxy --no-header | wc -l) -eq 0 ]; then
    systemctl start haproxy
    sleep 3
    if [ $(ps -C haproxy --no-header | wc -l) -eq 0 ]; then
        systemctl stop keepalived
    fi
fi
EOF
chmod +x /etc/keepalived/check_haproxy.sh
# Install keepalived
yum install -y conntrack-tools libseccomp libtool-ltdl && yum install -y keepalived
# master1 node configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master1
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    # run the check every 3 seconds (the value is in seconds)
    interval 3
}
vrrp_instance VI_1 {
    state MASTER
    # change to the actual NIC name
    interface eth0
    virtual_router_id 80
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF
# master2 node configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master2
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    # run the check every 3 seconds (the value is in seconds)
    interval 3
}
vrrp_instance VI_1 {
    state BACKUP
    # change to the actual NIC name
    interface eth0
    virtual_router_id 80
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF
# master3 node configuration
cat > /etc/keepalived/keepalived.conf << EOF
global_defs {
    router_id master3
}
vrrp_script check_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    # run the check every 3 seconds (the value is in seconds)
    interval 3
}
vrrp_instance VI_1 {
    state BACKUP
    # change to the actual NIC name
    interface eth0
    virtual_router_id 80
    priority 80
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 111111
    }
    virtual_ipaddress {
        10.167.47.86
    }
    track_script {
        check_haproxy
    }
}
EOF
# Start keepalived
systemctl start keepalived.service
# Enable on boot
systemctl enable keepalived.service
# Check its status
systemctl status keepalived.service
# The VIP should be bound on the MASTER node
ip a s eth0
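# Optional failover test: stop keepalived on master1 and the VIP should move to the next-highest-priority node.
systemctl stop keepalived    # on master1
ip a s eth0                  # on master2: 10.167.47.86 should now be bound here
systemctl start keepalived   # on master1, to restore the original state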

3. Install Docker

# Add the Docker yum repository
yum install -y yum-utils device-mapper-persistent-data lvm2
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
# or download the repo file from:
https://files.cnblogs.com/files/chuanghongmeng/docker-ce.zip?t=1669080259
# Install
yum install docker-ce-20.10.3 -y
mkdir -p /data/docker
mkdir -p /etc/docker/
# Tip: newer kubelet releases recommend systemd, so switch Docker's CgroupDriver to systemd.
# If the /etc/docker directory does not exist, starting Docker will create it automatically.
cat > /etc/docker/daemon.json <<EOF
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "registry-mirrors": ["https://xxxxxxxx.mirror.aliyuncs.com"]
}
EOF
# Tip: pick a Docker data directory that suits the server, e.g. /data
# (--graph is deprecated on newer Docker releases; --data-root is its replacement)
vi /usr/lib/systemd/system/docker.service
ExecStart=/usr/bin/dockerd --graph=/data/docker
# Reload the unit files and restart
systemctl daemon-reload
systemctl restart docker
systemctl enable docker.service
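# Confirm the cgroup driver and data directory took effect:
docker info | grep -E 'Cgroup Driver|Docker Root Dir'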

4. Install Kubernetes components (all nodes)

# Switch to the Aliyun Kubernetes yum repository
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# Remove the Aliyun internal mirror entries from the CentOS base repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
# List the installable versions
yum list kubeadm.x86_64 --showduplicates | sort -r
# Remove any old versions
yum remove -y kubelet kubeadm kubectl
# Install
yum install kubeadm kubelet kubectl -y
# Enable on boot
systemctl enable kubelet && systemctl start kubelet
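# The repo installs the latest packaged release; to pin the release used by the kubeadm config below
# (assuming 1.28.2 is available in the Aliyun repo), specify the versions explicitly:
yum install -y kubeadm-1.28.2 kubelet-1.28.2 kubectl-1.28.2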

5. Kubernetes initialization (master1)

mkdir /usr/local/kubernetes/manifests -p
cd /usr/local/kubernetes/manifests/
# If cri appears in the disabled_plugins list, remove it, save the file and reload.
# Run this on all nodes, otherwise kubelet will fail to start.
rm /etc/containerd/config.toml
containerd config default > /etc/containerd/config.toml
# Make sure cri is no longer listed in disabled_plugins
crictl config runtime-endpoint unix:///run/containerd/containerd.sock
vi /etc/crictl.yaml
# Edit /etc/crictl.yaml; the main change is the image-endpoint added in newer versions
runtime-endpoint: "unix:///run/containerd/containerd.sock"
image-endpoint: "unix:///run/containerd/containerd.sock" # keep identical to runtime-endpoint above
timeout: 10
debug: false
pull-image-on-create: false
disable-pull-on-run: false
systemctl restart containerd
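# Once containerd is back up, confirm the CRI endpoint answers:
crictl version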
# Reset any previous state before initializing the cluster
kubeadm reset
#kubeadm init --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.28.2 --service-cidr=10.11.0.0/16 --pod-network-cidr=10.10.0.0/16
# Create the init configuration file
vi kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
  - system:bootstrappers:kubeadm:default-node-token
  token: abcdef.0123456789abcdef
  ttl: 24h0m0s
  usages:
  - signing
  - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 10.167.47.12
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///run/containerd/containerd.sock
  # this node's own hostname
  name: master1
---
apiServer:
  # extra settings for the HA setup
  extraArgs:
    authorization-mode: "Node,RBAC"
  # list every kube-apiserver hostname and IP plus the VIP
  certSANs:
  - master1
  - master2
  - master3
  - 10.167.47.12
  - 10.167.47.24
  - 10.167.47.25
  - 10.167.47.86
  timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
# CoreDNS is the only supported DNS in v1beta3; no extra settings needed
dns: {}
etcd:
  local:
    dataDir: /var/lib/etcd
# use the Aliyun image registry
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: 1.28.2
# virtual IP and port
controlPlaneEndpoint: "10.167.47.86:16443"
networking:
  dnsDomain: cluster.local
  podSubnet: 10.10.0.0/16
  serviceSubnet: 10.11.0.0/16
scheduler: {}
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
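# Optional: pre-pull the control-plane images with the same config; a quick way to confirm the
# imageRepository setting works before running the real init:
kubeadm config images pull --config kubeadm-config.yaml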
# Initialize the cluster
# Run on all nodes first, otherwise the init fails because the pause image cannot be pulled
ctr -n k8s.io images pull -k registry.aliyuncs.com/google_containers/pause:3.6
ctr -n k8s.io images tag registry.aliyuncs.com/google_containers/pause:3.6 registry.k8s.io/pause:3.6
kubeadm init --config=kubeadm-config.yaml
# Follow the printed instructions to configure kubectl:
mkdir -p $HOME/.kube
cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
chown $(id -u):$(id -g) $HOME/.kube/config
kubectl get nodes
kubectl get pods -n kube-system
# Join the other master nodes
# Copy the certificates to the other masters (master2 and master3) first, otherwise the join fails
# On master2 and master3, create the target directories:
mkdir -p /etc/kubernetes/pki
mkdir -p /etc/kubernetes/pki/etcd/
# On master1, copy the certificates over:
scp -rp /etc/kubernetes/pki/ca.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/sa.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master2:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/etcd/ca.* master2:/etc/kubernetes/pki/etcd
scp -rp /etc/kubernetes/admin.conf master2:/etc/kubernetes/
scp -rp /etc/kubernetes/pki/ca.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/sa.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/front-proxy-ca.* master3:/etc/kubernetes/pki
scp -rp /etc/kubernetes/pki/etcd/ca.* master3:/etc/kubernetes/pki/etcd
scp -rp /etc/kubernetes/admin.conf master3:/etc/kubernetes/
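# On master2 and master3, run the join command printed by kubeadm init, adding --control-plane.
# The token below comes from kubeadm-config.yaml; the CA-cert hash is a placeholder, use the value
# from your own init output (or regenerate the command with: kubeadm token create --print-join-command):
kubeadm join 10.167.47.86:16443 --token abcdef.0123456789abcdef \
    --discovery-token-ca-cert-hash sha256:<hash-from-init-output> --control-plane
# Then set up kubectl on each joined master the same way as on master1.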

6. Install Kube-OVN (on master1)

# A container registry mirror must be configured first, otherwise the OVN images cannot be pulled. Run on all master nodes.
mkdir /etc/containerd/certs.d
# Mirrors are configured per registry, so create one directory per registry below, starting with docker.io.
# Edit config.toml and point config_path at the certs.d directory. The default config already
# contains this registry section; set config_path there rather than appending a duplicate table.
vi /etc/containerd/config.toml
[plugins."io.containerd.grpc.v1.cri".registry]
  config_path = "/etc/containerd/certs.d"
# Docker Hub mirror
mkdir -p /etc/containerd/certs.d/docker.io
cat > /etc/containerd/certs.d/docker.io/hosts.toml << EOF
server = "https://docker.io"
[host."https://dockerproxy.com"]
capabilities = ["pull", "resolve"]
skip_verify = true
[host."https://docker.m.daocloud.io"]
capabilities = ["pull", "resolve"]
skip_verify = true
[host."https://reg-mirror.qiniu.com"]
capabilities = ["pull", "resolve"]
skip_verify = true
[host."https://registry.docker-cn.com"]
capabilities = ["pull", "resolve"]
skip_verify = true
[host."http://hub-mirror.c.163.com"]
capabilities = ["pull", "resolve"]
skip_verify = true
EOF
# registry.k8s.io mirror
mkdir -p /etc/containerd/certs.d/registry.k8s.io
tee /etc/containerd/certs.d/registry.k8s.io/hosts.toml << 'EOF'
server = "https://registry.k8s.io"
[host."https://k8s.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# docker.elastic.co mirror
mkdir -p /etc/containerd/certs.d/docker.elastic.co
tee /etc/containerd/certs.d/docker.elastic.co/hosts.toml << 'EOF'
server = "https://docker.elastic.co"
[host."https://elastic.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# gcr.io mirror
mkdir -p /etc/containerd/certs.d/gcr.io
tee /etc/containerd/certs.d/gcr.io/hosts.toml << 'EOF'
server = "https://gcr.io"
[host."https://gcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# ghcr.io mirror
mkdir -p /etc/containerd/certs.d/ghcr.io
tee /etc/containerd/certs.d/ghcr.io/hosts.toml << 'EOF'
server = "https://ghcr.io"
[host."https://ghcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# k8s.gcr.io mirror
mkdir -p /etc/containerd/certs.d/k8s.gcr.io
tee /etc/containerd/certs.d/k8s.gcr.io/hosts.toml << 'EOF'
server = "https://k8s.gcr.io"
[host."https://k8s-gcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# mcr.microsoft.com mirror
mkdir -p /etc/containerd/certs.d/mcr.microsoft.com
tee /etc/containerd/certs.d/mcr.microsoft.com/hosts.toml << 'EOF'
server = "https://mcr.microsoft.com"
[host."https://mcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# nvcr.io mirror
mkdir -p /etc/containerd/certs.d/nvcr.io
tee /etc/containerd/certs.d/nvcr.io/hosts.toml << 'EOF'
server = "https://nvcr.io"
[host."https://nvcr.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# quay.io mirror
mkdir -p /etc/containerd/certs.d/quay.io
tee /etc/containerd/certs.d/quay.io/hosts.toml << 'EOF'
server = "https://quay.io"
[host."https://quay.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# registry.jujucharms.com mirror
mkdir -p /etc/containerd/certs.d/registry.jujucharms.com
tee /etc/containerd/certs.d/registry.jujucharms.com/hosts.toml << 'EOF'
server = "https://registry.jujucharms.com"
[host."https://jujucharms.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
# rocks.canonical.com mirror
mkdir -p /etc/containerd/certs.d/rocks.canonical.com
tee /etc/containerd/certs.d/rocks.canonical.com/hosts.toml << 'EOF'
server = "https://rocks.canonical.com"
[host."https://rocks-canonical.m.daocloud.io"]
capabilities = ["pull", "resolve", "push"]
skip_verify = true
EOF
systemctl restart containerd
# Verify the mirrors work
ctr i pull --hosts-dir=/etc/containerd/certs.d registry.k8s.io/sig-storage/csi-provisioner:v3.5.0
ctr --debug=true i pull --hosts-dir=/etc/containerd/certs.d registry.k8s.io/sig-storage/csi-provisioner:v3.5.0
# Pull with crictl as well
crictl --debug=true pull docker.io/library/ubuntu:20.04
crictl images
# Download the automated install script (may require a proxy to reach GitHub): `wget https://raw.githubusercontent.com/kubeovn/kube-ovn/release-1.10/dist/images/install.sh`
# Edit the parameters in `install.sh`
# Cleanup script: https://raw.githubusercontent.com/alauda/kube-ovn/master/dist/images/cleanup.sh
REGISTRY="kubeovn"                              # image registry
VERSION="v1.10.10"                              # image version/tag
POD_CIDR="10.10.0.0/16"                         # default subnet CIDR; must not overlap with the SVC/NODE/JOIN CIDRs
SVC_CIDR="10.11.0.0/16"                         # must match the apiserver's service-cluster-ip-range
JOIN_CIDR="100.12.0.0/16"                       # CIDR for pod-to-host traffic; must not overlap with the SVC/NODE/POD CIDRs
LABEL="node-role.kubernetes.io/control-plane"   # label of the nodes that will run the OVN DB
IFACE=""                                        # host NIC used for the container network; if empty, the NIC holding the Kubernetes node IP is used
TUNNEL_TYPE="geneve"                            # tunnel encapsulation protocol: geneve, vxlan or stt (stt requires a separately compiled OVS kernel module)
# Run `bash install.sh` to install
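# After `bash install.sh` completes, the Kube-OVN components should come up in kube-system
# (component names assume a default install):
kubectl -n kube-system get pods -o wide | grep -E 'ovn|ovs'
kubectl get nodes    # nodes turn Ready once the CNI is running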

7. Let the master nodes also act as worker nodes

By default, Kubernetes control-plane (master) nodes do not run ordinary pods, because they carry a NoSchedule taint.

Removing the NoSchedule taint solves this, as follows (using master1 as an example; repeat on the other control-plane nodes):

kubectl taint node master1 node-role.kubernetes.io/control-plane:NoSchedule-
# expected output: node/master1 untainted
# Check the result
kubectl describe node master1 | grep Taint
# The following loop removes the taint from all three nodes at once
for node in $(kubectl get nodes --selector='node-role.kubernetes.io/control-plane' | awk 'NR>1 {print $1}' ) ; do kubectl taint node $node node-role.kubernetes.io/control-plane- ; done

Note: the control-plane (master) nodes take on the worker role here only for testing. This is generally not recommended: the control plane manages the entire cluster, including scheduling workloads and monitoring node and container state, so running workloads on it consumes resources, adds latency, and makes the system less stable. It also carries security risks.

8. Install the dashboard (master1)

1. Install Helm

wget https://get.helm.sh/helm-v3.2.3-linux-amd64.tar.gz
tar -zxvf helm-v3.2.3-linux-amd64.tar.gz
cd linux-amd64
cp helm /usr/local/bin/
helm version

2. Install the dashboard

# Add the kubernetes-dashboard repository
helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/
# Deploy a Helm release named `kubernetes-dashboard` from the kubernetes-dashboard chart
helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard --create-namespace --namespace kubernetes-dashboard
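# A quick check that the release came up (resource names depend on the chart version):
helm -n kubernetes-dashboard list
kubectl -n kubernetes-dashboard get pods,svc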
