自動化kolla-ansible部署openstack+GPU透傳方法

星空之源發表於2021-04-23

自動化kolla-ansible部署openstack+GPU透傳方法

歡迎加QQ群:1026880196 進行交流學習

 

1. CentOS7.x-8.x系列為虛擬機器配置GPU直通

1. 編輯檔案vim  /etc/modules, 新增以下內容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel


2. 在KVM主機上啟用IOMMU 

#對於Intel晶片:
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"

#對於AMD晶片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"


vim /etc/default/grub


GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet intel_iommu=on"
GRUB_DISABLE_RECOVERY="true"


   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg


   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg

4.  將下列內容加入到blacklist中以避免被宿主機佔用,編輯檔案
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia


5.  查詢顯示卡的Product ID 以及 Vendor ID:
yum install pciutils -y
lspci -nn | grep NVIDIA

如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)


6.  編輯
vim /etc/modprobe.d/vfio.conf

# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0


7.  寫入到系統啟動項
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf 


8.  重新生成initramfs
mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)


9.  重啟系統
reboot


10. 驗證
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio

[root@stein-a ~]# lspci -nnk -d 10de:1bb1
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation Device [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nouveau
[root@stein-a ~]# dmesg | grep -i vfio
[    2.503115] VFIO - User Level meta-driver version: 0.3
[    2.515645] vfio_pci: add [10de:1bb1[ffff:ffff]] class 0x000000/00000000
[    2.515752] vfio_pci: add [10de:10f0[ffff:ffff]] class 0x000000/00000000
[root@stein-a ~]# 

 

2. Ubuntu18.04系列為虛擬機器配置GPU直通

1. 編輯檔案vim  /etc/modules, 新增以下內容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel


2. 在KVM主機上啟用IOMMU 

#對於Intel晶片:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"

#對於AMD晶片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"


vim /etc/default/grub


GRUB_DEFAULT=0
GRUB_TIMEOUT_STYLE=hidden
GRUB_TIMEOUT=0
GRUB_DISTRIBUTOR=`lsb_release -i -s 2> /dev/null || echo Debian`
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
GRUB_CMDLINE_LINUX=""


   3.  重新生成grub
   #Ubuntu下EFI與非EFI皆使用同一個命令(等同於 grub-mkconfig -o /boot/grub/grub.cfg)
   update-grub

4.  將下列內容加入到blacklist中以避免被宿主機佔用,編輯檔案
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia


5.  查詢顯示卡的Product ID 以及 Vendor ID:
apt install pciutils -y
lspci -nn | grep NVIDIA

如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)


6.  編輯
vim /etc/modprobe.d/vfio.conf

# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0


7.  寫入到系統啟動項
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf 


8.  重新生成initramfs
#Ubuntu使用initramfs-tools,而非dracut
update-initramfs -u -k "$(uname -r)"


9.  重啟系統
reboot


10. 驗證
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio

root@kvm:~# lspci -nnk -d 10de:1bb1
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation GP104GL [Quadro P4000] [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nvidiafb, nouveau
root@kvm:~# dmesg | grep -i vfio
[    3.838714] VFIO - User Level meta-driver version: 0.3
[    3.846238] vfio-pci 0000:03:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[    3.866370] vfio_pci: add [10de:1bb1[ffffffff:ffffffff]] class 0x000000/00000000
[    3.886375] vfio_pci: add [10de:10f0[ffffffff:ffffffff]] class 0x000000/00000000

 

#如果你是單機部署的,在單機下配置。
#如果你是高可用部署的,在三臺控制節點配置

1. 新增pci
vim /etc/kolla/config/nova/nova-compute.conf

[libvirt]
inject_password=true
cpu_mode=host-passthrough
virt_type = kvm

[pci]
passthrough_whitelist: { "vendor_id": "10de", "product_id": "1bb1" }


2. 修改nova.conf
vim /etc/kolla/config/nova.conf
[DEFAULT]
service_down_time = 120
cpu_allocation_ratio = 4.0   
disk_allocation_ratio=1.0
ram_allocation_ratio = 1.0   
reserved_host_disk_mb = 4096 
reserved_host_memory_mb = 4096 
allow_resize_to_same_host = True    
remove_unused_base_images = False
image_cache_manager_interval = 0
resume_guests_state_on_host_boot = True


[PCI]
alias: { "vendor_id":"10de", "product_id":"1bb1", "device_type":"type-PCI", "name":"quadro-p4000" }

[filter_scheduler]
enabled_filters = RetryFilter, AvailabilityZoneFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters



3. 建立GPU型別的例項規格(flavor)
openstack flavor create --vcpus 4 --ram 8192 --disk 30 --property "pci_passthrough:alias"="quadro-p4000:1" g1.4c.8m.p4000

 

3. CentOS7.x系列 安裝顯示卡驅動

1.  檢視是否含有英偉達顯示卡
lspci | grep -i NVIDIA

#下面說明有1塊英偉達的顯示卡
[root@train-all ~]#  lspci | grep -i NVIDIA
04:00.0 VGA compatible controller: NVIDIA Corporation GP104GL [Quadro P4000] (rev a1)
04:00.1 Audio device: NVIDIA Corporation GP104 High Definition Audio Controller (rev a1)
[root@train-all ~]# 


2.  匯入ELRepo源的GPG金鑰
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org 


3.  安裝ELRepo
rpm -Uvh https://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm


4.  安裝nvidia-detect
yum install nvidia-detect -y


5.  執行nvidia-detect
nvidia-detect -v


6.  查詢驅動程式
yum search kmod-nvidia


7.  安裝驅動程式
yum install kmod-nvidia.x86_64 -y


8.  檢視Nouveau是否已禁用
lsmod | grep nouveau 
#若沒有輸出 則說明禁用成功,否則執行下面的命令


9.  建立檔案/etc/modprobe.d/blacklist-nouveau.conf,其內容如下:
vi /etc/modprobe.d/blacklist-nouveau.conf

新增
blacklist nouveau
options nouveau modeset=0


10. 重新生成核心initramfs
dracut --force


11.  重啟系統
reboot


12.  測試
nvidia-smi

 

相關文章