folio大概是5.16引入的,看起來像是page的封裝,這裡有一篇講解folio很好的部落格,論好名字的重要性: Linux核心page到folio的變遷-CSDN部落格
struct folio { /* private: don't document the anon union */ union { struct { /* public: */ unsigned long flags; union { struct list_head lru; /* private: avoid cluttering the output */ struct { void *__filler; /* public: */ unsigned int mlock_count; /* private: */ }; /* public: */ }; struct address_space *mapping; pgoff_t index; union { void *private; swp_entry_t swap; }; atomic_t _mapcount; atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif #if defined(WANT_PAGE_VIRTUAL) void *virtual; #endif #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif /* private: the union with struct page is transitional */ }; struct page page; }; union { struct { unsigned long _flags_1; unsigned long _head_1; unsigned long _folio_avail; /* public: */ atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif /* private: the union with struct page is transitional */ }; struct page __page_1; }; union { struct { unsigned long _flags_2; unsigned long _head_2; /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; struct { unsigned long _flags_2a; unsigned long _head_2a; /* public: */ struct list_head _deferred_list; /* private: the union with struct page is transitional */ }; struct page __page_2; }; };
簡單來看它似乎是三個page結構的組合。與第一個page union的結構跟page結構幾乎一致。引入folio是為了解決長久以來page混亂的語義。page除了可以代表單頁也可以代表連續多個頁面,甚至大頁。page在核心中應用廣泛,這種混亂增加了寫程式碼和理解程式碼的難度,人為的增加混亂。folio代表一個或多個page,本身就可以代表page所有的語義。在新的核心程式碼中folio在很多場合完成了page的替代,但是page依然存在。
比如compound_order的實現。在folio之前是這樣的。
static inline unsigned int compound_order(struct page *page) { if (!PageHead(page)) return 0; return page[1].compound_order; }
先檢查page是不是單頁,如果是單頁直接返回0,對於複合頁order儲存在後一個page的compound_order成員中。也即是單個page是表示不了多頁的,但是folio可以。
static inline unsigned int compound_order(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 0; return folio->_flags_1 & 0xff; }
新的程式碼中首先將page強轉為folio,判斷其是否為複合頁,如果是複合頁order儲存在_flags_1中。看起來也沒簡化,反而更復雜一點,但是可以在一個folio結構中解決問題,不再依賴於tail page。
看一下page結構
struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ /* * Five words (20/40 bytes) are available in this union. * WARNING: bit 0 of the first word is used for PageTail(). That * means the other users of this union MUST NOT use the bit to * avoid collision and false-positive PageTail(). */ union { struct { /* Page cache and anonymous pages */ /** * @lru: Pageout list, eg. active_list protected by * lruvec->lru_lock. Sometimes used as a generic list * by the page owner. */ union { struct list_head lru; /* Or, for the Unevictable "LRU list" slot */ struct { /* Always even, to negate PageTail */ void *__filler; /* Count page's or folio's mlocks */ unsigned int mlock_count; }; /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { pgoff_t index; /* Our offset within mapping. */ unsigned long share; /* share count for fsdax */ }; /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. * Used for swp_entry_t if PageSwapCache. * Indicates order in the buddy system if PageBuddy. */ unsigned long private; }; struct { /* page_pool used by netstack */ /** * @pp_magic: magic value to avoid recycling non * page_pool allocated pages. */ unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; unsigned long dma_addr; atomic_long_t pp_ref_count; }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ /** @pgmap: Points to the hosting device page map. */ struct dev_pagemap *pgmap; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being * mapped so the next 3 words hold the mapping, index, * and private fields from the source anonymous or * page cache page while the page is migrated to device * private memory. * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also * use the mapping, index, and private fields when * pmem backed DAX files are mapped. */ }; /** @rcu_head: You can use this to free a page by RCU. */ struct rcu_head rcu_head; }; union { /* This union is 4 bytes in size. */ /* * If the page can be mapped to userspace, encodes the number * of times this page is referenced by a page table. */ atomic_t _mapcount; /* * If the page is neither PageSlab nor mappable to userspace, * the value stored here may help determine what this page * is used for. See page-flags.h for a list of page types * which are currently stored here. */ unsigned int page_type; }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; #ifdef CONFIG_MEMCG unsigned long memcg_data; #endif /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with * highmem some memory is mapped into kernel virtual memory * dynamically, so we need a place to store that address. * Note that this field could be 16 bits on x86 ... ;) * * Architectures with slow multiplication can define * WANT_PAGE_VIRTUAL in asm/page.h */ #if defined(WANT_PAGE_VIRTUAL) void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif #ifdef CONFIG_KMSAN /* * KMSAN metadata for this page: * - shadow page: every bit indicates whether the corresponding * bit of the original page is initialized (0) or not (1); * - origin page: every 4 bytes contain an id of the stack trace * where the uninitialized value was created. */ struct page *kmsan_shadow; struct page *kmsan_origin; #endif } _struct_page_alignment;
1. flags
enum pageflags { PG_locked, /* Page is locked. Don't touch. */ PG_writeback, /* Page is under writeback */ PG_referenced, PG_uptodate, PG_dirty, PG_lru, PG_head, /* Must be in bit 6 */ PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, PG_error, PG_slab, PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable,
...
flags由四部分構成,|node|zone|last_cpuid|flags|
2. mapping
最低兩個bits可以用來判斷是否為匿名對映或ksm對映。對於匿名對映指向anon_vma. 對於file對映指向address_space結構。
#define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
3. _refcount
表示頁面在核心中的引用次數。大於0代表正在使用。
static inline void get_page(struct page *page) { folio_get(page_folio(page)); } static inline void folio_get(struct folio *folio) { VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio); folio_ref_inc(folio); } static inline void folio_ref_inc(struct folio *folio) { page_ref_inc(&folio->page); } static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); if (page_ref_tracepoint_active(page_ref_mod)) __page_ref_mod(page, 1); }
folio讓page操作變得非常繁瑣,這樣真的好嗎?
分配記憶體時_refcount + 1, 加入lru連結串列時+1等。
4. _mapcount
表示這個頁面被程序對映的次數,用做反向對映。-1代表沒有頁表對映。
page相關的API
static inline struct zone *page_zone(const struct page *page) { return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; }
static inline int page_zone_id(struct page *page) { return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; }
mapping相關
struct address_space *page_mapping(struct page *page) { return folio_mapping(page_folio(page)); } struct address_space *folio_mapping(struct folio *folio) { struct address_space *mapping; /* This happens if someone calls flush_dcache_page on slab page */ if (unlikely(folio_test_slab(folio))) return NULL; if (unlikely(folio_test_swapcache(folio))) return swap_address_space(folio->swap); mapping = folio->mapping;
//如果是匿名頁或ksm頁 if ((unsigned long)mapping & PAGE_MAPPING_FLAGS) return NULL; return mapping; }
page_mapped
static inline bool page_mapped(struct page *page) { if (likely(!PageCompound(page))) return atomic_read(&page->_mapcount) >= 0; return folio_large_is_mapped(page_folio(page)); }
對於普通頁面只需判斷_mapcount值。