EXT4檔案系統學習(11)VFS之記憶體結構sb和inode
前面2篇介紹了ext4在磁碟上的佈局。在使用過程中,核心需要頻繁地訪問某些資料結構,所以會把磁碟上的資料讀出,裝入記憶體中相應的資料結構。
超級塊
ext4在記憶體中的超級塊結構定義如下:
/*
* Fourth extended-fs (ext4) super-block data kept in memory.
*
* Groups together filesystem geometry values, the buffer that contains
* the on-disk super block (s_sbh / s_es), journaling state, buddy
* allocator tunables and statistics, and other per-filesystem state.
*
* NOTE(review): member order matters for layout (see the
* ____cacheline_aligned_in_smp annotation on s_es_lock); do not reorder
* members as a cosmetic change.
*/
struct ext4_sb_info {
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
unsigned long s_inodes_per_block;/* Number of inodes per block */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_clusters_per_group; /* Number of clusters in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead; /* # of fs overhead clusters */
unsigned int s_cluster_ratio; /* Number of blocks per cluster */
unsigned int s_cluster_bits; /* log2 of s_cluster_ratio */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
/*
* Array of buffer_head pointers.
* NOTE(review): presumably one entry per group descriptor block
* (cf. s_gdb_count) -- confirm against the allocation site.
*/
struct buffer_head **s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters;
kuid_t s_resuid;
kgid_t s_resgid;
unsigned short s_mount_state;
unsigned short s_pad;
int s_addr_per_block_bits;
int s_desc_per_block_bits;
int s_inode_size;
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
spinlock_t s_next_gen_lock;
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
/*
* NOTE(review): the field is named "unsigned" while the comment below
* says "signed"; the comment's polarity looks inverted -- confirm
* against the code that assigns this field.
*/
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
struct percpu_counter s_freeclusters_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct percpu_counter s_dirtyclusters_counter;
struct blockgroup_lock *s_blockgroup_lock;
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
/* Journaling */
struct journal_s *s_journal;
struct list_head s_orphan;
struct mutex s_orphan_lock;
unsigned long s_resize_flags; /* Flags indicating if there
is a resizer */
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
/* The two quota members exist only when CONFIG_QUOTA is defined. */
#ifdef CONFIG_QUOTA
char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
struct rb_root system_blks;
/* Extent statistics members exist only when EXTENTS_STATS is defined. */
#ifdef EXTENTS_STATS
/* ext4 extents stats */
unsigned long s_ext_min;
unsigned long s_ext_max;
unsigned long s_depth_max;
spinlock_t s_ext_stats_lock;
unsigned long s_ext_blocks;
unsigned long s_ext_extents;
#endif
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
spinlock_t s_md_lock;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
/* tunables */
unsigned long s_stripe;
unsigned int s_mb_stream_request;
unsigned int s_mb_max_to_scan;
unsigned int s_mb_min_to_scan;
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
atomic_t s_bal_ex_scanned; /* total extents scanned */
atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
spinlock_t s_bal_lock;
unsigned long s_mb_buddies_generated;
unsigned long long s_mb_generation_time;
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
atomic_t s_lock_busy;
/* locality groups */
struct ext4_locality_group __percpu *s_locality_groups;
/* for write statistics */
unsigned long s_sectors_written_start;
u64 s_kbytes_written;
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;
unsigned int s_log_groups_per_flex;
struct flex_groups *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
/* workqueue for reserved extent conversions (buffered io) */
struct workqueue_struct *rsv_conversion_wq;
/* timer for periodic error stats printing */
struct timer_list s_err_report;
/* Lazy inode table initialization info */
struct ext4_li_request *s_li_request;
/* Wait multiplier for lazy initialization thread */
unsigned int s_li_wait_mult;
/* Kernel thread for multiple mount protection */
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
atomic_t s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
struct mb_cache *s_mb_cache;
/* Carries an explicit alignment annotation; keep its position in the struct. */
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
/* Encryption mode members exist only when CONFIG_EXT4_FS_ENCRYPTION is defined. */
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption */
uint32_t s_file_encryption_mode;
uint32_t s_dir_encryption_mode;
#endif
};
記憶體中的超級塊結構與磁碟中的超級塊結構大體一致。在掛載檔案系統時(即下文的ext4_fill_super),核心會把磁碟上的ext4超級塊資料讀出,裝入記憶體中對應的資料結構;由於核心頻繁使用這些資料,所以它們是常駐記憶體的。
結構體成員s_sbh指向磁碟超級塊緩衝區頭部;
結構體成員s_group_desc指向磁碟組描述符緩衝區頭部;
結構體成員s_es指向磁碟超級塊結構的記憶體首地址;
ext4_sb_info的建立是在ext4_fill_super函式中完成的,程式碼如下:
/*
* Abridged excerpt of struct ext4_sb_info: only the three buffer-related
* members are repeated here for illustration. This is not a separate
* type; the full definition lists many more members.
*/
struct ext4_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc; /* Array of buffer_head pointers */
};
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct ext4_sb_info *sbi; /* 記憶體超級塊 */
struct buffer_head *bh; /* 磁碟超級塊邏輯資料 */
struct ext4_super_block *es = NULL; /* 磁碟超級塊 */
bh = sb_bread_unmovable(sb, logical_sb_block); /* 讀出磁碟超級塊資料到緩衝區 */
es = (struct ext4_super_block *) (bh->b_data + offset); /* 資料裝載到磁碟超級塊記憶體資料結構 */
sbi->s_sbh = bh; /* VFS與記憶體和磁碟超級塊的聯絡 */
sbi->s_es = es;
sb->s_fs_info = sbi;
sbi->s_sb = sb;
sbi->s_group_desc = ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); /* 讀出組描述符資料 */
inode
ext4記憶體中inode資料結構如下:
/*
* Fourth extended file system (ext4) inode data kept in memory.
*
* Embeds the generic VFS inode (vfs_inode) alongside ext4-specific state:
* block mapping data, locks, extent status tree, delayed-allocation
* reservations, and bookkeeping for completed I/O.
*/
struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */
__u32 i_dtime;
ext4_fsblk_t i_file_acl;
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
* it is used for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
ext4_group_t i_block_group;
ext4_lblk_t i_dir_start_lookup;
/*
* i_state_flags is a separate member only when long is narrower than
* 64 bits.
* NOTE(review): presumably the state bits share i_flags on 64-bit
* builds -- confirm against the accessors.
*/
#if (BITS_PER_LONG < 64)
unsigned long i_state_flags; /* Dynamic state flags */
#endif
unsigned long i_flags;
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
* between readers of EAs and writers of regular file data, so
* instead we synchronize on xattr_sem when reading or changing
* EAs.
*/
struct rw_semaphore xattr_sem;
struct list_head i_orphan; /* unlinked but open inodes */
/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
* the VFS prior to calling ext4_truncate(), but the filesystem won't
* set i_disksize to 0 until the truncate is actually under way.
*
* The intent is that i_disksize always represents the blocks which
* are used by this file. This allows recovery to restart truncate
* on orphans if we crash during truncate. We actually write i_disksize
* into the on-disk inode when writing inodes out, instead of i_size.
*
* The only time when i_disksize and i_size may be different is when
* a truncate is in progress. The only things which change i_disksize
* are ext4_get_block (growth) and ext4_truncate (shrinkth).
*/
loff_t i_disksize;
/*
* i_data_sem is for serialising ext4_truncate() against
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
* data tree are chopped off during truncate. We can't do that in
* ext4 because whenever we perform intermediate commits during
* truncate, the inode and all the metadata blocks *must* be in a
* consistent state which allows truncation of the orphans to restart
* during recovery. Hence we must fix the get_block-vs-truncate race
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
/*
* i_mmap_sem is for serializing page faults with truncate / punch hole
* operations. We have to make sure that new page cannot be faulted in
* a section of the inode that is being punched. We cannot easily use
* i_data_sem for this since we need protection for the whole punch
* operation and i_data_sem ranks below transaction start so we have
* to occasionally drop it.
*/
struct rw_semaphore i_mmap_sem;
/* The generic VFS inode, embedded by value (not a pointer). */
struct inode vfs_inode;
struct jbd2_inode *jinode;
spinlock_t i_raw_lock; /* protects updates to the raw inode */
/*
* File creation time. Its function is same as that of
* struct timespec i_{a,c,m}time in the generic inode.
*/
struct timespec i_crtime;
/* mballoc */
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
struct list_head i_es_list;
unsigned int i_es_all_nr; /* protected by i_es_lock */
unsigned int i_es_shk_nr; /* protected by i_es_lock */
ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for
extents to shrink. Protected by
i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
/* In case of bigalloc, these refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
char i_crypt_policy_flags;
/* Indicate the inline data space. */
u16 i_inline_off;
u16 i_inline_size;
/* Present only when CONFIG_QUOTA is defined. */
#ifdef CONFIG_QUOTA
/* quota space reservation, managed internally by quota code */
qsize_t i_reserved_quota;
#endif
/*
* Lock protecting lists below
* NOTE(review): only one list (i_rsv_conversion_list) follows in this
* version of the struct.
*/
spinlock_t i_completed_io_lock;
/*
* Completed IOs that need unwritten extents handling and have
* transaction reserved
*/
struct list_head i_rsv_conversion_list;
/*
* Completed IOs that need unwritten extents handling and don't have
* transaction reserved
*
* NOTE(review): no list member follows this comment here; it appears
* to be left over from a removed member and does not describe the
* counters below.
*/
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
struct work_struct i_rsv_conversion_work;
spinlock_t i_block_reservation_lock;
/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
/* Present only when CONFIG_QUOTA is defined. */
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
/* Present only when CONFIG_EXT4_FS_ENCRYPTION is defined. */
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Encryption params */
struct ext4_encryption_key i_encryption_key;
#endif
};
結構體成員與磁碟上的inode結構類似,各成員的具體用法目前尚不清楚。
磁碟上的bmap和imap也會讀取到記憶體中,用什麼資料結構儲存的呢?什麼時候讀的?
struct inode vfs_inode;這個vfs_inode是虛擬檔案系統的inode結構,下一篇介紹。
相關文章
- EXT4檔案系統學習(14)VFS之VFS inode
- EXT4檔案系統學習(9)VFS之磁碟結構inode和direntry
- EXT4檔案系統學習(10)VFS之磁碟結構Group和superblock
- EXT4檔案系統學習(15)VFS之VFS 檔案/目錄物件
- EXT4檔案系統學習(13)VFS之VFS超級塊super_block
- EXT4檔案系統學習(12)VFS之檔案系統物件file_system_type物件
- VFS檔案系統結構分析
- 記憶體檔案系統的再學習記憶體
- Oracle體系結構之-記憶體結構Oracle記憶體
- 理解Linux檔案系統之 inodeLinux
- Oracle體系結構之記憶體結構(SGA、PGA)Oracle記憶體
- JVM學習(一)——記憶體結構JVM記憶體
- Oracle體系結構:記憶體結構和程式結構(轉)Oracle記憶體
- 表`t`的INODE結構檔案分析
- 這一次,終於系統的學習了 JVM 記憶體結構JVM記憶體
- Postgresql資料庫體系結構-程式和記憶體結構SQL資料庫記憶體
- Linux AS 使用記憶體檔案系統Linux記憶體
- 虛擬記憶體檔案系統TMPFS記憶體
- 作業系統-記憶體、檔案管理作業系統記憶體
- Oracle記憶體體系結構Oracle記憶體
- Oracle體系結構學習筆記Oracle筆記
- ASM學習筆記_體系結構ASM筆記
- 記下原來學習Oracle的筆記之——體系結構Oracle筆記
- 作業系統——記憶體管理學習筆記作業系統記憶體筆記
- 【AIX 學習】檔案系統之 PROCAI
- QFS檔案系統-學習記錄
- ARM學習之時鐘體系結構
- DG學習筆記(2)_體系結構筆記
- linux磁碟和檔案系統的學習筆記Linux筆記
- [Virtualization]ESXi體系結構與記憶體管理(一)體系結構記憶體
- 如何系統學習C 語言(中)之 結構體篇結構體
- JVM之記憶體結構詳解JVM記憶體
- Linux系統學習之檔案管理Linux
- Linux檔案系統iNode及stat命令Linux
- GPFS檔案系統inode數是否有限制?
- OS學習筆記六:檔案系統筆記
- DBA_Oracle基本體系記憶體和程式結構(概念)Oracle記憶體
- ext4和xfs檔案系統的擴容和收縮