Flash Filesystem Advanced Filesystem Ext4 / Btrfs POHMELFS AXFS / Logfs / UBIFS
Total Page:16
File Type:pdf, Size:1020Kb
Developing a Simple Filesystem I. File Concepts II. File IO / Standard IO Library III. Filesystem-based Concepts IV. Kernel Concepts V. Developing Filesystem VI. Other Filesystem 2008. 10. 31 LG Electronics Software Center 심재훈 1 File concepts File Type Regular / Directories / Symbolic Links / Hard Links / Named Pipes Special Files File Descriptors handle through which the file can be subsequently accessed Basic File Properties file type / access permission link count owner and group of file size / date / name 2 File concepts Basic File Properties(cont’d) 3 File concepts # VFS Inode structure 4 File concepts {“.”, 3333} / {“..”, 2} inode {“directory”, 12668259} KELP_seminar/ 3333 {“file”, 8463735} 3 {“hard_link”, 8463735} {“symbolic_link”, 8463736} {“symbolic_link2”, 8463740} directory/ file hard_link symbolic_link symbolic_link2 inode inode inode inode 12668259 8463735 8463736 8463740 inode number 2 2 1 1 nlink {“.”, 12668259} regular file “file” “./file” {“..”, 3333} hard_link 5 File IO basic file operations open() / creat() / close() / lseek() / read() / write() . truncate() / unlink() seeking and IO combined . pread() / pwrite() . read-write loop 1 million times. 35sec → 25sec vectored IO . readv() / writev() . use when single-read data needs to be placed in different areas of memory. asynchronous IO . aio_read() / aio_write() / aio_return() / aio_error() / aio_cancel() file and record locking . 
fcntl() : F_GETLK / F_SETLK / F_SETLKW memory mapped files, direct IO sparse file 6 File IO ## Vectored IO ## user address space readv(fd, &uiop, 3) ## Sparse File ## struct uio uip = { {addr1, 512}, {addr2, 512}, file size addr2 {addr3, 1024} }; not allocated area addr1 file offset ## Memory Mapping IO ## ## Direct IO ## user address space addr = mmap(NULL, MAPSZ, user address space fd = open(filename, O_DIRECT, mode) PROT_READ, MAP_SHARED, fd, 0); write(fd, buf, size); memcpy(addr, buf, size); page cache file offset file offset 7 Filesystem concepts Properties has root directory(/) and lost+found directory(most disk-based FS) each file and directory is identified uniquely by inode . normally root inode (2), lost+found (3) self-contained . no dependencies between filesystems clean / dirty state Disk, Partition, Volumes Creating filesystem : mkfs Repairing filesystem : fsck journaling / log-structured FS User / Group quotas 8 kernel concepts VFS objects on-disk layout file descriptors / file table inode cache / page cache pathname resolution opening file / reading file / closing files 9 Linux Filesystem Structure Overview User application User space system call Virtual FileSystem NFS network Ext3 FAT JFFS Yaffs SMB buffer / page cache Kernel space io interface FTL block device driver Flash driver storage device 10 VFS Object Virtual Filesystem Object[2] superblock object . stores information concerning a mounted filesystem. corresponds to filesystem control block stored on disk. inode object . stores general information about a specific file. corresponds to file control block stored on disk. file object . stores information about the interaction between open file and process dentry object . stores information about the linking of a directory entry with the corresponding file . 
recently used dentries are contained in the dentry cache 11 VFS objects Interaction between process and VFS objects[2] page disk file cache Superblock Inode object object i_sb d_inode fd Process 1 File object f_dentry Process 2 File object dentry dentry object object Process 3 File object dentry cache 12 VFS Object File object associated with process struct fs_struct count f_flags root f_mode pwd f_dentry dentry object rootmnt f_pos pwdmnt f_count altroot f_op struct task_struct struct file fs files fdt max_fds fdtab fd next_fd open_fds f_flags f_mode fd_array[0] f_dentry dentry object fd_array[1] f_pos fd_array[2] f_count fd_array[3] f_op struct file fd_array[N] struct file 13 VFS Object dentry d_subdirs d_u.d_child d_parent dentry_hashtable[] d_hash 0 1 d_subdirs d_subdirs 2 d_u.d_child d_u.d_child 3 d_parent d_parent d_hash d_hash d_subdirs d_u.d_child d_parent i_dentry d_subdirs d_hash d_u.d_child d_lru d_parent d_alias d_hash d_inode d_lru d_name struct inode d_alias d_op struct dentry d_sb superblock struct super_block 14 VFS Object inode inode_hashtable[] inode_in_use inode_unused 0 i_hash i_hash 1 i_list i_list 2 i_dentry i_dentry 3 i_sb i_sb i_hash i_list i_hash i_hash i_dentry i_list i_list i_sb i_dentry i_dentry i_sb i_sb i_op i_hash i_fop i_list dentry i_mapping i_dentry i_private i_sb specific inode structure page cache sb->s_dirty struct super_block SB dinode volume 15 VFS Object superblock & file_system_type super_blocks sb_lock s_list s_list s_list s_instances s_instances s_instances s_dirty s_dirty s_dirty s_inodes i_hash i_hash s_inodes s_inodes s_dirt i_list i_list s_dirt s_dirt s_root i_dentry i_dentry s_root s_root s_type i_sb i_sb s_type s_type s_fs_info s_fs_info ext3 jffs proc specific superblock structure name name name fs_supers fs_supers fs_supers get_sb() get_sb() get_sb() SB file_systems kill_sb() kill_sb() kill_sb() next next next volume file_systems_lock fs_flags fs_flags fs_flags struct file_system_type 16 VFS Object Virtual Filesystem 
Operations struct super_operations . alloc_inode / destroy_inode / read_inode / write_inode / delete_inode . statfs / put_super struct inode_operations . create / link / unlink / lookup / mkdir / rmdir / setattr / truncate struct file_operations . open / llseek / read / write / mmap / ioctl struct address_space_operations . writepage / readpage / direct_IO / releasepage struct dentry_operations . d_revalidate / d_hash / d_compare 17 VFS Object Main Structure for file access super_blocks struct task_struct s_list files fd s_op struct files_struct struct super_block alloc_inode / destroy_inode f_flags d_sb f_mode d_inode f_dentry d_op i_sb f_pos struct inode i_op f_reada struct dentry i_mapping f_op struct address_space struct file a_ops create llseek lookup writepage / readpage d_revalidate read / write link / unlink sync_page d_hash aio_read / aio_write symlink writepages / readpages d_delete readdir mkdir / rmdir set_page_dirty d_release poll mknod prepare_write d_compare ioctl rename commit_write mmap readlink bmap open / release follow_link direct_IO flush / fsync / fasync truncate get_xip_page lock / flock truncate_range invalidatepage setattr releasepage 18 Reading file example fd = open(filename, flag) read(fd, buf, 512); kernel 2.6 kernel 2.4 kernel 2.2 buf buf buf user mode kernel mode struct task_struct struct file pathname struct inode struct dentry page cache lookup find block page cache page cache struct address_space buffer cache (0th block) (radix tree) (hashtable) I/O I/O I/O on disk using bio using bh using bh block 0 superblock inode for filename inodes 0th block data 19 Developing filesystem design filesystem module init / exit mount / umount directory lookup / pathname resolution inode manipulation alloc / write / delete file creating / link management creating / removing directories filesystem status 20 Filesystem Analysis Basic Categories Filesystem / Filename / Metadata / Contents / Application [10] Journaling Filesystem Application Category 
Category Layout & Size Quota Data Superblock Information Resource Group File Name Metadata Contents Category Category Category Times & Content Data file1.txt Address #1 Content Data #2 Times & file2.txt Address Content Data Directory structure(inode) File structure(inode) #1 Journal / Journaling Allocation/Deallocation Category Recovery 21 UXFS Layout Super lost+ root Inodes Data Blocks Block found block 0 block 8 ~ 39 40 41 for each inode struct ux_superblock { __u32 s_magic; __u32 s_mod; __u32 s_nifree; __u32 s_inode[UX_MAXFILES]; __u32 s_nbfree; __u32 s_block[UX_MAXBLOCKS]; }; struct ux_inode { __u32 i_mode; __u32 i_nlink; __u32 i_atime; __u32 i_mtime; __u32 i_ctime; __u32 i_uid; __u32 i_gid; __u32 i_size; __u32 i_blocks; __u32 i_addr[UX_DIRECT_BLOCKS]; }; struct ux_dirent { __u32 d_ino; char d_name[28]; }; d_ino = 2, d_name = “.” d_ino = 2, d_name = “..” d_ino = 3, d_name = “lost+found” d_ino = 4, d_name = “fred” d_ino = 0, d_name = “” #define UX_DIRECT_BLOCKS 16 #define UX_MAXFILES 32 #define UX_MAXBLOCKS 470 22 UXFS Layout Design Detail supports only 512-byte blocks (UX_BSIZE) fixed number of blocks (UX_MAXBLOCKS) . 470 blocks superblock is stored in block 0 there are only 32 inodes (UX_MAXFILES) . 실제로는 28개 사용가능(0, 1, root inode:2, lost+found:3 제외) . has 9 direct pointers. → limits the file size to (9 * 512) first data block is 42nd . 40th block stores root directory entries . 41st block stores lost+found directory entries directory entries are fixed in size (32 bytes) . max filename size is 28 bytes 23 Filesystem Registration register_filesystem() struct file_system_type name fs_flags get_sb() kill_sb() next fs_supers s_lock_key s_umount_key static struct file_system_type uxfs_fs_type = { .owner = THIS_MODULE, .name = "uxfs", .get_sb = ux_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; ... 
register_filesystem(&uxfs_fs_type); ext3 jffs proc file_systems name name name file_systems_lock fs_flags fs_flags fs_flags get_sb() get_sb() get_sb() kill_sb() kill_sb() kill_sb() next next next fs_supers fs_supers fs_supers struct file_system_type s_instances s_instances s_instances struct super_block 24 Filesystem mount # mount -t uxfs /dev/sdd1 /mnt/testdir / mnt/ testdir find dentry user using hash mode find directory lookup : ino = ux_find_entry() kernel /mnt/testdir pathname