1 /* Copyright (c) 2010 The Regents of the University of California
2 * Barret Rhoden <brho@cs.berkeley.edu>
3 * See LICENSE for details.
5 * Ext2, VFS required functions, internal functions, life, the universe, and
17 #include <arch/bitmask.h>
19 /* These structs are declared again and initialized farther down */
20 struct page_map_operations ext2_pm_op;
21 struct super_operations ext2_s_op;
22 struct inode_operations ext2_i_op;
23 struct dentry_operations ext2_d_op;
24 struct file_operations ext2_f_op_file;
25 struct file_operations ext2_f_op_dir;
26 struct file_operations ext2_f_op_sym;
28 /* EXT2 Internal Functions */
30 /* Useful helper functions. */
32 /* Returns the block group ID of the BG containing the inode. BGs start with 0,
33 * inodes are indexed starting at 1. */
34 static struct ext2_block_group *ext2_inode2bg(struct inode *inode)
36 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
37 unsigned int bg_num = (inode->i_ino - 1) /
38 le32_to_cpu(e2sbi->e2sb->s_inodes_per_group);
39 return &e2sbi->e2bg[bg_num];
42 /* This returns the inode's 0-index within a block group */
43 static unsigned int ext2_inode2bgidx(struct inode *inode)
45 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
46 return (inode->i_ino - 1) % le32_to_cpu(e2sbi->e2sb->s_inodes_per_group);
49 /* Returns an uncounted reference to the BG in the BG table, which is pinned,
50 * hanging off the sb. Note, the BGs cover the blocks starting from the first
51 * data block, not from 0. So if the FDB is 1, BG 0 covers 1 through 1024, and
52 * not 0 through 1023. */
53 static struct ext2_block_group *ext2_block2bg(struct super_block *sb,
56 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
58 bg_num = (blk_num - le32_to_cpu(e2sbi->e2sb->s_first_data_block)) /
59 le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
60 return &e2sbi->e2bg[bg_num];
63 /* This returns the block's 0-index within a block group. Note all blocks are
64 * offset by FDB when dealing with BG membership. */
65 static unsigned int ext2_block2bgidx(struct super_block *sb, uint32_t blk_num)
67 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
68 return (blk_num - le32_to_cpu(e2sbi->e2sb->s_first_data_block)) %
69 le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
72 /* Returns the FS block for the given BG's idx block */
73 static uint32_t ext2_bgidx2block(struct super_block *sb,
74 struct ext2_block_group *bg,
77 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
78 struct ext2_sb *e2sb = e2sbi->e2sb;
79 struct ext2_block_group *e2bg = e2sbi->e2bg;
80 return (bg - e2bg) * le32_to_cpu(e2sb->s_blocks_per_group) + blk_idx +
81 le32_to_cpu(e2sb->s_first_data_block);
84 /* Slabs for ext2 specific info chunks */
85 struct kmem_cache *ext2_i_kcache;
87 /* One-time init for all ext2 instances */
90 ext2_i_kcache = kmem_cache_create("ext2_i_info", sizeof(struct ext2_i_info),
91 __alignof__(struct ext2_i_info), 0, 0, 0);
94 /* Block management */
96 /* Helper op to read one ext2 block, 0-indexing the block numbers. Kfree your
99 * TODO: consider taking a buffer_head, or having a generic block_dev function
100 * for this. Currently this is just using the BH to talk to breq, need to make
101 * it use the page mapping. */
102 void *__ext2_read_block(struct block_device *bdev, int block_num, int blocksize)
105 void *buffer = kmalloc(blocksize, 0);
106 struct block_request *breq = kmem_cache_alloc(breq_kcache, 0);
107 struct buffer_head *bh = kmem_cache_alloc(bh_kcache, 0);
108 assert(buffer && breq && bh);
110 /* Build the BH describing the mapping we want */
111 bh->bh_buffer = buffer; // TODO: have a real page
112 bh->bh_sector = block_num * (blocksize >> SECTOR_SZ_LOG);
113 bh->bh_nr_sector = blocksize >> SECTOR_SZ_LOG;
114 /* Build and submit the request */
115 breq->flags = BREQ_READ;
116 breq->bhs = breq->local_bhs;
119 retval = make_request(bdev, breq);
121 kmem_cache_free(breq_kcache, breq);
122 kmem_cache_free(bh_kcache, bh); /* TODO: shouldn't disconnect this */
126 /* TODO: pull these metablock functions out of ext2 */
127 /* Makes sure the FS block of metadata is in memory. This returns a pointer to
128 * the beginning of the requested block. Release it with put_metablock().
129 * Internally, the kreffing is done on the page. */
130 void *__ext2_get_metablock(struct block_device *bdev, unsigned long blk_num,
134 struct page_map *pm = &bdev->b_pm;
135 unsigned int blk_per_pg = PGSIZE / blk_sz;
136 unsigned int blk_offset = (blk_num % blk_per_pg) * blk_sz;
138 assert(blk_offset < PGSIZE);
139 error = pm_load_page(pm, blk_num / blk_per_pg, &page);
141 warn("Failed to read metablock! (%d)", error);
144 /* return where we are within the page for the given block */
145 return page2kva(page) + blk_offset;
148 /* Convenience wrapper */
149 void *ext2_get_metablock(struct super_block *sb, unsigned long block_num)
151 return __ext2_get_metablock(sb->s_bdev, block_num, sb->s_blocksize);
/* Drops the reference taken by get_metablock().  The reference lives on the
 * page backing 'buffer', so the buffer is mapped back to its page and that
 * page is decref'd.  Call once you are done with the metadata block. */
void ext2_put_metablock(void *buffer)
{
	struct page *backing_pg = kva2page(buffer);

	page_decref(backing_pg);
}
161 /* Will dirty the block/BH/page for the given metadata block/buffer. Will have
162 * to be careful with the page reclaimer - if someone holds a reference, they
163 * can still dirty it. */
164 void ext2_dirty_metablock(void *buffer)
166 struct page *page = kva2page(buffer);
167 /* TODO: race on flag modification, and consider dirtying the BH. */
168 page->pg_flags |= PG_DIRTY;
171 /* Reads a block of file data. TODO: Function name and guts will change soon */
172 void *ext2_read_fileblock(struct super_block *sb, unsigned int block_num)
174 /* note, we might get rid of this read block, if all files use pages */
175 return __ext2_read_block(sb->s_bdev, block_num, sb->s_blocksize);
178 /* Helper for alloc_block. It will try to alloc a block from the BG, starting
179 * with blk_idx (relative number within the BG). If successful, it will return
180 * the FS block number via *block_num. TODO: concurrency protection */
181 static bool ext2_tryalloc(struct super_block *sb, struct ext2_block_group *bg,
182 unsigned int blk_idx, uint32_t *block_num)
185 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)sb->s_fs_info;
186 unsigned int blks_per_bg = le32_to_cpu(e2sbi->e2sb->s_blocks_per_group);
189 /* Check to see if there are any free blocks */
/* NOTE(review): bg_free_blocks_cnt is converted with le16_to_cpu() in
 * ext2_check_sb() but with le32_to_cpu() here - confirm the field width. */
190 if (!le32_to_cpu(bg->bg_free_blocks_cnt))
192 /* Check the bitmap for your desired block. We'll loop through the whole
193 * BG, starting with the one we want first. */
/* NOTE(review): bg_block_bitmap is passed without an endian conversion, while
 * ext2_check_sb() reads it via le32_to_cpu() - presumably the same conversion
 * is wanted here; harmless only on little-endian CPUs. */
194 blk_bitmap = ext2_get_metablock(sb, bg->bg_block_bitmap);
195 for (int i = 0; i < blks_per_bg; i++) {
196 if (!(GET_BITMASK_BIT(blk_bitmap, blk_idx))) {
197 SET_BITMASK_BIT(blk_bitmap, blk_idx);
/* NOTE(review): decrements the descriptor's stored value directly, with no
 * cpu<->le conversion, unlike the read of the same field above. */
198 bg->bg_free_blocks_cnt--;
199 ext2_dirty_metablock(blk_bitmap);
203 /* Note: the wrap-around hasn't been tested yet */
204 blk_idx = (blk_idx + 1) % blks_per_bg;
/* Release the bitmap metablock reference taken by ext2_get_metablock(). */
206 ext2_put_metablock(blk_bitmap);
/* Translate the BG-relative index back into an FS block number. */
208 *block_num = ext2_bgidx2block(sb, bg, blk_idx);
212 /* This allocates a fresh block for the inode, preferably 'fetish' (name
213 * courtesy of L.F.), returning the FS block number that's been allocated.
214 * Note, Linux does some block preallocation here. Consider doing the same (off
215 * the in-memory inode). Note the lack of concurrency protections here. */
216 uint32_t ext2_alloc_block(struct inode *inode, uint32_t fetish)
218 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
219 struct ext2_block_group *fetish_bg, *bg_i = e2sbi->e2bg;
220 unsigned int blk_idx;
225 /* Get our ideal starting point */
226 fetish_bg = ext2_block2bg(inode->i_sb, fetish);
227 blk_idx = ext2_block2bgidx(inode->i_sb, fetish);
228 /* Try to find a free block in the BG of the one we desire */
229 found = ext2_tryalloc(inode->i_sb, fetish_bg, blk_idx, &retval);
233 warn("This part hasn't been tested yet.");
234 /* Find a block anywhere else (perhaps using the log trick, but for now just
235 * linearly scanning). */
236 for (int i = 0; i < e2sbi->nr_bgs; i++, bg_i++) {
237 if (bg_i == fetish_bg)
239 found = ext2_tryalloc(inode->i_sb, bg_i, 0, &retval);
244 panic("Ran out of blocks! (probably a bug)");
248 /* Inode Table Management */
250 /* Helper for ino table management. blkid is the inode table block we are
251 * looking in, rel_blkid is the block we want, relative to the current
252 * threshold for a level of indirection, and reach is how many items a given
253 * slot indexes. Returns a pointer to the slot for the given block. */
254 static uint32_t *ext2_find_inotable_slot(struct inode *inode, uint32_t blkid,
258 uint32_t *blk_buf = ext2_get_metablock(inode->i_sb, blkid);
260 return &blk_buf[rel_blkid / reach];
263 /* If blk_slot is empty (no block mapped there) it will alloc and link a new
264 * block. This is only used for allocating a block to be an indirect table
265 * (it's grabbing a metablock, we have no hint, and it handles the buffer
266 * differently than for a file page/buffer). */
267 static void ext2_fill_inotable_slot(struct inode *inode, uint32_t *blk_slot)
269 uint32_t new_blkid, hint_blk;
272 if (le32_to_cpu(*blk_slot))
274 /* Use any block in our inode's BG as a hint for the indirect block */
275 hint_blk = ext2_bgidx2block(inode->i_sb, ext2_inode2bg(inode), 0);
276 new_blkid = ext2_alloc_block(inode, hint_blk);
277 /* Actually read in the block we alloc'd */
278 new_blk = ext2_get_metablock(inode->i_sb, new_blkid);
279 memset(new_blk, 0, inode->i_sb->s_blocksize);
280 ext2_dirty_metablock(new_blk);
281 /* We put it, despite it getting relooked up in the next walk */
282 ext2_put_metablock(new_blk);
283 /* Now write the new block into its slot */
284 *blk_slot = cpu_to_le32(new_blkid);
285 ext2_dirty_metablock(blk_slot);
/* Takes one step down an inode's indirection tables.
 *
 * On entry *blkid names the table block to look in and *rel_inoblk is the
 * wanted block, relative to the current level of indirection.  'reach' is how
 * many blocks each *entry* of this table can index (e.g. when standing on a 3x
 * indirect block, reach is that of a doubly-indirect entry).
 *
 * On return *blkid is the block to walk next and *rel_inoblk has been reduced
 * to the position wanted within that next table.
 *
 * If the slot is empty, an intermediate table is allocated for it.  TODO:
 * concurrency protection on modifying the table. */
static void ext2_walk_inotable(struct inode *inode, uint32_t *blkid,
                               uint32_t *rel_inoblk, unsigned int reach)
{
	uint32_t *slot = ext2_find_inotable_slot(inode, *blkid, *rel_inoblk, reach);

	/* Allocation could be made optional via a bool, but anyone walking the
	 * table ought to want a block to be there. */
	ext2_fill_inotable_slot(inode, slot);
	*blkid = le32_to_cpu(*slot);
	*rel_inoblk %= reach;
	/* Drop the ref on the table block we just looked in */
	ext2_put_metablock(slot);
}
310 /* Finds the slot of the FS block corresponding to a specific block number of an
311 * inode. It does this by walking the inode's tables. The general idea is that
312 * if the ino_block num is above a threshold, we'll need to go into indirect
313 * tables (1x, 2x, or 3x (triply indirect) tables). Block numbers start at 0.
315 * This returns a pointer within a metablock, which needs to be decref'd (and
316 * possibly dirtied) when you are done.
318 * Horrendously untested, btw. */
319 uint32_t *ext2_lookup_inotable_slot(struct inode *inode, uint32_t ino_block)
321 struct ext2_i_info *e2ii = (struct ext2_i_info*)inode->i_fs_info;
323 uint32_t blkid, *blk_slot;
324 /* The 'reach' is how many blocks a given table can 'address' */
325 int ptrs_per_blk = inode->i_sb->s_blocksize / sizeof(uint32_t);
326 int reach_1xblk = ptrs_per_blk;
327 int reach_2xblk = ptrs_per_blk * ptrs_per_blk;
328 /* thresholds are the first blocks that require a level of indirection */
329 int single_threshold = 12;
330 int double_threshold = single_threshold + reach_1xblk;
331 int triple_threshold = double_threshold + reach_2xblk;
332 /* this is the desired block num lookup within a level of indirection. It
333 * will need to be offset based on what level of lookups we want (try it in
334 * your head with 12 first). */
337 if (ino_block >= triple_threshold) {
338 /* ino_block requires a triply-indirect lookup */
339 rel_inoblk = ino_block - triple_threshold;
340 /* Make sure a 14 block (3x indirect) is there */
341 ext2_fill_inotable_slot(inode, &e2ii->i_block[14]);
342 blkid = e2ii->i_block[14];
343 ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_2xblk);
344 ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_1xblk);
345 blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
346 } else if (ino_block >= double_threshold) {
347 /* ino_block requires a doubly-indirect lookup */
348 rel_inoblk = ino_block - double_threshold;
349 ext2_fill_inotable_slot(inode, &e2ii->i_block[13]);
350 blkid = e2ii->i_block[13];
351 ext2_walk_inotable(inode, &blkid, &rel_inoblk, reach_1xblk);
352 blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
353 } else if (ino_block >= single_threshold) {
354 /* ino_block requires a singly-indirect lookup */
355 rel_inoblk = ino_block - single_threshold;
356 ext2_fill_inotable_slot(inode, &e2ii->i_block[12]);
357 blkid = e2ii->i_block[12];
358 blk_slot = ext2_find_inotable_slot(inode, blkid, rel_inoblk, 1);
360 /* Direct block, straight out of the inode */
361 blk_slot = &e2ii->i_block[ino_block];
362 /* need to incref, since the i_block isn't a real metablock (it's just a
363 * random page!), and the caller is going to end up decreffing it */
364 page_incref(kva2page(blk_slot));
369 /* Determines the FS block id for a given inode block id. Convenience wrapper
370 * that may go away soon. */
371 uint32_t ext2_find_inoblock(struct inode *inode, unsigned int ino_block)
373 uint32_t retval, *buf = ext2_lookup_inotable_slot(inode, ino_block);
375 ext2_put_metablock(buf);
379 /* Returns a kmalloc'd block for the contents of the ino block. Kept around for
380 * a couple commits, will prob go away soon */
381 void *ext2_read_ino_block(struct inode *inode, unsigned int ino_block)
383 unsigned long blkid = ext2_find_inoblock(inode, ino_block);
384 return ext2_read_fileblock(inode->i_sb, blkid);
387 /* This should help with degubbing. In read_inode(), print out the i_block, and
388 * consider manually (via memory inspection) examining those blocks. Odds are,
389 * the 2x and 3x walks are jacked up. */
390 void ext2_print_ino_blocks(struct inode *inode)
392 printk("Inode %08p, Size: %d, 512B 'blocks;: %d\n-------------\n", inode,
393 inode->i_size, inode->i_blocks);
394 for (int i = 0; i < inode->i_blocks; i++)
395 printk("# %03d, Block %03d\n", i, ext2_find_inoblock(inode, i));
400 /* This checks an ext2 disc SB for consistency, optionally printing out its
401 * stats. It will also read in a copy of the block group descriptor table
402 * from its first location (right after the primary SB copy) */
403 void ext2_check_sb(struct ext2_sb *e2sb, struct ext2_block_group *bg,
407 unsigned int blksize, blks_per_group, num_blk_group, num_blks;
408 unsigned int inodes_per_grp, inode_size;
409 unsigned int sum_blks = 0, sum_inodes = 0;
411 assert(le16_to_cpu(e2sb->s_magic) == EXT2_SUPER_MAGIC);
412 num_blks = le32_to_cpu(e2sb->s_blocks_cnt);
413 blksize = 1024 << le32_to_cpu(e2sb->s_log_block_size);
414 blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
/* Number of block groups: total blocks divided by blocks per group, rounded
 * up so a partial last group is counted. */
415 num_blk_group = num_blks / blks_per_group + (num_blks % blks_per_group ? 1 : 0);
418 printk("EXT2 info:\n-------------------------\n");
419 printk("Total Inodes: %8d\n", le32_to_cpu(e2sb->s_inodes_cnt));
420 printk("Total Blocks: %8d\n", le32_to_cpu(e2sb->s_blocks_cnt));
421 printk("Num R-Blocks: %8d\n", le32_to_cpu(e2sb->s_rblocks_cnt));
422 printk("Num Free Blocks: %8d\n", le32_to_cpu(e2sb->s_free_blocks_cnt));
423 printk("Num Free Inodes: %8d\n", le32_to_cpu(e2sb->s_free_inodes_cnt));
424 printk("First Data Block: %8d\n",
425 le32_to_cpu(e2sb->s_first_data_block));
426 printk("Block Size: %8d\n",
427 1024 << le32_to_cpu(e2sb->s_log_block_size));
428 printk("Fragment Size: %8d\n",
429 1024 << le32_to_cpu(e2sb->s_log_frag_size));
430 printk("Blocks per group: %8d\n",
431 le32_to_cpu(e2sb->s_blocks_per_group));
432 printk("Inodes per group: %8d\n",
433 le32_to_cpu(e2sb->s_inodes_per_group));
434 printk("Block groups: %8d\n", num_blk_group);
435 printk("Mount state: %8d\n", le16_to_cpu(e2sb->s_state));
/* NOTE(review): the "Rev Level" line prints s_minor_rev_level, the same field
 * as the "Minor Rev Level" line below (and with a 32-bit conversion instead
 * of 16-bit).  Presumably the major revision field was intended - confirm
 * against struct ext2_sb. */
436 printk("Rev Level: %8d\n", le32_to_cpu(e2sb->s_minor_rev_level));
437 printk("Minor Rev Level: %8d\n", le16_to_cpu(e2sb->s_minor_rev_level));
438 printk("Creator OS: %8d\n", le32_to_cpu(e2sb->s_creator_os));
439 printk("First Inode: %8d\n", le32_to_cpu(e2sb->s_first_ino));
440 printk("Inode size: %8d\n", le16_to_cpu(e2sb->s_inode_size));
441 printk("This block group: %8d\n", le16_to_cpu(e2sb->s_block_group_nr));
442 printk("BG ID of 1st meta:%8d\n", le16_to_cpu(e2sb->s_first_meta_bg));
443 printk("Volume name: %s\n", e2sb->s_volume_name);
444 printk("\nBlock Group Info:\n----------------------\n");
/* Sum the per-group free block/inode counts; they are compared against the
 * superblock totals in the assertions below. */
447 for (int i = 0; i < num_blk_group; i++) {
448 sum_blks += le16_to_cpu(bg[i].bg_free_blocks_cnt);
449 sum_inodes += le16_to_cpu(bg[i].bg_free_inodes_cnt);
451 printk("*** BG %d at %08p\n", i, &bg[i]);
452 printk("Block bitmap:%8d\n", le32_to_cpu(bg[i].bg_block_bitmap));
453 printk("Inode bitmap:%8d\n", le32_to_cpu(bg[i].bg_inode_bitmap));
454 printk("Inode table: %8d\n", le32_to_cpu(bg[i].bg_inode_table));
455 printk("Free blocks: %8d\n", le16_to_cpu(bg[i].bg_free_blocks_cnt));
456 printk("Free inodes: %8d\n", le16_to_cpu(bg[i].bg_free_inodes_cnt));
457 printk("Used Dirs: %8d\n", le16_to_cpu(bg[i].bg_used_dirs_cnt));
461 /* Sanity Assertions. A good ext2 will always pass these. */
462 inodes_per_grp = le32_to_cpu(e2sb->s_inodes_per_group);
463 blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
/* NOTE(review): s_inode_size is converted with le16_to_cpu() in the printk
 * above and in ext2_read_inode(); le32_to_cpu() here is inconsistent and
 * would mis-read a 16-bit field on a big-endian CPU - confirm field width. */
464 inode_size = le32_to_cpu(e2sb->s_inode_size);
465 assert(le32_to_cpu(e2sb->s_inodes_cnt) <= inodes_per_grp * num_blk_group);
466 assert(le32_to_cpu(e2sb->s_free_inodes_cnt) == sum_inodes);
467 assert(le32_to_cpu(e2sb->s_blocks_cnt) <= blks_per_group * num_blk_group);
468 assert(le32_to_cpu(e2sb->s_free_blocks_cnt) == sum_blks);
470 assert(le32_to_cpu(e2sb->s_first_data_block) == 1);
472 assert(le32_to_cpu(e2sb->s_first_data_block) == 0);
473 assert(inode_size <= blksize);
/* inode size must be a power of two */
474 assert(inode_size == 1 << LOG2_UP(inode_size));
475 assert(blksize * 8 >= inodes_per_grp);
476 assert(inodes_per_grp % (blksize / inode_size) == 0);
478 printk("Passed EXT2 Checks\n");
481 /* VFS required Misc Functions */
483 /* Creates the SB. Like with Ext2's, we should consider pulling out the
484 * FS-independent stuff, if possible. */
485 struct super_block *ext2_get_sb(struct fs_type *fs, int flags,
486 char *dev_name, struct vfsmount *vmnt)
488 struct block_device *bdev;
489 struct ext2_sb *e2sb;
490 struct ext2_block_group *e2bg;
491 unsigned int blks_per_group, num_blk_group, num_blks;
493 static bool ran_once = FALSE;
498 bdev = get_bdev(dev_name);
500 /* Read the SB. It's always at byte 1024 and 1024 bytes long. Note we do
501 * not put the metablock (we pin it off the sb later). Same with e2bg. */
502 e2sb = (struct ext2_sb*)__ext2_get_metablock(bdev, 1, 1024);
503 if (!(le16_to_cpu(e2sb->s_magic) == EXT2_SUPER_MAGIC)) {
504 warn("EXT2 Not detected when it was expected!");
507 /* Read in the block group descriptor table. Which block the BG table is on
508 * depends on the blocksize */
509 unsigned int blksize = 1024 << le32_to_cpu(e2sb->s_log_block_size);
510 e2bg = __ext2_get_metablock(bdev, blksize == 1024 ? 2 : 1, blksize);
512 ext2_check_sb(e2sb, e2bg, FALSE);
514 /* Now we build and init the VFS SB */
515 struct super_block *sb = get_sb();
516 sb->s_dev = 0; /* what do we really want here? */
517 sb->s_blocksize = blksize;
518 /* max file size for a 1024 blocksize FS. good enough for now (TODO) */
519 sb->s_maxbytes = 17247252480;
520 sb->s_type = &ext2_fs_type;
521 sb->s_op = &ext2_s_op;
522 sb->s_flags = flags; /* from the disc too? which flags are these? */
523 sb->s_magic = EXT2_SUPER_MAGIC;
524 sb->s_mount = vmnt; /* Kref? also in KFS */
525 sb->s_syncing = FALSE;
526 kref_get(&bdev->b_kref, 1);
528 strlcpy(sb->s_name, "EXT2", 32);
529 sb->s_fs_info = kmalloc(sizeof(struct ext2_sb_info), 0);
530 assert(sb->s_fs_info);
531 /* store the in-memory copy of the disk SB and bg desc table */
532 ((struct ext2_sb_info*)sb->s_fs_info)->e2sb = e2sb;
533 ((struct ext2_sb_info*)sb->s_fs_info)->e2bg = e2bg;
534 /* Precompute the number of BGs */
535 num_blks = le32_to_cpu(e2sb->s_blocks_cnt);
536 blks_per_group = le32_to_cpu(e2sb->s_blocks_per_group);
537 ((struct ext2_sb_info*)sb->s_fs_info)->nr_bgs = num_blks / blks_per_group +
538 (num_blks % blks_per_group ? 1 : 0);
540 /* Final stages of initializing the sb, mostly FS-independent */
541 init_sb(sb, vmnt, &ext2_d_op, EXT2_ROOT_INO, 0);
543 printk("EXT2 superblock loaded\n");
544 kref_put(&bdev->b_kref);
/* Tears down an ext2 superblock.  Not supported: this panics unconditionally.
 * A real implementation must not forget to release s_fs_info and the two
 * members it tracks. */
void ext2_kill_sb(struct super_block *sb)
{
	panic("Killing an EXT2 SB is not supported!");
}
554 /* Every FS must have a static FS Type, with which the VFS code can bootstrap */
555 struct fs_type ext2_fs_type = {"EXT2", 0, ext2_get_sb, ext2_kill_sb, {0, 0},
556 TAILQ_HEAD_INITIALIZER(ext2_fs_type.fs_supers)};
558 /* Page Map Operations */
560 /* Sets up the bidirectional mapping between the page and its buffer heads. As
561 * a future optimization, we could try and detect if all of the blocks are
562 * contiguous (either before or after making them) and compact them to one BH.
563 * Note there is an assumption that the file has at least one block in it. */
564 int ext2_mappage(struct page_map *pm, struct page *page)
566 struct buffer_head *bh;
567 struct inode *inode = (struct inode*)pm->pm_host;
568 assert(!page->pg_private); /* double check that we aren't bh-mapped */
569 assert(inode->i_mapping == pm); /* double check we are the inode for pm */
570 struct block_device *bdev = inode->i_sb->s_bdev;
571 unsigned int blk_per_pg = PGSIZE / inode->i_sb->s_blocksize;
572 unsigned int sct_per_blk = inode->i_sb->s_blocksize / bdev->b_sector_sz;
573 uint32_t ino_blk_num, fs_blk_num = 0, *fs_blk_slot;
575 bh = kmem_cache_alloc(bh_kcache, 0);
576 page->pg_private = bh;
577 for (int i = 0; i < blk_per_pg; i++) {
578 /* free_bh() can handle having a halfway aborted mappage() */
581 bh->bh_page = page; /* weak ref */
582 bh->bh_buffer = page2kva(page) + i * inode->i_sb->s_blocksize;
583 bh->bh_flags = 0; /* whatever... */
584 bh->bh_bdev = bdev; /* uncounted ref */
585 /* compute the first sector of the FS block for the ith buf in the pg */
586 ino_blk_num = page->pg_index * blk_per_pg + i;
587 fs_blk_slot = ext2_lookup_inotable_slot(inode, ino_blk_num);
588 /* If there isn't a block there, lets get one. The previous fs_blk_num
589 * is our hint (or we have to compute one). */
592 fs_blk_num = ext2_bgidx2block(inode->i_sb,
593 ext2_inode2bg(inode), 0);
595 fs_blk_num = ext2_alloc_block(inode, fs_blk_num + 1);
596 /* Link it, and dirty the inode indirect block */
597 *fs_blk_slot = cpu_to_le32(fs_blk_num);
598 ext2_dirty_metablock(fs_blk_slot);
599 /* the block is still on disk, and we don't want its contents */
600 bh->bh_flags = BH_NEEDS_ZEROED; /* talking to readpage */
601 /* update our num blocks, with 512B each "block" (ext2-style) */
602 inode->i_blocks += inode->i_sb->s_blocksize >> 9;
603 } else { /* there is a block there already */
604 fs_blk_num = *fs_blk_slot;
606 ext2_put_metablock(fs_blk_slot);
607 bh->bh_sector = fs_blk_num * sct_per_blk;
608 bh->bh_nr_sector = sct_per_blk;
609 /* Stop if we're the last block in the page. We could be going beyond
610 * the end of the file, in which case the next BHs will be zeroed. */
611 if (i == blk_per_pg - 1) {
615 /* get and link to the next BH. */
616 bh->bh_next = kmem_cache_alloc(bh_kcache, 0);
623 /* Fills page with its contents from its backing store file. Note that we do
624 * the zero padding here, instead of higher in the VFS. Might change in the
625 * future. TODO: make this a block FS generic call. */
626 int ext2_readpage(struct page_map *pm, struct page *page)
629 struct block_device *bdev = pm->pm_host->i_sb->s_bdev;
630 struct buffer_head *bh;
631 struct block_request *breq;
634 assert(page->pg_flags & PG_BUFFER);
635 retval = ext2_mappage(pm, page);
640 /* Build and submit the request */
641 breq = kmem_cache_alloc(breq_kcache, 0);
646 breq->flags = BREQ_READ;
647 breq->bhs = breq->local_bhs;
649 /* Pack the BH pointers in the block request */
650 bh = (struct buffer_head*)page->pg_private;
652 /* Either read the block in, or zero the buffer. If we wanted to ensure no
653 * data is leaked after a crash, we'd write a 0 block too. */
654 for (int i = 0; bh; bh = bh->bh_next) {
655 if (!(bh->bh_flags & BH_NEEDS_ZEROED)) {
660 memset(bh->bh_buffer, 0, pm->pm_host->i_sb->s_blocksize);
661 bh->bh_flags |= BH_DIRTY;
662 bh->bh_page->pg_flags |= PG_DIRTY;
665 /* TODO: (BLK) this assumes we slept til the request was done */
666 retval = make_request(bdev, breq);
668 /* zero out whatever is beyond the EOF. we could do this by figuring out
669 * where the BHs end and zeroing from there, but I'd rather zero from where
670 * the file ends (which could be in the middle of an FS block) */
672 eof_off = (pm->pm_host->i_size - page->pg_index * PGSIZE);
673 eof_off = MIN(eof_off, PGSIZE) % PGSIZE;
674 /* at this point, eof_off is the offset into the page of the EOF, or 0 */
676 memset(eof_off + page2kva(page), 0, PGSIZE - eof_off);
677 /* after the data is read, we mark it up to date and unlock the page. */
678 page->pg_flags |= PG_UPTODATE;
680 kmem_cache_free(breq_kcache, breq);
681 /* Useful debugging. Put one higher up if the page is not getting mapped */
682 //print_pageinfo(page);
686 /* Super Operations */
688 /* Creates and initializes a new inode. FS specific, yet inode-generic fields
689 * are filled in. inode-specific fields are filled in in read_inode() based on
690 * what's on the disk for a given i_no. i_no and i_fop are set by the caller.
692 * Note that this means this inode can be for an inode that is already on disk,
693 * or it can be used when creating. The i_fop depends on the type of file
694 * (file, directory, symlink, etc). */
695 struct inode *ext2_alloc_inode(struct super_block *sb)
697 struct inode *inode = kmem_cache_alloc(inode_kcache, 0);
698 memset(inode, 0, sizeof(struct inode));
699 inode->i_op = &ext2_i_op;
700 inode->i_pm.pm_op = &ext2_pm_op;
704 /* FS-specific clean up when an inode is dealloced. this is just cleaning up
705 * the in-memory version, and only the FS-specific parts. whether or not the
706 * inode is still on disc is irrelevant. */
707 void ext2_dealloc_inode(struct inode *inode)
709 kmem_cache_free(ext2_i_kcache, inode->i_fs_info);
712 /* reads the inode data on disk specified by inode->i_ino into the inode.
713 * basically, it's a "make this inode the one for i_ino (i number)" */
714 void ext2_read_inode(struct inode *inode)
716 unsigned int bg_idx, ino_per_blk, my_ino_blk;
717 struct ext2_sb_info *e2sbi = (struct ext2_sb_info*)inode->i_sb->s_fs_info;
718 struct ext2_block_group *my_bg;
719 struct ext2_inode *ino_tbl_chunk, *my_ino;
721 /* Need to compute the blockgroup and index of the requested inode */
722 ino_per_blk = inode->i_sb->s_blocksize /
723 le16_to_cpu(e2sbi->e2sb->s_inode_size);
724 bg_idx = ext2_inode2bgidx(inode);
725 my_bg = ext2_inode2bg(inode);
726 /* Figure out which FS block of the inode table we want and read in that
728 my_ino_blk = le32_to_cpu(my_bg->bg_inode_table) + bg_idx / ino_per_blk;
729 ino_tbl_chunk = ext2_get_metablock(inode->i_sb, my_ino_blk);
730 my_ino = &ino_tbl_chunk[bg_idx % ino_per_blk];
732 /* Have the disk inode now, let's put its info into the VFS inode: */
733 inode->i_mode = le16_to_cpu(my_ino->i_mode);
734 switch (inode->i_mode & __S_IFMT) {
736 inode->i_fop = &ext2_f_op_dir;
739 inode->i_fop = &ext2_f_op_file;
742 inode->i_fop = &ext2_f_op_sym;
747 inode->i_fop = &ext2_f_op_file;
748 warn("[Calm British Accent] Look around you. Unhandled filetype.");
750 inode->i_nlink = le16_to_cpu(my_ino->i_links_cnt);
751 inode->i_uid = le16_to_cpu(my_ino->i_uid);
752 inode->i_gid = le16_to_cpu(my_ino->i_gid);
753 /* technically, for large F_REG, we should | with i_dir_acl */
754 inode->i_size = le32_to_cpu(my_ino->i_size);
755 inode->i_atime.tv_sec = le32_to_cpu(my_ino->i_atime);
756 inode->i_atime.tv_nsec = 0;
757 inode->i_mtime.tv_sec = le32_to_cpu(my_ino->i_mtime);
758 inode->i_mtime.tv_nsec = 0;
759 inode->i_ctime.tv_sec = le32_to_cpu(my_ino->i_ctime);
760 inode->i_ctime.tv_nsec = 0;
761 inode->i_blocks = le32_to_cpu(my_ino->i_blocks);
762 inode->i_flags = le32_to_cpu(my_ino->i_flags);
763 inode->i_socket = FALSE; /* for now */
764 /* Copy over the other inode stuff that isn't in the VFS inode. For now,
765 * it's just the block pointers */
766 inode->i_fs_info = kmem_cache_alloc(ext2_i_kcache, 0);
767 struct ext2_i_info *e2ii = (struct ext2_i_info*)inode->i_fs_info;
768 for (int i = 0; i < 15; i++)
769 e2ii->i_block[i] = le32_to_cpu(my_ino->i_block[i]);
770 /* TODO: (HASH) unused: inode->i_hash add to hash (saves on disc reading) */
771 /* TODO: we could consider saving a pointer to the disk inode and pinning
772 * its buffer in memory, but for now we'll just free it. */
773 ext2_put_metablock(ino_tbl_chunk);
/* Hook invoked when an in-memory inode has been modified (journalling FSs care
 * about this).  Currently a no-op. */
void ext2_dirty_inode(struct inode *inode)
{
	/* presumably we'll ext2_dirty_metablock(void *buffer) here */
}
782 /* write the inode to disk (specifically, to inode inode->i_ino), synchronously
783 * if we're asked to wait */
784 void ext2_write_inode(struct inode *inode, bool wait)
789 /* called when an inode is decref'd, to do any FS specific work */
790 void ext2_put_inode(struct inode *inode)
795 /* Unused for now, will get rid of this if inode_release is sufficient */
796 void ext2_drop_inode(struct inode *inode)
801 /* delete the inode from disk (all data) */
802 void ext2_delete_inode(struct inode *inode)
805 // would remove from "disk" here
806 /* TODO: give up our i_ino */
/* Would unmount and release the super block.  Unmounting is not implemented,
 * so this panics unconditionally. */
void ext2_put_super(struct super_block *sb)
{
	panic("Shazbot! Ext2 can't be unmounted yet!");
}
815 /* updates the on-disk SB with the in-memory SB */
816 void ext2_write_super(struct super_block *sb)
821 /* syncs FS metadata with the disc, synchronously if we're waiting. this info
822 * also includes anything pointed to by s_fs_info. */
823 int ext2_sync_fs(struct super_block *sb, bool wait)
/* Would remount the FS with the new flags.  Remounting is not supported: a
 * warning is emitted and -1 is returned every time. */
int ext2_remount_fs(struct super_block *sb, int flags, char *data)
{
	const int cant_remount = -1;

	warn("Ext2 will not remount.");
	return cant_remount;
}
/* Hook for interrupting a mount operation (used by NFS and friends).  Ext2
 * does not support it and panics unconditionally. */
void ext2_umount_begin(struct super_block *sb)
{
	panic("Cannot abort a Ext2 mount, and why would you?");
}
842 /* inode_operations */
844 /* Little helper, used for initializing new inodes for file-like objects (files,
845 * symlinks, etc). We pass the dentry, since we need to up it. */
846 static void ext2_init_inode(struct inode *dir, struct dentry *dentry)
849 struct inode *inode = dentry->d_inode;
850 inode->i_ino = ext2_get_free_ino();
854 /* Called when creating a new disk inode in dir associated with dentry. We need
855 * to fill out the i_ino, set the type, and do whatever else we need */
856 int ext2_create(struct inode *dir, struct dentry *dentry, int mode,
857 struct nameidata *nd)
861 struct inode *inode = dentry->d_inode;
862 ext2_init_inode(dir, dentry);
863 SET_FTYPE(inode->i_mode, __S_IFREG);
864 inode->i_fop = &ext2_f_op_file;
865 /* fs_info->filestart is set by the caller, or else when first written (for
866 * new files). It was set to 0 in alloc_inode(). */
871 /* Searches the directory for the filename in the dentry, filling in the dentry
872 * with the FS specific info of this file. If it succeeds, it will pass back
873 * the *dentry you should use (which might be the same as the one you passed in).
874 * If this fails, it will return 0, but not free the memory of "dentry."
876 * Callers, make sure you alloc and fill out the name parts of the dentry. We
877 * don't currently use the ND. Might remove it in the future. */
878 struct dentry *ext2_lookup(struct inode *dir, struct dentry *dentry,
879 struct nameidata *nd)
881 assert(S_ISDIR(dir->i_mode));
882 struct ext2_dirent *dir_buf, *dir_i;
/* dir_block is the inode-relative index of the next directory block to read */
883 unsigned int dir_block = 0;
885 dir_buf = ext2_read_ino_block(dir, dir_block++);
887 /* now we have the first block worth of dirents. We'll get another block if
888 * dir_i hits a block boundary */
889 for (unsigned int bytes = 0; bytes < dir->i_size; ) {
890 /* On subsequent loops, we might need to advance to the next block */
891 if ((void*)dir_i >= (void*)dir_buf + dir->i_sb->s_blocksize) {
893 dir_buf = ext2_read_ino_block(dir, dir_block++);
897 /* Test if we're the one (TODO: use d_compare) */
/* NOTE(review): strncmp() is bounded by the dirent's dir_namelen only, so a
 * dirent whose name is a strict prefix of the requested name (e.g. "foo" vs
 * "foobar") compares equal.  Unless the requested name's length is checked
 * elsewhere, this can resolve to the wrong entry - confirm. */
898 if (!strncmp((char*)dir_i->dir_name, dentry->d_name.name,
899 dir_i->dir_namelen)){
900 load_inode(dentry, le32_to_cpu(dir_i->dir_inode));
901 /* TODO: (HASH) add dentry to dcache (maybe the caller should) */
905 /* Get ready for the next loop */
/* NOTE(review): dir_reclen is used without an endian conversion, unlike
 * dir_inode above - presumably needs le16_to_cpu() if it is a multi-byte
 * on-disk field; verify against struct ext2_dirent. */
906 bytes += dir_i->dir_reclen;
907 dir_i = (void*)dir_i + dir_i->dir_reclen;
909 printd("EXT2: Not Found, %s\n", dentry->d_name.name);
914 /* Hard link to old_dentry in directory dir with a name specified by new_dentry.
915 * At the very least, set the new_dentry's FS-specific fields. */
916 int ext2_link(struct dentry *old_dentry, struct inode *dir,
917 struct dentry *new_dentry)
920 assert(new_dentry->d_op = &ext2_d_op);
924 /* Removes the link from the dentry in the directory */
925 int ext2_unlink(struct inode *dir, struct dentry *dentry)
931 /* Creates a new inode for a symlink dir, linking to / containing the name
932 * symname. dentry is the controlling dentry of the inode. */
933 int ext2_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
937 struct inode *inode = dentry->d_inode;
938 SET_FTYPE(inode->i_mode, __S_IFLNK);
939 inode->i_fop = &ext2_f_op_sym;
940 strncpy(string, symname, len);
941 string[len] = '\0'; /* symname should be \0d anyway, but just in case */
946 /* Called when creating a new inode for a directory associated with dentry in
947 * dir with the given mode. Note, we might (later) need to track subdirs within
948 * the parent inode, like we do with regular files. I'd rather not, so we'll
949 * see if we need it. */
950 int ext2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
954 struct inode *inode = dentry->d_inode;
955 inode->i_ino = ext2_get_free_ino();
956 SET_FTYPE(inode->i_mode, __S_IFDIR);
957 inode->i_fop = &ext2_f_op_dir;
962 /* Removes from dir the directory 'dentry.' Ext2 doesn't store anything in the
963 * inode for which children it has. It probably should, but since everything is
964 * pinned, it just relies on the dentry connections. */
965 int ext2_rmdir(struct inode *dir, struct dentry *dentry)
971 /* Used to make a generic file, based on the type and the major/minor numbers
972 * (in rdev), with the given mode. As with others, this creates a new disk
973 * inode for the file */
974 int ext2_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
980 /* Moves old_dentry from old_dir to new_dentry in new_dir */
981 int ext2_rename(struct inode *old_dir, struct dentry *old_dentry,
982 struct inode *new_dir, struct dentry *new_dentry)
988 /* Returns the char* for the symname for the given dentry. The VFS code that
989 * calls this for real FS's might assume it's already read in, so if the char *
990 * isn't already in memory, we'd need to read it in here. Regarding the char*
991 * storage, the char* only will last as long as the dentry and inode are in
993 char *ext2_readlink(struct dentry *dentry)
996 struct inode *inode = dentry->d_inode;
997 if (!S_ISLNK(inode->i_mode))
1002 /* Modifies the size of the file of inode to whatever its i_size is set to */
1003 void ext2_truncate(struct inode *inode)
1007 /* Checks whether the access mode is allowed for the file belonging to the
1008 * inode. Implies that the permissions are on the file, and not the hardlink */
1009 int ext2_permission(struct inode *inode, int mode, struct nameidata *nd)
1015 /* dentry_operations */
1016 /* Determines if the dentry is still valid before using it to translate a path.
1017 * Network FS's need to deal with this. */
1018 int ext2_d_revalidate(struct dentry *dir, struct nameidata *nd)
1019 { // default, nothing
1023 /* Produces the hash to lookup this dentry from the dcache */
1024 int ext2_d_hash(struct dentry *dentry, struct qstr *name)
1029 /* Compares name1 and name2. name1 should be a member of dir. */
1030 int ext2_d_compare(struct dentry *dir, struct qstr *name1, struct qstr *name2)
1031 { // default, string comp (case sensitive)
1035 /* Called when the last ref is deleted (refcnt == 0) */
1036 int ext2_d_delete(struct dentry *dentry)
1037 { // default, nothin
1041 /* Called when it's about to be slab-freed */
1042 int ext2_d_release(struct dentry *dentry)
1047 /* Called when the dentry loses its inode (becomes "negative") */
1048 void ext2_d_iput(struct dentry *dentry, struct inode *inode)
1049 { // default, call i_put to release the inode object
1053 /* file_operations */
1055 /* Updates the file pointer. TODO: think about locking, and putting this in the
1057 #include <syscall.h> /* just for set_errno, may go away later */
1058 off_t ext2_llseek(struct file *file, off_t offset, int whence)
1066 temp_off = file->f_pos + offset;
1069 temp_off = file->f_dentry->d_inode->i_size + offset;
1073 warn("Unknown 'whence' in llseek()!\n");
1076 file->f_pos = temp_off;
1080 /* Fills in the next directory entry (dirent), starting with d_off. Like with
1081 * read and write, there will be issues with userspace and the *dirent buf.
1083 int ext2_readdir(struct file *dir, struct dirent *dirent)
1086 /* Not enough data at the end of the directory */
1087 if (dir->f_dentry->d_inode->i_size <
1088 dirent->d_off + sizeof(struct ext2_dirent))
1091 /* Figure out which block we need to read in for dirent->d_off */
1092 int block = dirent->d_off / dir->f_dentry->d_sb->s_blocksize;
1093 buffer = ext2_read_ino_block(dir->f_dentry->d_inode, block);
1095 off_t f_off = dirent->d_off % dir->f_dentry->d_sb->s_blocksize;
1096 /* Copy out the dirent info */
1097 struct ext2_dirent *e2dir = (struct ext2_dirent*)(buffer + f_off);
1098 dirent->d_ino = le32_to_cpu(e2dir->dir_inode);
1099 dirent->d_off += le16_to_cpu(e2dir->dir_reclen);
1100 /* note, dir_namelen doesn't include the \0 */
1101 dirent->d_reclen = e2dir->dir_namelen;
1102 strncpy(dirent->d_name, (char*)e2dir->dir_name, e2dir->dir_namelen);
1103 assert(e2dir->dir_namelen <= MAX_FILENAME_SZ);
1104 dirent->d_name[e2dir->dir_namelen] = '\0';
1107 /* At the end of the directory, sort of. ext2 often preallocates blocks, so
1108 * this will cause us to walk along til the end, which isn't quite right. */
1109 if (dir->f_dentry->d_inode->i_size == dirent->d_off)
1111 if (dir->f_dentry->d_inode->i_size < dirent->d_off) {
1112 warn("Issues reaching the end of an ext2 directory!");
1115 return 1; /* normal success for readdir */
1118 /* This is called when a VMR is mapping a particular file. The FS needs to do
1119 * whatever it needs so that faults can be handled by read_page(), and handle all
1120 * of the cases of MAP_SHARED, MAP_PRIVATE, whatever. It also needs to ensure
1121 * the file is not being mmaped in a way that conflicts with the manner in which
1122 * the file was opened or the file type. */
1123 int ext2_mmap(struct file *file, struct vm_region *vmr)
1125 if (S_ISREG(file->f_dentry->d_inode->i_mode))
1130 /* Called by the VFS while opening the file, which corresponds to inode, for
1131 * the FS to do whatever it needs. */
1132 int ext2_open(struct inode *inode, struct file *file)
1134 /* TODO: check to make sure the file is openable, and maybe do some checks
1135 * for the open mode (like did we want to truncate, append, etc) */
1139 /* Called when a file descriptor is closed. */
1140 int ext2_flush(struct file *file)
1146 /* Called when the file is about to be closed (file obj freed) */
1147 int ext2_release(struct inode *inode, struct file *file)
1152 /* Flushes the file's dirty contents to disc */
1153 int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
1158 /* Traditionally, sleeps until there is file activity. We probably won't
1159 * support this, or we'll handle it differently. */
1160 unsigned int ext2_poll(struct file *file, struct poll_table_struct *poll_table)
1165 /* Reads count bytes from a file, starting from (and modifying) offset, and
1166 * putting the bytes into buffers described by vector */
1167 ssize_t ext2_readv(struct file *file, const struct iovec *vector,
1168 unsigned long count, off_t *offset)
1173 /* Writes count bytes to a file, starting from (and modifying) offset, and
1174 * taking the bytes from buffers described by vector */
1175 ssize_t ext2_writev(struct file *file, const struct iovec *vector,
1176 unsigned long count, off_t *offset)
1181 /* Write the contents of file to the page. Will sort the params later */
1182 ssize_t ext2_sendpage(struct file *file, struct page *page, int offset,
1183 size_t size, off_t pos, int more)
1188 /* Checks random FS flags. Used by NFS. */
1189 int ext2_check_flags(int flags)
1190 { // default, nothing
1194 /* Redeclaration and initialization of the FS ops structures */
1195 struct page_map_operations ext2_pm_op = {
1199 struct super_operations ext2_s_op = {
1215 struct inode_operations ext2_i_op = {
1230 struct dentry_operations ext2_d_op = {
1239 struct file_operations ext2_f_op_file = {
1256 struct file_operations ext2_f_op_dir = {
1273 struct file_operations ext2_f_op_sym = {