/* Copyright (c) 2010 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Block devices and generic blockdev infrastructure */

#include <devfs.h>
#include <blockdev.h>
#include <kmalloc.h>
#include <slab.h>
#include <page_alloc.h>
#include <pmap.h>
/* These two are needed for the fake interrupt */
#include <alarm.h>
#include <smp.h>

struct file_operations block_f_op;
struct page_map_operations block_pm_op;
struct kmem_cache *breq_kcache;
struct kmem_cache *bh_kcache;

void block_init(void)
{
	breq_kcache = kmem_cache_create("block_reqs", sizeof(struct block_request),
	                                __alignof__(struct block_request), 0, 0, 0);
	bh_kcache = kmem_cache_create("buffer_heads", sizeof(struct buffer_head),
	                              __alignof__(struct buffer_head), 0, 0, 0);

	#ifdef CONFIG_EXT2FS
	/* Now probe for and init the block device for the ext2 ram disk */
	extern uint8_t _binary_mnt_ext2fs_img_size[];
	extern uint8_t _binary_mnt_ext2fs_img_start[];
	/* Build and init the block device */
	struct block_device *ram_bd = kmalloc(sizeof(struct block_device), 0);
	memset(ram_bd, 0, sizeof(struct block_device));
	ram_bd->b_sector_sz = 512;
	ram_bd->b_nr_sector = (unsigned long)_binary_mnt_ext2fs_img_size / 512;
	kref_init(&ram_bd->b_kref, fake_release, 1);
	pm_init(&ram_bd->b_pm, &block_pm_op, ram_bd);
	ram_bd->b_data = _binary_mnt_ext2fs_img_start;
	strlcpy(ram_bd->b_name, "RAMDISK", BDEV_INLINE_NAME);
	/* Connect it to the file system */
	struct file *ram_bf = make_device("/dev/ramdisk", S_IRUSR | S_IWUSR,
	                                  __S_IFBLK, &block_f_op);
	/* make sure the inode tracks the right pm (not its internal one) */
	ram_bf->f_dentry->d_inode->i_mapping = &ram_bd->b_pm;
	ram_bf->f_dentry->d_inode->i_bdev = ram_bd;	/* this holds the bd kref */
	kref_put(&ram_bf->f_kref);
	#endif /* CONFIG_EXT2FS */
}

/* Generic helper, returns a kref'd reference out of principle; callers must
 * kref_put() the bdev when they are done with it. */
struct block_device *get_bdev(char *path)
{
	struct block_device *bdev;
	struct file *block_f;

	block_f = do_file_open(path, O_RDWR, 0);
	assert(block_f);
	bdev = block_f->f_dentry->d_inode->i_bdev;
	kref_get(&bdev->b_kref, 1);
	kref_put(&block_f->f_kref);
	return bdev;
}
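
/* Usage sketch (illustrative, not from the original file): grab the bdev by
 * path and drop the kref when done.  "/dev/ramdisk" is the device made in
 * block_init() above.
 *
 *	struct block_device *bdev = get_bdev("/dev/ramdisk");
 *
 *	... use bdev, e.g. via bdev_get_buffer()/bdev_put_buffer() ...
 *	kref_put(&bdev->b_kref);
 */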

/* Frees all the BHs associated with page.  There could be 0 (e.g. for a page
 * that never became UPTODATE).  Don't call this on a page that isn't a
 * PG_BUFFER.  Note, these are not a circular LL (for now). */
void free_bhs(struct page *page)
{
	struct buffer_head *bh, *next;

	assert(atomic_read(&page->pg_flags) & PG_BUFFER);
	bh = (struct buffer_head*)page->pg_private;
	while (bh) {
		next = bh->bh_next;
		kmem_cache_free(bh_kcache, bh);
		bh = next;
	}
	page->pg_private = 0;		/* catch bugs */
}
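
/* Illustrative sketch (not from the original file) of the structure free_bhs()
 * tears down: pg_private points at the first buffer_head and bh_next links the
 * rest (singly linked, not circular).  A hypothetical helper that counts the
 * BHs on a PG_BUFFER page:
 *
 *	static int nr_bhs_on_page(struct page *page)
 *	{
 *		struct buffer_head *i;
 *		int n = 0;
 *
 *		assert(atomic_read(&page->pg_flags) & PG_BUFFER);
 *		for (i = (struct buffer_head*)page->pg_private; i; i = i->bh_next)
 *			n++;
 *		return n;
 *	}
 */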

/* This ultimately will handle the actual request processing, all the way down
 * to the driver, and will deal with blocking.  For now, we just fulfill the
 * request right away (RAM based block devs). */
int bdev_submit_request(struct block_device *bdev, struct block_request *breq)
{
	void *src, *dst;
	unsigned long first_sector;
	unsigned int nr_sector;

	for (int i = 0; i < breq->nr_bhs; i++) {
		first_sector = breq->bhs[i]->bh_sector;
		nr_sector = breq->bhs[i]->bh_nr_sector;
		/* Sectors are indexed starting with 0, for now. */
		if (first_sector + nr_sector > bdev->b_nr_sector) {
			warn("Exceeding the num sectors!");
			return -1;
		}
		if (breq->flags & BREQ_READ) {
			dst = breq->bhs[i]->bh_buffer;
			src = bdev->b_data + (first_sector << SECTOR_SZ_LOG);
		} else if (breq->flags & BREQ_WRITE) {
			dst = bdev->b_data + (first_sector << SECTOR_SZ_LOG);
			src = breq->bhs[i]->bh_buffer;
		} else {
			panic("Need a request type!\n");
		}
		memcpy(dst, src, nr_sector << SECTOR_SZ_LOG);
	}
	/* Faking the device interrupt with an alarm */
	void breq_handler(struct alarm_waiter *waiter)
	{
		/* In the future, we'll need to figure out which breq this was in
		 * response to */
		struct block_request *breq = (struct block_request*)waiter->data;
		if (breq->callback)
			breq->callback(breq);
		kfree(waiter);
	}
	struct timer_chain *tchain = &per_cpu_info[core_id()].tchain;
	struct alarm_waiter *waiter = kmalloc(sizeof(struct alarm_waiter), 0);
	init_awaiter(waiter, breq_handler);
	/* Stitch things up, so we know how to find things later */
	waiter->data = breq;
	/* Fire the fake 'interrupt' 5000 usec from now */
	set_awaiter_rel(waiter, 5000);
	set_alarm(tchain, waiter);
	return 0;
}
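
/* Sketch (not from the original file) of the asynchronous side of this API:
 * the fake 'interrupt' above just invokes breq->callback, so a caller that
 * doesn't want to block could pass its own completion handler instead of
 * generic_breq_done().  my_breq_done is a hypothetical name.
 *
 *	static void my_breq_done(struct block_request *breq)
 *	{
 *		// runs from the alarm handler, roughly 5 ms after submission
 *		kmem_cache_free(breq_kcache, breq);
 *	}
 *	...
 *	breq->callback = my_breq_done;
 *	bdev_submit_request(bdev, breq);
 *	// continue with other work; the breq is freed by the callback
 */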

/* Helper method, unblocks someone blocked on sleep_on_breq(). */
void generic_breq_done(struct block_request *breq)
{
	int8_t irq_state = 0;

	if (!sem_up_irqsave(&breq->sem, &irq_state)) {
		/* This shouldn't happen anymore.  Let brho know if it does. */
		warn("[kernel] no one waiting on breq %p", breq);
	}
}

/* Helper, pairs with generic_breq_done().  Note we sleep here on a semaphore
 * instead of faking it with an alarm.  Ideally, this code will be the same even
 * for real block devices (that don't fake things with timer interrupts). */
void sleep_on_breq(struct block_request *breq)
{
	int8_t irq_state = 0;

	/* Since printk takes a while, this may make you lose the race */
	printd("Sleeping on breq %p\n", breq);
	assert(irq_is_enabled());
	sem_down_irqsave(&breq->sem, &irq_state);
}
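
/* Sketch (not from the original file) of the blocking pattern these helpers
 * support, here for a hypothetical write-back of a single BH (bdev and bh
 * obtained elsewhere, e.g. via get_bdev() and bdev_get_buffer()).  The read
 * side of the same pattern is what bdev_get_buffer() does below.
 *
 *	struct block_request *breq = kmem_cache_alloc(breq_kcache, 0);
 *
 *	breq->flags = BREQ_WRITE;
 *	breq->callback = generic_breq_done;
 *	sem_init_irqsave(&breq->sem, 0);
 *	breq->bhs = breq->local_bhs;
 *	breq->bhs[0] = bh;
 *	breq->nr_bhs = 1;
 *	if (!bdev_submit_request(bdev, breq))
 *		sleep_on_breq(breq);
 *	kmem_cache_free(breq_kcache, breq);
 */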

/* This just tells the page cache that the page is 'up to date'.  Due to the
 * nature of the blocks in the page cache, we don't actually read the items in
 * on readpage; we read them in when a specific block is requested (in
 * bdev_get_buffer()). */
int block_readpage(struct page_map *pm, struct page *page)
{
	atomic_or(&page->pg_flags, PG_UPTODATE);
	return 0;
}

/* Returns a BH pointing to the buffer where blk_num from bdev is located (given
 * blocks of size blk_sz).  This uses the page cache for the page allocations
 * and evictions, but only caches blocks that are requested.  Check the docs for
 * more info.  The BH isn't refcounted, but a page refcnt is returned; call
 * bdev_put_buffer() when you're done, and/or bdev_dirty_buffer() if you wrote
 * to the block.
 *
 * Note we're using the lock_page() to sync (which is what we do with the page
 * cache too).  It's not ideal, but it keeps things simpler for now.
 *
 * Also note we're a little inconsistent with the use of sector sizes in certain
 * files.  We'll sort it eventually. */
struct buffer_head *bdev_get_buffer(struct block_device *bdev,
                                    unsigned long blk_num, unsigned int blk_sz)
{
	struct page *page;
	struct page_map *pm = &bdev->b_pm;
	struct buffer_head *bh, *new, *prev, **next_loc;
	struct block_request *breq;
	int error;
	unsigned int blk_per_pg = PGSIZE / blk_sz;
	unsigned int sct_per_blk = blk_sz / bdev->b_sector_sz;
	unsigned int blk_offset = (blk_num % blk_per_pg) * blk_sz;
	void *my_buf;

	assert(blk_offset < PGSIZE);
	if (!blk_num)
		warn("Asking for the 0th block of a bdev...");
	/* Make sure there's a page in the page cache.  Should always be one. */
	error = pm_load_page(pm, blk_num / blk_per_pg, &page);
	if (error)
		panic("Failed to load page! (%d)", error);
	my_buf = page2kva(page) + blk_offset;
	atomic_or(&page->pg_flags, PG_BUFFER);
retry:
	bh = (struct buffer_head*)page->pg_private;
	prev = 0;
	/* look through all the BHs for ours, stopping if we go too far. */
	while (bh) {
		if (bh->bh_buffer == my_buf) {
			goto found;
		} else if (bh->bh_buffer > my_buf) {
			break;
		}
		prev = bh;
		bh = bh->bh_next;
	}
	/* At this point, bh points to the one beyond our space (or 0), and prev is
	 * either the one before us or 0.  We make a BH, and try to insert */
	new = kmem_cache_alloc(bh_kcache, 0);
	assert(new);
	new->bh_page = page;			/* weak ref */
	new->bh_buffer = my_buf;
	new->bh_flags = 0;
	new->bh_next = bh;			/* insert before the one beyond us */
	new->bh_bdev = bdev;			/* uncounted ref */
	new->bh_sector = blk_num * sct_per_blk;
	new->bh_nr_sector = sct_per_blk;
	/* Try to insert the new one in place.  If it fails, retry the whole "find
	 * the bh" process.  This should be rare, so no sense optimizing it. */
	next_loc = prev ? &prev->bh_next : (struct buffer_head**)&page->pg_private;
	/* Normally, there'd be an ABA problem here, but we never actually remove
	 * bhs from the chain until the whole page gets cleaned up, which can't
	 * happen while we hold a reference to the page. */
	if (!atomic_cas_ptr((void**)next_loc, bh, new)) {
		kmem_cache_free(bh_kcache, new);
		goto retry;
	}
	bh = new;
found:
	/* At this point, we have the BH for our buf, but it might not be up to
	 * date, and there might be someone else trying to update it. */
	/* is it already here and up to date?  if so, we're done */
	if (bh->bh_flags & BH_UPTODATE)
		return bh;
	/* if not, try to lock the page (could BLOCK).  Using this for syncing. */
	lock_page(page);
	/* double check, are we up to date?  if so, we're done */
	if (bh->bh_flags & BH_UPTODATE) {
		unlock_page(page);
		return bh;
	}
	/* if we're here, the page is locked by us, we need to read the block */
	breq = kmem_cache_alloc(breq_kcache, 0);
	assert(breq);
	breq->flags = BREQ_READ;
	breq->callback = generic_breq_done;
	sem_init_irqsave(&breq->sem, 0);
	breq->bhs = breq->local_bhs;
	breq->bhs[0] = bh;
	breq->nr_bhs = 1;
	error = bdev_submit_request(bdev, breq);
	assert(!error);
	sleep_on_breq(breq);
	kmem_cache_free(breq_kcache, breq);
	/* after the data is read, we mark it up to date and unlock the page. */
	bh->bh_flags |= BH_UPTODATE;
	unlock_page(page);
	return bh;
}

/* Will dirty the block/BH/page for the given block/buffer.  Will have to be
 * careful with the page reclaimer - if someone holds a reference, they can
 * still dirty it. */
void bdev_dirty_buffer(struct buffer_head *bh)
{
	struct page *page = bh->bh_page;

	/* TODO: race on flag modification */
	bh->bh_flags |= BH_DIRTY;
	atomic_or(&page->pg_flags, PG_DIRTY);
}

/* Decrefs the buffer from bdev_get_buffer().  Call this when you no longer
 * reference your block/buffer.  For now, we do refcnting on the page, since the
 * reclaiming will be in page sized chunks from the page cache. */
void bdev_put_buffer(struct buffer_head *bh)
{
	pm_put_page(bh->bh_page);
}
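
/* Usage sketch (illustrative, not from the original file) tying the buffer API
 * together: get a block, modify it, mark it dirty, and drop the page ref.  The
 * bdev comes from get_bdev(); the block number and 1024-byte block size are
 * made up for the example.
 *
 *	struct buffer_head *bh = bdev_get_buffer(bdev, 1, 1024);
 *
 *	memset(bh->bh_buffer, 0, 1024);
 *	bdev_dirty_buffer(bh);
 *	bdev_put_buffer(bh);
 */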

/* Block device page map ops: */
struct page_map_operations block_pm_op = {
	block_readpage,
};

/* Block device file ops: for now, we don't let you do much of anything */
struct file_operations block_f_op = {
	.readdir = kfs_readdir,		/* this will fail gracefully */
	.fsync = 0,			/* fsync - makes no sense */
};