1 /* Barret Rhoden <brho@cs.berkeley.edu>
3 * VFS, based on the Linux VFS as described in LKD 2nd Ed (Robert Love) and in
4 * UTLK (Bovet/Cesati), which was probably written by Linus. A lot of it was
5 * changed (reduced) to handle what ROS will need, at least initially.
6 * Hopefully it'll be similar enough to interface with ext2 and other Linux
9 * struct qstr came directly from Linux.
10 * Lawyers can sort out the copyrights and whatnot with these interfaces and
13 #ifndef ROS_KERN_VFS_H
14 #define ROS_KERN_VFS_H
16 #include <ros/common.h>
17 #include <sys/queue.h>
23 #include <hashtable.h>
27 /* ghetto preprocessor hacks (since proc includes vfs) */
31 // TODO: temp typedefs, etc. remove when we support this stuff. Each stub has a single dummy member so it is a complete type.
35 struct io_writeback {int x;}; /* placeholder; element type of io_wb_tailq in this header */
36 struct event_poll {int x;}; /* placeholder; element type of event_poll_tailq in this header */
37 struct poll_table_struct {int x;}; /* placeholder; only passed by pointer to file_operations' poll in this header */
38 // end temp typedefs. note ino and off_t are needed in the next include
43 struct super_operations;
45 struct dentry_operations;
47 struct inode_operations;
49 struct file_operations;
52 struct pipe_inode_info;
54 /* List def's we need: TAILQ_HEAD(name, type) / SLIST_HEAD(name, type) from sys/queue.h each declare 'struct name', the head of a tail queue / singly-linked list whose elements are 'struct type'. */
55 TAILQ_HEAD(sb_tailq, super_block);
56 TAILQ_HEAD(dentry_tailq, dentry);
57 SLIST_HEAD(dentry_slist, dentry);
58 TAILQ_HEAD(inode_tailq, inode);
59 SLIST_HEAD(inode_slist, inode);
60 TAILQ_HEAD(file_tailq, file);
61 TAILQ_HEAD(io_wb_tailq, io_writeback);
62 TAILQ_HEAD(event_poll_tailq, event_poll);
63 TAILQ_HEAD(vfsmount_tailq, vfsmount);
64 TAILQ_HEAD(fs_type_tailq, fs_type);
66 /* Linux's quickstring - saves recomputing the hash and length. Note the length
67 * is the non-null-terminated length, as you'd get from strlen(). (for now) */
74 /* Helpful structure to pass around during lookup operations. At each point,
75 * it tracks the answer, the name of the previous, how deep the symlink
76 * following has gone, and the symlink pathnames. *dentry and *mnt up the
77 * refcnt of those objects too, so whoever 'receives' this will need to decref.
78 * This is meant to be pinning only the 'answer' to a path_lookup, and not the
79 * intermediate steps. The intermediates get pinned due to the existence of
80 * their children in memory. Internally, the VFS will refcnt any item whenever
81 * it is in this struct. The last_sym is needed to pin the dentry (and thus the
82 * inode and char* storage for the symname) for the duration of a lookup. When
83 * you resolve a pathname, you need to keep its string in memory. */
84 #define MAX_SYMLINK_DEPTH 6 // arbitrary.
86 struct dentry *dentry; /* dentry of the obj */
87 struct vfsmount *mnt; /* its mount pt */
88 struct qstr last; /* last component in search */
89 int flags; /* lookup flags */
90 int last_type; /* type of last component */
91 unsigned int depth; /* search's symlink depth */
92 int intent; /* access type for the file */
93 struct dentry *last_sym; /* pins the symname */
96 /* nameidata lookup flags (single-bit masks, 0x01 - 0x08) and access type (intent) values */
97 #define LOOKUP_FOLLOW 0x01 /* if the last is a symlink, follow */
98 #define LOOKUP_DIRECTORY 0x02 /* last component must be a directory */
99 #define LOOKUP_CONTINUE 0x04 /* still filenames to go */
100 #define LOOKUP_PARENT 0x08 /* lookup the dir that includes the item */
101 /* These are the nd's intent. NOTE(review): unlike the flags above, these are not independent bits (0x11 and 0x12 both contain 0x10, and their low bits coincide with LOOKUP_FOLLOW / LOOKUP_DIRECTORY), so presumably they are compared by value in the separate 'intent' field rather than OR'd with the lookup flags -- confirm. */
102 #define LOOKUP_OPEN 0x10 /* intent is to open a file */
103 #define LOOKUP_CREATE 0x11 /* create a file if it doesn't exist */
104 #define LOOKUP_ACCESS 0x12 /* access / check user permissions */
106 /* Superblock: Specific instance of a mounted filesystem. All synchronization
107 * is done with the one spinlock. */
110 TAILQ_ENTRY(super_block) s_list; /* list of all sbs */
111 dev_t s_dev; /* id */
112 unsigned long s_blocksize;
114 unsigned long long s_maxbytes; /* max file size */
115 struct fs_type *s_type;
116 struct super_operations *s_op;
117 unsigned long s_flags;
118 unsigned long s_magic;
119 struct vfsmount *s_mount; /* vfsmount point */
120 spinlock_t s_lock; /* used for all sync */
122 bool s_syncing; /* currently syncing metadata */
123 struct inode_tailq s_inodes; /* all inodes */
124 struct inode_tailq s_dirty_i; /* dirty inodes */
125 struct io_wb_tailq s_io_wb; /* writebacks */
126 struct file_tailq s_files; /* assigned files */
127 struct dentry_tailq s_lru_d; /* unused dentries (in dcache)*/
128 spinlock_t s_lru_lock;
129 struct hashtable *s_dcache; /* dentry cache */
130 spinlock_t s_dcache_lock;
131 struct hashtable *s_icache; /* inode cache */
132 spinlock_t s_icache_lock;
133 struct block_device *s_bdev;
134 TAILQ_ENTRY(super_block) s_instances; /* list of sbs of this fs type*/
139 struct super_operations {
140 struct inode *(*alloc_inode) (struct super_block *sb);
141 void (*dealloc_inode) (struct inode *);
142 void (*read_inode) (struct inode *);
143 void (*dirty_inode) (struct inode *);
144 void (*write_inode) (struct inode *, bool);
145 void (*put_inode) (struct inode *); /* when decreffed */
146 void (*drop_inode) (struct inode *); /* when about to destroy */
147 void (*delete_inode) (struct inode *); /* deleted from disk */
148 void (*put_super) (struct super_block *); /* releases sb */
149 void (*write_super) (struct super_block *); /* sync with sb on disk */
150 int (*sync_fs) (struct super_block *, bool);
151 int (*remount_fs) (struct super_block *, int, char *);
152 void (*umount_begin) (struct super_block *);/* called by NFS */
155 /* Sets the type of file, IAW the bits in ros/fs.h. Clears the __S_IFMT bits of 'mode', ORs in 'type', and assigns the result back to 'mode': 'mode' must be a modifiable lvalue and is evaluated twice, so avoid arguments with side effects. */
156 #define SET_FTYPE(mode, type) ((mode) = ((mode) & ~__S_IFMT) | (type))
158 /* Will need a bunch of states/flags for an inode. TBD */
159 #define I_STATE_DIRTY 0x001
161 /* Inode: represents a specific file */
163 SLIST_ENTRY(inode) i_hash; /* inclusion in a hash table */
164 TAILQ_ENTRY(inode) i_sb_list; /* all inodes in the FS */
165 TAILQ_ENTRY(inode) i_list; /* describes state (dirty) */
166 struct dentry_tailq i_dentry; /* all dentries pointing here*/
169 int i_mode; /* access mode and file type */
170 unsigned int i_nlink; /* hard links */
173 kdev_t i_rdev; /* real device node */
175 unsigned long i_blksize;
176 unsigned long i_blocks; /* filesize in blocks */
177 struct timespec i_atime;
178 struct timespec i_mtime;
179 struct timespec i_ctime;
181 struct inode_operations *i_op;
182 struct file_operations *i_fop;
183 struct super_block *i_sb;
184 struct page_map *i_mapping; /* usually points to i_pm */
185 struct page_map i_pm; /* this inode's page cache */
187 struct pipe_inode_info *i_pipe;
188 struct block_device *i_bdev;
189 struct char_device *i_cdev;
191 unsigned long i_state;
192 unsigned long dirtied_when; /* in jiffies */
193 unsigned int i_flags; /* filesystem mount flags */
195 atomic_t i_writecount; /* number of writers */
199 struct inode_operations {
200 int (*create) (struct inode *, struct dentry *, int, struct nameidata *);
201 struct dentry *(*lookup) (struct inode *, struct dentry *,
203 int (*link) (struct dentry *, struct inode *, struct dentry *);
204 int (*unlink) (struct inode *, struct dentry *);
205 int (*symlink) (struct inode *, struct dentry *, const char *);
206 int (*mkdir) (struct inode *, struct dentry *, int);
207 int (*rmdir) (struct inode *, struct dentry *);
208 int (*mknod) (struct inode *, struct dentry *, int, dev_t);
209 int (*rename) (struct inode *, struct dentry *,
210 struct inode *, struct dentry *);
211 char *(*readlink) (struct dentry *);
212 void (*truncate) (struct inode *); /* set i_size before calling */
213 int (*permission) (struct inode *, int, struct nameidata *);
216 #define DNAME_INLINE_LEN 32
218 /* Dentry flags. All negatives are also unused. */
219 #define DENTRY_USED 0x01 /* has a kref > 0 */
220 #define DENTRY_NEGATIVE 0x02 /* cache of a failed lookup */
221 #define DENTRY_DYING 0x04 /* should be freed on release */
223 /* Dentry: in memory object, corresponding to an element of a path. E.g. /,
224 * usr, bin, and vim are all dentries. All have inodes. Vim happens to be a
225 * file instead of a directory.
226 * They can be used (valid inode, currently in use), unused (valid, not used),
227 * or negative (not a valid inode (deleted or bad path), but kept to resolve
228 * requests quickly). If none of these, dealloc it back to the slab cache.
229 * Unused and negatives go in the LRU list. */
231 struct kref d_kref; /* don't discard when 0 */
232 unsigned long d_flags; /* dentry cache flags */
234 struct inode *d_inode;
235 TAILQ_ENTRY(dentry) d_lru; /* unused list */
236 TAILQ_ENTRY(dentry) d_alias; /* linkage for i_dentry */
237 struct dentry_tailq d_subdirs;
238 TAILQ_ENTRY(dentry) d_subdirs_link;
239 unsigned long d_time; /* revalidate time (jiffies)*/
240 struct dentry_operations *d_op;
241 struct super_block *d_sb;
242 bool d_mount_point; /* is an FS mounted over here */
243 struct vfsmount *d_mounted_fs; /* fs mounted here */
244 struct dentry *d_parent;
245 struct qstr d_name; /* pts to iname and holds hash*/
246 char d_iname[DNAME_INLINE_LEN];
250 /* not sure yet if we want to call delete when refcnt == 0 (move it to LRU) or
251 * when it's time to remove it from the dcache. */
252 struct dentry_operations {
253 int (*d_revalidate) (struct dentry *, struct nameidata *);
254 int (*d_hash) (struct dentry *, struct qstr *);
255 int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
256 int (*d_delete) (struct dentry *);
257 int (*d_release) (struct dentry *);
258 void (*d_iput) (struct dentry *, struct inode *);
261 /* Yanked from glibc-2.11.1/posix/unistd.h */
262 #define SEEK_SET 0 /* Seek from beginning of file. */
263 #define SEEK_CUR 1 /* Seek from current position. */
264 #define SEEK_END 2 /* Seek from end of file. */
266 /* File: represents a file opened by a process. */
268 TAILQ_ENTRY(file) f_list; /* list of all files */
269 struct dentry *f_dentry; /* definitely not inode. =( */
270 struct vfsmount *f_vfsmnt;
271 struct file_operations *f_op;
273 unsigned int f_flags; /* O_APPEND, etc */
274 int f_mode; /* O_RDONLY, etc */
275 off64_t f_pos; /* offset / file pointer */
279 struct event_poll_tailq f_ep_links;
280 spinlock_t f_ep_lock;
281 void *f_privdata; /* tty/socket driver hook */
282 struct page_map *f_mapping; /* page cache mapping */
284 /* Ghetto appserver support */
285 int fd; // all it contains is an appserver fd (for pid 0, aka kernel)
290 struct file_operations {
291 int (*llseek) (struct file *, off64_t, off64_t *, int);
292 ssize_t (*read) (struct file *, char *, size_t, off64_t *);
293 ssize_t (*write) (struct file *, const char *, size_t, off64_t *);
294 int (*readdir) (struct file *, struct dirent *);
295 int (*mmap) (struct file *, struct vm_region *);
296 int (*open) (struct inode *, struct file *);
297 int (*flush) (struct file *);
298 int (*release) (struct inode *, struct file *);
299 int (*fsync) (struct file *, struct dentry *, int);
300 unsigned int (*poll) (struct file *, struct poll_table_struct *);
301 ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
303 ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
305 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, off64_t,
307 int (*check_flags) (int flags); /* most FS's ignore this */
310 /* FS structs. One of these per FS (e.g., ext2) */
314 struct super_block *(*get_sb) (struct fs_type *, int,
315 char *, struct vfsmount *);
316 void (*kill_sb) (struct super_block *);
317 TAILQ_ENTRY(fs_type) list;
318 struct sb_tailq fs_supers; /* all of this FS's sbs */
321 /* A mount point: more focused on the mounting, and solely in memory, compared
322 * to the SB which is focused on FS definitions (and exists on disk). */
324 TAILQ_ENTRY(vfsmount) mnt_list;
325 struct vfsmount *mnt_parent;
326 struct dentry *mnt_mountpoint;/* parent dentry where mnted */
327 struct dentry *mnt_root; /* dentry of root of this fs */
328 struct super_block *mnt_sb;
329 struct vfsmount_tailq mnt_child_mounts;
330 TAILQ_ENTRY(vfsmount) mnt_child_link;
331 struct kref mnt_kref;
334 struct namespace *mnt_namespace;
337 struct pipe_inode_info
342 unsigned int p_nr_readers;
343 unsigned int p_nr_writers;
344 struct cond_var p_cv;
347 /* Per-process structs */
348 #define NR_OPEN_FILES_DEFAULT 32
349 #define NR_FILE_DESC_DEFAULT 32
350 /* keep this in sync with glibc's fd_setsize */
351 #define NR_FILE_DESC_MAX 1024
353 /* Bitmask for file descriptors, big for when we exceed the initial small. We
354 * could just use the fd_array to check for openness instead of the bitmask,
355 * but eventually we might want to use the bitmasks for other things (like
356 * which files are close_on_exec). */
358 typedef struct fd_set {
359 uint8_t fds_bits[BYTES_FOR_BITMASK(NR_FILE_DESC_MAX)];
363 struct small_fd_set {
364 uint8_t fds_bits[BYTES_FOR_BITMASK(NR_FILE_DESC_DEFAULT)];
367 /* Helper macros to manage fd_sets: fd 'n' is bit (n & 7) of byte fds_bits[n / 8]. 'p' points to a struct with an fds_bits array (fd_set or small_fd_set). No bounds checking is done on 'n', and FD_SET/FD_CLR/FD_ISSET evaluate 'n' twice. FD_ISSET yields the raw masked bit (nonzero if set), not 0/1. FD_ZERO clears sizeof(*(p)) bytes, so the size follows p's pointed-to type. */
368 #define FD_SET(n, p) ((p)->fds_bits[(n)/8] |= (1 << ((n) & 7)))
369 #define FD_CLR(n, p) ((p)->fds_bits[(n)/8] &= ~(1 << ((n) & 7)))
370 #define FD_ISSET(n,p) ((p)->fds_bits[(n)/8] & (1 << ((n) & 7)))
371 #define FD_ZERO(p) memset((void*)(p),0,sizeof(*(p)))
373 /* Describes an open file. We need this, since the FD flags are supposed to be
374 * per file descriptor, not per file (like the file status flags). */
376 struct file *fd_file;
377 unsigned int fd_flags;
380 /* All open files for a process */
381 struct files_struct {
384 int max_files; /* max files ptd to by fd */
385 int max_fdset; /* max of the current fd_set */
386 int next_fd; /* next number available */
387 struct file_desc *fd; /* initially pts to fd_array */
388 struct fd_set *open_fds; /* init, pts to open_fds_init */
389 struct small_fd_set open_fds_init;
390 struct file_desc fd_array[NR_OPEN_FILES_DEFAULT];
393 /* Process specific filesystem info */
401 /* Each process can have its own (eventually), but default to the same NS */
405 struct vfsmount *root;
406 struct vfsmount_tailq vfsmounts; /* all vfsmounts in this ns */
410 extern struct sb_tailq super_blocks; /* list of all sbs */
411 extern spinlock_t super_blocks_lock;
412 extern struct fs_type_tailq file_systems; /* lock this if it's dynamic */
413 extern struct namespace default_ns;
415 /* Slab caches for common objects */
416 extern struct kmem_cache *dentry_kcache;
417 extern struct kmem_cache *inode_kcache;
418 extern struct kmem_cache *file_kcache;
420 /* Misc VFS functions */
422 void qstr_builder(struct dentry *dentry, char *l_name);
423 char *file_name(struct file *file);
424 int path_lookup(char *path, int flags, struct nameidata *nd);
425 void path_release(struct nameidata *nd);
426 int mount_fs(struct fs_type *fs, char *dev_name, char *path, int flags);
428 /* Superblock functions */
429 struct super_block *get_sb(void);
430 void init_sb(struct super_block *sb, struct vfsmount *vmnt,
431 struct dentry_operations *d_op, unsigned long root_ino,
434 /* Dentry Functions */
435 struct dentry *get_dentry(struct super_block *sb, struct dentry *parent,
437 void dentry_release(struct kref *kref);
438 void __dentry_free(struct dentry *dentry);
439 struct dentry *lookup_dentry(char *path, int flags);
440 struct dentry *dcache_get(struct super_block *sb, struct dentry *what_i_want);
441 void dcache_put(struct super_block *sb, struct dentry *key_val);
442 struct dentry *dcache_remove(struct super_block *sb, struct dentry *key);
443 void dcache_prune(struct super_block *sb, bool negative_only);
445 /* Inode Functions */
446 struct inode *get_inode(struct dentry *dentry);
447 void load_inode(struct dentry *dentry, unsigned long ino);
448 int create_file(struct inode *dir, struct dentry *dentry, int mode);
449 int create_dir(struct inode *dir, struct dentry *dentry, int mode);
450 int create_symlink(struct inode *dir, struct dentry *dentry,
451 const char *symname, int mode);
452 int check_perms(struct inode *inode, int access_mode);
453 void inode_release(struct kref *kref);
454 void stat_inode(struct inode *inode, struct kstat *kstat);
455 struct inode *icache_get(struct super_block *sb, unsigned long ino);
456 void icache_put(struct super_block *sb, struct inode *inode);
457 struct inode *icache_remove(struct super_block *sb, unsigned long ino);
459 /* File-ish functions */
460 ssize_t generic_file_read(struct file *file, char *buf, size_t count,
462 ssize_t generic_file_write(struct file *file, const char *buf, size_t count,
464 ssize_t generic_dir_read(struct file *file, char *u_buf, size_t count,
466 struct file *alloc_file(void);
467 struct file *do_file_open(char *path, int flags, int mode);
468 int do_symlink(char *path, const char *symname, int mode);
469 int do_link(char *old_path, char *new_path);
470 int do_unlink(char *path);
471 int do_access(char *path, int mode);
472 int do_chmod(char *path, int mode);
473 int do_mkdir(char *path, int mode);
474 int do_rmdir(char *path);
475 int do_pipe(struct file **pipe_files, int flags);
476 struct file *dentry_open(struct dentry *dentry, int flags);
477 void file_release(struct kref *kref);
479 /* Process-related File management functions */
480 struct file *get_file_from_fd(struct files_struct *open_files, int fd);
481 struct file *put_file_from_fd(struct files_struct *open_files, int file_desc);
482 int insert_file(struct files_struct *open_files, struct file *file, int low_fd);
483 void close_all_files(struct files_struct *open_files, bool cloexec);
484 void clone_files(struct files_struct *src, struct files_struct *dst);
485 int do_chdir(struct fs_struct *fs_env, char *path);
486 char *do_getcwd(struct fs_struct *fs_env, char **kfree_this, size_t cwd_l);
489 void print_kstat(struct kstat *kstat);
490 int ls_dash_r(char *path);
491 extern struct inode_operations dummy_i_op;
492 extern struct dentry_operations dummy_d_op;
494 int put_fd(struct files_struct *open_files, int file_desc);
495 int get_fd(struct files_struct *open_files, int low_fd);
496 int claim_fd(struct files_struct *open_files, int file_desc);
498 #endif /* ROS_KERN_VFS_H */