1 /* Copyright (c) 2018 Google Inc
2 * Barret Rhoden <brho@cs.berkeley.edu>
3 * See LICENSE for details.
5 * tree_file: structs and helpers to for a tree-based filesystem for 9ns
17 struct tree_filesystem;
19 /* The walk cache encapsulates a couple things related to caching, similar to
20 * the dentry cache in the VFS.
21 * - the parent -> child links in a hash table
22 * - the LRU list, consisting of all tree files with kref == 0
24 * LRU notes. Once a TF gets linked into a tree:
25 * - a TF is on the LRU IFF its refcnt == 0
26 * - a TF is on the LRU only if it is in the tree
27 * - any time kref gets set to 0, we consider putting it on the LRU (if not
28 * DISCONNECTED / in the tree). anytime it is increffed from 0, we yank it.
29 * - NEG entries are always kref == 0, and are on the LRU list if they are in
30 * the tree. They are never increffed, only rcu-read.
36 struct hash_helper hh; /* parts are rcu-read */
37 struct hlist_head *ht;
38 struct hlist_head static_ht[HASH_INIT_SZ];
41 /* All ops that operate on a parent have the parent qlocked.
43 * free(): the TF is being freed. Might be called on TFs that the TFS is
44 * unaware of (e.g. lookup threw an error).
46 * unlink(): child is being unlinked from the parent.
48 * lookup(): backends fill in the child TF with whatever it knows about the
49 * entry. This function is optional. TFSs whose entire tree is in the frontend
50 * (e.g. ramfs) don't do lookups from the non-existent backend. This
51 * structure, positive or negative, will be used until it is unlinked.
53 * create(): given parent and it's initialized child struct, create the file and
54 * do whatever else you need (e.g. 9p backend does the create). Same as lookup,
55 * the TFS needs to fill in child->dir, except for name. Note we don't pass
56 * omode. That's handled in the device's front-end create: open the chan after
59 * rename(): rename / move TF from the old_parent to the new_parent, and change
60 * the name to name (NULL == no change). Note that the rename op does *not* set
61 * the TF's name yet. Be careful changing any fields in the TF, since it is
62 * still in old_parent's HT.
64 * has_children(): TFSs that use the front end as a cache (e.g. #mnt) need to
65 * check with their backend to know if there are children or not. Checking the
66 * children list isn't necessarily sufficient.
68 struct tree_file_ops {
69 void (*free)(struct tree_file *tf);
70 void (*unlink)(struct tree_file *parent, struct tree_file *child);
71 void (*lookup)(struct tree_file *parent, struct tree_file *child);
72 void (*create)(struct tree_file *parent, struct tree_file *child, int perm);
73 void (*rename)(struct tree_file *tf, struct tree_file *old_parent,
74 struct tree_file *new_parent, const char *name, int flags);
75 bool (*has_children)(struct tree_file *parent);
78 struct tree_filesystem {
80 struct tree_file_ops tf_ops;
81 struct fs_file_ops fs_ops;
83 struct tree_file *root;
87 /* The tree_file is an fs_file (i.e. the first struct field) that exists in a
88 * tree filesystem with parent/child relationships.
90 * Children hold krefs on their parents. Parents have weak refs (i.e. not a
91 * kref) on their children. Parent's have a list of *known* children. The
92 * backend of the FS (e.g. the other side of #mnt) may know of other children.
93 * In this way, the parent's list is a cache. For devices like a ramfs or any
94 * dynamic, completely tree-based system (e.g. #srv), the tree filesystem is the
95 * authoritative FS; there is no backend.
97 * If we know of a parent-child relationship (i.e. it's in the tree_filesystem),
98 * then the following invariants are true:
99 * - The child is in the parent's list.
100 * - The child->parent == the parent's tree_file
101 * - There is an entry in the walk cache from {parent,name} -> child_tree_file.
102 * - The backend FS (if there is one) knows about the parent-child link
104 * To change these, hold the parent's qlock and you must change them all,
105 * with a slight exception: you must *set* child->parent = parent under the
106 * lock. You may *clear* that reference (and decref the parent) outside the
107 * parent's qlock when freeing the child. At this point, there are no
108 * references to child, it is off all lists and structures, and RCU readers are
111 * We'll need to make these changes for create, remove, rename, and lookup (add
112 * entries (positive or negative) that we didn't know about). Note that even
113 * changing the basename, but not moving, a child during rename requires the
114 * parent's qlock. (It changes the hashtable linkage).
116 * Keep in mind that the info in the frontend (tree_files) may be a subset of
117 * the real filesystem, which is the backend (e.g. 9p mount, disk structures,
118 * etc). The frontend might not have everything from the backend, typically
119 * because of memory reclaim.
121 * Readers use the walk cache hash table and child->parent) with RCU read
122 * locks. The linked list of children is only accessed under the parent's
123 * qlock. If a child needs a ref on its parent, it will need to kref_get during
124 * rcu lock. Children hold refs on parents, but a concurrent removal/rename
125 * could break that link and decref the parent.
127 * Synchronization rules
129 * Hold the lifetime spin lock when:
130 * - Changing or relying on the value of flags (e.g., during kcref upping)
131 * - Upping a kref from 0 during a lookup
132 * - Changing membership on LRU lists (e.g. during release or kcref upping)
133 * The lock ordering is entry -> LRU list. Note the LRU pruner will lock the
134 * list first, then *trylock* the entry *and* the parent, skipping on failure.
136 * Refcounting rules: (subject to a device's whims)
137 * - All walked or attached chans (distinct; those that will be closed in the
138 * device) have a refcounted tree_file.
139 * - The tree_files do not have to be in a lookup structure (parent's list /
140 * hash table); they could have been unlinked/removed and waiting for the
142 * - tree_files can have 0 references. Typically, files that are unopened
143 * (technically, unwalked) will have their kref == 0. On release, they can
144 * stay in the tree/lookup system, and they are added to an LRU list.
145 * - negative tree_files (i.e. names for files that don't exist) always have a
146 * refcnt == 0, and are candidates for being pruned at all times.
149 * - Note the tree uses the qlock in the FS file. In essence, they are the same
150 * object (a tree_file is an fs_file). This qlock is for changing the
151 * contents of the file, whether that's the parent directories links to its
152 * children, a file's contents (via write), or the metadata (dir) of either.
153 * - Parent qlock -> hash_table bucketlock
154 * - Parent qlock -> child lifetime spinlock -> LRU list spinlock
155 * Note the LRU pruner inverts this ordering, and uses trylocks
156 * - Parent qlock -> child qlock (unlink and create/rename).
157 * - Qlocking multiple files that aren't parent->child requires the rename_mtx
165 struct tree_file *parent; /* rcu protected */
166 struct hlist_node hash; /* rcu protected */
167 struct list_head siblings;
168 struct list_head children;
169 struct list_head lru;
170 bool can_have_children;
171 struct tree_filesystem *tfs;
174 #define TF_F_DISCONNECTED (1 << 0)
175 #define TF_F_NEGATIVE (1 << 1)
176 #define TF_F_ON_LRU (1 << 2)
177 #define TF_F_IS_ROOT (1 << 3)
178 #define TF_F_HAS_BEEN_USED (1 << 4)
180 /* Devices can put their tree_files / fs_files whereever they want. For now,
181 * all of them will use aux. We can make ops for this if we need it. */
182 static inline struct tree_file *chan_to_tree_file(struct chan *c)
187 static inline void chan_set_tree_file(struct chan *c, struct tree_file *tf)
192 static inline struct qid tree_file_to_qid(struct tree_file *tf)
194 return tf->file.dir.qid;
197 static inline bool tree_file_is_dir(struct tree_file *tf)
199 return tree_file_to_qid(tf).type & QTDIR ? true : false;
202 static inline bool tree_file_is_file(struct tree_file *tf)
204 return qid_is_file(tree_file_to_qid(tf));
207 static inline bool tree_file_is_symlink(struct tree_file *tf)
209 return tree_file_to_qid(tf).type & QTSYMLINK ? true : false;
212 static inline bool tree_file_is_negative(struct tree_file *tf)
214 return tf->flags & TF_F_NEGATIVE ? true : false;
217 static inline bool tree_file_is_root(struct tree_file *tf)
219 return tf == tf->tfs->root;
222 static inline char *tree_file_to_name(struct tree_file *tf)
224 return tf->file.dir.name;
227 static inline bool caller_has_tf_perms(struct tree_file *tf, int omode)
229 return caller_has_file_perms(&tf->file, omode);
232 static inline void tree_file_perm_check(struct tree_file *tf, int omode)
234 fs_file_perm_check(&tf->file, omode);
237 /* tree_file helpers */
238 bool tf_kref_get(struct tree_file *tf);
239 void tf_kref_put(struct tree_file *tf);
240 struct tree_file *tree_file_alloc(struct tree_filesystem *tfs,
241 struct tree_file *parent, const char *name);
242 struct walkqid *tree_file_walk(struct tree_file *from, char **name,
244 struct tree_file *tree_file_create(struct tree_file *parent, const char *name,
245 uint32_t perm, char *ext);
246 void tree_file_remove(struct tree_file *child);
247 void tree_file_rename(struct tree_file *tf, struct tree_file *new_parent,
248 const char *name, int flags);
249 ssize_t tree_file_readdir(struct tree_file *parent, void *ubuf, size_t n,
250 off64_t offset, int *dri);
252 /* tree_file helpers that operate on chans */
253 struct walkqid *tree_chan_walk(struct chan *c, struct chan *nc, char **name,
255 void tree_chan_create(struct chan *c, char *name, int omode, uint32_t perm,
257 struct chan *tree_chan_open(struct chan *c, int omode);
258 void tree_chan_close(struct chan *c);
259 void tree_chan_remove(struct chan *c);
260 void tree_chan_rename(struct chan *c, struct chan *new_p_c, const char *name,
262 size_t tree_chan_read(struct chan *c, void *ubuf, size_t n, off64_t offset);
263 size_t tree_chan_write(struct chan *c, void *ubuf, size_t n, off64_t offset);
264 size_t tree_chan_stat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz);
265 size_t tree_chan_wstat(struct chan *c, uint8_t *m_buf, size_t m_buf_sz);
266 struct fs_file *tree_chan_mmap(struct chan *c, struct vm_region *vmr, int prot,
268 unsigned long tree_chan_ctl(struct chan *c, int op, unsigned long a1,
269 unsigned long a2, unsigned long a3,
272 struct chan *tree_file_alloc_chan(struct tree_file *tf, struct dev *dev,
274 void tfs_init(struct tree_filesystem *tfs);
275 void tfs_destroy(struct tree_filesystem *tfs);
276 void tfs_frontend_for_each(struct tree_filesystem *tfs,
277 void (*cb)(struct tree_file *tf));
278 void tfs_frontend_purge(struct tree_filesystem *tfs,
279 void (*cb)(struct tree_file *tf));
280 void __tfs_dump(struct tree_filesystem *tfs);
281 void __tfs_dump_tf(struct tree_file *tf);
283 void tfs_lru_for_each(struct tree_filesystem *tfs, bool cb(struct tree_file *),
285 void tfs_lru_prune_neg(struct tree_filesystem *tfs);