Fix for not having a cr3 loaded at all times
[akaros.git] / user / parlib / vcore.c
1 #include <arch/arch.h>
2 #include <stdbool.h>
3 #include <errno.h>
4 #include <vcore.h>
5 #include <mcs.h>
6 #include <sys/param.h>
7 #include <parlib.h>
8 #include <unistd.h>
9 #include <stdlib.h>
10 #include <sys/mman.h>
11 #include <rstdio.h>
12 #include <glibc-tls.h>
13
14 /* starting with 1 since we alloc vcore0's stacks and TLS in vcore_init(). */
15 static size_t _max_vcores_ever_wanted = 1;
16 static mcs_lock_t _vcore_lock = MCS_LOCK_INIT;
17
18 extern void** vcore_thread_control_blocks;
19
20 /* Get a TLS, returns 0 on failure.  Vcores have their own TLS, and any thread
21  * created by a user-level scheduler needs to create a TLS as well. */
22 void *allocate_tls(void)
23 {
24         extern void *_dl_allocate_tls(void *mem) internal_function;
25         void *tcb = _dl_allocate_tls(NULL);
26         if (!tcb)
27                 return 0;
28         /* Make sure the TLS is set up properly - its tcb pointer points to itself.
29          * Keep this in sync with sysdeps/ros/XXX/tls.h.  For whatever reason,
30          * dynamically linked programs do not need this to be redone, but statics
31          * do. */
32         tcbhead_t *head = (tcbhead_t*)tcb;
33         head->tcb = tcb;
34         head->self = tcb;
35         return tcb;
36 }
37
38 /* TODO: probably don't want to dealloc.  Considering caching */
39 static void free_transition_tls(int id)
40 {
41         extern void _dl_deallocate_tls (void *tcb, bool dealloc_tcb) internal_function;
42         if(vcore_thread_control_blocks[id])
43         {
44                 _dl_deallocate_tls(vcore_thread_control_blocks[id],true);
45                 vcore_thread_control_blocks[id] = NULL;
46         }
47 }
48
49 static int allocate_transition_tls(int id)
50 {
51         /* We want to free and then reallocate the tls rather than simply 
52          * reinitializing it because its size may have changed.  TODO: not sure if
53          * this is right.  0-ing is one thing, but freeing and reallocating can be
54          * expensive, esp if syscalls are involved.  Check out glibc's
55          * allocatestack.c for what might work. */
56         free_transition_tls(id);
57
58         void *tcb = allocate_tls();
59
60         if ((vcore_thread_control_blocks[id] = tcb) == NULL) {
61                 errno = ENOMEM;
62                 return -1;
63         }
64         return 0;
65 }
66
/* Intentional no-op: transition stacks are kept mapped and reused by
 * allocate_transition_stack() (which checks vcpd->transition_stack) rather
 * than being unmapped and remapped on every vcore_request(). */
static void free_transition_stack(int id)
{
	// don't actually free stacks
}
71
/* Ensure vcore 'id' has a transition stack, mmap'ing one on first use.
 * Returns 0 on success (including reuse of an already-mapped stack) or -1
 * with errno left as set by mmap.  The stack top is published to the kernel
 * via procdata's per-vcore preempt data. */
static int allocate_transition_stack(int id)
{
	struct preempt_data *vcpd = &__procdata.vcore_preempt_data[id];
	if (vcpd->transition_stack)
		return 0; // reuse old stack

	/* NOTE(review): neither MAP_PRIVATE nor MAP_SHARED is passed, though
	 * POSIX requires one for mmap; presumably this kernel's mmap accepts
	 * anonymous mappings without it -- confirm. */
	void* stackbot = mmap(0, TRANSITION_STACK_SIZE,
	                      PROT_READ|PROT_WRITE|PROT_EXEC,
	                      MAP_POPULATE|MAP_ANONYMOUS, -1, 0);

	if(stackbot == MAP_FAILED)
		return -1; // errno set by mmap

	/* Stacks grow down: record the address one past the top of the mapping. */
	vcpd->transition_stack = (uintptr_t)stackbot + TRANSITION_STACK_SIZE;

	return 0;
}
89
90 int vcore_init()
91 {
92         static int initialized = 0;
93         if(initialized)
94                 return 0;
95
96         vcore_thread_control_blocks = (void**)calloc(max_vcores(),sizeof(void*));
97
98         if(!vcore_thread_control_blocks)
99                 goto vcore_init_fail;
100
101         /* Need to alloc vcore0's transition stuff here (technically, just the TLS)
102          * so that schedulers can use vcore0's transition TLS before it comes up in
103          * vcore_entry() */
104         if(allocate_transition_stack(0) || allocate_transition_tls(0))
105                 goto vcore_init_tls_fail;
106
107         initialized = 1;
108         return 0;
109
110 vcore_init_tls_fail:
111         free(vcore_thread_control_blocks);
112 vcore_init_fail:
113         errno = ENOMEM;
114         return -1;
115 }
116
117 int vcore_request(size_t k)
118 {
119         int ret = -1;
120         size_t i,j;
121
122         if(vcore_init() < 0)
123                 return -1;
124
125         // TODO: could do this function without a lock once we 
126         // have atomic fetch and add in user space
127         mcs_lock_lock(&_vcore_lock);
128
129         size_t vcores_wanted = num_vcores() + k;
130         if(k < 0 || vcores_wanted > max_vcores())
131         {
132                 errno = EAGAIN;
133                 goto fail;
134         }
135
136         for(i = _max_vcores_ever_wanted; i < vcores_wanted; i++)
137         {
138                 if(allocate_transition_stack(i) || allocate_transition_tls(i))
139                         goto fail; // errno set by the call that failed
140                 _max_vcores_ever_wanted++;
141         }
142         ret = sys_resource_req(RES_CORES, vcores_wanted, 1, 0);
143
144 fail:
145         mcs_lock_unlock(&_vcore_lock);
146         return ret;
147 }
148
/* Yield the calling vcore back to the kernel: a thin wrapper around
 * sys_yield.  The argument's meaning is defined by the syscall --
 * presumably a "being nice"/idle flag; confirm against sys_yield. */
void vcore_yield()
{
	sys_yield(0);
}
153
154 size_t max_vcores()
155 {
156         return MIN(__procinfo.max_vcores, MAX_VCORES);
157 }
158
/* Number of vcores currently granted to this process, as published by the
 * kernel in procinfo. */
size_t num_vcores()
{
	return __procinfo.num_vcores;
}
163
/* ID of the vcore this code is running on, read from the per-vcore
 * __vcoreid variable. */
int vcore_id()
{
	return __vcoreid;
}
168