/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 */
#define DEBUG
#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <sys/queue.h>
#include <smp.h>
#include <kref.h>
#include <atomic.h>
#include <alarm.h>
#include <event.h>
#include <umem.h>
#include <devalarm.h>
#include <arch/types.h>
#include <arch/vm.h>
#include <arch/emulate.h>
#include <arch/vmdebug.h>
#include <arch/msr-index.h>

#define pgprintk(x...) do { } while (0)

#define ASSERT(x)                                                       \
        if (!(x)) {                                                     \
                printd("assertion failed %s:%d: %s\n",                  \
                       __FILE__, __LINE__, #x);                         \
        }

#define PT64_ENT_PER_PAGE 512
#define PT32_ENT_PER_PAGE 1024

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
        (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT32_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \
        PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_PAT_MASK | \
        PT_GLOBAL_MASK)

#define PT32_NON_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_PWT_MASK | PT_PCD_MASK | \
        PT_ACCESSED_MASK | PT_DIRTY_MASK)

#define PT64_PTE_COPY_MASK \
        (PT64_NX_MASK | PT32_PTE_COPY_MASK)

#define PT64_NON_PTE_COPY_MASK \
        (PT64_NX_MASK | PT32_NON_PTE_COPY_MASK)

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
                (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level)\
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
                (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
                (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level)\
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))

#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

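/*
 * Simple predicates on guest control-register state and on guest/shadow
 * PTE bits.
 */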
static int is_write_protection(void)
{
        return guest_cr0() & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_present_pte(unsigned long pte)
{
        return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
        return pte & PT_SHADOW_IO_MARK;
}

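/* Move a shadow page from the active list back onto the vcpu's free list. */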
static void litevm_mmu_free_page(struct litevm_vcpu *vcpu, hpa_t page_hpa)
{
        struct litevm_mmu_page *page_head = page_header(page_hpa);

        LIST_REMOVE(page_head, link);
        //list_del(&page_head->link);
        page_head->page_hpa = page_hpa;
        //list_add(&page_head->link, &vcpu->free_pages);
        LIST_INSERT_HEAD(&vcpu->link, page_head, link);
}

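/* Debug check: a page on the free list is expected to be entirely zero. */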
static int is_empty_shadow_page(hpa_t page_hpa)
{
        uint32_t *pos;
        uint32_t *end;

        for (pos = KADDR(page_hpa), end = pos + PAGE_SIZE / sizeof(uint32_t);
             pos != end; pos++)
                if (*pos != 0)
                        return 0;
        return 1;
}

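/*
 * Take a zeroed page off the vcpu's free list, move it onto the active
 * list and remember the parent PTE that will point at it.  Returns
 * INVALID_PAGE when the free list is empty.
 */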
static hpa_t litevm_mmu_alloc_page(struct litevm_vcpu *vcpu,
                                   uint64_t *parent_pte)
{
        struct litevm_mmu_page *page;

        if (LIST_EMPTY(&vcpu->link))
                return INVALID_PAGE;

        page = LIST_FIRST(&vcpu->link);
        LIST_REMOVE(page, link);
        LIST_INSERT_HEAD(&vcpu->litevm->link, page, link);
        ASSERT(is_empty_shadow_page(page->page_hpa));
        page->slot_bitmap = 0;
        page->global = 1;
        page->parent_pte = parent_pte;
        return page->page_hpa;
}

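/* Record in the shadow page's bitmap which memory slot this PTE maps. */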
static void page_header_update_slot(struct litevm *litevm, void *pte, gpa_t gpa)
{
        int slot = memslot_id(litevm, gfn_to_memslot(litevm, gpa >> PAGE_SHIFT));
        struct litevm_mmu_page *page_head = page_header(PADDR(pte));

        SET_BITMASK_BIT_ATOMIC((uint8_t *)&page_head->slot_bitmap, slot);
}

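/*
 * Guest-physical and guest-virtual to host-physical translation helpers.
 * safe_gpa_to_hpa() substitutes the bad page for addresses that have no
 * backing memory slot instead of returning an error HPA.
 */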
hpa_t safe_gpa_to_hpa(struct litevm_vcpu *vcpu, gpa_t gpa)
{
        hpa_t hpa = gpa_to_hpa(vcpu, gpa);

        return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct litevm_vcpu *vcpu, gpa_t gpa)
{
        struct litevm_memory_slot *slot;
        struct page *page;

        ASSERT((gpa & HPA_ERR_MASK) == 0);
        slot = gfn_to_memslot(vcpu->litevm, gpa >> PAGE_SHIFT);
        if (!slot)
                return gpa | HPA_ERR_MASK;
        page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
        return ((hpa_t)page2ppn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE - 1));
}

hpa_t gva_to_hpa(struct litevm_vcpu *vcpu, gva_t gva)
{
        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

        if (gpa == UNMAPPED_GVA)
                return UNMAPPED_GVA;
        return gpa_to_hpa(vcpu, gpa);
}

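/*
 * Recursively zero a shadow page-table subtree and return each of its
 * pages to the free list.
 */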
static void release_pt_page_64(struct litevm_vcpu *vcpu, hpa_t page_hpa,
                               int level)
{
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(page_hpa));
        ASSERT(level <= PT64_ROOT_LEVEL && level > 0);

        if (level == 1)
                memset(KADDR(page_hpa), 0, PAGE_SIZE);
        else {
                uint64_t *pos;
                uint64_t *end;

                for (pos = KADDR(page_hpa), end = pos + PT64_ENT_PER_PAGE;
                     pos != end; pos++) {
                        uint64_t current_ent = *pos;

                        *pos = 0;
                        if (is_present_pte(current_ent))
                                release_pt_page_64(vcpu,
                                                   current_ent &
                                                   PT64_BASE_ADDR_MASK,
                                                   level - 1);
                }
        }
        litevm_mmu_free_page(vcpu, page_hpa);
}

static void nonpaging_new_cr3(struct litevm_vcpu *vcpu)
{
}

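/*
 * Map a page for a guest that has paging disabled: walk the shadow table
 * from the root, allocating intermediate shadow pages as needed, and
 * install a present/writable/user leaf PTE pointing at host page p.
 * Returns -ENOMEM when no shadow pages are free.
 */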
static int nonpaging_map(struct litevm_vcpu *vcpu, gva_t v, hpa_t p)
{
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->mmu.root_hpa;

        for (; ; level--) {
                uint32_t index = PT64_INDEX(v, level);
                uint64_t *table;

                ASSERT(VALID_PAGE(table_addr));
                table = KADDR(table_addr);

                if (level == 1) {
                        mark_page_dirty(vcpu->litevm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->litevm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                       PT_USER_MASK;
                        return 0;
                }

                if (table[index] == 0) {
                        hpa_t new_table = litevm_mmu_alloc_page(vcpu,
                                                                &table[index]);

                        if (!VALID_PAGE(new_table)) {
                                pgprintk("nonpaging_map: ENOMEM\n");
                                return -ENOMEM;
                        }

                        if (level == PT32E_ROOT_LEVEL)
                                table[index] = new_table | PT_PRESENT_MASK;
                        else
                                table[index] = new_table | PT_PRESENT_MASK |
                                               PT_WRITABLE_MASK | PT_USER_MASK;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
}

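/*
 * "Flush the TLB" in the non-paging case: throw away the entire shadow
 * tree and start over with a fresh root.
 */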
static void nonpaging_flush(struct litevm_vcpu *vcpu)
{
        hpa_t root = vcpu->mmu.root_hpa;

        ++litevm_stat.tlb_flush;
        pgprintk("nonpaging_flush\n");
        ASSERT(VALID_PAGE(root));
        release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        root = litevm_mmu_alloc_page(vcpu, 0);
        ASSERT(VALID_PAGE(root));
        vcpu->mmu.root_hpa = root;
        if (is_paging())
                root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
        vmcs_writel(GUEST_CR3, root);
}

static gpa_t nonpaging_gva_to_gpa(struct litevm_vcpu *vcpu, gva_t vaddr)
{
        return vaddr;
}

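/*
 * Shadow page fault with guest paging disabled: map the faulting address
 * 1:1, flushing the shadow tree and retrying if we run out of shadow
 * pages.
 */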
static int nonpaging_page_fault(struct litevm_vcpu *vcpu, gva_t gva,
                                uint32_t error_code)
{
        int ret;
        gpa_t addr = gva;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

        for (;;) {
                hpa_t paddr;

                paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

                if (is_error_hpa(paddr))
                        return 1;

                ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
                if (ret) {
                        nonpaging_flush(vcpu);
                        continue;
                }
                break;
        }
        return ret;
}

static void nonpaging_inval_page(struct litevm_vcpu *vcpu, gva_t addr)
{
}

static void nonpaging_free(struct litevm_vcpu *vcpu)
{
        hpa_t root;

        ASSERT(vcpu);
        root = vcpu->mmu.root_hpa;
        if (VALID_PAGE(root))
                release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        vcpu->mmu.root_hpa = INVALID_PAGE;
}

static int nonpaging_init_context(struct litevm_vcpu *vcpu)
{
        struct litevm_mmu *context = &vcpu->mmu;

        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->inval_page = nonpaging_inval_page;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
        context->root_level = PT32E_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = litevm_mmu_alloc_page(vcpu, 0);
        ASSERT(VALID_PAGE(context->root_hpa));
        vmcs_writel(GUEST_CR3, context->root_hpa);
        return 0;
}

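/*
 * Emulate a guest TLB flush: drop every non-global shadow page that has
 * a parent PTE, clearing the parent's reference to it first.
 */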
static void litevm_mmu_flush_tlb(struct litevm_vcpu *vcpu)
{
        struct litevm_mmu_page *page, *npage;

        //list_for_each_entry_safe(page, npage, &vcpu->litevm->active_mmu_pages,
        LIST_FOREACH_SAFE(page, &vcpu->litevm->link, link, npage) {
                if (page->global)
                        continue;

                if (!page->parent_pte)
                        continue;

                *page->parent_pte = 0;
                release_pt_page_64(vcpu, page->page_hpa, 1);
        }
        ++litevm_stat.tlb_flush;
}

static void paging_new_cr3(struct litevm_vcpu *vcpu)
{
        litevm_mmu_flush_tlb(vcpu);
}

static void mark_pagetable_nonglobal(void *shadow_pte)
{
        page_header(PADDR(shadow_pte))->global = 0;
}

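/*
 * Work common to all shadow PTE writes: stash the guest access bits in
 * the shadow-only bit range, update dirty tracking, and point the PTE at
 * the host page (or mark it as an I/O PTE when the gpa has no backing
 * memory slot).
 */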
static inline void set_pte_common(struct litevm_vcpu *vcpu,
                                  uint64_t *shadow_pte,
                                  gpa_t gaddr,
                                  int dirty,
                                  uint64_t access_bits)
{
        hpa_t paddr;

        *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
        if (!dirty)
                access_bits &= ~PT_WRITABLE_MASK;

        if (access_bits & PT_WRITABLE_MASK)
                mark_page_dirty(vcpu->litevm, gaddr >> PAGE_SHIFT);

        *shadow_pte |= access_bits;

        paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

        if (!(*shadow_pte & PT_GLOBAL_MASK))
                mark_pagetable_nonglobal(shadow_pte);

        if (is_error_hpa(paddr)) {
                *shadow_pte |= gaddr;
                *shadow_pte |= PT_SHADOW_IO_MARK;
                *shadow_pte &= ~PT_PRESENT_MASK;
        } else {
                *shadow_pte |= paddr;
                page_header_update_slot(vcpu->litevm, shadow_pte, gaddr);
        }
}

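/*
 * Reflect a page fault back into the guest.  If the fault occurred while
 * the guest was already delivering an exception, inject a double fault
 * instead.
 */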
static void inject_page_fault(struct litevm_vcpu *vcpu,
                              uint64_t addr,
                              uint32_t err_code)
{
        uint32_t vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

        pgprintk("inject_page_fault: 0x%llx err 0x%x\n", addr, err_code);

        ++litevm_stat.pf_guest;

        if (is_page_fault(vect_info)) {
                printd("inject_page_fault: "
                       "double fault 0x%llx @ 0x%lx\n",
                       addr, vmcs_readl(GUEST_RIP));
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 0);
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                             DF_VECTOR |
                             INTR_TYPE_EXCEPTION |
                             INTR_INFO_DELIEVER_CODE_MASK |
                             INTR_INFO_VALID_MASK);
                return;
        }
        vcpu->cr2 = addr;
        vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, err_code);
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                     PF_VECTOR |
                     INTR_TYPE_EXCEPTION |
                     INTR_INFO_DELIEVER_CODE_MASK |
                     INTR_INFO_VALID_MASK);
}

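/*
 * Read fault fix-up: if the guest PTE allows user access but the shadow
 * PTE does not yet, grant user access while dropping write permission so
 * that writes still trap.
 */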
static inline int fix_read_pf(uint64_t *shadow_ent)
{
        if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
            !(*shadow_ent & PT_USER_MASK)) {
                /*
                 * If supervisor write protect is disabled, we shadow kernel
                 * pages as user pages so we can trap the write access.
                 */
                *shadow_ent |= PT_USER_MASK;
                *shadow_ent &= ~PT_WRITABLE_MASK;

                return 1;
        }
        return 0;
}

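/* Does this guest PTE permit the requested user/write access? */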
static int may_access(uint64_t pte, int write, int user)
{
        if (user && !(pte & PT_USER_MASK))
                return 0;
        if (write && !(pte & PT_WRITABLE_MASK))
                return 0;
        return 1;
}

/*
 * Remove a shadow pte.
 */
static void paging_inval_page(struct litevm_vcpu *vcpu, gva_t addr)
{
        hpa_t page_addr = vcpu->mmu.root_hpa;
        int level = vcpu->mmu.shadow_root_level;

        ++litevm_stat.invlpg;

        for (; ; level--) {
                uint32_t index = PT64_INDEX(addr, level);
                uint64_t *table = KADDR(page_addr);

                if (level == PT_PAGE_TABLE_LEVEL) {
                        table[index] = 0;
                        return;
                }

                if (!is_present_pte(table[index]))
                        return;

                page_addr = table[index] & PT64_BASE_ADDR_MASK;

                if (level == PT_DIRECTORY_LEVEL &&
                    (table[index] & PT_SHADOW_PS_MARK)) {
                        table[index] = 0;
                        release_pt_page_64(vcpu, page_addr,
                                           PT_PAGE_TABLE_LEVEL);

                        /* flush tlb */
                        vmcs_writel(GUEST_CR3, vcpu->mmu.root_hpa |
                                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
                        return;
                }
        }
}

static void paging_free(struct litevm_vcpu *vcpu)
{
        nonpaging_free(vcpu);
}

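/*
 * paging_tmpl.h is included twice: once with PTTYPE 64 and once with
 * PTTYPE 32, instantiating the paging64_* and paging32_* guest
 * page-table walkers used by the context-init functions below.
 */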
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE

static int paging64_init_context(struct litevm_vcpu *vcpu)
{
        struct litevm_mmu *context = &vcpu->mmu;

        ASSERT(is_pae());
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT64_ROOT_LEVEL;
        context->shadow_root_level = PT64_ROOT_LEVEL;
        context->root_hpa = litevm_mmu_alloc_page(vcpu, 0);
        ASSERT(VALID_PAGE(context->root_hpa));
        vmcs_writel(GUEST_CR3, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32_init_context(struct litevm_vcpu *vcpu)
{
        struct litevm_mmu *context = &vcpu->mmu;

        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = litevm_mmu_alloc_page(vcpu, 0);
        ASSERT(VALID_PAGE(context->root_hpa));
        vmcs_writel(GUEST_CR3, context->root_hpa |
                    (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32E_init_context(struct litevm_vcpu *vcpu)
{
        int ret;

        if ((ret = paging64_init_context(vcpu)))
                return ret;

        vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
        vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
        return 0;
}

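/*
 * Pick the MMU flavor that matches the guest's current paging mode:
 * non-paging, 64-bit long mode, PAE, or legacy 32-bit paging.
 */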
static int init_litevm_mmu(struct litevm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

        if (!is_paging())
                return nonpaging_init_context(vcpu);
        else if (is_long_mode())
                return paging64_init_context(vcpu);
        else if (is_pae())
                return paging32E_init_context(vcpu);
        else
                return paging32_init_context(vcpu);
}

static void destroy_litevm_mmu(struct litevm_vcpu *vcpu)
{
        ASSERT(vcpu);
        if (VALID_PAGE(vcpu->mmu.root_hpa)) {
                vcpu->mmu.free(vcpu);
                vcpu->mmu.root_hpa = INVALID_PAGE;
        }
}

int litevm_mmu_reset_context(struct litevm_vcpu *vcpu)
{
        destroy_litevm_mmu(vcpu);
        return init_litevm_mmu(vcpu);
}

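/* Return every page in the vcpu's shadow-page pool to the kernel. */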
static void free_mmu_pages(struct litevm_vcpu *vcpu)
{
        /* todo: use the right macros */
        while (!LIST_EMPTY(&vcpu->link)) {
                struct litevm_mmu_page *vmpage;
                uintptr_t ppn;

                vmpage = LIST_FIRST(&vcpu->link);
                LIST_REMOVE(vmpage, link);
                ppn = vmpage->page_hpa >> PAGE_SHIFT;
                page_decref(ppn2page(ppn));
                assert(page_is_free(ppn));
                vmpage->page_hpa = INVALID_PAGE;
        }
}

static int alloc_mmu_pages(struct litevm_vcpu *vcpu)
{
        int i;

        ASSERT(vcpu);

        /* we could try to do the contiguous alloc but it's not
         * necessary for them to be contiguous.
         */
        for (i = 0; i < LITEVM_NUM_MMU_PAGES; i++) {
                struct page *page;
                struct litevm_mmu_page *page_header = &vcpu->page_header_buf[i];

                if ((page = kpage_alloc_addr()) == NULL)
                        goto error_1;
                page->pg_private = page_header;
                page_header->page_hpa = (hpa_t)page2ppn(page) << PAGE_SHIFT;
                memset(KADDR(page_header->page_hpa), 0, PAGE_SIZE);
                LIST_INSERT_HEAD(&vcpu->link, page_header, link);
        }
        return 0;

error_1:
        free_mmu_pages(vcpu);
        return -ENOMEM;
}

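/* Set up the per-vcpu shadow-page pool and the initial MMU context. */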
int litevm_mmu_init(struct litevm_vcpu *vcpu)
{
        int r;

        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
        ASSERT(LIST_EMPTY(&vcpu->link));

        if ((r = alloc_mmu_pages(vcpu)))
                return r;

        if ((r = init_litevm_mmu(vcpu))) {
                free_mmu_pages(vcpu);
                return r;
        }
        return 0;
}

void litevm_mmu_destroy(struct litevm_vcpu *vcpu)
{
        ASSERT(vcpu);

        destroy_litevm_mmu(vcpu);
        free_mmu_pages(vcpu);
}

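/*
 * Write-protect every shadow PTE in shadow pages that map memory
 * belonging to the given memory slot.
 */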
void litevm_mmu_slot_remove_write_access(struct litevm *litevm, int slot)
{
        struct litevm_mmu_page *page;

        LIST_FOREACH(page, &litevm->link, link) {
                int i;
                uint64_t *pt;

                if (!GET_BITMASK_BIT((uint8_t *)&page->slot_bitmap, slot))
                        continue;

                pt = KADDR(page->page_hpa);
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
                        if (pt[i] & PT_WRITABLE_MASK)
                                pt[i] &= ~PT_WRITABLE_MASK;
        }
}