/*
 * machine_kexec.c - handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>

static inline unsigned long read_cr3(void)
{
        unsigned long cr3;

        asm volatile("movl %%cr3, %0" : "=r" (cr3));
        return cr3;
}
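
/*
 * Note for readers: CR3 holds the physical base address of the
 * top-level page table, so identity_map_page() below converts the
 * value to a kernel virtual pointer with __va() before indexing it.
 */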

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))

#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L2_ATTR (_PAGE_PRESENT)
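
/*
 * A note on the attribute choices (the rationale here is the editor's
 * reading, not stated in the original): leaf and mid-level entries are
 * mapped present + writable, with ACCESSED and DIRTY pre-set so the
 * CPU never has to write those bits back. The PAE top level (L2_ATTR)
 * carries only the present bit, because PAE PDPT entries treat the
 * RW/US bits as reserved.
 */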

#define LEVEL0_SIZE (1UL << 12UL)

#ifndef CONFIG_X86_PAE
#define LEVEL1_SIZE (1UL << 22UL)
static u32 pgtable_level1[1024] PAGE_ALIGNED;

static void identity_map_page(unsigned long address)
{
        unsigned long level1_index, level2_index;
        u32 *pgtable_level2;

        /* Find the current page table */
        pgtable_level2 = __va(read_cr3());

        /* Find the indexes of the physical address to identity map */
        level1_index = (address % LEVEL1_SIZE) / LEVEL0_SIZE;
        level2_index = address / LEVEL1_SIZE;

        /* Identity map the page table entry */
        pgtable_level1[level1_index] = address | L0_ATTR;
        pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;

        /*
         * Flush the tlb so the new mapping takes effect.
         * Global tlb entries are not flushed but that is not an issue.
         */
        load_cr3(pgtable_level2);
}
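
/*
 * For illustration, the index math above with a concrete (hypothetical)
 * address, using the non-PAE layout (4 KB pages, 4 MB per level-1 table):
 *
 *      address      = 0x00c01000
 *      level2_index = 0x00c01000 / 0x400000            = 3
 *      level1_index = (0x00c01000 % 0x400000) / 0x1000 = 1
 *
 * Only a single static level-1 table exists, so each call identity
 * maps exactly one 4 KB page; that is all machine_kexec() needs for
 * the reboot code buffer.
 */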
#else
#define LEVEL1_SIZE (1UL << 21UL)
#define LEVEL2_SIZE (1UL << 30UL)
static u64 pgtable_level1[512] PAGE_ALIGNED;
static u64 pgtable_level2[512] PAGE_ALIGNED;

static void identity_map_page(unsigned long address)
{
        unsigned long level1_index, level2_index, level3_index;
        u64 *pgtable_level3;

        /* Find the current page table */
        pgtable_level3 = __va(read_cr3());

        /* Find the indexes of the physical address to identity map */
        level1_index = (address % LEVEL1_SIZE) / LEVEL0_SIZE;
        level2_index = (address % LEVEL2_SIZE) / LEVEL1_SIZE;
        level3_index = address / LEVEL2_SIZE;

        /* Identity map the page table entry */
        pgtable_level1[level1_index] = address | L0_ATTR;
        pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
        set_64bit(&pgtable_level3[level3_index],
                  __pa(pgtable_level2) | L2_ATTR);

        /*
         * Flush the tlb so the new mapping takes effect.
         * Global tlb entries are not flushed but that is not an issue.
         */
        load_cr3(pgtable_level3);
}
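
/*
 * The same worked example under PAE (4 KB pages, 2 MB per level-1
 * table, 1 GB per level-2 table), again with a hypothetical address:
 *
 *      address      = 0x40201000
 *      level3_index = 0x40201000 / 0x40000000              = 1
 *      level2_index = (0x40201000 % 0x40000000) / 0x200000 = 1
 *      level1_index = (0x40201000 % 0x200000) / 0x1000     = 1
 *
 * set_64bit() is used for the level-3 entry because PAE entries are
 * 64 bits wide and must be updated atomically while paging is live.
 */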
#endif

static void set_idt(void *newidt, __u16 limit)
{
        unsigned char curidt[6];

        /* ia32 supports unaligned loads & stores */
        (*(__u16 *)(curidt)) = limit;
        (*(__u32 *)(curidt + 2)) = (unsigned long)(newidt);

        /*
         * The descriptor is an input to lidt, not an output, so use an
         * "m" input constraint; an "=m" output would let the compiler
         * discard the stores above as dead.
         */
        __asm__ __volatile__ (
                "lidt %0\n"
                : : "m" (curidt)
                );
}
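
/*
 * Both lidt and lgdt take the same 6-byte pseudo-descriptor, which
 * set_idt() and set_gdt() build by hand:
 *
 *      bytes 0-1   limit (size of the table in bytes, minus one)
 *      bytes 2-5   32-bit linear base address of the table
 */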

static void set_gdt(void *newgdt, __u16 limit)
{
        unsigned char curgdt[6];

        /* ia32 supports unaligned loads & stores */
        (*(__u16 *)(curgdt)) = limit;
        (*(__u32 *)(curgdt + 2)) = (unsigned long)(newgdt);

        /* As with set_idt(): the descriptor is an input, not an output */
        __asm__ __volatile__ (
                "lgdt %0\n"
                : : "m" (curgdt)
                );
}

static void load_segments(void)
{
#define __STR(X) #X
#define STR(X) __STR(X)

        __asm__ __volatile__ (
                "\tljmp $"STR(__KERNEL_CS)",$1f\n"
                "\t1:\n"
                "\tmovl $"STR(__KERNEL_DS)",%eax\n"
                "\tmovl %eax,%ds\n"
                "\tmovl %eax,%es\n"
                "\tmovl %eax,%fs\n"
                "\tmovl %eax,%gs\n"
                "\tmovl %eax,%ss\n"
                );
#undef STR
#undef __STR
}
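
/*
 * The two-level __STR()/STR() macro stringifies the selector constants
 * so they can be pasted into the asm template. If, for example,
 * __KERNEL_CS expanded to 0x60 (the actual value depends on the GDT
 * layout), the first line would read "ljmp $0x60,$1f". The far jump
 * forces %cs to be reloaded from the GDT; the moves that follow reload
 * every data segment register the same way.
 */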

typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
                unsigned long indirection_page,
                unsigned long reboot_code_buffer,
                unsigned long start_address,
                unsigned int has_pae) ATTRIB_NORET;

extern const unsigned char relocate_new_kernel[];
extern void relocate_new_kernel_end(void);
extern const unsigned int relocate_new_kernel_size;
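
/*
 * These symbols come from the position-independent assembly stub
 * (relocate_kernel.S in this directory). machine_kexec() copies the
 * stub into the identity-mapped control page and jumps to that copy,
 * so it keeps running after the old kernel's mappings are abandoned.
 */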

/*
 * An architecture hook called to validate the
 * proposed image and prepare the control pages
 * as needed. The pages for KEXEC_CONTROL_CODE_SIZE
 * have been allocated, but the segments have not yet
 * been copied into the kernel.
 *
 * Do whatever setup is needed on the image and the
 * reboot code buffer to allow us to avoid allocations
 * later.
 *
 * Currently nothing.
 */
int machine_kexec_prepare(struct kimage *image)
{
        return 0;
}

/*
 * Undo anything leftover by machine_kexec_prepare
 * when an image is freed.
 */
void machine_kexec_cleanup(struct kimage *image)
{
}

/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
NORET_TYPE void machine_kexec(struct kimage *image)
{
        unsigned long page_list;
        unsigned long reboot_code_buffer;
        relocate_new_kernel_t rnk;

        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();

        /* Compute some offsets */
        reboot_code_buffer = page_to_pfn(image->control_code_page)
                                                        << PAGE_SHIFT;
        page_list = image->head;

        /* Set up an identity mapping for the reboot_code_buffer */
        identity_map_page(reboot_code_buffer);

        /* copy it out */
        memcpy((void *)reboot_code_buffer, relocate_new_kernel,
               relocate_new_kernel_size);

        /*
         * The segment registers are funny things: they are
         * automatically loaded from a table, in memory, whenever you
         * set them to a specific selector, but this table is never
         * accessed again unless you set the segment to a different
         * selector.
         *
         * The more common model is a cache where the behind-the-scenes
         * work is done, but which may also be dropped at arbitrary
         * times.
         *
         * I take advantage of this here by force loading the
         * segments, before I zap the gdt with an invalid value.
         */
        load_segments();

        /*
         * The gdt & idt are now invalid.
         * If you want to load them you must set up your own idt & gdt.
         */
        set_gdt(phys_to_virt(0), 0);
        set_idt(phys_to_virt(0), 0);

        /* now call it */
        rnk = (relocate_new_kernel_t) reboot_code_buffer;
        (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
}
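
/*
 * For context, a rough sketch of how this function is reached (the
 * exact call chain varies by kernel version):
 *
 *      sys_reboot(LINUX_REBOOT_CMD_KEXEC)
 *        -> kernel_kexec()               common kexec code
 *          -> machine_shutdown()         quiesce other CPUs and devices
 *          -> machine_kexec(image)       this function; never returns
 *
 * page_list (image->head) is the head of the kexec indirection chain;
 * the assembly stub walks it to copy the new kernel's segments into
 * place before jumping to image->start.
 */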