/*
 * machine_kexec.c - handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2. See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>

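/* Return the physical base address of the current top-level page
 * table from %cr3, so identity_map_page() below can locate the live
 * page tables and patch them in place.
 */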
static inline unsigned long read_cr3(void)
{
        unsigned long cr3;
        asm volatile("movl %%cr3,%0": "=r"(cr3));
        return cr3;
}

#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))

#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L2_ATTR (_PAGE_PRESENT)

#define LEVEL0_SIZE (1UL << 12UL)

#ifndef CONFIG_X86_PAE
#define LEVEL1_SIZE (1UL << 22UL)

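/* Without PAE, paging is two-level: a 1024-entry page directory of
 * 4 MB regions, each pointing at a 1024-entry page table of 4 KB
 * pages.  One statically allocated page table is enough to map the
 * single control page needed here.
 */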
static u32 pgtable_level1[1024] PAGE_ALIGNED;

static void identity_map_page(unsigned long address)
{
        unsigned long level1_index, level2_index;
        u32 *pgtable_level2;

        /* Find the current page table */
        pgtable_level2 = __va(read_cr3());

        /* Find the indexes of the physical address to identity map */
        level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
        level2_index = address / LEVEL1_SIZE;

        /* Identity map the page table entry */
        pgtable_level1[level1_index] = address | L0_ATTR;
        pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;

        /* Flush the tlb so the new mapping takes effect.
         * Global tlb entries are not flushed but that is not an issue.
         */
        load_cr3(pgtable_level2);
}

#else
#define LEVEL1_SIZE (1UL << 21UL)
#define LEVEL2_SIZE (1UL << 30UL)
static u64 pgtable_level1[512] PAGE_ALIGNED;
static u64 pgtable_level2[512] PAGE_ALIGNED;

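/* With PAE, paging is three-level: a 4-entry page directory pointer
 * table (1 GB per entry), 512-entry page directories (2 MB per entry)
 * and 512-entry page tables (4 KB per entry), all with 64-bit entries.
 */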
static void identity_map_page(unsigned long address)
{
        unsigned long level1_index, level2_index, level3_index;
        u64 *pgtable_level3;

        /* Find the current page table */
        pgtable_level3 = __va(read_cr3());

        /* Find the indexes of the physical address to identity map */
        level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
        level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
        level3_index = address / LEVEL2_SIZE;

        /* Identity map the page table entry */
        pgtable_level1[level1_index] = address | L0_ATTR;
        pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
        set_64bit(&pgtable_level3[level3_index],
                  __pa(pgtable_level2) | L2_ATTR);

        /* Flush the tlb so the new mapping takes effect.
         * Global tlb entries are not flushed but that is not an issue.
         */
        load_cr3(pgtable_level3);
}
#endif

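/* Load the interrupt descriptor table register.  machine_kexec()
 * calls this (and set_gdt() below) with a zero base and limit, so
 * any interrupt or exception taken after that point triple-faults
 * rather than running a stale handler.
 */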
static void set_idt(void *newidt, __u16 limit)
{
        struct Xgt_desc_struct curidt;

        /* ia32 supports unaligned loads & stores */
        curidt.size    = limit;
        curidt.address = (unsigned long)newidt;

        __asm__ __volatile__ (
                "lidtl %0\n"
                : : "m" (curidt)
        );
}

static void set_gdt(void *newgdt, __u16 limit)
{
        struct Xgt_desc_struct curgdt;

        /* ia32 supports unaligned loads & stores */
        curgdt.size    = limit;
        curgdt.address = (unsigned long)newgdt;

        __asm__ __volatile__ (
                "lgdtl %0\n"
                : : "m" (curgdt)
        );
}

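/* Force a reload of %cs (via a far jump) and of the data segment
 * registers with __KERNEL_DS.  This refreshes the CPU's hidden
 * segment descriptor caches from the current gdt, so the segments
 * remain usable after the gdt itself is invalidated.
 */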
static void load_segments(void)
{
#define __STR(X) #X
#define STR(X) __STR(X)

        __asm__ __volatile__ (
                "\tljmp $"STR(__KERNEL_CS)",$1f\n"
                "\t1:\n"
                "\tmovl $"STR(__KERNEL_DS)",%eax\n"
                "\tmovl %eax,%ds\n"
                "\tmovl %eax,%es\n"
                "\tmovl %eax,%fs\n"
                "\tmovl %eax,%gs\n"
                "\tmovl %eax,%ss\n"
        );
#undef STR
#undef __STR
}

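/* The relocation code itself, relocate_new_kernel, is
 * position-independent assembly (in the accompanying
 * relocate_kernel.S); it is copied into the identity-mapped control
 * page and entered through the function pointer type below.
 */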
typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
        unsigned long indirection_page,
        unsigned long reboot_code_buffer,
        unsigned long start_address,
        unsigned int has_pae) ATTRIB_NORET;

extern const unsigned char relocate_new_kernel[];
extern void relocate_new_kernel_end(void);
extern const unsigned int relocate_new_kernel_size;

/*
 * An architecture hook called to validate the
 * proposed image and prepare the control pages
 * as needed.  The pages for KEXEC_CONTROL_CODE_SIZE
 * have been allocated, but the segments have not yet
 * been copied into the kernel.
 *
 * Do whatever setup is needed on the image and the
 * reboot code buffer to allow us to avoid allocations
 * later.
 *
 * Currently nothing.
 */
int machine_kexec_prepare(struct kimage *image)
{
        return 0;
}

/*
 * Undo anything leftover by machine_kexec_prepare
 * when an image is freed.
 */
void machine_kexec_cleanup(struct kimage *image)
{
}

/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
NORET_TYPE void machine_kexec(struct kimage *image)
{
        unsigned long page_list;
        unsigned long reboot_code_buffer;
        relocate_new_kernel_t rnk;

        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();

        /* Compute some offsets */
        reboot_code_buffer = page_to_pfn(image->control_code_page)
                                                << PAGE_SHIFT;
        page_list = image->head;

        /* Set up an identity mapping for the reboot_code_buffer */
        identity_map_page(reboot_code_buffer);

        /* copy it out */
        memcpy((void *)reboot_code_buffer, relocate_new_kernel,
                relocate_new_kernel_size);

        /* The segment registers are funny things; they are
         * automatically loaded from a table in memory whenever you
         * set them to a specific selector, but that table is never
         * accessed again until you set the segment to a different
         * selector.
         *
         * The more common model is a cache, where the behind-the-scenes
         * work is also done but may be dropped at arbitrary times.
         *
         * We take advantage of this here by force loading the
         * segments before we zap the gdt with an invalid value.
         */
        load_segments();

        /* The gdt & idt are now invalid.
         * If you want to load them you must set up your own idt & gdt.
         */
        set_gdt(phys_to_virt(0), 0);
        set_idt(phys_to_virt(0), 0);

        /* now call it */
        rnk = (relocate_new_kernel_t) reboot_code_buffer;
        (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
}