cleanup.c

/*
 * MTRR (Memory Type Range Register) cleanup
 *
 * Copyright (C) 2009 Yinghai Lu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/kvm_para.h>
#include "mtrr.h"

/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256

struct res_range {
	unsigned long	start;
	unsigned long	end;
};
static int __init
add_range(struct res_range *range, int nr_range, unsigned long start,
	  unsigned long end)
{
	/* out of slots */
	if (nr_range >= RANGE_NUM)
		return nr_range;

	range[nr_range].start = start;
	range[nr_range].end = end;

	nr_range++;

	return nr_range;
}
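
/*
 * Like add_range(), but first try to extend an existing entry that
 * overlaps or is adjacent to [start, end]; only fall back to taking a
 * new slot when no such entry exists.
 */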
static int __init
add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
		     unsigned long end)
{
	int i;

	/* try to merge it with old one */
	for (i = 0; i < nr_range; i++) {
		unsigned long final_start, final_end;
		unsigned long common_start, common_end;

		if (!range[i].end)
			continue;

		common_start = max(range[i].start, start);
		common_end = min(range[i].end, end);
		if (common_start > common_end + 1)
			continue;

		final_start = min(range[i].start, start);
		final_end = max(range[i].end, end);

		range[i].start = final_start;
		range[i].end = final_end;
		return nr_range;
	}

	/* need to add that */
	return add_range(range, nr_range, start, end);
}
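
/*
 * Remove [start, end] from every entry in the range array: clear
 * entries that are fully covered, trim partial overlaps, and split an
 * entry into two when the subtracted span sits in its middle.
 */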
static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
	int i, j;

	for (j = 0; j < RANGE_NUM; j++) {
		if (!range[j].end)
			continue;

		if (start <= range[j].start && end >= range[j].end) {
			range[j].start = 0;
			range[j].end = 0;
			continue;
		}

		if (start <= range[j].start && end < range[j].end &&
		    range[j].start < end + 1) {
			range[j].start = end + 1;
			continue;
		}

		if (start > range[j].start && end >= range[j].end &&
		    range[j].end > start - 1) {
			range[j].end = start - 1;
			continue;
		}

		if (start > range[j].start && end < range[j].end) {
			/* find the new spare */
			for (i = 0; i < RANGE_NUM; i++) {
				if (range[i].end == 0)
					break;
			}

			if (i < RANGE_NUM) {
				range[i].end = range[j].end;
				range[i].start = end + 1;
			} else {
				printk(KERN_ERR "ran out of slots in ranges\n");
			}
			range[j].end = start - 1;
			continue;
		}
	}
}
static int __init cmp_range(const void *x1, const void *x2)
{
	const struct res_range *r1 = x1;
	const struct res_range *r2 = x2;
	long start1, start2;

	start1 = r1->start;
	start2 = r2->start;

	return start1 - start2;
}

struct var_mtrr_range_state {
	unsigned long	base_pfn;
	unsigned long	size_pfn;
	mtrr_type	type;
};

static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];

static int __initdata debug_print;
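
/*
 * Build the list of PFN ranges that the variable MTRRs map as
 * write-back: add all WB ranges, subtract UC/WP ranges and the
 * optional extra region, then sort and compact the result.  Returns
 * the number of ranges in use.
 */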
static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
		       unsigned long extra_remove_base,
		       unsigned long extra_remove_size)
{
	unsigned long base, size;
	mtrr_type type;
	int i;

	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		nr_range = add_range_with_merge(range, nr_range, base,
						base + size - 1);
	}
	if (debug_print) {
		printk(KERN_DEBUG "After WB checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* take out UC ranges */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_UNCACHABLE &&
		    type != MTRR_TYPE_WRPROT)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			continue;
		base = range_state[i].base_pfn;
		if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
		    (mtrr_state.enabled & 1)) {
			/* Var MTRR contains UC entry below 1M? Skip it: */
			printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d "
				"contains strange UC entry under 1M, check "
				"with your system vendor!\n", i);
			if (base + size <= (1<<(20-PAGE_SHIFT)))
				continue;
			size -= (1<<(20-PAGE_SHIFT)) - base;
			base = 1<<(20-PAGE_SHIFT);
		}
		subtract_range(range, base, base + size - 1);
	}
	if (extra_remove_size)
		subtract_range(range, extra_remove_base,
				extra_remove_base + extra_remove_size - 1);

	/* get new range num */
	nr_range = 0;
	for (i = 0; i < RANGE_NUM; i++) {
		if (!range[i].end)
			continue;
		nr_range++;
	}
	if (debug_print) {
		printk(KERN_DEBUG "After UC checking\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
	if (debug_print) {
		printk(KERN_DEBUG "After sorting\n");
		for (i = 0; i < nr_range; i++)
			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
				range[i].start, range[i].end + 1);
	}

	/* clear those that are not used */
	for (i = nr_range; i < RANGE_NUM; i++)
		memset(&range[i], 0, sizeof(range[i]));

	return nr_range;
}
static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
	unsigned long sum;
	int i;

	sum = 0;
	for (i = 0; i < nr_range; i++)
		sum += range[i].end + 1 - range[i].start;

	return sum;
}

static int enable_mtrr_cleanup __initdata =
	CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 0;
	return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
	enable_mtrr_cleanup = 1;
	return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
	debug_print = 1;
	return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
struct var_mtrr_state {
	unsigned long	range_startk;
	unsigned long	range_sizek;
	unsigned long	chunk_sizek;
	unsigned long	gran_sizek;
	unsigned int	reg;
};
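
/*
 * Program one variable MTRR pair: encode the base address and memory
 * type into the physbase value and the size mask plus the valid bit
 * (0x800) into the physmask value, then hand both 32-bit halves to
 * fill_mtrr_var_range().  A size of zero clears the register pair.
 */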
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	     unsigned char type, unsigned int address_bits)
{
	u32 base_lo, base_hi, mask_lo, mask_hi;
	u64 base, mask;

	if (!sizek) {
		fill_mtrr_var_range(reg, 0, 0, 0, 0);
		return;
	}

	mask = (1ULL << address_bits) - 1;
	mask &= ~((((u64)sizek) << 10) - 1);

	base = ((u64)basek) << 10;

	base |= type;
	mask |= 0x800;

	base_lo = base & ((1ULL<<32) - 1);
	base_hi = base >> 32;

	mask_lo = mask & ((1ULL<<32) - 1);
	mask_hi = mask >> 32;

	fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}
static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
	      unsigned char type)
{
	range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
	range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
	range_state[reg].type = type;
}

static void __init
set_var_mtrr_all(unsigned int address_bits)
{
	unsigned long basek, sizek;
	unsigned char type;
	unsigned int reg;

	for (reg = 0; reg < num_var_ranges; reg++) {
		basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
		sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
		type = range_state[reg].type;

		set_var_mtrr(reg, basek, sizek, type, address_bits);
	}
}

static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
	char factor;
	unsigned long base = sizek;

	if (base & ((1<<10) - 1)) {
		/* not MB alignment */
		factor = 'K';
	} else if (base & ((1<<20) - 1)) {
		factor = 'M';
		base >>= 10;
	} else {
		factor = 'G';
		base >>= 20;
	}

	*factorp = factor;

	return base;
}
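
/*
 * Cover [range_startk, range_startk + range_sizek) with variable MTRRs
 * of the given type.  Each step picks the largest power-of-two size
 * allowed by both the alignment of the current base and the remaining
 * size.  Returns the next free register index.
 */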
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
	      unsigned long range_sizek, unsigned char type)
{
	if (!range_sizek || (reg >= num_var_ranges))
		return reg;

	while (range_sizek) {
		unsigned long max_align, align;
		unsigned long sizek;

		/* Compute the maximum size I can make a range */
		if (range_startk)
			max_align = ffs(range_startk) - 1;
		else
			max_align = 32;
		align = fls(range_sizek) - 1;
		if (align > max_align)
			align = max_align;

		sizek = 1 << align;
		if (debug_print) {
			char start_factor = 'K', size_factor = 'K';
			unsigned long start_base, size_base;

			start_base = to_size_factor(range_startk,
							&start_factor),
			size_base = to_size_factor(sizek, &size_factor),

			printk(KERN_DEBUG "Setting variable MTRR %d, "
				"base: %ld%cB, range: %ld%cB, type %s\n",
				reg, start_base, start_factor,
				size_base, size_factor,
				(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
				    ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
				);
		}
		save_var_mtrr(reg++, range_startk, sizek, type);
		range_startk += sizek;
		range_sizek -= sizek;
		if (reg >= num_var_ranges)
			break;
	}
	return reg;
}
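
/*
 * Map the pending WB range held in *state, optionally rounding it up
 * to a whole chunk and punching a UC "hole" MTRR for the rounded-up
 * part.  basek/sizek describe the next range to be mapped (both zero
 * for the final flush).  Returns how much of that next range (in KB)
 * has already been covered here, so the caller can skip over it.
 */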
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
			unsigned long sizek)
{
	unsigned long hole_basek, hole_sizek;
	unsigned long second_basek, second_sizek;
	unsigned long range0_basek, range0_sizek;
	unsigned long range_basek, range_sizek;
	unsigned long chunk_sizek;
	unsigned long gran_sizek;

	hole_basek = 0;
	hole_sizek = 0;
	second_basek = 0;
	second_sizek = 0;
	chunk_sizek = state->chunk_sizek;
	gran_sizek = state->gran_sizek;

	/* align to gran size; prevent small blocks from using up MTRRs */
	range_basek = ALIGN(state->range_startk, gran_sizek);
	if ((range_basek > basek) && basek)
		return second_sizek;

	state->range_sizek -= (range_basek - state->range_startk);
	range_sizek = ALIGN(state->range_sizek, gran_sizek);

	while (range_sizek > state->range_sizek) {
		range_sizek -= gran_sizek;
		if (!range_sizek)
			return 0;
	}
	state->range_sizek = range_sizek;

	/* try to append some small hole */
	range0_basek = state->range_startk;
	range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

	/* no increase */
	if (range0_sizek == state->range_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + state->range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				state->range_sizek, MTRR_TYPE_WRBACK);
		return 0;
	}

	/* only cut back, when it is not the last */
	if (sizek) {
		while (range0_basek + range0_sizek > (basek + sizek)) {
			if (range0_sizek >= chunk_sizek)
				range0_sizek -= chunk_sizek;
			else
				range0_sizek = 0;

			if (!range0_sizek)
				break;
		}
	}

second_try:
	range_basek = range0_basek + range0_sizek;

	/* one hole in the middle */
	if (range_basek > basek && range_basek <= (basek + sizek))
		second_sizek = range_basek - basek;

	if (range0_sizek > state->range_sizek) {

		/* one hole in middle or at end */
		hole_sizek = range0_sizek - state->range_sizek - second_sizek;

		/* hole size should be less than half of range0 size */
		if (hole_sizek >= (range0_sizek >> 1) &&
		    range0_sizek >= chunk_sizek) {
			range0_sizek -= chunk_sizek;
			second_sizek = 0;
			hole_sizek = 0;

			goto second_try;
		}
	}

	if (range0_sizek) {
		if (debug_print)
			printk(KERN_DEBUG "range0: %016lx - %016lx\n",
				range0_basek<<10,
				(range0_basek + range0_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range0_basek,
				range0_sizek, MTRR_TYPE_WRBACK);
	}

	if (range0_sizek < state->range_sizek) {
		/* need to handle left over */
		range_sizek = state->range_sizek - range0_sizek;

		if (debug_print)
			printk(KERN_DEBUG "range: %016lx - %016lx\n",
				range_basek<<10,
				(range_basek + range_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, range_basek,
				range_sizek, MTRR_TYPE_WRBACK);
	}

	if (hole_sizek) {
		hole_basek = range_basek - hole_sizek - second_sizek;
		if (debug_print)
			printk(KERN_DEBUG "hole: %016lx - %016lx\n",
				hole_basek<<10,
				(hole_basek + hole_sizek)<<10);
		state->reg = range_to_mtrr(state->reg, hole_basek,
				hole_sizek, MTRR_TYPE_UNCACHABLE);
	}

	return second_sizek;
}
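
/*
 * Feed one memory range (in pages) into the var-MTRR layout state:
 * either merge it into the range currently being accumulated (ranges
 * below 1M are always merged), or flush the accumulated range to MTRRs
 * and start accumulating the new one.
 */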
static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
		   unsigned long size_pfn)
{
	unsigned long basek, sizek;
	unsigned long second_sizek = 0;

	if (state->reg >= num_var_ranges)
		return;

	basek = base_pfn << (PAGE_SHIFT - 10);
	sizek = size_pfn << (PAGE_SHIFT - 10);

	/* See if I can merge with the last range */
	if ((basek <= 1024) ||
	    (state->range_startk + state->range_sizek == basek)) {
		unsigned long endk = basek + sizek;
		state->range_sizek = endk - state->range_startk;
		return;
	}
	/* Write the range mtrrs */
	if (state->range_sizek != 0)
		second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

	/* Allocate an msr */
	state->range_startk = basek + second_sizek;
	state->range_sizek = sizek - second_sizek;
}
/* minimum size of mtrr block that can take a hole */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_chunk_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* granularity of mtrr block */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
	if (!p)
		return -EINVAL;
	mtrr_gran_size = memparse(p, &p);
	return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

static int nr_mtrr_spare_reg __initdata =
	CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
	if (arg)
		nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
		    u64 chunk_size, u64 gran_size)
{
	struct var_mtrr_state var_state;
	int i;
	int num_reg;

	var_state.range_startk	= 0;
	var_state.range_sizek	= 0;
	var_state.reg		= 0;
	var_state.chunk_sizek	= chunk_size >> 10;
	var_state.gran_sizek	= gran_size >> 10;

	memset(range_state, 0, sizeof(range_state));

	/* Write the range etc */
	for (i = 0; i < nr_range; i++)
		set_var_mtrr_range(&var_state, range[i].start,
				   range[i].end - range[i].start + 1);

	/* Write the last range */
	if (var_state.range_sizek != 0)
		range_to_mtrr_with_hole(&var_state, 0, 0);

	num_reg = var_state.reg;
	/* Clear out the extra MTRRs */
	while (var_state.reg < num_var_ranges) {
		save_var_mtrr(var_state.reg, 0, 0, 0);
		var_state.reg++;
	}

	return num_reg;
}

struct mtrr_cleanup_result {
	unsigned long	gran_sizek;
	unsigned long	chunk_sizek;
	unsigned long	lose_cover_sizek;
	unsigned int	num_reg;
	int		bad;
};
/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
 * chunk size: gran_size, ..., 2G
 * so we need (1+16)*8
 */
#define NUM_RESULT	136
#define PSHIFT		(PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

static void __init print_out_mtrr_range_state(void)
{
	int i;
	char start_factor = 'K', size_factor = 'K';
	unsigned long start_base, size_base;
	mtrr_type type;

	for (i = 0; i < num_var_ranges; i++) {

		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
		if (!size_base)
			continue;

		size_base = to_size_factor(size_base, &size_factor),
		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
		start_base = to_size_factor(start_base, &start_factor),
		type = range_state[i].type;

		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
			i, start_base, start_factor,
			size_base, size_factor,
			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
			);
	}
}
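
/*
 * Decide whether the BIOS-provided variable MTRRs are worth rewriting:
 * only attempt cleanup when there is at least one UC entry and the
 * populated entries use nothing but WB and UC (WP is treated as UC).
 */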
static int __init mtrr_need_cleanup(void)
{
	int i;
	mtrr_type type;
	unsigned long size;
	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];

	/* check entries number */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		size = range_state[i].size_pfn;
		if (type >= MTRR_NUM_TYPES)
			continue;
		if (!size)
			type = MTRR_NUM_TYPES;
		if (type == MTRR_TYPE_WRPROT)
			type = MTRR_TYPE_UNCACHABLE;
		num[type]++;
	}

	/* check if we got UC entries */
	if (!num[MTRR_TYPE_UNCACHABLE])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	return 1;
}
static unsigned long __initdata range_sums;
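
/*
 * Try one (chunk_size, gran_size) combination: lay out the MTRRs,
 * recompute the memory map they would produce, and record in result[i]
 * how many registers it takes and how much RAM coverage is lost
 * compared with the original map (range_sums), updating min_loss_pfn[]
 * for that register count.
 */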
static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
					 unsigned long extra_remove_base,
					 unsigned long extra_remove_size,
					 int i)
{
	int num_reg;
	static struct res_range range_new[RANGE_NUM];
	static int nr_range_new;
	unsigned long range_sums_new;

	/* convert ranges to var ranges state */
	num_reg = x86_setup_var_mtrrs(range, nr_range,
				      chunk_size, gran_size);

	/* we got new setting in range_state, check it */
	memset(range_new, 0, sizeof(range_new));
	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
					      extra_remove_base,
					      extra_remove_size);
	range_sums_new = sum_ranges(range_new, nr_range_new);

	result[i].chunk_sizek = chunk_size >> 10;
	result[i].gran_sizek = gran_size >> 10;
	result[i].num_reg = num_reg;

	if (range_sums < range_sums_new) {
		result[i].lose_cover_sizek =
			(range_sums_new - range_sums) << PSHIFT;
		result[i].bad = 1;
	} else
		result[i].lose_cover_sizek =
			(range_sums - range_sums_new) << PSHIFT;

	/* double check it */
	if (!result[i].bad && !result[i].lose_cover_sizek) {
		if (nr_range_new != nr_range ||
		    memcmp(range, range_new, sizeof(range)))
			result[i].bad = 1;
	}

	if (!result[i].bad && (range_sums - range_sums_new <
			       min_loss_pfn[num_reg])) {
		min_loss_pfn[num_reg] =
			range_sums - range_sums_new;
	}
}
static void __init mtrr_print_out_one_result(int i)
{
	char gran_factor, chunk_factor, lose_factor;
	unsigned long gran_base, chunk_base, lose_base;

	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),

	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
		result[i].bad ? "*BAD*" : " ",
		gran_base, gran_factor, chunk_base, chunk_factor);
	printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n",
		result[i].num_reg, result[i].bad ? "-" : "",
		lose_base, lose_factor);
}
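
/*
 * Among all tried combinations, find the smallest register count
 * (leaving nr_mtrr_spare_reg registers free) that loses no RAM
 * coverage, and return the index of a matching result, or -1 if no
 * lossless layout was found.
 */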
static int __init mtrr_search_optimal_index(void)
{
	int i;
	int num_reg_good;
	int index_good;

	if (nr_mtrr_spare_reg >= num_var_ranges)
		nr_mtrr_spare_reg = num_var_ranges - 1;
	num_reg_good = -1;
	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
		if (!min_loss_pfn[i])
			num_reg_good = i;
	}

	index_good = -1;
	if (num_reg_good != -1) {
		for (i = 0; i < NUM_RESULT; i++) {
			if (!result[i].bad &&
			    result[i].num_reg == num_reg_good &&
			    !result[i].lose_cover_sizek) {
				index_good = i;
				break;
			}
		}
	}

	return index_good;
}
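
/*
 * Main entry point of the sanitizer: read the BIOS variable MTRRs,
 * and if they need (and allow) cleanup, search for a chunk/gran
 * combination that covers the same RAM with fewer registers, then
 * program the new layout.  Returns 1 if the MTRRs were rewritten.
 */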
int __init mtrr_cleanup(unsigned address_bits)
{
	unsigned long extra_remove_base, extra_remove_size;
	unsigned long base, size, def, dummy;
	mtrr_type type;
	u64 chunk_size, gran_size;
	int index_good;
	int i;

	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
		return 0;
	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* check whether we need to handle it and can handle it */
	if (!mtrr_need_cleanup())
		return 0;

	/* print original var MTRRs at first, for debugging: */
	printk(KERN_DEBUG "original variable MTRRs\n");
	print_out_mtrr_range_state();

	memset(range, 0, sizeof(range));
	extra_remove_size = 0;
	extra_remove_base = 1 << (32 - PAGE_SHIFT);
	if (mtrr_tom2)
		extra_remove_size =
			(mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
	nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
					  extra_remove_size);
	/*
	 * [0, 1M) should always be covered by var mtrr with WB
	 * and fixed mtrrs should take effect before var mtrr for it
	 */
	nr_range = add_range_with_merge(range, nr_range, 0,
					(1ULL<<(20 - PAGE_SHIFT)) - 1);
	/* sort the ranges */
	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

	range_sums = sum_ranges(range, nr_range);
	printk(KERN_INFO "total RAM covered: %ldM\n",
	       range_sums >> (20 - PAGE_SHIFT));

	if (mtrr_chunk_size && mtrr_gran_size) {
		i = 0;
		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
				      extra_remove_base, extra_remove_size, i);

		mtrr_print_out_one_result(i);

		if (!result[i].bad) {
			set_var_mtrr_all(address_bits);
			printk(KERN_DEBUG "New variable MTRRs\n");
			print_out_mtrr_range_state();
			return 1;
		}
		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
		       "will find optimal one\n");
	}

	i = 0;
	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
	memset(result, 0, sizeof(result));
	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
		     chunk_size <<= 1) {

			if (i >= NUM_RESULT)
				continue;

			mtrr_calc_range_state(chunk_size, gran_size,
				      extra_remove_base, extra_remove_size, i);
			if (debug_print) {
				mtrr_print_out_one_result(i);
				printk(KERN_INFO "\n");
			}

			i++;
		}
	}

	/* try to find the optimal index */
	index_good = mtrr_search_optimal_index();

	if (index_good != -1) {
		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
		i = index_good;
		mtrr_print_out_one_result(i);

		/* convert ranges to var ranges state */
		chunk_size = result[i].chunk_sizek;
		chunk_size <<= 10;
		gran_size = result[i].gran_sizek;
		gran_size <<= 10;
		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
		set_var_mtrr_all(address_bits);
		printk(KERN_DEBUG "New variable MTRRs\n");
		print_out_mtrr_range_state();
		return 1;
	} else {
		/* print out all */
		for (i = 0; i < NUM_RESULT; i++)
			mtrr_print_out_one_result(i);
	}

	printk(KERN_INFO "mtrr_cleanup: cannot find optimal value\n");
	printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

	return 0;
}
#else
int __init mtrr_cleanup(unsigned address_bits)
{
	return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
	disable_mtrr_trim = 1;
	return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB.  Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 */
#define Tom2Enabled		(1U << 21)
#define Tom2ForceMemTypeWB	(1U << 22)

int __init amd_special_default_mtrr(void)
{
	u32 l, h;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;
	if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
		return 0;
	/* In case some hypervisor doesn't pass SYSCFG through */
	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
		return 0;
	/*
	 * Memory between 4GB and top of mem is forced WB by this magic bit.
	 * Reserved before K8RevF, but should be zero there.
	 */
	if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
	     (Tom2Enabled | Tom2ForceMemTypeWB))
		return 1;
	return 0;
}
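
/*
 * Mark [start_pfn, limit_pfn) as reserved in the e820 map and return
 * how many bytes were actually converted from E820_RAM.
 */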
static u64 __init real_trim_memory(unsigned long start_pfn,
				   unsigned long limit_pfn)
{
	u64 trim_start, trim_size;

	trim_start = start_pfn;
	trim_start <<= PAGE_SHIFT;

	trim_size = limit_pfn;
	trim_size <<= PAGE_SHIFT;
	trim_size -= trim_start;

	return e820_update_range(trim_start, trim_size, E820_RAM,
				 E820_RESERVED);
}
/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure the MTRRs having a write-back type cover
 * all of the memory the kernel is intending to use.  If not, it'll trim any
 * memory off the end by adjusting end_pfn, removing it from the kernel's
 * allocation pools, warning the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
	unsigned long i, base, size, highest_pfn = 0, def, dummy;
	mtrr_type type;
	u64 total_trim_size;

	/* extra one for all 0 */
	int num[MTRR_NUM_TYPES + 1];
	/*
	 * Make sure we only trim uncachable memory on machines that
	 * support the Intel MTRR architecture:
	 */
	if (!is_cpu(INTEL) || disable_mtrr_trim)
		return 0;
	rdmsr(MTRRdefType_MSR, def, dummy);
	def &= 0xff;
	if (def != MTRR_TYPE_UNCACHABLE)
		return 0;

	/* get it and store it aside */
	memset(range_state, 0, sizeof(range_state));
	for (i = 0; i < num_var_ranges; i++) {
		mtrr_if->get(i, &base, &size, &type);
		range_state[i].base_pfn = base;
		range_state[i].size_pfn = size;
		range_state[i].type = type;
	}

	/* Find highest cached pfn */
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type != MTRR_TYPE_WRBACK)
			continue;
		base = range_state[i].base_pfn;
		size = range_state[i].size_pfn;
		if (highest_pfn < base + size)
			highest_pfn = base + size;
	}

	/* kvm/qemu doesn't have mtrr set right, don't trim them all */
	if (!highest_pfn) {
		printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
		return 0;
	}

	/* check entries number */
	memset(num, 0, sizeof(num));
	for (i = 0; i < num_var_ranges; i++) {
		type = range_state[i].type;
		if (type >= MTRR_NUM_TYPES)
			continue;
		size = range_state[i].size_pfn;
		if (!size)
			type = MTRR_NUM_TYPES;
		num[type]++;
	}

	/* no entry for WB? */
	if (!num[MTRR_TYPE_WRBACK])
		return 0;

	/* check if we only had WB and UC */
	if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
	    num_var_ranges - num[MTRR_NUM_TYPES])
		return 0;

	memset(range, 0, sizeof(range));
	nr_range = 0;
	if (mtrr_tom2) {
		range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
		range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
		if (highest_pfn < range[nr_range].end + 1)
			highest_pfn = range[nr_range].end + 1;
		nr_range++;
	}
	nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

	total_trim_size = 0;
	/* check the head */
	if (range[0].start)
		total_trim_size += real_trim_memory(0, range[0].start);
	/* check the holes */
	for (i = 0; i < nr_range - 1; i++) {
		if (range[i].end + 1 < range[i+1].start)
			total_trim_size += real_trim_memory(range[i].end + 1,
							    range[i+1].start);
	}
	/* check the top */
	i = nr_range - 1;
	if (range[i].end + 1 < end_pfn)
		total_trim_size += real_trim_memory(range[i].end + 1,
						    end_pfn);

	if (total_trim_size) {
		printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
			" all of memory, losing %lluMB of RAM.\n",
			total_trim_size >> 20);

		if (!changed_by_mtrr_cleanup)
			WARN_ON(1);

		printk(KERN_INFO "update e820 for mtrr\n");
		update_e820();

		return 1;
	}

	return 0;
}