/*
 * MTRR (Memory Type Range Register) cleanup
 *
 * Copyright (C) 2009 Yinghai Lu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sort.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>
#include <linux/kvm_para.h>

#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>

#include "mtrr.h"

struct res_range {
        unsigned long start;
        unsigned long end;
};

struct var_mtrr_range_state {
        unsigned long base_pfn;
        unsigned long size_pfn;
        mtrr_type type;
};

struct var_mtrr_state {
        unsigned long range_startk;
        unsigned long range_sizek;
        unsigned long chunk_sizek;
        unsigned long gran_sizek;
        unsigned int reg;
};

/* Should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256

static struct res_range __initdata range[RANGE_NUM];
static int __initdata nr_range;
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
static int __initdata debug_print;

#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)

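/*
 * Bookkeeping note: range[] holds inclusive [start, end] intervals in units
 * of page frames; an entry with end == 0 is treated as an unused slot.
 * range_state[] mirrors the variable MTRRs as (base_pfn, size_pfn, type),
 * so a candidate layout can be computed without touching the hardware.
 */
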
static int __init
add_range(struct res_range *range, int nr_range,
          unsigned long start, unsigned long end)
{
        /* Out of slots: */
        if (nr_range >= RANGE_NUM)
                return nr_range;

        range[nr_range].start = start;
        range[nr_range].end = end;

        nr_range++;

        return nr_range;
}

static int __init
add_range_with_merge(struct res_range *range, int nr_range,
                     unsigned long start, unsigned long end)
{
        int i;

        /* Try to merge it with old one: */
        for (i = 0; i < nr_range; i++) {
                unsigned long final_start, final_end;
                unsigned long common_start, common_end;

                if (!range[i].end)
                        continue;

                common_start = max(range[i].start, start);
                common_end = min(range[i].end, end);
                if (common_start > common_end + 1)
                        continue;

                final_start = min(range[i].start, start);
                final_end = max(range[i].end, end);

                range[i].start = final_start;
                range[i].end = final_end;
                return nr_range;
        }

        /* Need to add it: */
        return add_range(range, nr_range, start, end);
}

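/*
 * Because the intervals are inclusive, two ranges that merely touch are
 * also merged: adding [0, 9] to a list that already holds [10, 19] gives
 * common_start = 10 and common_end = 9, the gap test
 * common_start > common_end + 1 fails, and the entry becomes [0, 19].
 */
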
static void __init
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
{
        int i, j;

        for (j = 0; j < RANGE_NUM; j++) {
                if (!range[j].end)
                        continue;

                if (start <= range[j].start && end >= range[j].end) {
                        range[j].start = 0;
                        range[j].end = 0;
                        continue;
                }

                if (start <= range[j].start && end < range[j].end &&
                    range[j].start < end + 1) {
                        range[j].start = end + 1;
                        continue;
                }

                if (start > range[j].start && end >= range[j].end &&
                    range[j].end > start - 1) {
                        range[j].end = start - 1;
                        continue;
                }

                if (start > range[j].start && end < range[j].end) {
                        /* Find the new spare: */
                        for (i = 0; i < RANGE_NUM; i++) {
                                if (range[i].end == 0)
                                        break;
                        }
                        if (i < RANGE_NUM) {
                                range[i].end = range[j].end;
                                range[i].start = end + 1;
                        } else {
                                printk(KERN_ERR "run out of slots in ranges\n");
                        }
                        range[j].end = start - 1;
                        continue;
                }
        }
}

static int __init cmp_range(const void *x1, const void *x2)
{
        const struct res_range *r1 = x1;
        const struct res_range *r2 = x2;
        long start1, start2;

        start1 = r1->start;
        start2 = r2->start;

        return start1 - start2;
}

#define BIOS_BUG_MSG KERN_WARNING \
        "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"

static int __init
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
                       unsigned long extra_remove_base,
                       unsigned long extra_remove_size)
{
        unsigned long base, size;
        mtrr_type type;
        int i;

        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_WRBACK)
                        continue;
                base = range_state[i].base_pfn;
                size = range_state[i].size_pfn;
                nr_range = add_range_with_merge(range, nr_range, base,
                                                base + size - 1);
        }
        if (debug_print) {
                printk(KERN_DEBUG "After WB checking\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* Take out UC ranges: */
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_UNCACHABLE &&
                    type != MTRR_TYPE_WRPROT)
                        continue;
                size = range_state[i].size_pfn;
                if (!size)
                        continue;
                base = range_state[i].base_pfn;
                if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
                    (mtrr_state.enabled & 1)) {
                        /* Var MTRR contains UC entry below 1M? Skip it: */
                        printk(BIOS_BUG_MSG, i);
                        if (base + size <= (1<<(20-PAGE_SHIFT)))
                                continue;
                        size -= (1<<(20-PAGE_SHIFT)) - base;
                        base = 1<<(20-PAGE_SHIFT);
                }
                subtract_range(range, base, base + size - 1);
        }
        if (extra_remove_size)
                subtract_range(range, extra_remove_base,
                               extra_remove_base + extra_remove_size - 1);

        /* get new range num */
        nr_range = 0;
        for (i = 0; i < RANGE_NUM; i++) {
                if (!range[i].end)
                        continue;
                nr_range++;
        }
        if (debug_print) {
                printk(KERN_DEBUG "After UC checking\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* sort the ranges */
        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
        if (debug_print) {
                printk(KERN_DEBUG "After sorting\n");
                for (i = 0; i < nr_range; i++)
                        printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
                               range[i].start, range[i].end + 1);
        }

        /* clear those that are not used */
        for (i = nr_range; i < RANGE_NUM; i++)
                memset(&range[i], 0, sizeof(range[i]));

        return nr_range;
}

#ifdef CONFIG_MTRR_SANITIZER

static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
{
        unsigned long sum = 0;
        int i;

        for (i = 0; i < nr_range; i++)
                sum += range[i].end + 1 - range[i].start;

        return sum;
}

static int enable_mtrr_cleanup __initdata =
        CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;

static int __init disable_mtrr_cleanup_setup(char *str)
{
        enable_mtrr_cleanup = 0;
        return 0;
}
early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

static int __init enable_mtrr_cleanup_setup(char *str)
{
        enable_mtrr_cleanup = 1;
        return 0;
}
early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);

static int __init mtrr_cleanup_debug_setup(char *str)
{
        debug_print = 1;
        return 0;
}
early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);

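/*
 * Program one variable MTRR from a (base, size) pair given in KiB.  PHYSBASE
 * takes the byte-granular base OR'd with the memory type; PHYSMASK is the
 * address mask limited to address_bits, with bit 11 (0x800) set as the
 * "valid" bit.  For example, with address_bits = 36, basek = 1 GiB
 * (0x100000 KiB), sizek = 256 MiB (0x40000 KiB) and type = WB (6), this
 * writes PHYSBASE = 0x0000000040000006 and PHYSMASK = 0x0000000ff0000800.
 */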
static void __init
set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
             unsigned char type, unsigned int address_bits)
{
        u32 base_lo, base_hi, mask_lo, mask_hi;
        u64 base, mask;

        if (!sizek) {
                fill_mtrr_var_range(reg, 0, 0, 0, 0);
                return;
        }

        mask = (1ULL << address_bits) - 1;
        mask &= ~((((u64)sizek) << 10) - 1);

        base = ((u64)basek) << 10;

        base |= type;
        mask |= 0x800;

        base_lo = base & ((1ULL<<32) - 1);
        base_hi = base >> 32;

        mask_lo = mask & ((1ULL<<32) - 1);
        mask_hi = mask >> 32;

        fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi);
}

static void __init
save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
              unsigned char type)
{
        range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
        range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
        range_state[reg].type = type;
}

static void __init set_var_mtrr_all(unsigned int address_bits)
{
        unsigned long basek, sizek;
        unsigned char type;
        unsigned int reg;

        for (reg = 0; reg < num_var_ranges; reg++) {
                basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10);
                sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10);
                type = range_state[reg].type;

                set_var_mtrr(reg, basek, sizek, type, address_bits);
        }
}

static unsigned long to_size_factor(unsigned long sizek, char *factorp)
{
        unsigned long base = sizek;
        char factor;

        if (base & ((1<<10) - 1)) {
                /* Not MB-aligned: */
                factor = 'K';
        } else if (base & ((1<<20) - 1)) {
                factor = 'M';
                base >>= 10;
        } else {
                factor = 'G';
                base >>= 20;
        }

        *factorp = factor;

        return base;
}

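/*
 * Cover [range_startk, range_startk + range_sizek) with variable MTRRs of
 * the given type, starting at register 'reg'.  Each step uses the largest
 * power-of-two block that is both aligned to the current base and no bigger
 * than what is left.  E.g. a 768 MiB write-back range starting at 0 is split
 * into a 512 MiB MTRR at 0 followed by a 256 MiB MTRR at 512 MiB, consuming
 * two registers.  Returns the next unused register index.
 */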
static unsigned int __init
range_to_mtrr(unsigned int reg, unsigned long range_startk,
              unsigned long range_sizek, unsigned char type)
{
        if (!range_sizek || (reg >= num_var_ranges))
                return reg;

        while (range_sizek) {
                unsigned long max_align, align;
                unsigned long sizek;

                /* Compute the maximum size with which we can make a range: */
                if (range_startk)
                        max_align = ffs(range_startk) - 1;
                else
                        max_align = 32;

                align = fls(range_sizek) - 1;
                if (align > max_align)
                        align = max_align;

                sizek = 1 << align;
                if (debug_print) {
                        char start_factor = 'K', size_factor = 'K';
                        unsigned long start_base, size_base;

                        start_base = to_size_factor(range_startk, &start_factor);
                        size_base = to_size_factor(sizek, &size_factor);

                        Dprintk("Setting variable MTRR %d, "
                                "base: %ld%cB, range: %ld%cB, type %s\n",
                                reg, start_base, start_factor,
                                size_base, size_factor,
                                (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
                                    ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")
                                );
                }
                save_var_mtrr(reg++, range_startk, sizek, type);
                range_startk += sizek;
                range_sizek -= sizek;
                if (reg >= num_var_ranges)
                        break;
        }
        return reg;
}

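/*
 * Lay out one write-back range, trading registers for coverage.  Roughly:
 * the pending range is aligned to gran_sizek (sub-granule leftovers are
 * dropped), then its size is rounded up to a multiple of chunk_sizek
 * ("range0").  If rounding up overshoots, the overshoot is either covered
 * by a trailing UC "hole" MTRR (only while the hole stays below half of
 * range0) or range0 is cut back by one chunk and the split retried.  Any
 * part of the next range swallowed by range0 is returned as second_sizek
 * so the caller can skip it.
 */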
static unsigned __init
range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
                        unsigned long sizek)
{
        unsigned long hole_basek, hole_sizek;
        unsigned long second_basek, second_sizek;
        unsigned long range0_basek, range0_sizek;
        unsigned long range_basek, range_sizek;
        unsigned long chunk_sizek;
        unsigned long gran_sizek;

        hole_basek = 0;
        hole_sizek = 0;
        second_basek = 0;
        second_sizek = 0;
        chunk_sizek = state->chunk_sizek;
        gran_sizek = state->gran_sizek;

        /* Align with gran size, to prevent small blocks from using up MTRRs: */
        range_basek = ALIGN(state->range_startk, gran_sizek);
        if ((range_basek > basek) && basek)
                return second_sizek;

        state->range_sizek -= (range_basek - state->range_startk);
        range_sizek = ALIGN(state->range_sizek, gran_sizek);

        while (range_sizek > state->range_sizek) {
                range_sizek -= gran_sizek;
                if (!range_sizek)
                        return 0;
        }
        state->range_sizek = range_sizek;

        /* Try to append some small hole: */
        range0_basek = state->range_startk;
        range0_sizek = ALIGN(state->range_sizek, chunk_sizek);

        /* No increase: */
        if (range0_sizek == state->range_sizek) {
                Dprintk("rangeX: %016lx - %016lx\n",
                        range0_basek<<10,
                        (range0_basek + state->range_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range0_basek,
                                state->range_sizek, MTRR_TYPE_WRBACK);
                return 0;
        }

        /* Only cut back when it is not the last: */
        if (sizek) {
                while (range0_basek + range0_sizek > (basek + sizek)) {
                        if (range0_sizek >= chunk_sizek)
                                range0_sizek -= chunk_sizek;
                        else
                                range0_sizek = 0;

                        if (!range0_sizek)
                                break;
                }
        }

second_try:
        range_basek = range0_basek + range0_sizek;

        /* One hole in the middle: */
        if (range_basek > basek && range_basek <= (basek + sizek))
                second_sizek = range_basek - basek;

        if (range0_sizek > state->range_sizek) {

                /* One hole in middle or at the end: */
                hole_sizek = range0_sizek - state->range_sizek - second_sizek;

                /* Hole size should be less than half of range0 size: */
                if (hole_sizek >= (range0_sizek >> 1) &&
                    range0_sizek >= chunk_sizek) {
                        range0_sizek -= chunk_sizek;
                        second_sizek = 0;
                        hole_sizek = 0;

                        goto second_try;
                }
        }

        if (range0_sizek) {
                Dprintk("range0: %016lx - %016lx\n",
                        range0_basek<<10,
                        (range0_basek + range0_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, range0_basek,
                                range0_sizek, MTRR_TYPE_WRBACK);
        }

        if (range0_sizek < state->range_sizek) {
                /* Need to handle left over range: */
                range_sizek = state->range_sizek - range0_sizek;

                Dprintk("range: %016lx - %016lx\n",
                        range_basek<<10,
                        (range_basek + range_sizek)<<10);

                state->reg = range_to_mtrr(state->reg, range_basek,
                                range_sizek, MTRR_TYPE_WRBACK);
        }

        if (hole_sizek) {
                hole_basek = range_basek - hole_sizek - second_sizek;
                Dprintk("hole: %016lx - %016lx\n",
                        hole_basek<<10,
                        (hole_basek + hole_sizek)<<10);
                state->reg = range_to_mtrr(state->reg, hole_basek,
                                hole_sizek, MTRR_TYPE_UNCACHABLE);
        }

        return second_sizek;
}

static void __init
set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
                   unsigned long size_pfn)
{
        unsigned long basek, sizek;
        unsigned long second_sizek = 0;

        if (state->reg >= num_var_ranges)
                return;

        basek = base_pfn << (PAGE_SHIFT - 10);
        sizek = size_pfn << (PAGE_SHIFT - 10);

        /* See if I can merge with the last range: */
        if ((basek <= 1024) ||
            (state->range_startk + state->range_sizek == basek)) {

                unsigned long endk = basek + sizek;
                state->range_sizek = endk - state->range_startk;
                return;
        }

        /* Write the range mtrrs: */
        if (state->range_sizek != 0)
                second_sizek = range_to_mtrr_with_hole(state, basek, sizek);

        /* Allocate an msr: */
        state->range_startk = basek + second_sizek;
        state->range_sizek = sizek - second_sizek;
}

/* Minimum size of an mtrr block that can take a hole: */
static u64 mtrr_chunk_size __initdata = (256ULL<<20);

static int __init parse_mtrr_chunk_size_opt(char *p)
{
        if (!p)
                return -EINVAL;
        mtrr_chunk_size = memparse(p, &p);
        return 0;
}
early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);

/* Granularity of an mtrr block: */
static u64 mtrr_gran_size __initdata;

static int __init parse_mtrr_gran_size_opt(char *p)
{
        if (!p)
                return -EINVAL;
        mtrr_gran_size = memparse(p, &p);
        return 0;
}
early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);

static unsigned long nr_mtrr_spare_reg __initdata =
        CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;

static int __init parse_mtrr_spare_reg(char *arg)
{
        if (arg)
                nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
        return 0;
}
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);

static int __init
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
                    u64 chunk_size, u64 gran_size)
{
        struct var_mtrr_state var_state;
        int num_reg;
        int i;

        var_state.range_startk = 0;
        var_state.range_sizek = 0;
        var_state.reg = 0;
        var_state.chunk_sizek = chunk_size >> 10;
        var_state.gran_sizek = gran_size >> 10;

        memset(range_state, 0, sizeof(range_state));

        /* Write the range: */
        for (i = 0; i < nr_range; i++) {
                set_var_mtrr_range(&var_state, range[i].start,
                                   range[i].end - range[i].start + 1);
        }

        /* Write the last range: */
        if (var_state.range_sizek != 0)
                range_to_mtrr_with_hole(&var_state, 0, 0);

        num_reg = var_state.reg;

        /* Clear out the extra MTRRs: */
        while (var_state.reg < num_var_ranges) {
                save_var_mtrr(var_state.reg, 0, 0, 0);
                var_state.reg++;
        }

        return num_reg;
}

struct mtrr_cleanup_result {
        unsigned long gran_sizek;
        unsigned long chunk_sizek;
        unsigned long lose_cover_sizek;
        unsigned int num_reg;
        int bad;
};

/*
 * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G
 * chunk size: gran_size, ..., 2G
 * so we need (1+16)*8 = 136 combinations
 */
#define NUM_RESULT 136
#define PSHIFT (PAGE_SHIFT - 10)

static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
static unsigned long __initdata min_loss_pfn[RANGE_NUM];

static void __init print_out_mtrr_range_state(void)
{
        char start_factor = 'K', size_factor = 'K';
        unsigned long start_base, size_base;
        mtrr_type type;
        int i;

        for (i = 0; i < num_var_ranges; i++) {
                size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
                if (!size_base)
                        continue;

                size_base = to_size_factor(size_base, &size_factor);
                start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
                start_base = to_size_factor(start_base, &start_factor);
                type = range_state[i].type;

                printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
                        i, start_base, start_factor,
                        size_base, size_factor,
                        (type == MTRR_TYPE_UNCACHABLE) ? "UC" :
                            ((type == MTRR_TYPE_WRPROT) ? "WP" :
                             ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
                        );
        }
}

static int __init mtrr_need_cleanup(void)
{
        int i;
        mtrr_type type;
        unsigned long size;
        /* Extra one for all 0: */
        int num[MTRR_NUM_TYPES + 1];

        /* Check entries number: */
        memset(num, 0, sizeof(num));
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                size = range_state[i].size_pfn;
                if (type >= MTRR_NUM_TYPES)
                        continue;
                if (!size)
                        type = MTRR_NUM_TYPES;
                if (type == MTRR_TYPE_WRPROT)
                        type = MTRR_TYPE_UNCACHABLE;
                num[type]++;
        }

        /* Check if we got UC entries: */
        if (!num[MTRR_TYPE_UNCACHABLE])
                return 0;

        /* Check if we only had WB and UC */
        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
            num_var_ranges - num[MTRR_NUM_TYPES])
                return 0;

        return 1;
}

static unsigned long __initdata range_sums;

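/*
 * Score one (chunk_size, gran_size) candidate: lay the reference ranges out
 * into range_state via x86_setup_var_mtrrs(), rebuild the WB map that this
 * layout would produce, and record in result[i] the registers used and how
 * much RAM coverage is lost versus the reference map (range_sums).  A result
 * is marked bad if coverage grew, or if nothing was lost but the layout still
 * differs from the reference ranges.  min_loss_pfn[] keeps the smallest loss
 * seen for each register count.
 */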
static void __init
mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
                      unsigned long x_remove_base,
                      unsigned long x_remove_size, int i)
{
        static struct res_range range_new[RANGE_NUM];
        unsigned long range_sums_new;
        static int nr_range_new;
        int num_reg;

        /* Convert ranges to var ranges state: */
        num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);

        /* We got new setting in range_state, check it: */
        memset(range_new, 0, sizeof(range_new));
        nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
                                              x_remove_base, x_remove_size);
        range_sums_new = sum_ranges(range_new, nr_range_new);

        result[i].chunk_sizek = chunk_size >> 10;
        result[i].gran_sizek = gran_size >> 10;
        result[i].num_reg = num_reg;

        if (range_sums < range_sums_new) {
                result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
                result[i].bad = 1;
        } else {
                result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
        }

        /* Double check it: */
        if (!result[i].bad && !result[i].lose_cover_sizek) {
                if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
                        result[i].bad = 1;
        }

        if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
                min_loss_pfn[num_reg] = range_sums - range_sums_new;
}

static void __init mtrr_print_out_one_result(int i)
{
        unsigned long gran_base, chunk_base, lose_base;
        char gran_factor, chunk_factor, lose_factor;

        gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
        chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
        lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);

        pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
                result[i].bad ? "*BAD*" : " ",
                gran_base, gran_factor, chunk_base, chunk_factor);
        pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
                result[i].num_reg, result[i].bad ? "-" : "",
                lose_base, lose_factor);
}

static int __init mtrr_search_optimal_index(void)
{
        int num_reg_good;
        int index_good;
        int i;

        if (nr_mtrr_spare_reg >= num_var_ranges)
                nr_mtrr_spare_reg = num_var_ranges - 1;

        num_reg_good = -1;
        for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
                if (!min_loss_pfn[i])
                        num_reg_good = i;
        }

        index_good = -1;
        if (num_reg_good != -1) {
                for (i = 0; i < NUM_RESULT; i++) {
                        if (!result[i].bad &&
                            result[i].num_reg == num_reg_good &&
                            !result[i].lose_cover_sizek) {
                                index_good = i;
                                break;
                        }
                }
        }

        return index_good;
}

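/*
 * Top-level sanitizer.  Runs only on Intel-style MTRRs whose default type is
 * UC: snapshot the variable MTRRs, bail out unless the layout is a pure
 * WB-plus-UC one that actually contains UC entries, and build the reference
 * WB map (always covering [0, 1M) and excluding the 4G..TOM2 window).  Then
 * either verify a user-supplied mtrr_chunk_size/mtrr_gran_size pair, or sweep
 * gran_size 64K..2G and chunk_size gran_size..2G and commit the layout that
 * spares the most registers (see mtrr_spare_reg_nr) without losing coverage.
 */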
int __init mtrr_cleanup(unsigned address_bits)
{
        unsigned long x_remove_base, x_remove_size;
        unsigned long base, size, def, dummy;
        u64 chunk_size, gran_size;
        mtrr_type type;
        int index_good;
        int i;

        if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
                return 0;

        rdmsr(MSR_MTRRdefType, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;

        /* Get it and store it aside: */
        memset(range_state, 0, sizeof(range_state));
        for (i = 0; i < num_var_ranges; i++) {
                mtrr_if->get(i, &base, &size, &type);
                range_state[i].base_pfn = base;
                range_state[i].size_pfn = size;
                range_state[i].type = type;
        }

        /* Check if we need to handle it and can handle it: */
        if (!mtrr_need_cleanup())
                return 0;

        /* Print original variable MTRRs first, for debugging: */
        printk(KERN_DEBUG "original variable MTRRs\n");
        print_out_mtrr_range_state();

        memset(range, 0, sizeof(range));
        x_remove_size = 0;
        x_remove_base = 1 << (32 - PAGE_SHIFT);
        if (mtrr_tom2)
                x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;

        nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
        /*
         * [0, 1M) should always be covered by var mtrr with WB
         * and fixed mtrrs should take effect before var mtrr for it:
         */
        nr_range = add_range_with_merge(range, nr_range, 0,
                                        (1ULL<<(20 - PAGE_SHIFT)) - 1);
        /* Sort the ranges: */
        sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);

        range_sums = sum_ranges(range, nr_range);
        printk(KERN_INFO "total RAM covered: %ldM\n",
               range_sums >> (20 - PAGE_SHIFT));

        if (mtrr_chunk_size && mtrr_gran_size) {
                i = 0;
                mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
                                      x_remove_base, x_remove_size, i);

                mtrr_print_out_one_result(i);

                if (!result[i].bad) {
                        set_var_mtrr_all(address_bits);
                        printk(KERN_DEBUG "New variable MTRRs\n");
                        print_out_mtrr_range_state();
                        return 1;
                }
                printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
                       "will find optimal one\n");
        }

        i = 0;
        memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
        memset(result, 0, sizeof(result));
        for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {

                for (chunk_size = gran_size; chunk_size < (1ULL<<32);
                     chunk_size <<= 1) {

                        if (i >= NUM_RESULT)
                                continue;

                        mtrr_calc_range_state(chunk_size, gran_size,
                                              x_remove_base, x_remove_size, i);
                        if (debug_print) {
                                mtrr_print_out_one_result(i);
                                printk(KERN_INFO "\n");
                        }

                        i++;
                }
        }

        /* Try to find the optimal index: */
        index_good = mtrr_search_optimal_index();

        if (index_good != -1) {
                printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
                i = index_good;
                mtrr_print_out_one_result(i);

                /* Convert ranges to var ranges state: */
                chunk_size = result[i].chunk_sizek;
                chunk_size <<= 10;
                gran_size = result[i].gran_sizek;
                gran_size <<= 10;
                x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
                set_var_mtrr_all(address_bits);
                printk(KERN_DEBUG "New variable MTRRs\n");
                print_out_mtrr_range_state();
                return 1;
        } else {
                /* print out all */
                for (i = 0; i < NUM_RESULT; i++)
                        mtrr_print_out_one_result(i);
        }

        printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
        printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");

        return 0;
}

#else
int __init mtrr_cleanup(unsigned address_bits)
{
        return 0;
}
#endif

static int disable_mtrr_trim;

static int __init disable_mtrr_trim_setup(char *str)
{
        disable_mtrr_trim = 1;
        return 0;
}
early_param("disable_mtrr_trim", disable_mtrr_trim_setup);

/*
 * Newer AMD K8s and later CPUs have a special magic MSR way to force WB
 * for memory >4GB. Check for that here.
 * Note this won't check whether the MTRRs below 4GB (where the magic bit
 * doesn't apply) are wrong, but so far we don't know of any such case in
 * the wild.
 */
#define Tom2Enabled		(1U << 21)
#define Tom2ForceMemTypeWB	(1U << 22)

int __init amd_special_default_mtrr(void)
{
        u32 l, h;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                return 0;
        if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
                return 0;
        /* In case some hypervisor doesn't pass SYSCFG through: */
        if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
                return 0;
        /*
         * Memory between 4GB and top of mem is forced WB by this magic bit.
         * Reserved before K8RevF, but should be zero there.
         */
        if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) ==
            (Tom2Enabled | Tom2ForceMemTypeWB))
                return 1;
        return 0;
}

static u64 __init
real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
{
        u64 trim_start, trim_size;

        trim_start = start_pfn;
        trim_start <<= PAGE_SHIFT;

        trim_size = limit_pfn;
        trim_size <<= PAGE_SHIFT;
        trim_size -= trim_start;

        return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
}

/**
 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
 * @end_pfn: ending page frame number
 *
 * Some buggy BIOSes don't set up the MTRRs properly for systems with certain
 * memory configurations.  This routine checks that the highest MTRR matches
 * the end of memory, to make sure that the MTRRs of write-back type cover
 * all of the memory the kernel intends to use.  If not, it trims any memory
 * off the end by adjusting end_pfn, removing it from the kernel's allocation
 * pools, and warns the user with an obnoxious message.
 */
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
        unsigned long i, base, size, highest_pfn = 0, def, dummy;
        mtrr_type type;
        u64 total_trim_size;
        /* extra one for all 0 */
        int num[MTRR_NUM_TYPES + 1];

        /*
         * Make sure we only trim uncachable memory on machines that
         * support the Intel MTRR architecture:
         */
        if (!is_cpu(INTEL) || disable_mtrr_trim)
                return 0;

        rdmsr(MSR_MTRRdefType, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;

        /* Get it and store it aside: */
        memset(range_state, 0, sizeof(range_state));
        for (i = 0; i < num_var_ranges; i++) {
                mtrr_if->get(i, &base, &size, &type);
                range_state[i].base_pfn = base;
                range_state[i].size_pfn = size;
                range_state[i].type = type;
        }

        /* Find highest cached pfn: */
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type != MTRR_TYPE_WRBACK)
                        continue;
                base = range_state[i].base_pfn;
                size = range_state[i].size_pfn;
                if (highest_pfn < base + size)
                        highest_pfn = base + size;
        }

        /* kvm/qemu doesn't have mtrr set right, don't trim all of memory: */
        if (!highest_pfn) {
                printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
                return 0;
        }

        /* Check entries number: */
        memset(num, 0, sizeof(num));
        for (i = 0; i < num_var_ranges; i++) {
                type = range_state[i].type;
                if (type >= MTRR_NUM_TYPES)
                        continue;
                size = range_state[i].size_pfn;
                if (!size)
                        type = MTRR_NUM_TYPES;
                num[type]++;
        }

        /* No entry for WB? */
        if (!num[MTRR_TYPE_WRBACK])
                return 0;

        /* Check if we only had WB and UC: */
        if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
            num_var_ranges - num[MTRR_NUM_TYPES])
                return 0;

        memset(range, 0, sizeof(range));
        nr_range = 0;
        if (mtrr_tom2) {
                range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
                range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
                if (highest_pfn < range[nr_range].end + 1)
                        highest_pfn = range[nr_range].end + 1;
                nr_range++;
        }
        nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);

        /* Check the head: */
        total_trim_size = 0;
        if (range[0].start)
                total_trim_size += real_trim_memory(0, range[0].start);

        /* Check the holes: */
        for (i = 0; i < nr_range - 1; i++) {
                if (range[i].end + 1 < range[i+1].start)
                        total_trim_size += real_trim_memory(range[i].end + 1,
                                                            range[i+1].start);
        }

        /* Check the top: */
        i = nr_range - 1;
        if (range[i].end + 1 < end_pfn)
                total_trim_size += real_trim_memory(range[i].end + 1,
                                                    end_pfn);

        if (total_trim_size) {
                pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n",
                           total_trim_size >> 20);

                if (!changed_by_mtrr_cleanup)
                        WARN_ON(1);

                pr_info("update e820 for mtrr\n");
                update_e820();

                return 1;
        }

        return 0;
}