/* coredump.c - core dump name formatting, thread zapping and dump output helpers. */
  1. #include <linux/slab.h>
  2. #include <linux/file.h>
  3. #include <linux/fdtable.h>
  4. #include <linux/mm.h>
  5. #include <linux/stat.h>
  6. #include <linux/fcntl.h>
  7. #include <linux/swap.h>
  8. #include <linux/string.h>
  9. #include <linux/init.h>
  10. #include <linux/pagemap.h>
  11. #include <linux/perf_event.h>
  12. #include <linux/highmem.h>
  13. #include <linux/spinlock.h>
  14. #include <linux/key.h>
  15. #include <linux/personality.h>
  16. #include <linux/binfmts.h>
  17. #include <linux/coredump.h>
  18. #include <linux/utsname.h>
  19. #include <linux/pid_namespace.h>
  20. #include <linux/module.h>
  21. #include <linux/namei.h>
  22. #include <linux/mount.h>
  23. #include <linux/security.h>
  24. #include <linux/syscalls.h>
  25. #include <linux/tsacct_kern.h>
  26. #include <linux/cn_proc.h>
  27. #include <linux/audit.h>
  28. #include <linux/tracehook.h>
  29. #include <linux/kmod.h>
  30. #include <linux/fsnotify.h>
  31. #include <linux/fs_struct.h>
  32. #include <linux/pipe_fs_i.h>
  33. #include <linux/oom.h>
  34. #include <linux/compat.h>
  35. #include <asm/uaccess.h>
  36. #include <asm/mmu_context.h>
  37. #include <asm/tlb.h>
  38. #include <asm/exec.h>
  39. #include <trace/events/task.h>
  40. #include "internal.h"
  41. #include "coredump.h"
  42. #include <trace/events/sched.h>
  43. int core_uses_pid;
  44. char core_pattern[CORENAME_MAX_SIZE] = "core";
  45. unsigned int core_pipe_limit;
  46. struct core_name {
  47. char *corename;
  48. int used, size;
  49. };
  50. static atomic_t call_count = ATOMIC_INIT(1);
  51. /* The maximal length of core_pattern is also specified in sysctl.c */
  52. static int expand_corename(struct core_name *cn)
  53. {
  54. char *old_corename = cn->corename;
  55. cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
  56. cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
  57. if (!cn->corename) {
  58. kfree(old_corename);
  59. return -ENOMEM;
  60. }
  61. return 0;
  62. }
  63. static int cn_printf(struct core_name *cn, const char *fmt, ...)
  64. {
  65. char *cur;
  66. int need;
  67. int ret;
  68. va_list arg;
  69. va_start(arg, fmt);
  70. need = vsnprintf(NULL, 0, fmt, arg);
  71. va_end(arg);
  72. if (likely(need < cn->size - cn->used - 1))
  73. goto out_printf;
  74. ret = expand_corename(cn);
  75. if (ret)
  76. goto expand_fail;
  77. out_printf:
  78. cur = cn->corename + cn->used;
  79. va_start(arg, fmt);
  80. vsnprintf(cur, need + 1, fmt, arg);
  81. va_end(arg);
  82. cn->used += need;
  83. return 0;
  84. expand_fail:
  85. return ret;
  86. }
  87. static void cn_escape(char *str)
  88. {
  89. for (; *str; str++)
  90. if (*str == '/')
  91. *str = '!';
  92. }
  93. static int cn_print_exe_file(struct core_name *cn)
  94. {
  95. struct file *exe_file;
  96. char *pathbuf, *path;
  97. int ret;
  98. exe_file = get_mm_exe_file(current->mm);
  99. if (!exe_file) {
  100. char *commstart = cn->corename + cn->used;
  101. ret = cn_printf(cn, "%s (path unknown)", current->comm);
  102. cn_escape(commstart);
  103. return ret;
  104. }
  105. pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
  106. if (!pathbuf) {
  107. ret = -ENOMEM;
  108. goto put_exe_file;
  109. }
  110. path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
  111. if (IS_ERR(path)) {
  112. ret = PTR_ERR(path);
  113. goto free_buf;
  114. }
  115. cn_escape(path);
  116. ret = cn_printf(cn, "%s", path);
  117. free_buf:
  118. kfree(pathbuf);
  119. put_exe_file:
  120. fput(exe_file);
  121. return ret;
  122. }
  123. /* format_corename will inspect the pattern parameter, and output a
  124. * name into corename, which must have space for at least
  125. * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  126. */
  127. static int format_corename(struct core_name *cn, struct coredump_params *cprm)
  128. {
  129. const struct cred *cred = current_cred();
  130. const char *pat_ptr = core_pattern;
  131. int ispipe = (*pat_ptr == '|');
  132. int pid_in_pattern = 0;
  133. int err = 0;
  134. cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
  135. cn->corename = kmalloc(cn->size, GFP_KERNEL);
  136. cn->used = 0;
  137. if (!cn->corename)
  138. return -ENOMEM;
  139. /* Repeat as long as we have more pattern to process and more output
  140. space */
  141. while (*pat_ptr) {
  142. if (*pat_ptr != '%') {
  143. if (*pat_ptr == 0)
  144. goto out;
  145. err = cn_printf(cn, "%c", *pat_ptr++);
  146. } else {
  147. switch (*++pat_ptr) {
  148. /* single % at the end, drop that */
  149. case 0:
  150. goto out;
  151. /* Double percent, output one percent */
  152. case '%':
  153. err = cn_printf(cn, "%c", '%');
  154. break;
  155. /* pid */
  156. case 'p':
  157. pid_in_pattern = 1;
  158. err = cn_printf(cn, "%d",
  159. task_tgid_vnr(current));
  160. break;
  161. /* uid */
  162. case 'u':
  163. err = cn_printf(cn, "%d", cred->uid);
  164. break;
  165. /* gid */
  166. case 'g':
  167. err = cn_printf(cn, "%d", cred->gid);
  168. break;
  169. case 'd':
  170. err = cn_printf(cn, "%d",
  171. __get_dumpable(cprm->mm_flags));
  172. break;
  173. /* signal that caused the coredump */
  174. case 's':
  175. err = cn_printf(cn, "%ld", cprm->siginfo->si_signo);
  176. break;
  177. /* UNIX time of coredump */
  178. case 't': {
  179. struct timeval tv;
  180. do_gettimeofday(&tv);
  181. err = cn_printf(cn, "%lu", tv.tv_sec);
  182. break;
  183. }
  184. /* hostname */
  185. case 'h': {
  186. char *namestart = cn->corename + cn->used;
  187. down_read(&uts_sem);
  188. err = cn_printf(cn, "%s",
  189. utsname()->nodename);
  190. up_read(&uts_sem);
  191. cn_escape(namestart);
  192. break;
  193. }
  194. /* executable */
  195. case 'e': {
  196. char *commstart = cn->corename + cn->used;
  197. err = cn_printf(cn, "%s", current->comm);
  198. cn_escape(commstart);
  199. break;
  200. }
  201. case 'E':
  202. err = cn_print_exe_file(cn);
  203. break;
  204. /* core limit size */
  205. case 'c':
  206. err = cn_printf(cn, "%lu",
  207. rlimit(RLIMIT_CORE));
  208. break;
  209. default:
  210. break;
  211. }
  212. ++pat_ptr;
  213. }
  214. if (err)
  215. return err;
  216. }
  217. /* Backward compatibility with core_uses_pid:
  218. *
  219. * If core_pattern does not include a %p (as is the default)
  220. * and core_uses_pid is set, then .%pid will be appended to
  221. * the filename. Do not do this for piped commands. */
  222. if (!ispipe && !pid_in_pattern && core_uses_pid) {
  223. err = cn_printf(cn, ".%d", task_tgid_vnr(current));
  224. if (err)
  225. return err;
  226. }
  227. out:
  228. return ispipe;
  229. }
  230. static int zap_process(struct task_struct *start, int exit_code)
  231. {
  232. struct task_struct *t;
  233. int nr = 0;
  234. start->signal->flags = SIGNAL_GROUP_EXIT;
  235. start->signal->group_exit_code = exit_code;
  236. start->signal->group_stop_count = 0;
  237. t = start;
  238. do {
  239. task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
  240. if (t != current && t->mm) {
  241. sigaddset(&t->pending.signal, SIGKILL);
  242. signal_wake_up(t, 1);
  243. nr++;
  244. }
  245. } while_each_thread(start, t);
  246. return nr;
  247. }
  248. static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
  249. struct core_state *core_state, int exit_code)
  250. {
  251. struct task_struct *g, *p;
  252. unsigned long flags;
  253. int nr = -EAGAIN;
  254. spin_lock_irq(&tsk->sighand->siglock);
  255. if (!signal_group_exit(tsk->signal)) {
  256. mm->core_state = core_state;
  257. nr = zap_process(tsk, exit_code);
  258. }
  259. spin_unlock_irq(&tsk->sighand->siglock);
  260. if (unlikely(nr < 0))
  261. return nr;
  262. if (atomic_read(&mm->mm_users) == nr + 1)
  263. goto done;
  264. /*
  265. * We should find and kill all tasks which use this mm, and we should
  266. * count them correctly into ->nr_threads. We don't take tasklist
  267. * lock, but this is safe wrt:
  268. *
  269. * fork:
  270. * None of sub-threads can fork after zap_process(leader). All
  271. * processes which were created before this point should be
  272. * visible to zap_threads() because copy_process() adds the new
  273. * process to the tail of init_task.tasks list, and lock/unlock
  274. * of ->siglock provides a memory barrier.
  275. *
  276. * do_exit:
  277. * The caller holds mm->mmap_sem. This means that the task which
  278. * uses this mm can't pass exit_mm(), so it can't exit or clear
  279. * its ->mm.
  280. *
  281. * de_thread:
  282. * It does list_replace_rcu(&leader->tasks, &current->tasks),
  283. * we must see either old or new leader, this does not matter.
  284. * However, it can change p->sighand, so lock_task_sighand(p)
  285. * must be used. Since p->mm != NULL and we hold ->mmap_sem
  286. * it can't fail.
  287. *
  288. * Note also that "g" can be the old leader with ->mm == NULL
  289. * and already unhashed and thus removed from ->thread_group.
  290. * This is OK, __unhash_process()->list_del_rcu() does not
  291. * clear the ->next pointer, we will find the new leader via
  292. * next_thread().
  293. */
  294. rcu_read_lock();
  295. for_each_process(g) {
  296. if (g == tsk->group_leader)
  297. continue;
  298. if (g->flags & PF_KTHREAD)
  299. continue;
  300. p = g;
  301. do {
  302. if (p->mm) {
  303. if (unlikely(p->mm == mm)) {
  304. lock_task_sighand(p, &flags);
  305. nr += zap_process(p, exit_code);
  306. unlock_task_sighand(p, &flags);
  307. }
  308. break;
  309. }
  310. } while_each_thread(g, p);
  311. }
  312. rcu_read_unlock();
  313. done:
  314. atomic_set(&core_state->nr_threads, nr);
  315. return nr;
  316. }
  317. static int coredump_wait(int exit_code, struct core_state *core_state)
  318. {
  319. struct task_struct *tsk = current;
  320. struct mm_struct *mm = tsk->mm;
  321. int core_waiters = -EBUSY;
  322. init_completion(&core_state->startup);
  323. core_state->dumper.task = tsk;
  324. core_state->dumper.next = NULL;
  325. down_write(&mm->mmap_sem);
  326. if (!mm->core_state)
  327. core_waiters = zap_threads(tsk, mm, core_state, exit_code);
  328. up_write(&mm->mmap_sem);
  329. if (core_waiters > 0) {
  330. struct core_thread *ptr;
  331. wait_for_completion(&core_state->startup);
  332. /*
  333. * Wait for all the threads to become inactive, so that
  334. * all the thread context (extended register state, like
  335. * fpu etc) gets copied to the memory.
  336. */
  337. ptr = core_state->dumper.next;
  338. while (ptr != NULL) {
  339. wait_task_inactive(ptr->task, 0);
  340. ptr = ptr->next;
  341. }
  342. }
  343. return core_waiters;
  344. }
  345. static void coredump_finish(struct mm_struct *mm)
  346. {
  347. struct core_thread *curr, *next;
  348. struct task_struct *task;
  349. next = mm->core_state->dumper.next;
  350. while ((curr = next) != NULL) {
  351. next = curr->next;
  352. task = curr->task;
  353. /*
  354. * see exit_mm(), curr->task must not see
  355. * ->task == NULL before we read ->next.
  356. */
  357. smp_mb();
  358. curr->task = NULL;
  359. wake_up_process(task);
  360. }
  361. mm->core_state = NULL;
  362. }
  363. static void wait_for_dump_helpers(struct file *file)
  364. {
  365. struct pipe_inode_info *pipe;
  366. pipe = file_inode(file)->i_pipe;
  367. pipe_lock(pipe);
  368. pipe->readers++;
  369. pipe->writers--;
  370. while ((pipe->readers > 1) && (!signal_pending(current))) {
  371. wake_up_interruptible_sync(&pipe->wait);
  372. kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
  373. pipe_wait(pipe);
  374. }
  375. pipe->readers--;
  376. pipe->writers++;
  377. pipe_unlock(pipe);
  378. }
  379. /*
  380. * umh_pipe_setup
  381. * helper function to customize the process used
  382. * to collect the core in userspace. Specifically
  383. * it sets up a pipe and installs it as fd 0 (stdin)
  384. * for the process. Returns 0 on success, or
  385. * PTR_ERR on failure.
  386. * Note that it also sets the core limit to 1. This
  387. * is a special value that we use to trap recursive
  388. * core dumps
  389. */
  390. static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
  391. {
  392. struct file *files[2];
  393. struct coredump_params *cp = (struct coredump_params *)info->data;
  394. int err = create_pipe_files(files, 0);
  395. if (err)
  396. return err;
  397. cp->file = files[1];
  398. err = replace_fd(0, files[0], 0);
  399. fput(files[0]);
  400. /* and disallow core files too */
  401. current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
  402. return err;
  403. }
  404. void do_coredump(siginfo_t *siginfo)
  405. {
  406. struct core_state core_state;
  407. struct core_name cn;
  408. struct mm_struct *mm = current->mm;
  409. struct linux_binfmt * binfmt;
  410. const struct cred *old_cred;
  411. struct cred *cred;
  412. int retval = 0;
  413. int flag = 0;
  414. int ispipe;
  415. struct files_struct *displaced;
  416. bool need_nonrelative = false;
  417. static atomic_t core_dump_count = ATOMIC_INIT(0);
  418. struct coredump_params cprm = {
  419. .siginfo = siginfo,
  420. .regs = signal_pt_regs(),
  421. .limit = rlimit(RLIMIT_CORE),
  422. /*
  423. * We must use the same mm->flags while dumping core to avoid
  424. * inconsistency of bit flags, since this flag is not protected
  425. * by any locks.
  426. */
  427. .mm_flags = mm->flags,
  428. };
  429. audit_core_dumps(siginfo->si_signo);
  430. binfmt = mm->binfmt;
  431. if (!binfmt || !binfmt->core_dump)
  432. goto fail;
  433. if (!__get_dumpable(cprm.mm_flags))
  434. goto fail;
  435. cred = prepare_creds();
  436. if (!cred)
  437. goto fail;
  438. /*
  439. * We cannot trust fsuid as being the "true" uid of the process
  440. * nor do we know its entire history. We only know it was tainted
  441. * so we dump it as root in mode 2, and only into a controlled
  442. * environment (pipe handler or fully qualified path).
  443. */
  444. if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
  445. /* Setuid core dump mode */
  446. flag = O_EXCL; /* Stop rewrite attacks */
  447. cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
  448. need_nonrelative = true;
  449. }
  450. retval = coredump_wait(siginfo->si_signo, &core_state);
  451. if (retval < 0)
  452. goto fail_creds;
  453. old_cred = override_creds(cred);
  454. /*
  455. * Clear any false indication of pending signals that might
  456. * be seen by the filesystem code called to write the core file.
  457. */
  458. clear_thread_flag(TIF_SIGPENDING);
  459. ispipe = format_corename(&cn, &cprm);
  460. if (ispipe) {
  461. int dump_count;
  462. char **helper_argv;
  463. if (ispipe < 0) {
  464. printk(KERN_WARNING "format_corename failed\n");
  465. printk(KERN_WARNING "Aborting core\n");
  466. goto fail_corename;
  467. }
  468. if (cprm.limit == 1) {
  469. /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
  470. *
  471. * Normally core limits are irrelevant to pipes, since
  472. * we're not writing to the file system, but we use
  473. * cprm.limit of 1 here as a speacial value, this is a
  474. * consistent way to catch recursive crashes.
  475. * We can still crash if the core_pattern binary sets
  476. * RLIM_CORE = !1, but it runs as root, and can do
  477. * lots of stupid things.
  478. *
  479. * Note that we use task_tgid_vnr here to grab the pid
  480. * of the process group leader. That way we get the
  481. * right pid if a thread in a multi-threaded
  482. * core_pattern process dies.
  483. */
  484. printk(KERN_WARNING
  485. "Process %d(%s) has RLIMIT_CORE set to 1\n",
  486. task_tgid_vnr(current), current->comm);
  487. printk(KERN_WARNING "Aborting core\n");
  488. goto fail_unlock;
  489. }
  490. cprm.limit = RLIM_INFINITY;
  491. dump_count = atomic_inc_return(&core_dump_count);
  492. if (core_pipe_limit && (core_pipe_limit < dump_count)) {
  493. printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
  494. task_tgid_vnr(current), current->comm);
  495. printk(KERN_WARNING "Skipping core dump\n");
  496. goto fail_dropcount;
  497. }
  498. helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
  499. if (!helper_argv) {
  500. printk(KERN_WARNING "%s failed to allocate memory\n",
  501. __func__);
  502. goto fail_dropcount;
  503. }
  504. retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
  505. NULL, UMH_WAIT_EXEC, umh_pipe_setup,
  506. NULL, &cprm);
  507. argv_free(helper_argv);
  508. if (retval) {
  509. printk(KERN_INFO "Core dump to %s pipe failed\n",
  510. cn.corename);
  511. goto close_fail;
  512. }
  513. } else {
  514. struct inode *inode;
  515. if (cprm.limit < binfmt->min_coredump)
  516. goto fail_unlock;
  517. if (need_nonrelative && cn.corename[0] != '/') {
  518. printk(KERN_WARNING "Pid %d(%s) can only dump core "\
  519. "to fully qualified path!\n",
  520. task_tgid_vnr(current), current->comm);
  521. printk(KERN_WARNING "Skipping core dump\n");
  522. goto fail_unlock;
  523. }
  524. cprm.file = filp_open(cn.corename,
  525. O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
  526. 0600);
  527. if (IS_ERR(cprm.file))
  528. goto fail_unlock;
  529. inode = file_inode(cprm.file);
  530. if (inode->i_nlink > 1)
  531. goto close_fail;
  532. if (d_unhashed(cprm.file->f_path.dentry))
  533. goto close_fail;
  534. /*
  535. * AK: actually i see no reason to not allow this for named
  536. * pipes etc, but keep the previous behaviour for now.
  537. */
  538. if (!S_ISREG(inode->i_mode))
  539. goto close_fail;
  540. /*
  541. * Dont allow local users get cute and trick others to coredump
  542. * into their pre-created files.
  543. */
  544. if (!uid_eq(inode->i_uid, current_fsuid()))
  545. goto close_fail;
  546. if (!cprm.file->f_op || !cprm.file->f_op->write)
  547. goto close_fail;
  548. if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
  549. goto close_fail;
  550. }
  551. /* get us an unshared descriptor table; almost always a no-op */
  552. retval = unshare_files(&displaced);
  553. if (retval)
  554. goto close_fail;
  555. if (displaced)
  556. put_files_struct(displaced);
  557. retval = binfmt->core_dump(&cprm);
  558. if (retval)
  559. current->signal->group_exit_code |= 0x80;
  560. if (ispipe && core_pipe_limit)
  561. wait_for_dump_helpers(cprm.file);
  562. close_fail:
  563. if (cprm.file)
  564. filp_close(cprm.file, NULL);
  565. fail_dropcount:
  566. if (ispipe)
  567. atomic_dec(&core_dump_count);
  568. fail_unlock:
  569. kfree(cn.corename);
  570. fail_corename:
  571. coredump_finish(mm);
  572. revert_creds(old_cred);
  573. fail_creds:
  574. put_cred(cred);
  575. fail:
  576. return;
  577. }
  578. /*
  579. * Core dumping helper functions. These are the only things you should
  580. * do on a core-file: use only these functions to write out all the
  581. * necessary info.
  582. */
  583. int dump_write(struct file *file, const void *addr, int nr)
  584. {
  585. return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
  586. }
  587. EXPORT_SYMBOL(dump_write);
  588. int dump_seek(struct file *file, loff_t off)
  589. {
  590. int ret = 1;
  591. if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
  592. if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
  593. return 0;
  594. } else {
  595. char *buf = (char *)get_zeroed_page(GFP_KERNEL);
  596. if (!buf)
  597. return 0;
  598. while (off > 0) {
  599. unsigned long n = off;
  600. if (n > PAGE_SIZE)
  601. n = PAGE_SIZE;
  602. if (!dump_write(file, buf, n)) {
  603. ret = 0;
  604. break;
  605. }
  606. off -= n;
  607. }
  608. free_page((unsigned long)buf);
  609. }
  610. return ret;
  611. }
  612. EXPORT_SYMBOL(dump_seek);