/* sys.c — extraction artifact removed: the source viewer's file-size banner
 * and its concatenated line-number gutter (1..1915) were pasted into the
 * file body and have been stripped; the code proper begins below. */
  1. /*
  2. * linux/kernel/sys.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/module.h>
  7. #include <linux/mm.h>
  8. #include <linux/utsname.h>
  9. #include <linux/mman.h>
  10. #include <linux/reboot.h>
  11. #include <linux/prctl.h>
  12. #include <linux/highuid.h>
  13. #include <linux/fs.h>
  14. #include <linux/perf_event.h>
  15. #include <linux/resource.h>
  16. #include <linux/kernel.h>
  17. #include <linux/kexec.h>
  18. #include <linux/workqueue.h>
  19. #include <linux/capability.h>
  20. #include <linux/device.h>
  21. #include <linux/key.h>
  22. #include <linux/times.h>
  23. #include <linux/posix-timers.h>
  24. #include <linux/security.h>
  25. #include <linux/dcookies.h>
  26. #include <linux/suspend.h>
  27. #include <linux/tty.h>
  28. #include <linux/signal.h>
  29. #include <linux/cn_proc.h>
  30. #include <linux/getcpu.h>
  31. #include <linux/task_io_accounting_ops.h>
  32. #include <linux/seccomp.h>
  33. #include <linux/cpu.h>
  34. #include <linux/personality.h>
  35. #include <linux/ptrace.h>
  36. #include <linux/fs_struct.h>
  37. #include <linux/gfp.h>
  38. #include <linux/syscore_ops.h>
  39. #include <linux/version.h>
  40. #include <linux/ctype.h>
  41. #include <linux/compat.h>
  42. #include <linux/syscalls.h>
  43. #include <linux/kprobes.h>
  44. #include <linux/user_namespace.h>
  45. #include <linux/kmsg_dump.h>
  46. /* Move somewhere else to avoid recompiling? */
  47. #include <generated/utsrelease.h>
  48. #include <asm/uaccess.h>
  49. #include <asm/io.h>
  50. #include <asm/unistd.h>
/*
 * Fallback stubs for the per-architecture prctl() helpers.  An arch that
 * supports a given control defines the corresponding macro in its headers;
 * for every one left undefined we fail the prctl() with -EINVAL.
 */
#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */
/* Substituted when a 32-bit id cannot be represented in a legacy 16-bit
 * ABI (see linux/highuid.h); tunable via /proc/sys/kernel/overflow{uid,gid}. */
int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif
  91. /*
  92. * the same as above, but for filesystems which can only store a 16-bit
  93. * UID and GID. as such, this is needed on all architectures
  94. */
  95. int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
  96. int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
  97. EXPORT_SYMBOL(fs_overflowuid);
  98. EXPORT_SYMBOL(fs_overflowgid);
/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */
int C_A_D = 1;

/* PID that receives SIGINT when C_A_D is off (see ctrl_alt_del() below). */
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */
void (*pm_power_off_prepare)(void);
/*
 * Returns true if current's euid is same as p's uid or euid,
 * or has CAP_SYS_NICE to p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	/* Same user namespace and matching (e)uid: ordinary permission. */
	if (pcred->user->user_ns == cred->user->user_ns &&
	    (pcred->uid  == cred->euid ||
	     pcred->euid == cred->euid))
		return true;
	/* Otherwise require CAP_SYS_NICE relative to the target's namespace. */
	if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}
/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 *
 * Error accumulation protocol: @error carries the caller's running result.
 * A permission/security failure overwrites it; on success a stale -ESRCH
 * (meaning "no task matched yet") is cleared to 0.  Other prior errors are
 * deliberately preserved so one failure in a group sticks.
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	/* Raising priority (lower nice) needs RLIMIT_NICE or CAP_SYS_NICE. */
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}
/*
 * setpriority(2): set the nice value of a process (PRIO_PROCESS), a
 * process group (PRIO_PGRP), or all processes of a user (PRIO_USER).
 * Per-task failures are folded through set_one_prio()'s @error protocol;
 * -ESRCH survives only if no matching task was found.
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	/* RCU for cred access, tasklist_lock to keep the thread lists stable. */
	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;	/* who == 0 means the caller itself */
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		/* Walk every thread in the system looking for this uid. */
		do_each_thread(g, p) {
			if (__task_cred(p)->uid == who)
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 *
 * For group/user scopes the HIGHEST priority (largest 20 - nice) among
 * the matching tasks is returned; -ESRCH means no task matched.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;	/* who == 0 means the caller itself */
		if (p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (__task_cred(p)->uid == who) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
		} while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
/**
 * emergency_restart - reboot the system
 *
 * Without shutting down any hardware or taking any locks
 * reboot the system. This is called when we know we are in
 * trouble so this is our best effort to reboot. This is
 * safe to call in interrupt context.
 */
void emergency_restart(void)
{
	/* Flush the kernel log before the machine goes away. */
	kmsg_dump(KMSG_DUMP_EMERG);
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);
/*
 * Common pre-restart teardown: notify reboot listeners, mark the system
 * state, stop usermode helpers, then shut down devices and syscore ops —
 * the order here is the teardown contract and must not change.
 */
void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	usermodehelper_disable();
	device_shutdown();
	syscore_shutdown();
}
/**
 * register_reboot_notifier - Register function to be called at reboot time
 * @nb: Info about notifier function to be called
 *
 * Registers a function with the list of functions
 * to be called at reboot time.
 *
 * Currently always returns zero, as blocking_notifier_chain_register()
 * always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);
/**
 * unregister_reboot_notifier - Unregister previously registered reboot notifier
 * @nb: Hook to be unregistered
 *
 * Unregisters a previously registered reboot
 * notifier function.
 *
 * Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);
  322. /**
  323. * kernel_restart - reboot the system
  324. * @cmd: pointer to buffer containing command to execute for restart
  325. * or %NULL
  326. *
  327. * Shutdown everything and perform a clean reboot.
  328. * This is not safe to call in interrupt context.
  329. */
  330. void kernel_restart(char *cmd)
  331. {
  332. kernel_restart_prepare(cmd);
  333. if (!cmd)
  334. printk(KERN_EMERG "Restarting system.\n");
  335. else
  336. printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
  337. kmsg_dump(KMSG_DUMP_RESTART);
  338. machine_restart(cmd);
  339. }
  340. EXPORT_SYMBOL_GPL(kernel_restart);
  341. static void kernel_shutdown_prepare(enum system_states state)
  342. {
  343. blocking_notifier_call_chain(&reboot_notifier_list,
  344. (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
  345. system_state = state;
  346. usermodehelper_disable();
  347. device_shutdown();
  348. }
/**
 * kernel_halt - halt the system
 *
 * Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	syscore_shutdown();
	printk(KERN_EMERG "System halted.\n");
	/* Last chance to capture the log before the CPU stops. */
	kmsg_dump(KMSG_DUMP_HALT);
	machine_halt();
}
EXPORT_SYMBOL_GPL(kernel_halt);
/**
 * kernel_power_off - power_off the system
 *
 * Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	/* Give the platform a chance to prepare (e.g. ACPI) before we
	 * take down the secondary CPUs. */
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	disable_nonboot_cpus();
	syscore_shutdown();
	printk(KERN_EMERG "Power down.\n");
	kmsg_dump(KMSG_DUMP_POWEROFF);
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
/* Serializes all reboot-family operations below. */
static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);	/* does not return */
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		panic("cannot halt");	/* unreachable safety net */

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		/* Copy the user-supplied restart command, always terminated. */
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';
		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}
/* Workqueue callback: performs the ctrl-alt-del restart in process
 * context, since kernel_restart() must not run from an interrupt. */
static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}
  457. /*
  458. * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
  459. * As it's called within an interrupt, it may NOT sync: the only choice
  460. * is whether to reboot at once, or just ignore the ctrl-alt-del.
  461. */
  462. void ctrl_alt_del(void)
  463. {
  464. static DECLARE_WORK(cad_work, deferred_cad);
  465. if (C_A_D)
  466. schedule_work(&cad_work);
  467. else
  468. kill_cad_pid(SIGINT, 1);
  469. }
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa. (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD. A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are not races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	/* (gid_t) -1 means "leave this id unchanged". */
	if (rgid != (gid_t) -1) {
		if (old->gid == rgid ||
		    old->egid == rgid ||
		    nsown_capable(CAP_SETGID))
			new->gid = rgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (old->gid == egid ||
		    old->egid == egid ||
		    old->sgid == egid ||
		    nsown_capable(CAP_SETGID))
			new->egid = egid;
		else
			goto error;
	}

	/* See the BSD rule in the comment above: changing rgid, or setting
	 * egid away from the real gid, also updates the saved gid. */
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old->gid))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETGID))
		/* Privileged: set all four gids at once. */
		new->gid = new->egid = new->sgid = new->fsgid = gid;
	else if (gid == old->gid || gid == old->sgid)
		/* Unprivileged: may only switch to real or saved gid. */
		new->egid = new->fsgid = gid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(current_user_ns(), new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	/* Drop the reference on the old user_struct, install the new one. */
	free_uid(new->user);
	new->user = new_user;
	return 0;
}
/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa. (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	/* (uid_t) -1 means "leave this id unchanged". */
	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (old->uid != ruid &&
		    old->euid != ruid &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = euid;
		if (old->uid != euid &&
		    old->euid != euid &&
		    old->suid != euid &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	/* Real uid changed: switch the per-user accounting structure. */
	if (new->uid != old->uid) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	/* BSD saved-id rule, as described in the comment above. */
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != old->uid))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETUID)) {
		/* Privileged: real and saved uid both change. */
		new->suid = new->uid = uid;
		if (uid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (uid != old->uid && uid != new->suid) {
		/* Unprivileged: may only switch to real or saved uid. */
		goto error;
	}

	new->fsuid = new->euid = uid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETUID)) {
		/* Unprivileged: each requested id must match one of the
		 * caller's current real, effective or saved uids. */
		if (ruid != (uid_t) -1 && ruid != old->uid &&
		    ruid != old->euid  && ruid != old->suid)
			goto error;
		if (euid != (uid_t) -1 && euid != old->uid &&
		    euid != old->euid  && euid != old->suid)
			goto error;
		if (suid != (uid_t) -1 && suid != old->uid &&
		    suid != old->euid  && suid != old->suid)
			goto error;
	}

	/* (uid_t) -1 means "leave this id unchanged". */
	if (ruid != (uid_t) -1) {
		new->uid = ruid;
		if (ruid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = euid;
	if (suid != (uid_t) -1)
		new->suid = suid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
  718. SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
  719. {
  720. const struct cred *cred = current_cred();
  721. int retval;
  722. if (!(retval = put_user(cred->uid, ruid)) &&
  723. !(retval = put_user(cred->euid, euid)))
  724. retval = put_user(cred->suid, suid);
  725. return retval;
  726. }
/*
 * Same as above, but for rgid, egid, sgid.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	const struct cred *old;
	struct cred *new;
	int retval;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETGID)) {
		/* Unprivileged: each requested id must match one of the
		 * caller's current real, effective or saved gids. */
		if (rgid != (gid_t) -1 && rgid != old->gid &&
		    rgid != old->egid  && rgid != old->sgid)
			goto error;
		if (egid != (gid_t) -1 && egid != old->gid &&
		    egid != old->egid  && egid != old->sgid)
			goto error;
		if (sgid != (gid_t) -1 && sgid != old->gid &&
		    sgid != old->egid  && sgid != old->sgid)
			goto error;
	}

	/* (gid_t) -1 means "leave this id unchanged". */
	if (rgid != (gid_t) -1)
		new->gid = rgid;
	if (egid != (gid_t) -1)
		new->egid = egid;
	if (sgid != (gid_t) -1)
		new->sgid = sgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
  763. SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
  764. {
  765. const struct cred *cred = current_cred();
  766. int retval;
  767. if (!(retval = put_user(cred->gid, rgid)) &&
  768. !(retval = put_user(cred->egid, egid)))
  769. retval = put_user(cred->sgid, sgid);
  770. return retval;
  771. }
/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks.  This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to).  It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 *
 * Historical quirk: the return value is always the PREVIOUS fsuid, even
 * when the change is refused — there is no error return.
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;

	new = prepare_creds();
	if (!new)
		return current_fsuid();
	old = current_cred();
	old_fsuid = old->fsuid;

	/* Allowed if uid matches any current uid, or with CAP_SETUID. */
	if (uid == old->uid  || uid == old->euid  ||
	    uid == old->suid || uid == old->fsuid ||
	    nsown_capable(CAP_SETUID)) {
		if (uid != old_fsuid) {
			new->fsuid = uid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}
  803. /*
  804. * Samma på svenska..
  805. */
/*
 * setfsgid(2): change only the filesystem GID.
 *
 * Mirrors setfsuid(): always returns the previous fsgid, even when the
 * change is denied or prepare_creds() fails.
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;

	new = prepare_creds();
	if (!new)
		/* No error channel; report "nothing changed". */
		return current_fsgid();
	old = current_cred();
	old_fsgid = old->fsgid;

	/* Permitted if gid matches any of the caller's GIDs, or CAP_SETGID. */
	if (gid == old->gid || gid == old->egid ||
	    gid == old->sgid || gid == old->fsgid ||
	    nsown_capable(CAP_SETGID)) {
		if (gid != old_fsgid) {
			new->fsgid = gid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}
/*
 * Fill @tms with the caller's thread-group CPU times and the
 * accumulated times of reaped children, converted to clock ticks.
 *
 * siglock protects the signal->cutime/cstime pair against concurrent
 * updates from exit.c when children are reaped.
 */
void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_times(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);

	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}
  843. SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
  844. {
  845. if (tbuf) {
  846. struct tms tmp;
  847. do_sys_times(&tmp);
  848. if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
  849. return -EFAULT;
  850. }
  851. force_successful_syscall_return();
  852. return (long) jiffies_64_to_clock_t(get_jiffies_64());
  853. }
  854. /*
  855. * This needs some heavy checking ...
  856. * I just haven't the stomach for it. I also don't fully
  857. * understand sessions/pgrp etc. Let somebody who does explain it.
  858. *
  859. * OK, I think I have the protection semantics right.... this is really
  860. * only important on a multi-user system anyway, to make sure one user
  861. * can't send a signal to a process owned by another. -TYT, 12/12/91
  862. *
  863. * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
  864. * LBT 04.03.94
  865. */
/*
 * setpgid(2): move process @pid into process group @pgid.
 * Per POSIX the target must be the caller or a not-yet-exec'ed child
 * in the same session, and the destination group must exist in the
 * caller's session (unless a new group named after @pid is created).
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	/* pid == 0 means the caller; pgid == 0 means "same as pid". */
	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;

	rcu_read_lock();
	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	/* Only whole processes (thread-group leaders) can be moved. */
	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		/* Target is our child: must share our session ... */
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		/* ... and must not have exec'ed yet (POSIX 'did_exec'). */
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		/* Not a child: we may only change ourselves. */
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	/* A session leader can never change its process group. */
	err = -EPERM;
	if (p->signal->leader)
		goto out;

	/* Joining another group: it must exist within our session. */
	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}
/*
 * getpgid(2): return the process-group id of @pid (0 = the caller),
 * translated into the caller's pid namespace.  RCU keeps the looked-up
 * task and its pid structures alive for the duration.
 */
SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}
#ifdef __ARCH_WANT_SYS_GETPGRP
/* getpgrp(2) is just getpgid(0): the caller's own process group. */
SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}
#endif
/*
 * getsid(2): return the session id of @pid (0 = the caller),
 * translated into the caller's pid namespace.
 */
SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}
/*
 * setsid(2): make the calling process the leader of a new session
 * (and of a new process group with the same id), detaching it from
 * its controlling terminal.  Returns the new session id or -EPERM.
 */
SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(sid);

	/* New sessions start with no controlling terminal. */
	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		/* Success: notify connectors / autogroup outside the lock. */
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}
  1008. DECLARE_RWSEM(uts_sem);
#ifdef COMPAT_UTS_MACHINE
/*
 * For tasks running a 32-bit personality on a 64-bit kernel, overwrite
 * the already-copied-out machine string with the compat one (e.g.
 * "i686" instead of "x86_64").  Evaluates non-zero on copy failure.
 */
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif
  1017. /*
  1018. * Work around broken programs that cannot handle "Linux 3.0".
  1019. * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
  1020. */
  1021. static int override_release(char __user *release, int len)
  1022. {
  1023. int ret = 0;
  1024. char buf[len];
  1025. if (current->personality & UNAME26) {
  1026. char *rest = UTS_RELEASE;
  1027. int ndots = 0;
  1028. unsigned v;
  1029. while (*rest) {
  1030. if (*rest == '.' && ++ndots >= 3)
  1031. break;
  1032. if (!isdigit(*rest) && *rest != '.')
  1033. break;
  1034. rest++;
  1035. }
  1036. v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
  1037. snprintf(buf, len, "2.6.%u%s", v, rest);
  1038. ret = copy_to_user(release, buf, len);
  1039. }
  1040. return ret;
  1041. }
/*
 * uname(2), new_utsname flavour: copy the uts namespace data to
 * userspace, then patch the release/machine fields in place for
 * UNAME26 / compat personalities.
 */
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	/* These write directly into the user buffer just filled above. */
	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}
  1055. #ifdef __ARCH_WANT_SYS_OLD_UNAME
  1056. /*
  1057. * Old cruft
  1058. */
/*
 * Legacy uname(2) with the old_utsname layout.  Same flow as
 * newuname(): bulk copy under uts_sem, then personality fixups.
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}
/*
 * Oldest uname(2) variant: fields are only __OLD_UTS_LEN wide, so each
 * string is truncated and explicitly NUL-terminated one byte past the
 * copied region.  access_ok() up front allows the __copy_to_user /
 * __put_user fast paths below.
 */
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	/* Errors are OR-ed together; any non-zero result means -EFAULT. */
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
  1104. #endif
/*
 * sethostname(2): set the uts nodename.  Requires CAP_SYS_ADMIN in the
 * uts namespace's user namespace.  @name need not be NUL-terminated;
 * @len bounds the copy and the remainder of the field is zeroed.
 */
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	/* Copy from userspace into a stack buffer before taking uts_sem. */
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}
  1124. #ifdef __ARCH_WANT_SYS_GETHOSTNAME
/*
 * gethostname(2): copy the uts nodename (including its NUL) into the
 * user buffer.  If @len is smaller than the name, the copy is silently
 * truncated and the result is NOT NUL-terminated — historical behavior.
 */
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	/* +1 so the terminating NUL is copied when it fits. */
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}
  1142. #endif
  1143. /*
  1144. * Only setdomainname; getdomainname can be implemented by calling
  1145. * uname()
  1146. */
/*
 * setdomainname(2): set the uts NIS domain name.  Exact mirror of
 * sethostname() but targets the domainname field.
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		/* Zero-fill the tail so the stored name is NUL-terminated. */
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}
  1166. SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
  1167. {
  1168. struct rlimit value;
  1169. int ret;
  1170. ret = do_prlimit(current, resource, NULL, &value);
  1171. if (!ret)
  1172. ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
  1173. return ret;
  1174. }
  1175. #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT
  1176. /*
  1177. * Back compatibility for getrlimit. Needed for some apps.
  1178. */
  1179. SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
  1180. struct rlimit __user *, rlim)
  1181. {
  1182. struct rlimit x;
  1183. if (resource >= RLIM_NLIMITS)
  1184. return -EINVAL;
  1185. task_lock(current->group_leader);
  1186. x = current->signal->rlim[resource];
  1187. task_unlock(current->group_leader);
  1188. if (x.rlim_cur > 0x7FFFFFFF)
  1189. x.rlim_cur = 0x7FFFFFFF;
  1190. if (x.rlim_max > 0x7FFFFFFF)
  1191. x.rlim_max = 0x7FFFFFFF;
  1192. return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
  1193. }
  1194. #endif
/*
 * Does this 64-bit rlimit value mean "unlimited"?  On 32-bit kernels
 * any value that cannot be represented in an unsigned long is treated
 * as infinity, since struct rlimit cannot hold it anyway.
 */
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}
  1203. static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
  1204. {
  1205. if (rlim->rlim_cur == RLIM_INFINITY)
  1206. rlim64->rlim_cur = RLIM64_INFINITY;
  1207. else
  1208. rlim64->rlim_cur = rlim->rlim_cur;
  1209. if (rlim->rlim_max == RLIM_INFINITY)
  1210. rlim64->rlim_max = RLIM64_INFINITY;
  1211. else
  1212. rlim64->rlim_max = rlim->rlim_max;
  1213. }
  1214. static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
  1215. {
  1216. if (rlim64_is_infinity(rlim64->rlim_cur))
  1217. rlim->rlim_cur = RLIM_INFINITY;
  1218. else
  1219. rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
  1220. if (rlim64_is_infinity(rlim64->rlim_max))
  1221. rlim->rlim_max = RLIM_INFINITY;
  1222. else
  1223. rlim->rlim_max = (unsigned long)rlim64->rlim_max;
  1224. }
/* make sure you are allowed to change @tsk limits before calling this */
/*
 * Get and/or set one rlimit of @tsk.
 *
 * @new_rlim: if non-NULL, the limit to install (validated first).
 * @old_rlim: if non-NULL, receives the previous limit.
 *
 * Locking: tasklist_lock (read) pins tsk->sighand/signal; task_lock on
 * the group leader serializes access to signal->rlim.
 */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		/* NOFILE hard limit is capped by the global nr_open sysctl. */
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
			new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}
/* rcu lock must be held */
/*
 * May the caller read/write @task's rlimits?  Permitted when the
 * caller is the task itself, when all six of its real uid/gid match
 * the target's uid/gid triples within the same user namespace, or
 * with CAP_SYS_RESOURCE over the target's namespace.
 */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	if (cred->user->user_ns == tcred->user->user_ns &&
	    (cred->uid == tcred->euid &&
	     cred->uid == tcred->suid &&
	     cred->uid == tcred->uid  &&
	     cred->gid == tcred->egid &&
	     cred->gid == tcred->sgid &&
	     cred->gid == tcred->gid))
		return 0;
	if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}
/*
 * prlimit64(2): get and/or set a limit of an arbitrary process.
 * @pid == 0 targets the caller.  The target is looked up under RCU,
 * permission-checked, then pinned with a reference so do_prlimit()
 * can run without RCU held.
 */
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	/* Hold a reference across do_prlimit(); drop RCU first. */
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}
  1342. SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
  1343. {
  1344. struct rlimit new_rlim;
  1345. if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
  1346. return -EFAULT;
  1347. return do_prlimit(current, resource, &new_rlim, NULL);
  1348. }
  1349. /*
  1350. * It would make sense to put struct rusage in the task_struct,
  1351. * except that would make the task_struct be *really big*. After
  1352. * task_struct gets moved into malloc'ed memory, it would
  1353. * make sense to do this. It will make moving the rest of the information
  1354. * a lot simpler! (Which we're not doing right now because we're not
  1355. * measuring them yet).
  1356. *
  1357. * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
  1358. * races with threads incrementing their own counters. But since word
  1359. * reads are atomic, we either get new values or old values and we don't
  1360. * care which for the sums. We always take the siglock to protect reading
  1361. * the c* fields from p->signal from races with exit.c updating those
  1362. * fields when reaping, so a sample either gets all the additions of a
  1363. * given child after it's reaped, or none so this sample is before reaping.
  1364. *
  1365. * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
  1367. * for the cases current multithreaded, non-current single threaded
  1368. * non-current multithreaded. Thread traversal is now safe with
  1369. * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
  1371. * single threaded, as no one else can take our signal_struct away, no one
  1372. * else can reap the children to update signal->c* counters, and no one else
  1373. * can race with the signal-> fields. If we do not take any lock, the
  1374. * signal-> fields could be read out of order while another thread was just
  1375. * exiting. So we should place a read memory barrier when we avoid the lock.
  1376. * On the writer side, write memory barrier is implied in __exit_signal
  1377. * as __exit_signal releases the siglock spinlock after updating the signal->
  1378. * fields. But we don't do this yet to keep things simple.
  1379. *
  1380. */
  1381. static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
  1382. {
  1383. r->ru_nvcsw += t->nvcsw;
  1384. r->ru_nivcsw += t->nivcsw;
  1385. r->ru_minflt += t->min_flt;
  1386. r->ru_majflt += t->maj_flt;
  1387. r->ru_inblock += task_io_get_inblock(t);
  1388. r->ru_oublock += task_io_get_oublock(t);
  1389. }
  1390. static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
  1391. {
  1392. struct task_struct *t;
  1393. unsigned long flags;
  1394. cputime_t tgutime, tgstime, utime, stime;
  1395. unsigned long maxrss = 0;
  1396. memset((char *) r, 0, sizeof *r);
  1397. utime = stime = cputime_zero;
  1398. if (who == RUSAGE_THREAD) {
  1399. task_times(current, &utime, &stime);
  1400. accumulate_thread_rusage(p, r);
  1401. maxrss = p->signal->maxrss;
  1402. goto out;
  1403. }
  1404. if (!lock_task_sighand(p, &flags))
  1405. return;
  1406. switch (who) {
  1407. case RUSAGE_BOTH:
  1408. case RUSAGE_CHILDREN:
  1409. utime = p->signal->cutime;
  1410. stime = p->signal->cstime;
  1411. r->ru_nvcsw = p->signal->cnvcsw;
  1412. r->ru_nivcsw = p->signal->cnivcsw;
  1413. r->ru_minflt = p->signal->cmin_flt;
  1414. r->ru_majflt = p->signal->cmaj_flt;
  1415. r->ru_inblock = p->signal->cinblock;
  1416. r->ru_oublock = p->signal->coublock;
  1417. maxrss = p->signal->cmaxrss;
  1418. if (who == RUSAGE_CHILDREN)
  1419. break;
  1420. case RUSAGE_SELF:
  1421. thread_group_times(p, &tgutime, &tgstime);
  1422. utime = cputime_add(utime, tgutime);
  1423. stime = cputime_add(stime, tgstime);
  1424. r->ru_nvcsw += p->signal->nvcsw;
  1425. r->ru_nivcsw += p->signal->nivcsw;
  1426. r->ru_minflt += p->signal->min_flt;
  1427. r->ru_majflt += p->signal->maj_flt;
  1428. r->ru_inblock += p->signal->inblock;
  1429. r->ru_oublock += p->signal->oublock;
  1430. if (maxrss < p->signal->maxrss)
  1431. maxrss = p->signal->maxrss;
  1432. t = p;
  1433. do {
  1434. accumulate_thread_rusage(t, r);
  1435. t = next_thread(t);
  1436. } while (t != p);
  1437. break;
  1438. default:
  1439. BUG();
  1440. }
  1441. unlock_task_sighand(p, &flags);
  1442. out:
  1443. cputime_to_timeval(utime, &r->ru_utime);
  1444. cputime_to_timeval(stime, &r->ru_stime);
  1445. if (who != RUSAGE_CHILDREN) {
  1446. struct mm_struct *mm = get_task_mm(p);
  1447. if (mm) {
  1448. setmax_mm_hiwater_rss(&maxrss, mm);
  1449. mmput(mm);
  1450. }
  1451. }
  1452. r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
  1453. }
  1454. int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
  1455. {
  1456. struct rusage r;
  1457. k_getrusage(p, who, &r);
  1458. return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
  1459. }
  1460. SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
  1461. {
  1462. if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
  1463. who != RUSAGE_THREAD)
  1464. return -EINVAL;
  1465. return getrusage(current, who, ru);
  1466. }
/*
 * umask(2): atomically swap in the new mask (restricted to the rwx
 * permission bits) and return the previous one.
 */
SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}
/*
 * prctl(2): grab-bag of per-process controls.  The LSM hook runs
 * first and fully handles the option unless it returns -ENOSYS.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
		case PR_SET_PDEATHSIG:
			if (!valid_signal(arg2)) {
				error = -EINVAL;
				break;
			}
			me->pdeath_signal = arg2;
			error = 0;
			break;
		case PR_GET_PDEATHSIG:
			error = put_user(me->pdeath_signal, (int __user *)arg2);
			break;
		case PR_GET_DUMPABLE:
			error = get_dumpable(me->mm);
			break;
		case PR_SET_DUMPABLE:
			/* NOTE(review): arg2 is unsigned long, so "arg2 < 0"
			 * is always false; the "> 1" test does all the work. */
			if (arg2 < 0 || arg2 > 1) {
				error = -EINVAL;
				break;
			}
			set_dumpable(me->mm, arg2);
			error = 0;
			break;

		/* Arch-specific controls; each macro returns an errno. */
		case PR_SET_UNALIGN:
			error = SET_UNALIGN_CTL(me, arg2);
			break;
		case PR_GET_UNALIGN:
			error = GET_UNALIGN_CTL(me, arg2);
			break;
		case PR_SET_FPEMU:
			error = SET_FPEMU_CTL(me, arg2);
			break;
		case PR_GET_FPEMU:
			error = GET_FPEMU_CTL(me, arg2);
			break;
		case PR_SET_FPEXC:
			error = SET_FPEXC_CTL(me, arg2);
			break;
		case PR_GET_FPEXC:
			error = GET_FPEXC_CTL(me, arg2);
			break;
		case PR_GET_TIMING:
			/* Only statistical process timing is supported. */
			error = PR_TIMING_STATISTICAL;
			break;
		case PR_SET_TIMING:
			if (arg2 != PR_TIMING_STATISTICAL)
				error = -EINVAL;
			else
				error = 0;
			break;

		case PR_SET_NAME:
			/* Bounded copy; comm is always NUL-terminated. */
			comm[sizeof(me->comm)-1] = 0;
			if (strncpy_from_user(comm, (char __user *)arg2,
					      sizeof(me->comm) - 1) < 0)
				return -EFAULT;
			set_task_comm(me, comm);
			return 0;
		case PR_GET_NAME:
			get_task_comm(comm, me);
			if (copy_to_user((char __user *)arg2, comm,
					 sizeof(comm)))
				return -EFAULT;
			return 0;
		case PR_GET_ENDIAN:
			error = GET_ENDIAN(me, arg2);
			break;
		case PR_SET_ENDIAN:
			error = SET_ENDIAN(me, arg2);
			break;

		case PR_GET_SECCOMP:
			error = prctl_get_seccomp();
			break;
		case PR_SET_SECCOMP:
			error = prctl_set_seccomp(arg2);
			break;
		case PR_GET_TSC:
			error = GET_TSC_CTL(arg2);
			break;
		case PR_SET_TSC:
			error = SET_TSC_CTL(arg2);
			break;
		case PR_TASK_PERF_EVENTS_DISABLE:
			error = perf_event_task_disable();
			break;
		case PR_TASK_PERF_EVENTS_ENABLE:
			error = perf_event_task_enable();
			break;
		case PR_GET_TIMERSLACK:
			error = current->timer_slack_ns;
			break;
		case PR_SET_TIMERSLACK:
			/* NOTE(review): arg2 is unsigned, so "<= 0" only
			 * matches 0, which restores the default slack. */
			if (arg2 <= 0)
				current->timer_slack_ns =
						current->default_timer_slack_ns;
			else
				current->timer_slack_ns = arg2;
			error = 0;
			break;
		case PR_MCE_KILL:
			/* Machine-check kill policy; arg4/arg5 must be 0. */
			if (arg4 | arg5)
				return -EINVAL;
			switch (arg2) {
			case PR_MCE_KILL_CLEAR:
				if (arg3 != 0)
					return -EINVAL;
				current->flags &= ~PF_MCE_PROCESS;
				break;
			case PR_MCE_KILL_SET:
				current->flags |= PF_MCE_PROCESS;
				if (arg3 == PR_MCE_KILL_EARLY)
					current->flags |= PF_MCE_EARLY;
				else if (arg3 == PR_MCE_KILL_LATE)
					current->flags &= ~PF_MCE_EARLY;
				else if (arg3 == PR_MCE_KILL_DEFAULT)
					current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
				else
					return -EINVAL;
				break;
			default:
				return -EINVAL;
			}
			error = 0;
			break;
		case PR_MCE_KILL_GET:
			if (arg2 | arg3 | arg4 | arg5)
				return -EINVAL;
			if (current->flags & PF_MCE_PROCESS)
				error = (current->flags & PF_MCE_EARLY) ?
					PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
			else
				error = PR_MCE_KILL_DEFAULT;
			break;
		default:
			error = -EINVAL;
			break;
	}
	return error;
}
  1621. SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
  1622. struct getcpu_cache __user *, unused)
  1623. {
  1624. int err = 0;
  1625. int cpu = raw_smp_processor_id();
  1626. if (cpup)
  1627. err |= put_user(cpu, cpup);
  1628. if (nodep)
  1629. err |= put_user(cpu_to_node(cpu), nodep);
  1630. return err ? -EFAULT : 0;
  1631. }
/* Userspace helper executed for an orderly poweroff (sysctl-tunable). */
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
/* UMH cleanup callback: free the argv array built by argv_split(). */
static void argv_cleanup(struct subprocess_info *info)
{
	argv_free(info->argv);
}
/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 *
 * Returns the call_usermodehelper_exec() result, -ENOMEM on allocation
 * failure (note: when @force is set, a failing path does not return at
 * all — kernel_power_off() halts the machine).
 */
int orderly_poweroff(bool force)
{
	int argc;
	/* GFP_ATOMIC: "any context" includes atomic callers. */
	char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret = -ENOMEM;
	struct subprocess_info *info;

	if (argv == NULL) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		goto out;
	}

	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
	if (info == NULL) {
		argv_free(argv);
		goto out;
	}

	/* argv ownership passes to the helper; freed via argv_cleanup. */
	call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);

	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);

  out:
	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

		/* I guess this should try to kick off some daemon to
		   sync and poweroff asap.  Or not even bother syncing
		   if we're doing an emergency shutdown? */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}
  1679. EXPORT_SYMBOL_GPL(orderly_poweroff);