summaryrefslogtreecommitdiff
path: root/init
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-11 02:42:53 +0100
committerIngo Molnar <mingo@elte.hu>2009-01-11 02:42:53 +0100
commit506c10f26c481b7f8ef27c1c79290f68989b2e9e (patch)
tree03de82e812f00957aa6276dac2fe51c3358e88d7 /init
parente1df957670aef74ffd9a4ad93e6d2c90bf6b4845 (diff)
parentc59765042f53a79a7a65585042ff463b69cb248c (diff)
Merge commit 'v2.6.29-rc1' into perfcounters/core
Conflicts: include/linux/kernel_stat.h
Diffstat (limited to 'init')
-rw-r--r--init/Kconfig291
-rw-r--r--init/do_mounts.c6
-rw-r--r--init/do_mounts_md.c2
-rw-r--r--init/do_mounts_rd.c14
-rw-r--r--init/initramfs.c1
-rw-r--r--init/main.c41
6 files changed, 239 insertions, 116 deletions
diff --git a/init/Kconfig b/init/Kconfig
index c38ae71a5e19..a588cdc274bc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -271,59 +271,6 @@ config LOG_BUF_SHIFT
13 => 8 KB
12 => 4 KB
-config CGROUPS
- bool "Control Group support"
- help
- This option will let you use process cgroup subsystems
- such as Cpusets
-
- Say N if unsure.
-
-config CGROUP_DEBUG
- bool "Example debug cgroup subsystem"
- depends on CGROUPS
- default n
- help
- This option enables a simple cgroup subsystem that
- exports useful debugging information about the cgroups
- framework
-
- Say N if unsure
-
-config CGROUP_NS
- bool "Namespace cgroup subsystem"
- depends on CGROUPS
- help
- Provides a simple namespace cgroup subsystem to
- provide hierarchical naming of sets of namespaces,
- for instance virtual servers and checkpoint/restart
- jobs.
-
-config CGROUP_FREEZER
- bool "control group freezer subsystem"
- depends on CGROUPS
- help
- Provides a way to freeze and unfreeze all tasks in a
- cgroup.
-
-config CGROUP_DEVICE
- bool "Device controller for cgroups"
- depends on CGROUPS && EXPERIMENTAL
- help
- Provides a cgroup implementing whitelists for devices which
- a process in the cgroup can mknod or open.
-
-config CPUSETS
- bool "Cpuset support"
- depends on SMP && CGROUPS
- help
- This option will let you create and manage CPUSETs which
- allow dynamically partitioning a system into sets of CPUs and
- Memory Nodes and assigning tasks to run only within those sets.
- This is primarily useful on large SMP or NUMA systems.
-
- Say N if unsure.
-
#
# Architectures with an unreliable sched_clock() should select this:
#
@@ -337,6 +284,8 @@ config GROUP_SCHED
help
This feature lets CPU scheduler recognize task groups and control CPU
bandwidth allocation to such task groups.
+ In order to create a group from arbitrary set of processes, use
+ CONFIG_CGROUPS. (See Control Group support.)
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
@@ -379,6 +328,66 @@ config CGROUP_SCHED
endchoice
+menu "Control Group support"
+config CGROUPS
+ bool "Control Group support"
+ help
+ This option add support for grouping sets of processes together, for
+ use with process control subsystems such as Cpusets, CFS, memory
+ controls or device isolation.
+ See
+ - Documentation/cpusets.txt (Cpusets)
+ - Documentation/scheduler/sched-design-CFS.txt (CFS)
+ - Documentation/cgroups/ (features for grouping, isolation)
+ - Documentation/controllers/ (features for resource control)
+
+ Say N if unsure.
+
+config CGROUP_DEBUG
+ bool "Example debug cgroup subsystem"
+ depends on CGROUPS
+ default n
+ help
+ This option enables a simple cgroup subsystem that
+ exports useful debugging information about the cgroups
+ framework
+
+ Say N if unsure
+
+config CGROUP_NS
+ bool "Namespace cgroup subsystem"
+ depends on CGROUPS
+ help
+ Provides a simple namespace cgroup subsystem to
+ provide hierarchical naming of sets of namespaces,
+ for instance virtual servers and checkpoint/restart
+ jobs.
+
+config CGROUP_FREEZER
+ bool "control group freezer subsystem"
+ depends on CGROUPS
+ help
+ Provides a way to freeze and unfreeze all tasks in a
+ cgroup.
+
+config CGROUP_DEVICE
+ bool "Device controller for cgroups"
+ depends on CGROUPS && EXPERIMENTAL
+ help
+ Provides a cgroup implementing whitelists for devices which
+ a process in the cgroup can mknod or open.
+
+config CPUSETS
+ bool "Cpuset support"
+ depends on SMP && CGROUPS
+ help
+ This option will let you create and manage CPUSETs which
+ allow dynamically partitioning a system into sets of CPUs and
+ Memory Nodes and assigning tasks to run only within those sets.
+ This is primarily useful on large SMP or NUMA systems.
+
+ Say N if unsure.
+
config CGROUP_CPUACCT
bool "Simple CPU accounting cgroup subsystem"
depends on CGROUPS
@@ -393,9 +402,6 @@ config RESOURCE_COUNTERS
infrastructure that works with cgroups
depends on CGROUPS
-config MM_OWNER
- bool
-
config CGROUP_MEM_RES_CTLR
bool "Memory Resource Controller for Control Groups"
depends on CGROUPS && RESOURCE_COUNTERS
@@ -414,36 +420,68 @@ config CGROUP_MEM_RES_CTLR
sure you need the memory resource controller. Even when you enable
this, you can set "cgroup_disable=memory" at your boot option to
disable memory resource controller and you can avoid overheads.
- (and lose benefits of memory resource contoller)
+ (and lose benefits of memory resource controller)
This config option also selects MM_OWNER config option, which
could in turn add some fork/exit overhead.
+config MM_OWNER
+ bool
+
+config CGROUP_MEM_RES_CTLR_SWAP
+ bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)"
+ depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL
+ help
+ Add swap management feature to memory resource controller. When you
+ enable this, you can limit mem+swap usage per cgroup. In other words,
+ when you disable this, memory resource controller has no cares to
+ usage of swap...a process can exhaust all of the swap. This extension
+ is useful when you want to avoid exhaustion swap but this itself
+ adds more overheads and consumes memory for remembering information.
+ Especially if you use 32bit system or small memory system, please
+ be careful about enabling this. When memory resource controller
+ is disabled by boot option, this will be automatically disabled and
+ there will be no overhead from this. Even when you set this config=y,
+ if boot option "noswapaccount" is set, swap will not be accounted.
+
+
+endmenu
+
config SYSFS_DEPRECATED
bool
config SYSFS_DEPRECATED_V2
- bool "Create deprecated sysfs files"
+ bool "Create deprecated sysfs layout for older userspace tools"
depends on SYSFS
default y
select SYSFS_DEPRECATED
help
- This option creates deprecated symlinks such as the
- "device"-link, the <subsystem>:<name>-link, and the
- "bus"-link. It may also add deprecated key in the
- uevent environment.
- None of these features or values should be used today, as
- they export driver core implementation details to userspace
- or export properties which can't be kept stable across kernel
- releases.
-
- If enabled, this option will also move any device structures
- that belong to a class, back into the /sys/class hierarchy, in
- order to support older versions of udev and some userspace
- programs.
-
- If you are using a distro with the most recent userspace
- packages, it should be safe to say N here.
+ This option switches the layout of sysfs to the deprecated
+ version.
+
+ The current sysfs layout features a unified device tree at
+ /sys/devices/, which is able to express a hierarchy between
+ class devices. If the deprecated option is set to Y, the
+ unified device tree is split into a bus device tree at
+ /sys/devices/ and several individual class device trees at
+ /sys/class/. The class and bus devices will be connected by
+ "<subsystem>:<name>" and the "device" links. The "block"
+ class devices, will not show up in /sys/class/block/. Some
+ subsystems will suppress the creation of some devices which
+ depend on the unified device tree.
+
+ This option is not a pure compatibility option that can
+ be safely enabled on newer distributions. It will change the
+ layout of sysfs to the non-extensible deprecated version,
+ and disable some features, which can not be exported without
+ confusing older userspace tools. Since 2007/2008 all major
+ distributions do not enable this option, and ship no tools which
+ depend on the deprecated layout or this option.
+
+ If you are using a new kernel on an older distribution, or use
+ older userspace tools, you might need to say Y here. Do not say Y,
+ if the original kernel, that came with your distribution, has
+ this option set to N.
config PROC_PID_CPUSET
bool "Include legacy /proc/<pid>/cpuset file"
@@ -868,10 +906,6 @@ config RT_MUTEXES
boolean
select PLIST
-config TINY_SHMEM
- default !SHMEM
- bool
-
config BASE_SMALL
int
default 0 if BASE_FULL
@@ -946,14 +980,17 @@ config MODULE_SRCVERSION_ALL
the version). With this option, such a "srcversion" field
will be created for all modules. If unsure, say N.
-config KMOD
- def_bool y
- help
- This is being removed soon. These days, CONFIG_MODULES
- implies CONFIG_KMOD, so use that instead.
-
endif # MODULES
+config INIT_ALL_POSSIBLE
+ bool
+ help
+ Back when each arch used to define their own cpu_online_map and
+ cpu_possible_map, some of them chose to initialize cpu_possible_map
+ with all 1s, and others with all 0s. When they were centralised,
+ it was better to provide this option than to break all the archs
+ and have several arch maintainers persuing me down dark alleys.
+
config STOP_MACHINE
bool
default y
@@ -966,10 +1003,90 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool
+choice
+ prompt "RCU Implementation"
+ default CLASSIC_RCU
+
config CLASSIC_RCU
- def_bool !PREEMPT_RCU
+ bool "Classic RCU"
help
This option selects the classic RCU implementation that is
designed for best read-side performance on non-realtime
- systems. Classic RCU is the default. Note that the
- PREEMPT_RCU symbol is used to select/deselect this option.
+ systems.
+
+ Select this option if you are unsure.
+
+config TREE_RCU
+ bool "Tree-based hierarchical RCU"
+ help
+ This option selects the RCU implementation that is
+ designed for very large SMP system with hundreds or
+ thousands of CPUs.
+
+config PREEMPT_RCU
+ bool "Preemptible RCU"
+ depends on PREEMPT
+ help
+ This option reduces the latency of the kernel by making certain
+ RCU sections preemptible. Normally RCU code is non-preemptible, if
+ this option is selected then read-only RCU sections become
+ preemptible. This helps latency, but may expose bugs due to
+ now-naive assumptions about each RCU read-side critical section
+ remaining on a given CPU through its execution.
+
+endchoice
+
+config RCU_TRACE
+ bool "Enable tracing for RCU"
+ depends on TREE_RCU || PREEMPT_RCU
+ help
+ This option provides tracing in RCU which presents stats
+ in debugfs for debugging RCU implementation.
+
+ Say Y here if you want to enable RCU tracing
+ Say N if you are unsure.
+
+config RCU_FANOUT
+ int "Tree-based hierarchical RCU fanout value"
+ range 2 64 if 64BIT
+ range 2 32 if !64BIT
+ depends on TREE_RCU
+ default 64 if 64BIT
+ default 32 if !64BIT
+ help
+ This option controls the fanout of hierarchical implementations
+ of RCU, allowing RCU to work efficiently on machines with
+ large numbers of CPUs. This value must be at least the cube
+ root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
+ systems and up to 262,144 for 64-bit systems.
+
+ Select a specific number if testing RCU itself.
+ Take the default if unsure.
+
+config RCU_FANOUT_EXACT
+ bool "Disable tree-based hierarchical RCU auto-balancing"
+ depends on TREE_RCU
+ default n
+ help
+ This option forces use of the exact RCU_FANOUT value specified,
+ regardless of imbalances in the hierarchy. This is useful for
+ testing RCU itself, and might one day be useful on systems with
+ strong NUMA behavior.
+
+ Without RCU_FANOUT_EXACT, the code will balance the hierarchy.
+
+ Say N if unsure.
+
+config TREE_RCU_TRACE
+ def_bool RCU_TRACE && TREE_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the TREE_RCU implementation,
+ permitting Makefile to trivially select kernel/rcutree_trace.c.
+
+config PREEMPT_RCU_TRACE
+ def_bool RCU_TRACE && PREEMPT_RCU
+ select DEBUG_FS
+ help
+ This option provides tracing for the PREEMPT_RCU implementation,
+ permitting Makefile to trivially select kernel/rcupreempt_trace.c.
diff --git a/init/do_mounts.c b/init/do_mounts.c
index d055b1914c3d..708105e163df 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/initrd.h>
+#include <linux/async.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -220,10 +221,10 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
sys_chdir("/root");
ROOT_DEV = current->fs->pwd.mnt->mnt_sb->s_dev;
- printk("VFS: Mounted root (%s filesystem)%s.\n",
+ printk("VFS: Mounted root (%s filesystem)%s on device %u:%u.\n",
current->fs->pwd.mnt->mnt_sb->s_type->name,
current->fs->pwd.mnt->mnt_sb->s_flags & MS_RDONLY ?
- " readonly" : "");
+ " readonly" : "", MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
return 0;
}
@@ -372,6 +373,7 @@ void __init prepare_namespace(void)
/* wait for the known devices to complete their probing */
while (driver_probe_done() != 0)
msleep(100);
+ async_synchronize_full();
md_run_setup();
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index d6da5cdd3c38..ff95e3192884 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -271,7 +271,7 @@ static int __init raid_setup(char *str)
__setup("raid=", raid_setup);
__setup("md=", md_setup);
-static void autodetect_raid(void)
+static void __init autodetect_raid(void)
{
int fd;
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a7c748fa977a..0f0f0cf3ba9a 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -9,6 +9,7 @@
#include <linux/string.h>
#include "do_mounts.h"
+#include "../fs/squashfs/squashfs_fs.h"
int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
@@ -41,6 +42,7 @@ static int __init crd_load(int in_fd, int out_fd);
* ext2
* romfs
* cramfs
+ * squashfs
* gzip
*/
static int __init
@@ -51,6 +53,7 @@ identify_ramdisk_image(int fd, int start_block)
struct ext2_super_block *ext2sb;
struct romfs_super_block *romfsb;
struct cramfs_super *cramfsb;
+ struct squashfs_super_block *squashfsb;
int nblocks = -1;
unsigned char *buf;
@@ -62,6 +65,7 @@ identify_ramdisk_image(int fd, int start_block)
ext2sb = (struct ext2_super_block *) buf;
romfsb = (struct romfs_super_block *) buf;
cramfsb = (struct cramfs_super *) buf;
+ squashfsb = (struct squashfs_super_block *) buf;
memset(buf, 0xe5, size);
/*
@@ -99,6 +103,16 @@ identify_ramdisk_image(int fd, int start_block)
goto done;
}
+ /* squashfs is at block zero too */
+ if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: squashfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
+ >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
/*
* Read block 1 to test for minix and ext2 superblock
*/
diff --git a/init/initramfs.c b/init/initramfs.c
index 4f5ba75aaa7c..d9c941c0c3ca 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -317,6 +317,7 @@ static int __init do_name(void)
if (wfd >= 0) {
sys_fchown(wfd, uid, gid);
sys_fchmod(wfd, mode);
+ sys_ftruncate(wfd, body_len);
vcollected = kstrdup(collected, GFP_KERNEL);
state = CopyFile;
}
diff --git a/init/main.c b/init/main.c
index 17e9757bfde2..844209453c02 100644
--- a/init/main.c
+++ b/init/main.c
@@ -50,7 +50,6 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
-#include <linux/unwind.h>
#include <linux/buffer_head.h>
#include <linux/page_cgroup.h>
#include <linux/debug_locks.h>
@@ -63,6 +62,7 @@
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
#include <trace/boot.h>
#include <asm/io.h>
@@ -75,15 +75,6 @@
#include <asm/smp.h>
#endif
-/*
- * This is one of the first .c files built. Error out early if we have compiler
- * trouble.
- */
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0
-#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended.
-#endif
-
static int kernel_init(void *);
extern void init_IRQ(void);
@@ -117,7 +108,7 @@ EXPORT_SYMBOL(system_state);
extern void time_init(void);
/* Default late time init is NULL. archs can override this later. */
-void (*late_time_init)(void);
+void (*__initdata late_time_init)(void);
extern void softirq_init(void);
/* Untouched command line saved by arch-specific code. */
@@ -380,12 +371,7 @@ EXPORT_SYMBOL(nr_cpu_ids);
/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
static void __init setup_nr_cpu_ids(void)
{
- int cpu, highest_cpu = 0;
-
- for_each_possible_cpu(cpu)
- highest_cpu = cpu;
-
- nr_cpu_ids = highest_cpu + 1;
+ nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
@@ -461,7 +447,7 @@ static void __init setup_command_line(char *command_line)
* gcc-3.4 accidentally inlines this function, so use noinline.
*/
-static void noinline __init_refok rest_init(void)
+static noinline void __init_refok rest_init(void)
__releases(kernel_lock)
{
int pid;
@@ -527,9 +513,9 @@ static void __init boot_cpu_init(void)
{
int cpu = smp_processor_id();
/* Mark the boot cpu "present", "online" etc for SMP and UP case */
- cpu_set(cpu, cpu_online_map);
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_online(cpu, true);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
}
void __init __weak smp_setup_processor_id(void)
@@ -551,7 +537,6 @@ asmlinkage void __init start_kernel(void)
* Need to run as early as possible, to initialize the
* lockdep hash:
*/
- unwind_init();
lockdep_init();
debug_objects_early_init();
cgroup_init_early();
@@ -573,7 +558,6 @@ asmlinkage void __init start_kernel(void)
setup_arch(&command_line);
mm_init_owner(&init_mm, &init_task);
setup_command_line(command_line);
- unwind_setup();
setup_per_cpu_areas();
setup_nr_cpu_ids();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
@@ -604,6 +588,8 @@ asmlinkage void __init start_kernel(void)
sort_main_extable();
trap_init();
rcu_init();
+ /* init some links before init_ISA_irqs() */
+ early_irq_init();
init_IRQ();
pidhash_init();
init_timers();
@@ -614,7 +600,8 @@ asmlinkage void __init start_kernel(void)
sched_clock_init();
profile_init();
if (!irqs_disabled())
- printk("start_kernel(): bug: interrupts were enabled early\n");
+ printk(KERN_CRIT "start_kernel(): bug: interrupts were "
+ "enabled early\n");
early_boot_irqs_on();
local_irq_enable();
@@ -699,7 +686,7 @@ asmlinkage void __init start_kernel(void)
rest_init();
}
-static int initcall_debug;
+int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
int do_one_initcall(initcall_t fn)
@@ -798,8 +785,10 @@ static void run_init_process(char *init_filename)
/* This is a non __init function. Force it to be noinline otherwise gcc
* makes it inline to init() and it becomes part of init.text section
*/
-static int noinline init_post(void)
+static noinline int init_post(void)
{
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
free_initmem();
unlock_kernel();
mark_rodata_ro();