From 8a25a2fd126c621f44f3aeaef80d51f00fc11639 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:29:42 -0800 Subject: cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion. Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Borislav Petkov Cc: Tigran Aivazian Cc: Len Brown Cc: Zhang Rui Cc: Dave Jones Cc: Peter Zijlstra Cc: Russell King Cc: Andrew Morton Cc: Arjan van de Ven Cc: "Rafael J. Wysocki" Cc: "Srivatsa S. Bhat" Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 793f796c4da3..6ce1501c7de5 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -315,12 +315,12 @@ struct node node_devices[MAX_NUMNODES]; int register_cpu_under_node(unsigned int cpu, unsigned int nid) { int ret; - struct sys_device *obj; + struct device *obj; if (!node_online(nid)) return 0; - obj = get_cpu_sysdev(cpu); + obj = get_cpu_device(cpu); if (!obj) return 0; @@ -337,12 +337,12 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { - struct sys_device *obj; + struct device *obj; if (!node_online(nid)) return 0; - obj = get_cpu_sysdev(cpu); + obj = get_cpu_device(cpu); if (!obj) return 0; -- cgit v1.2.3 From 10fbcf4c6cb122005cdf36fc24d7683da92c7a27 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Wed, 21 Dec 2011 14:48:43 -0800 Subject: convert 'memory' sysdev_class to a regular subsystem This moves the 'memory sysdev_class' over to a regular 'memory' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 146 +++++++++++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 70 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 6ce1501c7de5..996d2189689b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -1,8 +1,7 @@ /* - * drivers/base/node.c - basic Node class support + * Basic Node interface support */ -#include #include #include #include @@ -19,18 +18,16 @@ #include #include -static struct sysdev_class_attribute *node_state_attrs[]; - -static struct sysdev_class node_class = { +static struct bus_type node_subsys = { .name = "node", - .attrs = node_state_attrs, + .dev_name = "node", }; -static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) +static ssize_t node_read_cpumap(struct device *dev, int type, char *buf) { struct node *node_dev = to_node(dev); - const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id); + const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); int len; /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ @@ -44,23 +41,23 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) return len; } -static inline ssize_t node_read_cpumask(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static inline ssize_t node_read_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) { return node_read_cpumap(dev, 0, buf); } -static inline ssize_t node_read_cpulist(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static inline ssize_t node_read_cpulist(struct device *dev, + struct device_attribute *attr, char *buf) { return node_read_cpumap(dev, 1, buf); } -static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); -static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); +static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); +static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); #define K(x) ((x) << (PAGE_SHIFT - 10)) -static ssize_t node_read_meminfo(struct sys_device * dev, - struct sysdev_attribute *attr, char * buf) +static ssize_t node_read_meminfo(struct device *dev, + struct device_attribute *attr, char *buf) { int n; int nid = dev->id; @@ -155,10 +152,10 @@ static ssize_t node_read_meminfo(struct sys_device * dev, } #undef K -static SYSDEV_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); +static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); -static ssize_t node_read_numastat(struct sys_device * dev, - struct sysdev_attribute *attr, char * buf) +static ssize_t node_read_numastat(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "numa_hit %lu\n" @@ -174,10 +171,10 @@ static ssize_t node_read_numastat(struct sys_device * dev, node_page_state(dev->id, NUMA_LOCAL), node_page_state(dev->id, NUMA_OTHER)); } -static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); +static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); -static ssize_t node_read_vmstat(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +static ssize_t node_read_vmstat(struct device *dev, + struct device_attribute *attr, char *buf) { int nid = dev->id; int i; @@ -189,10 +186,10 @@ static ssize_t node_read_vmstat(struct sys_device *dev, return n; } -static SYSDEV_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); +static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); -static ssize_t node_read_distance(struct sys_device * dev, - struct sysdev_attribute *attr, char * buf) +static ssize_t node_read_distance(struct device *dev, + struct device_attribute *attr, char * buf) { int nid = dev->id; int len = 0; @@ -210,7 +207,7 @@ static ssize_t node_read_distance(struct sys_device * dev, len += sprintf(buf + len, "\n"); return len; } -static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); +static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); #ifdef CONFIG_HUGETLBFS /* @@ -228,7 +225,7 @@ static node_registration_func_t __hugetlb_unregister_node; static inline bool hugetlb_register_node(struct node *node) { if (__hugetlb_register_node && - node_state(node->sysdev.id, N_HIGH_MEMORY)) { + node_state(node->dev.id, N_HIGH_MEMORY)) { __hugetlb_register_node(node); return true; } @@ -264,17 +261,17 @@ int register_node(struct node *node, int num, struct node *parent) { int error; - node->sysdev.id = num; - node->sysdev.cls = &node_class; - error = sysdev_register(&node->sysdev); + node->dev.id = num; + node->dev.bus = &node_subsys; + error = device_register(&node->dev); if (!error){ - sysdev_create_file(&node->sysdev, &attr_cpumap); - sysdev_create_file(&node->sysdev, &attr_cpulist); - sysdev_create_file(&node->sysdev, &attr_meminfo); - sysdev_create_file(&node->sysdev, &attr_numastat); - sysdev_create_file(&node->sysdev, &attr_distance); - sysdev_create_file(&node->sysdev, &attr_vmstat); + device_create_file(&node->dev, &dev_attr_cpumap); + device_create_file(&node->dev, &dev_attr_cpulist); + device_create_file(&node->dev, &dev_attr_meminfo); + device_create_file(&node->dev, &dev_attr_numastat); + device_create_file(&node->dev, &dev_attr_distance); + device_create_file(&node->dev, &dev_attr_vmstat); scan_unevictable_register_node(node); @@ -294,17 +291,17 @@ int register_node(struct node *node, int num, struct node *parent) */ void unregister_node(struct node *node) { - sysdev_remove_file(&node->sysdev, &attr_cpumap); - sysdev_remove_file(&node->sysdev, &attr_cpulist); - sysdev_remove_file(&node->sysdev, &attr_meminfo); - sysdev_remove_file(&node->sysdev, &attr_numastat); - sysdev_remove_file(&node->sysdev, &attr_distance); - sysdev_remove_file(&node->sysdev, &attr_vmstat); + device_remove_file(&node->dev, &dev_attr_cpumap); + device_remove_file(&node->dev, &dev_attr_cpulist); + device_remove_file(&node->dev, &dev_attr_meminfo); + device_remove_file(&node->dev, &dev_attr_numastat); + device_remove_file(&node->dev, &dev_attr_distance); + device_remove_file(&node->dev, &dev_attr_vmstat); scan_unevictable_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ - sysdev_unregister(&node->sysdev); + device_unregister(&node->dev); } struct node node_devices[MAX_NUMNODES]; @@ -324,15 +321,15 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid) if (!obj) return 0; - ret = sysfs_create_link(&node_devices[nid].sysdev.kobj, + ret = sysfs_create_link(&node_devices[nid].dev.kobj, &obj->kobj, kobject_name(&obj->kobj)); if (ret) return ret; return sysfs_create_link(&obj->kobj, - &node_devices[nid].sysdev.kobj, - kobject_name(&node_devices[nid].sysdev.kobj)); + &node_devices[nid].dev.kobj, + kobject_name(&node_devices[nid].dev.kobj)); } int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) @@ -346,10 +343,10 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) if (!obj) return 0; - sysfs_remove_link(&node_devices[nid].sysdev.kobj, + sysfs_remove_link(&node_devices[nid].dev.kobj, kobject_name(&obj->kobj)); sysfs_remove_link(&obj->kobj, - kobject_name(&node_devices[nid].sysdev.kobj)); + kobject_name(&node_devices[nid].dev.kobj)); return 0; } @@ -391,15 +388,15 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) continue; if (page_nid != nid) continue; - ret = sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, - &mem_blk->sysdev.kobj, - kobject_name(&mem_blk->sysdev.kobj)); + ret = sysfs_create_link_nowarn(&node_devices[nid].dev.kobj, + &mem_blk->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); if (ret) return ret; - return sysfs_create_link_nowarn(&mem_blk->sysdev.kobj, - &node_devices[nid].sysdev.kobj, - kobject_name(&node_devices[nid].sysdev.kobj)); + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, + &node_devices[nid].dev.kobj, + kobject_name(&node_devices[nid].dev.kobj)); } /* mem section does not span the specified node */ return 0; @@ -432,10 +429,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, continue; if (node_test_and_set(nid, *unlinked_nodes)) continue; - sysfs_remove_link(&node_devices[nid].sysdev.kobj, - kobject_name(&mem_blk->sysdev.kobj)); - sysfs_remove_link(&mem_blk->sysdev.kobj, - kobject_name(&node_devices[nid].sysdev.kobj)); + sysfs_remove_link(&node_devices[nid].dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + sysfs_remove_link(&mem_blk->dev.kobj, + kobject_name(&node_devices[nid].dev.kobj)); } NODEMASK_FREE(unlinked_nodes); return 0; @@ -466,7 +463,7 @@ static int link_mem_sections(int nid) } if (mem_blk) - kobject_put(&mem_blk->sysdev.kobj); + kobject_put(&mem_blk->dev.kobj); return err; } @@ -594,19 +591,19 @@ static ssize_t print_nodes_state(enum node_states state, char *buf) } struct node_attr { - struct sysdev_class_attribute attr; + struct device_attribute attr; enum node_states state; }; -static ssize_t show_node_state(struct sysdev_class *class, - struct sysdev_class_attribute *attr, char *buf) +static ssize_t show_node_state(struct device *dev, + struct device_attribute *attr, char *buf) { struct node_attr *na = container_of(attr, struct node_attr, attr); return print_nodes_state(na->state, buf); } #define _NODE_ATTR(name, state) \ - { _SYSDEV_CLASS_ATTR(name, 0444, show_node_state, NULL), state } + { __ATTR(name, 0444, show_node_state, NULL), state } static struct node_attr node_state_attr[] = { _NODE_ATTR(possible, N_POSSIBLE), @@ -618,17 +615,26 @@ static struct node_attr node_state_attr[] = { #endif }; -static struct sysdev_class_attribute *node_state_attrs[] = { - &node_state_attr[0].attr, - &node_state_attr[1].attr, - &node_state_attr[2].attr, - &node_state_attr[3].attr, +static struct attribute *node_state_attrs[] = { + &node_state_attr[0].attr.attr, + &node_state_attr[1].attr.attr, + &node_state_attr[2].attr.attr, + &node_state_attr[3].attr.attr, #ifdef CONFIG_HIGHMEM - &node_state_attr[4].attr, + &node_state_attr[4].attr.attr, #endif NULL }; +static struct attribute_group memory_root_attr_group = { + .attrs = node_state_attrs, +}; + +static const struct attribute_group *cpu_root_attr_groups[] = { + &memory_root_attr_group, + NULL, +}; + #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ static int __init register_node_type(void) { @@ -637,7 +643,7 @@ static int __init register_node_type(void) BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); - ret = sysdev_class_register(&node_class); + ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); if (!ret) { hotplug_memory_notifier(node_memory_callback, NODE_CALLBACK_PRI); -- cgit v1.2.3 From 321bf4ed5ff5f7c62ef59f33b7eec5b154391f0a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 30 Jan 2012 13:57:12 -0800 Subject: drivers/base/memory.c: fix memory_dev_init() long delay One system with 2048g ram, reported soft lockup on recent kernel. [ 34.426749] cpu_dev_init done [ 61.166399] BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:1] [ 61.166733] Modules linked in: [ 61.166904] irq event stamp: 1935610 [ 61.178431] hardirqs last enabled at (1935609): [] mutex_lock_nested+0x299/0x2b4 [ 61.178923] hardirqs last disabled at (1935610): [] apic_timer_interrupt+0x6b/0x80 [ 61.198767] softirqs last enabled at (1935476): [] __do_softirq+0x195/0x1ab [ 61.218604] softirqs last disabled at (1935471): [] call_softirq+0x1c/0x30 [ 61.238408] CPU 0 [ 61.238549] Modules linked in: [ 61.238744] [ 61.238825] Pid: 1, comm: swapper/0 Not tainted 3.3.0-rc1-tip-yh-02076-g962f689-dirty #171 [ 61.278212] RIP: 0010:[] [] lock_release+0x90/0x9c [ 61.278627] RSP: 0018:ffff883f64dbfd70 EFLAGS: 00000246 [ 61.298287] RAX: ffff883f64dc0000 RBX: 0000000000000000 RCX: 000000000000008b [ 61.298690] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 61.318383] RBP: ffff883f64dbfda0 R08: 0000000000000001 R09: 000000000000008b [ 61.338215] R10: 0000000000000000 R11: 0000000000000000 R12: ffff883f64dbfd10 [ 61.338610] R13: ffff883f64dc0708 R14: ffff883f64dc0708 R15: ffffffff81095657 [ 61.358299] FS: 0000000000000000(0000) GS:ffff883f7d600000(0000) knlGS:0000000000000000 [ 61.378118] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 61.378450] CR2: 0000000000000000 CR3: 00000000024af000 CR4: 00000000000007f0 [ 61.398144] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 61.417918] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 61.418260] Process swapper/0 (pid: 1, threadinfo ffff883f64dbe000, task ffff883f64dc0000) [ 61.445358] Stack: [ 61.445511] 0000000000000002 ffff897f649ba168 ffff883f64dbfe10 ffff88ff64bb57a8 [ 61.458040] 0000000000000000 0000000000000000 ffff883f64dbfdc0 ffffffff81ceb1b4 [ 61.458491] 000000000011608c ffff88ff64bb58a8 ffff883f64dbfdf0 ffffffff81c57638 [ 61.478215] Call Trace: [ 61.478367] [] _raw_spin_unlock+0x21/0x2e [ 61.497994] [] klist_next+0x9e/0xbc [ 61.498264] [] next_device+0xe/0x1e [ 61.517867] [] subsys_find_device_by_id+0xb7/0xd6 [ 61.518197] [] find_memory_block_hinted+0x3d/0x66 [ 61.537927] [] find_memory_block+0x10/0x12 [ 61.538193] [] add_memory_section+0x35/0x9e [ 61.557932] [] memory_dev_init+0x68/0xda [ 61.558227] [] driver_init+0x97/0xa7 [ 61.577853] [] kernel_init+0xf6/0x1c0 [ 61.578140] [] kernel_thread_helper+0x4/0x10 [ 61.597850] [] ? retint_restore_args+0xe/0xe [ 61.598144] [] ? start_kernel+0x3ab/0x3ab [ 61.617826] [] ? gs_change+0xb/0xb [ 61.618060] Code: 10 48 83 3b 00 eb e8 4c 89 f2 44 89 fe 4c 89 ef e8 e1 fe ff ff 65 48 8b 04 25 40 bc 00 00 c7 80 cc 06 00 00 00 00 00 00 41 54 9d <5e> 5b 41 5c 41 5d 41 5e 41 5f 5d c3 55 48 89 e5 41 57 41 89 cf [ 89.285380] memory_dev_init done Finally it takes about 55s to create 16400 memory entries. Root cause: for x86_64, 2048g (with 2g hole at [2g,4g), and TOP2 will be 2050g), will have 16400 memory block. find_memory_block/subsys_find_device_by_id will be expensive with that many entries. Actually, we don't need to find that memory block for BOOT path. Skip that finding make it get back to normal. [ 34.466696] cpu_dev_init done [ 35.290080] memory_dev_init done Also solved the delay with topology_init when sections_per_block is not 1. Signed-off-by: Yinghai Lu Cc: Kay Sievers Cc: Nathan Fontenot Cc: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 44f427a66117..90aa2a11a933 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -456,7 +456,15 @@ static int link_mem_sections(int nid) if (!present_section_nr(section_nr)) continue; mem_sect = __nr_to_section(section_nr); + + /* same memblock ? */ + if (mem_blk) + if ((section_nr >= mem_blk->start_section_nr) && + (section_nr <= mem_blk->end_section_nr)) + continue; + mem_blk = find_memory_block_hinted(mem_sect, mem_blk); + ret = register_mem_sect_under_node(mem_blk, nid); if (!err) err = ret; -- cgit v1.2.3