diff options
author | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-08 11:21:55 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-08 11:21:55 -0800 |
commit | 88032b322a38b37335c8cb2e3473a45c81d280eb (patch) | |
tree | cd722ab15b18a10f6b1aa50656b8da713ee8b799 /arch/s390 | |
parent | 63f3861d2fbf8ccbad1386ac9ac8b822c036ea00 (diff) | |
parent | 028d9b3cc62cb9dd31f1b5929edb3c23612cfccc (diff) |
Merge branch 'for-linus' of git://git390.osdl.marist.edu/pub/scm/linux-2.6
* 'for-linus' of git://git390.osdl.marist.edu/pub/scm/linux-2.6:
[S390] Poison init section before freeing it.
[S390] Use add_active_range() and free_area_init_nodes().
[S390] Virtual memmap for s390.
[S390] Update documentation for dynamic subchannel mapping.
[S390] Use dev->groups for adding/removing the subchannel attribute group.
[S390] Support for disconnected devices reappearing on another subchannel.
[S390] subchannel lock conversion.
[S390] Some preparations for the dynamic subchannel mapping patch.
[S390] runtime switch for qdio performance statistics
[S390] New DASD feature for ERP related logging
[S390] add reset call handler to the ap bus.
[S390] more workqueue fixes.
[S390] workqueue fixes.
[S390] uaccess_pt: add missing down_read() and convert to is_init().
Diffstat (limited to 'arch/s390')
-rw-r--r-- | arch/s390/Kconfig | 14 | ||||
-rw-r--r-- | arch/s390/defconfig | 1 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 55 | ||||
-rw-r--r-- | arch/s390/lib/uaccess_pt.c | 5 | ||||
-rw-r--r-- | arch/s390/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/mm/extmem.c | 106 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 184 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 381 |
8 files changed, 477 insertions, 271 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 45e47bfb68a9..ff690564edbd 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -241,8 +241,14 @@ config WARN_STACK_SIZE This allows you to specify the maximum frame size a function may have without the compiler complaining about it. +config ARCH_POPULATES_NODE_MAP + def_bool y + source "mm/Kconfig" +config HOLES_IN_ZONE + def_bool y + comment "I/O subsystem configuration" config MACHCHK_WARNING @@ -266,14 +272,6 @@ config QDIO If unsure, say Y. -config QDIO_PERF_STATS - bool "Performance statistics in /proc" - depends on QDIO - help - Say Y here to get performance statistics in /proc/qdio_perf - - If unsure, say N. - config QDIO_DEBUG bool "Extended debugging information" depends on QDIO diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7cd51e73e274..a6ec919ba83f 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -134,7 +134,6 @@ CONFIG_RESOURCES_64BIT=y # CONFIG_MACHCHK_WARNING=y CONFIG_QDIO=y -# CONFIG_QDIO_PERF_STATS is not set # CONFIG_QDIO_DEBUG is not set # diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b928fecdc743..49ef206ec880 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -64,9 +64,8 @@ unsigned int console_devno = -1; unsigned int console_irq = -1; unsigned long machine_flags = 0; -struct mem_chunk memory_chunk[MEMORY_CHUNKS]; +struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS]; volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */ -unsigned long __initdata zholes_size[MAX_NR_ZONES]; static unsigned long __initdata memory_end; /* @@ -354,21 +353,6 @@ void machine_power_off(void) */ void (*pm_power_off)(void) = machine_power_off; -static void __init -add_memory_hole(unsigned long start, unsigned long end) -{ - unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - - if (end <= dma_pfn) - zholes_size[ZONE_DMA] += end - start + 1; - else if (start > dma_pfn) - zholes_size[ZONE_NORMAL] += end - start + 1; - else { - zholes_size[ZONE_DMA] += dma_pfn - start + 1; - zholes_size[ZONE_NORMAL] += end - dma_pfn; - } -} - static int __init early_parse_mem(char *p) { memory_end = memparse(p, &p); @@ -521,7 +505,6 @@ setup_memory(void) { unsigned long bootmap_size; unsigned long start_pfn, end_pfn, init_pfn; - unsigned long last_rw_end; int i; /* @@ -577,39 +560,27 @@ setup_memory(void) /* * Register RAM areas with the bootmem allocator. */ - last_rw_end = start_pfn; for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - unsigned long start_chunk, end_chunk; + unsigned long start_chunk, end_chunk, pfn; if (memory_chunk[i].type != CHUNK_READ_WRITE) continue; - start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1); - start_chunk >>= PAGE_SHIFT; - end_chunk = (memory_chunk[i].addr + memory_chunk[i].size); - end_chunk >>= PAGE_SHIFT; - if (start_chunk < start_pfn) - start_chunk = start_pfn; - if (end_chunk > end_pfn) - end_chunk = end_pfn; - if (start_chunk < end_chunk) { - /* Initialize storage key for RAM pages */ - for (init_pfn = start_chunk ; init_pfn < end_chunk; - init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, - PAGE_DEFAULT_KEY); - free_bootmem(start_chunk << PAGE_SHIFT, - (end_chunk - start_chunk) << PAGE_SHIFT); - if (last_rw_end < start_chunk) - add_memory_hole(last_rw_end, start_chunk - 1); - last_rw_end = end_chunk; - } + start_chunk = PFN_DOWN(memory_chunk[i].addr); + end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1; + end_chunk = min(end_chunk, end_pfn); + if (start_chunk >= end_chunk) + continue; + add_active_range(0, start_chunk, end_chunk); + pfn = max(start_chunk, start_pfn); + for (; pfn <= end_chunk; pfn++) + page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); } psw_set_key(PAGE_DEFAULT_KEY); - if (last_rw_end < end_pfn - 1) - add_memory_hole(last_rw_end, end_pfn - 1); + free_bootmem_with_active_regions(0, max_pfn); + reserve_bootmem(0, PFN_PHYS(start_pfn)); /* * Reserve the bootmem bitmap itself as well. We do this in two diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 8741bdc09299..633249c3ba91 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -8,8 +8,8 @@ */ #include <linux/errno.h> -#include <asm/uaccess.h> #include <linux/mm.h> +#include <asm/uaccess.h> #include <asm/futex.h> static inline int __handle_fault(struct mm_struct *mm, unsigned long address, @@ -60,8 +60,9 @@ out: out_of_memory: up_read(&mm->mmap_sem); - if (current->pid == 1) { + if (is_init(current)) { yield(); + down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", current->comm); diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index aa9a42b6e62d..8e09db1edbb9 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o ioremap.o extmem.o mmap.o +obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 9e9bc48463a5..775bf19e742b 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -16,6 +16,7 @@ #include <linux/bootmem.h> #include <linux/ctype.h> #include <asm/page.h> +#include <asm/pgtable.h> #include <asm/ebcdic.h> #include <asm/errno.h> #include <asm/extmem.h> @@ -238,65 +239,6 @@ query_segment_type (struct dcss_segment *seg) } /* - * check if the given segment collides with guest storage. - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_storage(struct dcss_segment *seg) -{ - int i; - - for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { - if (memory_chunk[i].type != CHUNK_READ_WRITE) - continue; - if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) - continue; - if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20) - < (seg->start_addr >> 20)) - continue; - return 1; - } - return 0; -} - -/* - * check if segment collides with other segments that are currently loaded - * returns 1 if this is the case, 0 if no collision was found - */ -static int -segment_overlaps_others (struct dcss_segment *seg) -{ - struct list_head *l; - struct dcss_segment *tmp; - - BUG_ON(!mutex_is_locked(&dcss_lock)); - list_for_each(l, &dcss_list) { - tmp = list_entry(l, struct dcss_segment, list); - if ((tmp->start_addr >> 20) > (seg->end >> 20)) - continue; - if ((tmp->end >> 20) < (seg->start_addr >> 20)) - continue; - if (seg == tmp) - continue; - return 1; - } - return 0; -} - -/* - * check if segment exceeds the kernel mapping range (detected or set via mem=) - * returns 1 if this is the case, 0 if segment fits into the range - */ -static inline int -segment_exceeds_range (struct dcss_segment *seg) -{ - int seg_last_pfn = (seg->end) >> PAGE_SHIFT; - if (seg_last_pfn > max_pfn) - return 1; - return 0; -} - -/* * get info about a segment * possible return values: * -ENOSYS : we are not running on VM @@ -341,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = query_segment_type (seg); if (rc < 0) goto out_free; - if (segment_exceeds_range(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - exceeds" - " kernel mapping range\n",name); - rc = -ERANGE; + + rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + + switch (rc) { + case 0: + break; + case -ENOSPC: + PRINT_WARN("segment_load: not loading segment %s - overlaps " + "storage/segment\n", name); goto out_free; - } - if (segment_overlaps_storage(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " storage\n",name); - rc = -ENOSPC; + case -ERANGE: + PRINT_WARN("segment_load: not loading segment %s - exceeds " + "kernel mapping range\n", name); goto out_free; - } - if (segment_overlaps_others(seg)) { - PRINT_WARN ("segment_load: not loading segment %s - overlaps" - " other segments\n",name); - rc = -EBUSY; + default: + PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n", + name, rc); goto out_free; } + if (do_nonshared) dcss_command = DCSS_LOADNSR; else @@ -372,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long rc = dcss_diag_translate_rc (seg->end); dcss_diag(DCSS_PURGESEG, seg->dcss_name, &seg->start_addr, &seg->end); - goto out_free; + goto out_shared; } seg->do_nonshared = do_nonshared; atomic_set(&seg->ref_count, 1); @@ -391,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long (void*)seg->start_addr, (void*)seg->end, segtype_string[seg->vm_segtype]); goto out; + out_shared: + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); out_free: kfree(seg); out: @@ -530,12 +476,12 @@ segment_unload(char *name) "please report to linux390@de.ibm.com\n",name); goto out_unlock; } - if (atomic_dec_return(&seg->ref_count) == 0) { - list_del(&seg->list); - dcss_diag(DCSS_PURGESEG, seg->dcss_name, - &dummy, &dummy); - kfree(seg); - } + if (atomic_dec_return(&seg->ref_count) != 0) + goto out_unlock; + remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1); + list_del(&seg->list); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy); + kfree(seg); out_unlock: mutex_unlock(&dcss_lock); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e1881c31b1cb..4bb21be3b007 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -24,6 +24,7 @@ #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/pfn.h> +#include <linux/poison.h> #include <asm/processor.h> #include <asm/system.h> @@ -69,6 +70,8 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); i = max_mapnr; while (i-- > 0) { + if (!pfn_valid(i)) + continue; page = pfn_to_page(i); total++; if (PageReserved(page)) @@ -84,150 +87,52 @@ void show_mem(void) printk("%d pages swap cached\n",cached); } -extern unsigned long __initdata zholes_size[]; -/* - * paging_init() sets up the page tables - */ - -#ifndef CONFIG_64BIT -void __init paging_init(void) +static void __init setup_ro_region(void) { - pgd_t * pg_dir; - pte_t * pg_table; - pte_t pte; - int i; - unsigned long tmp; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; - static const int ssm_mask = 0x04000000L; - unsigned long ro_start_pfn, ro_end_pfn; - unsigned long zones_size[MAX_NR_ZONES]; - - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); - - memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_DMA] = max_low_pfn; - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, - zholes_size); - - /* unmap whole virtual address space */ - - pg_dir = swapper_pg_dir; - - for (i = 0; i < PTRS_PER_PGD; i++) - pmd_clear((pmd_t *) pg_dir++); - - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - while (pfn < max_low_pfn) { - /* - * pg_table is physical at this point - */ - pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - - pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table); - pg_dir++; - - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pg_table, pte); - pfn++; - } - } - - S390_lowcore.kernel_asce = pgdir_k; - - /* enable virtual mapping in kernel mode */ - __ctl_load(pgdir_k, 1, 1); - __ctl_load(pgdir_k, 7, 7); - __ctl_load(pgdir_k, 13, 13); - __raw_local_irq_ssm(ssm_mask); - - local_flush_tlb(); + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pte_t new_pte; + unsigned long address, end; + + address = ((unsigned long)&__start_rodata) & PAGE_MASK; + end = PFN_ALIGN((unsigned long)&__end_rodata); + + for (; address < end; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + pmd = pmd_offset(pgd, address); + pte = pte_offset_kernel(pmd, address); + new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO)); + set_pte(pte, new_pte); + } } -#else /* CONFIG_64BIT */ +extern void vmem_map_init(void); +/* + * paging_init() sets up the page tables + */ void __init paging_init(void) { - pgd_t * pg_dir; - pmd_t * pm_dir; - pte_t * pt_dir; - pte_t pte; - int i,j,k; - unsigned long pfn = 0; - unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | - _KERN_REGION_TABLE; + pgd_t *pg_dir; + int i; + unsigned long pgdir_k; static const int ssm_mask = 0x04000000L; - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long dma_pfn, high_pfn; - unsigned long ro_start_pfn, ro_end_pfn; - - memset(zones_size, 0, sizeof(zones_size)); - dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; - high_pfn = max_low_pfn; - ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata); - ro_end_pfn = PFN_UP((unsigned long)&__end_rodata); - - if (dma_pfn > high_pfn) - zones_size[ZONE_DMA] = high_pfn; - else { - zones_size[ZONE_DMA] = dma_pfn; - zones_size[ZONE_NORMAL] = high_pfn - dma_pfn; - } - - /* Initialize mem_map[]. */ - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); + unsigned long max_zone_pfns[MAX_NR_ZONES]; - /* - * map whole physical memory to virtual memory (identity mapping) - */ - - pg_dir = swapper_pg_dir; - - for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) { + pg_dir = swapper_pg_dir; - if (pfn >= max_low_pfn) { - pgd_clear(pg_dir); - continue; - } - - pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4); - pgd_populate(&init_mm, pg_dir, pm_dir); - - for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) { - if (pfn >= max_low_pfn) { - pmd_clear(pm_dir); - continue; - } - - pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); - - for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) { - if (pfn >= ro_start_pfn && pfn < ro_end_pfn) - pte = pfn_pte(pfn, __pgprot(_PAGE_RO)); - else - pte = pfn_pte(pfn, PAGE_KERNEL); - if (pfn >= max_low_pfn) - pte_val(pte) = _PAGE_TYPE_EMPTY; - set_pte(pt_dir, pte); - pfn++; - } - } - } +#ifdef CONFIG_64BIT + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE; + for (i = 0; i < PTRS_PER_PGD; i++) + pgd_clear(pg_dir + i); +#else + pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; + for (i = 0; i < PTRS_PER_PGD; i++) + pmd_clear((pmd_t *)(pg_dir + i)); +#endif + vmem_map_init(); + setup_ro_region(); S390_lowcore.kernel_asce = pgdir_k; @@ -237,9 +142,11 @@ void __init paging_init(void) __ctl_load(pgdir_k, 13, 13); __raw_local_irq_ssm(ssm_mask); - local_flush_tlb(); + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + free_area_init_nodes(max_zone_pfns); } -#endif /* CONFIG_64BIT */ void __init mem_init(void) { @@ -269,6 +176,8 @@ void __init mem_init(void) printk("Write protected kernel read-only data: %#lx - %#lx\n", (unsigned long)&__start_rodata, PFN_ALIGN((unsigned long)&__end_rodata) - 1); + printk("Virtual memmap size: %ldk\n", + (max_pfn * sizeof(struct page)) >> 10); } void free_initmem(void) @@ -279,6 +188,7 @@ void free_initmem(void) for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c new file mode 100644 index 000000000000..7f2944d3ec2a --- /dev/null +++ b/arch/s390/mm/vmem.c @@ -0,0 +1,381 @@ +/* + * arch/s390/mm/vmem.c + * + * Copyright IBM Corp. 2006 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/bootmem.h> +#include <linux/pfn.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/list.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/tlbflush.h> + +unsigned long vmalloc_end; +EXPORT_SYMBOL(vmalloc_end); + +static struct page *vmem_map; +static DEFINE_MUTEX(vmem_mutex); + +struct memory_segment { + struct list_head list; + unsigned long start; + unsigned long size; +}; + +static LIST_HEAD(mem_segs); + +void memmap_init(unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn) +{ + struct page *start, *end; + struct page *map_start, *map_end; + int i; + + start = pfn_to_page(start_pfn); + end = start + size; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + unsigned long cstart, cend; + + cstart = PFN_DOWN(memory_chunk[i].addr); + cend = cstart + PFN_DOWN(memory_chunk[i].size); + + map_start = mem_map + cstart; + map_end = mem_map + cend; + + if (map_start < start) + map_start = start; + if (map_end > end) + map_end = end; + + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) + / sizeof(struct page); + map_end += ((PFN_ALIGN((unsigned long) map_end) + - (unsigned long) map_end) + / sizeof(struct page)); + + if (map_start < map_end) + memmap_init_zone((unsigned long)(map_end - map_start), + nid, zone, page_to_pfn(map_start)); + } +} + +static inline void *vmem_alloc_pages(unsigned int order) +{ + if (slab_is_available()) + return (void *)__get_free_pages(GFP_KERNEL, order); + return alloc_bootmem_pages((1 << order) * PAGE_SIZE); +} + +static inline pmd_t *vmem_pmd_alloc(void) +{ + pmd_t *pmd; + int i; + + pmd = vmem_alloc_pages(PMD_ALLOC_ORDER); + if (!pmd) + return NULL; + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_clear(pmd + i); + return pmd; +} + +static inline pte_t *vmem_pte_alloc(void) +{ + pte_t *pte; + pte_t empty_pte; + int i; + + pte = vmem_alloc_pages(PTE_ALLOC_ORDER); + if (!pte) + return NULL; + pte_val(empty_pte) = _PAGE_TYPE_EMPTY; + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(pte + i, empty_pte); + return pte; +} + +/* + * Add a physical memory range to the 1:1 mapping. + */ +static int vmem_add_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + ret = 0; +out: + flush_tlb_kernel_range(start, start + size); + return ret; +} + +/* + * Remove a physical memory range from the 1:1 mapping. + * Currently only invalidates page table entries. + */ +static void vmem_remove_range(unsigned long start, unsigned long size) +{ + unsigned long address; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + + pte_val(pte) = _PAGE_TYPE_EMPTY; + for (address = start; address < start + size; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) + continue; + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) + continue; + pt_dir = pte_offset_kernel(pm_dir, address); + set_pte(pt_dir, pte); + } + flush_tlb_kernel_range(start, start + size); +} + +/* + * Add a backed mem_map array to the virtual mem_map array. + */ +static int vmem_add_mem_map(unsigned long start, unsigned long size) +{ + unsigned long address, start_addr, end_addr; + struct page *map_start, *map_end; + pgd_t *pg_dir; + pmd_t *pm_dir; + pte_t *pt_dir; + pte_t pte; + int ret = -ENOMEM; + + map_start = vmem_map + PFN_DOWN(start); + map_end = vmem_map + PFN_DOWN(start + size); + + start_addr = (unsigned long) map_start & PAGE_MASK; + end_addr = PFN_ALIGN((unsigned long) map_end); + + for (address = start_addr; address < end_addr; address += PAGE_SIZE) { + pg_dir = pgd_offset_k(address); + if (pgd_none(*pg_dir)) { + pm_dir = vmem_pmd_alloc(); + if (!pm_dir) + goto out; + pgd_populate(&init_mm, pg_dir, pm_dir); + } + + pm_dir = pmd_offset(pg_dir, address); + if (pmd_none(*pm_dir)) { + pt_dir = vmem_pte_alloc(); + if (!pt_dir) + goto out; + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + } + + pt_dir = pte_offset_kernel(pm_dir, address); + if (pte_none(*pt_dir)) { + unsigned long new_page; + + new_page =__pa(vmem_alloc_pages(0)); + if (!new_page) + goto out; + pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL); + set_pte(pt_dir, pte); + } + } + ret = 0; +out: + flush_tlb_kernel_range(start_addr, end_addr); + return ret; +} + +static int vmem_add_mem(unsigned long start, unsigned long size) +{ + int ret; + + ret = vmem_add_range(start, size); + if (ret) + return ret; + return vmem_add_mem_map(start, size); +} + +/* + * Add memory segment to the segment list if it doesn't overlap with + * an already present segment. + */ +static int insert_memory_segment(struct memory_segment *seg) +{ + struct memory_segment *tmp; + + if (PFN_DOWN(seg->start + seg->size) > max_pfn || + seg->start + seg->size < seg->start) + return -ERANGE; + + list_for_each_entry(tmp, &mem_segs, list) { + if (seg->start >= tmp->start + tmp->size) + continue; + if (seg->start + seg->size <= tmp->start) + continue; + return -ENOSPC; + } + list_add(&seg->list, &mem_segs); + return 0; +} + +/* + * Remove memory segment from the segment list. + */ +static void remove_memory_segment(struct memory_segment *seg) +{ + list_del(&seg->list); +} + +static void __remove_shared_memory(struct memory_segment *seg) +{ + remove_memory_segment(seg); + vmem_remove_range(seg->start, seg->size); +} + +int remove_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + int ret; + + mutex_lock(&vmem_mutex); + + ret = -ENOENT; + list_for_each_entry(seg, &mem_segs, list) { + if (seg->start == start && seg->size == size) + break; + } + + if (seg->start != start || seg->size != size) + goto out; + + ret = 0; + __remove_shared_memory(seg); + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +int add_shared_memory(unsigned long start, unsigned long size) +{ + struct memory_segment *seg; + struct page *page; + unsigned long pfn, num_pfn, end_pfn; + int ret; + + mutex_lock(&vmem_mutex); + ret = -ENOMEM; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + goto out; + seg->start = start; + seg->size = size; + + ret = insert_memory_segment(seg); + if (ret) + goto out_free; + + ret = vmem_add_mem(start, size); + if (ret) + goto out_remove; + + pfn = PFN_DOWN(start); + num_pfn = PFN_DOWN(size); + end_pfn = pfn + num_pfn; + + page = pfn_to_page(pfn); + memset(page, 0, num_pfn * sizeof(struct page)); + + for (; pfn < end_pfn; pfn++) { + page = pfn_to_page(pfn); + init_page_count(page); + reset_page_mapcount(page); + SetPageReserved(page); + INIT_LIST_HEAD(&page->lru); + } + goto out; + +out_remove: + __remove_shared_memory(seg); +out_free: + kfree(seg); +out: + mutex_unlock(&vmem_mutex); + return ret; +} + +/* + * map whole physical memory to virtual memory (identity mapping) + */ +void __init vmem_map_init(void) +{ + unsigned long map_size; + int i; + + map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page); + vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size); + vmem_map = (struct page *) vmalloc_end; + NODE_DATA(0)->node_mem_map = vmem_map; + + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) + vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size); +} + +/* + * Convert memory chunk array to a memory segment list so there is a single + * list that contains both r/w memory and shared memory segments. + */ +static int __init vmem_convert_memory_chunk(void) +{ + struct memory_segment *seg; + int i; + + mutex_lock(&vmem_mutex); + for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (!memory_chunk[i].size) + continue; + seg = kzalloc(sizeof(*seg), GFP_KERNEL); + if (!seg) + panic("Out of memory...\n"); + seg->start = memory_chunk[i].addr; + seg->size = memory_chunk[i].size; + insert_memory_segment(seg); + } + mutex_unlock(&vmem_mutex); + return 0; +} + +core_initcall(vmem_convert_memory_chunk); |