patch-1.3.57 linux/mm/swap.c


diff -u --recursive --new-file v1.3.56/linux/mm/swap.c linux/mm/swap.c
@@ -30,12 +30,17 @@
 #include <asm/bitops.h>
 #include <asm/pgtable.h>
 
-#define MAX_SWAPFILES 8
-
-#define SWP_USED	1
-#define SWP_WRITEOK	3
-
+/*
+ * We identify three levels of free memory.  We never let free mem
+ * fall below the min_free_pages except for atomic allocations.  We
+ * start background swapping if we fall below free_pages_high free
+ * pages, and we begin intensive swapping below free_pages_low.
+ *
+ * Keep these three variables contiguous for sysctl(2).  
+ */
 int min_free_pages = 20;
+int free_pages_low = 30;
+int free_pages_high = 40;
 
 /*
  * Constants for the page aging mechanism: the maximum age (actually,
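[Editorial note, not part of the diff.] The hunk above adds two thresholds alongside min_free_pages. As the new comment says, min_free_pages is the hard floor that only atomic allocations may dip below, background swapping starts below free_pages_high, and swapping becomes intensive below free_pages_low. A minimal sketch of how the swapping code outside this file might grade memory pressure against the three values; the helper name and the exact policy shown are illustrative assumptions, only the three variables and nr_free_pages come from the kernel itself:

	extern int nr_free_pages;
	extern int min_free_pages, free_pages_low, free_pages_high;

	/* Illustrative only: classify current memory pressure. */
	static inline int free_memory_level(void)
	{
		if (nr_free_pages > free_pages_high)
			return 0;	/* plenty free: no swapping needed */
		if (nr_free_pages > free_pages_low)
			return 1;	/* getting low: start background swapping */
		return 2;		/* below free_pages_low: swap intensively */
	}

min_free_pages itself is not a swapping trigger but the reserve that __get_free_pages() (removed further down) refuses to break into for non-atomic allocations.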
@@ -53,87 +58,6 @@
 	RCL_ROUND_ROBIN		/* Balancing policy */
 };
 
-static int nr_swapfiles = 0;
-static struct wait_queue * lock_queue = NULL;
-static struct {
-	int head;	/* head of priority-ordered swapfile list */
-	int next;	/* swapfile to be used next */
-} swap_list = {-1, -1};
-
-static struct swap_info_struct {
-	unsigned int flags;
-	kdev_t swap_device;
-	struct inode * swap_file;
-	unsigned char * swap_map;
-	unsigned char * swap_lockmap;
-	int lowest_bit;
-	int highest_bit;
-	int prio;			/* swap priority */
-	int pages;
-	unsigned long max;
-	int next;			/* next entry on swap list */
-} swap_info[MAX_SWAPFILES];
-
-extern int shm_swap (int, unsigned long);
-
-/*
- * To save us from swapping out pages which have just been swapped in and
- * have not been modified since then, we keep in swap_cache[page>>PAGE_SHIFT]
- * the swap entry which was last used to fill the page, or zero if the
- * page does not currently correspond to a page in swap. PAGE_DIRTY makes
- * this info useless.
- */
-unsigned long *swap_cache;
-
-#ifdef SWAP_CACHE_INFO
-unsigned long swap_cache_add_total = 0;
-unsigned long swap_cache_add_success = 0;
-unsigned long swap_cache_del_total = 0;
-unsigned long swap_cache_del_success = 0;
-unsigned long swap_cache_find_total = 0;
-unsigned long swap_cache_find_success = 0;
-
-extern inline void show_swap_cache_info(void)
-{
-	printk("Swap cache: add %ld/%ld, delete %ld/%ld, find %ld/%ld\n",
-		swap_cache_add_total, swap_cache_add_success, 
-		swap_cache_del_total, swap_cache_del_success,
-		swap_cache_find_total, swap_cache_find_success);
-}
-#endif
-
-static int add_to_swap_cache(unsigned long addr, unsigned long entry)
-{
-	struct swap_info_struct * p = &swap_info[SWP_TYPE(entry)];
-
-#ifdef SWAP_CACHE_INFO
-	swap_cache_add_total++;
-#endif
-	if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
-		entry = xchg(swap_cache + MAP_NR(addr), entry);
-		if (entry)  {
-			printk("swap_cache: replacing non-NULL entry\n");
-		}
-#ifdef SWAP_CACHE_INFO
-		swap_cache_add_success++;
-#endif
-		return 1;
-	}
-	return 0;
-}
-
-static unsigned long init_swap_cache(unsigned long mem_start,
-	unsigned long mem_end)
-{
-	unsigned long swap_cache_size;
-
-	mem_start = (mem_start + 15) & ~15;
-	swap_cache = (unsigned long *) mem_start;
-	swap_cache_size = MAP_NR(mem_end);
-	memset(swap_cache, 0, swap_cache_size * sizeof (unsigned long));
-	return (unsigned long) (swap_cache + swap_cache_size);
-}
-
 /* General swap control */
 
 /* Parse the kernel command line "swap=" option at load time: */
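[Editorial note, not part of the diff.] The swap cache deleted in the hunk above is just one word per physical page frame: the swap entry the page was last filled from, or zero (the bookkeeping presumably moves out of this file elsewhere in patch-1.3.57). A minimal sketch of that idea, using the same kernel-internal helpers the deleted code uses (MAP_NR, xchg, swap_free); the sketch_* names are hypothetical and the real helpers may differ in detail:

	extern unsigned long *swap_cache;	/* one entry per page frame */

	/* Illustrative only: non-destructive lookup, 0 means "not cached". */
	static inline unsigned long sketch_in_swap_cache(unsigned long addr)
	{
		return swap_cache[MAP_NR(addr)];
	}

	/* Illustrative only: drop a page's cache slot and the swap
	 * reference the cache was holding for it. */
	static inline int sketch_delete_from_swap_cache(unsigned long addr)
	{
		unsigned long entry = xchg(swap_cache + MAP_NR(addr), 0);

		if (!entry)
			return 0;
		swap_free(entry);
		return 1;
	}

This is why add_to_swap_cache() above checks SWP_WRITEOK and why a dirty pte makes the cached entry useless: the cache is only trusted for clean, unmodified pages.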
@@ -174,1174 +98,3 @@
 	}
 }
 
-/* Page aging */
-
-void rw_swap_page(int rw, unsigned long entry, char * buf)
-{
-	unsigned long type, offset;
-	struct swap_info_struct * p;
-
-	type = SWP_TYPE(entry);
-	if (type >= nr_swapfiles) {
-		printk("Internal error: bad swap-device\n");
-		return;
-	}
-	p = &swap_info[type];
-	offset = SWP_OFFSET(entry);
-	if (offset >= p->max) {
-		printk("rw_swap_page: weirdness\n");
-		return;
-	}
-	if (p->swap_map && !p->swap_map[offset]) {
-		printk("Hmm.. Trying to use unallocated swap (%08lx)\n", entry);
-		return;
-	}
-	if (!(p->flags & SWP_USED)) {
-		printk("Trying to swap to unused swap-device\n");
-		return;
-	}
-	while (set_bit(offset,p->swap_lockmap))
-		sleep_on(&lock_queue);
-	if (rw == READ)
-		kstat.pswpin++;
-	else
-		kstat.pswpout++;
-	if (p->swap_device) {
-		ll_rw_page(rw,p->swap_device,offset,buf);
-	} else if (p->swap_file) {
-		struct inode *swapf = p->swap_file;
-		unsigned int zones[PAGE_SIZE/512];
-		int i;
-		if (swapf->i_op->bmap == NULL
-			&& swapf->i_op->smap != NULL){
-			/*
-				With MsDOS, we use msdos_smap which return
-				a sector number (not a cluster or block number).
-				It is a patch to enable the UMSDOS project.
-				Other people are working on better solution.
-
-				It sounds like ll_rw_swap_file defined
-				it operation size (sector size) based on
-				PAGE_SIZE and the number of block to read.
-				So using bmap or smap should work even if
-				smap will require more blocks.
-			*/
-			int j;
-			unsigned int block = offset << 3;
-
-			for (i=0, j=0; j< PAGE_SIZE ; i++, j += 512){
-				if (!(zones[i] = swapf->i_op->smap(swapf,block++))) {
-					printk("rw_swap_page: bad swap file\n");
-					return;
-				}
-			}
-		}else{
-			int j;
-			unsigned int block = offset
-				<< (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
-
-			for (i=0, j=0; j< PAGE_SIZE ; i++, j +=swapf->i_sb->s_blocksize)
-				if (!(zones[i] = bmap(swapf,block++))) {
-					printk("rw_swap_page: bad swap file\n");
-					return;
-				}
-		}
-		ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf);
-	} else
-		printk("re_swap_page: no swap file or device\n");
-	if (offset && !clear_bit(offset,p->swap_lockmap))
-		printk("rw_swap_page: lock already cleared\n");
-	wake_up(&lock_queue);
-}
-
-unsigned long get_swap_page(void)
-{
-	struct swap_info_struct * p;
-	unsigned long offset, entry;
-	int type, wrapped = 0;
-
-	type = swap_list.next;
-	if (type < 0)
-	  return 0;
-
-	while (1) {
-		p = &swap_info[type];
-		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
-			for (offset = p->lowest_bit; offset <= p->highest_bit ; offset++) {
-				if (p->swap_map[offset])
-				  continue;
-				if (test_bit(offset, p->swap_lockmap))
-				  continue;
-				p->swap_map[offset] = 1;
-				nr_swap_pages--;
-				if (offset == p->highest_bit)
-				  p->highest_bit--;
-				p->lowest_bit = offset;
-				entry = SWP_ENTRY(type,offset);
-
-				type = swap_info[type].next;
-				if (type < 0 || p->prio != swap_info[type].prio) {
-				    swap_list.next = swap_list.head;
-				} else {
-				    swap_list.next = type;
-				}
-				return entry;
-			}
-		}
-		type = p->next;
-		if (!wrapped) {
-			if (type < 0 || p->prio != swap_info[type].prio) {
-				type = swap_list.head;
-				wrapped = 1;
-			}
-		} else if (type < 0) {
-			return 0;	/* out of swap space */
-		}
-	}
-}
-
-void swap_duplicate(unsigned long entry)
-{
-	struct swap_info_struct * p;
-	unsigned long offset, type;
-
-	if (!entry)
-		return;
-	offset = SWP_OFFSET(entry);
-	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		return;
-	if (type >= nr_swapfiles) {
-		printk("Trying to duplicate nonexistent swap-page\n");
-		return;
-	}
-	p = type + swap_info;
-	if (offset >= p->max) {
-		printk("swap_duplicate: weirdness\n");
-		return;
-	}
-	if (!p->swap_map[offset]) {
-		printk("swap_duplicate: trying to duplicate unused page\n");
-		return;
-	}
-	p->swap_map[offset]++;
-	return;
-}
-
-void swap_free(unsigned long entry)
-{
-	struct swap_info_struct * p;
-	unsigned long offset, type;
-
-	if (!entry)
-		return;
-	type = SWP_TYPE(entry);
-	if (type & SHM_SWP_TYPE)
-		return;
-	if (type >= nr_swapfiles) {
-		printk("Trying to free nonexistent swap-page\n");
-		return;
-	}
-	p = & swap_info[type];
-	offset = SWP_OFFSET(entry);
-	if (offset >= p->max) {
-		printk("swap_free: weirdness\n");
-		return;
-	}
-	if (!(p->flags & SWP_USED)) {
-		printk("Trying to free swap from unused swap-device\n");
-		return;
-	}
-	if (offset < p->lowest_bit)
-		p->lowest_bit = offset;
-	if (offset > p->highest_bit)
-		p->highest_bit = offset;
-	if (!p->swap_map[offset])
-		printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
-	else
-		if (!--p->swap_map[offset])
-			nr_swap_pages++;
-	if (p->prio > swap_info[swap_list.next].prio) {
-	    swap_list.next = swap_list.head;
-	}
-}
-
-/*
- * The tests may look silly, but it essentially makes sure that
- * no other process did a swap-in on us just as we were waiting.
- *
- * Also, don't bother to add to the swap cache if this page-in
- * was due to a write access.
- */
-void swap_in(struct task_struct * tsk, struct vm_area_struct * vma,
-	pte_t * page_table, unsigned long entry, int write_access)
-{
-	unsigned long page = __get_free_page(GFP_KERNEL);
-
-	if (pte_val(*page_table) != entry) {
-		free_page(page);
-		return;
-	}
-	if (!page) {
-		set_pte(page_table, BAD_PAGE);
-		swap_free(entry);
-		oom(tsk);
-		return;
-	}
-	read_swap_page(entry, (char *) page);
-	if (pte_val(*page_table) != entry) {
-		free_page(page);
-		return;
-	}
-	vma->vm_mm->rss++;
-	tsk->maj_flt++;
-	if (!write_access && add_to_swap_cache(page, entry)) {
-		set_pte(page_table, mk_pte(page, vma->vm_page_prot));
-		return;
-	}
-	set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
-  	swap_free(entry);
-  	return;
-}
-
-/*
- * The swap-out functions return 1 if they successfully
- * threw something out, and we got a free page. It returns
- * zero if it couldn't do anything, and any other value
- * indicates it decreased rss, but the page was shared.
- *
- * NOTE! If it sleeps, it *must* return 1 to make sure we
- * don't continue with the swap-out. Otherwise we may be
- * using a process that no longer actually exists (it might
- * have died while we slept).
- */
-static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
-	unsigned long address, pte_t * page_table, unsigned long limit)
-{
-	pte_t pte;
-	unsigned long entry;
-	unsigned long page;
-	struct page * page_map;
-
-	pte = *page_table;
-	if (!pte_present(pte))
-		return 0;
-	page = pte_page(pte);
-	if (MAP_NR(page) >= MAP_NR(high_memory))
-		return 0;
-	if (page >= limit)
-		return 0;
-
-	page_map = mem_map + MAP_NR(page);
-	if (page_map->reserved)
-		return 0;
-	/* Deal with page aging.  Pages age from being unused; they
-	 * rejuvinate on being accessed.  Only swap old pages (age==0
-	 * is oldest). */
-	if ((pte_dirty(pte) && delete_from_swap_cache(page)) 
-	    || pte_young(pte))  {
-		set_pte(page_table, pte_mkold(pte));
-		page_age_update(page_map, 1);
-		return 0;
-	}	
-	if (page_age_update(page_map, pte_young(pte)))
-		return 0;
-	if (pte_dirty(pte)) {
-		if (vma->vm_ops && vma->vm_ops->swapout) {
-			pid_t pid = tsk->pid;
-			vma->vm_mm->rss--;
-			if (vma->vm_ops->swapout(vma, address - vma->vm_start + vma->vm_offset, page_table))
-				kill_proc(pid, SIGBUS, 1);
-		} else {
-			if (page_map->count != 1)
-				return 0;
-			if (!(entry = get_swap_page()))
-				return 0;
-			vma->vm_mm->rss--;
-			set_pte(page_table, __pte(entry));
-			invalidate_page(vma, address);
-			tsk->nswap++;
-			write_swap_page(entry, (char *) page);
-		}
-		free_page(page);
-		return 1;	/* we slept: the process may not exist any more */
-	}
-        if ((entry = find_in_swap_cache(page)))  {
-		if (page_map->count != 1) {
-			set_pte(page_table, pte_mkdirty(pte));
-			printk("Aiee.. duplicated cached swap-cache entry\n");
-			return 0;
-		}
-		vma->vm_mm->rss--;
-		set_pte(page_table, __pte(entry));
-		invalidate_page(vma, address);
-		free_page(page);
-		return 1;
-	} 
-	vma->vm_mm->rss--;
-	pte_clear(page_table);
-	invalidate_page(vma, address);
-	entry = page_unuse(page);
-	free_page(page);
-	return entry;
-}
-
-/*
- * A new implementation of swap_out().  We do not swap complete processes,
- * but only a small number of blocks, before we continue with the next
- * process.  The number of blocks actually swapped is determined on the
- * number of page faults, that this process actually had in the last time,
- * so we won't swap heavily used processes all the time ...
- *
- * Note: the priority argument is a hint on much CPU to waste with the
- *       swap block search, not a hint, of how much blocks to swap with
- *       each process.
- *
- * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de
- */
-
-static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
-	pmd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
-{
-	pte_t * pte;
-	unsigned long pmd_end;
-
-	if (pmd_none(*dir))
-		return 0;
-	if (pmd_bad(*dir)) {
-		printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
-		pmd_clear(dir);
-		return 0;
-	}
-	
-	pte = pte_offset(dir, address);
-	
-	pmd_end = (address + PMD_SIZE) & PMD_MASK;
-	if (end > pmd_end)
-		end = pmd_end;
-
-	do {
-		int result;
-		tsk->swap_address = address + PAGE_SIZE;
-		result = try_to_swap_out(tsk, vma, address, pte, limit);
-		if (result)
-			return result;
-		address += PAGE_SIZE;
-		pte++;
-	} while (address < end);
-	return 0;
-}
-
-static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
-	pgd_t *dir, unsigned long address, unsigned long end, unsigned long limit)
-{
-	pmd_t * pmd;
-	unsigned long pgd_end;
-
-	if (pgd_none(*dir))
-		return 0;
-	if (pgd_bad(*dir)) {
-		printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
-		pgd_clear(dir);
-		return 0;
-	}
-
-	pmd = pmd_offset(dir, address);
-
-	pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;	
-	if (end > pgd_end)
-		end = pgd_end;
-	
-	do {
-		int result = swap_out_pmd(tsk, vma, pmd, address, end, limit);
-		if (result)
-			return result;
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
-	pgd_t *pgdir, unsigned long start, unsigned long limit)
-{
-	unsigned long end;
-
-	/* Don't swap out areas like shared memory which have their
-	    own separate swapping mechanism or areas which are locked down */
-	if (vma->vm_flags & (VM_SHM | VM_LOCKED))
-		return 0;
-
-	end = vma->vm_end;
-	while (start < end) {
-		int result = swap_out_pgd(tsk, vma, pgdir, start, end, limit);
-		if (result)
-			return result;
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		pgdir++;
-	}
-	return 0;
-}
-
-static int swap_out_process(struct task_struct * p, unsigned long limit)
-{
-	unsigned long address;
-	struct vm_area_struct* vma;
-
-	/*
-	 * Go through process' page directory.
-	 */
-	address = p->swap_address;
-	p->swap_address = 0;
-
-	/*
-	 * Find the proper vm-area
-	 */
-	vma = find_vma(p, address);
-	if (!vma)
-		return 0;
-	if (address < vma->vm_start)
-		address = vma->vm_start;
-
-	for (;;) {
-		int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, limit);
-		if (result)
-			return result;
-		vma = vma->vm_next;
-		if (!vma)
-			break;
-		address = vma->vm_start;
-	}
-	p->swap_address = 0;
-	return 0;
-}
-
-static int swap_out(unsigned int priority, unsigned long limit)
-{
-	static int swap_task;
-	int loop, counter;
-	struct task_struct *p;
-
-	counter = ((PAGEOUT_WEIGHT * nr_tasks) >> 10) >> priority;
-	for(; counter >= 0; counter--) {
-		/*
-		 * Check that swap_task is suitable for swapping.  If not, look for
-		 * the next suitable process.
-		 */
-		loop = 0;
-		while(1) {
-			if (swap_task >= NR_TASKS) {
-				swap_task = 1;
-				if (loop)
-					/* all processes are unswappable or already swapped out */
-					return 0;
-				loop = 1;
-			}
-
-			p = task[swap_task];
-			if (p && p->swappable && p->mm->rss)
-				break;
-
-			swap_task++;
-		}
-
-		/*
-		 * Determine the number of pages to swap from this process.
-		 */
-		if (!p->swap_cnt) {
- 			/* Normalise the number of pages swapped by
-			   multiplying by (RSS / 1MB) */
-			p->swap_cnt = AGE_CLUSTER_SIZE(p->mm->rss);
-		}
-		if (!--p->swap_cnt)
-			swap_task++;
-		switch (swap_out_process(p, limit)) {
-			case 0:
-				if (p->swap_cnt)
-					swap_task++;
-				break;
-			case 1:
-				return 1;
-			default:
-				break;
-		}
-	}
-	return 0;
-}
-
-/*
- * We are much more aggressive about trying to swap out than we used
- * to be.  This works out OK, because we now do proper aging on page
- * contents. 
- */
-static int try_to_free_page(int priority, unsigned long limit)
-{
-	static int state = 0;
-	int i=6;
-
-	switch (state) {
-		do {
-		case 0:
-			if (priority != GFP_NOBUFFER && shrink_buffers(i, limit))
-				return 1;
-			state = 1;
-		case 1:
-			if (shm_swap(i, limit))
-				return 1;
-			state = 2;
-		case 2:
-			if (shrink_mmap(i, limit))
-				return 1;
-			state = 3;
-		default:
-			if (swap_out(i, limit))
-				return 1;
-			state = 0;
-		} while(i--);
-	}
-	return 0;
-}
-
-static inline void add_mem_queue(struct mem_list * head, struct mem_list * entry)
-{
-	entry->prev = head;
-	(entry->next = head->next)->prev = entry;
-	head->next = entry;
-}
-
-static inline void remove_mem_queue(struct mem_list * head, struct mem_list * entry)
-{
-	struct mem_list * next = entry->next;
-	(next->prev = entry->prev)->next = next;
-}
-
-/*
- * Free_page() adds the page to the free lists. This is optimized for
- * fast normal cases (no error jumps taken normally).
- *
- * The way to optimize jumps for gcc-2.2.2 is to:
- *  - select the "normal" case and put it inside the if () { XXX }
- *  - no else-statements if you can avoid them
- *
- * With the above two rules, you get a straight-line execution path
- * for the normal case, giving better asm-code.
- *
- * free_page() may sleep since the page being freed may be a buffer
- * page or present in the swap cache. It will not sleep, however,
- * for a freshly allocated page (get_free_page()).
- */
-
-/*
- * Buddy system. Hairy. You really aren't expected to understand this
- */
-static inline void free_pages_ok(unsigned long addr, unsigned long order)
-{
-	unsigned long index = MAP_NR(addr) >> (1 + order);
-	unsigned long mask = PAGE_MASK << order;
-
-	addr &= mask;
-	nr_free_pages += 1 << order;
-	while (order < NR_MEM_LISTS-1) {
-		if (!change_bit(index, free_area_map[order]))
-			break;
-		remove_mem_queue(free_area_list+order, (struct mem_list *) (addr ^ (1+~mask)));
-		order++;
-		index >>= 1;
-		mask <<= 1;
-		addr &= mask;
-	}
-	add_mem_queue(free_area_list+order, (struct mem_list *) addr);
-}
-
-static inline void check_free_buffers(unsigned long addr)
-{
-	struct buffer_head * bh;
-
-	bh = buffer_pages[MAP_NR(addr)];
-	if (bh) {
-		struct buffer_head *tmp = bh;
-		do {
-			if (tmp->b_list == BUF_SHARED
-			    && tmp->b_dev != B_FREE)
-				refile_buffer(tmp);
-			tmp = tmp->b_this_page;
-		} while (tmp != bh);
-	}
-}
-
-void free_pages(unsigned long addr, unsigned long order)
-{
-	if (MAP_NR(addr) < MAP_NR(high_memory)) {
-		unsigned long flag;
-		mem_map_t * map = mem_map + MAP_NR(addr);
-		if (map->reserved)
-			return;
-		if (map->count) {
-			save_flags(flag);
-			cli();
-			if (!--map->count) {
-				free_pages_ok(addr, order);
-				delete_from_swap_cache(addr);
-			}
-			restore_flags(flag);
-			if (map->count == 1)
-				check_free_buffers(addr);
-			return;
-		}
-		printk("Trying to free free memory (%08lx): memory probably corrupted\n",addr);
-		printk("PC = %p\n", __builtin_return_address(0));
-		return;
-	}
-}
-
-/*
- * Some ugly macros to speed up __get_free_pages()..
- */
-#define RMQUEUE(order, limit) \
-do { struct mem_list * queue = free_area_list+order; \
-     unsigned long new_order = order; \
-	do { struct mem_list *prev = queue, *ret; \
-		while (queue != (ret = prev->next)) { \
-			if ((unsigned long) ret < (limit)) { \
-				(prev->next = ret->next)->prev = prev; \
-				mark_used((unsigned long) ret, new_order); \
-				nr_free_pages -= 1 << order; \
-				restore_flags(flags); \
-				EXPAND(ret, order, new_order); \
-				return (unsigned long) ret; \
-			} \
-			prev = ret; \
-		} \
-		new_order++; queue++; \
-	} while (new_order < NR_MEM_LISTS); \
-} while (0)
-
-static inline int mark_used(unsigned long addr, unsigned long order)
-{
-	return change_bit(MAP_NR(addr) >> (1+order), free_area_map[order]);
-}
-
-#define EXPAND(addr,low,high) \
-do { unsigned long size = PAGE_SIZE << high; \
-	while (high > low) { \
-		high--; size >>= 1; cli(); \
-		add_mem_queue(free_area_list+high, addr); \
-		mark_used((unsigned long) addr, high); \
-		restore_flags(flags); \
-		addr = (struct mem_list *) (size + (unsigned long) addr); \
-	} mem_map[MAP_NR((unsigned long) addr)].count = 1; \
-	mem_map[MAP_NR((unsigned long) addr)].age = PAGE_INITIAL_AGE; \
-} while (0)
-
-unsigned long __get_free_pages(int priority, unsigned long order, unsigned long limit)
-{
-	unsigned long flags;
-	int reserved_pages;
-
-	if (order >= NR_MEM_LISTS)
-		return 0;
-	if (intr_count && priority != GFP_ATOMIC) {
-		static int count = 0;
-		if (++count < 5) {
-			printk("gfp called nonatomically from interrupt %p\n",
-				__builtin_return_address(0));
-			priority = GFP_ATOMIC;
-		}
-	}
-	reserved_pages = 5;
-	if (priority != GFP_NFS)
-		reserved_pages = min_free_pages;
-	save_flags(flags);
-repeat:
-	cli();
-	if ((priority==GFP_ATOMIC) || nr_free_pages > reserved_pages) {
-		RMQUEUE(order, limit);
-		restore_flags(flags);
-		return 0;
-	}
-	restore_flags(flags);
-	if (priority != GFP_BUFFER && try_to_free_page(priority, limit))
-		goto repeat;
-	return 0;
-}
-
-/*
- * Show free area list (used inside shift_scroll-lock stuff)
- * We also calculate the percentage fragmentation. We do this by counting the
- * memory on each free list with the exception of the first item on the list.
- */
-void show_free_areas(void)
-{
- 	unsigned long order, flags;
- 	unsigned long total = 0;
-
-	printk("Free pages:      %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
-	save_flags(flags);
-	cli();
- 	for (order=0 ; order < NR_MEM_LISTS; order++) {
-		struct mem_list * tmp;
-		unsigned long nr = 0;
-		for (tmp = free_area_list[order].next ; tmp != free_area_list + order ; tmp = tmp->next) {
-			nr ++;
-		}
-		total += nr * ((PAGE_SIZE>>10) << order);
-		printk("%lu*%lukB ", nr, (PAGE_SIZE>>10) << order);
-	}
-	restore_flags(flags);
-	printk("= %lukB)\n", total);
-#ifdef SWAP_CACHE_INFO
-	show_swap_cache_info();
-#endif	
-}
-
-/*
- * Trying to stop swapping from a file is fraught with races, so
- * we repeat quite a bit here when we have to pause. swapoff()
- * isn't exactly timing-critical, so who cares (but this is /really/
- * inefficient, ugh).
- *
- * We return 1 after having slept, which makes the process start over
- * from the beginning for this process..
- */
-static inline int unuse_pte(struct vm_area_struct * vma, unsigned long address,
-	pte_t *dir, unsigned int type, unsigned long page)
-{
-	pte_t pte = *dir;
-
-	if (pte_none(pte))
-		return 0;
-	if (pte_present(pte)) {
-		unsigned long page = pte_page(pte);
-		if (page >= high_memory)
-			return 0;
-		if (!in_swap_cache(page))
-			return 0;
-		if (SWP_TYPE(in_swap_cache(page)) != type)
-			return 0;
-		delete_from_swap_cache(page);
-		set_pte(dir, pte_mkdirty(pte));
-		return 0;
-	}
-	if (SWP_TYPE(pte_val(pte)) != type)
-		return 0;
-	read_swap_page(pte_val(pte), (char *) page);
-	if (pte_val(*dir) != pte_val(pte)) {
-		free_page(page);
-		return 1;
-	}
-	set_pte(dir, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))));
-	++vma->vm_mm->rss;
-	swap_free(pte_val(pte));
-	return 1;
-}
-
-static inline int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
-	unsigned long address, unsigned long size, unsigned long offset,
-	unsigned int type, unsigned long page)
-{
-	pte_t * pte;
-	unsigned long end;
-
-	if (pmd_none(*dir))
-		return 0;
-	if (pmd_bad(*dir)) {
-		printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
-		pmd_clear(dir);
-		return 0;
-	}
-	pte = pte_offset(dir, address);
-	offset += address & PMD_MASK;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		if (unuse_pte(vma, offset+address-vma->vm_start, pte, type, page))
-			return 1;
-		address += PAGE_SIZE;
-		pte++;
-	} while (address < end);
-	return 0;
-}
-
-static inline int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
-	unsigned long address, unsigned long size,
-	unsigned int type, unsigned long page)
-{
-	pmd_t * pmd;
-	unsigned long offset, end;
-
-	if (pgd_none(*dir))
-		return 0;
-	if (pgd_bad(*dir)) {
-		printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
-		pgd_clear(dir);
-		return 0;
-	}
-	pmd = pmd_offset(dir, address);
-	offset = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	do {
-		if (unuse_pmd(vma, pmd, address, end - address, offset, type, page))
-			return 1;
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-	return 0;
-}
-
-static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
-	unsigned long start, unsigned long end,
-	unsigned int type, unsigned long page)
-{
-	while (start < end) {
-		if (unuse_pgd(vma, pgdir, start, end - start, type, page))
-			return 1;
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		pgdir++;
-	}
-	return 0;
-}
-
-static int unuse_process(struct task_struct * p, unsigned int type, unsigned long page)
-{
-	struct vm_area_struct* vma;
-
-	/*
-	 * Go through process' page directory.
-	 */
-	if (!p->mm || pgd_inuse(p->mm->pgd))
-		return 0;
-	vma = p->mm->mmap;
-	while (vma) {
-		pgd_t * pgd = pgd_offset(p->mm, vma->vm_start);
-		if (unuse_vma(vma, pgd, vma->vm_start, vma->vm_end, type, page))
-			return 1;
-		vma = vma->vm_next;
-	}
-	return 0;
-}
-
-/*
- * To avoid races, we repeat for each process after having
- * swapped something in. That gets rid of a few pesky races,
- * and "swapoff" isn't exactly timing critical.
- */
-static int try_to_unuse(unsigned int type)
-{
-	int nr;
-	unsigned long page = get_free_page(GFP_KERNEL);
-
-	if (!page)
-		return -ENOMEM;
-	nr = 0;
-	while (nr < NR_TASKS) {
-		if (task[nr]) {
-			if (unuse_process(task[nr], type, page)) {
-				page = get_free_page(GFP_KERNEL);
-				if (!page)
-					return -ENOMEM;
-				continue;
-			}
-		}
-		nr++;
-	}
-	free_page(page);
-	return 0;
-}
-
-asmlinkage int sys_swapoff(const char * specialfile)
-{
-	struct swap_info_struct * p;
-	struct inode * inode;
-	struct file filp;
-	int i, type, prev;
-
-	if (!suser())
-		return -EPERM;
-	i = namei(specialfile,&inode);
-	if (i)
-		return i;
-	prev = -1;
-	for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
-		p = swap_info + type;
-		if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
-			if (p->swap_file) {
-				if (p->swap_file == inode)
-				  break;
-			} else {
-				if (S_ISBLK(inode->i_mode)
-				    && (p->swap_device == inode->i_rdev))
-				  break;
-			}
-		}
-		prev = type;
-	}
-	if (type < 0){
-		iput(inode);
-		return -EINVAL;
-	}
-	if (prev < 0) {
-		swap_list.head = p->next;
-	} else {
-		swap_info[prev].next = p->next;
-	}
-	if (type == swap_list.next) {
-		/* just pick something that's safe... */
-		swap_list.next = swap_list.head;
-	}
-	p->flags = SWP_USED;
-	i = try_to_unuse(type);
-	if (i) {
-		iput(inode);
-		p->flags = SWP_WRITEOK;
-		return i;
-	}
-
-	if(p->swap_device){
-		memset(&filp, 0, sizeof(filp));		
-		filp.f_inode = inode;
-		filp.f_mode = 3; /* read write */
-		/* open it again to get fops */
-		if( !blkdev_open(inode, &filp) &&
-		   filp.f_op && filp.f_op->release){
-			filp.f_op->release(inode,&filp);
-			filp.f_op->release(inode,&filp);
-		}
-	}
-	iput(inode);
-
-	nr_swap_pages -= p->pages;
-	iput(p->swap_file);
-	p->swap_file = NULL;
-	p->swap_device = 0;
-	vfree(p->swap_map);
-	p->swap_map = NULL;
-	free_page((long) p->swap_lockmap);
-	p->swap_lockmap = NULL;
-	p->flags = 0;
-	return 0;
-}
-
-/*
- * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
- *
- * The swapon system call
- */
-asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
-{
-	struct swap_info_struct * p;
-	struct inode * swap_inode;
-	unsigned int type;
-	int i, j, prev;
-	int error;
-	struct file filp;
-	static int least_priority = 0;
-
-	memset(&filp, 0, sizeof(filp));
-	if (!suser())
-		return -EPERM;
-	p = swap_info;
-	for (type = 0 ; type < nr_swapfiles ; type++,p++)
-		if (!(p->flags & SWP_USED))
-			break;
-	if (type >= MAX_SWAPFILES)
-		return -EPERM;
-	if (type >= nr_swapfiles)
-		nr_swapfiles = type+1;
-	p->flags = SWP_USED;
-	p->swap_file = NULL;
-	p->swap_device = 0;
-	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
-	p->lowest_bit = 0;
-	p->highest_bit = 0;
-	p->max = 1;
-	p->next = -1;
-	if (swap_flags & SWAP_FLAG_PREFER) {
-		p->prio =
-		  (swap_flags & SWAP_FLAG_PRIO_MASK)>>SWAP_FLAG_PRIO_SHIFT;
-	} else {
-		p->prio = --least_priority;
-	}
-	error = namei(specialfile,&swap_inode);
-	if (error)
-		goto bad_swap_2;
-	p->swap_file = swap_inode;
-	error = -EBUSY;
-	if (swap_inode->i_count != 1)
-		goto bad_swap_2;
-	error = -EINVAL;
-
-	if (S_ISBLK(swap_inode->i_mode)) {
-		p->swap_device = swap_inode->i_rdev;
-
-		filp.f_inode = swap_inode;
-		filp.f_mode = 3; /* read write */
-		error = blkdev_open(swap_inode, &filp);
-		p->swap_file = NULL;
-		iput(swap_inode);
-		if(error)
-			goto bad_swap_2;
-		error = -ENODEV;
-		if (!p->swap_device)
-			goto bad_swap;
-		error = -EBUSY;
-		for (i = 0 ; i < nr_swapfiles ; i++) {
-			if (i == type)
-				continue;
-			if (p->swap_device == swap_info[i].swap_device)
-				goto bad_swap;
-		}
-	} else if (!S_ISREG(swap_inode->i_mode))
-		goto bad_swap;
-	p->swap_lockmap = (unsigned char *) get_free_page(GFP_USER);
-	if (!p->swap_lockmap) {
-		printk("Unable to start swapping: out of memory :-)\n");
-		error = -ENOMEM;
-		goto bad_swap;
-	}
-	read_swap_page(SWP_ENTRY(type,0), (char *) p->swap_lockmap);
-	if (memcmp("SWAP-SPACE",p->swap_lockmap+PAGE_SIZE-10,10)) {
-		printk("Unable to find swap-space signature\n");
-		error = -EINVAL;
-		goto bad_swap;
-	}
-	memset(p->swap_lockmap+PAGE_SIZE-10,0,10);
-	j = 0;
-	p->lowest_bit = 0;
-	p->highest_bit = 0;
-	for (i = 1 ; i < 8*PAGE_SIZE ; i++) {
-		if (test_bit(i,p->swap_lockmap)) {
-			if (!p->lowest_bit)
-				p->lowest_bit = i;
-			p->highest_bit = i;
-			p->max = i+1;
-			j++;
-		}
-	}
-	if (!j) {
-		printk("Empty swap-file\n");
-		error = -EINVAL;
-		goto bad_swap;
-	}
-	p->swap_map = (unsigned char *) vmalloc(p->max);
-	if (!p->swap_map) {
-		error = -ENOMEM;
-		goto bad_swap;
-	}
-	for (i = 1 ; i < p->max ; i++) {
-		if (test_bit(i,p->swap_lockmap))
-			p->swap_map[i] = 0;
-		else
-			p->swap_map[i] = 0x80;
-	}
-	p->swap_map[0] = 0x80;
-	memset(p->swap_lockmap,0,PAGE_SIZE);
-	p->flags = SWP_WRITEOK;
-	p->pages = j;
-	nr_swap_pages += j;
-	printk("Adding Swap: %dk swap-space\n",j<<(PAGE_SHIFT-10));
-
-	/* insert swap space into swap_list: */
-	prev = -1;
-	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
-		if (p->prio >= swap_info[i].prio) {
-			break;
-		}
-		prev = i;
-	}
-	p->next = i;
-	if (prev < 0) {
-		swap_list.head = swap_list.next = p - swap_info;
-	} else {
-		swap_info[prev].next = p - swap_info;
-	}
-	return 0;
-bad_swap:
-	if(filp.f_op && filp.f_op->release)
-		filp.f_op->release(filp.f_inode,&filp);
-bad_swap_2:
-	free_page((long) p->swap_lockmap);
-	vfree(p->swap_map);
-	iput(p->swap_file);
-	p->swap_device = 0;
-	p->swap_file = NULL;
-	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
-	p->flags = 0;
-	return error;
-}
-
-void si_swapinfo(struct sysinfo *val)
-{
-	unsigned int i, j;
-
-	val->freeswap = val->totalswap = 0;
-	for (i = 0; i < nr_swapfiles; i++) {
-		if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK)
-			continue;
-		for (j = 0; j < swap_info[i].max; ++j)
-			switch (swap_info[i].swap_map[j]) {
-				case 128:
-					continue;
-				case 0:
-					++val->freeswap;
-				default:
-					++val->totalswap;
-			}
-	}
-	val->freeswap <<= PAGE_SHIFT;
-	val->totalswap <<= PAGE_SHIFT;
-	return;
-}
-
-#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
-
-/*
- * set up the free-area data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- */
-unsigned long free_area_init(unsigned long start_mem, unsigned long end_mem)
-{
-	mem_map_t * p;
-	unsigned long mask = PAGE_MASK;
-	int i;
-
-	/*
-	 * select nr of pages we try to keep free for important stuff
-	 * with a minimum of 16 pages. This is totally arbitrary
-	 */
-	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+6);
-	if (i < 16)
-		i = 16;
-	min_free_pages = i;
-	start_mem = init_swap_cache(start_mem, end_mem);
-	mem_map = (mem_map_t *) start_mem;
-	p = mem_map + MAP_NR(end_mem);
-	start_mem = LONG_ALIGN((unsigned long) p);
-	memset(mem_map, 0, start_mem - (unsigned long) mem_map);
-	do {
-		--p;
-		p->reserved = 1;
-	} while (p > mem_map);
-
-	for (i = 0 ; i < NR_MEM_LISTS ; i++) {
-		unsigned long bitmap_size;
-		free_area_list[i].prev = free_area_list[i].next = &free_area_list[i];
-		mask += mask;
-		end_mem = (end_mem + ~mask) & mask;
-		bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
-		bitmap_size = (bitmap_size + 7) >> 3;
-		bitmap_size = LONG_ALIGN(bitmap_size);
-		free_area_map[i] = (unsigned int *) start_mem;
-		memset((void *) start_mem, 0, bitmap_size);
-		start_mem += bitmap_size;
-	}
-	return start_mem;
-}
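[Editorial note, not part of the diff.] One detail of the removed buddy allocator that is easy to misread is the buddy computation in free_pages_ok(): with mask = PAGE_MASK << order, the expression (1+~mask) equals -mask, which is the block size PAGE_SIZE << order, so the buddy of a block is found by toggling that single size bit. A standalone sketch with a hypothetical helper name:

	/* Illustrative only: the buddy of the order-'order' block at addr.
	 * Equivalent to the addr ^ (1+~mask) used in free_pages_ok(). */
	static inline unsigned long buddy_of(unsigned long addr, unsigned long order)
	{
		return addr ^ (PAGE_SIZE << order);
	}

For example, with 4 kB pages the order-0 buddy of the page at 0x1000 is the page at 0x0000, and the order-1 buddy of the 8 kB block at 0x2000 is the block at 0x0000.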

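[Editorial note, not part of the diff.] For scale, the removed free_area_init() sizes min_free_pages at roughly 1/64 of the pages above PAGE_OFFSET, with a floor of 16 pages (overriding the boot-time default of 20 set at the top of the file). A quick worked example, assuming 4 kB pages (PAGE_SHIFT == 12):

	/*
	 * Worked example of the removed heuristic, assuming PAGE_SHIFT == 12
	 * and 16 MB of memory above PAGE_OFFSET:
	 *
	 *	i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + 6)
	 *	  = (16 << 20) >> 18 = 64 pages  (256 kB kept free)
	 *
	 * Machines with less than 4 MB hit the floor of 16 pages.
	 */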