Lockless page cache lookups.

Replace read-side use of mapping->tree_lock with RCU in the page cache
lookup paths:

  - __probe_page() is removed; probe_page() does its radix tree lookup
    under rcu_read_lock() and all former __probe_page() callers in
    mm/readahead.c are switched to probe_page().
  - find_get_page() and find_get_pages() look up radix tree slots, take
    a reference with page_cache_get_speculative(), and then re-read the
    slot to detect a page that moved in the meantime, retrying if so.
  - The contiguous gang lookup (hunk at -842) does the same, and tests
    page->mapping / page->index only after the reference is held and
    the slot has been re-checked; the reference is dropped before the
    loop stops on a mismatch.
  - find_lock_page() is rebuilt on find_get_page() + lock_page().
  - The tagged gang lookup (hunk at -876) still takes tree_lock; a TODO
    marks it for a later lockless conversion.
  - mapping_tagged() calls radix_tree_tagged() under rcu_read_lock().
  - The readahead preallocation loop and the backward scan no longer
    take tree_lock.

NOTE(review): indentation and blank context lines were restored from a
whitespace-collapsed copy of this patch; confirm with
"patch --dry-run -p1" against the target tree before applying.

diff -urN oldtree/include/linux/pagemap.h newtree/include/linux/pagemap.h
--- oldtree/include/linux/pagemap.h	2006-10-07 08:59:26.000000000 -0400
+++ newtree/include/linux/pagemap.h	2006-10-07 09:00:06.000000000 -0400
@@ -173,7 +173,6 @@
 
 typedef int filler_t(void *, struct page *);
 
-extern int __probe_page(struct address_space *mapping, pgoff_t offset);
 extern int probe_page(struct address_space *mapping, pgoff_t offset);
 extern struct page * find_get_page(struct address_space *mapping,
 				unsigned long index);
diff -urN oldtree/mm/filemap.c newtree/mm/filemap.c
--- oldtree/mm/filemap.c	2006-10-07 08:59:26.000000000 -0400
+++ newtree/mm/filemap.c	2006-10-07 09:00:06.000000000 -0400
@@ -588,22 +588,15 @@
 
 /*
  * Probing page existence.
- */
-int __probe_page(struct address_space *mapping, pgoff_t offset)
-{
-	return !! radix_tree_lookup(&mapping->page_tree, offset);
-}
-
-/*
- * Here we just do not bother to grab the page, it's meaningless anyway.
+ * We do not bother to take a ref to the page, it's meaningless anyway.
  */
 int probe_page(struct address_space *mapping, pgoff_t offset)
 {
 	int exists;
 
-	read_lock_irq(&mapping->tree_lock);
-	exists = __probe_page(mapping, offset);
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+	exists = !!radix_tree_lookup(&mapping->page_tree, offset);
+	rcu_read_unlock();
 
 	return exists;
 }
@@ -677,15 +670,31 @@
  * Is there a pagecache struct page at the given (mapping, offset) tuple?
  * If yes, increment its refcount and return it; if no, return NULL.
  */
-struct page * find_get_page(struct address_space *mapping, unsigned long offset)
+struct page *find_get_page(struct address_space *mapping, unsigned long offset)
 {
+	void **pagep;
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
-	page = radix_tree_lookup(&mapping->page_tree, offset);
-	if (page)
-		page_cache_get(page);
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+repeat:
+	page = NULL;
+	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+	if (pagep) {
+		page = radix_tree_deref_slot(pagep);
+		if (unlikely(!page || page == RADIX_TREE_RETRY))
+			goto repeat;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *pagep)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+	}
+	rcu_read_unlock();
+
 	return page;
 }
 EXPORT_SYMBOL(find_get_page);
@@ -725,26 +734,19 @@
 {
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
 repeat:
-	page = radix_tree_lookup(&mapping->page_tree, offset);
+	page = find_get_page(mapping, offset);
 	if (page) {
-		page_cache_get(page);
-		if (TestSetPageLocked(page)) {
-			read_unlock_irq(&mapping->tree_lock);
-			__lock_page(page);
-			read_lock_irq(&mapping->tree_lock);
-
-			/* Has the page been truncated while we slept? */
-			if (unlikely(page->mapping != mapping ||
-				     page->index != offset)) {
-				unlock_page(page);
-				page_cache_release(page);
-				goto repeat;
-			}
+		lock_page(page);
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping
+				|| page->index != offset)) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto repeat;
 		}
 	}
-	read_unlock_irq(&mapping->tree_lock);
+
 	return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -814,13 +816,39 @@
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, start, nr_pages);
-	for (i = 0; i < ret; i++)
-		page_cache_get(pages[i]);
-	read_unlock_irq(&mapping->tree_lock);
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(find_get_pages);
@@ -842,19 +870,50 @@
 {
 	unsigned int i;
 	unsigned int ret;
+	unsigned int nr_found;
 
-	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup(&mapping->page_tree,
-				(void **)pages, index, nr_pages);
-	for (i = 0; i < ret; i++) {
-		if (pages[i]->mapping == NULL || pages[i]->index != index)
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, index, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+		/*
+		 * this can only trigger if nr_found == 1, making livelock
+		 * a non issue.
+		 */
+		if (unlikely(page == RADIX_TREE_RETRY))
+			goto restart;
+
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+
+		/*
+		 * Check mapping and index only after the ref is held;
+		 * before that the page may be freed and reused under us.
+		 */
+		if (page->mapping == NULL || page->index != index) {
+			page_cache_release(page);
 			break;
+		}
 
-		page_cache_get(pages[i]);
+		pages[ret] = page;
+		ret++;
 		index++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
-	return i;
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
@@ -876,6 +935,7 @@
 	unsigned int ret;
 
 	read_lock_irq(&mapping->tree_lock);
+	/* TODO: implement lookup_tag_slot and make this lockless */
 	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
 				(void **)pages, *index, nr_pages, tag);
 	for (i = 0; i < ret; i++)
diff -urN oldtree/mm/page-writeback.c newtree/mm/page-writeback.c
--- oldtree/mm/page-writeback.c	2006-10-06 16:52:04.000000000 -0400
+++ newtree/mm/page-writeback.c	2006-10-07 09:00:06.000000000 -0400
@@ -987,17 +987,15 @@
 EXPORT_SYMBOL(writeback_congestion_end);
 
 /*
- * Return true if any of the pages in the mapping are marged with the
+ * Return true if any of the pages in the mapping are marked with the
  * passed tag.
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-	unsigned long flags;
 	int ret;
-
-	read_lock_irqsave(&mapping->tree_lock, flags);
+	rcu_read_lock();
 	ret = radix_tree_tagged(&mapping->page_tree, tag);
-	read_unlock_irqrestore(&mapping->tree_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);
diff -urN oldtree/mm/readahead.c newtree/mm/readahead.c
--- oldtree/mm/readahead.c	2006-10-05 15:26:55.000000000 -0400
+++ newtree/mm/readahead.c	2006-10-07 09:00:06.000000000 -0400
@@ -429,21 +429,20 @@
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
-	read_lock_irq(&mapping->tree_lock);
 	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
 		pgoff_t page_offset = offset + page_idx;
 
 		if (page_offset > end_index)
 			break;
 
+		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		rcu_read_unlock();
 		if (page)
 			continue;
 
-		read_unlock_irq(&mapping->tree_lock);
 		page = page_cache_alloc_cold(mapping);
 		cond_resched();
-		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = page_offset;
@@ -452,7 +451,6 @@
 			SetPageReadahead(page);
 		ret++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * Now start the IO. We ignore I/O errors - if the page is not
@@ -1358,14 +1356,14 @@
 	read_lock_irq(&mapping->tree_lock);
 	ra_index = radix_tree_scan_hole(&mapping->page_tree, index, max_scan);
 #ifdef DEBUG_READAHEAD_RADIXTREE
-	BUG_ON(!__probe_page(mapping, index));
+	BUG_ON(!probe_page(mapping, index));
 	WARN_ON(ra_index < index);
-	if (ra_index != index && !__probe_page(mapping, ra_index - 1))
+	if (ra_index != index && !probe_page(mapping, ra_index - 1))
 		printk(KERN_ERR "radix_tree_scan_hole(index=%lu ra_index=%lu "
 				"max_scan=%lu nrpages=%lu) fooled!\n",
 				index, ra_index, max_scan, mapping->nrpages);
 	if (ra_index != ~0UL && ra_index - index < max_scan)
-		WARN_ON(__probe_page(mapping, ra_index));
+		WARN_ON(probe_page(mapping, ra_index));
 #endif
 	read_unlock_irq(&mapping->tree_lock);
 
@@ -1390,13 +1388,10 @@
 	 * Poor man's radix_tree_scan_data_backward() implementation.
 	 * Acceptable because max_scan won't be large.
 	 */
-	read_lock_irq(&mapping->tree_lock);
-	for (; origin - index < max_scan;)
-		if (__probe_page(mapping, --index)) {
-			read_unlock_irq(&mapping->tree_lock);
+	for (; origin - index < max_scan;) {
+		if (probe_page(mapping, --index))
 			return index + 1;
-		}
-	read_unlock_irq(&mapping->tree_lock);
+	}
 
 	return 0;
 }
@@ -1453,9 +1448,9 @@
 #ifdef DEBUG_READAHEAD_RADIXTREE
 	WARN_ON(index > offset - 1);
 	if (index != offset - 1)
-		WARN_ON(!__probe_page(mapping, index + 1));
+		WARN_ON(!probe_page(mapping, index + 1));
 	if (index && offset - 1 - index < ra_max)
-		WARN_ON(__probe_page(mapping, index));
+		WARN_ON(probe_page(mapping, index));
 #endif
 
 	*remain = (offset - 1) - index;
@@ -1485,7 +1480,7 @@
 					100 / (readahead_ratio | 1);
 
 	for (count += ra_max; count < nr_lookback; count += ra_max)
-		if (!__probe_page(mapping, offset - count))
+		if (!probe_page(mapping, offset - count))
 			break;
 
 out_unlock: