From 90cf7c80506afb15067a018dffdac6aaf4a3bad4 Mon Sep 17 00:00:00 2001
From: Aubrey.Li <aubreylee@gmail.com>
Date: Wed, 17 Jan 2007 16:54:15 +0800
Subject: [PATCH] Add an interface to limit total vfs page cache.

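The new vm sysctl "pagecache_ratio" is the percentage of each zone's
pages that page cache allocations must leave free. The default value is
10, so by default at most about 90% of memory can be used for page cache.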

Signed-off-by: Aubrey.Li <aubreylee@gmail.com>
Signed-off-by: Thomas Chou <thomas@wytron.com.tw>
---
 linux-2.6.x/include/linux/gfp.h     |    1 +
 linux-2.6.x/include/linux/pagemap.h |    2 +-
 linux-2.6.x/include/linux/sysctl.h  |    2 ++
 linux-2.6.x/kernel/sysctl.c         |   15 +++++++++++++++
 linux-2.6.x/mm/page_alloc.c         |   17 +++++++++++++++--
 5 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/linux-2.6.x/include/linux/gfp.h b/linux-2.6.x/include/linux/gfp.h
index bf2b6bc..e118944 100644
--- a/linux-2.6.x/include/linux/gfp.h
+++ b/linux-2.6.x/include/linux/gfp.h
@@ -46,6 +46,7 @@ struct vm_area_struct;
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_PAGECACHE	((__force gfp_t)0x80000u) /* Allocation is for the page cache */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
diff --git a/linux-2.6.x/include/linux/pagemap.h b/linux-2.6.x/include/linux/pagemap.h
index c3e255b..890bb23 100644
--- a/linux-2.6.x/include/linux/pagemap.h
+++ b/linux-2.6.x/include/linux/pagemap.h
@@ -62,7 +62,7 @@ static inline struct page *__page_cache_alloc(gfp_t gfp)
 
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
-	return __page_cache_alloc(mapping_gfp_mask(x));
+	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_PAGECACHE);
 }
 
 static inline struct page *page_cache_alloc_cold(struct address_space *x)
diff --git a/linux-2.6.x/include/linux/sysctl.h b/linux-2.6.x/include/linux/sysctl.h
index d98562f..0fe2cb1 100644
--- a/linux-2.6.x/include/linux/sysctl.h
+++ b/linux-2.6.x/include/linux/sysctl.h
@@ -202,6 +202,7 @@ enum
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
+	VM_PAGECACHE_RATIO=36,	/* percent of each zone's pages kept free from page cache */
 };
 
 
@@ -964,6 +965,7 @@ extern ctl_handler sysctl_string;
 extern ctl_handler sysctl_intvec;
 extern ctl_handler sysctl_jiffies;
 extern ctl_handler sysctl_ms_jiffies;
+extern int sysctl_pagecache_ratio;
 
 
 /*
diff --git a/linux-2.6.x/kernel/sysctl.c b/linux-2.6.x/kernel/sysctl.c
index 09e569f..ff445d2 100644
--- a/linux-2.6.x/kernel/sysctl.c
+++ b/linux-2.6.x/kernel/sysctl.c
@@ -1034,6 +1034,21 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 #endif
+/*
+ * NOTE: do not add new entries to this table unless you have read
+ * Documentation/sysctl/ctl_unnumbered.txt
+ */
+	{
+		.ctl_name	= VM_PAGECACHE_RATIO,
+		.procname	= "pagecache_ratio",
+		.data		= &sysctl_pagecache_ratio,
+		.maxlen		= sizeof(sysctl_pagecache_ratio),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1         = &zero,
+                .extra2         = &one_hundred,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/linux-2.6.x/mm/page_alloc.c b/linux-2.6.x/mm/page_alloc.c
index 7fbd098..3a0ac4d 100644
--- a/linux-2.6.x/mm/page_alloc.c
+++ b/linux-2.6.x/mm/page_alloc.c
@@ -81,6 +81,8 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
 #endif
 };
 
+int sysctl_pagecache_ratio = 10;
+
 EXPORT_SYMBOL(totalram_pages);
 
 /*
@@ -892,6 +894,7 @@ failed:
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
+#define ALLOC_PAGECACHE		0x80 /* __GFP_PAGECACHE set */
 
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
@@ -910,6 +913,9 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
+	if (alloc_flags & ALLOC_PAGECACHE)
+		min = min + (sysctl_pagecache_ratio * z->present_pages) / 100;
+
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
 		return 0;
 	for (o = 0; o < order; o++) {
@@ -1000,8 +1006,12 @@ restart:
 		return NULL;
 	}
 
-	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-				zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+	if (gfp_mask & __GFP_PAGECACHE)	
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+			zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_PAGECACHE);
+	else
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+					zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
 
@@ -1027,6 +1037,9 @@ restart:
 	if (wait)
 		alloc_flags |= ALLOC_CPUSET;
 
+	if (gfp_mask & __GFP_PAGECACHE)
+		alloc_flags |= ALLOC_PAGECACHE;
+
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
 	 * coming from realtime tasks go deeper into reserves.
-- 
1.5.3.3

