File Coverage

blib/lib/Cache/FastMmap.pm
Criterion Covered Total %
statement 135 184 73.4
branch 34 64 53.1
condition 42 66 63.6
subroutine 19 23 82.6
pod 11 12 91.7
total 241 349 69.1


line stmt bran cond sub pod time code
1             package Cache::FastMmap;
2              
3 9     9   329 use Data::Dumper;
  9         90  
  9         213  
4              
5             =head1 NAME
6            
7             Cache::FastMmap - Uses an mmap'ed file to act as a shared memory interprocess cache
8            
9             =head1 SYNOPSIS
10            
11             use Cache::FastMmap;
12            
13             # Uses vaguely sane defaults
14             $Cache = Cache::FastMmap->new();
15            
16             # $Value must be a reference...
17             $Cache->set($Key, $Value);
18             $Value = $Cache->get($Key);
19            
20             $Cache = Cache::FastMmap->new(raw_values => 1);
21            
22             # $Value can't be a reference...
23             $Cache->set($Key, $Value);
24             $Value = $Cache->get($Key);
25            
26             =head1 ABSTRACT
27            
28             A shared memory cache through an mmap'ed file. Its core is written
29             in C for performance. It uses fcntl locking to ensure multiple
30             processes can safely access the cache at the same time. It uses
31             a basic LRU algorithm to keep the most used entries in the cache.
32            
33             =head1 DESCRIPTION
34            
35             In multi-process environments (eg mod_perl, forking daemons, etc),
36             it's common to want to cache information, but have that cache
37             shared between processes. Many solutions already exist, and may
38             suit your situation better:
39            
40             =over 4
41            
42             =item *
43            
44             L<MLDBM::Sync> - acts as a database, data is not automatically
45             expired, slow
46            
47             =item *
48            
49             L<IPC::MM> - hash implementation is broken, data is not automatically
50             expired, slow
51            
52             =item *
53            
54             L<Cache::FileCache> - lots of features, slow
55            
56             =item *
57            
58             L<Cache::SharedMemoryCache> - lots of features, VERY slow. Uses
59             IPC::ShareLite which freeze/thaws ALL data at each read/write
60            
61             =item *
62            
63             L<DBI> - use your favourite RDBMS. can perform well, need a
64             DB server running. very global. socket connection latency
65            
66             =item *
67            
68             L<Cache::Mmap> - similar to this module, in pure perl. slows down
69             with larger pages
70            
71             =item *
72            
73             L<BerkeleyDB> - very fast (data ends up mostly in shared memory
74             cache) but acts as a database overall, so data is not automatically
75             expired
76            
77             =back
78            
79             In the case I was working on, I needed:
80            
81             =over 4
82            
83             =item *
84            
85             Automatic expiry and space management
86            
87             =item *
88            
89             Very fast access to lots of small items
90            
91             =item *
92            
93             The ability to fetch/store many items in one go
94            
95             =back
96            
97             Which is why I developed this module. It tries to be quite
98             efficient through a number of means:
99            
100             =over 4
101            
102             =item *
103            
104             Core code is written in C for performance
105            
106             =item *
107            
108             It uses multiple pages within a file, and uses Fcntl to only lock
109             a page at a time to reduce contention when multiple processes access
110             the cache.
111            
112             =item *
113            
114             It uses a dual level hashing system (hash to find page, then hash
115             within each page to find a slot) to make most C<get()> calls O(1) and
116             fast
117            
118             =item *
119            
120             On each C<set()>, if there are slots and page space available, only
121             the slot has to be updated and the data written at the end of the used
122             data space. If either runs out, a re-organisation of the page is
123             performed to create new slots/space which is done in an efficient way
124            
125             =back
126            
127             The class also supports read-through, and write-back or write-through
128             callbacks to access the real data if it's not in the cache, meaning that
129             code like this:
130            
131             my $Value = $Cache->get($Key);
132             if (!defined $Value) {
133             $Value = $RealDataSource->get($Key);
134             $Cache->set($Key, $Value)
135             }
136            
137             Isn't required, you instead specify in the constructor:
138            
139             Cache::FastMmap->new(
140             ...
141             context => $RealDataSourceHandle,
142             read_cb => sub { $_[0]->get($_[1]) },
143             write_cb => sub { $_[0]->set($_[1], $_[2]) },
144             );
145            
146             And then:
147            
148             my $Value = $Cache->get($Key);
149            
150             $Cache->set($Key, $NewValue);
151            
152             Will just work and will be read/written to the underlying data source as
153             needed automatically.
154            
155             =head1 PERFORMANCE
156            
157             If you're storing relatively large and complex structures into
158             the cache, then you're limited by the speed of the Storable module.
159             If you're storing simple structures, or raw data, then
160             Cache::FastMmap has noticeable performance improvements.
161            
162             See L<http://cpan.robm.fastmail.fm/cache_perf.html> for some
163             comparisons to other modules.
164            
165             =head1 COMPATIBILITY
166            
167             Cache::FastMmap uses mmap to map a file as the shared cache space,
168             and fcntl to do page locking. This means it should work on most
169             UNIX like operating systems, but will not work on Windows or
170             Win32 like environments.
171            
172             =head1 MEMORY SIZE
173            
174             Because Cache::FastMmap mmap's a shared file into your process's memory
175             space, this can make each process look quite large, even though it's just
176             mmap'd memory that's shared between all processes that use the cache,
177             and may even be swapped out if the cache is getting low usage.
178            
179             However, the OS will think your process is quite large, which might
180             mean you hit some BSD::Resource or 'ulimits' you set previously that you
181             thought were sane, but aren't anymore, so be aware.
182            
183             =head1 USAGE
184            
185             Because the cache uses shared memory through an mmap'd file, you have
186             to make sure each process connects up to the file. There's probably
187             two main ways to do this:
188            
189             =over 4
190            
191             =item *
192            
193             Create the cache in the parent process, and then when it forks, each
194             child will inherit the same file descriptor, mmap'ed memory, etc and
195             just work.
196            
197             =item *
198            
199             Explicitly connect up in each forked child to the share file
200            
201             =back
202            
203             The first way is usually the easiest. If you're using the cache in a
204             Net::Server based module, you'll want to open the cache in the
205             C<pre_loop_hook>, because that's executed before the fork, but after
206             the process ownership has changed and any chroot has been done.
207            
208             In mod_perl, just open the cache at the global level in the appropriate
209             module, which is executed as the server is starting and before it
210             starts forking children, but you'll probably want to chmod or chown
211             the file to the permissions of the apache process.
212            
213             =head1 METHODS
214            
215             =over 4
216            
217             =cut
218              
219             # Modules/Export/XSLoader {{{
220 9     9   366 use 5.006;
  9         95  
  9         98  
221 9     9   145 use strict;
  9         81  
  9         142  
222 9     9   132 use warnings;
  9         82  
  9         145  
223 9     9   146 use bytes;
  9         110  
  9         119  
224 9     9   440 use Cache::FastMmap::CImpl;
  9         1430  
  9         372  
225              
226             require Exporter;
227              
228             our @ISA = qw(Exporter);
229              
230             # Items to export into callers namespace by default. Note: do not export
231             # names by default without a very good reason. Use EXPORT_OK instead.
232             # Do not simply export all your public functions/methods/constants.
233              
234             # This allows declaration use Cache::FastMmap ':all';
235             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
236             # will save memory.
237             our %EXPORT_TAGS = ( 'all' => [ qw(
238            
239             ) ] );
240              
241             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
242              
243             our @EXPORT = qw(
244            
245             );
246              
247             our $VERSION = '1.14';
248              
249 9     9   510 use constant FC_ISDIRTY => 1;
  9         79  
  9         160  
250             # }}}
251              
252             =item I<new(%Opts)>
253            
254             Create a new Cache::FastMmap object.
255            
256             Basic global parameters are:
257            
258             =over 4
259            
260             =item * B<share_file>
261            
262             File to mmap for sharing of data (default: /tmp/sharefile)
263            
264             =item * B<init_file>
265            
266             Clear any existing values and re-initialise file. Useful to do in a
267             parent that forks off children to ensure that file is empty at the start
268             (default: 0)
269            
270             B<Note:> This is quite important to do in the parent to ensure a
271             consistent file structure. The shared file is not perfectly transaction
272             safe, and so if a child is killed at the wrong instant, it might leave
273             the cache file in an inconsistent state.
274            
275             =item * B<raw_values>
276            
277             Store values as raw binary data rather than using Storable to freeze/thaw
278             data structures (default: 0)
279            
280             =item * B<expire_time>
281            
282             Maximum time to hold values in the cache in seconds. A value of 0
283             means no explicit expiry time, and values are expired only based
284             on LRU usage. Can be expressed as 1m, 1h, 1d for minutes/hours/days
285             respectively. (default: 0)
286            
287             =back
288            
289             You may specify the cache size as:
290            
291             =over 4
292            
293             =item * B<cache_size>
294            
295             Size of cache. Can be expressed as 1k, 1m for kilobytes or megabytes
296             respectively. Automatically guesses page size/page count values.
297            
298             =back
299            
300             Or specify explicit page size/page count values. If none of these are
301             specified, the values page_size = 64k and num_pages = 89 are used.
302            
303             =over 4
304            
305             =item * B<page_size>
306            
307             Size of each page. Must be a power of 2 between 4k and 1024k. If not,
308             is rounded to the nearest value.
309            
310             =item * B<num_pages>
311            
312             Number of pages. Should be a prime number for best hashing
313            
314             =back
315            
316             The cache allows the use of callbacks for reading/writing data to an
317             underlying data store.
318            
319             =over 4
320            
321             =item * B<context>
322            
323             Opaque reference passed as the first parameter to any callback function
324             if specified
325            
326             =item * B<read_cb>
327            
328             Callback to read data from the underlying data store. Called as:
329            
330             $read_cb->($context, $Key)
331            
332             Should return the value to use. This value will be saved in the cache
333             for future retrievals. Return undef if there is no value for the
334             given key
335            
336             =item * B<write_cb>
337            
338             Callback to write data to the underlying data store.
339             Called as:
340            
341             $write_cb->($context, $Key, $Value, $ExpiryTime)
342            
343             In 'write_through' mode, it's always called as soon as a I<set(...)>
344             is called on the Cache::FastMmap class. In 'write_back' mode, it's
345             called when a value is expunged from the cache if it's been changed
346             by a I<set(...)> rather than read from the underlying store with the
347             I<read_cb> above.
348            
349             Note: Expired items do result in the I<write_cb> being
350             called if 'write_back' caching is enabled and the item has been
351             changed. You can check the $ExpiryTime against C<time()> if you only
352             want to write back values which aren't expired.
353            
354             Also remember that I<write_cb> may be called in a different process
355             to the one that placed the data in the cache in the first place
356            
357             =item * B<delete_cb>
358            
359             Callback to delete data from the underlying data store. Called as:
360            
361             $delete_cb->($context, $Key)
362            
363             Called as soon as I<remove(...)> is called on the Cache::FastMmap class
364            
365             =item * B<cache_not_found>
366            
367             If set to true, then if the I<read_cb> is called and it returns
368             undef to say nothing was found, then that information is stored
369             in the cache, so that next time a I<get(...)> is called on that
370             key, undef is returned immediately rather than again calling
371             the I<read_cb>
372            
373             =item * B<write_action>
374            
375             Either 'write_back' or 'write_through'. (default: write_through)
376            
377             =item * B<empty_on_exit>
378            
379             When you have 'write_back' mode enabled, then
380             you really want to make sure all values from the cache are expunged
381             when your program exits so any changes are written back. This is a
382             bit tricky, because we don't know if you're in a child, so you
383             must ensure that the parent process either explicitly calls
384             I<empty()> or that this flag is set to true when the parent connects
385             to the cache, and false in all the children
386            
387             =back
388            
389             =cut
390             sub new {
391 10     10 1 450   my $Proto = shift;
392 10   33     186   my $Class = ref($Proto) || $Proto;
393 10         139   my %Args = @_;
394              
395 10         102   my $Self = {};
396 10         162   bless ($Self, $Class);
397              
398             # Work out cache file and whether to init
399 10   50     266   my $share_file = $Self->{share_file}
400                 = $Args{share_file} || '/tmp/sharefile';
401 10   50     142   my $init_file = $Args{init_file} || 0;
402 10   50     160   my $test_file = $Args{test_file} || 0;
403              
404             # Storing raw/storable values?
405 10   100     194   my $raw_values = $Self->{raw_values} = int($Args{raw_values} || 0);
406              
407             # Need storable module if not using raw values
408 10 100       114   if (!$raw_values) {
409 2 50   2   24     eval "use Storable qw(freeze thaw); 1;"
  2         29  
  2         19  
  2         45  
410                   || die "Could not load Storable module: $@";
411               }
412              
413             # Work out expiry time in seconds
414 10   100     263   my $expire_time = $Args{expire_time} || 0;
415 10         139   my %Times = (m => 60, h => 60*60, d => 24*60*60);
416 10 50       148   $expire_time *= $Times{$1} if $expire_time =~ s/([mhd])$//i;
417 10         155   $Self->{expire_time} = $expire_time = int($expire_time);
418              
419             # Function rounds down to a power of 2 (the exponent is truncated by int)
420 10     10 0 186   sub RoundPow2 { return int(2 ** int(log($_[0])/log(2)) + 0.1); }
421              
422             # Work out cache size
423 10         92   my ($cache_size, $num_pages, $page_size);
424              
425 10         114   my %Sizes = (k => 1024, m => 1024*1024);
426 10 50       122   if ($cache_size = $Args{cache_size}) {
427 0 0       0     $cache_size *= $Sizes{$1} if $cache_size =~ s/([km])$//i;
428              
429 0 0       0     if ($num_pages = $Args{num_pages}) {
430 0         0       $page_size = RoundPow2($cache_size / $num_pages);
431 0 0       0       $page_size = 4096 if $page_size < 4096;
432              
433                 } else {
434 0   0     0       $page_size = $Args{page_size} || 65536;
435 0 0