File Coverage

blib/lib/Cache/Mmap.pm
Criterion Covered Total %
statement 335 386 86.8
branch 107 156 68.6
condition 26 47 55.3
subroutine 40 42 95.2
pod 14 14 100.0
total 522 645 80.9


line stmt bran cond sub pod time code
1             # $Id: Mmap.pm,v 1.13 2005/11/15 18:33:29 pmh Exp $
2              
3             =head1 NAME
4            
5             Cache::Mmap - Shared data cache using memory mapped files
6            
7             =head1 SYNOPSIS
8            
9             use Cache::Mmap;
10            
11             $cache=Cache::Mmap->new($filename,\%options);
12            
13             $val1=$cache->read($key1);
14             $cache->write($key2,$val2);
15             $cache->delete($key3);
16            
17             =head1 DESCRIPTION
18            
19             This module implements a shared data cache, using memory mapped files.
20             If routines are provided which interact with the underlying data, access to
21             the cache is completely transparent, and the module handles all the details of
22             refreshing cache contents, and updating underlying data, if necessary.
23            
24             Cache entries are assigned to "buckets" within the cache file, depending on
25             the key. Within each bucket, entries are stored approximately in order of last
26             access, so that frequently accessed entries will move to the head of the
27             bucket, thus decreasing access time. Concurrent accesses to the same bucket are
28             prevented by file locking of the relevant section of the cache file.
29            
30             =cut
31              
32             package Cache::Mmap;
33              
34             # Do we need to worry about UTF-8?
35 5   33 5   302 use constant has_utf8 => defined($^V) && $^V ge "\5\6\0";
  5         92  
  5         216  
36              
37 5     5   482 use Carp qw(croak);
  5         58  
  5         102  
38 5     5   268 use DynaLoader();
  5         44  
  5         46  
39 5     5   72 use Exporter;
  5         44  
  5         70  
40 5     5   81 use Fcntl;
  5         46  
  5         95  
41 5     5   263 use IO::Seekable qw(SEEK_SET SEEK_END);
  5         51  
  5         118  
42 5     5   84 use Storable qw(freeze thaw);
  5         53  
  5         97  
43 5     5   87 use Symbol();
  5         108  
  5         50  
44 5     5   21658 use integer;
  5         49  
  5         400  
45 5     5   80 use strict;
  5         42  
  5         70  
46 5         75 use vars qw(
47             $VERSION @ISA
48             @EXPORT_OK
49 5     5   71 );
  5         46  
50              
51             $VERSION='0.09';
52             @ISA=qw(DynaLoader Exporter);
53             @EXPORT_OK=qw(CMM_keep_expired CMM_keep_expired_refresh);
54              
55             __PACKAGE__->bootstrap($VERSION);
56              
57             # Default cache options
58             my %def_options=(
59               buckets => 13, # Number of buckets
60               bucketsize => 1024, # Size of each bucket
61               pagesize => 1024, # Bucket alignment
62               strings => 0, # Store strings, rather than refs
63               expiry => 0, # Number of seconds to hold values, 0==forever
64               context => undef, # Context to pass to read and write subs
65               permissions => 0600, # Permissions for new file creation
66             # read => sub called as ($found,$val)/$val=$read->($key,$context)
67               cachenegative => 0, # true: Cache not-found values
68             # false: Don't cache not-found values
69             # write => sub called as $write->($key,$oval,$context)
70             # Leave out for no writing to underlying data
71               writethrough => 1, # true: Write when value is added to cache
72             # false: Write when value expires or is pushed out
73             # delete => sub called as $delete->($key,$oval,$context)
74             # Leave out for no deleting of underlying data
75             );
76              
77             # Bit positions for cache-level flags
78 5     5   152 use constant flag_strings => 0x0001;
  5         46  
  5         75  
79             # Names for cache-level flags
80             my %bool_opts=(
81               strings => flag_strings,
82             );
83              
84             # Bit positions for element flags
85 5     5   387 use constant elem_dirty => 0x0001;
  5         235  
  5         100  
86              
87 5     5   254 use constant magic => 0x15ACACE;# Cache file magic number
  5         45  
  5         108  
88 5     5   72 use constant filevers => 1; # File format version number supported
  5         44  
  5         61  
89              
90              
91             my $headsize=4*10; # File: magic, buckets, bucketsize, pagesize, flags,
92             # file format version
93             my $bheadsize=4*10; # Bucket: filled
94             my $eheadsize=4*10; # Element: size, time, klen, vlen, flags
95             my $maxheadsize=$headsize > $bheadsize ? $headsize : $bheadsize;
96             $maxheadsize=$eheadsize if $eheadsize > $maxheadsize;
97              
98             # While these look random, the low word could be a bitmask
99 5     5   148 use constant CMM_keep_expired => 0xCACE0001; # Keep the expired value
  5         48  
  5         63  
100 5     5   107 use constant CMM_keep_expired_refresh => 0xCACE0003; # Keep the expired value, and unexpire it
  5         46  
  5         62  
101              
102              
103             =head1 CLASS METHODS
104            
105             =over
106            
107             =item new($filename,\%options)
108            
109             Creates a new cache object. If the file named by C<$filename> does not already
110             exist, it will be created. If the cache object cannot be created for any
111             reason, an exception will be thrown. Various options may be set in C<%options>,
112             which affect the behaviour of the cache (defaults in parentheses):
113            
114             =over 4
115            
116             =item permissions (0600)
117            
118             Sets the file permissions for the cache file if it doesn't already exist.
119            
120             =item buckets (13)
121            
122             Sets the number of buckets inside the cache file. A larger number of buckets
123             will give better performance for a cache with many accesses, as there will be
124             less chance of concurrent access to the same bucket.
125            
126             =item bucketsize (1024)
127            
128             Sets the size of each bucket, in bytes. A larger bucket size will be needed to
129             store large cache entries. If the bucketsize is not large enough to hold a
130             particular entry, it will still be passed between the underlying data and the
131             application in its entirety, but will not be stored in the cache.
132            
133             =item pagesize (1024)
134            
135             Sets the alignment of buckets within the file. The file header will be extended
136             to this size, and bucket sizes will be rounded up to the nearest multiple.
137             Choosing a pagesize equal to the virtual memory page size of the host system
138             should improve performance.
139            
140             =item strings (0)
141            
142             If true, cache entries are treated as strings, rather than references. This
143             will help performance for string-only caches, as no time will be taken to
144             serialize cache entries.
145            
146             =item expiry (0)
147            
148             If non-zero, sets the length of time, in seconds, which cache entries are
149             considered valid. A new entry will be fetched from the underlying data if
150             an expired cache entry would otherwise have been returned.
151            
152             =item context (undef)
153            
154             This value is passed to the read/write/delete routines below, to provide
155             context. This will typically be a database handle, used to fetch data from.
156            
157             =item read (undef)
158            
159             Provides a code reference to a routine which will fetch entries from the
160             underlying data. Called as C<$read-E<gt>($key,$context)>, this routine should
161             return a list C<($found,$value)>, where C<$found> is true if the entry could
162             be found in the underlying data, and C<$value> is the value to cache.
163            
164             If the routine only returns a single scalar, that will be taken as
165             the value, and C<$found> will be set to true if the value is defined.
166            
167             If this routine is not provided, only values already in the cache will ever
168             be returned.
169            
170             There are currently two special values of C<$found> which cause slightly
171             different behaviour. These are constants which may be imported in the
172             C<use> statement.
173            
174             =over 4
175            
176             =item C<Cache::Mmap::CMM_keep_expired>
177            
178             Use the previously cached value, even if it has expired. This is useful if
179             the underlying data source has become unavailable for some reason. Note that
180             even though the value returned will be ignored in this case, it must be
181             returned to avoid C<$found> being interpreted as a single scalar:
182            
183             return (Cache::Mmap::CMM_keep_expired, undef);
184            
185             =item C<Cache::Mmap::CMM_keep_expired_refresh>
186            
187             This causes the same behaviour as C<CMM_keep_expired>, but the cache entry's
188             expiry time will be reset as if a value had been successfully read from the
189             underlying data.
190            
191             =back
192            
193             =item cachenegative (0)
194            
195             If true, even unsuccessful fetches from the underlying data are cached. This
196             can be useful to only search the underlying data once for each required key.
197            
198             =item write (undef)
199            
200             Provides a code reference to a routine which will write cache entries into the
201             underlying data. This routine will be called by write(), to synchronise the
202             underlying data with the cache. Called as C<$write-E<gt>($key,$val,$context)>.
203             If the routine is not provided, the underlying data will not be synchronised
204             after cache writes.
205            
206             =item writethrough (1)
207            
208             If true, the C<write> routine above will be called as soon as
209             write() is called. This provides immediate synchronisation of
210             underlying data and cache contents.
211            
212             If false, the C<write> routine will
213             be called for each cache entry which no longer fits in its bucket after a
214             cache read or write. This provides a write-as-necessary behaviour, which may
215             be more efficient than the writethrough behaviour. However, only data fetched
216             through the cache will reflect these changes.
217            
218             =item delete (undef)
219            
220             Provides a code reference to a routine which will delete items from the
221             underlying data. This routine will be called by delete(),
222             to synchronise the underlying data with the cache. Called as
223             C<$delete-E<gt>($key,$cval,$context)>, where C<$cval> is the value
224             currently stored in the cache. If this routine is not provided, entries
225             deleted from the cache have no effect on the underlying data.
226            
227             =back
228            
229             An alternative to supplying a C<write> routine, is to call
230             delete() after updating the underlying data. Note however, that
231             in the case of databases, this should be done after committing the update, so
232             that a concurrent process doesn't reload the cache between being the entry
233             being deleted, and the database updates being committed.
234            
235             =cut
236              
237             sub new{
238 11     11 1 593   my($class,$filename,$options)=@_;
239 11 100       229   my $self={
240                 %def_options,
241 11         164     %{$options || {}},
242               };
243              
244             # Check options for sensible values
245 11         140   foreach(qw(buckets bucketsize pagesize permissions)){
246 44 50 33     951     defined($self->{$_}) && $self->{$_}=~/^[1-9]\d*$/s
247                   or croak "'$_' option for $class must be a positive integer";
248               }
249 11 50       254   $self->{pagesize}>=$maxheadsize
250                 or croak "'pagesize' option for $class must be at least $maxheadsize";
251 11         106   foreach(qw(read write delete)){
252 33 50 66     1256     !$self->{$_} || ref $self->{$_} eq 'CODE'
253                   or croak "'$_' option for $class must be a CODE ref or empty";
254               }
255              
256             # Align bucketsize
257               {
258 5     5   119     no integer;
  5         56  
  5         78  
  11         111  
259 11         129     my $n_pages=$self->{bucketsize}/$self->{pagesize};
260 11 50       151     if((my $i_pages=int $n_pages)!=$n_pages){
261 0         0       $self->{bucketsize}=($i_pages+1)*$self->{pagesize};
262                 }
263               }
264              
265             # Try to open a file
266 11         149   my $fh=Symbol::gensym;
267 11 50       453   sysopen($fh,$filename,O_RDWR|O_CREAT,$self->{permissions})
268                 or croak "Can't open cache file $filename: $!";
269              
270             # Create cache object
271 11         135   bless $self,$class;
272 11         141   $self->{_filename}=$filename;
273 11         108   $self->{_fh}=$fh;
274              
275             # Set options
276 11         140   $self->_set_options;
277              
278 9         155   $self;
279             }
280              
281             =back
282            
283             =head1 METHODS
284            
285             =head2 CACHE DATA METHODS
286            
287             These are the everyday methods used to access the data stored by the cache.
288            
289             =over 4
290            
291             =item read($key)
292            
293             Reads an entry from the cache, or from the underlying data if not cached.
294             Returns the value in scalar context, and C<($found,$value)> in list context,
295             where C<$found> is true if the item was found in either the cache or the
296             underlying data.
297            
298             =cut
299              
300             sub read{
301 49     49 1 538   my($self,$key)=@_;
302 49         518   my $bucket=$self->_bucket($key);
303 49         12317   my $ekey=$self->_encode($key,1);
304              
305             # Lock the bucket. This is a write lock, even for reading, since we may
306             # move items within the bucket
307 49         803   $self->_lock($bucket);
308              
309 49         423   my($found,$val,$err);
310 49 100       451   eval{
311 49         728     local $SIG{__DIE__};
312              
313 49         2320     ($found,my($expired,$poff,$off,$size,$klen,$vlen,$flags))
314                   =$self->_find($bucket,$key);
315              
316             # We need to read a new value if we don't have a value,
317             # or if that value is expired.
318 47         1864     my ($new_found, $new_val);
319 47 100 100     678     if (!$found or $expired) {
320 9 100       2529       my @_read=$self->{read}
321             ? $self->{read}->($key,$self->{context}) : ();
322 9 50       152       ($new_found,$new_val)=@_read==1 ? (defined($_read[0]),$_read[0]) : @_read;
323 9 100       98       $new_found=0 if !defined $new_found;
324 9 100       87       undef $new_val unless $new_found;
325              
326 9 100       111       if($new_found==CMM_keep_expired){
    100          
327             # Use the old value, even though it's expired
328 1         11         $found=$expired;
329 1         11 $expired=0;
330 1         10 $new_found=0;
331                   }elsif($new_found==CMM_keep_expired_refresh){
332             # Use the old value, and update its time so it's not expired anymore
333 1         9         $found=$expired;
334 1         10 $expired=0;
335 1         79 $new_found=0;
336              
337             # Modify the time field in a hideously unmaintainable way
338 1 50       26 substr($self->{_mmap},$off+4,4)=pack 'l',time
339             if $found;
340                   }
341                 }
342              
343 47 100       479     if($found){{
344             # Remove expired item, and pretend we didn't find it
345             # XXX What about dirty expired items???
346 42 100 66     349       if($expired && !($flags & elem_dirty)){
  42         490  
347             # No need to write underlying data, because it's not dirty
348 2         20 my $b_end=$bucket+$self->{bucketsize};
349 2         40 substr($self->{_mmap},$off,$b_end-$off)
350             =substr($self->{_mmap},$off+$size,$b_end-$off-$size).("\0" x $size);
351 2         22 my($filled)=unpack 'l',substr($self->{_mmap},$bucket,$bheadsize);
352 2         18 $filled-=$size;
353 2         31 substr($self->{_mmap},$bucket,$bheadsize)
354             =substr(pack("lx$bheadsize",$filled),0,$bheadsize);
355 2         17 $found=0; # it's expired, so pretend we didn't find anything
356 2         17 last;
357                   }
358             # Swap with previous item unless at head of bucket
359 40 100       672       if($poff){
360 6         58 my $psize=$off-$poff;
361 6         93 substr($self