File Coverage

blib/lib/Convert/Binary/C.pm
Criterion Covered Total %
statement 34 34 100.0
branch 9 10 90.0
condition 3 3 100.0
subroutine 6 6 100.0
pod n/a
total 52 53 98.1


line stmt bran cond sub pod time code
1             ################################################################################
2             #
3             # MODULE: Convert::Binary::C
4             #
5             ################################################################################
6             #
7             # DESCRIPTION: Convert::Binary::C Perl extension module
8             #
9             ################################################################################
10             #
11             # $Project: /Convert-Binary-C $
12             # $Author: mhx $
13             # $Date: 2006/11/02 12:57:01 +0100 $
14             # $Revision: 86 $
15             # $Source: /lib/Convert/Binary/C.pm $
16             #
17             ################################################################################
18             #
19             # Copyright (c) 2002-2006 Marcus Holland-Moritz. All rights reserved.
20             # This program is free software; you can redistribute it and/or modify
21             # it under the same terms as Perl itself.
22             #
23             ################################################################################
24              
25             package Convert::Binary::C;
26              
27 61     61   954 use strict;
  61         1673  
  61         1836  
28 61     61   1064 use DynaLoader;
  61         2793  
  61         1716  
29 61     61   1353 use Carp;
  61         546  
  61         1109  
30 61     61   919 use vars qw( @ISA $VERSION $XS_VERSION $AUTOLOAD );
  61         614  
  61         3554  
31              
32             @ISA = qw(DynaLoader);
33              
34             $VERSION    = do { my @r = '$Snapshot: /Convert-Binary-C/0.67 $' =~ /(\d+\.\d+(?:_\d+)?)/; @r ? $r[0] : '9.99' };
35             $XS_VERSION = $VERSION;
36             $VERSION    = eval $VERSION;
37              
38             bootstrap Convert::Binary::C $XS_VERSION;
39              
40             # Unfortunately, XS AUTOLOAD isn't supported
41             # by stable perl distributions before 5.8.0.
42              
43             sub AUTOLOAD
44             {
45 6200     6200   11589590   my $self = shift;
46 6200         74657   my $opt = $AUTOLOAD;
47 6200 50       303279   ref $self or croak "$self is not an object";
48 6200         159911   $opt =~ s/.*://;
49 6200 100       178656   $opt =~ /^[A-Z]/ or croak "Invalid method $opt called";
50 6199 100       135261   @_ <= 1 or croak "$opt cannot take more than one argument";
51 6198 100 100     80659   unless (@_ or defined wantarray) {
52 21         279     carp "Useless use of $opt in void context";
53 21         580     return;
54               }
55 6177         66729   my @warn;
56               {
57 6177     4   62913     local $SIG{__WARN__} = sub { push @warn, $_[0] };
  6177         213388  
  4         51  
58 6177         129738     $opt = eval { $self->configure( $opt, @_ ) };
  6177         157486  
59               }
60 6177         242186   for my $w (@warn) {
61 4         83     $w =~ s/\s+at.*?C\.pm.*//s;
62 4         54     carp $w;
63               }
64 6177 100       75689   if ($@) {
65 198         3458     $@ =~ s/\s+at.*?C\.pm.*//s;
66 198         2448     croak $@;
67               }
68 5979         87570   $opt;
69             }
70              
71             1;
72              
73             __END__
74            
75             =head1 NAME
76            
77             Convert::Binary::C - Binary Data Conversion using C Types
78            
79             =head1 SYNOPSIS
80            
81             =head2 Simple
82            
83             use Convert::Binary::C;
84            
85             #---------------------------------------------
86             # Create a new object and parse embedded code
87             #---------------------------------------------
88             my $c = Convert::Binary::C->new->parse(<<ENDC);
89            
90             enum Month { JAN, FEB, MAR, APR, MAY, JUN,
91             JUL, AUG, SEP, OCT, NOV, DEC };
92            
93             struct Date {
94             int year;
95             enum Month month;
96             int day;
97             };
98            
99             ENDC
100            
101             #-----------------------------------------------
102             # Pack Perl data structure into a binary string
103             #-----------------------------------------------
104             my $date = { year => 2002, month => 'DEC', day => 24 };
105            
106             my $packed = $c->pack('Date', $date);
107            
108             =head2 Advanced
109            
110             use Convert::Binary::C;
111             use Data::Dumper;
112            
113             #---------------------
114             # Create a new object
115             #---------------------
116             my $c = new Convert::Binary::C ByteOrder => 'BigEndian';
117            
118             #---------------------------------------------------
119             # Add include paths and global preprocessor defines
120             #---------------------------------------------------
121             $c->Include('/usr/lib/gcc-lib/i686-pc-linux-gnu/3.3.6/include',
122             '/usr/include')
123             ->Define(qw( __USE_POSIX __USE_ISOC99=1 ));
124            
125             #----------------------------------
126             # Parse the 'time.h' header file
127             #----------------------------------
128             $c->parse_file('time.h');
129            
130             #---------------------------------------
131             # See which files the object depends on
132             #---------------------------------------
133             print Dumper([$c->dependencies]);
134            
135             #-----------------------------------------------------------
136             # See if struct timespec is defined and dump its definition
137             #-----------------------------------------------------------
138             if ($c->def('struct timespec')) {
139             print Dumper($c->struct('timespec'));
140             }
141            
142             #-------------------------------
143             # Create some binary dummy data
144             #-------------------------------
145             my $data = "binary_test_string";
146            
147             #--------------------------------------------------------
148             # Unpack $data according to 'struct timespec' definition
149             #--------------------------------------------------------
150             if (length($data) >= $c->sizeof('timespec')) {
151             my $perl = $c->unpack('timespec', $data);
152             print Dumper($perl);
153             }
154            
155             #--------------------------------------------------------
156             # See which member lies at offset 5 of 'struct timespec'
157             #--------------------------------------------------------
158             my $member = $c->member('timespec', 5);
159             print "member('timespec', 5) = '$member'\n";
160            
161             =head1 DESCRIPTION
162            
163             Convert::Binary::C is a preprocessor and parser for C type
164             definitions. It is highly configurable and supports
165             arbitrarily complex data structures. Its object-oriented
166             interface has L<C<pack>|/"pack"> and L<C<unpack>|/"unpack"> methods
167             that act as replacements for
168             Perl's L<C<pack>|perlfunc/"pack"> and L<C<unpack>|perlfunc/"unpack"> and
169             allow you to use C types instead of a string representation
170             of the data structure for conversion of binary data from and
171             to Perl's complex data structures.
172            
173             Actually, what Convert::Binary::C does is not very different
174             from what a C compiler does, just that it doesn't compile the
175             source code into an object file or executable, but only parses
176             the code and allows Perl to use the enumerations, structs, unions
177             and typedefs that have been defined within your C source for binary
178             data conversion, similar to
179             Perl's L<C<pack>|perlfunc/"pack"> and L<C<unpack>|perlfunc/"unpack">.
180            
181             Beyond that, the module offers a lot of convenience methods
182             to retrieve information about the C types that have been parsed.
183            
184             =head2 Background and History
185            
186             In late 2000 I wrote a real-time debugging interface for an
187             embedded medical device that allowed me to send out data from
188             that device over its integrated Ethernet adapter.
189             The interface was C<printf()>-like, so you could easily send
190             out strings or numbers. But you could also send out what I
191             called I<arbitrary data>, which was intended for arbitrary
192             blocks of the device's memory.
193            
194             Another part of this real-time debugger was a Perl application
195             running on my workstation that gathered all the messages that
196             were sent out from the embedded device. It printed all the
197             strings and numbers, and hex-dumped the arbitrary data.
198             However, manually parsing a couple of 300 byte hex-dumps of a
199             complex C structure is not only frustrating, but also error-prone
200             and time consuming.
201            
202             Using L<C<unpack>|perlfunc/"unpack"> to retrieve the contents
203             of a C structure works fine for small structures and if you
204             don't have to deal with struct member alignment. But otherwise,
205             maintaining such code can be as awful as deciphering hex-dumps.
206            
207             As I didn't find anything to solve my problem on the CPAN,
208             I wrote a little module that translated simple C structs
209             into L<C<unpack>|perlfunc/"unpack"> strings. It worked, but
210             it was slow. And since it couldn't deal with struct member
211             alignment, I soon found myself adding padding bytes everywhere.
212             So again, I had to maintain two sources, and changing one of
213             them forced me to touch the other one.
214            
215             All in all, this little module seemed to make my task a bit
216             easier, but it was far from being what I was thinking of:
217            
218             =over 2
219            
220             =item *
221            
222             A module that could directly use the source I've been coding
223             for the embedded device without any modifications.
224            
225             =item *
226            
227             A module that could be configured to match the properties
228             of the different compilers and target platforms I was using.
229            
230             =item *
231            
232             A module that was fast enough to decode a great amount of
233             binary data even on my slow workstation.
234            
235             =back
236            
237             I didn't know how to accomplish these tasks until I read something
238             about XS. At least, it seemed as if it could solve my performance
239             problems. However, writing a C parser in C isn't easier than it is
240             in Perl. But writing a C preprocessor from scratch is even worse.
241            
242             Fortunately enough, after a few weeks of searching I found both
243             a lean, open-source C preprocessor library and a reusable YACC
244             grammar for ANSI-C. That was the beginning of the development of
245             Convert::Binary::C in late 2001.
246            
247             I have been successfully using the module in my embedded environment
248             since long before it appeared on CPAN. From my point of view, it
249             is exactly what I had in mind. It's fast, flexible, easy to use
250             and portable. It doesn't require external programs or other Perl
251             modules.
252            
253             =head2 About this document
254            
255             This document describes how to use Convert::Binary::C. A lot of
256             different features are presented, and the example code sometimes
257             uses Perl's more advanced language elements. If your experience
258             with Perl is rather limited, you should know how to use Perl's
259             very good documentation system.
260            
261             To look up one of the manpages, use the L<C<perldoc>|perldoc> command.
262             For example,
263            
264             perldoc perl
265            
266             will show you Perl's main manpage. To look up a specific Perl
267             function, use C<perldoc -f>:
268            
269             perldoc -f map
270            
271             gives you more information about the L<C<map>|perlfunc/"map"> function.
272             You can also search the FAQ using C<perldoc -q>:
273            
274             perldoc -q array
275            
276             will give you everything you ever wanted to know about Perl
277             arrays. But now, let's go on with some real stuff!
278            
279             =head2 Why use Convert::Binary::C?
280            
281             Say you want to pack (or unpack) data according to the following
282             C structure:
283            
284             struct foo {
285             char ary[3];
286             unsigned short baz;
287             int bar;
288             };
289            
290             You could of course use
291             Perl's L<C<pack>|perlfunc/"pack"> and L<C<unpack>|perlfunc/"unpack"> functions:
292            
293             @ary = (1, 2, 3);
294             $baz = 40000;
295             $bar = -4711;
296             $binary = pack 'c3 S i', @ary, $baz, $bar;
297            
298             But this implies that the struct members are byte aligned. If
299             they were long aligned (which is the default for most compilers),
300             you'd have to write
301            
302             $binary = pack 'c3 x S x2 i', @ary, $baz, $bar;
303            
304             which doesn't really increase readability.
305            
306             Now imagine that you need to pack the data for a completely
307             different architecture with different byte order. You would
308             look into the L<C<pack>|perlfunc/"pack"> manpage again and
309             perhaps come up with this:
310            
311             $binary = pack 'c3 x n x2 N', @ary, $baz, $bar;
312            
313             However, if you try to unpack C<$binary> again, your signed values
314             will have turned into unsigned ones.
315            
316             All this can still be managed with Perl. But imagine your
317             structures get more complex? Imagine you need to support
318             different platforms? Imagine you need to make changes to
319             the structures? You'll not only have to change the C source
320             but also dozens of L<C<pack>|perlfunc/"pack"> strings in
321             your Perl code. This is no fun. And Perl should be fun.
322            
323             Now, wouldn't it be great if you could just read in the C
324             source you've already written and use all the types defined
325             there for packing and unpacking? That's what Convert::Binary::C
326             does.
327            
328             =head2 Creating a Convert::Binary::C object
329            
330             To use Convert::Binary::C just say
331            
332             use Convert::Binary::C;
333            
334             to load the module. Its interface is completely object
335             oriented, so it doesn't export any functions.
336            
337             Next, you need to create a new Convert::Binary::C object. This
338             can be done by either
339            
340             $c = Convert::Binary::C->new;
341            
342             or
343            
344             $c = new Convert::Binary::C;
345            
346             You can optionally pass configuration options to
347             the L<constructor|/"new"> as described in the next section.
348            
349             =head2 Configuring the object
350            
351             To configure a Convert::Binary::C object, you can either call
352             the L<C<configure>|/"configure"> method or directly pass the configuration
353             options to the L<constructor|/"new">. If you want to change byte order
354             and alignment, you can use
355            
356             $c->configure(ByteOrder => 'LittleEndian',
357             Alignment => 2);
358            
359             or you can change the construction code to
360            
361             $c = new Convert::Binary::C ByteOrder => 'LittleEndian',
362             Alignment => 2;
363            
364             Either way, the object will now know that it should use
365             little endian (Intel) byte order and 2-byte struct member
366             alignment for packing and unpacking.
367            
368             Alternatively, you can use the option names as names of
369             methods to configure the object, like:
370            
371             $c->ByteOrder('LittleEndian');
372            
373             You can also retrieve information about the current
374             configuration of a Convert::Binary::C object. For details,
375             see the section about the L<C<configure>|/"configure"> method.
376            
377             =head2 Parsing C code
378            
379             Convert::Binary::C allows two ways of parsing C source. Either
380             by parsing external C header or C source files:
381            
382             $c->parse_file('header.h');
383            
384             Or by parsing C code embedded in your script:
385            
386             $c->parse(<<'CCODE');
387             struct foo {
388             char ary[3];
389             unsigned short baz;
390             int bar;
391             };
392             CCODE
393            
394             Now the object C<$c> will know everything about C<struct foo>.
395             The example above uses a so-called here-document. It allows you to
396             easily embed multi-line strings in your code. You can find more
397             about here-documents in L<perldata> or L<perlop>.
398            
399             Since the L<C<parse>|/"parse"> and L<C<parse_file>|/"parse_file"> methods
400             throw an exception when a parse error occurs, you usually want to catch
401             these in an C<eval> block:
402            
403             eval { $c->parse_file('header.h') };
404             if ($@) {
405             # handle error appropriately
406             }
407            
408             Perl's special C<$@> variable will contain an empty string (which
409             evaluates to a false value in boolean context) on success or
410             an error string on failure.
411            
412             As another feature, L<C<parse>|/"parse"> and L<C<parse_file>|/"parse_file"> return
413             a reference to their object on success, just like L<C<configure>|/"configure"> does
414             when you're configuring the object. This will allow you to write constructs
415             like this:
416            
417             my $c = eval {
418             Convert::Binary::C->new(Include => ['/usr/include'])
419             ->parse_file('header.h')
420             };
421             if ($@) {
422             # handle error appropriately
423             }
424            
425             =head2 Packing and unpacking
426            
427             Convert::Binary::C has two methods, L<C<pack>|/"pack"> and L<C<unpack>|/"unpack">,
428             that act similarly to the Perl functions of the same name.
429             To perform the packing described in the example above,
430             you could write:
431            
432             $data = {
433             ary => [1, 2, 3],
434             baz => 40000,
435             bar => -4711,
436             };
437             $binary = $c->pack('foo', $data);
438            
439             Unpacking will work exactly the same way, just that
440             the L<C<unpack>|/"unpack"> method will take a byte string as its input
441             and will return a reference to a (possibly very complex)
442             Perl data structure.
443            
444             $binary = get_data_from_memory();
445             $data = $c->unpack('foo', $binary);
446            
447             You can now easily access all of the values:
448            
449             print "foo.ary[1] = $data->{ary}[1]\n";
450            
451             Or you can even more conveniently use
452             the L<Data::Dumper|Data::Dumper> module:
453            
454             use Data::Dumper;
455             print Dumper($data);
456            
457             The output would look something like this:
458            
459             $VAR1 = {
460             'bar' => -271,
461             'baz' => 5000,
462             'ary' => [
463             42,
464             48,
465             100
466             ]
467             };
468            
469             =head2 Preprocessor configuration
470            
471             Convert::Binary::C uses Thomas Pornin's C<ucpp> as an internal
472             C preprocessor. It is compliant with ISO-C99, so you don't have
473             to worry about using even weird preprocessor constructs in
474             your code.
475            
476             If your C source contains includes or depends upon preprocessor
477             defines, you may need to configure the internal preprocessor.
478             Use the C<Include> and C<Define> configuration options for that:
479            
480             $c->configure(Include => ['/usr/include',
481             '/home/mhx/include'],
482             Define => [qw( NDEBUG FOO=42 )]);
483            
484             If your code uses system includes, it is most likely
485             that you will need to define the symbols that are usually
486             defined by the compiler.
487            
488             On some operating systems, the system includes require the
489             preprocessor to predefine a certain set of assertions.
490             Assertions are supported by C<ucpp>, and you can define them
491             either in the source code using C<#assert> or as a property
492             of the Convert::Binary::C object using C<Assert>:
493            
494             $c->configure(Assert => ['predicate(answer)']);
495            
496             Information about defined macros can be retrieved from the
497             preprocessor as long as its configuration isn't changed. The
498             preprocessor is implicitly reset if you change one of the
499             following configuration options:
500            
501             Include
502             Define
503             Assert
504             HasCPPComments
505             HasMacroVAARGS
506            
507             =head2 Supported pragma directives
508            
509             Convert::Binary::C supports the C<pack> pragma to locally override
510             struct member alignment. The supported syntax is as follows:
511            
512             =over 4
513            
514             =item #pragma pack( ALIGN )
515            
516             Sets the new alignment to ALIGN.
517            
518             =item #pragma pack
519            
520             Resets the alignment to its original value.
521            
522             =item #pragma pack( push, ALIGN )
523            
524             Saves the current alignment on a stack and sets the new
525             alignment to ALIGN.
526            
527             =item #pragma pack( pop )
528            
529             Restores the alignment to the last value saved on the
530             stack.
531            
532             =back
533            
534             /* Example assumes sizeof( short ) == 2, sizeof( long ) == 4. */
535            
536             #pragma pack(1)
537            
538             struct nopad {
539             char a; /* no padding bytes between 'a' and 'b' */
540             long b;
541             };
542            
543             #pragma pack /* reset to "native" alignment */
544            
545             #pragma pack( push, 2 )
546            
547             struct pad {
548             char a; /* one padding byte between 'a' and 'b' */
549             long b;
550            
551             #pragma pack( push, 1 )
552            
553             struct {
554             char c; /* no padding between 'c' and 'd' */
555             short d;
556             } e; /* sizeof( e ) == 3 */
557            
558             #pragma pack( pop ); /* back to pack( 2 ) */
559            
560             long f; /* one padding byte between 'e' and 'f' */
561             };
562            
563             #pragma pack( pop ); /* back to "native" */
564            
565             The C<pack> pragma as it is currently implemented only affects
566             the I<maximum> struct member alignment. There are compilers
567             that also allow you to specify the I<minimum> struct member
568             alignment. This is not supported by Convert::Binary::C.
569            
570             =head2 Automatic configuration using C<ccconfig>
571            
572             As there are over 20 different configuration options, setting
573             all of them correctly can be a lengthy and tedious task.
574            
575             The L<C<ccconfig>|ccconfig> script, which is bundled with this
576             module, aims at automatically determining the correct compiler
577             configuration by testing the compiler executable. It works for
578             both native and cross compilers.
579            
580             =head1 UNDERSTANDING TYPES
581            
582             This section covers one of the fundamental features of
583             Convert::Binary::C. It's how I<type expressions>, referred to
584             as TYPEs in the L<method reference|/"METHODS">, are handled
585             by the module.
586            
587             Many of the methods,
588             namely L<C<pack>|/"pack">, L<C<unpack>|/"unpack">, L<C<sizeof>|/"sizeof">, L<C<typeof>|/"typeof">, L<C<member>|/"member">, L<C<offsetof>|/"offsetof">, L<C<def>|/"def">, L<C<initializer>|/"initializer"> and L<C<tag>|/"tag">,
589             are passed a TYPE to operate on as their first argument.
590            
591             =head2 Standard Types
592            
593             These are trivial. Standard types are simply enum names, struct
594             names, union names, or typedefs. Almost every method that wants
595             a TYPE will accept a standard type.
596            
597             For enums, structs and unions, the prefixes C<enum>, C<struct> and C<union> are
598             optional. However, if a typedef with the same name exists, like in
599            
600             struct foo {
601             int bar;
602             };
603            
604             typedef int foo;
605            
606             you will have to use the prefix to distinguish between the
607             struct and the typedef. Otherwise, a typedef is always given
608             preference.
609            
610             =head2 Basic Types
611            
612             Basic types, or atomic types, are C<int> or C<char>, for example.
613             It's possible to use these basic types without having parsed any
614             code. You can simply do
615            
616             $c = new Convert::Binary::C;
617             $size = $c->sizeof('unsigned long');
618             $data = $c->pack('short int', 42);
619            
620             Even though the above works fine, it is not possible to define
621             more complex types on the fly, so
622            
623             $size = $c->sizeof('struct { int a, b; }');
624            
625             will result in an error.
626            
627             Basic types are not supported by all methods. For example, it makes
628             no sense to use L<C<member>|/"member"> or L<C<offsetof>|/"offsetof"> on
629             a basic type. Using L<C<typeof>|/"typeof"> isn't very useful, but
630             supported.
631            
632             =head2 Member Expressions
633            
634             This is by far the most complex part, depending on the complexity of
635             your data structures. Any L<standard type|/"Standard Types"> that
636             defines a compound or an array may be followed by a member expression
637             to select only a certain part of the data type. Say you have parsed the
638             following C code:
639            
640             struct foo {
641             long type;
642             struct {
643             short x, y;
644             } array[20];
645             };
646            
647             typedef struct foo matrix[8][8];
648            
649             You may want to know the size of the C<array> member of C<struct foo>.
650             This is quite easy:
651            
652             print $c->sizeof('foo.array'), " bytes";
653            
654             will print
655            
656             80 bytes
657            
658             depending of course on the C<ShortSize> you configured.
659            
660             If you wanted to unpack only a single column of C<matrix>, that's
661             easy as well (and of course it doesn't matter which index you use):
662            
663             $column = $c->unpack('matrix[2]', $data);
664            
665             Just like in C, it is possible to use out-of-bounds array indices.
666             This means that, for example, although C<array> is declared to have
667             20 elements, the following code
668            
669             $size = $c->sizeof('foo.array[4711]');
670             $offset = $c->offsetof('foo', 'array[-13]');
671            
672             is perfectly valid and will result in:
673            
674             $size = 4
675             $offset = -48
676            
677             Member expressions can be arbitrarily complex:
678            
679             $type = $c->typeof('matrix[2][3].array[7].y');
680             print "the type is $type";
681            
682             will, for example, print
683            
684             the type is short
685            
686             Member expressions are also used as the second argument
687             to L<C<offsetof>|/"offsetof">.
688            
689             =head2 Offsets
690            
691             Members returned by the L<C<member>|/"member"> method have an optional
692             offset suffix to indicate that the given offset doesn't point to the
693             start of that member. For example,
694            
695             $member = $c->member('matrix', 1431);
696             print $member;
697            
698             will print
699            
700             [2][1].type+3
701            
702             If you use this as a member expression, like in
703            
704             $size = $c->sizeof("matrix $member");
705            
706             the offset suffix will simply be ignored. Actually, it will be
707             ignored for all methods if it's used in the first argument.
708            
709             When used in the second argument to L<C<offsetof>|/"offsetof">,
710             it will usually do what you mean, i.e. the offset suffix, if
711             present, will be considered when determining the offset. This
712             behaviour ensures that
713            
714             $member = $c->member('foo', 43);
715             $offset = $c->offsetof('foo', $member);
716             print "'$member' is located at offset $offset of struct foo";
717            
718             will always correctly set C<$offset>:
719            
720             '.array[9].y+1' is located at offset 43 of struct foo
721            
722             If this is not what you mean, e.g. because you want to know the
723             offset where the member returned by L<C<member>|/"member"> starts,
724             you just have to remove the suffix:
725            
726             $member =~ s/\+\d+$//;
727             $offset = $c->offsetof('foo', $member);
728             print "'$member' starts at offset $offset of struct foo";
729            
730             This would then print:
731            
732             '.array[9].y' starts at offset 42 of struct foo
733            
734             =head1 USING TAGS
735            
736             In a nutshell, tags are properties that you can attach to types.
737            
738             You can add tags to types using the L<C<tag>|/"tag"> method,
739             and remove them using L<C<tag>|/"tag"> or L<C<untag>|/"untag">,
740             for example:
741            
742             # Attach 'Format' and 'Hooks' tags
743             $c->tag('type', Format => 'String', Hooks => { pack => \&rout });
744            
745             $c->untag('type', 'Format'); # Remove only 'Format' tag
746             $c->untag('type'); # Remove all tags
747            
748             You can also use L<C<tag>|/"tag"> to see which tags are
749             attached to a type, for example:
750            
751             $tags = $c->tag('type');
752            
753             This would give you:
754            
755             $tags = {
756             'Hooks' => {
757             'pack' => \&rout
758             },
759             'Format' => 'String'
760             };
761            
762             Currently, there are only a couple of different tags that
763             influence the way data is packed and unpacked. There are
764             probably more tags to come in the future.
765            
766             =head2 The Format Tag
767            
768             One of the tags currently available is the C<Format> tag.
769             Using this tag, you can tell a Convert::Binary::C object to
770             pack and unpack a certain data type in a special way.
771            
772             For example, if you have a (fixed length) string type
773            
774             typedef char str_type[40];
775            
776             this type would, by default, be unpacked as an array
777             of C<char>s. That's because it B<is> only an array
778             of C<char>s, and Convert::Binary::C doesn't know it is
779             actually used as a string.
780            
781             But you can tell Convert::Binary::C that C<str_type> is
782             a C string using the C<Format> tag:
783            
784             $c->tag('str_type', Format => 'String');
785            
786             This will make L<C<unpack>|/"unpack"> (and of course
787             also L<C<pack>|/"pack">) treat the binary data like a
788             null-terminated C string:
789            
790             $binary = "Hello World!\n\0 this is just some dummy data";
791             $hello = $c->unpack('str_type', $binary);
792             print $hello;
793            
794             would thusly print:
795            
796             Hello World!
797            
798             Of course, this also works the other way round:
799            
800             use Data::Hexdumper;
801            
802             $binary = $c->pack('str_type', "Just another C::B::C hacker");
803             print hexdump(data => $binary);
804            
805             would print:
806            
807             0x0000 : 4A 75 73 74 20 61 6E 6F 74 68 65 72 20 43 3A 3A : Just.another.C::
808             0x0010 : 42 3A 3A 43 20 68 61 63 6B 65 72 00 00 00 00 00 : B::C.hacker.....
809             0x0020 : 00 00 00 00 00 00 00 00 : ........
810            
811             If you want Convert::Binary::C to not interpret the binary
812             data at all, you can set the C<Format> tag to C<Binary>.
813             This might not seem very useful,
814             as L<C<pack>|/"pack"> and L<C<unpack>|/"unpack"> would
815             just pass through the unmodified binary data.
816             But you can tag not only whole types, but also compound
817             members. For example
818            
819             $c->parse(<<ENDC);
820             struct packet {
821             unsigned short header;
822             unsigned short flags;
823             unsigned char payload[28];
824             };
825             ENDC
826            
827             $c->tag('packet.payload', Format => 'Binary');
828            
829             would allow you to write:
830            
831             read FILE, $payload, $c->sizeof('packet.payload');
832            
833             $packet = {
834             header => 4711,
835             flags => 0xf00f,
836             payload => $payload,
837             };
838            
839             $binary = $c->pack('packet', $packet);
840            
841             print hexdump(data => $binary);
842            
843             This would print something like:
844            
845             0x0000 : 12 67 F0 0F 6E 6F 0A 6E 6F 0A 6E 6F 0A 6E 6F 0A : .g..no.no.no.no.
846             0x0010 : 6E 6F 0A 6E 6F 0A 6E 6F 0A 6E 6F 0A 6E 6F 0A 6E : no.no.no.no.no.n
847            
848             For obvious reasons, it is not allowed to attach a C<Format> tag
849             to bitfield members. Trying to do so will result in an exception
850             being thrown by the L<C<tag>|/"tag"> method.
851            
852             =head2 The ByteOrder Tag
853            
854             The C<ByteOrder> tag allows you to override the byte order of
855             certain types or members. The implementation of this tag is
856             considered B<experimental> and may be subject to changes in the
857             future.
858            
859             Usually it doesn't make much sense to override the byte order,
860             but there may be applications where a sub-structure is packed
861             in a different byte order than the surrounding structure.
862            
863             Take, for example, the following code:
864            
865             $c = Convert::Binary::C->new(ByteOrder => 'BigEndian',
866             OrderMembers => 1);
867             $c->parse(<<'ENDC');
868            
869             typedef unsigned short u_16;
870            
871             struct coords_3d {
872             long x, y, z;
873             };
874            
875             struct coords_msg {
876             u_16 header;
877             u_16 length;
878             struct coords_3d coords;
879             };
880            
881             ENDC
882            
883             Assume that while C<coords_msg> is big endian, the embedded
884             coordinates C<coords_3d> are stored in little endian format
885             for some reason. In C, you'll have to handle this manually.
886            
887             But using Convert::Binary::C, you can simply attach
888             a C<ByteOrder> tag to either the C<coords_3d> structure or to
889             the C<coords> member of the C<coords_msg> structure. Both
890             will work in this case. The only difference is that if you
891             tag the C<coords> member, C<coords_3d> will only be treated
892             as little endian if you L<C<pack>|/"pack"> or L<C<unpack>|/"unpack"> the
893             C<coords_msg> structure. (BTW, you could also tag all members
894             of C<coords_3d> individually, but that would be inefficient.)
895            
896             So, let's attach the C<ByteOrder> tag to the C<coords> member:
897            
898             $c->tag('coords_msg.coords', ByteOrder => 'LittleEndian');
899            
900             Assume the following binary message:
901            
902             0x0000 : 00 2A 00 0C FF FF FF FF 02 00 00 00 2A 00 00 00 : .*..........*...
903            
904             If you unpack this message...
905            
906             $msg = $c->unpack('coords_msg', $binary);
907            
908             ...you will get the following data structure:
909            
910             $msg = {
911             'header' => 42,
912             'length' => 12,
913             'coords' => {
914             'x' => -1,
915             'y' => 2,
916             'z' => 42
917             }
918             };
919            
920             Without the C<ByteOrder> tag, you would get:
921            
922             $msg = {
923             'header' => 42,
924             'length' => 12,
925             'coords' => {
926             'x' => -1,
927             'y' => 33554432,
928             'z' => 704643072
929             }
930             };
931            
932             The C<ByteOrder> tag is a I<recursive> tag, i.e. it applies
933             to all children of the tagged object recursively. Of course,
934             it is also possible to override a C<ByteOrder> tag by attaching
935             another C<ByteOrder> tag to a child type. Confused? Here's an
936             example. In addition to tagging the C<coords> member as little
937             endian, we now tag C<coords_3d.y> as big endian:
938            
939             $c->tag('coords_3d.y', ByteOrder => 'BigEndian');
940             $msg = $c->unpack('coords_msg', $binary);
941            
942             This will return the following data structure:
943            
944             $msg = {
945             'header' => 42,
946             'length' => 12,
947             'coords' => {
948             'x' => -1,
949             'y' => 33554432,
950             'z' => 42
951             }
952             };
953            
954             Note that if you tag both a type and a member of that type
955             within a compound, the tag attached to the type itself has
956             higher precedence. Using the example above, if you would attach
957             a C<ByteOrder> tag to both C<coords_msg.coords> and C<coords_3d>,
958             the tag attached to C<coords_3d> would always win.
959            
960             Also note that the C<ByteOrder> tag might not work as expected
961             along with bitfields, which is why the implementation is considered
962             experimental. Bitfields are currently B<not> affected by
963             the C<ByteOrder> tag at all. This is because the byte order
964             would affect the bitfield layout, and a consistent implementation
965             supporting multiple layouts of the same struct would be quite
966             bulky and probably slow down the whole module.
967            
968             If you really need the correct behaviour, you can use the
969             following trick:
970            
971             $le = Convert::Binary::C->new(ByteOrder => 'LittleEndian');
972            
973             $le->parse(<<'ENDC');
974            
975             typedef unsigned short u_16;
976             typedef unsigned long u_32;
977            
978             struct message {
979             u_16 header;
980             u_16 length;
981             struct {
982             u_32 a;
983             u_32 b;
984             u_32 c : 7;
985             u_32 d : 5;
986             u_32 e : 20;
987             } data;
988             };
989            
990             ENDC
991            
992             $be = $le->clone->ByteOrder('BigEndian');
993            
994             $le->tag('message.data', Format => 'Binary', Hooks => {
995             unpack => sub { $be->unpack('message.data', @_) },
996             pack => sub { $be->pack('message.data', @_) },
997             });
998            
999            
1000             $msg = $le->unpack('message', $binary);
1001            
1002             This uses the L<C<Format>|/"The Format Tag"> and L<C<Hooks>|/"The Hooks Tag"> tags
1003             along with a big endian L<C<clone>|/"clone"> of the original
1004             little endian object. It attaches hooks to the little endian
1005             object and in the hooks it uses the big endian object
1006             to L<C<pack>|/"pack"> and L<C<unpack>|/"unpack"> the binary data.
1007            
1008             =head2 The Dimension Tag
1009            
1010             The C<Dimension> tag allows you to override the declared dimension
1011             of an array for packing or unpacking data. The implementation of
1012             this tag is considered B<very experimental> and will B<definitely change> in
1013             a future release.
1014            
1015             That being said, the C<Dimension> tag is primarily useful to support
1016             variable length arrays. Usually, you have to write the following code
1017             for such a variable length array in C:
1018            
1019             struct c_message
1020             {
1021             unsigned count;
1022             char data[1];
1023             };
1024            
1025             So, because you cannot declare an empty array, you declare an array
1026             with a single element. If you have an ISO-C99 compliant compiler,
1027             you can write this code instead:
1028            
1029             struct c99_message
1030             {
1031             unsigned count;
1032             char data[];
1033             };
1034            
1035             This explicitly tells the compiler that C<data> is a flexible array
1036             member. Convert::Binary::C already uses this information to
1037             handle L<flexible array members|/"FLEXIBLE ARRAY MEMBERS AND INCOMPLETE TYPES"> in
1038             a special way.
1039            
1040             As you can see in the following example, the two types are treated
1041             differently:
1042            
1043             $data = pack 'NC*', 3, 1..8;
1044             $uc = $c->unpack('c_message', $data);
1045             $uc99 = $c->unpack('c99_message', $data);
1046            
1047             This will result in:
1048            
1049             $uc = {'count' => 3,'data' => [1]};
1050             $uc99 = {'count' => 3,'data' => [1,2,3,4,5,6,7,8]};
1051            
1052             However, only few compilers support ISO-C99, and you probably don't want
1053             to change your existing code only to get some extra features when
1054             using Convert::Binary::C.
1055            
1056             So it is possible to attach a tag to the C<data> member of
1057             the C<c_message> struct that tells Convert::Binary::C to treat
1058             the array as if it were flexible:
1059            
1060             $c->tag('c_message.data', Dimension => '*');
1061            
1062             Now both C<c_message> and C<c99_message> will behave exactly the
1063             same when using L<C<pack>|/"pack"> or L<C<unpack>|/"unpack">.
1064             Repeating the above code:
1065            
1066             $uc = $c->unpack('c_message', $data);
1067            
1068             This will result in:
1069            
1070             $uc = {'count' => 3,'data' => [1,2,3,4,5,6,7,8]};
1071            
1072             But there's more you can do. Even though it probably doesn't
1073             make much sense, you can tag a fixed dimension to an array:
1074            
1075             $c->tag('c_message.data', Dimension => '5');
1076            
1077             This will obviously result in:
1078            
1079             $uc = {'count' => 3,'data' => [1,2,3,4,5]};
1080            
1081             A more useful way to use the C<Dimension> tag is to set it to
1082             the name of a member in the same compound:
1083            
1084             $c->tag('c_message.data', Dimension => 'count');
1085            
1086             Convert::Binary::C will now use the value of that member to
1087             determine the size of the array, so unpacking will result in:
1088            
1089             $uc = {'count' => 3,'data' => [1,2,3]};
1090            
1091             Of course, you can also tag flexible array members. And yes,
1092             it's also possible to use more complex member expressions:
1093            
1094             $c->parse(<<ENDC);
1095             struct msg_header
1096             {
1097             unsigned len[2];
1098             };
1099            
1100             struct more_complex
1101             {
1102             struct msg_header hdr;
1103             char data[];
1104             };
1105             ENDC
1106            
1107             $data = pack 'NNC*', 42, 7, 1 .. 10;
1108            
1109             $c->tag('more_complex.data', Dimension => 'hdr.len[1]');
1110            
1111             $u = $c->unpack('more_complex', $data);
1112            
1113             The result will be:
1114            
1115             $u = {
1116             'hdr' => {
1117             'len' => [
1118             42,
1119             7
1120             ]
1121             },
1122             'data' => [
1123             1,
1124             2,
1125             3,
1126             4,
1127             5,
1128             6,
1129             7
1130             ]
1131             };
1132            
1133             By the way, it's also possible to tag arrays that are not
1134             embedded inside a compound:
1135            
1136             $c->parse(<<ENDC);
1137             typedef unsigned short short_array[];
1138             ENDC
1139            
1140             $c->tag('short_array', Dimension => '5');
1141            
1142             $u = $c->unpack('short_array', $data);
1143            
1144             Resulting in:
1145            
1146             $u = [0,42,0,7,258];
1147            
1148             The final and most powerful way to define a C<Dimension> tag is
1149             to pass it a subroutine reference. The referenced subroutine can
1150             execute whatever code is necessary to determine the size of the
1151             tagged array:
1152            
1153             sub get_size
1154             {
1155             my $m = shift;
1156             return $m->{hdr}{len}[0] / $m->{hdr}{len}[1];
1157             }
1158            
1159             $c->tag('more_complex.data', Dimension => \&get_size);
1160            
1161             $u = $c->unpack('more_complex', $data);
1162            
1163             As you can guess from the above code, the subroutine is being passed
1164             a reference to a hash that stores the already unpacked part of the
1165             compound embedding the tagged array. This is the result:
1166            
1167             $u = {
1168             'hdr' => {
1169             'len' => [
1170             42,
1171             7
1172             ]
1173             },
1174             'data' => [
1175             1,
1176             2,
1177             3,
1178             4,
1179             5,
1180             6
1181             ]
1182             };
1183            
1184             You can also pass custom arguments to the subroutines by using
1185             the L<C<arg>|/"arg"> method. This is similar to the functionality
1186             offered by the L<C<Hooks>|/"The Hooks Tag"> tag.
1187            
1188             Of course, all that works for the L<C<pack>|/"pack"> method
1189             as well.
1190            
1191             However, the current implementation has at least one shortcoming,
1192             which is why it's experimental: The C<Dimension> tag doesn't impact
1193             compound layout. This means that while you can alter the size of an
1194             array in the middle of a compound, the offset of the members after
1195             that array won't be impacted. I'd rather like to see the layout adapt
1196             dynamically, so this is what I'm hoping to implement in the future.
1197            
1198             =head2 The Hooks Tag
1199            
1200             Hooks are a special kind of tag that can be extremely useful.
1201            
1202             Using hooks, you can easily override the
1203             way L<C<pack>|/"pack"> and L<C<unpack>|/"unpack"> handle data
1204             using your own subroutines.
1205             If you define hooks for a certain data type, each time this
1206             data type is processed the corresponding hook will be called
1207             to allow you to modify that data.
1208            
1209             =head3 Basic Hooks
1210            
1211             Here's an example. Let's assume the following C code has been
1212             parsed:
1213            
1214             typedef unsigned long u_32;
1215             typedef u_32 ProtoId;
1216             typedef ProtoId MyProtoId;
1217            
1218             struct MsgHeader {
1219             MyProtoId id;
1220             u_32 len;
1221             };
1222            
1223             struct String {
1224             u_32 len;
1225             char buf[];
1226             };
1227            
1228             You could now use the types above and, for example, unpack
1229             binary data representing a C<MsgHeader> like this:
1230            
1231             $msg_header = $c->unpack('MsgHeader', $data);
1232            
1233             This would give you:
1234            
1235             $msg_header = {
1236             'len' => 13,
1237             'id' => 42
1238             };
1239            
1240             Instead of dealing with C<ProtoId>'s as integers, you would
1241             rather like to have them as clear text. You could provide
1242             subroutines to convert between clear text and integers:
1243            
1244             %proto = (
1245             CATS => 1,
1246             DOGS => 42,
1247             HEDGEHOGS => 4711,
1248             );
1249            
1250             %rproto = reverse %proto;
1251            
1252             sub ProtoId_unpack {
1253             $rproto{$_[0]} || 'unknown protocol'
1254             }
1255            
1256             sub ProtoId_pack {
1257             $proto{$_[0]} or die 'unknown protocol'
1258             }
1259            
1260             You can now register these subroutines by attaching a C<Hooks> tag
1261             to C<ProtoId> using the L<C<tag>|/"tag"> method:
1262            
1263             $c->tag('ProtoId', Hooks => { pack => \&ProtoId_pack,
1264             unpack => \&ProtoId_unpack });
1265            
1266             Doing exactly the same unpack on C<MsgHeader> again would
1267             now return:
1268            
1269             $msg_header = {
1270             'len' => 13,
1271             'id' => 'DOGS'
1272             };
1273            
1274             Actually, if you don't need the reverse operation, you don't even
1275             have to register a C<pack> hook. Or, even better, you can have a
1276             more intelligent C<unpack> hook that creates a dual-typed variable:
1277            
1278             use Scalar::Util qw(dualvar);
1279            
1280             sub ProtoId_unpack2 {
1281             dualvar $_[0], $rproto{$_[0]} || 'unknown protocol'
1282             }
1283            
1284             $c->tag('ProtoId', Hooks => { unpack => \&ProtoId_unpack2 });
1285            
1286             $msg_header = $c->unpack('MsgHeader', $data);
1287            
1288             Just as before, this would print
1289            
1290             $msg_header = {
1291             'len' => 13,
1292             'id' => 'DOGS'
1293             };
1294            
1295             but without requiring a C<pack> hook for packing, at least as
1296             long as you keep the variable dual-typed.
1297            
1298             Hooks are usually called with exactly one argument, which is the
1299             data that should be processed (see L<"Advanced Hooks"> for details
1300             on how to customize hook arguments). They are called in scalar
1301             context and expected to return the processed data.
1302            
1303             To get rid of registered hooks, you can either undefine only
1304             certain hooks
1305            
1306             $c->tag('ProtoId', Hooks => { pack => undef });
1307            
1308             or all hooks:
1309            
1310             $c->tag('ProtoId', Hooks => undef);
1311            
1312             Of course, hooks are not restricted to handling integer values.
1313             You could just as well attach hooks for the C<String> struct from
1314             the code above. A useful example would be to have these hooks:
1315            
1316             sub string_unpack {
1317             my $s = shift;
1318             pack "c$s->{len}", @{$s->{buf}};
1319             }
1320            
1321             sub string_pack {
1322             my $s = shift;
1323             return {
1324             len => length $s,
1325             buf => [ unpack 'c*', $s ],
1326             }
1327             }
1328            
1329             (Don't be confused by the fact that the C<unpack> hook
1330             uses C<pack> and the C<pack> hook uses C<unpack>.
1331             And also see L<"Advanced Hooks"> for a more clever approach.)
1332            
1333             While you would normally get the following output when unpacking
1334             a C<String>
1335            
1336             $string = {
1337             'len' => 12,
1338             'buf' => [
1339             72,
1340             101,
1341             108,
1342             108,
1343             111,
1344             32,
1345             87,
1346             111,
1347             114,
1348             108,
1349             100,
1350             33
1351             ]
1352             };
1353            
1354             you could just register the hooks using
1355            
1356             $c->tag('String', Hooks => { pack => \&string_pack,
1357             unpack => \&string_unpack });
1358            
1359             and you would get a nice human-readable Perl string:
1360            
1361             $string = 'Hello World!';
1362            
1363             Packing a string turns out to be just as easy:
1364            
1365             use Data::Hexdumper;
1366            
1367             $data = $c->pack('String', 'Just another Perl hacker,');
1368            
1369             print hexdump(data => $data);
1370            
1371             This would print:
1372            
1373             0x0000 : 00 00 00 19 4A 75 73 74 20 61 6E 6F 74 68 65 72 : ....Just.another
1374             0x0010 : 20 50 65 72 6C 20 68 61 63 6B 65 72 2C : .Perl.hacker,
1375            
1376             If you want to find out if or which hooks are registered for
1377             a certain type, you can also use the L<C<tag>|/"tag"> method:
1378            
1379             $hooks = $c->tag('String', 'Hooks');
1380            
1381             This would return:
1382            
1383             $hooks = {
1384             'unpack' => \&string_unpack,
1385             'pack' => \&string_pack
1386             };
1387            
1388             =head3 Advanced Hooks
1389            
1390             It is also possible to combine hooks with the C<Format> tag.
1391             This can be useful if you know better than Convert::Binary::C how
1392             to interpret the binary data. In the previous section, we've handled
1393             this type
1394            
1395             struct String {
1396             u_32 len;
1397             char buf[];
1398             };
1399            
1400             with the following hooks:
1401            
1402             sub string_unpack {
1403             my $s = shift;
1404             pack "c$s->{len}", @{$s->{buf}};
1405             }
1406            
1407             sub string_pack {
1408             my $s = shift;
1409             return {
1410             len => length $s,
1411             buf => [ unpack 'c*', $s ],
1412             }
1413             }
1414            
1415             $c->tag('String', Hooks => { pack => \&string_pack,
1416             unpack => \&string_unpack });
1417            
1418             As you can see in the hook code, C<buf> is expected to be an array
1419             of characters. For the L<C<unpack>|/"unpack"> case Convert::Binary::C
1420             first turns the binary data into a Perl array, and then the hook packs
1421             it back into a string. The intermediate array creation and destruction
1422             is completely useless.
1423             Same thing, of course, for the L<C<pack>|/"pack"> case.
1424            
1425             Here's a clever way to handle this. Just tag C<buf> as binary
1426            
1427             $c->tag('String.buf', Format => 'Binary');
1428            
1429             and use the following hooks instead:
1430            
1431             sub string_unpack2 {
1432             my $s = shift;
1433             substr $s->{buf}, 0, $s->{len};
1434             }
1435            
1436             sub string_pack2 {
1437             my $s = shift;
1438             return {
1439             len => length $s,
1440             buf => $s,
1441             }
1442             }
1443            
1444             $c->tag('String', Hooks => { pack => \&string_pack2,
1445             unpack => \&string_unpack2 });
1446            
1447             This will be exactly equivalent to the old code, but faster and
1448             probably even much easier to understand.
1449            
1450             But hooks are even more powerful. You can customize the arguments
1451             that are passed to your hooks and you can use L<C<arg>|/"arg"> to
1452             pass certain special arguments, such as the name of the type that
1453             is currently being processed by the hook.
1454            
1455             The following example shows how it is easily possible to peek into
1456             the perl internals using hooks.
1457            
1458             use Config;
1459            
1460             $c = new Convert::Binary::C %CC, OrderMembers => 1;
1461             $c->Include(["$Config{archlib}/CORE", @{$c->Include}]);
1462             $c->parse(<<ENDC);
1463             #include "EXTERN.h"
1464             #include "perl.h"
1465             ENDC
1466            
1467             $c->tag($_, Hooks => { unpack_ptr => [\&unpack_ptr,
1468             $c->arg(qw(SELF TYPE DATA))] })
1469             for qw( XPVAV XPVHV MAGIC MGVTBL HV );
1470            
1471             First, we add the perl core include path and parse F<perl.h>. Then,
1472             we add an C<unpack_ptr> hook for a couple of the internal data types.
1473            
1474             The C<unpack_ptr> and C<pack_ptr> hooks are called whenever a pointer
1475             to a certain data structure is processed. This is by far the most
1476             experimental part of the hooks feature, as this includes B<any> kind
1477             of pointer. There's no way for the hook to know the difference between
1478             a plain pointer, or a pointer to a pointer, or a pointer to an array
1479             (this is because the difference doesn't matter anywhere else in
1480             Convert::Binary::C).
1481            
1482             But the hook above makes use of another very interesting feature: It
1483             uses L<C<arg>|/"arg"> to pass special arguments to the hook subroutine.
1484             Usually, the hook subroutine is simply passed a single data argument.
1485             But using the above definition, it'll get a reference to the calling
1486             object (C<SELF>), the name of the type being processed (C<TYPE>) and
1487             the data (C<DATA>).
1488            
1489             But what does our hook look like?
1490            
1491             sub unpack_ptr {
1492             my($self, $type, $ptr) = @_;
1493             $ptr or return '<NULL>';
1494             my $size = $self->sizeof($type);
1495             $self->unpack($type, unpack("P$size", pack('I', $ptr)));
1496             }
1497            
1498             As you can see, the hook is rather simple. First, it receives the
1499             arguments mentioned above. It performs a quick check if the pointer
1500             is C<NULL> and shouldn't be processed any further. Next, it determines
1501             the size of the type being processed. And finally, it'll just use
1502             the C<P>I<n> unpack template to read from that memory location and
1503             recursively call L<C<unpack>|/"unpack"> to unpack the type. (And yes,
1504             this may of course again call other hooks.)
1505            
1506             Now, let's test that:
1507            
1508             my $ref = bless ["Boo!"], "Foo::Bar";
1509             my $ptr = hex(("$ref" =~ /\(0x([[:xdigit:]]+)\)$/)[0]);
1510            
1511             print Dumper(unpack_ptr($c, 'AV', $ptr));
1512            
1513             Just for the fun of it, we create a blessed array reference. But how
1514             do we get a pointer to the corresponding C<AV>? This is rather easy,
1515             as the address of the C<AV> is just the hex value that appears when
1516             using the array reference in string context. So we just grab that and
1517             turn it into decimal. All that's left to do is just call our hook,
1518             as it can already handle C<AV> pointers. And this is what we get:
1519            
1520             $VAR1 = {
1521             'sv_any' => {
1522             'xnv_u' => {
1523             'xnv_nv' => '0',
1524             'xgv_stash' => '<NULL>'
1525             },
1526             'xav_fill' => 0,
1527             'xav_max' => 0,
1528             'xiv_u' => {
1529             'xivu_iv' => 140312788,
1530             'xivu_uv' => 140312788,
1531             'xivu_p1' => 140312788,
1532             'xivu_i32' => 140312788,
1533             'xivu_namehek' => 140312788
1534             },
1535             'xmg_u' => {
1536             'xmg_magic' => '<NULL>',
1537             'xmg_ourstash' => '<NULL>'
1538             },
1539             'xmg_stash' => {
1540             'sv_any' => {
1541             'xnv_u' => {
1542             'xnv_nv' => '0',
1543             'xgv_stash' => '<NULL>'
1544             },
1545             'xhv_fill' => 2,
1546             'xhv_max' => 7,
1547             'xiv_u' => {
1548             'xivu_iv' => 2,
1549             'xivu_uv' => 2,
1550             'xivu_p1' => 2,
1551             'xivu_i32' => 2,
1552             'xivu_namehek' => 2
1553             },
1554             'xmg_u' => {
1555             'xmg_magic' => {
1556             'mg_moremagic' => '<NULL>',
1557             'mg_virtual' => {
1558             'svt_get' => 0,
1559             'svt_set' => 0,
1560             'svt_len' => 0,
1561             'svt_clear' => 0,
1562             'svt_free' => 136674986,
1563             'svt_copy' => 0,
1564             'svt_dup' => 0,
1565             'svt_local' => 0
1566             },
1567             'mg_private' => 0,
1568             'mg_type' => 99,
1569             'mg_flags' => 0,
1570             'mg_obj' => 0,
1571             'mg_ptr' => 139425604,
1572             'mg_len' => 12
1573             },
1574             'xmg_ourstash' => {
1575             'sv_any' => '<NULL>',
1576             'sv_refcnt' => 137217696,
1577             'sv_flags' => 6488064,
1578             'sv_u' => {
1579             'svu_iv' => '598828409405046784',
1580             'svu_uv' => '598828409405046784',
1581             'svu_rv' => 0,
1582             'svu_pv' => 0,
1583             'svu_array' => 0,
1584             'svu_hash' => 0,
1585             'svu_gp' => 0
1586             }
1587             }
1588             },
1589             'xmg_stash' => '<NULL>'
1590             },
1591             'sv_refcnt' => 2,
1592             'sv_flags' => 578813964,
1593             'sv_u' => {
1594             'svu_iv' => '2314885530418902580',
1595             'svu_uv' => '2314885530418902580',
1596             'svu_rv' => 139425332,
1597             'svu_pv' => 139425332,
1598             'svu_array' => 139425332,
1599             'svu_hash' => 139425332,
1600             'svu_gp' => 139425332
1601             }
1602             }
1603             },
1604             'sv_refcnt' => 1,
1605             'sv_flags' => 1074790411,
1606             'sv_u' => {
1607             'svu_iv' => '3689628089976684756',
1608             'svu_uv' => '3689628089976684756',
1609             'svu_rv' => 140312788,
1610             'svu_pv' => 140312788,
1611             'svu_array' => 140312788,
1612             'svu_hash' => 140312788,
1613             'svu_gp' => 140312788
1614             }
1615             };
1616            
1617             Even though it is rather easy to do such stuff using C<unpack_ptr> hooks,
1618             you should really know what you're doing and do it with extreme care
1619             because of the limitations mentioned above. It's really easy to run into
1620             segmentation faults when you're dereferencing pointers that point to
1621             memory which you don't own.
1622            
1623             =head3 Performance
1624            
1625             Using hooks doesn't come for free. In performance-critical applications
1626             you have to keep in mind that hooks are actually perl subroutines
1627             and that they are called once for every value of a registered
1628             type that is being packed or unpacked. If only about 10% of the
1629             values require hooks to be called, you'll hardly notice the
1630             difference (if your hooks are implemented efficiently, that is).
1631             But if all values would require hooks to be called, that alone
1632             could easily make packing and unpacking very slow.
1633            
1634             =head2 Tag Order
1635            
1636             Since it is possible to attach multiple tags to a single type,
1637             the order in which the tags are processed is important. Here's
1638             a small table that shows the processing order.
1639            
1640             pack        unpack
1641             ---------------------
1642             Hooks       Format
1643             Format      ByteOrder
1644             ByteOrder   Hooks
1645            
1646             As a general rule, the L<C<Hooks>|/"The Hooks Tag"> tag is always
1647             the first thing processed when packing data, and the last thing
1648             processed when unpacking data.
1649            
1650             The L<C<Format>|/"The Format Tag"> and L<C<ByteOrder>|/"The ByteOrder Tag"> tags
1650             are mutually
1651             are exclusive, but when both are given the L<C<Format>|/"The Format Tag"> tag
1652             wins.
1653            
1654             =head1 METHODS
1655            
1656             =head2 new
1657            
1658             =over 8
1659            
1660             =item C<new>
1661            
1662             =item C<new> OPTION1 =E<gt> VALUE1, OPTION2 =E<gt> VALUE2, ...
1663            
1664             The constructor is used to create a new Convert::Binary::C object.
1665             You can simply use
1666            
1667             $c = new Convert::Binary::C;
1668            
1669             without additional arguments to create an object, or you can
1670             optionally pass any arguments to the constructor that are
1671             described for the L<C<configure>|/"configure"> method.
1672            
1673             =back
1674            
1675             =head2 configure
1676            
1677             =over 8
1678            
1679             =item C<configure>
1680            
1681             =item C<configure> OPTION
1682            
1683             =item C<configure> OPTION1 =E<gt> VALUE1, OPTION2 =E<gt> VALUE2, ...
1684            
1685             This method can be used to configure an existing Convert::Binary::C
1686             object or to retrieve its current configuration.
1687            
1688             To configure the object, the list of options consists of key
1689             and value pairs and must therefore contain an even number of
1690             elements. L<C<configure>|/"configure"> (and also L<C<new>|/"new"> if
1691             used with configuration options) will throw an exception if you
1692             pass an odd number of elements. Configuration will normally look
1693             like this:
1694            
1695             $c->configure(ByteOrder => 'BigEndian', IntSize => 2);
1696            
1697             To retrieve the current value of a configuration option, you
1698             must pass a single argument to L<C<configure>|/"configure"> that
1699             holds the name of the option, just like
1700            
1701             $order = $c->configure('ByteOrder');
1702            
1703             If you want to get the values of all configuration options at
1704             once, you can call L<C<configure>|/"configure"> without any
1705             arguments and it will return a reference to a hash table that
1706             holds the whole object configuration. This can be conveniently
1707             used with the L<Data::Dumper|Data::Dumper> module, for example:
1708            
1709             use Convert::Binary::C;
1710             use Data::Dumper;
1711            
1712             $c = new Convert::Binary::C Define => ['DEBUGGING', 'FOO=123'],
1713             Include => ['/usr/include'];
1714            
1715             print Dumper($c->configure);
1716            
1717             Which will print something like this:
1718            
1719             $VAR1 = {
1720             'Define' => [
1721             'DEBUGGING',
1722             'FOO=123'
1723             ],
1724             'ByteOrder' => 'LittleEndian',
1725             'LongSize' => 4,
1726             'IntSize' => 4,
1727             'ShortSize' => 2,
1728             'HasMacroVAARGS' => 1,
1729             'Assert' => [],
1730             'UnsignedChars' => 0,
1731             'DoubleSize' => 8,
1732             'CharSize' => 1,
1733             'EnumType' => 'Integer',
1734             'PointerSize' => 4,
1735             'EnumSize' => 4,
1736             'DisabledKeywords' => [],
1737             'FloatSize' => 4,
1738             'Alignment' => 1,
1739             'LongLongSize' => 8,
1740             'LongDoubleSize' => 12,
1741             'KeywordMap' => {},
1742             'Include' => [
1743             '/usr/include'
1744             ],
1745             'HasCPPComments' => 1,
1746             'Bitfields' => {
1747             'Engine' => 'Generic'
1748             },
1749             'UnsignedBitfields' => 0,
1750             'Warnings' => 0,
1751             'CompoundAlignment' => 1,
1752             'OrderMembers' => 0
1753             };
1754            
1755             Since you may not always want to write a L<C<configure>|/"configure"> call
1756             when you only want to change a single configuration item, you can
1757             use any configuration option name as a method name, like:
1758            
1759             $c->ByteOrder('LittleEndian') if $c->IntSize < 4;
1760            
1761             (Yes, the example doesn't make very much sense... ;-)
1762            
1763             However, you should keep in mind that configuration methods
1764             that can take lists (namely C<Include>, C<Define> and C<Assert>,
1765             but not C<DisabledKeywords>) may behave slightly differently from
1766             their L<C<configure>|/"configure"> equivalent.
1767             If you pass these methods a single argument that is an array
1768             reference, the current list will be B<replaced> by the new one,
1769             which is just the behaviour of the
1770             corresponding L<C<configure>|/"configure"> call.
1771             So the following are equivalent:
1772            
1773             $c->configure(Define => ['foo', 'bar=123']);
1774             $c->Define(['foo', 'bar=123']);
1775            
1776             But if you pass a list of strings instead of an array reference
1777             (which cannot be done when using L<C<configure>|/"configure">),
1778             the new list items are B<appended> to the current list, so
1779            
1780             $c = new Convert::Binary::C Include => ['/include'];
1781             $c->Include('/usr/include', '/usr/local/include');
1782             print Dumper($c->Include);
1783            
1784             $c->Include(['/usr/local/include']);
1785             print Dumper($c->Include);
1786            
1787             will first print all three include paths, but finally
1788             only C</usr/local/include> will be configured:
1789            
1790             $VAR1 = [
1791             '/include',
1792             '/usr/include',
1793             '/usr/local/include'
1794             ];
1795             $VAR1 = [
1796             '/usr/local/include'
1797             ];
1798            
1799             Furthermore, configuration methods can be chained together,
1800             as they return a reference to their object if called as a
1801             set method. So, if you like, you can configure your object
1802             like this:
1803            
1804             $c = Convert::Binary::C->new(IntSize => 4)
1805             ->Define(qw( __DEBUG__ DB_LEVEL=3 ))
1806             ->ByteOrder('BigEndian');
1807            
1808             $c->configure(EnumType => 'Both', Alignment => 4)
1809             ->Include('/usr/include', '/usr/local/include');
1810            
1811             In the example above, C<qw( ... )> is the word list quoting
1812             operator. It returns a list of all non-whitespace sequences,
1813             and is especially useful for configuring preprocessor defines
1814             or assertions. The following assignments are equivalent:
1815            
1816             @array = ('one', 'two', 'three');
1817             @array = qw(one two three);
1818            
1819             You can configure the following options. Unknown options, as well
1820             as invalid values for an option, will cause the object to throw
1821             exceptions.
1822            
1823             =over 4
1824            
1825             =item C<IntSize> =E<gt> 0 | 1 | 2 | 4 | 8
1826            
1827             Set the number of bytes that are occupied by an integer. This is
1828             in most cases 2 or 4. If you set it to zero, the size of an
1829             integer on the host system will be used. This is also the
1830             default unless overridden by C<CBC_DEFAULT_INT_SIZE> at compile time.
1831            
1832             =item C<CharSize> =E<gt> 0 | 1 | 2 | 4 | 8
1833            
1834             Set the number of bytes that are occupied by a C<char>.
1835             This rarely needs to be changed, except for some platforms
1836             that don't care about bytes, for example DSPs.
1837             If you set this to zero, the size of a C<char> on the host
1838             system will be used. This is also the default unless
1839             overridden by C<CBC_DEFAULT_CHAR_SIZE> at compile time.
1840            
1841             =item C<ShortSize> =E<gt> 0 | 1 | 2 | 4 | 8
1842            
1843             Set the number of bytes that are occupied by a short integer.
1844             Although integers explicitly declared as C<short> should always
1845             be 16 bit, there are compilers that make a short
1846             8 bit wide. If you set it to zero, the size of a short
1847             integer on the host system will be used. This is also the
1848             default unless overridden by C<CBC_DEFAULT_SHORT_SIZE> at compile
1849             time.
1850            
1851             =item C<LongSize> =E<gt> 0 | 1 | 2 | 4 | 8
1852            
1853             Set the number of bytes that are occupied by a long integer.
1854             If set to zero, the size of a long integer on the host system
1855             will be used. This is also the default unless overridden
1856             by C<CBC_DEFAULT_LONG_SIZE> at compile time.
1857            
1858             =item C<LongLongSize> =E<gt> 0 | 1 | 2 | 4 | 8
1859            
1860             Set the number of bytes that are occupied by a long long
1861             integer. If set to zero, the size of a long long integer
1862             on the host system, or 8, will be used. This is also the
1863             default unless overridden by C<CBC_DEFAULT_LONG_LONG_SIZE> at
1864             compile time.
1865            
1866             =item C<FloatSize> =E<gt> 0 | 1 | 2 | 4 | 8 | 12 | 16
1867            
1868             Set the number of bytes that are occupied by a single
1869             precision floating point value.
1870             If you set it to zero, the size of a C<float> on the
1871             host system will be used. This is also the default unless
1872             overridden by C<CBC_DEFAULT_FLOAT_SIZE> at compile time.
1873             For details on floating point support,
1874             see L<"FLOATING POINT VALUES">.
1875            
1876             =item C<DoubleSize> =E<gt> 0 | 1 | 2 | 4 | 8 | 12 | 16
1877            
1878             Set the number of bytes that are occupied by a double
1879             precision floating point value.
1880             If you set it to zero, the size of a C<double> on the
1881             host system will be used. This is also the default unless
1882             overridden by C<CBC_DEFAULT_DOUBLE_SIZE> at compile time.
1883             For details on floating point support,
1884             see L<"FLOATING POINT VALUES">.
1885            
1886             =item C<LongDoubleSize> =E<gt> 0 | 1 | 2 | 4 | 8 | 12 | 16
1887            
1888             Set the number of bytes that are occupied by a C<long double>
1889             floating point value.
1890             If you set it to zero, the size of a C<long double> on
1891             the host system, or 12, will be used. This is also the
1892             default unless overridden by C<CBC_DEFAULT_LONG_DOUBLE_SIZE> at compile
1893             time. For details on floating point support,
1894             see L<"FLOATING POINT VALUES">.
1895            
1896             =item C<PointerSize> =E<gt> 0 | 1 | 2 | 4 | 8
1897            
1898             Set the number of bytes that are occupied by a pointer. This is
1899             in most cases 2 or 4. If you set it to zero, the size of a
1900             pointer on the host system will be used. This is also the
1901             default unless overridden by C<CBC_DEFAULT_PTR_SIZE> at compile time.
1902            
1903             =item C<EnumSize> =E<gt> -1 | 0 | 1 | 2 | 4 | 8
1904            
1905             Set the number of bytes that are occupied by an enumeration type.
1906             On most systems, this is equal to the size of an integer,
1907             which is also the default. However, for some compilers, the
1908             size of an enumeration type depends on the size occupied by the
1909             largest enumerator. So the size may vary between 1 and 8. If you
1910             have
1911            
1912             enum foo {
1913             ONE = 100, TWO = 200
1914