#!/usr/bin/env perl $serializer_method = "Serialize"; $deserializer_method = "Deserialize"; # Parse command line arguments $debug_level = 0; my $binary_file = ""; foreach (@ARGV){ if(/^-h/){ &Usage(); }elsif(/-d/){ $debug_level++; }elsif(/c=/){ push(@clclasses, $'); $user_supplied_classes = "true"; }elsif(/classes=/){ open(FILE, $'); foreach(){chomp; if(length($_)>0){push(@clclasses, $_);}} close(FILE); $user_supplied_classes = "true"; }elsif(/methods=/){ if($' eq "yes"){$print_methods = true;} }else{ $binary_file = $_; } } if(length($binary_file) == 0){&Usage();} # Get the names of all class/struct types from the specified binary my $typenamesref; if($user_supplied_classes eq "true"){ $typenamesref = \@clclasses; }else{ $typenamesref = &GetClasses($binary_file); } # Get the class definitions for the classes listed in $typenamesref my $classrefs = &GetClassDefinitions($typenamesref); # Filter out classes that don't have the serializer method # And don't appear anywhere in the type tree of a class that # does have a serializer method. $filteredrefs = &FilterClassDefinitions($classrefs); # Sort filtered classes in alphabetical order by classname @$filteredrefs = sort {$$a{"classname"} cmp $$b{"classname"}}(@$filteredrefs); # Determine output filename based on input filename @paths = split(/\//, $binary_file); $xmlfilename = pop(@paths).".xml"; open(FILE, ">$xmlfilename"); print "Writing $xmlfilename ... "; # Print out all of the classes in the filtered list my $classref; print FILE "\n"; $tabs .= "\t"; foreach $classref (@$filteredrefs){ &PrintClass($classref); } chop($tabs); print FILE "\n"; close(FILE); open(FILE,"$xmlfilename"); @lines = ; close(FILE); print @lines." lines\n"; ######################################################################## sub Usage() { print "\n"; print "Usage:\n"; print " jil_exe2xml [options] executable_file\n"; print "\n"; print " This script is part of the JIL (JLab Introspection Library) package.\n"; print "jil_exe2xml takes an executable binary file created by the GNU compiler/\n"; print "linker. The executable must have debugging symbols included (see the -g\n"; print "option \n"; print "\n"; exit(0); } ######################################################################## sub GetClasses() { my $binary_file = $_[0]; my $gdb_commands_file = "tmp2.gdb"; open(GDB, ">$gdb_commands_file"); print GDB "info types\n"; close(GDB); # Run gdb using the command file we just created and capture # the output my $gdb_output = `gdb $binary_file -batch -x $gdb_commands_file`; # Clean up temporary file if($debug_level==0){unlink $gdb_commands_file;} my @typenames; my @lines = split(/\n/, $gdb_output); my %used; foreach $line (@lines){ # Chop off trailing semi-colon chop($line); # We only want the typedef lines, but not those with std::allocator if($line !~ /^typedef/){next;} if($line =~ /std::allocator/){next;} # Typename is entire line after the "typedef " $line =~ /^typedef /; $typename = $'; # Filter out atomic types if($typename eq "char"){next;} if($typename eq "int"){next;} if($typename eq "float"){next;} if($typename eq "void"){next;} if($typename eq "bool"){next;} if($typename eq "double"){next;} if($typename eq "struct"){next;} # Types that are auto-generated or in standard libraries or # just plain cause problems when passed to gdb's ptype command if(length($typename) == 0){next;} if($typename =~ /^allocator\$gdb_commands_file"); foreach $class (@classes){print GDB "ptype $class\n";} close(GDB); # Run gdb using the command file we just created and capture # the output $gdb_output = `gdb $binary_file -batch -x $gdb_commands_file`; # Clean up temporary file if($debug_level==0){unlink $gdb_commands_file;} # Split the output into the individual class definitions @classdefs = split(/type = (class|struct|enum) /, $gdb_output); my $classindex = -1; my $class_struct_enum = ""; foreach $classdef (@classdefs){ if($classdef eq "enum" || $classdef eq "class" || $classdef eq "struct"){ # The @classdefs split above keeps the class,struct, or enum # string as an item in the @classdefs array because of the # parentheses(). Since it appears as the entry before the actual # type definition, we copy it here for use on the next item. # We need this info so we can set the isenum flag. $class_struct_enum = $classdef; } @lines = split(/\n/, $classdef); if ($lines[0] !~ /\{/){next;} # Filter out non type-def lines $classindex++; # Initialize arrays that will hold class definitions %classdef = (); @datas = (); @methods = (); $hasSerializer = ""; $hasDeserializer = ""; if($class_struct_enum eq "enum"){$classdef{"isEnum"} = "true";} # Loop over lines of the definition $i = 0; $section = "public"; foreach $line (@lines){ # On some versions of gdb, template classes based on multiple # types have a space after the commas that seperate the types. # Remove those spaces here so that the whole templated type # is put into one token $line =~ s/\, /\,/g; # Get rid of std:: qualifiers. They are superfluous to us $line =~ s/std:://g; @tokens = split(/\s+/, $line); # If the first "token" is empty, remove it if(length($tokens[0])<1){ @tokens = reverse(@tokens); pop(@tokens); @tokens = reverse(@tokens); } if($i++ == 0){ # Get class and base class from first line # Format will be: # class : public base_class {\n $classname = $tokens[0]; $baseclass = $tokens[3]; # Some gdb versions will only print "typedef struct {", # omitting the actual type name. We try and recover by # using the name from the @classes array. if($classname =~ /^\{/){ $classname = $classes[$classindex]; } # For templated types, the name contains angle brackets <> # These are special in XML so we need to replace them $classname =~ s/\/\}/g ; $baseclass =~ s/\/\}/g ; $classdef{"classname"} = $classname; $classdef{"baseclass"} = $baseclass; }elsif($line =~ /public:/) { # Switch to public definition section $section = "public"; }elsif($line =~ /protected:/) { # Switch to protected definition section $section = "protected"; }elsif($line =~ /private:/) { # Switch to private definition section $section = "private"; }else{ # Must be a data member or method. # Remember if this class has a (de)serializer method. if($line =~ /$serializer_method\(/){$classdef{"hasSerializer"} = "true";} if($line =~ /$deserializer_method\(/){$classdef{"hasDeserializer"} = "true";} # If there is a round bracket, then it must be a method if($line =~ /\(/){ # chop off leading white space and replace angle brackets # with curly ones my $method = $line; $method =~ s/^\s+//g; $method =~ s/\>/\}/g; $method =~ s/\ # These are special in XML so we need to replace them $type =~ s/\/\}/g ; $typedef{"type"} = $type; # For STL classes, we need to record the definition of the # type on which we're based. if(length($templatebase)>0){ # For STL types based on other STL types, the typedef is # cut off at the first comma. Add a comma back on in those # cases so the above code will properly find the type. if($templatebase =~ /^(vector\<|list\<)/){$templatebase .= ",";} $typedef{"typebaseref"} = &ParseTypeDefinition($templatebase); } \%typedef; # return reference to the type definition } ######################################################################## sub FilterClassDefinitions { # Here we want to find the sublist of the class definitions passed # to us that contains 1. all of the classes that have serializer # methods and 2. all of the classes that the ones from 1. depend on. # Note that gcc turns all structures into classes. These will # necessarily NOT have serializer methods. We would still like to # to write those "struct" definitions out though so that the objects that # use them can try embedding the struct serialization inside the # class serializer. # # Since the class definitions can have nested types that are # arbitrarily deep (vectors of lists of ...) we need to recursively # loop through the types, building a list of the dependent classes as # we go. my $classrefs = $_[0]; # First, just make a list of the classes with a serializer method foreach $classref (@$classrefs){ my $classname = $$classref{"classname"}; # Add class if it has a serializer method declared or ... if($$classref{"hasSerializer"} eq "true"){ &AddClassToOutput($classname, $classrefs); } # ... the user supplied the classes if($user_supplied_classes eq "true"){ &AddClassToOutput($classname, $classrefs); } } # Copy class definition references into a scaler array while(($classname, $classref) = each %filtered){ push(@filtered, $classref); } \@filtered; } ######################################################################## sub AddClassToOutput() { my $classname = $_[0]; my $classrefs = $_[1]; # Avoid infinite recursion #print "$tabs Adding class -- $classname\n"; if(length($filtered{$classname})>0){return;} my $classref = &FindClassRef($classname, $classrefs); if(length($classref)<=0){return;} $filtered{$classname} = $classref; # Add base class (if any) $tabs .= "\t"; if(length($$classref{"baseclass"})>0){ #print "$tabs Adding base class -- ".$$classref{"baseclass"}." classname=".$$classref{"classname"}."\n"; &AddClassToOutput($$classref{"baseclass"}, $classrefs); } # Add classes/structs from member data my $datarefs = $$classref{"data_arrayref"}; my $dataref; #print "$tabs Added class --- ".$$classref{"classname"}." ref=$classref\n"; foreach $dataref (@$datarefs){ my $typeref = $$dataref{"typedef_ref"}; #print "$tabs type=".$$typeref{"type"}."\n"; &AddTypeToOutput($typeref, $classrefs); } chop($tabs); } ######################################################################## sub AddTypeToOutput() { my $typeref = $_[0]; my $classrefs = $_[1]; # This may be a class name or an atomic type (int, float etc.) my $typename = $$typeref{"type"}; #print "$tabs Adding type -- $typename ref = $typeref\n"; # Avoid infinite recursion if(length($filtered{$typename})>0){return;} # Try adding this type. If it's not a class, it will be ignored # by AddClassToOutput() $tabs .= "\t"; &AddClassToOutput($typename, $classrefs); # If this type is based on another type (e.g. it's an STL vector) # then add the type it's based on to the filtered list as well my $basetyperef = $$typeref{"typebaseref"}; if(length($basetyperef)>0){&AddTypeToOutput($basetyperef, $classrefs);} chop($tabs); } ######################################################################## sub FindClassRef() { my $classname = $_[0]; my $classrefs = $_[1]; my $classref; foreach $classref (@$classrefs){ #print "Comparing $classname to ".$$classref{"classname"}." ref=".$classref."\n"; if($classname eq $$classref{"classname"}){ return $classref; } } return ""; } ######################################################################## sub PrintClass() { my $classref = $_[0]; my $datarefs = $$classref{"data_arrayref"}; # Enums are trivial for out purposes if($$classref{"isEnum"} eq "true"){ print FILE "$tabs\n"; return; } # Open Class tag my $classname = $$classref{"classname"}; print FILE "$tabs0){ print FILE " baseclass=\"".$$classref{"baseclass"}."\""; } if($$classref{"hasSerializer"} eq "true"){print FILE " hasSerializer=\"true\"";} if($$classref{"hasDeserializer"} eq "true"){print FILE " hasDeserializer=\"true\"";} print FILE ">\n"; # Add data member tags $tabs .= "\t"; foreach $dataref (@$datarefs){ # Write type definition &PrintType($$dataref{"typedef_ref"}, $$dataref{"name"}, $$dataref{"section"}); } # Add methods if($print_methods eq true){ my $methodrefs = $$classref{"methods_arrayref"}; my $methodref; foreach $methodref (@$methodrefs){ print FILE "$tabs\n"; } } chop($tabs); # Close Class tag print FILE "$tabs\n\n"; } ######################################################################## sub PrintType() { my $typeref = $_[0]; my $varname = $_[1]; my $section = $_[2]; # Open type tag my $type = $$typeref{"type"}; print FILE "$tabs0){print FILE " name=\"$varname\" section=\"$section\"";} @attributes = ("const", "static", "unsigned", "pointer"); foreach $att (@attributes){ if($$typeref{$att} eq "true"){print FILE " $att=\"true\"";} } $size = $$typeref{"size"}; if(length($size)>0){print FILE " size=\"$size\"";} if(length($$typeref{"typebaseref"})>0){ # Type is based on another. Put based-on-type on its own line(s) print FILE ">\n"; $tabs .= "\t"; &PrintType($$typeref{"typebaseref"}); chop($tabs); print FILE "$tabs\n"; }else{ # Simple type. Close tag on same line print FILE "/>\n"; } }