SOAdvancedDissector

SOAdvancedDissector Git Source Tree

Root/SOAdvancedDissector.py

1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright Grégory Soutadé
5
6# This file is part of SOAdvancedDissector
7
8# SOAdvancedDissector is free software: you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# SOAdvancedDissector is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with SOAdvancedDissector. If not, see <http://www.gnu.org/licenses/>.
20#
21
22import re
23import os
24import shutil
25import struct
26import sys
27import subprocess
28import argparse
29from objects import *
30from cppprototypeparser import CPPPrototypeParser
31from display import ProgressionDisplay
32
#
# Regexp for readelf -sW|c++filt
#
# 6: 006f25d0 20 OBJECT WEAK DEFAULT 20 vtable for dpdoc::Annot
#
# All patterns are raw strings : regex escapes like "\:" or "\[" must reach
# the re module untouched instead of being read as (invalid) string escapes.
num_re = r'(?P<num>[0-9]+\:)'
address_re = r'(?P<address>[0-9a-f]+)'
size_re = r'(?P<size>[0-9]+)'
# Template for an upper case word captured in a named group
ustr_re = r'(?P<{}>[A-Z]+)'
type_re = ustr_re.format('type')
link_re = ustr_re.format('link')
visibility_re = ustr_re.format('visibility')
ndx_re = r'(?P<ndx>[0-9]+)'
name_re = r'(?P<name>.*)'

# One symbol per line, fields separated by spaces
readelf = r'[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}'
readelf = readelf.format(num_re, address_re, size_re, type_re, link_re, visibility_re, ndx_re, name_re)
readelf_re = re.compile(readelf)

#
# Regexp for readelf --sections|c++filt
#
# [ 0] NULL 00000000 000000 000000 00 0 0 0
# [ 1] .interp PROGBITS 00000154 000154 000019 00 A 0 0 1
#
# num_re, name_re, type_re, address_re and size_re are deliberately
# redefined here : readelf_re has already been compiled above.
num_re = r'(?P<num>\[[ ]*[0-9]+\])'
# Greedy : the captured section name may keep trailing spaces
name_re = r'(?P<name>.+)'
type_re = ustr_re.format('type')
address_re = r'(?P<address>[0-9a-f]+)'
offset_re = r'(?P<offset>[0-9a-f]+)'
size_re = r'(?P<size>[0-9a-f]+)'
sections = r'[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+.*'
sections = sections.format(num_re, name_re, type_re, address_re, offset_re, size_re)
sections_re = re.compile(sections)
65
#
# Regexp for vtable-dumper --demangle|c++filt
#
# 0 0000000000000000
# 4 006e9fd0 (& typeinfo for zip::EditableStream)
# Inherit from dputils::GuardedStream
# Inherit from dpio::StreamClient
# 8 002a793d zip::EditableStream::~EditableStream()
#
# Raw value line : decimal index followed by a value that may be negative
# and/or 0x prefixed ("0 0000000000000000")
dynvtable_idx_re = re.compile(r'(?P<index>[0-9]+)[ ]+(?P<vtable_index>[-]?0[x]?[0-9a-f]+)')
# Entry line : decimal index, hexadecimal address, one space then the name
# ("8 002a793d zip::EditableStream::~EditableStream()")
dynvtable_entry_re = re.compile(r'(?P<index>[0-9]+)[ ]+(?P<address>[0-9a-f]+) (?P<name>.*)')
# Base class line ("Inherit from dpio::StreamClient")
dynvtable_inherit_re = re.compile(r'Inherit from (?P<inherit>.*)')
77
#
# Global variables
#
# Namespace receiving every object that has no namespace of its own
global_namespace = Namespace('global')
namespaces = {'global':global_namespace} # Root namespaces by name, starts with one (without namespace) 'global'
matched_lines = [] # Matched lines (re match objects) from readelf symbols output
sections_lines = [] # Matched lines (re match objects) from readelf sections output
sections_addr = [] # Contains tuple (section_addr_start, section_addr_end, section_offset)
address_size = 4 # Target address size in bytes (32bits by default)
classes_with_inheritance = [] # Filled by parseDynamicVtable(), consumed by fixupInheritance()
namespace_dependencies = {} # NOTE(review): not referenced anywhere in the visible code

# Progress reporter shared by every processing step
display = ProgressionDisplay()
91
def line_count(filename):
    """Count the lines of a file, like 'wc -l <filename>'

    The file is read in binary chunks and newline bytes are counted,
    so no external 'wc' program is needed and a last line without
    trailing newline is not counted (same as wc).

    Parameters
    ----------
    filename : str
        File to inspect

    Returns
    -------
    int
        Line count of filename, 0 if it cannot be read
    """
    try:
        with open(filename, 'rb') as fd:
            # iter(callable, sentinel) stops on the empty read at end of file
            return sum(chunk.count(b'\n')
                       for chunk in iter(lambda: fd.read(65536), b''))
    except (OSError, TypeError, ValueError):
        # Best effort : the result only sizes a progress display
        return 0
104
def findBinOffset(addr):
    """Find offset into binary file from target address
    Sections must have been extracted

    Each entry of sections_addr is (start, end, offset) with
    end = start + size, so end is the first address after the section
    and is excluded from it.

    Parameters
    ----------
    addr : int
        Target address

    Returns
    -------
    int
        Offset or None if not found
    """
    for (start, end, offset) in sections_addr:
        # Half open range : an address equal to 'end' belongs to what follows
        if start <= addr < end:
            return (addr - start) + offset
    return None
123
def funcnameFromProtype(fullname):
    """Split a prototype (includes namespaces + base class) into
    its function name (name + parameters) and its namespaces

    Parameters
    ----------
    fullname : str
        Full function name (package::NCXStreamReceiver::totalLengthReady(unsigned int))

    Returns
    -------
    tuple
        (function name + parameters, namespaces joined with '::').
        Names starting with 'typeinfo' give ('typeinfo()', '')
    """
    if not fullname.startswith('typeinfo'):
        parser = CPPPrototypeParser(fullname)
        shortname = parser.funcname + parser.fullparameters
        return (shortname, '::'.join(parser.namespaces))

    # typeinfo symbols are not parsed
    return ('typeinfo()', '')
143
def findObjectInCache(name):
    """Look for an already known object in namespaces

    Parameters
    ----------
    name : str
        Full class name (package::NCXStreamReceiver)

    Returns
    -------
    obj
        Found object or None
    """
    parser = CPPPrototypeParser(name)

    # Functions are only searched in the global namespace
    if parser.is_function:
        return global_namespace.child(parser.funcname)

    path = parser.namespaces
    if not path:
        return global_namespace.child(parser.classname)

    rootname = path[0]
    if rootname not in namespaces:
        return None

    # Walk down one namespace at a time instead of using find on root,
    # to avoid duplicate name in sub namespaces
    # eg : ns0::ns1::ns2::ns0::ns3::func
    current = namespaces[rootname]
    for subname in path[1:]:
        current = current.find(subname)
        if not current:
            return None

    return current.find(parser.classname)
180
def findObjectFromAddr(addr):
    """Find the symbol located at an address (in readelf output)

    Parameters
    ----------
    addr : int
        Object address

    Returns
    -------
    obj
        First entry of matched_lines with this address or None
    """
    candidates = (symbol for symbol in matched_lines
                  if int(symbol.group('address'), 16) == addr)
    # First hit wins, None when nothing is at this address
    return next(candidates, None)
200
def createClass(fullname):
    """Create a class object and register it in its namespace.
    Missing namespaces are created on the way

    Parameters
    ----------
    fullname : str
        Full class name (package::NCXStreamReceiver)

    Returns
    -------
    obj
        Created class
    """
    parser = CPPPrototypeParser(fullname)
    path = parser.namespaces
    class_ = Class(parser.classname, '::'.join(path))

    # No namespace at all : class lives in global namespace
    if not path:
        global_namespace.addChild(class_)
        return class_

    # Root namespace is kept in the namespaces dictionary
    rootname = path[0]
    if rootname in namespaces:
        owner = namespaces[rootname]
    else:
        owner = Namespace(rootname)
        namespaces[rootname] = owner

    # Sub namespaces are children of their parent namespace
    for subname in path[1:]:
        sub = owner.child(subname)
        if not sub:
            sub = Namespace(subname)
            owner.addChild(sub)
        owner = sub

    owner.addChild(class_)
    return class_
234
235
def parseDynamicVtable(filename):
    """Parse dynamic vtable (vtable-dumper) file
    and fix static vtable

    A class description starts with a 'Vtable for <class>' line and
    ends with an empty line. Lines of classes that are not in cache
    are skipped.

    Parameters
    ----------
    filename : str
        Filename to parse
    """
    header = 'Vtable for '

    display.setTarget(' * Parse dynamic vtable', line_count(filename), 10)
    curObj = None
    with open(filename, 'r') as fd:
        for rawline in fd:
            display.progress(1)
            line = rawline.strip()

            # Empty line -> end of current class
            if not line:
                curObj = None
                continue

            # Outside of a class : only a new vtable header is accepted
            if curObj is None:
                if line.startswith(header):
                    curObj = findObjectInCache(line[len(header):])
                continue

            # First, try object vtable entry
            entry = dynvtable_entry_re.match(line)
            if entry:
                idx = int(entry.group('index'))
                if entry.group('name') == '__cxa_pure_virtual':
                    func = Function('virtfunc{}()'.format(idx), 0, True, True)
                else:
                    funcaddr = int(entry.group('address'), 16)
                    symbol = findObjectFromAddr(funcaddr)
                    if symbol:
                        (funcname, funcnamespaces) = funcnameFromProtype(symbol.group('name'))
                    else:
                        funcname = 'unknown_virtfunc{}()'.format(idx)
                        funcnamespaces = ''
                        sys.stderr.write('Error dynvtable0, no match for {}\n'.format(hex(funcaddr)))
                    func = Function(funcname, funcaddr, virtual=True, namespace=funcnamespaces)
                # Entry index is a byte offset, slot number is offset/address size
                curObj.updateVirtualFunction(idx // address_size, func)
                continue

            # Index vtable entry
            raw = dynvtable_idx_re.match(line)
            if raw:
                funcaddr = int(raw.group('vtable_index'), 16)
                func = Function('vtable_index{}'.format(-funcaddr), funcaddr, True)
                curObj.updateVirtualFunction(int(raw.group('index')) // address_size, func)
                continue

            # Inherit entry
            inherit = dynvtable_inherit_re.match(line)
            if inherit:
                basename = inherit.group('inherit')
                base = findObjectInCache(basename)
                if not base:
                    base = createClass(basename)
                curObj.addBaseClass(base)
                classes_with_inheritance.append(curObj)
                continue

            sys.stderr.write('Error dynvtable, no match for {}\n'.format(line))
            sys.stderr.flush()
    display.finish()
304
def fixupInheritance():
    """Fix inheritance : each class reports implemented pure virtual function
    into its base(s)

    Calls fixupInheritance() on every entry of classes_with_inheritance,
    then empties this list.
    """
    global classes_with_inheritance
    display.setTarget(' * Fixup inheritance', len(classes_with_inheritance))
    for class_ in classes_with_inheritance:
        display.progress(1)
        class_.fixupInheritance()
    display.finish()
    # Release memory. Rebind to an empty list (not None) so that len() and
    # append() on this global keep working after this step
    classes_with_inheritance = []
316
def parseStaticVtable(binfile):
    """Read every 'vtable for <class>' object from binary file
    and create static vtable entries

    For each vtable symbol of matched_lines, size/address_size words are
    read (little endian) at the file offset of the symbol :
     * 0 is registered as a pure virtual function
     * a word whose top hexadecimal digit is f (at full address width)
       is registered as a vtable index (negative value)
     * anything else is resolved to a symbol by its address

    Parameters
    ----------
    binfile : str
        Filename of binary file to inspect objects
    """

    display.setTarget(' * Parse static vtable', len(matched_lines), 10)
    # Little endian unsigned word of the target address size, None if unsupported
    word_format = {4: '<I', 8: '<Q'}.get(address_size)
    # Shift that leaves only the top hexadecimal digit of a full width word
    top_digit_shift = 8 * address_size - 4
    with open(binfile, 'rb') as fd:
        for match in matched_lines:
            display.progress(1)
            name = match.group('name')
            if not name.startswith('vtable for'): continue
            address = int(match.group('address'), 16)
            # vtable for mtext::cts::ListOfGlyphRunsCTS
            classname = name[len('vtable for '):]
            class_ = findObjectInCache(classname)
            if class_ is None:
                class_ = createClass(classname)
            binaddr = findBinOffset(address)
            if binaddr is None:
                # Symbol is outside of every known section : nothing to read
                sys.stderr.write('No section found for {} at : {}\n'.format(name, hex(address)))
                continue
            nb_funcs = int(match.group('size')) // address_size
            fd.seek(binaddr)
            for i in range(nb_funcs):
                found = None    # Symbol found at funcaddr (own variable, 'match' stays intact)
                funcaddr = 0    # Unsupported address size : nothing is read, entry stays 0
                if word_format:
                    raw = fd.read(address_size)
                    if len(raw) < address_size:
                        sys.stderr.write('Truncated vtable for {}\n'.format(classname))
                        break
                    funcaddr, = struct.unpack(word_format, raw)
                if funcaddr == 0:
                    # Address == 0 --> pure virtual
                    func = Function('virtfunc{}()'.format(i), 0, True, True)
                else:
                    funcname = ''
                    funcnamespaces = ''
                    if (funcaddr >> top_digit_shift) == 0xf:
                        # Negative address. Test is done on the full width word :
                        # a regular address like 0xf1234 must not match
                        funcname = 'vtable_index{}'.format(-funcaddr)
                    else:
                        found = findObjectFromAddr(funcaddr)
                        if found is None:
                            sys.stderr.write('No func found at : {}\n'.format(hex(funcaddr)))
                            funcname = 'unknown_virtfunc{}()'.format(i)
                        else:
                            try:
                                (funcname, funcnamespaces) = funcnameFromProtype(found.group('name'))
                            except Exception:
                                sys.stderr.write('FFP except : {}'.format(found.group('name')))
                                funcname = 'unknown_virtfunc{}()'.format(i)
                    func = Function(funcname, funcaddr, virtual=True, namespace=funcnamespaces)
                try:
                    class_.addVirtualFunction(func)
                except Exception:
                    # Give some context, then let the error go up
                    print(found.group('name') if found else name)
                    print(class_)
                    raise
    display.finish()
378
def parseTypeinfo():
    """Create one class for each 'typeinfo for <class>' symbol
    of matched_lines
    """
    # "typeinfo for css::MediaParser" --> "css::MediaParser"
    prefix_len = len('typeinfo for ')
    symbols = (entry.group('name') for entry in matched_lines)
    typeinfos = [symbol[prefix_len:] for symbol in symbols
                 if symbol.startswith('typeinfo for')]

    # Classes are created in sorted order to have the right
    # class hierarchy
    typeinfos.sort()
    for classname in typeinfos:
        createClass(classname)
396
def parseSymbolFile(filename):
    """Parse readelf symbols output file,
    fill matched_lines and update address_size

    Only FUNC and OBJECT symbols with GLOBAL or WEAK binding and DEFAULT
    visibility are kept. Target address size (in bytes) is computed from
    the width of the address column of the first kept symbol
    (8 hexadecimal digits -> 4, 16 -> 8).

    Parameters
    ----------
    filename : str
        Filename to parse
    """
    # Without this, the assignment below creates a local variable and
    # the module level address_size never changes
    global address_size

    display.setTarget(' * Parse symbols file', line_count(filename), 10)
    with open(filename, 'r') as fd:
        for line in fd:
            display.progress(1)
            line = line.rstrip()
            if not line: continue
            match = readelf_re.match(line)
            if not match: continue
            if match.group('type') not in ('FUNC', 'OBJECT'):
                continue
            if match.group('link') not in ('GLOBAL', 'WEAK'):
                continue
            # Plain comparison : "not in ('DEFAULT')" is a substring test
            if match.group('visibility') != 'DEFAULT':
                continue
            matched_lines.append(match)
    if matched_lines:
        # Two hexadecimal digits per byte
        address_size = len(matched_lines[0].group('address')) // 2
    display.finish()
425
def parseSectionFile(filename):
    """Parse readelf sections output file
    and fill sections_lines and sections_addr

    Parameters
    ----------
    filename : str
        Filename to parse
    """

    # First step : keep every line that describes a section
    display.setTarget(' * Parse sections file (1/2)', line_count(filename))
    with open(filename, 'r') as fd:
        for line in fd:
            display.progress(1)
            line = line.rstrip()
            if not line: continue
            match = sections_re.match(line)
            if not match: continue
            sections_lines.append(match)
    display.finish()

    # Second step : one (start, end, offset) tuple per section, with values
    # read as hexadecimal. Progression target is the number of sections,
    # which is what the loop iterates on
    display.setTarget(' * Parse sections file (2/2)', len(sections_lines))
    for match in sections_lines:
        display.progress(1)
        start = int(match.group('address'), 16)
        size = int(match.group('size'), 16)
        offset = int(match.group('offset'), 16)
        sections_addr.append((start, start+size, offset))

    display.finish()
457
def addAllMembers():
    """Add all other members that have not been computed

    Every symbol of matched_lines except typeinfo and vtable ones becomes
    a Function (FUNC symbols) or an Attribute (other symbols) and is added
    to the class or namespace found for it. If none is found, a class is
    created with createClass(). Symbols that cannot be attached are
    reported on stderr.
    """

    display.setTarget(' * Add all members', len(matched_lines), 10)
    for match in matched_lines:
        display.progress(1)
        virtual = False
        funcaddr = int(match.group('address'),16)
        name = match.group('name')
        # typeinfo and vtable symbols are skipped here
        if name.startswith('typeinfo') or\
           name.startswith('vtable'):
            continue
        # Thunk : keep the prototype of the target and flag it as virtual
        if name.startswith('non-virtual thunk to '):
            name = name[len('non-virtual thunk to '):]
            virtual = True

        parser = CPPPrototypeParser(name)
        class_ = None
        obj = None
        funcname = ''
        classname = ''
        if match.group('type') == 'FUNC':
            classname = parser.fullclassname
            # C functions doesn't have () in their name
            if not '(' in name:
                obj = Function(name + '()', funcaddr)
                global_namespace.addChild(obj)
                continue
            else:
                (funcname, funcnamespaces) = funcnameFromProtype(name)
                obj = Function(funcname, funcaddr, virtual=virtual, namespace=funcnamespaces)
                # No classname : add into global namespace
                if not classname:
                    global_namespace.addChild(obj)
                    continue
            class_ = findObjectInCache(classname)
        else: # Object
            # NOTE(review): if parser gives neither funcname nor classname,
            # obj stays None here -- confirm this cannot happen
            if parser.funcname:
                obj = Attribute(parser.funcname, funcaddr)
                classname = parser.classname
            elif parser.classname:
                obj = Attribute(parser.classname, funcaddr)
                # No namespaces : add into global namespace
                if not parser.namespaces:
                    global_namespace.addChild(obj)
                    continue
                classname = '::'.join(parser.namespaces)
            class_ = findObjectInCache(parser.fullclassname)

        # Try to search in namespaces from C++ method
        if not class_ and parser.namespaces:
            class_ = findObjectInCache('::'.join(parser.namespaces))

        # Try to search in namespaces from C function
        if not class_ and classname in namespaces.keys():
            class_ = namespaces[classname]

        # If still not, it's a new class/function
        if not class_:
            if not classname:
                # Nothing to attach the object to : report and skip
                sys.stderr.write('AAM Err3 "{}" "{}"\n'.format(name, funcname))
                continue
            else:
                class_ = createClass(classname)

        try:
            # Could be class or namespace
            class_.addChild(obj)
        except:
            # Any failure of addChild is only reported, processing goes on
            sys.stderr.write('Not class {} {}\n'.format(name, class_.name))
            sys.stderr.flush()
    display.finish()
532
def fixBadClassAssertion():
    """An object could have been considered as a class and created
    as such while it is in fact just a namespace.

    Classes of the global namespace that look like a namespace are
    replaced by a namespace filled from them.
    """
    children = global_namespace.childs
    display.setTarget(' * Fix bad class assertion (1/2)', len(children))
    wrongClasses = []
    replacements = []
    for candidate in children:
        display.progress(1)
        if type(candidate) is not Class or not candidate.looksLikeNamespace():
            continue
        # Reuse the root namespace with the same name when there is one
        if candidate.name in namespaces:
            replacement = namespaces[candidate.name]
        else:
            replacement = Namespace(candidate.name)
        replacement.fillFrom(candidate)
        replacements.append(replacement)
        wrongClasses.append(candidate)
    display.finish()

    # Global namespace is only modified once its iteration is over
    display.setTarget(' * Fix bad class assertion (2/2)', len(wrongClasses)+len(replacements))
    for wrongClass in wrongClasses:
        display.progress(1)
        global_namespace.removeChild(wrongClass)

    for replacement in replacements:
        display.progress(1)
        global_namespace.addChild(replacement)
    display.finish()
561
def analyseDependencies():
    """Create classes that are referenced by namespaces dependencies
    but have not been found before
    """
    # First step : collect dependencies of every root namespace
    display.setTarget(' * Analyse dependencies (1/2)', len(namespaces))
    allParams = []
    for namespace in namespaces.values():
        display.progress(1)
        allParams.extend(namespace.getDependencies())
    display.finish()

    if not allParams:
        return

    # Second step : create the unknown ones (each name is handled once)
    uniqueParams = list(set(allParams))
    display.setTarget(' * Analyse dependencies (2/2)', len(uniqueParams))
    for param in uniqueParams:
        display.progress(1)
        if not findObjectInCache(param):
            createClass(param)
    display.finish()
584
header_re = re.compile('[A-Za-z0-9_-]+')
def headerNameToFilename(name):
    """Compute a header file name (without extension) from a
    namespace, class or library name

    For a name with '::', only its first namespace is used.
    Result is in lower case, without directory nor .so* extension,
    and keeps only characters accepted by header_re.

    Parameters
    ----------
    name : str
        Name to transform

    Returns
    -------
    str
        Computed name, None for a name with '::' but no namespace
    """
    if '::' not in name:
        res = name.lower()
    else:
        parser = CPPPrototypeParser(name)
        if not parser.namespaces:
            return None
        res = parser.namespaces[0].lower()

    if '.so' in res:
        # Remove directory and .so* extension
        res = os.path.basename(res).split('.so')[0]

    # Joining every accepted run of characters drops all the other ones
    return ''.join(header_re.findall(res))
613
614
def outputHeader(namespace, filename):
    """Create header file from namespace description

    The file contains an include guard built from filename, one
    #include per dependency header (each header only once) and the
    namespace itself formatted as a string.

    Parameters
    ----------
    namespace : obj
        Namespace to write. Must provide getDependencies()

    filename : str
        Output filename
    """

    dependencies = namespace.getDependencies()
    # Remove standard dependencies. Both are tested on their own :
    # '__gnu_cxx' has to go even if 'std' is there too
    for standard in ('std', '__gnu_cxx'):
        if standard in dependencies:
            dependencies.remove(standard)
    headers = []

    # libfoo.h --> _LIBFOO_H
    define = '_{}'.format(os.path.basename(filename).upper().replace('.', '_'))
    with open(filename, 'w') as fd:
        fd.write('/*\n')
        fd.write(' File automatically generated by SOAdvancedDissector.py\n')
        fd.write(' More information at http://indefero.soutade.fr/p/soadvanceddissector\n')
        fd.write('*/\n\n')

        fd.write('#ifndef {}\n'.format(define))
        fd.write('#define {}\n\n'.format(define))

        for dep in dependencies:
            headername = headerNameToFilename(dep)
            if headername and not headername in headers:
                fd.write('#include <{}.h>\n'.format(headername))
                # Filter multiple headers
                headers.append(headername)

        if dependencies:
            fd.write('\n\n')

        fd.write('{}'.format(namespace))

        # Generated header ends with a newline
        fd.write('#endif // {}\n'.format(define))
658
def writeResult(target, outputDir, cleanOutputDir):
    """Write result into header files (one per namespace)

    'std' namespace is not written. Header of the 'global' namespace
    is named after target, the other ones after their namespace.

    Parameters
    ----------
    target : str
        Analysed file, gives its name to the 'global' namespace header

    outputDir : str
        Directory that receives header files, created if needed

    cleanOutputDir : bool
        Clean output directory before processing
    """

    if cleanOutputDir:
        print('Clean {}'.format(outputDir))
        shutil.rmtree(outputDir, ignore_errors=True)

    # Also creates missing parent directories, no error if it exists
    os.makedirs(outputDir, exist_ok=True)

    setPrintIndent(True)
    try:
        keys = namespaces.keys()
        display.setTarget(' * Write output files', len(keys))
        for namespace in keys:
            if namespace == 'std': continue # Don't try to write std classes
            if namespace == 'global':
                filename = '{}.h'.format(headerNameToFilename(target))
            else:
                filename = '{}.h'.format(headerNameToFilename(namespace))
            outputHeader(namespaces[namespace], os.path.join(outputDir, filename))
            display.progress(1)
        display.finish()
    finally:
        # Print mode is restored even if a header cannot be written
        setPrintIndent(False)
687
688
def _buildArgumentParser():
    """Describe command line options

    Returns
    -------
    argparse.ArgumentParser
        Parser for command line
    """
    parser = argparse.ArgumentParser(description='Extract interfaces (classes, functions, variables) from a GNU/Linux shared library')
    # Mandatory inputs
    parser.add_argument('-f', '--file', dest="target", required=True,
                        help="Target file")
    parser.add_argument('-s', '--section-file', dest="section_file", required=True,
                        help="Section file (result from 'readelf --sections|c++filt')")
    parser.add_argument('-S', '--symbol-file', dest="symbol_file", required=True,
                        help="Symbol file (result from 'readelf -sW|c++filt')")
    # Optional input
    parser.add_argument('-V', '--vtable-file', dest="vtable_file",
                        help="Dynamic vtable file (result from 'vtable-dumper --demangle|c++filt')")
    # Output options
    parser.add_argument('-o', '--output-dir', dest="output_dir", default="./output",
                        help="output directory (default ./output)")
    parser.add_argument('-c', '--clean-output-dir', dest="clean_output",
                        default=False, action="store_true",
                        help="Clean output directory before computing (instead update it)")
    parser.add_argument('-r', '--print-raw-virtual-table', dest="raw_virtual_table",
                        default=False, action="store_true",
                        help="Print raw virtual table (debug purpose)")
    return parser


def main():
    """Script entry point : parse input files, rebuild classes
    and namespaces, then write header files
    """
    args = _buildArgumentParser().parse_args()
    withVtableFile = bool(args.vtable_file)

    setPrintRawVirtualTable(args.raw_virtual_table)

    print('Analyse {}'.format(args.target))

    # Sections and symbols have to be known before vtables are read
    parseSectionFile(args.section_file)
    parseSymbolFile(args.symbol_file)
    parseTypeinfo()
    parseStaticVtable(args.target)

    if withVtableFile:
        parseDynamicVtable(args.vtable_file)
        fixupInheritance()

    addAllMembers()

    if withVtableFile:
        fixBadClassAssertion()

    analyseDependencies()
    writeResult(args.target, args.output_dir, args.clean_output)

    print('Result wrote in {}'.format(args.output_dir))


if __name__ == "__main__":
    main()

Archive Download this file

Branches