magik.py

A pure Python replacement for libmagic. Supports most libmagic features, plus

several additional features not provided by libmagic. Tailored specifically

for quickly searching blocks of data for multiple embedded signatures.

# Names exported by "from <module> import *". The previous spelling rebound the
# builtin all() and listed a name with a trailing space that matches nothing.
__all__ = ['Magic']

def init(self, **kwargs):
    '''
    Sets the default signature-result attributes, then hands any keyword
    overrides to the parent class constructor.

    @kwargs - Keyword arguments forwarded unchanged to the parent constructor.

    Returns None.
    '''
    # NOTE(review): this reads like a class constructor (__init__) whose class
    # header is not present in this part of the file - confirm before renaming.

    # These are set by signature keyword tags.
    # Keyword tags can also set any other object attributes,
    # including those in binwalk.core.module.Result.
    self.jump = 0
    self.many = False
    self.adjust = 0
    self.strlen = 0
    self.string = False
    self.invalid = False
    self.once = False
    self.overlap = False

    # These are set by code internally
    self.id = 0

    # Kwargs overrides the defaults set above (presumably the parent
    # constructor copies them onto the instance - verify against the parent).
    # NOTE(review): super(self.__class__, self) recurses endlessly if this
    # class is ever subclassed; name the class explicitly once it is known.
    super(self.__class__, self).__init__(**kwargs)

    # Must be computed last so that an 'invalid' override is honoured
    self.valid = (not self.invalid)

class SignatureLine(object):

    '''
    Responsible for parsing signature lines from magic signature files.

    Each instance describes a single line of a signature:

        level / offset               - indentation level and (possibly complex) offset
        type / signed / endianess    - the data type and how to interpret it
        size / fmt / pkfmt           - number of bytes to read and the struct format
        operator / opvalue           - optional operation applied to the data first
        condition / value / regex    - the comparison to perform
        format / tags                - printable format string and keyword tags
    '''

    # Printed strings are truncated to this size
    MAX_STRING_SIZE = 128

    def __init__(self, line):
        '''
        Class constructor. Responsible for parsing a line from a signature file.

        @line - A line of text from the signature file.

        Raises ParserException if the line is malformed.
        Returns None.
        '''
        self.tags = {}
        self.text = line
        self.regex = False

        # Split the line on any white space; for this to work, backslash-escaped
        # spaces ('\ ') are replaced with their escaped hex value ('\x20').
        #
        # [offset] [data type] [comparison value] [format string]
        # 0        belong      0x12345678         Foo file type,
        # >4       string      x                  file name: %s,
        parts = line.replace('\\ ', '\\x20').split(None, 3)

        # Sanity check on the split line
        if len(parts) not in [3, 4]:
            raise ParserException("Invalid signature line: '%s'" % line)

        # The indentation level is determined by the number of '>' characters at
        # the beginning of the signature line.
        self.level = parts[0].count('>')

        # Get rid of the indentation characters and try to convert the remaining
        # characters to an integer offset. This will fail if the offset is a complex
        # value (e.g., '(4.l+16)'), in which case it is left as a string.
        self.offset = parts[0].replace('>', '')
        try:
            self.offset = int(self.offset, 0)
        except ValueError:
            pass

        # self.type is the specified data type ('belong', 'string', etc)
        self.type = parts[1]
        self.opvalue = None
        self.operator = None

        # Each data type can specify an additional operation to be performed on the
        # data being scanned before performing a comparison (e.g., 'belong&0xFF' will
        # AND the data with 0xFF before the comparison is performed).
        #
        # We support the following operators:
        for operator in ['&', '|', '*', '+', '-', '/', '~', '^']:
            # Look for each operator in self.type
            if operator in self.type:
                # If found, split self.type into the type and operator value
                (self.type, self.opvalue) = self.type.split(operator, 1)

                # Keep a record of the specified operator
                self.operator = operator

                # Try to convert the operator value into an integer. This works for
                # simple operator values, but not for complex types (e.g.,
                # '(4.l+12)'), which are left as strings.
                try:
                    self.opvalue = int(self.opvalue, 0)
                except ValueError:
                    pass

                # Only one operator type is supported, so break as soon as one
                # is found
                break

        # If the specified type starts with 'u' (e.g., 'ubelong'), then it is
        # unsigned; else, it is signed. startswith() is used so that an empty
        # type (e.g., a type field of just '&0xFF') reaches the "Unknown data
        # type" error below instead of raising an IndexError here.
        if self.type.startswith('u'):
            self.signed = False
            self.type = self.type[1:]
        else:
            self.signed = True

        # Big endian values start with 'be' ('belong'), little endian values start with 'le' ('lelong').
        # The struct module uses '>' to denote big endian and '<' to denote
        # little endian.
        if self.type.startswith('be'):
            self.type = self.type[2:]
            self.endianess = '>'
        elif self.type.startswith('le'):
            self.endianess = '<'
            self.type = self.type[2:]
        # Assume big endian if no endianess was explicitly specified
        else:
            self.endianess = '>'

        # Check the comparison value for the type of comparison to be performed (e.g.,
        # '=0x1234', '>0x1234', etc). If no operator is specified, '=' is implied.
        if parts[2][0] in ['=', '!', '>', '<', '&', '|', '^', '~']:
            self.condition = parts[2][0]
            self.value = parts[2][1:]
        else:
            self.condition = '='
            self.value = parts[2]

        # If this is a wildcard value, explicitly set self.value to None
        if self.value == 'x':
            self.value = None
        # String values need to be decoded, as they may contain escape
        # characters (e.g., '\x20')
        elif self.type == 'string':
            # String types support multiplication to easily match large
            # repeating byte sequences
            if '*' in self.value:
                try:
                    p = self.value.split('*')
                    self.value = p[0]
                    for n in p[1:]:
                        self.value *= int(n, 0)
                except Exception:
                    raise ParserException("Failed to expand string '%s' with integer '%s' in line '%s'" % (self.value, n, line))
            try:
                self.value = binwalk.core.compat.string_decode(self.value)
            except ValueError:
                raise ParserException("Failed to decode string value '%s' in line '%s'" % (self.value, line))
        # If a regex was specified, compile it
        elif self.type == 'regex':
            self.regex = True

            try:
                self.value = re.compile(self.value)
            except Exception as e:
                raise ParserException("Invalid regular expression '%s': %s" % (self.value, str(e)))
        # Non-string types are integer values
        else:
            try:
                self.value = int(self.value, 0)
            except ValueError:
                raise ParserException("Failed to convert value '%s' to an integer on line '%s'" % (self.value, line))

        # Sanity check to make sure the first line of a signature has an
        # explicit value
        if self.level == 0 and self.value is None:
            raise ParserException("First element of a signature must specify a non-wildcard value: '%s'" % (line))

        # Set the size and struct format value for the specified data type.
        # This must be done after the value has been parsed out above.
        if self.type == 'string':
            # Strings don't have a struct format value, since they don't have
            # to be unpacked
            self.fmt = None

            # If a string type has a specific value, set the comparison size to
            # the length of that string
            if self.value:
                self.size = len(self.value)
            # Else, truncate the string to self.MAX_STRING_SIZE
            else:
                self.size = self.MAX_STRING_SIZE
        elif self.type == 'regex':
            # Regular expressions don't have a struct format value, since they
            # don't have to be unpacked
            self.fmt = None
            # The size of a matching regex is unknown until it is applied to
            # some data
            self.size = self.MAX_STRING_SIZE
        elif self.type == 'byte':
            self.fmt = 'b'
            self.size = 1
        elif self.type == 'short':
            self.fmt = 'h'
            self.size = 2
        elif self.type == 'quad':
            self.fmt = 'q'
            self.size = 8
        # Assume 4 byte length for all other supported data types
        elif self.type in ['long', 'date']:
            self.fmt = 'i'
            self.size = 4
        else:
            raise ParserException("Unknown data type '%s' in line '%s'" % (self.type, line))

        # The struct module uses the same characters for signed and unsigned data types,
        # except that unsigned data types are upper case. The above if-else code sets
        # self.fmt to the lower case (signed) values. Types without a struct format
        # (string, regex) have nothing to convert.
        if self.fmt and not self.signed:
            self.fmt = self.fmt.upper()

        # If a struct format was identified, create a format string to be passed to struct.unpack
        # which specifies the endianess and data type format.
        if self.fmt:
            self.pkfmt = '%c%c' % (self.endianess, self.fmt)
        else:
            self.pkfmt = None

        # Check if a format string was specified (this is optional)
        if len(parts) == 4:
            # %lld formats are only supported if Python was built with HAVE_LONG_LONG
            self.format = parts[3].replace('%ll', '%l')

            # Regex to parse out tags, which are contained within curly braces
            retag = re.compile(r'\{.*?\}')

            # Parse out tag keywords from the format string
            for match in retag.finditer(self.format):
                # Get rid of the curly braces.
                tag = match.group().replace('{', '').replace('}', '')

                # If the tag specifies a value, it will be colon delimited
                # (e.g., '{name:%s}'); a bare tag ('{once}') is a boolean flag.
                if ':' in tag:
                    (n, v) = tag.split(':', 1)
                else:
                    n = tag
                    v = True

                # Record the tag name and its (still unformatted) value
                self.tags[n] = v

            # Remove all tags from the printable format string
            self.format = retag.sub('', self.format).strip()
        else:
            self.format = ""

class Signature(object):

    '''
    Class to hold signature data and generate signature regular expressions.
    '''

    def __init__(self, id, first_line):
        '''
        Class constructor.

        @id         - A ID value to uniquely identify this signature.
        @first_line - The first SignatureLine of the signature (subsequent
                      SignatureLines should be added via self.append).

        Returns None.
        '''
        self.id = id
        self.lines = [first_line]
        self.title = first_line.format
        self.offset = first_line.offset
        self.regex = self._generate_regex(first_line)
        # An explicit '{confidence:N}' tag wins; otherwise the number of magic
        # bytes is used as the confidence value.
        # NOTE(review): a tag value taken from the signature line is stored as
        # given (it may be a string rather than an integer) - confirm consumers.
        self.confidence = first_line.tags.get('confidence', first_line.size)

    def _generate_regex(self, line):
        '''
        Generates a regular expression from the magic bytes of a signature.
        The regex is used by Magic._analyze.

        @line - The first SignatureLine object of the signature.

        Returns a compiled regular expression.
        '''
        # Regex types are already compiled expressions.
        # Note that since re.finditer is used, unless the specified
        # regex accounts for it, overlapping signatures will be ignored.
        if line.type == 'regex':
            return line.value

        # Strings are taken at face value; integer values are turned into
        # regex strings based on their data type size and endianess.
        if line.type == 'string':
            restr = line.value
        else:
            # Build the bytes least-significant first. Every byte is masked so
            # that negative (two's complement) and oversized values yield valid
            # single-byte characters instead of raising in chr().
            octets = [chr((line.value >> (8 * i)) & 0xFF) for i in range(line.size)]
            # Anything other than explicit little endian is big endian
            if line.endianess != '<':
                octets.reverse()
            restr = ''.join(octets)

        # Since re.finditer is used on a per-signature basis, signatures should be crafted carefully
        # to ensure that they aren't potentially self-overlapping (e.g., a signature of "ABCDAB" could
        # be confused by the byte sequence "ABCDABCDAB"). The longer the signature, the less likely an
        # unintentional overlap is, although files could still be maliciously crafted to cause false
        # negative results.
        #
        # Thus, unless a signature has been explicitly marked as knowingly overlapping ('{overlap}'),
        # spit out a warning about any self-overlapping signatures.
        if 'overlap' not in line.tags:
            for i in range(1, line.size):
                if restr[i:] == restr[0:(line.size - i)]:
                    binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
                    break

        return re.compile(re.escape(restr))

    def append(self, line):
        '''
        Add a new SignatureLine object to the signature.

        @line - A new SignatureLine instance.

        Returns None.
        '''
        # This method is kind of useless, but may be a nice wrapper for future
        # code.
        self.lines.append(line)

class Magic(object):

'''
Primary class for loading signature files and scanning
blocks of arbitrary data for matching signatures.
'''

def __init__(self, exclude=[], include=[], invalid=False):
    '''
    Class constructor.

    @include - A list of regex strings describing which signatures should be included in the scan results.
    @exclude - A list of regex strings describing which signatures should not be included in the scan results.
    @invalid - If set to True, invalid results will not be ignored.

    Returns None.
    '''
    # NOTE: the list defaults are only iterated below, never stored or mutated.

    # Used to save the block of data passed to self.scan (see additional
    # comments in self.scan)
    self.data = ""
    # A list of Signature class objects, populated by self.parse (see also:
    # self.load)
    self.signatures = []
    # A set of signatures with the 'once' keyword that have already been
    # displayed once
    self.display_once = set()
    self.dirty = True

    self.show_invalid = invalid
    self.includes = [re.compile(x) for x in include]
    self.excludes = [re.compile(x) for x in exclude]

    # Regex rule to remove a literal '\b' (backslash followed by 'b') together
    # with the character preceding it from formatted signature strings
    # (see self._analyze).
    self.bspace = re.compile(".\\\\b")
    # Regex rule to match printable ASCII characters in formatted signature
    # strings (see self._analyze).
    self.printable = re.compile("[ -~]*")
    # Regex rule to find format strings
    self.fmtstr = re.compile("%[^%]")
    # Regex rule to find periods (see self._do_math). A raw string is used
    # because '\.' is not a valid escape sequence in a normal string literal.
    self.period = re.compile(r"\.")

def _filtered(self, text):
    '''
    Decides whether a piece of text is suppressed by the include/exclude rules.

    @text - The string to check against filter rules.

    Returns True if the string should be filtered out, i.e., not displayed.
    Returns False if the string should be displayed.
    '''
    # Rules are applied to the lower-cased text, so filter rules don't have
    # to worry about case sensitivity.
    lowered = text.lower()

    # Include rules are exclusive: when any exist, at least one must match.
    if self.includes and not any(rule.search(lowered) for rule in self.includes):
        return True

    # Otherwise the text is filtered only if an exclude rule matches it
    # (an exclude match takes precedence over an include match).
    return any(rule.search(lowered) for rule in self.excludes)

def _do_math(self, offset, expression):
    '''
    Parses and evaluates complex expressions, e.g., "(4.l+12)", "(6*32)", etc.

    Every "<offset>.<type>" reference in the expression is replaced by the
    value read from self.data at that offset (relative to @offset), and the
    resulting arithmetic expression is then evaluated.

    @offset     - The offset inside self.data that the current signature starts at.
    @expression - The expression to evaluate.

    Raises ParserException if an offset reference uses an unknown type character.
    Returns an integer value that is the result of the evaluated expression.
    '''
    # struct format for each supported offset type character
    type_formats = {
        'b': 'b',   # byte (endianess is irrelevant for a single byte)
        'B': 'b',
        's': '<h',  # little endian short
        'l': '<i',  # little endian long
        'S': '>h',  # big endian short
        'L': '>i',  # big endian long
    }

    # Does the expression contain an offset (e.g., "(4.l+12)")?
    if '.' in expression and '(' in expression:
        replacements = {}

        for period in [match.start() for match in self.period.finditer(expression)]:
            # Separate the offset field into the integer offset and type
            # values (o and t respectively)
            s = expression[:period].rfind('(') + 1
            # The offset address may be an evaluatable expression, such as '(4+0.L)', typically the result
            # of the original offset being something like '(&0.L)'.
            o = binwalk.core.common.MathExpression(expression[s:period]).value
            t = expression[period + 1]

            # Re-build just the parsed offset portion of the expression
            text = "%s.%c" % (expression[s:period], t)

            # Have we already evaluated this offset expression? If so, skip
            # it.
            if text in replacements:
                continue

            # An unknown type character previously left the value unset (or
            # silently reused the previous reference's value); fail loudly.
            if t not in type_formats:
                raise ParserException("Unknown offset type '%s' in expression '%s'" % (t, expression))

            # The offset specified in the expression is relative to the
            # starting offset inside self.data
            o += offset

            # Read the value from self.data at the specified offset
            fmt = type_formats[t]
            try:
                v = struct.unpack(fmt, binwalk.core.compat.str2bytes(self.data[o:o + struct.calcsize(fmt)]))[0]
            # struct.error is thrown if there is not enough bytes in
            # self.data for the specified format type
            except struct.error:
                v = 0

            # Keep track of all the recovered values from self.data
            replacements[text] = v

        # Finally, replace all offset expressions with their corresponding
        # text value
        v = expression
        for (text, value) in replacements.items():
            v = v.replace(text, "%d" % value)

    # If no offset, then it's just an evaluatable math expression (e.g.,
    # "(32+0x20)")
    else:
        v = expression

    # Evaluate the final expression
    return binwalk.core.common.MathExpression(v).value

def _analyze(self, signature, offset):
    '''
    Analyzes self.data for the specified signature data at the specified offset.

    @signature - The signature to apply to the data.
    @offset    - The offset in self.data to apply the signature to.

    Raises ParserException if a line's offset or operation cannot be evaluated.
    Returns a dictionary of tags parsed from the data. It always contains the
    'id', 'offset', 'invalid', 'once' and 'description' keys.
    '''
    description = []
    max_line_level = 0
    previous_line_end = 0
    tags = {'id': signature.id, 'offset': offset, 'invalid': False, 'once': False}

    # Apply each line of the signature to self.data, starting at the
    # specified offset
    for (n, line) in enumerate(signature.lines):
        # Ignore indentation levels above the current max indent level
        if line.level <= max_line_level:
            # If the relative offset of this signature line is just an
            # integer value, use it
            if isinstance(line.offset, int):
                line_offset = line.offset
            # Else, evaluate the complex expression
            else:
                # Format the previous_line_end value into a string. Add the '+' sign to explicitly
                # state that this value is to be added to any subsequent values in the expression
                # (e.g., '&0' becomes '4+0').
                ple = '%d+' % previous_line_end
                # Allow users to use either the '&0' (libmagic) or '&+0' (explicit addition) syntaxes;
                # replace both with the ple text.
                line_offset_text = line.offset.replace('&+', ple).replace('&', ple)
                # Evaluate the expression
                line_offset = self._do_math(offset, line_offset_text)

            # Sanity check
            if not isinstance(line_offset, int):
                raise ParserException("Failed to convert offset '%s' to a number: '%s'" % (line.offset, line.text))

            # The start of the data needed by this line is at offset + line_offset.
            # The end of the data will be line.size bytes later.
            start = offset + line_offset
            end = start + line.size

            # If the line has a packed format string, unpack it
            if line.pkfmt:
                try:
                    dvalue = struct.unpack(line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0]
                # Not enough bytes left in self.data for the specified
                # format size
                except struct.error:
                    dvalue = 0
            # Else, this is a string. Wildcard strings have line.value == None
            elif line.value is None:
                # Check to see if this is a string whose size is known and has been specified on a previous
                # signature line.
                if 'strlen' in tags and 'string' in line.tags:
                    dvalue = self.data[start:(start + tags['strlen'])]
                # Else, just terminate the string at the first newline,
                # carriage return, or NULL byte
                else:
                    dvalue = self.data[start:end].split('\x00')[0].split('\r')[0].split('\n')[0]
            # Non-wildcard strings have a known length, specified in
            # the signature line
            else:
                dvalue = self.data[start:end]

            # Some integer values have special operations that need to be performed on them
            # before comparison (e.g., "belong&0x0000FFFF"). Complex math expressions are
            # supported here as well.
            if line.operator:
                try:
                    # A string operator value is a complex expression that must
                    # be evaluated; anything else is already a plain integer.
                    # (Testing for str avoids naming the Python 2 only 'long'.)
                    if isinstance(line.opvalue, str):
                        opval = self._do_math(offset, line.opvalue)
                    else:
                        opval = line.opvalue

                    # Perform the specified operation
                    if line.operator == '&':
                        dvalue &= opval
                    elif line.operator == '|':
                        dvalue |= opval
                    elif line.operator == '*':
                        dvalue *= opval
                    elif line.operator == '+':
                        dvalue += opval
                    elif line.operator == '-':
                        dvalue -= opval
                    elif line.operator == '/':
                        # Floor division keeps the value an integer on
                        # Python 3, matching Python 2 integer division.
                        dvalue //= opval
                    elif line.operator == '~':
                        # NOTE(review): this discards the data value and uses
                        # the inverted operator value instead - confirm intent.
                        dvalue = ~opval
                    elif line.operator == '^':
                        dvalue ^= opval
                except Exception as e:
                    raise ParserException("Operation '" +
                                          str(dvalue) +
                                          " " +
                                          str(line.operator) +
                                          "= " +
                                          str(line.opvalue) +
                                          "' failed: " + str(e))

            # Does the data (dvalue) match the specified comparison?
            if ((line.value is None) or
                (line.regex and line.value.match(dvalue)) or
                (line.condition == '=' and dvalue == line.value) or
                (line.condition == '>' and dvalue > line.value) or
                (line.condition == '<' and dvalue < line.value) or
                (line.condition == '!' and dvalue != line.value) or
                (line.condition == '~' and (dvalue == ~line.value)) or
                (line.condition == '^' and (dvalue ^ line.value)) or
                (line.condition == '&' and (dvalue & line.value)) or
                    (line.condition == '|' and (dvalue | line.value))):

                # Up until this point, date fields are treated as integer values,
                # but we want to display them as nicely formatted strings.
                if line.type == 'date':
                    try:
                        ts = datetime.datetime.utcfromtimestamp(dvalue)
                        dvalue = ts.strftime("%Y-%m-%d %H:%M:%S")
                    # Arbitrary data often yields timestamps outside the range
                    # the platform can convert; that must not abort the scan.
                    except (ValueError, OverflowError, OSError, TypeError):
                        dvalue = "invalid timestamp"

                # The same data value is supplied to every format specifier
                # found in the format string
                dvalue_tuple = (dvalue,) * len(self.fmtstr.findall(line.format))

                # Format the description string
                desc = line.format % dvalue_tuple

                # If there was any description string, append it to the
                # list of description string parts
                if desc:
                    description.append(desc)

                # Process tag keywords specified in the signature line. These have already been parsed out of the
                # original format string so that they can be processed
                # separately from the printed description string.
                for (tag_name, tag_value) in line.tags.items():
                    # If the tag value is a string, try to format it
                    if isinstance(tag_value, str):
                        dvalue_tuple = (dvalue,) * len(self.fmtstr.findall(tag_value))
                        tags[tag_name] = tag_value % dvalue_tuple
                    # Else, just use the raw tag value
                    else:
                        tags[tag_name] = tag_value

                    # Some tag values are intended to be integer values, so
                    # try to convert them as such; anything that is not an
                    # integer string is deliberately left untouched.
                    try:
                        tags[tag_name] = int(tags[tag_name], 0)
                    except Exception:
                        pass

                # Abort processing soon as this signature is marked invalid, unless invalid results
                # were explicitly requested. This means that the sooner invalid checks are made in a
                # given signature, the faster the scan can filter out false
                # positives.
                if not self.show_invalid and tags['invalid']:
                    break

                # Look ahead to the next line in the signature; if its indent level is greater than
                # that of the current line, then track the end of data for the current line. This is
                # so that subsequent lines can use the '>>&0' offset syntax to specify relative offsets
                # from previous lines.
                if n + 1 < len(signature.lines) and signature.lines[n + 1].level > line.level:
                    if line.type == 'string':
                        previous_line_end = line_offset + len(dvalue)
                    else:
                        previous_line_end = line_offset + line.size

                # If this line satisfied its comparison, +1 the max
                # indentation level
                max_line_level = line.level + 1
            else:
                # No match on the first line, abort
                if line.level == 0:
                    break
                else:
                    # If this line did not satisfy its comparison, then higher
                    # indentation levels will not be accepted.
                    max_line_level = line.level

    # Join the formatted description strings and remove literal '\b'
    # sequences (plus the preceding character as well)
    tags['description'] = self.bspace.sub('', " ".join(description))

    # This should never happen
    if not tags['description']:
        tags['display'] = False
        tags['invalid'] = True

    # If the formatted string contains non-printable characters, consider
    # it invalid
    if self.printable.match(tags['description']).group() != tags['description']:
        tags['invalid'] = True

    return tags

def match(self, data):
    '''
    Match the beginning of a data buffer to a signature.

    This is self.scan limited to a dlen of 1, so only signatures found at
    offset 0 of the buffer are reported.

    @data - The data buffer to match against the loaded signature list.

    Returns a list of SignatureResult objects.
    '''
    results = self.scan(data, 1)
    return results

def scan(self, data, dlen=None):
    '''
    Scan a data block for matching signatures.

    @data - A string of data to scan.
    @dlen - If specified, signatures at offsets larger than dlen will be ignored.

    Returns a list of SignatureResult objects, sorted by offset.
    '''
    # Data can potentially be quite a large string, so it is exposed to the
    # other methods through an instance attribute rather than being passed
    # around from call to call.
    self.data = data

    # With no explicit limit, every offset inside the data is acceptable
    limit = len(data) if dlen is None else dlen

    found = []
    claimed = set()

    for sig in self.signatures:
        # The regex search cheaply locates candidate magic bytes (fast)
        for hit in sig.regex.finditer(data):
            # Where the signature itself would start, given where its magic
            # bytes were found
            candidate = hit.start() - sig.offset

            # Skip obviously invalid offsets (<0) and those beyond the limit
            if candidate < 0 or candidate >= limit:
                continue

            # Signatures are ordered based on the length of their magic bytes (largest first).
            # An offset already claimed by a previous signature is skipped,
            # unless invalid results were requested.
            if candidate in claimed and not self.show_invalid:
                continue

            # Apply the complete signature rule at this offset
            tags = self._analyze(sig, candidate)

            # Drop invalid results (unless requested) and filtered descriptions
            if tags['invalid'] and not self.show_invalid:
                continue
            if self._filtered(tags['description']):
                continue

            # Results carrying the 'once' tag are only reported the first time
            if tags['once']:
                if sig.title in self.display_once:
                    continue
                self.display_once.add(sig.title)

            found.append(SignatureResult(**tags))

            # Remember the offset so later signatures leave it alone
            claimed.add(candidate)

    # Sort results by offset
    found.sort(key=lambda result: result.offset, reverse=False)

    return found

def load(self, fname):
    '''
    Load signatures from a file.
    @fname - Path to signature file.
    Returns None.
    '''
    # Magic files must be ASCII, else encoding issues can arise.
    #
    # Read the whole file inside a context manager so the handle is always
    # released, even if reading fails. Parsing happens after the file is
    # closed, so an exception raised by self.parse() (e.g. on a malformed
    # signature line) can no longer leak the open file handle.
    with open(fname, "r") as fp:
        lines = fp.readlines()

    self.parse(lines)

def parse(self, lines):
    '''
    Build signature objects from the lines of a signature file and add
    them to self.signatures.
    @lines - A list of lines from a signature file.
    Returns None.
    '''
    current = None

    for raw in lines:
        # Keep only the text before the first comment delimiter, trimmed
        text = raw.split('#', 1)[0].strip()

        # Nothing to do for blank or comment-only lines; '!mime' style
        # entries are not supported and are skipped as well.
        if not text or text[0] == '!':
            continue

        entry = SignatureLine(text)

        if entry.level == 0:
            # A level 0 line opens a new signature entry. Store the entry
            # that was in progress (if any) first, unless user-defined
            # filter rules exclude its title.
            if current and not self._filtered(current.title):
                self.signatures.append(current)

            # The current size of self.signatures is used as the ID of
            # the new signature.
            current = Signature(len(self.signatures), entry)
        elif current:
            # Indented line: it belongs to the signature in progress
            current.lines.append(entry)
        else:
            # An indented line with no signature entry before it means the
            # signature file is malformed.
            raise ParserException("Invalid signature line: '%s'" % text)

    # The last signature has no following level 0 line to trigger its
    # storage, so store it here (subject to the filter rules).
    if current and not self._filtered(current.lines[0].format):
        self.signatures.append(current)

    # Order signatures by confidence (aka, length of their magic bytes),
    # highest first
    self.signatures.sort(key=lambda sig: sig.confidence, reverse=True)

Name		Name	Last commit message	Last commit date
Latest commit History 1 Commit
README.md		README.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

magik.py

A pure Python replacement for libmagic. Supports most libmagic features, plus several additional features not provided by libmagic. Tailored specifically for quickly searching blocks of data for multiple embedded signatures.

About

Releases

Packages

Robertino10/magik.py

Folders and files

Latest commit

History

Repository files navigation

magik.py

A pure Python replacement for libmagic. Supports most libmagic features, plus several additional features not provided by libmagic. Tailored specifically for quickly searching blocks of data for multiple embedded signatures.

About

Resources

Stars

Watchers

Forks

Releases

Packages 0

Packages