2023-06-11 12:31:23 +02:00
|
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
#
|
|
|
|
# Copyright The SCons Foundation
|
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
"""
|
|
|
|
SCons hash utility routines.
|
2023-06-11 12:31:23 +02:00
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
Routines for working with content and signature hashes.
|
2023-06-11 12:31:23 +02:00
|
|
|
"""
|
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
import functools
|
2023-06-11 12:31:23 +02:00
|
|
|
import hashlib
|
|
|
|
import sys
|
2024-04-15 15:35:35 +02:00
|
|
|
from typing import Optional, Union
|
2023-06-11 12:31:23 +02:00
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
from .sctypes import to_bytes
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Default hash function and format. SCons-internal.
|
|
|
|
DEFAULT_HASH_FORMATS = ['md5', 'sha1', 'sha256']
|
|
|
|
ALLOWED_HASH_FORMATS = []
|
|
|
|
_HASH_FUNCTION = None
|
|
|
|
_HASH_FORMAT = None
|
|
|
|
|
|
|
|
|
|
|
|
def _attempt_init_of_python_3_9_hash_object(hash_function_object, sys_used=sys):
|
|
|
|
"""Initialize hash function with non-security indicator.
|
|
|
|
|
|
|
|
In Python 3.9 and onwards, :mod:`hashlib` constructors accept a
|
|
|
|
keyword argument *usedforsecurity*, which, if set to ``False``,
|
|
|
|
lets us continue to use algorithms that have been deprecated either
|
|
|
|
by FIPS or by Python itself, as the MD5 algorithm SCons prefers is
|
|
|
|
not being used for security purposes as much as a short, 32 char
|
|
|
|
hash that is resistant to accidental collisions.
|
|
|
|
|
|
|
|
In prior versions of python, :mod:`hashlib` returns a native function
|
|
|
|
wrapper, which errors out when it's queried for the optional
|
|
|
|
parameter, so this function wraps that call.
|
|
|
|
|
|
|
|
It can still throw a ValueError if the initialization fails due to
|
|
|
|
FIPS compliance issues, but that is assumed to be the responsibility
|
|
|
|
of the caller.
|
|
|
|
"""
|
|
|
|
if hash_function_object is None:
|
|
|
|
return None
|
|
|
|
|
|
|
|
# https://stackoverflow.com/a/11887885 details how to check versions
|
|
|
|
# with the "packaging" library. However, for our purposes, checking
|
|
|
|
# the version is greater than or equal to 3.9 is good enough, as the API
|
|
|
|
# is guaranteed to have support for the 'usedforsecurity' flag in 3.9. See
|
|
|
|
# https://docs.python.org/3/library/hashlib.html#:~:text=usedforsecurity
|
|
|
|
# for the version support notes.
|
|
|
|
if (sys_used.version_info.major > 3) or (
|
|
|
|
sys_used.version_info.major == 3 and sys_used.version_info.minor >= 9
|
|
|
|
):
|
|
|
|
return hash_function_object(usedforsecurity=False)
|
|
|
|
|
|
|
|
# Note that this can throw a ValueError in FIPS-enabled versions of
|
|
|
|
# Linux prior to 3.9. The OpenSSL hashlib will throw on first init here,
|
|
|
|
# but it is assumed to be responsibility of the caller to diagnose the
|
|
|
|
# ValueError & potentially display the error to screen.
|
|
|
|
return hash_function_object()
|
|
|
|
|
|
|
|
|
|
|
|
def _set_allowed_viable_default_hashes(hashlib_used, sys_used=sys) -> None:
|
|
|
|
"""Check if the default hash algorithms can be called.
|
|
|
|
|
|
|
|
This util class is sometimes called prior to setting the
|
|
|
|
user-selected hash algorithm, meaning that on FIPS-compliant systems
|
|
|
|
the library would default-initialize MD5 and throw an exception in
|
|
|
|
set_hash_format. A common case is using the SConf options, which can
|
|
|
|
run prior to main, and thus ignore the options.hash_format variable.
|
|
|
|
|
|
|
|
This function checks the DEFAULT_HASH_FORMATS and sets the
|
|
|
|
ALLOWED_HASH_FORMATS to only the ones that can be called. In Python
|
|
|
|
>= 3.9 this will always default to MD5 as in Python 3.9 there is an
|
|
|
|
optional attribute "usedforsecurity" set for the method.
|
|
|
|
|
|
|
|
Throws if no allowed hash formats are detected.
|
|
|
|
"""
|
|
|
|
global ALLOWED_HASH_FORMATS
|
|
|
|
_last_error = None
|
|
|
|
# note: if you call this method repeatedly, example using timeout,
|
|
|
|
# this is needed. Otherwise it keeps appending valid formats to the string.
|
|
|
|
ALLOWED_HASH_FORMATS = []
|
|
|
|
|
|
|
|
for test_algorithm in DEFAULT_HASH_FORMATS:
|
|
|
|
_test_hash = getattr(hashlib_used, test_algorithm, None)
|
|
|
|
# we know hashlib claims to support it... check to see if we can call it.
|
|
|
|
if _test_hash is not None:
|
|
|
|
# The hashing library will throw an exception on initialization
|
|
|
|
# in FIPS mode, meaning if we call the default algorithm returned
|
|
|
|
# with no parameters, it'll throw if it's a bad algorithm,
|
|
|
|
# otherwise it will append it to the known good formats.
|
|
|
|
try:
|
|
|
|
_attempt_init_of_python_3_9_hash_object(_test_hash, sys_used)
|
|
|
|
ALLOWED_HASH_FORMATS.append(test_algorithm)
|
|
|
|
except ValueError as e:
|
|
|
|
_last_error = e
|
|
|
|
continue
|
|
|
|
|
|
|
|
if len(ALLOWED_HASH_FORMATS) == 0:
|
2024-04-15 15:35:35 +02:00
|
|
|
from SCons.Errors import ( # pylint: disable=import-outside-toplevel
|
2023-06-11 12:31:23 +02:00
|
|
|
SConsEnvironmentError,
|
2024-04-15 15:35:35 +02:00
|
|
|
)
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
# chain the exception thrown with the most recent error from hashlib.
|
|
|
|
raise SConsEnvironmentError(
|
|
|
|
'No usable hash algorithms found.'
|
|
|
|
'Most recent error from hashlib attached in trace.'
|
|
|
|
) from _last_error
|
|
|
|
|
|
|
|
|
|
|
|
_set_allowed_viable_default_hashes(hashlib)
|
|
|
|
|
|
|
|
|
|
|
|
def get_hash_format():
|
|
|
|
"""Retrieves the hash format or ``None`` if not overridden.
|
|
|
|
|
|
|
|
A return value of ``None``
|
|
|
|
does not guarantee that MD5 is being used; instead, it means that the
|
|
|
|
default precedence order documented in :func:`SCons.Util.set_hash_format`
|
|
|
|
is respected.
|
|
|
|
"""
|
|
|
|
return _HASH_FORMAT
|
|
|
|
|
|
|
|
|
|
|
|
def _attempt_get_hash_function(hash_name, hashlib_used=hashlib, sys_used=sys):
|
|
|
|
"""Wrapper used to try to initialize a hash function given.
|
|
|
|
|
|
|
|
If successful, returns the name of the hash function back to the user.
|
|
|
|
|
|
|
|
Otherwise returns None.
|
|
|
|
"""
|
|
|
|
try:
|
|
|
|
_fetch_hash = getattr(hashlib_used, hash_name, None)
|
|
|
|
if _fetch_hash is None:
|
|
|
|
return None
|
|
|
|
_attempt_init_of_python_3_9_hash_object(_fetch_hash, sys_used)
|
|
|
|
return hash_name
|
|
|
|
except ValueError:
|
|
|
|
# If attempt_init_of_python_3_9 throws, this is typically due to FIPS
|
|
|
|
# being enabled. However, if we get to this point, the viable hash
|
|
|
|
# function check has either been bypassed or otherwise failed to
|
|
|
|
# properly restrict the user to only the supported functions.
|
|
|
|
# As such throw the UserError as an internal assertion-like error.
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def set_hash_format(hash_format, hashlib_used=hashlib, sys_used=sys):
|
|
|
|
"""Sets the default hash format used by SCons.
|
|
|
|
|
|
|
|
If `hash_format` is ``None`` or
|
|
|
|
an empty string, the default is determined by this function.
|
|
|
|
|
|
|
|
Currently the default behavior is to use the first available format of
|
|
|
|
the following options: MD5, SHA1, SHA256.
|
|
|
|
"""
|
|
|
|
global _HASH_FORMAT, _HASH_FUNCTION
|
|
|
|
|
|
|
|
_HASH_FORMAT = hash_format
|
|
|
|
if hash_format:
|
|
|
|
hash_format_lower = hash_format.lower()
|
|
|
|
if hash_format_lower not in ALLOWED_HASH_FORMATS:
|
2024-04-15 15:35:35 +02:00
|
|
|
from SCons.Errors import ( # pylint: disable=import-outside-toplevel
|
2023-06-11 12:31:23 +02:00
|
|
|
UserError,
|
2024-04-15 15:35:35 +02:00
|
|
|
)
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
# User can select something not supported by their OS but
|
|
|
|
# normally supported by SCons, example, selecting MD5 in an
|
|
|
|
# OS with FIPS-mode turned on. Therefore we first check if
|
|
|
|
# SCons supports it, and then if their local OS supports it.
|
|
|
|
if hash_format_lower in DEFAULT_HASH_FORMATS:
|
|
|
|
raise UserError(
|
|
|
|
'While hash format "%s" is supported by SCons, the '
|
|
|
|
'local system indicates only the following hash '
|
|
|
|
'formats are supported by the hashlib library: %s'
|
|
|
|
% (hash_format_lower, ', '.join(ALLOWED_HASH_FORMATS))
|
|
|
|
)
|
|
|
|
|
|
|
|
# The hash format isn't supported by SCons in any case.
|
|
|
|
# Warn the user, and if we detect that SCons supports more
|
|
|
|
# algorithms than their local system supports,
|
|
|
|
# warn the user about that too.
|
|
|
|
if ALLOWED_HASH_FORMATS == DEFAULT_HASH_FORMATS:
|
|
|
|
raise UserError(
|
|
|
|
'Hash format "%s" is not supported by SCons. Only '
|
|
|
|
'the following hash formats are supported: %s'
|
|
|
|
% (hash_format_lower, ', '.join(ALLOWED_HASH_FORMATS))
|
|
|
|
)
|
|
|
|
|
|
|
|
raise UserError(
|
|
|
|
'Hash format "%s" is not supported by SCons. '
|
|
|
|
'SCons supports more hash formats than your local system '
|
|
|
|
'is reporting; SCons supports: %s. Your local system only '
|
|
|
|
'supports: %s'
|
|
|
|
% (
|
|
|
|
hash_format_lower,
|
|
|
|
', '.join(DEFAULT_HASH_FORMATS),
|
|
|
|
', '.join(ALLOWED_HASH_FORMATS),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
# This is not expected to fail. If this fails it means the
|
|
|
|
# set_allowed_viable_default_hashes function did not throw,
|
|
|
|
# or when it threw, the exception was caught and ignored, or
|
|
|
|
# the global ALLOWED_HASH_FORMATS was changed by an external user.
|
|
|
|
_HASH_FUNCTION = _attempt_get_hash_function(
|
|
|
|
hash_format_lower, hashlib_used, sys_used
|
|
|
|
)
|
|
|
|
|
|
|
|
if _HASH_FUNCTION is None:
|
2024-04-15 15:35:35 +02:00
|
|
|
from SCons.Errors import ( # pylint: disable=import-outside-toplevel
|
2023-06-11 12:31:23 +02:00
|
|
|
UserError,
|
2024-04-15 15:35:35 +02:00
|
|
|
)
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
raise UserError(
|
|
|
|
f'Hash format "{hash_format_lower}" is not available in your '
|
|
|
|
'Python interpreter. Expected to be supported algorithm by '
|
|
|
|
'set_allowed_viable_default_hashes. Assertion error in SCons.'
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
# Set the default hash format based on what is available, defaulting
|
|
|
|
# to the first supported hash algorithm (usually md5) for backwards
|
|
|
|
# compatibility. In FIPS-compliant systems this usually defaults to
|
|
|
|
# SHA1, unless that too has been disabled.
|
|
|
|
for choice in ALLOWED_HASH_FORMATS:
|
|
|
|
_HASH_FUNCTION = _attempt_get_hash_function(choice, hashlib_used, sys_used)
|
|
|
|
|
|
|
|
if _HASH_FUNCTION is not None:
|
|
|
|
break
|
|
|
|
else:
|
|
|
|
# This is not expected to happen in practice.
|
2024-04-15 15:35:35 +02:00
|
|
|
from SCons.Errors import ( # pylint: disable=import-outside-toplevel
|
2023-06-11 12:31:23 +02:00
|
|
|
UserError,
|
2024-04-15 15:35:35 +02:00
|
|
|
)
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
raise UserError(
|
|
|
|
'Your Python interpreter does not have MD5, SHA1, or SHA256. '
|
|
|
|
'SCons requires at least one. Expected to support one or more '
|
|
|
|
'during set_allowed_viable_default_hashes.'
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Ensure that this is initialized in case either:
|
|
|
|
# 1. This code is running in a unit test.
|
|
|
|
# 2. This code is running in a consumer that does hash operations while
|
|
|
|
# SConscript files are being loaded.
|
|
|
|
set_hash_format(None)
|
|
|
|
|
|
|
|
|
|
|
|
def get_current_hash_algorithm_used():
|
|
|
|
"""Returns the current hash algorithm name used.
|
|
|
|
|
|
|
|
Where the python version >= 3.9, this is expected to return md5.
|
|
|
|
If python's version is <= 3.8, this returns md5 on non-FIPS-mode platforms, and
|
|
|
|
sha1 or sha256 on FIPS-mode Linux platforms.
|
|
|
|
|
|
|
|
This function is primarily useful for testing, where one expects a value to be
|
|
|
|
one of N distinct hashes, and therefore the test needs to know which hash to select.
|
|
|
|
"""
|
|
|
|
return _HASH_FUNCTION
|
|
|
|
|
|
|
|
|
|
|
|
def _get_hash_object(hash_format, hashlib_used=hashlib, sys_used=sys):
|
|
|
|
"""Allocates a hash object using the requested hash format.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
hash_format: Hash format to use.
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
hashlib object.
|
|
|
|
"""
|
|
|
|
if hash_format is None:
|
|
|
|
if _HASH_FUNCTION is None:
|
2024-04-15 15:35:35 +02:00
|
|
|
from SCons.Errors import ( # pylint: disable=import-outside-toplevel
|
2023-06-11 12:31:23 +02:00
|
|
|
UserError,
|
2024-04-15 15:35:35 +02:00
|
|
|
)
|
2023-06-11 12:31:23 +02:00
|
|
|
|
|
|
|
raise UserError(
|
|
|
|
'There is no default hash function. Did you call '
|
|
|
|
'a hashing function before SCons was initialized?'
|
|
|
|
)
|
|
|
|
return _attempt_init_of_python_3_9_hash_object(
|
|
|
|
getattr(hashlib_used, _HASH_FUNCTION, None), sys_used
|
|
|
|
)
|
|
|
|
|
|
|
|
if not hasattr(hashlib, hash_format):
|
|
|
|
from SCons.Errors import UserError # pylint: disable=import-outside-toplevel
|
|
|
|
|
|
|
|
raise UserError(
|
|
|
|
f'Hash format "{hash_format}" is not available in your Python interpreter.'
|
|
|
|
)
|
|
|
|
|
|
|
|
return _attempt_init_of_python_3_9_hash_object(
|
|
|
|
getattr(hashlib, hash_format), sys_used
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
def hash_signature(s, hash_format=None):
|
|
|
|
"""
|
|
|
|
Generate hash signature of a string
|
|
|
|
|
|
|
|
Args:
|
|
|
|
s: either string or bytes. Normally should be bytes
|
|
|
|
hash_format: Specify to override default hash format
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
String of hex digits representing the signature
|
|
|
|
"""
|
|
|
|
m = _get_hash_object(hash_format)
|
|
|
|
try:
|
|
|
|
m.update(to_bytes(s))
|
|
|
|
except TypeError:
|
|
|
|
m.update(to_bytes(str(s)))
|
|
|
|
|
|
|
|
return m.hexdigest()
|
|
|
|
|
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
def hash_file_signature(fname, chunksize: int=65536, hash_format=None):
|
2023-06-11 12:31:23 +02:00
|
|
|
"""
|
|
|
|
Generate the md5 signature of a file
|
|
|
|
|
|
|
|
Args:
|
|
|
|
fname: file to hash
|
|
|
|
chunksize: chunk size to read
|
|
|
|
hash_format: Specify to override default hash format
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
String of Hex digits representing the signature
|
|
|
|
"""
|
|
|
|
|
|
|
|
m = _get_hash_object(hash_format)
|
|
|
|
with open(fname, "rb") as f:
|
|
|
|
while True:
|
|
|
|
blck = f.read(chunksize)
|
|
|
|
if not blck:
|
|
|
|
break
|
|
|
|
m.update(to_bytes(blck))
|
2024-04-15 15:35:35 +02:00
|
|
|
# TODO: can use this when base is Python 3.8+
|
|
|
|
# while (blk := f.read(chunksize)) != b'':
|
|
|
|
# m.update(to_bytes(blk))
|
|
|
|
|
2023-06-11 12:31:23 +02:00
|
|
|
return m.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
def hash_collect(signatures, hash_format=None):
|
|
|
|
"""
|
|
|
|
Collects a list of signatures into an aggregate signature.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
signatures: a list of signatures
|
|
|
|
hash_format: Specify to override default hash format
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
the aggregate signature
|
|
|
|
"""
|
|
|
|
|
|
|
|
if len(signatures) == 1:
|
|
|
|
return signatures[0]
|
|
|
|
|
|
|
|
return hash_signature(', '.join(signatures), hash_format)
|
|
|
|
|
|
|
|
|
|
|
|
_MD5_WARNING_SHOWN = False
|
|
|
|
|
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
def _show_md5_warning(function_name) -> None:
|
2023-06-11 12:31:23 +02:00
|
|
|
"""Shows a deprecation warning for various MD5 functions."""
|
|
|
|
|
|
|
|
global _MD5_WARNING_SHOWN
|
|
|
|
|
|
|
|
if not _MD5_WARNING_SHOWN:
|
|
|
|
import SCons.Warnings # pylint: disable=import-outside-toplevel
|
|
|
|
|
|
|
|
SCons.Warnings.warn(
|
|
|
|
SCons.Warnings.DeprecatedWarning,
|
|
|
|
f"Function {function_name} is deprecated",
|
|
|
|
)
|
|
|
|
_MD5_WARNING_SHOWN = True
|
|
|
|
|
|
|
|
|
|
|
|
def MD5signature(s):
|
|
|
|
"""Deprecated. Use :func:`hash_signature` instead."""
|
|
|
|
|
|
|
|
_show_md5_warning("MD5signature")
|
|
|
|
return hash_signature(s)
|
|
|
|
|
|
|
|
|
2024-04-15 15:35:35 +02:00
|
|
|
def MD5filesignature(fname, chunksize: int=65536):
|
2023-06-11 12:31:23 +02:00
|
|
|
"""Deprecated. Use :func:`hash_file_signature` instead."""
|
|
|
|
|
|
|
|
_show_md5_warning("MD5filesignature")
|
|
|
|
return hash_file_signature(fname, chunksize)
|
|
|
|
|
|
|
|
|
|
|
|
def MD5collect(signatures):
|
|
|
|
"""Deprecated. Use :func:`hash_collect` instead."""
|
|
|
|
|
|
|
|
_show_md5_warning("MD5collect")
|
|
|
|
return hash_collect(signatures)
|
|
|
|
|
|
|
|
|
|
|
|
# Local Variables:
|
|
|
|
# tab-width:4
|
|
|
|
# indent-tabs-mode:nil
|
|
|
|
# End:
|
|
|
|
# vim: set expandtab tabstop=4 shiftwidth=4:
|