Hash Identifier
#!/usr/bin/env python3
import re
import sys
import hashlib
from collections import defaultdict
# Literal prefixes that identify a format on their own; checked in order.
_PREFIX_TYPES = (
    (('$1$',), "MD5 (Unix)"),
    (('$2a$', '$2b$', '$2y$'), "Bcrypt"),
    (('$5$',), "SHA-256 (Unix)"),
    (('$6$',), "SHA-512 (Unix)"),
    (('$pbkdf2-sha256$',), "PBKDF2-SHA256"),
    (('$sha1$',), "SHA-1 (Unix)"),
    (('$pdf$',), "PDF (Hashcat format)"),
    (('$P$', '$H$'), "PHPass (WordPress/phpBB)"),
    (('$apr1$',), "APR1-MD5"),
)

# Candidate algorithms for a pure-hexadecimal string, keyed by its length.
_HEX_TYPES = {
    8: ("CRC32", "ADLER32"),
    16: ("MySQL323", "DES(Oracle)"),
    32: ("MD5", "MD4", "NTLM", "RIPEMD-128"),
    40: ("SHA-1", "RIPEMD-160"),
    56: ("SHA-224",),
    64: ("SHA-256", "RIPEMD-256"),
    96: ("SHA-384",),
    128: ("SHA-512", "Whirlpool"),
}

# Candidate label for a Base64-alphabet string, keyed by its length.
_BASE64_TYPES = {
    24: "MD5 (Base64)",
    28: "SHA-1 (Base64)",
    44: "SHA-256 (Base64)",
    88: "SHA-512 (Base64)",
}

_HEX_RE = re.compile(r'^[a-fA-F0-9]+$')
_BASE64_RE = re.compile(r'^[A-Za-z0-9+/]+={0,2}$')
_HALF_SALT_RE = re.compile(r'^[a-fA-F0-9]{32}:[a-fA-F0-9]{32}$')
_MYSQL5_RE = re.compile(r'^\*[a-fA-F0-9]{40}$')


def identify_hash(hash_string: str) -> str:
    """Guess the type of a hash from its prefix, length, and character set.

    Args:
        hash_string: The candidate hash; surrounding whitespace is ignored.

    Returns:
        A human-readable label. When several algorithms share the same
        shape, the candidates are joined with " or ". Strings that match
        nothing yield an "Unknown ..." message, and a blank input yields
        "Empty string".
    """
    hash_string = hash_string.strip()
    if not hash_string:
        return "Empty string"

    # Self-describing formats: the prefix alone decides the answer.
    for prefixes, label in _PREFIX_TYPES:
        if hash_string.startswith(prefixes):
            return label

    if _HALF_SALT_RE.match(hash_string):
        return "MD5(Half:Salt)"

    # MySQL5 is '*' followed by 40 hex digits. It must be tested on its own:
    # the leading '*' means the string can never pass the pure-hex check.
    if _MYSQL5_RE.match(hash_string):
        return "MySQL5"

    hash_length = len(hash_string)
    is_hex = _HEX_RE.match(hash_string) is not None
    possible_types = []

    if is_hex:
        possible_types.extend(_HEX_TYPES.get(hash_length, ()))

    if _BASE64_RE.match(hash_string):
        specific = _BASE64_TYPES.get(hash_length)
        if specific is not None:
            possible_types.append(specific)
        elif not is_hex:
            # Hex digits are a subset of the Base64 alphabet, so the generic
            # label is only meaningful for strings that are not plain hex.
            possible_types.append("Base64 encoded")

    if possible_types:
        return " or ".join(possible_types)

    # Nothing specific matched: give a general hint.
    if is_hex:
        return f"Unknown hash (Hexadecimal, {hash_length} chars)"
    return "Unknown format (possibly not a standard hash, or custom format)"
def main():
    """Identify every hash listed in the file named on the command line.

    Expects exactly one argument: the path of a text file with one hash per
    line. Blank lines and lines starting with '#' are ignored. Prints the
    identification of the first and last three hashes, followed by a
    summary of how many hashes fell into each type.

    Exits with status 1 on a usage error or when the file cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python hash_identifier.py <hash_file>")
        sys.exit(1)

    hash_file = sys.argv[1]

    # Keep the try body to the file read, the only step expected to fail.
    try:
        with open(hash_file, 'r') as f:
            # Remember the original line number of each real hash so the
            # preview can still point back into the file.
            entries = [
                (line_no, text)
                for line_no, text in enumerate((raw.strip() for raw in f), 1)
                if text and not text.startswith('#')
            ]
    except FileNotFoundError:
        print(f"Error: File '{hash_file}' not found.")
        sys.exit(1)
    except (OSError, ValueError) as e:
        # ValueError also covers decoding errors raised while reading text.
        print(f"Error: {e}")
        sys.exit(1)

    total = len(entries)
    print(f"Analyzing {total} hashes from {hash_file}...\n")

    preview = 3  # how many hashes to show at each end of the list
    hash_types = defaultdict(int)
    for position, (line_no, hash_string) in enumerate(entries):
        hash_type = identify_hash(hash_string)
        hash_types[hash_type] += 1

        # Preview by position among real hashes, not by raw file line, so
        # skipped blank/comment lines cannot suppress or distort the output.
        if position < preview or position >= total - preview:
            shown = hash_string[:40] + ('...' if len(hash_string) > 40 else '')
            print(f"Line {line_no}: {shown} -> {hash_type}")
        elif position == preview:
            # Only reachable when more than 2 * preview hashes exist.
            print(f"... ({total - 2 * preview} more hashes) ...")

    print("\nSummary of hash types:")
    for hash_type, count in sorted(hash_types.items(), key=lambda x: x[1], reverse=True):
        print(f" {hash_type}: {count}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()