# Hash Identifier

#!/usr/bin/env python3

import re
import sys
import hashlib
from collections import defaultdict

# Scheme prefixes that identify a hash format on their own. Each entry is
# (prefix or tuple of prefixes, label); no prefix is a prefix of another,
# so the order only mirrors the original check order.
_PREFIX_TYPES = (
    ('$1$', "MD5 (Unix)"),
    (('$2a$', '$2b$', '$2y$'), "Bcrypt"),
    ('$5$', "SHA-256 (Unix)"),
    ('$6$', "SHA-512 (Unix)"),
    ('$pbkdf2-sha256$', "PBKDF2-SHA256"),
    ('$sha1$', "SHA-1 (Unix)"),
    ('$pdf$', "PDF (Hashcat format)"),
    (('$P$', '$H$'), "PHPass (WordPress/phpBB)"),
    ('$apr1$', "APR1-MD5"),
)

# Candidate algorithms for a purely hexadecimal string, keyed by its length.
_HEX_LENGTH_TYPES = {
    8: ("CRC32", "ADLER32"),
    16: ("MySQL323", "DES(Oracle)"),
    32: ("MD5", "MD4", "NTLM", "RIPEMD-128"),
    40: ("SHA-1", "RIPEMD-160"),
    56: ("SHA-224",),
    64: ("SHA-256", "RIPEMD-256"),
    96: ("SHA-384",),
    128: ("SHA-512", "Whirlpool"),
}

# Candidate algorithm for a Base64-alphabet string, keyed by its length.
_BASE64_LENGTH_TYPES = {
    24: "MD5 (Base64)",
    28: "SHA-1 (Base64)",
    44: "SHA-256 (Base64)",
    88: "SHA-512 (Base64)",
}


def identify_hash(hash_string):
    """Guess the type of a hash from its prefix, length, and character set.

    Args:
        hash_string: The hash text; surrounding whitespace is ignored.

    Returns:
        A human-readable label. When several algorithms share the same
        shape the candidates are joined with " or ". Special results are
        "Empty string" for blank input, "Unknown hash (Hexadecimal, N chars)"
        for hex strings of an unrecognised length, and "Unknown format (...)"
        when nothing matches.
    """
    hash_string = hash_string.strip()

    if not hash_string:
        return "Empty string"

    # Formats that announce themselves with an explicit scheme prefix.
    for prefix, label in _PREFIX_TYPES:
        if hash_string.startswith(prefix):
            return label

    if re.fullmatch(r'[a-fA-F0-9]{32}:[a-fA-F0-9]{32}', hash_string):
        return "MD5(Half:Salt)"

    # MySQL5 is '*' followed by 40 hex digits. It has to be tested before the
    # hex-only branch because the leading '*' is not a hex character (the
    # previous in-branch check could never succeed).
    if re.fullmatch(r'\*[a-fA-F0-9]{40}', hash_string):
        return "MySQL5"

    hash_length = len(hash_string)
    is_hex = re.fullmatch(r'[a-fA-F0-9]+', hash_string) is not None
    possible_types = []

    if is_hex:
        possible_types.extend(_HEX_LENGTH_TYPES.get(hash_length, ()))

    # Base64 alphabet with optional '=' padding.
    if re.fullmatch(r'[A-Za-z0-9+/]+={0,2}', hash_string):
        if hash_length in _BASE64_LENGTH_TYPES:
            possible_types.append(_BASE64_LENGTH_TYPES[hash_length])
        elif not is_hex:
            # Hex digits are a subset of the Base64 alphabet, so the generic
            # label is only meaningful for strings that are not pure hex;
            # otherwise it pollutes real hex matches and hides the
            # "Unknown hash (Hexadecimal ...)" fallback below.
            possible_types.append("Base64 encoded")

    if possible_types:
        return " or ".join(possible_types)

    if is_hex:
        return f"Unknown hash (Hexadecimal, {hash_length} chars)"
    return "Unknown format (possibly not a standard hash, or custom format)"

def main():
    """Identify every hash in the file named on the command line.

    Expects exactly one argument: the path of a text file with one hash per
    line. Blank lines and lines starting with '#' are ignored. Prints the
    identification of the first three and last three hashes (with their
    original file line numbers), then a summary of how many hashes fell
    into each type, most frequent first.

    Exits with status 1 on a usage error or when the file cannot be read.
    """
    if len(sys.argv) != 2:
        print("Usage: python hash_identifier.py <hash_file>")
        sys.exit(1)

    hash_file = sys.argv[1]

    # Only the file read can fail with an I/O error, so only it is guarded.
    try:
        with open(hash_file, 'r') as f:
            # Keep the real file line number next to each hash so that the
            # preview still points at the right line after filtering.
            entries = [
                (line_no, text)
                for line_no, text in enumerate((raw.strip() for raw in f), 1)
                if text and not text.startswith('#')
            ]
    except FileNotFoundError:
        print(f"Error: File '{hash_file}' not found.")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Count and preview positions are based on actual hashes, not raw lines,
    # so blank/comment lines no longer skew the totals or swallow the
    # "... more hashes ..." marker.
    total = len(entries)
    print(f"Analyzing {total} hashes from {hash_file}...\n")

    hash_types = defaultdict(int)

    for position, (line_no, hash_string) in enumerate(entries, 1):
        hash_type = identify_hash(hash_string)
        hash_types[hash_type] += 1

        # Show only the first three and last three identifications.
        if position <= 3 or position > total - 3:
            print(f"Line {line_no}: {hash_string[:40]}{'...' if len(hash_string) > 40 else ''} -> {hash_type}")
        elif position == 4:
            # Reaching here implies total > 6; emit the marker once.
            print(f"... ({total - 6} more hashes) ...")

    print("\nSummary of hash types:")
    for hash_type, count in sorted(hash_types.items(), key=lambda x: x[1], reverse=True):
        print(f"  {hash_type}: {count}")

# Run the command-line entry point only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()