Scripts

Binary Parser

import struct
import socket
import datetime
from collections import defaultdict

def parse_binary_logs(file_path):
    """Parse binary log file according to the specified format."""
    login_attempts = []
    
    with open(file_path, 'rb') as f:
        data = f.read()
        
    offset = 0
    while offset < len(data):
        # Stop cleanly if the remaining bytes cannot hold another length field
        if offset + 4 > len(data):
            break
        # Read username length (4-byte integer, big-endian)
        username_length = struct.unpack('>I', data[offset:offset+4])[0]
        offset += 4
        
        # Remaining record: username + IPv4 (4) + timestamp (4) + success flag (1)
        if offset + username_length + 9 > len(data):
            break
        
        # Read username (variable-length UTF-8 string)
        username = data[offset:offset+username_length].decode('utf-8')
        offset += username_length
        
        # Read IPv4 address (4 bytes)
        ip_bytes = data[offset:offset+4]
        ip_address = socket.inet_ntoa(ip_bytes)
        offset += 4
        
        # Read timestamp (4-byte Unix timestamp)
        timestamp = struct.unpack('>I', data[offset:offset+4])[0]
        datetime_obj = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        offset += 4
        
        # Read success flag (1-byte boolean)
        success = bool(data[offset])
        offset += 1
        
        # Store the parsed login attempt
        login_attempts.append({
            'username': username,
            'ip_address': ip_address,
            'timestamp': timestamp,
            'datetime': datetime_obj,
            'success': success
        })
    
    return login_attempts

def analyze_logs(login_attempts):
    """Basic analysis of the log data to extract key metrics."""
    # Get the earliest timestamp (start date of the log)
    earliest_timestamp = min(login_attempts, key=lambda x: x['timestamp'])['timestamp']
    start_date_utc = datetime.datetime.fromtimestamp(earliest_timestamp, tz=datetime.timezone.utc)
    
    # Count unique usernames
    unique_usernames = set(attempt['username'] for attempt in login_attempts)
    
    # Count unique IP addresses
    unique_ips = set(attempt['ip_address'] for attempt in login_attempts)
    
    # Count total login attempts
    total_attempts = len(login_attempts)
    
    return {
        'start_date_utc': start_date_utc,
        'total_attempts': total_attempts,
        'unique_usernames': len(unique_usernames),
        'unique_ips': len(unique_ips),
        'usernames': unique_usernames,
        'ip_addresses': unique_ips
    }

def analyze_login_patterns(login_attempts):
    """Analyze login patterns to identify potentially compromised users."""
    # Track login data per user
    user_data = defaultdict(lambda: {
        'ips': set(),
        'successful_logins': 0,
        'failed_logins': 0,
        'login_times': [],
        'success_after_failure': 0,
        'unique_ips_count': 0
    })
    
    # Group login attempts by user
    user_attempts = defaultdict(list)
    for attempt in login_attempts:
        user = attempt['username']
        user_attempts[user].append(attempt)
        
        # Track IPs used
        user_data[user]['ips'].add(attempt['ip_address'])
        
        # Track login success/failure
        if attempt['success']:
            user_data[user]['successful_logins'] += 1
        else:
            user_data[user]['failed_logins'] += 1
            
        # Track login times
        user_data[user]['login_times'].append(attempt['timestamp'])
    
    # Calculate additional metrics
    for user, attempts in user_attempts.items():
        # Sort attempts by timestamp
        sorted_attempts = sorted(attempts, key=lambda x: x['timestamp'])
        
        # Check for successful login after failures
        for i in range(1, len(sorted_attempts)):
            if (not sorted_attempts[i-1]['success'] and 
                sorted_attempts[i]['success'] and
                sorted_attempts[i]['ip_address'] != sorted_attempts[i-1]['ip_address']):
                user_data[user]['success_after_failure'] += 1
                
        # Count unique IPs
        user_data[user]['unique_ips_count'] = len(user_data[user]['ips'])
    
    # Identify suspicious users based on multiple criteria
    suspicious_users = []
    for user, data in user_data.items():
        suspicion_score = 0
        
        # Multiple IPs used (especially if significantly more than other users)
        if data['unique_ips_count'] > 3:
            suspicion_score += data['unique_ips_count']
        
        # High number of failed logins followed by successful ones from different IPs
        if data['success_after_failure'] > 0:
            suspicion_score += data['success_after_failure'] * 10
            
        # High ratio of failed to successful logins
        if data['successful_logins'] > 0:
            failure_ratio = data['failed_logins'] / data['successful_logins']
            if failure_ratio > 3:
                suspicion_score += failure_ratio
                
        if suspicion_score > 10:
            suspicious_users.append((user, suspicion_score, data))
    
    # Sort by suspicion score
    suspicious_users.sort(key=lambda x: x[1], reverse=True)
    
    return suspicious_users, user_data

def main():
    file_path = input("Enter the path to the binary log file: ")
    
    try:
        login_attempts = parse_binary_logs(file_path)
        print(f"Successfully parsed {len(login_attempts)} login attempts.")
        
        # Basic log analysis for the requested metrics
        log_metrics = analyze_logs(login_attempts)
        
        print("\n===== LOG METRICS =====")
        print(f"Start date of the log (UTC): {log_metrics['start_date_utc'].strftime('%Y-%m-%d %H:%M:%S UTC')}")
        print(f"Total login attempts recorded: {log_metrics['total_attempts']}")
        print(f"Number of unique usernames: {log_metrics['unique_usernames']}")
        print(f"Number of unique IP addresses: {log_metrics['unique_ips']}")
        
        # Advanced pattern analysis for suspicious activity
        suspicious_users, user_data = analyze_login_patterns(login_attempts)
        
        print("\n===== SUSPICIOUS ACTIVITY ANALYSIS =====")
        print(f"Total users analyzed: {len(user_data)}")
        
        if suspicious_users:
            print("\nPotentially compromised users (sorted by suspicion score):")
            for user, score, data in suspicious_users:
                print(f"\nUsername: {user} (Suspicion Score: {score:.2f})")
                print(f"  Unique IPs: {data['unique_ips_count']}")
                print(f"  Successful logins: {data['successful_logins']}")
                print(f"  Failed logins: {data['failed_logins']}")
                print(f"  Successful logins after failures from different IPs: {data['success_after_failure']}")
                print(f"  IP addresses used: {', '.join(data['ips'])}")
        else:
            print("\nNo suspicious users identified.")
        
    except Exception as e:
        print(f"Error processing the file: {e}")

if __name__ == "__main__":
    main()
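
To exercise the parser without a real capture, the snippet below writes a small binary log in the record layout the parser expects: big-endian 4-byte username length, UTF-8 username, 4-byte IPv4, big-endian 4-byte Unix timestamp, and a 1-byte success flag. The usernames, addresses, and timestamps are invented purely for illustration.

import struct
import socket

def write_sample_log(path):
    """Write a few made-up records in the parser's expected binary format."""
    records = [
        ('alice', '10.0.0.5', 1700000000, True),
        ('bob', '192.168.1.20', 1700000060, False),
        ('bob', '203.0.113.7', 1700000120, True),
    ]
    with open(path, 'wb') as f:
        for username, ip, ts, success in records:
            name_bytes = username.encode('utf-8')
            f.write(struct.pack('>I', len(name_bytes)))    # username length
            f.write(name_bytes)                            # username
            f.write(socket.inet_aton(ip))                  # IPv4 address (4 bytes)
            f.write(struct.pack('>I', ts))                 # Unix timestamp
            f.write(struct.pack('B', 1 if success else 0)) # success flag

write_sample_log('sample.log')  # then point parse_binary_logs at 'sample.log'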

Compromised User Detector

import struct
import socket
import datetime
from collections import defaultdict

def parse_binary_logs(file_path):
    """Parse binary log file according to the specified format."""
    login_attempts = []
    
    with open(file_path, 'rb') as f:
        data = f.read()
        
    offset = 0
    while offset < len(data):
        # Stop cleanly if the remaining bytes cannot hold another length field
        if offset + 4 > len(data):
            break
        # Read username length (4-byte integer, big-endian)
        username_length = struct.unpack('>I', data[offset:offset+4])[0]
        offset += 4
        
        # Remaining record: username + IPv4 (4) + timestamp (4) + success flag (1)
        if offset + username_length + 9 > len(data):
            break
        
        # Read username (variable-length UTF-8 string)
        username = data[offset:offset+username_length].decode('utf-8')
        offset += username_length
        
        # Read IPv4 address (4 bytes)
        ip_bytes = data[offset:offset+4]
        ip_address = socket.inet_ntoa(ip_bytes)
        offset += 4
        
        # Read timestamp (4-byte Unix timestamp)
        timestamp = struct.unpack('>I', data[offset:offset+4])[0]
        datetime_obj = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        offset += 4
        
        # Read success flag (1-byte boolean)
        success = bool(data[offset])
        offset += 1
        
        # Store the parsed login attempt
        login_attempts.append({
            'username': username,
            'ip_address': ip_address,
            'timestamp': timestamp,
            'datetime': datetime_obj,
            'success': success
        })
    
    return login_attempts

def analyze_logs(login_attempts):
    """Basic analysis of the log data to extract key metrics."""
    # Get the earliest timestamp (start date of the log)
    earliest_timestamp = min(login_attempts, key=lambda x: x['timestamp'])['timestamp']
    start_date_utc = datetime.datetime.fromtimestamp(earliest_timestamp, tz=datetime.timezone.utc)
    
    # Count unique usernames
    unique_usernames = set(attempt['username'] for attempt in login_attempts)
    
    # Count unique IP addresses
    unique_ips = set(attempt['ip_address'] for attempt in login_attempts)
    
    # Count total login attempts
    total_attempts = len(login_attempts)
    
    return {
        'start_date_utc': start_date_utc,
        'total_attempts': total_attempts,
        'unique_usernames': len(unique_usernames),
        'unique_ips': len(unique_ips),
        'usernames': unique_usernames,
        'ip_addresses': unique_ips
    }

def analyze_login_patterns(login_attempts):
    """Analyze login patterns to identify potentially compromised users."""
    # Track login data per user
    user_data = defaultdict(lambda: {
        'ips': set(),
        'successful_logins': 0,
        'failed_logins': 0,
        'login_times': [],
        'success_after_failure': 0,
        'unique_ips_count': 0,
        'unusual_hour_logins': 0,
        'successful_after_multiple_failures': 0,
        'geographic_anomalies': 0
    })
    
    # Group login attempts by user
    user_attempts = defaultdict(list)
    for attempt in login_attempts:
        user = attempt['username']
        user_attempts[user].append(attempt)
        
        # Track IPs used
        user_data[user]['ips'].add(attempt['ip_address'])
        
        # Track login success/failure
        if attempt['success']:
            user_data[user]['successful_logins'] += 1
        else:
            user_data[user]['failed_logins'] += 1
            
        # Track login times
        user_data[user]['login_times'].append(attempt['timestamp'])
        
        # Track unusual-hour logins (01:00-05:59 UTC)
        hour = attempt['datetime'].hour
        if 1 <= hour <= 5:
            user_data[user]['unusual_hour_logins'] += 1
    
    # Calculate additional metrics
    for user, attempts in user_attempts.items():
        # Sort attempts by timestamp
        sorted_attempts = sorted(attempts, key=lambda x: x['timestamp'])
        
        # Check for successful login after failures
        consecutive_failures = 0
        for i in range(1, len(sorted_attempts)):
            current = sorted_attempts[i]
            previous = sorted_attempts[i-1]
            
            # Check for IP hopping (successful login from different IP)
            if (not previous['success'] and 
                current['success'] and
                current['ip_address'] != previous['ip_address']):
                user_data[user]['success_after_failure'] += 1
                
            # Count consecutive failures
            if not previous['success']:
                consecutive_failures += 1
            else:
                consecutive_failures = 0
                
            # Successful login after multiple consecutive failures
            if current['success'] and consecutive_failures >= 3:
                user_data[user]['successful_after_multiple_failures'] += 1
                consecutive_failures = 0
                
        # Count unique IPs
        user_data[user]['unique_ips_count'] = len(user_data[user]['ips'])
        
        # Calculate rough geographic anomalies based on IP
        # This is simplified - in reality, you'd use IP geolocation
        if user_data[user]['unique_ips_count'] >= 3:
            ips_first_octet = [int(ip.split('.')[0]) for ip in user_data[user]['ips']]
            unique_first_octets = len(set(ips_first_octet))
            if unique_first_octets >= 2:  # Different network classes suggest geographic spread
                user_data[user]['geographic_anomalies'] += unique_first_octets
    
    # Calculate comprehensive suspicion scores
    suspicious_users = []
    for user, data in user_data.items():
        # Initialize base suspicion score
        suspicion_score = 0
        
        # Factor 1: Multiple IPs used
        ip_anomaly_score = (data['unique_ips_count'] - 1) * 5  # Expect one normal IP
        suspicion_score += max(0, ip_anomaly_score)
        
        # Factor 2: Successful logins after failures from different IPs
        suspicion_score += data['success_after_failure'] * 15
        
        # Factor 3: Failed login ratio
        if data['successful_logins'] > 0:
            failure_ratio = data['failed_logins'] / data['successful_logins']
            suspicion_score += min(failure_ratio * 3, 20)  # Cap at 20 points
        elif data['failed_logins'] > 5:  # Only failures, no successes
            suspicion_score += 20
            
        # Factor 4: Unusual hour logins
        suspicion_score += data['unusual_hour_logins'] * 2
        
        # Factor 5: Successful after multiple failures
        suspicion_score += data['successful_after_multiple_failures'] * 25
        
        # Factor 6: Geographic anomalies
        suspicion_score += data['geographic_anomalies'] * 8
        
        # Add context for this score
        reasons = []
        if data['unique_ips_count'] > 1:
            reasons.append(f"Used {data['unique_ips_count']} different IPs")
        if data['success_after_failure'] > 0:
            reasons.append(f"Successful login after failure from different IP: {data['success_after_failure']} times")
        if data['failed_logins'] > 3:
            reasons.append(f"High number of failed logins: {data['failed_logins']}")
        if data['unusual_hour_logins'] > 0:
            reasons.append(f"Logins during unusual hours: {data['unusual_hour_logins']}")
        if data['successful_after_multiple_failures'] > 0:
            reasons.append(f"Successful login after multiple failures: {data['successful_after_multiple_failures']}")
        if data['geographic_anomalies'] > 0:
            reasons.append(f"Potential geographic anomalies detected")
            
        suspicious_users.append((user, suspicion_score, data, reasons))
    
    # Sort by suspicion score
    suspicious_users.sort(key=lambda x: x[1], reverse=True)
    
    # Identify the most likely compromised user
    most_likely_compromised = suspicious_users[0][0] if suspicious_users else None
    
    return suspicious_users, user_data, most_likely_compromised

def main():
    file_path = input("Enter the path to the binary log file: ")
    
    try:
        login_attempts = parse_binary_logs(file_path)
        print(f"Successfully parsed {len(login_attempts)} login attempts.")
        
        # Basic log analysis for the requested metrics
        log_metrics = analyze_logs(login_attempts)
        
        print("\n===== LOG METRICS =====")
        print(f"Start date of the log (UTC): {log_metrics['start_date_utc'].strftime('%Y-%m-%d %H:%M:%S UTC')}")
        print(f"Total login attempts recorded: {log_metrics['total_attempts']}")
        print(f"Number of unique usernames: {log_metrics['unique_usernames']}")
        print(f"Number of unique IP addresses: {log_metrics['unique_ips']}")
        
        # Advanced pattern analysis for suspicious activity
        suspicious_users, user_data, most_likely_compromised = analyze_login_patterns(login_attempts)
        
        print("\n===== COMPROMISED USER IDENTIFICATION =====")
        if most_likely_compromised:
            print(f"\n🚨 MOST LIKELY COMPROMISED USER: {most_likely_compromised} 🚨")
            
            # Find this user in the suspicious_users list
            for user, score, data, reasons in suspicious_users:
                if user == most_likely_compromised:
                    print(f"Suspicion Score: {score:.2f}")
                    print("\nReasons for suspicion:")
                    for i, reason in enumerate(reasons, 1):
                        print(f"  {i}. {reason}")
                    print("\nDetailed metrics:")
                    print(f"  Unique IPs: {data['unique_ips_count']}")
                    print(f"  Successful logins: {data['successful_logins']}")
                    print(f"  Failed logins: {data['failed_logins']}")
                    print(f"  Logins during unusual hours: {data['unusual_hour_logins']}")
                    print(f"  IP addresses used: {', '.join(data['ips'])}")
                    break
        else:
            print("No compromised user identified.")
        
        print("\n===== OTHER SUSPICIOUS USERS =====")
        if len(suspicious_users) > 1:
            for user, score, data, reasons in suspicious_users[1:6]:  # Show top 5 after the most suspicious
                if score > 10:  # Only show users with meaningful suspicion scores
                    print(f"\nUsername: {user} (Suspicion Score: {score:.2f})")
                    print("Reasons for suspicion:")
                    for i, reason in enumerate(reasons, 1):
                        print(f"  {i}. {reason}")
        else:
            print("No other suspicious users identified.")
        
    except Exception as e:
        print(f"Error processing the file: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()
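
The pattern analysis can also be sanity-checked without a binary file by feeding analyze_login_patterns hand-built dicts in the same shape parse_binary_logs produces. The records below are invented: three failures followed by a success from a new IP, which the scoring above should rank well ahead of a single clean login.

import datetime

def make_attempt(user, ip, ts, ok):
    """Build one login-attempt dict in the parser's output format."""
    return {
        'username': user,
        'ip_address': ip,
        'timestamp': ts,
        'datetime': datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc),
        'success': ok,
    }

base = 1700000000  # arbitrary epoch for the fake records
attempts = [make_attempt('mallory', '198.51.100.9', base + i * 60, False) for i in range(3)]
attempts.append(make_attempt('mallory', '203.0.113.50', base + 240, True))
attempts.append(make_attempt('alice', '10.0.0.5', base, True))

suspicious, data, top = analyze_login_patterns(attempts)
print(top)  # expected: 'mallory'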

GPG Verify

#!/bin/bash

echo "Verifying signature files in the current directory..."

for sig_file in *.sig; do
  if [[ -f "$sig_file" ]]; then
    original_file="${sig_file%.sig}"
    if [[ ! -f "$original_file" ]]; then
      echo "[WARN] No matching file \"$original_file\" found for \"$sig_file\"; skipping."
      continue
    fi
    echo ""
    echo "Verifying signature for \"$original_file\" using \"$sig_file\"..."
    if gpg --verify "$sig_file" "$original_file"; then
      echo "[OK] Signature verification successful for \"$original_file\"."
    else
      echo "[ERROR] Signature verification failed for \"$original_file\". The file may have been tampered with or the signature is invalid."
    fi
  fi
done

echo ""
echo "Verification process complete."

Hash Identifier

#!/usr/bin/env python3

import re
import sys
from collections import defaultdict

def identify_hash(hash_string):
    """Identify the type of hash based on pattern, length, and character set."""
    
    # Clean the hash string
    hash_string = hash_string.strip()
    
    # Check for empty string
    if not hash_string:
        return "Empty string"
    
    # Check for common hash formats with special syntax
    if hash_string.startswith('$1$'):
        return "MD5 (Unix)"
    if hash_string.startswith('$2a$') or hash_string.startswith('$2b$') or hash_string.startswith('$2y$'):
        return "Bcrypt"
    if hash_string.startswith('$5$'):
        return "SHA-256 (Unix)"
    if hash_string.startswith('$6$'):
        return "SHA-512 (Unix)"
    if hash_string.startswith('$pbkdf2-sha256$'):
        return "PBKDF2-SHA256"
    if hash_string.startswith('$sha1$'):
        return "SHA-1 (Unix)"
    if hash_string.startswith('$pdf$'):
        return "PDF (Hashcat format)"
    if hash_string.startswith('$P$') or hash_string.startswith('$H$'):
        return "PHPass (WordPress/phpBB)"
    if hash_string.startswith('$apr1$'):
        return "APR1-MD5"
    if re.match(r'^[a-fA-F0-9]{32}:[a-fA-F0-9]{32}$', hash_string):
        return "MD5(Half:Salt)"
    
    # Check for common hash lengths
    hash_length = len(hash_string)
    possible_types = []
    
    # MySQL5 hashes are '*' followed by 40 hex chars; check this before the
    # hex-only test below, which the leading '*' would otherwise fail
    if re.match(r'^\*[a-fA-F0-9]{40}$', hash_string):
        return "MySQL5"
    
    # Check if the hash is hexadecimal
    if re.match(r'^[a-fA-F0-9]+$', hash_string):
        if hash_length == 32:
            possible_types.append("MD5")
            possible_types.append("MD4")
            possible_types.append("NTLM")
            possible_types.append("RIPEMD-128")
        elif hash_length == 40:
            possible_types.append("SHA-1")
            possible_types.append("RIPEMD-160")
        elif hash_length == 64:
            possible_types.append("SHA-256")
            possible_types.append("RIPEMD-256")
        elif hash_length == 96:
            possible_types.append("SHA-384")
        elif hash_length == 128:
            possible_types.append("SHA-512")
            possible_types.append("Whirlpool")
        elif hash_length == 16:
            possible_types.append("MySQL323")
            possible_types.append("DES(Oracle)")
        elif hash_length == 56:
            possible_types.append("SHA-224")
        elif hash_length == 8:
            possible_types.append("CRC32")
            possible_types.append("ADLER32")
    
    # Check for Base64 character set (with potential padding); skip if a hex
    # interpretation was already found, since hex strings also match this set
    if not possible_types and re.match(r'^[A-Za-z0-9+/]+={0,2}$', hash_string):
        if hash_length == 24:
            possible_types.append("MD5 (Base64)")
        elif hash_length == 28:
            possible_types.append("SHA-1 (Base64)")
        elif hash_length == 44:
            possible_types.append("SHA-256 (Base64)")
        elif hash_length == 88:
            possible_types.append("SHA-512 (Base64)")
        else:
            possible_types.append("Base64 encoded")
    
    # No specific hash type identified, give general suggestion
    if not possible_types:
        if re.match(r'^[a-fA-F0-9]+$', hash_string):
            return f"Unknown hash (Hexadecimal, {hash_length} chars)"
        else:
            return f"Unknown format (possibly not a standard hash, or custom format)"
    
    return " or ".join(possible_types)

def main():
    if len(sys.argv) != 2:
        print("Usage: python hash_identifier.py <hash_file>")
        sys.exit(1)
    
    hash_file = sys.argv[1]
    
    try:
        with open(hash_file, 'r') as f:
            lines = f.readlines()
        
        print(f"Analyzing {len(lines)} hashes from {hash_file}...\n")
        
        hash_types = defaultdict(int)
        
        for i, line in enumerate(lines, 1):
            hash_string = line.strip()
            if not hash_string or hash_string.startswith('#'):
                continue
                
            hash_type = identify_hash(hash_string)
            hash_types[hash_type] += 1
            
            # Print the first few and last few hash identifications
            if i <= 3 or i > len(lines) - 3:
                print(f"Line {i}: {hash_string[:40]}{'...' if len(hash_string) > 40 else ''} -> {hash_type}")
            elif i == 4 and len(lines) > 6:
                print(f"... ({len(lines) - 6} more hashes) ...")
        
        print("\nSummary of hash types:")
        for hash_type, count in sorted(hash_types.items(), key=lambda x: x[1], reverse=True):
            print(f"  {hash_type}: {count}")
        
    except FileNotFoundError:
        print(f"Error: File '{hash_file}' not found.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
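
A quick way to sanity-check the identifier is to generate digests of known types with hashlib and feed them in as a file. The expected labels follow from the length table above (32, 40, and 64 hex characters); the file name is arbitrary.

import hashlib

samples = [
    hashlib.md5(b'test').hexdigest(),     # 32 hex chars -> MD5/MD4/NTLM/RIPEMD-128
    hashlib.sha1(b'test').hexdigest(),    # 40 hex chars -> SHA-1/RIPEMD-160
    hashlib.sha256(b'test').hexdigest(),  # 64 hex chars -> SHA-256/RIPEMD-256
]

with open('sample_hashes.txt', 'w') as f:
    f.write('\n'.join(samples) + '\n')

# Then: python hash_identifier.py sample_hashes.txt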

HMAC Verifier

#!/usr/bin/env python3
"""
Simple HMAC Verification Script

This script verifies HMAC signatures for message files by:
1. Finding all message_#.txt and message_#.hmac file pairs
2. Checking each line to verify the HMAC integrity
3. Reporting only basic verification results without additional analysis

Usage:
    python simple_hmac_verify.py --directory <logs_directory> --key <hmac_key>
"""

import hmac
import hashlib
import os
import sys
import re
import glob
import argparse
from datetime import datetime

# The valid signing key
VALID_KEY = 'ciCloud-API-20240315-4f7b9c'

def calculate_hmac(message, key):
    """Calculate HMAC signature for a message."""
    key_bytes = key.encode('utf-8')
    message_bytes = message.encode('utf-8')
    signature = hmac.new(key_bytes, message_bytes, hashlib.sha256)
    return signature.hexdigest()

def verify_hmac(message, signature, key):
    """Verify if a message's HMAC signature is valid."""
    calculated_signature = calculate_hmac(message, key)
    return hmac.compare_digest(calculated_signature, signature)

def read_file(file_path):
    """Read a file and return its lines."""
    with open(file_path, 'r') as f:
        return [line.rstrip() for line in f]

def find_file_pairs(directory):
    """Find matching message/HMAC file pairs in the directory."""
    file_pairs = []
    
    # Find all message_*.txt files
    message_files = glob.glob(os.path.join(directory, "message_*.txt"))
    
    for message_file in message_files:
        # Extract the number part
        match = re.search(r'message_(\d+)\.txt$', message_file)
        if match:
            number = match.group(1)
            hmac_file = os.path.join(directory, f"message_{number}.hmac")
            
            # Check if the corresponding HMAC file exists
            if os.path.exists(hmac_file):
                file_pairs.append((message_file, hmac_file))
    
    return file_pairs

def process_file_pair(message_file, hmac_file, key, verbose=False):
    """Process a single message/HMAC file pair."""
    # Extract file number for identification
    match = re.search(r'message_(\d+)\.txt$', message_file)
    file_id = match.group(1) if match else os.path.basename(message_file)
    
    try:
        # Read files
        message_lines = read_file(message_file)
        hmac_lines = read_file(hmac_file)
        
        total_lines = min(len(message_lines), len(hmac_lines))
        valid_lines = 0
        invalid_lines = 0
        mismatched_entries = []
        
        print(f"Processing file {file_id}: {os.path.basename(message_file)}")
        print(f"  - Total lines: {total_lines}")
        
        # Process each line
        for i in range(total_lines):
            message = message_lines[i]
            signature = hmac_lines[i]
            
            # Skip empty lines
            if not message or not signature:
                continue
                
            # With --verbose, print the first few message/signature pairs for debugging
            if verbose and i < 3:
                print(f"  - Line {i+1} check:")
                print(f"    Message: {message[:30]}{'...' if len(message) > 30 else ''}")
                print(f"    Signature: {signature[:30]}{'...' if len(signature) > 30 else ''}")
                print(f"    Calculated: {calculate_hmac(message, key)[:30]}...")
            
            # Verify HMAC
            is_valid = verify_hmac(message, signature, key)
            
            if is_valid:
                valid_lines += 1
            else:
                invalid_lines += 1
                mismatched_entries.append({
                    'line': i + 1,
                    'message': message,
                    'provided_signature': signature,
                    'calculated_signature': calculate_hmac(message, key)
                })
        
        result = {
            'file_id': file_id,
            'message_file': message_file,
            'hmac_file': hmac_file,
            'total_lines': total_lines,
            'valid_lines': valid_lines,
            'invalid_lines': invalid_lines,
            'mismatched_entries': mismatched_entries[:10]  # Only include first 10 for brevity
        }
        
        print(f"  - Valid lines: {valid_lines}")
        print(f"  - Invalid lines: {invalid_lines}")
        print(f"  - Integrity: {'INTACT' if invalid_lines == 0 else 'COMPROMISED'}")
        print()
        
        return result
        
    except Exception as e:
        print(f"Error processing file pair ({message_file}, {hmac_file}): {e}")
        return {
            'file_id': file_id,
            'message_file': message_file,
            'hmac_file': hmac_file,
            'error': str(e)
        }

def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description='Simple HMAC Verification')
    parser.add_argument('--directory', '-d', required=True, help='Directory containing log files')
    parser.add_argument('--key', '-k', default=VALID_KEY, help=f'HMAC signing key (default: {VALID_KEY})')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
    
    args = parser.parse_args()
    
    try:
        start_time = datetime.now()
        print(f"Starting HMAC verification in {args.directory}")
        print(f"Using key: {args.key}")
        print(f"Started at: {start_time.isoformat()}")
        print("-" * 60)
        
        # Find all file pairs
        file_pairs = find_file_pairs(args.directory)
        
        if not file_pairs:
            print(f"No matching message/HMAC file pairs found in {args.directory}")
            sys.exit(1)
        
        print(f"Found {len(file_pairs)} file pairs")
        print("-" * 60)
        
        # Process each file pair
        results = []
        total_files = len(file_pairs)
        files_with_errors = 0
        files_with_mismatches = 0
        total_lines_processed = 0
        total_mismatched_lines = 0
        
        for message_file, hmac_file in file_pairs:
            result = process_file_pair(message_file, hmac_file, args.key, verbose=args.verbose)
            results.append(result)
            
            if 'error' in result:
                files_with_errors += 1
            else:
                total_lines_processed += result['total_lines']
                total_mismatched_lines += result['invalid_lines']
                
                if result['invalid_lines'] > 0:
                    files_with_mismatches += 1
        
        # Summary
        print("-" * 60)
        print("VERIFICATION SUMMARY")
        print("-" * 60)
        print(f"Total file pairs processed: {total_files}")
        print(f"Files with errors: {files_with_errors}")
        print(f"Files with mismatched HMACs: {files_with_mismatches}")
        print(f"Total lines processed: {total_lines_processed}")
        print(f"Total mismatched lines: {total_mismatched_lines}")
        
        end_time = datetime.now()
        duration = end_time - start_time
        print(f"Duration: {duration.total_seconds():.2f} seconds")
        
        # List files with mismatches
        if files_with_mismatches > 0:
            print("\nFiles with mismatched HMACs:")
            for result in results:
                if 'invalid_lines' in result and result['invalid_lines'] > 0:
                    print(f"- {os.path.basename(result['message_file'])}: {result['invalid_lines']} mismatched lines")
                    
                    # Show example of first mismatched entry
                    if args.verbose and result['mismatched_entries']:
                        first_mismatch = result['mismatched_entries'][0]
                        print(f"  Example (line {first_mismatch['line']}):")
                        print(f"  Message: {first_mismatch['message'][:50]}...")
                        print(f"  Provided HMAC: {first_mismatch['provided_signature']}")
                        print(f"  Calculated HMAC: {first_mismatch['calculated_signature']}")
                        print()
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()
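
To produce a file pair the verifier will accept, each message line needs a matching HMAC-SHA256 hex digest on the same line of the .hmac file. This sketch writes one such pair with made-up log lines, signed with the script's default key:

import hmac
import hashlib

KEY = 'ciCloud-API-20240315-4f7b9c'  # default key from the script above
lines = [
    '2024-03-15 10:00:01 user=alice action=login',
    '2024-03-15 10:00:09 user=bob action=logout',
]

with open('message_1.txt', 'w') as msg, open('message_1.hmac', 'w') as sig:
    for line in lines:
        msg.write(line + '\n')
        digest = hmac.new(KEY.encode('utf-8'), line.encode('utf-8'), hashlib.sha256).hexdigest()
        sig.write(digest + '\n')

# Then: python simple_hmac_verify.py --directory .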

HMAC Integrity Checker

#!/usr/bin/env python3
"""
DNS Subdomain Batch Integrity Checker

This script processes multiple message/HMAC file pairs in a directory, following the pattern:
message_#.txt and message_#.hmac

It automatically detects and verifies all matching pairs in the specified directory,
generating a comprehensive report of integrity issues across all files.

Usage:
    python dns_batch_integrity.py --directory <logs_directory> --output <output_dir>
"""

import hmac
import hashlib
import sys
import os
import re
import argparse
import json
import glob
from datetime import datetime
from typing import Dict, List, Any, Tuple

# The valid signing key
VALID_KEY = 'ciCloud-API-20240315-4f7b9c'

class DNSSubdomainBatchChecker:
    def __init__(self, key: str = VALID_KEY):
        """
        Initialize the DNS Subdomain Integrity Checker.
        
        Args:
            key: The HMAC signing key
        """
        self.key = key
        
        # Initialize common DNS patterns to check for tampering
        self.common_subdomains = {
            'www', 'mail', 'api', 'admin', 'portal', 'test', 'dev', 'staging', 
            'secure', 'vpn', 'internal', 'mx', 'smtp', 'pop', 'imap', 'webmail',
            'remote', 'cdn', 'dns', 'ns1', 'ns2', 'ldap', 'db', 'mysql', 'ftp'
        }
        
        # Suspicious TLDs often used in attacks
        self.suspicious_tlds = {
            'xyz', 'top', 'club', 'cyou', 'icu', 'rest', 'space', 'casa',
            'monster', 'bar', 'gq', 'tk', 'ml', 'cf', 'ga'
        }
        
        # Common character substitutions used in spoofing
        self.char_substitutions = {
            '0': 'o', 'o': '0',
            '1': 'l', 'l': '1', 'i': '1',
            '5': 's', 's': '5',
            '3': 'e', 'e': '3',
            '4': 'a', 'a': '4',
            '6': 'g', 'g': '6',
            '7': 't', 't': '7',
            '8': 'b', 'b': '8'
        }
    
    def calculate_hmac(self, message: str) -> str:
        """
        Calculate HMAC signature for a message.
        
        Args:
            message: The message to sign
            
        Returns:
            The HMAC signature (hex encoded)
        """
        key_bytes = self.key.encode('utf-8')
        message_bytes = message.encode('utf-8')
        signature = hmac.new(key_bytes, message_bytes, hashlib.sha256)
        return signature.hexdigest()
    
    def verify_hmac(self, message: str, signature: str) -> bool:
        """
        Verify if a message's HMAC signature is valid.
        
        Args:
            message: The message to verify
            signature: The provided HMAC signature
            
        Returns:
            True if signature is valid, False otherwise
        """
        calculated_signature = self.calculate_hmac(message)
        # Use constant-time comparison to prevent timing attacks
        return hmac.compare_digest(calculated_signature, signature)
    
    def read_file(self, file_path: str) -> List[str]:
        """
        Read a file and return its lines.
        
        Args:
            file_path: Path to the file
            
        Returns:
            List of lines from the file
        """
        with open(file_path, 'r') as f:
            return [line.rstrip() for line in f]
    
    def find_file_pairs(self, directory: str) -> List[Tuple[str, str]]:
        """
        Find matching message/HMAC file pairs in the directory.
        
        Args:
            directory: Directory to search for files
            
        Returns:
            List of tuples (message_file_path, hmac_file_path)
        """
        file_pairs = []
        
        # Find all message_*.txt files
        message_files = glob.glob(os.path.join(directory, "message_*.txt"))
        
        for message_file in message_files:
            # Extract the number part
            match = re.search(r'message_(\d+)\.txt$', message_file)
            if match:
                number = match.group(1)
                hmac_file = os.path.join(directory, f"message_{number}.hmac")
                
                # Check if the corresponding HMAC file exists
                if os.path.exists(hmac_file):
                    file_pairs.append((message_file, hmac_file))
        
        return file_pairs
    
    def extract_domain_info(self, log_entry: str) -> Dict[str, Any]:
        """
        Extract domain and subdomain information from a log entry.
        
        Args:
            log_entry: A log entry string
            
        Returns:
            Dictionary with extracted domain information
        """
        domain_info = {
            'has_domain': False,
            'domain': '',
            'subdomain': '',
            'tld': ''
        }
        
        # Try to find domain patterns in the log entry
        # This regex looks for domain.tld or subdomain.domain.tld patterns
        domain_matches = re.findall(r'([a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)', log_entry)
        
        if domain_matches:
            domain_info['has_domain'] = True
            full_domain = domain_matches[0][0]
            domain_info['domain'] = full_domain
            
            # Split by dots to extract subdomain and TLD
            parts = full_domain.split('.')
            
            if len(parts) >= 2:
                domain_info['tld'] = parts[-1].lower()
                
                if len(parts) > 2:
                    domain_info['subdomain'] = '.'.join(parts[:-2])
        
        return domain_info
    
    def detect_tampering(self, log_entry: str) -> Dict[str, Any]:
        """
        Detect possible tampering in a DNS log entry.
        
        Args:
            log_entry: A log entry string
            
        Returns:
            Dictionary with tampering analysis
        """
        analysis = {
            'is_suspicious': False,
            'tampering_patterns': set(),
            'possible_original': '',
            'risk_level': 'low',
            'reasons': []
        }
        
        # Extract any domain information from the log entry
        domain_info = self.extract_domain_info(log_entry)
        
        if domain_info['has_domain']:
            # Check for suspicious TLDs
            if domain_info['tld'] in self.suspicious_tlds:
                analysis['is_suspicious'] = True
                analysis['tampering_patterns'].add('suspicious_tld')
                analysis['risk_level'] = 'medium'
                analysis['reasons'].append(f"Suspicious TLD found: .{domain_info['tld']}")
            
            # Check for subdomain issues
            if domain_info['subdomain']:
                subdomain = domain_info['subdomain']
                
                # Check for character substitutions
                for char in subdomain:
                    if char in self.char_substitutions:
                        analysis['is_suspicious'] = True
                        analysis['tampering_patterns'].add('character_substitution')
                        analysis['risk_level'] = 'high'
                        analysis['reasons'].append(f"Possible character substitution: '{char}' might be '{self.char_substitutions[char]}'")
                        
                        # Generate a possible original by replacing the character
                        possible_original = log_entry.replace(subdomain, 
                                                             subdomain.replace(char, self.char_substitutions[char]))
                        analysis['possible_original'] = possible_original
                
                # Check for similar but different subdomains
                for common_sub in self.common_subdomains:
                    if subdomain != common_sub and self.levenshtein_distance(subdomain, common_sub) <= 2:
                        analysis['is_suspicious'] = True
                        analysis['tampering_patterns'].add('similar_subdomain')
                        analysis['risk_level'] = 'high'
                        analysis['reasons'].append(f"Subdomain '{subdomain}' is suspiciously similar to common subdomain '{common_sub}'")
                        
                        # Generate a possible original version
                        possible_original = log_entry.replace(subdomain, common_sub)
                        analysis['possible_original'] = possible_original
                
                # Check for unusually long subdomains (potential data exfiltration)
                if len(subdomain) > 30:
                    analysis['is_suspicious'] = True
                    analysis['tampering_patterns'].add('exfiltration_subdomain')
                    analysis['risk_level'] = 'high'
                    analysis['reasons'].append(f"Unusually long subdomain (length: {len(subdomain)}) may indicate data exfiltration")
        
        # Check for IP address patterns
        ip_matches = re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', log_entry)
        if ip_matches:
            # Check for suspicious IP ranges
            for ip in ip_matches:
                octets = [int(octet) for octet in ip.split('.')]
                
                # Check for loopback or private IP misuse
                if octets[0] == 127 or (octets[0] == 10) or \
                   (octets[0] == 172 and 16 <= octets[1] <= 31) or \
                   (octets[0] == 192 and octets[1] == 168):
                    analysis['is_suspicious'] = True
                    analysis['tampering_patterns'].add('internal_ip_exposure')
                    analysis['risk_level'] = 'critical'
                    analysis['reasons'].append(f"Internal IP address exposed: {ip}")
        
        # Check for DNS record types and modifications
        record_types = ['A', 'AAAA', 'MX', 'CNAME', 'TXT', 'NS', 'SOA', 'SRV', 'PTR']
        for record_type in record_types:
            # Look for record type followed by manipulation indicators
            pattern = r'\b' + record_type + r'\s+(?:changed|modified|updated|deleted|removed|added)\b'
            if re.search(pattern, log_entry, re.IGNORECASE):
                analysis['is_suspicious'] = True
                analysis['tampering_patterns'].add('dns_record_modification')
                analysis['risk_level'] = 'high'
                analysis['reasons'].append(f"DNS {record_type} record modification detected")
        
        # Look for DNS amplification or reflection attack patterns
        if re.search(r'\b(?:amplification|reflection|flood|ddos)\b', log_entry, re.IGNORECASE) and domain_info['has_domain']:
            analysis['is_suspicious'] = True
            analysis['tampering_patterns'].add('dns_amplification')
            analysis['risk_level'] = 'critical'
            analysis['reasons'].append(f"Possible DNS amplification attack signature")
        
        # Update risk level based on number of patterns
        if len(analysis['tampering_patterns']) >= 3:
            analysis['risk_level'] = 'critical'
        elif len(analysis['tampering_patterns']) == 2:
            analysis['risk_level'] = 'high' if analysis['risk_level'] != 'critical' else 'critical'
        
        return analysis
    
    @staticmethod
    def levenshtein_distance(s1: str, s2: str) -> int:
        """
        Calculate the Levenshtein distance between two strings.
        
        Args:
            s1: First string
            s2: Second string
            
        Returns:
            The Levenshtein distance
        """
        if len(s1) < len(s2):
            return DNSSubdomainBatchChecker.levenshtein_distance(s2, s1)
        
        if len(s2) == 0:
            return len(s1)
        
        previous_row = range(len(s2) + 1)
        for i, c1 in enumerate(s1):
            current_row = [i + 1]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row
        
        return previous_row[-1]
    
    def process_file_pair(self, message_file: str, hmac_file: str) -> Dict[str, Any]:
        """
        Process a single message/HMAC file pair.
        
        Args:
            message_file: Path to the message file
            hmac_file: Path to the HMAC file
            
        Returns:
            Dictionary with processing results
        """
        # Extract file number for identification
        match = re.search(r'message_(\d+)\.txt$', message_file)
        file_id = match.group(1) if match else os.path.basename(message_file)
        
        # Read files
        try:
            message_content = self.read_file(message_file)
            hmac_content = self.read_file(hmac_file)
            
            # Verify each line
            results = {
                'file_id': file_id,
                'message_file': message_file,
                'hmac_file': hmac_file,
                'total_lines': min(len(message_content), len(hmac_content)),
                'valid_lines': 0,
                'invalid_lines': 0,
                'suspicious_lines': 0,
                'invalid_entries': [],
                'tampering_summary': {
                    'patterns': {},
                    'risk_levels': {
                        'low': 0,
                        'medium': 0,
                        'high': 0,
                        'critical': 0
                    }
                }
            }
            
            # Process lines
            for i in range(min(len(message_content), len(hmac_content))):
                message = message_content[i]
                signature = hmac_content[i]
                
                # Skip empty lines
                if not message or not signature:
                    continue
                
                # Verify HMAC
                is_valid = self.verify_hmac(message, signature)
                
                if is_valid:
                    results['valid_lines'] += 1
                else:
                    results['invalid_lines'] += 1
                    
                    # Generate correct signature
                    correct_signature = self.calculate_hmac(message)
                    
                    # Check for tampering
                    tampering_analysis = self.detect_tampering(message)
                    
                    invalid_entry = {
                        'line_number': i + 1,
                        'message': message,
                        'provided_signature': signature,
                        'correct_signature': correct_signature,
                        'tampering_analysis': tampering_analysis
                    }
                    
                    results['invalid_entries'].append(invalid_entry)
                    
                    # Update tampering statistics
                    if tampering_analysis['is_suspicious']:
                        results['suspicious_lines'] += 1
                        results['tampering_summary']['risk_levels'][tampering_analysis['risk_level']] += 1
                        
                        # Count pattern occurrences
                        for pattern in tampering_analysis['tampering_patterns']:
                            if pattern not in results['tampering_summary']['patterns']:
                                results['tampering_summary']['patterns'][pattern] = 0
                            results['tampering_summary']['patterns'][pattern] += 1
            
            return results
            
        except Exception as e:
            print(f"Error processing file pair ({message_file}, {hmac_file}): {e}")
            return {
                'file_id': file_id,
                'message_file': message_file,
                'hmac_file': hmac_file,
                'error': str(e)
            }
    
    def process_directory(self, directory: str) -> Dict[str, Any]:
        """
        Process all matching file pairs in a directory.
        
        Args:
            directory: Directory containing message_*.txt and message_*.hmac files
            
        Returns:
            Dictionary with processing results for all files
        """
        # Find all matching file pairs
        file_pairs = self.find_file_pairs(directory)
        
        if not file_pairs:
            print(f"No matching message/HMAC file pairs found in {directory}")
            return {'error': 'No matching file pairs found'}
        
        # Process each file pair
        results = {
            'directory': directory,
            'total_files': len(file_pairs),
            'processed_files': 0,
            'files_with_errors': 0,
            'total_lines_processed': 0,
            'total_invalid_lines': 0,
            'total_suspicious_lines': 0,
            'file_results': [],
            'overall_tampering_summary': {
                'patterns': {},
                'risk_levels': {
                    'low': 0,
                    'medium': 0,
                    'high': 0,
                    'critical': 0
                }
            }
        }
        
        for message_file, hmac_file in file_pairs:
            print(f"Processing file pair: {os.path.basename(message_file)} and {os.path.basename(hmac_file)}")
            
            # Process file pair
            file_result = self.process_file_pair(message_file, hmac_file)
            results['file_results'].append(file_result)
            
            # Update overall statistics
            if 'error' in file_result:
                results['files_with_errors'] += 1
            else:
                results['processed_files'] += 1
                results['total_lines_processed'] += file_result['total_lines']
                results['total_invalid_lines'] += file_result['invalid_lines']
                results['total_suspicious_lines'] += file_result['suspicious_lines']
                
                # Aggregate tampering patterns
                for pattern, count in file_result['tampering_summary']['patterns'].items():
                    if pattern not in results['overall_tampering_summary']['patterns']:
                        results['overall_tampering_summary']['patterns'][pattern] = 0
                    results['overall_tampering_summary']['patterns'][pattern] += count
                
                # Aggregate risk levels
                for level in ['low', 'medium', 'high', 'critical']:
                    results['overall_tampering_summary']['risk_levels'][level] += \
                        file_result['tampering_summary']['risk_levels'][level]
        
        return results
    
    def save_corrected_hmac_files(self, results: Dict[str, Any], output_dir: str) -> None:
        """
        Save corrected HMAC files for each processed file pair.
        
        Args:
            results: Overall processing results
            output_dir: Output directory
        """
        corrected_dir = os.path.join(output_dir, 'corrected_hmac_files')
        os.makedirs(corrected_dir, exist_ok=True)
        
        for file_result in results['file_results']:
            if 'error' in file_result:
                continue
            
            # Get original file content
            message_file = file_result['message_file']
            hmac_file = file_result['hmac_file']
            
            try:
                # Read original message file
                message_content = self.read_file(message_file)
                
                # Create corrected HMAC file
                corrected_hmac_path = os.path.join(corrected_dir, os.path.basename(hmac_file))
                
                with open(corrected_hmac_path, 'w') as f:
                    for message in message_content:
                        if message:  # Skip empty lines
                            correct_signature = self.calculate_hmac(message)
                            f.write(f"{correct_signature}\n")
                
                print(f"Created corrected HMAC file: {corrected_hmac_path}")
                
            except Exception as e:
                print(f"Error creating corrected HMAC file for {os.path.basename(hmac_file)}: {e}")
    
    def save_results(self, results: Dict[str, Any], output_dir: str) -> None:
        """
        Save processing results to output files.
        
        Args:
            results: Overall processing results
            output_dir: Output directory
        """
        os.makedirs(output_dir, exist_ok=True)
        
        # Save overall JSON results
        with open(os.path.join(output_dir, 'batch_results.json'), 'w') as f:
            # Sets are not JSON-serializable; anything else unexpected falls back to str()
            f.write(json.dumps(results, indent=2,
                               default=lambda x: list(x) if isinstance(x, set) else str(x)))
        
        # Save detailed report
        with open(os.path.join(output_dir, 'integrity_report.txt'), 'w') as f:
            f.write(f"DNS Subdomain Batch Integrity Report\n")
            f.write(f"==================================\n\n")
            f.write(f"Generated: {datetime.now().isoformat()}\n\n")
            
            f.write(f"Overall Summary:\n")
            f.write(f"---------------\n")
            f.write(f"Directory processed: {results['directory']}\n")
            f.write(f"Total file pairs: {results['total_files']}\n")
            f.write(f"Successfully processed: {results['processed_files']}\n")
            f.write(f"Files with errors: {results['files_with_errors']}\n")
            f.write(f"Total log lines processed: {results['total_lines_processed']}\n")
            f.write(f"Total invalid lines: {results['total_invalid_lines']}\n")
            f.write(f"Total suspicious lines: {results['total_suspicious_lines']}\n\n")
            
            # Risk level summary
            if results['total_suspicious_lines'] > 0:
                f.write(f"Risk Level Distribution:\n")
                for level in ['low', 'medium', 'high', 'critical']:
                    count = results['overall_tampering_summary']['risk_levels'][level]
                    indicator = '!' * (1 if level == 'low' else 2 if level == 'medium' else 3 if level == 'high' else 4)
                    f.write(f"  {indicator} {level.upper()}: {count}\n")
                
                f.write(f"\nTampering Patterns Detected:\n")
                for pattern, count in sorted(results['overall_tampering_summary']['patterns'].items(), 
                                           key=lambda x: x[1], reverse=True):
                    f.write(f"  - {pattern}: {count}\n")
            
            # Per-file summary
            f.write(f"\nPer-File Summary:\n")
            f.write(f"----------------\n")
            for file_result in results['file_results']:
                if 'error' in file_result:
                    f.write(f"File {file_result['file_id']}: ERROR - {file_result['error']}\n")
                else:
                    integrity_status = "COMPROMISED" if file_result['invalid_lines'] > 0 else "INTACT"
                    risk_level = "HIGH RISK" if (file_result['tampering_summary']['risk_levels']['high'] > 0 or 
                                              file_result['tampering_summary']['risk_levels']['critical'] > 0) else \
                                 "MEDIUM RISK" if file_result['tampering_summary']['risk_levels']['medium'] > 0 else \
                                 "LOW RISK" if file_result['suspicious_lines'] > 0 else "SAFE"
                    
                    f.write(f"File {file_result['file_id']}: {integrity_status} - {risk_level}\n")
                    f.write(f"  Message file: {os.path.basename(file_result['message_file'])}\n")
                    f.write(f"  Lines: {file_result['total_lines']} total, {file_result['invalid_lines']} invalid, {file_result['suspicious_lines']} suspicious\n")
                    
                    if file_result['suspicious_lines'] > 0:
                        # Show the first few suspicious entries
                        suspicious_entries = [entry for entry in file_result['invalid_entries'] 
                                           if entry['tampering_analysis']['is_suspicious']]
                        
                        f.write(f"  Top suspicious entries ({min(3, len(suspicious_entries))} of {len(suspicious_entries)}):\n")
                        for i, entry in enumerate(suspicious_entries[:3]):
                            f.write(f"    Line {entry['line_number']}: {entry['message'][:50]}{'...' if len(entry['message']) > 50 else ''}\n")
                            f.write(f"      Risk: {entry['tampering_analysis']['risk_level'].upper()}\n")
                            f.write(f"      Patterns: {', '.join(entry['tampering_analysis']['tampering_patterns'])}\n")
                    
                    f.write("\n")
        
        # Create a file with high-risk entries for immediate attention
        high_risk_entries = []
        for file_result in results['file_results']:
            if 'error' in file_result:
                continue
                
            file_id = file_result['file_id']
            for entry in file_result['invalid_entries']:
                if entry['tampering_analysis']['is_suspicious'] and \
                   entry['tampering_analysis']['risk_level'] in ['high', 'critical']:
                    entry_copy = entry.copy()
                    entry_copy['file_id'] = file_id
                    high_risk_entries.append(entry_copy)
        
        if high_risk_entries:
            with open(os.path.join(output_dir, 'high_risk_entries.txt'), 'w') as f:
                f.write(f"HIGH RISK DNS LOG ENTRIES - IMMEDIATE ATTENTION REQUIRED\n")
                f.write(f"======================================================\n\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write(f"Total high-risk entries: {len(high_risk_entries)}\n\n")
                
                # Sort by risk level (critical first)
                high_risk_entries.sort(key=lambda x: 0 if x['tampering_analysis']['risk_level'] == 'critical' else 1)
                
                for entry in high_risk_entries:
                    f.write(f"File {entry['file_id']}, Line {entry['line_number']} - [{entry['tampering_analysis']['risk_level'].upper()}]\n")
                    f.write(f"  Message: {entry['message']}\n")
                    f.write(f"  Provided signature: {entry['provided_signature']}\n")
                    f.write(f"  Correct signature: {entry['correct_signature']}\n")
                    f.write(f"  Tampering patterns: {', '.join(entry['tampering_analysis']['tampering_patterns'])}\n")
                    f.write(f"  Reasons:\n")
                    for reason in entry['tampering_analysis']['reasons']:
                        f.write(f"    - {reason}\n")
                    
                    if entry['tampering_analysis']['possible_original']:
                        f.write(f"  Possible original: {entry['tampering_analysis']['possible_original']}\n")
                    
                    f.write("\n")
        
        # Save corrected HMAC files
        self.save_corrected_hmac_files(results, output_dir)

def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description='DNS Subdomain Batch Integrity Checker')
    parser.add_argument('--directory', '-d', required=True, help='Directory containing log files')
    parser.add_argument('--output', '-o', default='batch_output', help='Output directory (default: batch_output)')
    parser.add_argument('--key', '-k', default=VALID_KEY, help=f'HMAC signing key (default: {VALID_KEY})')
    
    args = parser.parse_args()
    
    checker = DNSSubdomainBatchChecker(key=args.key)
    
    try:
        start_time = datetime.now()
        print(f"Starting batch processing of DNS log files in {args.directory}")
        print(f"Started at: {start_time.isoformat()}")
        
        results = checker.process_directory(args.directory)
        
        if 'error' in results:
            print(f"Error: {results['error']}")
            sys.exit(1)
        
        # Save results
        checker.save_results(results, args.output)
        
        end_time = datetime.now()
        duration = end_time - start_time
        
        print(f"\nBatch processing completed!")
        print(f"Duration: {duration.total_seconds():.2f} seconds")
        print(f"Files processed: {results['processed_files']} of {results['total_files']}")
        print(f"Total lines checked: {results['total_lines_processed']}")
        print(f"Invalid lines detected: {results['total_invalid_lines']}")
        print(f"Suspicious lines detected: {results['total_suspicious_lines']}")
        print(f"Results saved to: {args.output}")
        
        if results['total_suspicious_lines'] > 0:
            print(f"\n⚠️ WARNING: {results['total_suspicious_lines']} suspicious log entries detected!")
            high_risk = results['overall_tampering_summary']['risk_levels']['high'] + \
                       results['overall_tampering_summary']['risk_levels']['critical']
            
            if high_risk > 0:
                print(f"❗ CRITICAL: {high_risk} high or critical risk entries found!")
                print(f"Check {os.path.join(args.output, 'high_risk_entries.txt')} for details")
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()

Liber8tion Cracker

#!/usr/bin/env python3

import os
import argparse
import subprocess
import sys
import tempfile
import shutil

def run_hashcat(cmd, description):
    """Run a hashcat command with proper logging"""
    print(f"[+] {description}")
    print(f"[+] Command: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, check=False)
    except Exception as e:
        print(f"[-] Error running hashcat: {e}")

def main():
    parser = argparse.ArgumentParser(description='Crack hashes using the Liber8ion Passphrase Standard')
    parser.add_argument('--hash-file', required=True, help='File containing hashes to crack')
    parser.add_argument('--hash-type', required=True, help='Hashcat hash type (e.g. 0 for MD5, 100 for SHA1)')
    parser.add_argument('--wordlist', default='/usr/share/wordlists/rockyou.txt', help='Dictionary wordlist')
    parser.add_argument('--output', default='cracked_passwords.txt', help='Output file for cracked passwords')
    args = parser.parse_args()

    # Create temporary directory
    temp_dir = tempfile.mkdtemp(prefix="liber8ion_")
    print(f"[+] Using temporary directory: {temp_dir}")

    # Path for the potfile
    potfile = os.path.join(temp_dir, "liber8ion.potfile")
    
    # Create a smaller dictionary with lowercase words
    print(f"[+] Creating optimized wordlist from {args.wordlist}...")
    lowercase_dict = os.path.join(temp_dir, "lowercase_dict.txt")
    uppercase_dict = os.path.join(temp_dir, "uppercase_dict.txt")
    
    try:
        # Take a reasonable subset to avoid memory issues
        with open(args.wordlist, 'r', encoding='latin-1', errors='ignore') as infile, \
             open(lowercase_dict, 'w') as lower_out, \
             open(uppercase_dict, 'w') as upper_out:
            for i, line in enumerate(infile):
                if i >= 100000:  # Limit to first 100k words
                    break
                word = line.strip()
                if word and len(word) >= 3 and len(word) <= 10:  # Filter reasonable word lengths
                    lower_out.write(f"{word.lower()}\n")
                    upper_out.write(f"{word.upper()}\n")
    except Exception as e:
        print(f"[-] Error processing wordlist: {e}")
        sys.exit(1)

    # Create file with digits
    digits_dict = os.path.join(temp_dir, "digits.txt")
    with open(digits_dict, 'w') as f:
        for i in range(10):
            f.write(f"{i}\n")
    
    # Create special character dictionaries
    hyphen_dict = os.path.join(temp_dir, "hyphen.txt")
    with open(hyphen_dict, 'w') as f:
        f.write("-\n")
    
    special_chars_dict = os.path.join(temp_dir, "special_chars.txt")
    with open(special_chars_dict, 'w') as f:
        for c in "!@#$%^&*()-_=+[]{}|;:,.<>?/":
            f.write(f"{c}\n")
    
    # Create liber8 file
    liber8_dict = os.path.join(temp_dir, "liber8.txt")
    with open(liber8_dict, 'w') as f:
        f.write("liber8\n")
    
    # Generate specific pattern dictionaries for each type
    print("[+] Generating pattern dictionaries...")
    
    # For Type 1 (All lowercase, hyphen separator)
    type1_patterns = os.path.join(temp_dir, "type1_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type1_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:1000]:  # Limit to first 1000 words for efficient processing
                out_file.write(f"{word}-liber8-\n")
    except Exception as e:
        print(f"[-] Error generating Type 1 patterns: {e}")
    
    # Generate all types of patterns with special characters
    # For Types 2, 3, and 4
    special_chars = "!@#$%^&*()-_=+[]{}|;:,.<>?/"
    
    # Type 2 (All lowercase, any special char)
    type2_patterns = os.path.join(temp_dir, "type2_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type2_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 2 patterns: {e}")
    
    # Type 3 lower patterns (lowercase first word, any special char)
    type3_lower_patterns = os.path.join(temp_dir, "type3_lower_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type3_lower_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 3 lower patterns: {e}")
    
    # Type 3 upper patterns (uppercase first word, any special char)
    type3_upper_patterns = os.path.join(temp_dir, "type3_upper_patterns.txt")
    try:
        with open(uppercase_dict, 'r') as word_file, open(type3_upper_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 3 upper patterns: {e}")
    
    # Type 4 digit patterns - with digits at end of first word
    type4_first_digit_patterns = os.path.join(temp_dir, "type4_first_digit_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type4_first_digit_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:300]:  # Limit words
                for digit in range(10):
                    for special_char in special_chars[:5]:  # Limit special chars
                        out_file.write(f"{word}{digit}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 4 first word digit patterns: {e}")
    
    print("\n[+] Starting hash cracking with Liber8ion Passphrase Standard...")

    # Type 1: word1-liber8-word2 (all lowercase, hyphen separators)
    print("\n[+] Cracking Type 1 passphrases...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type1_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 1 patterns: word1-liber8-word2 (all lowercase)")

    # Type 2: word1<special>liber8<special>word2 (all lowercase)
    print("\n[+] Cracking Type 2 passphrases...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type2_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 2 patterns: word1<special>liber8<special>word2 (all lowercase)")

    # Type 3: Each word all lowercase OR all uppercase
    print("\n[+] Cracking Type 3 passphrases - lowercase first word...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: lower<special>liber8<special>lower")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: lower<special>liber8<special>UPPER")
    
    print("\n[+] Cracking Type 3 passphrases - uppercase first word...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: UPPER<special>liber8<special>lower")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: UPPER<special>liber8<special>UPPER")

    # Type 4: One word with digit appended
    print("\n[+] Cracking Type 4 passphrases - first word with digit...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type4_first_digit_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1+digit<special>liber8<special>word2")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type4_first_digit_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1+digit<special>liber8<special>WORD2")
    
    # Type 4 with second word with digit
    # For this, we'll use the Type 3 patterns but with a rule to append a digit
    print("\n[+] Cracking Type 4 passphrases - second word with digit...")
    
    # Create a digit append rule file
    append_digit_rule = os.path.join(temp_dir, "append_digit.rule")
    with open(append_digit_rule, 'w') as f:
        for i in range(10):
            f.write(f"${i}\n")
    
    # For lowercase second word with digit
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, lowercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1<special>liber8<special>word2+digit")
    
    # For uppercase second word with digit
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, uppercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1<special>liber8<special>WORD2+digit")
    
    # Same for uppercase first words
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, lowercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: WORD1<special>liber8<special>word2+digit")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, uppercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: WORD1<special>liber8<special>WORD2+digit")

    # Process results to the output file
    print(f"\n[+] Processing results to {args.output}...")
    with open(potfile, 'r') as pot, open(args.output, 'w') as out:
        for line in pot:
            if ':' in line:
                hash_val, plaintext = line.strip().split(':', 1)
                out.write(f"{hash_val}:{plaintext}\n")
    
    print(f"\n[+] Cracking complete! Results saved to {args.output}")
    print(f"[+] To show your cracked passwords: cat {args.output}")

    # Ask if user wants to remove temp files
    response = input(f"\n[?] Remove temporary files in {temp_dir}? (y/n): ")
    if response.lower() == 'y':
        try:
            shutil.rmtree(temp_dir)
            print(f"[+] Temporary directory {temp_dir} removed")
        except Exception as e:
            print(f"[-] Error removing temporary directory: {e}")
    else:
        print(f"[+] Temporary files kept in {temp_dir}")

if __name__ == "__main__":
    main()

PDF to Hashcat

#!/usr/bin/env python

# Copyright (c) 2013 Shane Quigley, < shane at softwareontheside.info >

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# modified to only output hash for hashcat by philsmd, 2015

import re
import sys
import os
from xml.dom import minidom

PY3 = sys.version_info[0] == 3

class PdfParser:
    def __init__(self, file_name):
        self.file_name = file_name
        f = open(file_name, 'rb')
        self.encrypted = f.read()
        f.close()
        self.process = True
        psr = re.compile(b'PDF-\d\.\d')
        try:
            self.pdf_spec = psr.findall(self.encrypted)[0]
        except IndexError:
            sys.stderr.write("%s is not a PDF file!\n" % file_name)
            self.process = False

    def parse(self):
        if not self.process:
            return

        try:
            trailer = self.get_trailer()
        except RuntimeError:
            e = sys.exc_info()[1]
            sys.stderr.write("%s : %s\n" % (self.file_name, str(e)))
            return
        # print >> sys.stderr, trailer
        object_id = self.get_object_id(b'Encrypt', trailer)
        # print >> sys.stderr, object_id
        if(len(object_id) == 0):
            raise RuntimeError("Could not find object id")
        encryption_dictionary = self.get_encryption_dictionary(object_id)
        # print >> sys.stderr, encryption_dictionary
        dr = re.compile(b'\d+')
        vr = re.compile(b'\/V \d')
        rr = re.compile(b'\/R \d')
        try:
            v = dr.findall(vr.findall(encryption_dictionary)[0])[0]
        except IndexError:
            raise RuntimeError("Could not find /V")
        r = dr.findall(rr.findall(encryption_dictionary)[0])[0]
        lr = re.compile(b'\/Length \d+')
        longest = 0
        # According to the docs:
        # Length : (Optional; PDF 1.4; only if V is 2 or 3). Default value: 40
        length = b'40'
        for le in lr.findall(encryption_dictionary):
            if(int(dr.findall(le)[0]) > longest):
                longest = int(dr.findall(le)[0])
                length = dr.findall(le)[0]
        pr = re.compile(b'\/P -?\d+')
        try:
            p = pr.findall(encryption_dictionary)[0]
        except IndexError:
                # print >> sys.stderr, "** dict:", encryption_dictionary
                raise RuntimeError("Could not find /P")
        pr = re.compile(b'-?\d+')
        p = pr.findall(p)[0]
        meta = '1' if self.is_meta_data_encrypted(encryption_dictionary) else '0'
        idr = re.compile(b'\/ID\s*\[\s*<\w+>\s*<\w+>\s*\]')
        try:
            i_d = idr.findall(trailer)[0] # id key word
        except IndexError:
            # some pdf files use () instead of <>
            idr = re.compile(b'\/ID\s*\[\s*\(\w+\)\s*\(\w+\)\s*\]')
            try:
                i_d = idr.findall(trailer)[0] # id key word
            except IndexError:
                # print >> sys.stderr, "** idr:", idr
                # print >> sys.stderr, "** trailer:", trailer
                raise RuntimeError("Could not find /ID tag")
                return
        idr = re.compile(b'<\w+>')
        try:
            i_d = idr.findall(trailer)[0]
        except IndexError:
            idr = re.compile(b'\(\w+\)')
            i_d = idr.findall(trailer)[0]
        i_d = i_d.replace(b'<',b'')
        i_d = i_d.replace(b'>',b'')
        i_d = i_d.lower()
        passwords = self.get_passwords_for_JtR(encryption_dictionary)
        output = '$pdf$'+v.decode('ascii')+'*'+r.decode('ascii')+'*'+length.decode('ascii')+'*'
        output += p.decode('ascii')+'*'+meta+'*'
        output += str(int(len(i_d)/2))+'*'+i_d.decode('ascii')+'*'+passwords
        sys.stdout.write("%s\n" % output.encode('UTF-8'))

    def get_passwords_for_JtR(self, encryption_dictionary):
        output = ""
        letters = [b"U", b"O"]
        if(b"1.7" in self.pdf_spec):
            letters = [b"U", b"O", b"UE", b"OE"]
        for let in letters:
            pr_str = b'\/' + let + b'\s*\([^)]+\)'
            pr = re.compile(pr_str)
            pas = pr.findall(encryption_dictionary)
            if(len(pas) > 0):
                pas = pr.findall(encryption_dictionary)[0]
                # because regexs in python suck <=== LOL
                while(pas[-2] == b'\\'):
                    pr_str += b'[^)]+\)'
                    pr = re.compile(pr_str)
                    # print >> sys.stderr, "pr_str:", pr_str
                    # print >> sys.stderr, encryption_dictionary
                    try:
                        pas = pr.findall(encryption_dictionary)[0]
                    except IndexError:
                        break
                output += self.get_password_from_byte_string(pas)+"*"
            else:
                pr = re.compile(let + b'\s*<\w+>')
                pas = pr.findall(encryption_dictionary)
                if not pas:
                    continue
                pas = pas[0]
                pr = re.compile(b'<\w+>')
                pas = pr.findall(pas)[0]
                pas = pas.replace(b"<",b"")
                pas = pas.replace(b">",b"")
                if PY3:
                    output += str(int(len(pas)/2))+'*'+str(pas.lower(),'ascii')+'*'
                else:
                    output += str(int(len(pas)/2))+'*'+pas.lower()+'*'
        return output[:-1]

    def is_meta_data_encrypted(self, encryption_dictionary):
        mr = re.compile(b'\/EncryptMetadata\s\w+')
        if(len(mr.findall(encryption_dictionary)) > 0):
            wr = re.compile(b'\w+')
            is_encrypted = wr.findall(mr.findall(encryption_dictionary)[0])[-1]
            if(is_encrypted == b"false"):
                return False
            else:
                return True
        else:
            return True

    def parse_meta_data(self, trailer):
        root_object_id = self.get_object_id(b'Root', trailer)
        root_object = self.get_pdf_object(root_object_id)
        object_id = self.get_object_id(b'Metadata', root_object)
        xmp_metadata_object = self.get_pdf_object(object_id)
        return self.get_xmp_values(xmp_metadata_object)

    def get_xmp_values(self, xmp_metadata_object):
        xmp_metadata_object = xmp_metadata_object.partition(b"stream")[2]
        xmp_metadata_object = xmp_metadata_object.partition(b"endstream")[0]
        try:
            xml_metadata = minidom.parseString(xmp_metadata_object)
        except:
            return ""
        values = []
        values.append(self.get_dc_value("title", xml_metadata))
        values.append(self.get_dc_value("creator", xml_metadata))
        values.append(self.get_dc_value("description", xml_metadata))
        values.append(self.get_dc_value("subject", xml_metadata))
        created_year = xml_metadata.getElementsByTagName("xmp:CreateDate")
        if(len(created_year) > 0):
            created_year = created_year[0].firstChild.data[0:4]
            values.append(str(created_year))
        return " ".join(values).replace(":", "")

    def get_dc_value(self, value, xml_metadata):
        output = xml_metadata.getElementsByTagName("dc:"+value)
        if(len(output) > 0):
            output = output[0]
            output = output.getElementsByTagName("rdf:li")[0]
            if(output.firstChild):
                output = output.firstChild.data
                return output
        return ""

    def get_encryption_dictionary(self, object_id):
        encryption_dictionary = self.get_pdf_object(object_id)
        for o in encryption_dictionary.split(b"endobj"):
            if(object_id+b" obj" in o):
                encryption_dictionary = o
        return encryption_dictionary

    def get_object_id(self, name , trailer):
        oir = re.compile(b'\/' + name + b'\s\d+\s\d\sR')
        try:
            object_id = oir.findall(trailer)[0]
        except IndexError:
            # print >> sys.stderr, " ** get_object_id: name \"", name, "\", trailer ", trailer
            return ""
        oir = re.compile(b'\d+ \d')
        object_id = oir.findall(object_id)[0]
        return object_id

    def get_pdf_object(self, object_id):
        output = object_id+b" obj" + \
            self.encrypted.partition(b"\r"+object_id+b" obj")[2]
        if(output == object_id+b" obj"):
            output = object_id+b" obj" + \
            self.encrypted.partition(b"\n"+object_id+b" obj")[2]
        output = output.partition(b"endobj")[0] + b"endobj"
        # print >> sys.stderr, output
        return output

    def get_trailer(self):
        trailer = self.get_data_between(b"trailer", b">>", b"/ID")
        if(trailer == b""):
            trailer = self.get_data_between(b"DecodeParms", b"stream", b"")
            if(trailer == ""):
                raise RuntimeError("Can't find trailer")
        if(trailer != "" and trailer.find(b"Encrypt") == -1):
            # print >> sys.stderr, trailer
            raise RuntimeError("File not encrypted")
        return trailer

    def get_data_between(self, s1, s2, tag):
        output = b""
        inside_first = False
        lines = re.split(b'\n|\r', self.encrypted)
        for line in lines:
            inside_first = inside_first or line.find(s1) != -1
            if(inside_first):
                output += line
                if(line.find(s2) != -1):
                    if(tag == b"" or output.find(tag) != -1):
                        break
                    else:
                        output = b""
                        inside_first = False
        return output

    def get_hex_byte(self, o_or_u, i):
        if PY3:
            return hex(o_or_u[i]).replace('0x', '')
        else:
            return hex(ord(o_or_u[i])).replace('0x', '')

    def get_password_from_byte_string(self, o_or_u):
        pas = ""
        escape_seq = False
        escapes = 0
        excluded_indexes = [0, 1, 2]
        #For UE & OE in 1.7 spec
        if not PY3:
            if(o_or_u[2] != '('):
                excluded_indexes.append(3)
        else:
            if(o_or_u[2] != 40):
                excluded_indexes.append(3)
        for i in range(len(o_or_u)):
            if(i not in excluded_indexes):
                if(len(self.get_hex_byte(o_or_u, i)) == 1 \
                   and o_or_u[i] != "\\"[0]):
                    pas += "0"  # need to be 2 digit hex numbers
                is_back_slash = True
                if not PY3:
                    is_back_slash = o_or_u[i] != "\\"[0]
                else:
                    is_back_slash = o_or_u[i] != 92
                if(is_back_slash or escape_seq):
                    if(escape_seq):
                        if not PY3:
                            esc = "\\"+o_or_u[i]
                        else:
                            esc = "\\"+chr(o_or_u[i])
                        esc = self.unescape(esc)
                        if(len(hex(ord(esc[0])).replace('0x', '')) == 1):
                            pas += "0"
                        pas += hex(ord(esc[0])).replace('0x', '')
                        escape_seq = False
                    else:
                        pas += self.get_hex_byte(o_or_u, i)
                else:
                    escape_seq = True
                    escapes += 1
        output = len(o_or_u)-(len(excluded_indexes)+1)-escapes
        return str(output)+'*'+pas[:-2]

    def unescape(self, esc):
        escape_seq_map = {'\\n':"\n", '\\s':"\s", '\\e':"\e",
                '\\r':"\r", '\\t':"\t", '\\v':"\v", '\\f':"\f",
                '\\b':"\b", '\\a':"\a", "\\)":")",
                "\\(":"(", "\\\\":"\\" }

        return escape_seq_map[esc]

if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <PDF file(s)>\n" % \
                         os.path.basename(sys.argv[0]))
        sys.exit(-1)
    for j in range(1, len(sys.argv)):
        if not PY3:
            filename = sys.argv[j].decode('UTF-8')
        else:
            filename = sys.argv[j]
        # sys.stderr.write("Analyzing %s\n" % sys.argv[j].decode('UTF-8'))
        parser = PdfParser(filename)
        try:
            parser.parse()
        except RuntimeError:
            e = sys.exc_info()[1]
            sys.stderr.write("%s : %s\n" % (filename, str(e)))

PDF to John

#!/usr/bin/env python3

# This software is Copyright (c) 2023 Benjamin Dornel <benjamindornel@gmail.com>
# and it is hereby released to the general public under the following terms:
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.


import argparse
import logging

try:
    from pyhanko.pdf_utils.misc import PdfReadError
    from pyhanko.pdf_utils.reader import PdfFileReader
except ImportError:
    print("pyhanko is missing, run 'pip install --user pyhanko==0.20.1' to install it!")
    exit(1)

logger = logging.getLogger(__name__)


class SecurityRevision:
    """Represents Standard Security Handler Revisions
    and the corresponding key length for the /O and /U entries

    In Revision 5, the /O and /U entries were extended to 48 bytes,
    with three logical parts -- a 32 byte verification hash,
    an 8 byte validation salt, and an 8 byte key salt."""

    revisions = {
        2: 32,  # RC4_BASIC
        3: 32,  # RC4_EXTENDED
        4: 32,  # RC4_OR_AES128
        5: 48,  # AES_R5_256
        6: 48,  # AES_256
    }

    @classmethod
    def get_key_length(cls, revision):
        """
        Get the key length for a given revision,
        defaults to 48 if no revision is specified.
        """
        return cls.revisions.get(revision, 48)


class PdfHashExtractor:
    """
    Extracts hash and encryption information from a PDF file

    Attributes:
    - `file_name`: PDF file path.
    - `strict`: Boolean that controls whether an error is raised, if a PDF
        has problems e.g. Multiple definitions in encryption dictionary
        for a specific key. Defaults to `False`.
    - `algorithm`: Encryption algorithm used by the standard security handler
    - `length`: The length of the encryption key, in bits. Defaults to 40.
    - `permissions`: User access permissions
    - `revision`: Revision of the standard security handler
    """

    def __init__(self, file_name: str, strict: bool = False):
        self.file_name = file_name

        with open(file_name, "rb") as doc:
            self.pdf = PdfFileReader(doc, strict=strict)
            self.encrypt_dict = self.pdf._get_encryption_params()

            if not self.encrypt_dict:
                raise RuntimeError("File not encrypted")

            self.algorithm: int = self.encrypt_dict.get("/V")
            self.length: int = self.encrypt_dict.get("/Length", 40)
            self.permissions: int = self.encrypt_dict["/P"]
            self.revision: int = self.encrypt_dict["/R"]

    @property
    def document_id(self) -> bytes:
        return self.pdf.document_id[0]

    @property
    def encrypt_metadata(self) -> str:
        """
        Get a string representation of whether metadata is encrypted.

        Returns "1" if metadata is encrypted, "0" otherwise.
        """
        return str(int(self.pdf.security_handler.encrypt_metadata))

    def parse(self) -> str:
        """
        Parse PDF encryption information into a formatted string for John
        """
        passwords = self.get_passwords()
        fields = [
            f"$pdf${self.algorithm}",
            self.revision,
            self.length,
            self.permissions,
            self.encrypt_metadata,
            len(self.document_id),
            self.document_id.hex(),
            passwords,
        ]
        return "*".join(map(str, fields))

    def get_passwords(self) -> str:
        """
        Creates a string consisting of the hexidecimal string of the
        /U, /O, /UE and /OE entries and their corresponding byte string length
        """
        passwords = []
        keys = ("udata", "odata", "oeseed", "ueseed")
        max_key_length = SecurityRevision.get_key_length(self.revision)

        for key in keys:
            if data := getattr(self.pdf.security_handler, key):
                data: bytes = data[:max_key_length]
                passwords.extend([str(len(data)), data.hex()])

        return "*".join(passwords)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="PDF Hash Extractor")
    parser.add_argument(
        "pdf_files", nargs="+", help="PDF file(s) to extract information from"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Print the encryption dictionary"
    )
    args = parser.parse_args()

    for filename in args.pdf_files:
        try:
            extractor = PdfHashExtractor(filename)
            pdf_hash = extractor.parse()
            print(pdf_hash)

            if args.debug:
                if extractor.encrypt_dict:
                    print("Encryption Dictionary:")
                    for key, value in extractor.encrypt_dict.items():
                        print(f"{key}: {value}")
                else:
                    print("No encryption dictionary found in the PDF.")

        except PdfReadError as error:
            logger.error("%s : %s", filename, error, exc_info=True)

Steg

#!/usr/bin/env python3
"""
Steganography Extraction Tool

This script extracts hidden data from images using various steganography techniques.
Usage: python steg_extract.py <image_file>
"""

import sys
import os
import numpy as np
from PIL import Image
import binascii
import re
import zlib
import struct
from bitstring import BitArray

def extract_lsb(image_path, bit_depth=1):
    """Extract data hidden using LSB (Least Significant Bit) steganography."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Flatten the pixel array and extract LSBs
        flat_pixels = pixels.flatten()
        
        # Get the least significant bits
        bits = ""
        for pixel in flat_pixels:
            # Extract the specified number of least significant bits
            for i in range(bit_depth):
                bits += str((pixel >> i) & 1)
        
        # Convert bits to bytes
        bytes_data = BitArray(bin=bits).bytes
        
        # Try to find printable text
        printable_data = ""
        for i in range(len(bytes_data)):
            char = bytes_data[i:i+1]
            if 32 <= ord(char) <= 126 or ord(char) in (10, 13, 9):  # Printable ASCII or newline/tab
                printable_data += char.decode('ascii', errors='ignore')
            else:
                printable_data += '.'
        
        return {
            'raw_bits': bits[:100] + "...",  # First 100 bits
            'raw_bytes': binascii.hexlify(bytes_data[:50]).decode('ascii') + "...",  # First 50 bytes
            'possible_text': printable_data[:1000]  # First 1000 printable chars
        }
    except Exception as e:
        return {'error': f"LSB extraction failed: {str(e)}"}

def extract_metadata(image_path):
    """Extract metadata from the image that might contain hidden information."""
    try:
        img = Image.open(image_path)
        metadata = {}
        
        # Extract EXIF data if available
        if hasattr(img, '_getexif') and img._getexif():
            metadata['exif'] = str(img._getexif())
        
        # Extract other metadata
        metadata['format'] = img.format
        metadata['mode'] = img.mode
        metadata['info'] = str(img.info)
        
        return metadata
    except Exception as e:
        return {'error': f"Metadata extraction failed: {str(e)}"}

def extract_color_plane(image_path):
    """Extract data from color planes separately to find potential hidden information."""
    try:
        img = Image.open(image_path)
        if img.mode != 'RGB' and img.mode != 'RGBA':
            return {'error': "Not an RGB/RGBA image"}
        
        planes = {}
        pixels = np.array(img)
        
        # Extract red, green, blue planes
        if img.mode == 'RGB' or img.mode == 'RGBA':
            planes['red'] = pixels[:,:,0]
            planes['green'] = pixels[:,:,1]
            planes['blue'] = pixels[:,:,2]
        
        # Check for unusual patterns in each plane
        results = {}
        for plane_name, plane_data in planes.items():
            # Look for unusual distributions (e.g., even/odd patterns)
            even_count = np.sum(plane_data % 2 == 0)
            odd_count = np.sum(plane_data % 2 == 1)
            
            # If there's a significant imbalance, it might indicate steganography
            results[f"{plane_name}_analysis"] = {
                'even_pixels': even_count,
                'odd_pixels': odd_count,
                'imbalance': abs(even_count - odd_count) / (even_count + odd_count)
            }
            
            # Extract LSB from this color plane only
            bits = "".join([str(p & 1) for p in plane_data.flatten()])
            results[f"{plane_name}_lsb_sample"] = bits[:100] + "..."
        
        return results
    except Exception as e:
        return {'error': f"Color plane extraction failed: {str(e)}"}

def extract_hidden_files(image_path):
    """Look for embedded files using common signatures/headers."""
    try:
        with open(image_path, 'rb') as f:
            data = f.read()
        
        # Common file signatures to look for
        file_signatures = {
            b'\x50\x4B\x03\x04': 'ZIP',
            b'\x52\x61\x72\x21\x1A\x07': 'RAR',
            b'\x25\x50\x44\x46': 'PDF',
            b'\xFF\xD8\xFF': 'JPG',
            b'\x89\x50\x4E\x47': 'PNG',
            b'\x47\x49\x46\x38': 'GIF',
            b'\x7F\x45\x4C\x46': 'ELF',
            b'\xD0\xCF\x11\xE0': 'MS Office',
            b'\x50\x4B\x05\x06': 'ZIP (empty)',
            b'\x1F\x8B\x08': 'GZIP',
            b'\x42\x5A\x68': 'BZ2',
            b'\x75\x73\x74\x61\x72': 'TAR',
            b'\x49\x44\x33': 'MP3',
            b'\x4D\x5A': 'EXE',
        }
        
        found_files = []
        for signature, filetype in file_signatures.items():
            # Find all occurrences of the signature
            offsets = [m.start() for m in re.finditer(re.escape(signature), data)]
            for offset in offsets:
                found_files.append({
                    'type': filetype,
                    'offset': offset,
                    'signature': binascii.hexlify(signature).decode('ascii')
                })
        
        return found_files
    except Exception as e:
        return {'error': f"Hidden file extraction failed: {str(e)}"}

def extract_parity_steganography(image_path):
    """Check for parity-based steganography."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Count the parity of pixels in each row and column
        row_parity = np.sum(pixels.sum(axis=2) % 2, axis=1) % 2
        col_parity = np.sum(pixels.sum(axis=2) % 2, axis=0) % 2
        
        # Convert to binary strings (potentially hidden messages)
        row_message = "".join([str(int(bit)) for bit in row_parity])
        col_message = "".join([str(int(bit)) for bit in col_parity])
        
        return {
            'row_parity_bits': row_message,
            'col_parity_bits': col_message
        }
    except Exception as e:
        return {'error': f"Parity steganography extraction failed: {str(e)}"}

def extract_hidden_text(image_path):
    """Extract text from the image using several methods."""
    try:
        with open(image_path, 'rb') as f:
            data = f.read()
        
        # Look for ASCII/UTF-8 text patterns
        possible_strings = []
        ascii_regex = rb'[ -~\r\n\t]{8,}'  # 8+ printable ASCII chars
        for match in re.finditer(ascii_regex, data):
            possible_strings.append(match.group(0).decode('ascii', errors='ignore'))
        
        return {
            'possible_strings': possible_strings[:20]  # Return first 20 found strings
        }
    except Exception as e:
        return {'error': f"Text extraction failed: {str(e)}"}

def analyze_bit_distribution(image_path):
    """Analyze bit distribution for statistical anomalies."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Analyze distribution of each bit position
        bit_counts = []
        for bit_pos in range(8):
            mask = 1 << bit_pos
            bit_count = np.sum((pixels & mask) > 0)
            bit_counts.append(bit_count)
        
        total_bits = pixels.size * 8
        bit_frequencies = [count / total_bits for count in bit_counts]
        
        # Calculate deviation from expected 0.5 frequency
        deviations = [abs(freq - 0.5) for freq in bit_frequencies]
        
        return {
            'bit_frequencies': bit_frequencies,
            'deviations': deviations,
            'analysis': "High deviation in LSBs may indicate steganography"
        }
    except Exception as e:
        return {'error': f"Bit distribution analysis failed: {str(e)}"}

def extract_stegano_data(image_path):
    """Main function to extract steganographic data from an image."""
    results = {
        'filename': os.path.basename(image_path),
        'filesize': os.path.getsize(image_path)
    }
    
    # Run all extraction methods
    results['lsb_extraction'] = extract_lsb(image_path)
    results['lsb_extraction_2bit'] = extract_lsb(image_path, bit_depth=2)
    results['metadata'] = extract_metadata(image_path)
    results['color_planes'] = extract_color_plane(image_path)
    results['hidden_files'] = extract_hidden_files(image_path)
    results['parity_data'] = extract_parity_steganography(image_path)
    results['text_data'] = extract_hidden_text(image_path)
    results['bit_distribution'] = analyze_bit_distribution(image_path)
    
    return results

def save_extracted_data(results, original_image_path):
    """Save extracted data to files."""
    base_name = os.path.splitext(os.path.basename(original_image_path))[0]
    output_dir = f"{base_name}_extracted"
    
    # Create directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Save main results as text
    with open(f"{output_dir}/results.txt", 'w') as f:
        f.write(f"Steganography Extraction Results for {results['filename']}\n")
        f.write(f"File size: {results['filesize']} bytes\n\n")
        
        # Write metadata
        f.write("=== METADATA ===\n")
        for k, v in results['metadata'].items():
            f.write(f"{k}: {v}\n")
        
        # Write LSB extraction results
        f.write("\n=== LSB EXTRACTION ===\n")
        f.write(f"LSB sample: {results['lsb_extraction']['raw_bits']}\n")
        f.write(f"Bytes: {results['lsb_extraction']['raw_bytes']}\n")
        f.write(f"Possible text:\n{results['lsb_extraction']['possible_text']}\n")
        
        # Write 2-bit LSB extraction
        f.write("\n=== 2-BIT LSB EXTRACTION ===\n")
        f.write(f"LSB sample: {results['lsb_extraction_2bit']['raw_bits']}\n")
        f.write(f"Bytes: {results['lsb_extraction_2bit']['raw_bytes']}\n")
        f.write(f"Possible text:\n{results['lsb_extraction_2bit']['possible_text']}\n")
        
        # Write color plane analysis
        f.write("\n=== COLOR PLANE ANALYSIS ===\n")
        for k, v in results['color_planes'].items():
            f.write(f"{k}: {v}\n")
        
        # Write hidden files
        f.write("\n=== POSSIBLE HIDDEN FILES ===\n")
        for file_info in results['hidden_files']:
            f.write(f"Type: {file_info['type']}, Offset: {file_info['offset']}, Signature: {file_info['signature']}\n")
        
        # Write parity data
        f.write("\n=== PARITY STEGANOGRAPHY ===\n")
        f.write(f"Row parity: {results['parity_data']['row_parity_bits']}\n")
        f.write(f"Column parity: {results['parity_data']['col_parity_bits']}\n")
        
        # Write found text strings
        f.write("\n=== POSSIBLE HIDDEN TEXT ===\n")
        for s in results['text_data']['possible_strings']:
            f.write(f"{s}\n")
            f.write("---\n")
        
        # Write bit distribution analysis
        f.write("\n=== BIT DISTRIBUTION ANALYSIS ===\n")
        f.write("Bit position frequencies (0-7, LSB to MSB):\n")
        for i, freq in enumerate(results['bit_distribution']['bit_frequencies']):
            f.write(f"Bit {i}: {freq:.4f} (deviation: {results['bit_distribution']['deviations'][i]:.4f})\n")
    
    # If we found potential embedded files, try to extract them
    if results['hidden_files']:
        with open(original_image_path, 'rb') as f:
            data = f.read()
        
        for i, file_info in enumerate(results['hidden_files']):
            # Create a name for the extracted file
            ext = file_info['type'].lower().split()[0]  # Use the first word of the type as extension
            output_file = f"{output_dir}/extracted_file_{i}.{ext}"
            
            # Get start position from offset
            start_pos = file_info['offset']
            
            # Write the data to a file, up to 10MB maximum
            with open(output_file, 'wb') as out_f:
                out_f.write(data[start_pos:start_pos + 10*1024*1024])
    
    return output_dir

def main():
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <image_file>")
        sys.exit(1)
    
    image_path = sys.argv[1]
    if not os.path.exists(image_path):
        print(f"Error: File '{image_path}' not found.")
        sys.exit(1)
    
    print(f"Analyzing {image_path} for steganographic data...")
    results = extract_stegano_data(image_path)
    
    # Save results to files
    output_dir = save_extracted_data(results, image_path)
    print(f"Analysis complete. Results saved to {output_dir}/")

if __name__ == "__main__":
    main()

Binary Log Parser and Anomaly Detector

#!/usr/bin/env python3
"""
Binary Log Parser and Anomaly Detector

This script parses a custom binary format for login attempt logs and identifies 
potentially compromised accounts based on anomalous behavior.

Format:
- username_length: 4-byte integer (big-endian)
- username: variable-length string
- ip: 4-byte IPv4 address
- timestamp: 4-byte Unix timestamp (big-endian)
- success: 1-byte boolean

Usage:
    python log_analyzer.py --input <log_file> [--output <output_file>] [--sql <sql_file>]
"""

import argparse
import struct
import socket
import sqlite3
import json
import os
import sys
from datetime import datetime
from collections import defaultdict

def parse_binary_log(file_path):
    """
    Parse the binary log file according to the specified format.
    
    Args:
        file_path: Path to the binary log file
        
    Returns:
        List of login attempt records
    """
    logs = []
    
    try:
        with open(file_path, 'rb') as f:
            data = f.read()
            
        offset = 0
        while offset < len(data):
            # Read username length (4-byte integer, big-endian)
            username_length = struct.unpack('>I', data[offset:offset+4])[0]
            offset += 4
            
            # Read username (variable length string)
            username = data[offset:offset+username_length].decode('utf-8')
            offset += username_length
            
            # Read IP address (4-byte IPv4 address)
            ip_bytes = data[offset:offset+4]
            ip_address = socket.inet_ntoa(ip_bytes)
            offset += 4
            
            # Read timestamp (4-byte Unix timestamp, big-endian)
            timestamp = struct.unpack('>I', data[offset:offset+4])[0]
            login_time = datetime.fromtimestamp(timestamp)
            offset += 4
            
            # Read success flag (1-byte boolean)
            success = data[offset] == 1
            offset += 1
            
            # Add the parsed entry to our array
            logs.append({
                'username': username,
                'ip_address': ip_address,
                'timestamp': timestamp,
                'login_time': login_time,
                'success': success
            })
            
        print(f"Successfully parsed {len(logs)} login attempts")
        return logs
        
    except Exception as e:
        print(f"Error parsing log file: {str(e)}")
        sys.exit(1)

def detect_anomalies(logs):
    """
    Analyze logs to identify potentially compromised accounts.
    
    Args:
        logs: List of parsed login attempt records
        
    Returns:
        List of users with anomaly scores and suspicious behavior details
    """
    # Group logs by username
    user_logs = defaultdict(list)
    for log in logs:
        user_logs[log['username']].append(log)
    
    anomalies = []
    
    # Business hours (assuming 9 AM to 5 PM)
    business_start_hour = 9
    business_end_hour = 17
    
    # Time threshold for rapid location changes (in seconds)
    location_change_threshold = 3600  # 1 hour
    
    # Analyze each user's login patterns
    for username, user_log in user_logs.items():
        # Sort logs by timestamp
        user_log.sort(key=lambda x: x['timestamp'])
        
        # Calculate anomaly indicators
        unique_ips = set(log['ip_address'] for log in user_log)
        failed_attempts = sum(1 for log in user_log if not log['success'])
        successful_attempts = sum(1 for log in user_log if log['success'])
        
        # Check for rapid location changes
        rapid_location_changes = 0
        for i in range(1, len(user_log)):
            current_log = user_log[i]
            previous_log = user_log[i-1]
            
            if current_log['ip_address'] != previous_log['ip_address']:
                time_diff = current_log['timestamp'] - previous_log['timestamp']
                if time_diff < location_change_threshold:
                    rapid_location_changes += 1
        
        # Calculate after-hours logins
        after_hours_logins = sum(
            1 for log in user_log 
            if log['login_time'].hour < business_start_hour or log['login_time'].hour >= business_end_hour
        )
        
        # Calculate anomaly score based on these factors
        # Weights can be adjusted based on the relative importance of each factor
        anomaly_score = (
            (len(unique_ips) * 10) + 
            (failed_attempts * 5) + 
            (rapid_location_changes * 20) + 
            (after_hours_logins * 3)
        )
        
        anomalies.append({
            'username': username,
            'anomaly_score': anomaly_score,
            'unique_ips': len(unique_ips),
            'ip_addresses': list(unique_ips),
            'failed_attempts': failed_attempts,
            'successful_attempts': successful_attempts,
            'rapid_location_changes': rapid_location_changes,
            'after_hours_logins': after_hours_logins,
            'total_attempts': len(user_log)
        })
    
    # Sort by anomaly score (descending)
    anomalies.sort(key=lambda x: x['anomaly_score'], reverse=True)
    
    return anomalies

def create_database(logs, db_path=':memory:'):
    """
    Create a SQLite database with the login data
    
    Args:
        logs: List of parsed login attempt records
        db_path: Path to save the SQLite database (default: in-memory)
        
    Returns:
        SQLite connection
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    # Create table
    cursor.execute('''
    CREATE TABLE login_attempts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        username TEXT NOT NULL,
        ip_address TEXT NOT NULL,
        timestamp INTEGER NOT NULL,
        login_time TEXT NOT NULL,
        success INTEGER NOT NULL
    )
    ''')
    
    # Create indexes
    cursor.execute('CREATE INDEX idx_username ON login_attempts(username)')
    cursor.execute('CREATE INDEX idx_ip_address ON login_attempts(ip_address)')
    cursor.execute('CREATE INDEX idx_timestamp ON login_attempts(timestamp)')
    cursor.execute('CREATE INDEX idx_success ON login_attempts(success)')
    
    # Insert data
    for log in logs:
        cursor.execute(
            'INSERT INTO login_attempts (username, ip_address, timestamp, login_time, success) VALUES (?, ?, ?, ?, ?)',
            (
                log['username'],
                log['ip_address'],
                log['timestamp'],
                log['login_time'].isoformat(),
                1 if log['success'] else 0
            )
        )
    
    conn.commit()
    return conn

def generate_sql_script():
    """
    Generate a SQL script for creating the table and analyzing login data
    
    Returns:
        SQL script as a string
    """
    return '''-- Create a table to store login attempts
CREATE TABLE login_attempts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    username TEXT NOT NULL,
    ip_address TEXT NOT NULL,
    timestamp INTEGER NOT NULL,
    login_time TEXT NOT NULL,
    success INTEGER NOT NULL
);

-- Create indexes for efficient searching
CREATE INDEX idx_username ON login_attempts(username);
CREATE INDEX idx_ip_address ON login_attempts(ip_address);
CREATE INDEX idx_timestamp ON login_attempts(timestamp);
CREATE INDEX idx_success ON login_attempts(success);

-- Query to find users with multiple IP addresses
SELECT 
    username,
    COUNT(DISTINCT ip_address) AS unique_ip_count
FROM 
    login_attempts
GROUP BY 
    username
HAVING 
    unique_ip_count > 1
ORDER BY 
    unique_ip_count DESC;

-- Query to find failed login attempts followed by successful ones
SELECT 
    a.username,
    COUNT(*) AS suspicious_patterns
FROM 
    login_attempts a
JOIN 
    login_attempts b 
ON 
    a.username = b.username
    AND a.timestamp < b.timestamp
    AND a.success = 0
    AND b.success = 1
    AND (b.timestamp - a.timestamp) < 300 -- Within 5 minutes
GROUP BY 
    a.username
ORDER BY 
    suspicious_patterns DESC;

-- Query to find rapid login attempts from different locations
SELECT 
    a.username,
    a.ip_address AS ip1,
    b.ip_address AS ip2,
    datetime(a.login_time) AS time1,
    datetime(b.login_time) AS time2,
    (julianday(b.login_time) - julianday(a.login_time)) * 24 * 60 AS minutes_between
FROM 
    login_attempts a
JOIN 
    login_attempts b 
ON 
    a.username = b.username
    AND a.ip_address != b.ip_address
    AND a.id < b.id
    AND (julianday(b.login_time) - julianday(a.login_time)) * 24 * 60 < 60 -- Less than 60 minutes apart
ORDER BY 
    minutes_between ASC;

-- Query to find users with after-hours login activity
SELECT 
    username,
    COUNT(*) AS after_hours_logins
FROM 
    login_attempts
WHERE 
    (strftime('%H', login_time) < '09' OR strftime('%H', login_time) >= '17')
GROUP BY 
    username
ORDER BY 
    after_hours_logins DESC;

-- Comprehensive anomaly detection query
WITH 
    unique_ips AS (
        SELECT 
            username, 
            COUNT(DISTINCT ip_address) AS ip_count
        FROM 
            login_attempts
        GROUP BY 
            username
    ),
    failed_logins AS (
        SELECT 
            username, 
            SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) AS failed_count
        FROM 
            login_attempts
        GROUP BY 
            username
    ),
    after_hours AS (
        SELECT 
            username, 
            COUNT(*) AS after_hours_count
        FROM 
            login_attempts
        WHERE 
            (strftime('%H', login_time) < '09' OR strftime('%H', login_time) >= '17')
        GROUP BY 
            username
    ),
    rapid_location_changes AS (
        SELECT 
            a.username,
            COUNT(*) AS rapid_changes
        FROM 
            login_attempts a
        JOIN 
            login_attempts b 
        ON 
            a.username = b.username
            AND a.ip_address != b.ip_address
            AND a.id < b.id
            AND (b.timestamp - a.timestamp) < 3600 -- Less than 1 hour apart
        GROUP BY 
            a.username
    )
SELECT 
    u.username,
    COALESCE(u.ip_count, 0) AS unique_ip_count,
    COALESCE(f.failed_count, 0) AS failed_logins,
    COALESCE(a.after_hours_count, 0) AS after_hours_logins,
    COALESCE(r.rapid_changes, 0) AS rapid_location_changes,
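    -- Weighted composite score (weights are heuristic): rapid IP changes
    -- weigh heaviest (20), then unique IPs (10), failed logins (5),
    -- and after-hours logins (3)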
    (COALESCE(u.ip_count, 0) * 10) + 
    (COALESCE(f.failed_count, 0) * 5) + 
    (COALESCE(r.rapid_changes, 0) * 20) + 
    (COALESCE(a.after_hours_count, 0) * 3) AS anomaly_score
FROM 
    unique_ips u
LEFT JOIN 
    failed_logins f ON u.username = f.username
LEFT JOIN 
    after_hours a ON u.username = a.username
LEFT JOIN 
    rapid_location_changes r ON u.username = r.username
ORDER BY 
    anomaly_score DESC
LIMIT 10;
'''
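
# Example workflow for the generated script (a sketch; 'analysis.sql' and
# 'logins.db' are placeholder paths): write it out and feed it to the sqlite3
# command-line shell, which prints SELECT results. Python's executescript()
# would run the statements but discard query output. With IF NOT EXISTS on
# the CREATE statements, the script can be pointed at a database previously
# populated via create_database() (e.g. one saved with --db).
#
#   import subprocess
#   with open('analysis.sql', 'w') as f:
#       f.write(generate_sql_script())
#   with open('analysis.sql') as sql_in:
#       subprocess.run(['sqlite3', 'logins.db'], stdin=sql_in, check=True)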

def analyze_compromised_user(conn, username):
    """
    Perform detailed analysis on a potentially compromised user
    
    Args:
        conn: SQLite connection
        username: Username to analyze
        
    Returns:
        Dictionary with detailed analysis
    """
    cursor = conn.cursor()
    
    # Get all login attempts for this user
    cursor.execute(
        '''
        SELECT 
            timestamp,
            login_time,
            ip_address,
            success
        FROM 
            login_attempts
        WHERE 
            username = ?
        ORDER BY 
            timestamp ASC
        ''',
        (username,)
    )
    
    logins = cursor.fetchall()
    
    # Analyze suspicious patterns
    suspicious_events = []
    previous_ip = None
    previous_time = None
    
    for timestamp, login_time, ip_address, success in logins:
        if previous_ip and previous_ip != ip_address:
            time_diff = timestamp - previous_time
            if time_diff < 3600:  # Less than 1 hour
                suspicious_events.append({
                    'event_type': 'rapid_location_change',
                    'login_time': login_time,  # when the change was observed
                    'previous_ip': previous_ip,
                    'new_ip': ip_address,
                    'minutes_between': time_diff / 60
                })
        
        previous_ip = ip_address
        previous_time = timestamp
    
    # Get login success rate
    cursor.execute(
        '''
        SELECT 
            COUNT(*) AS total,
            SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) AS successful
        FROM 
            login_attempts
        WHERE 
            username = ?
        ''',
        (username,)
    )
    
    total, successful = cursor.fetchone()
    success_rate = (successful / total) * 100 if total > 0 else 0
    
    return {
        'username': username,
        'login_count': total,
        'success_rate': success_rate,
        'suspicious_events': suspicious_events,
        'login_history': [
            {
                'timestamp': timestamp,
                'login_time': login_time,
                'ip_address': ip_address,
                'success': bool(success)
            }
            for timestamp, login_time, ip_address, success in logins
        ]
    }
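
# Example usage (a sketch; 'alice' is a placeholder username and `logs`
# comes from the binary parser):
#
#   conn = create_database(logs, ':memory:')
#   report = analyze_compromised_user(conn, 'alice')
#   print(f"{report['login_count']} attempts, "
#         f"{report['success_rate']:.1f}% successful")
#   for event in report['suspicious_events']:
#       print(f"{event['previous_ip']} -> {event['new_ip']} "
#             f"({event['minutes_between']:.1f} minutes apart)")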

def main():
    """
    Main function to process arguments and run the analysis
    """
    parser = argparse.ArgumentParser(description='Analyze binary login logs for compromised accounts')
    parser.add_argument('--input', '-i', required=True, help='Path to binary log file')
    parser.add_argument('--output', '-o', help='Path to save analysis results (JSON)')
    parser.add_argument('--sql', '-s', help='Path to save SQL script')
    parser.add_argument('--db', '-d', help='Path to save SQLite database')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
    
    args = parser.parse_args()
    
    # Parse the binary log file
    print(f"Parsing binary log file: {args.input}")
    logs = parse_binary_log(args.input)
    
    # Analyze for anomalies
    print("Analyzing for suspicious behavior...")
    anomalies = detect_anomalies(logs)
    
    # Print top suspicious users
    print("\nTop potentially compromised accounts:")
    for i, anomaly in enumerate(anomalies[:5], start=1):
        print(f"{i}. Username: {anomaly['username']}")
        print(f"   Anomaly Score: {anomaly['anomaly_score']}")
        print(f"   Unique IPs: {anomaly['unique_ips']}")
        print(f"   Failed/Successful Logins: {anomaly['failed_attempts']}/{anomaly['successful_attempts']}")
        print(f"   Rapid Location Changes: {anomaly['rapid_location_changes']}")
        print(f"   After-Hours Logins: {anomaly['after_hours_logins']}")
        print()
    
    # Identify the most likely compromised user
    if anomalies:
        compromised_user = anomalies[0]['username']
        print(f"RESULT: The most likely compromised account is: {compromised_user}")
        
        # Create database for SQL analysis
        db_path = args.db if args.db else ':memory:'
        conn = create_database(logs, db_path)
        
        # Get detailed analysis for the compromised user
        detailed_analysis = analyze_compromised_user(conn, compromised_user)
        
        if args.verbose:
            print("\nDetailed analysis for the compromised account:")
            print(f"Login history for {compromised_user}:")
            for entry in detailed_analysis['login_history']:
                status = "SUCCESS" if entry['success'] else "FAILED"
                print(f"{entry['login_time']} | {entry['ip_address']} | {status}")
            
            if detailed_analysis['suspicious_events']:
                print("\nSuspicious events:")
                for event in detailed_analysis['suspicious_events']:
                    print(f"IP changed from {event['previous_ip']} to {event['new_ip']} "
                          f"in {event['minutes_between']:.1f} minutes")
    else:
        print("No anomalies detected in the log data")
    
    # Save results to output file
    if args.output:
        with open(args.output, 'w') as f:
            json.dump({
                'summary': {
                    'total_logs': len(logs),
                    'total_users': len({log['username'] for log in logs}),
                    'compromised_user': compromised_user if anomalies else None
                },
                'anomalies': anomalies,
                'detailed_analysis': detailed_analysis if anomalies else None
            }, f, indent=4, default=str)
        print(f"Analysis results saved to {args.output}")
    
    # Save SQL script
    if args.sql:
        with open(args.sql, 'w') as f:
            f.write(generate_sql_script())
        print(f"SQL script saved to {args.sql}")
    
    # Report if database was saved
    if args.db:
        print(f"SQLite database saved to {args.db}")

if __name__ == "__main__":
    main()
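
# Example invocation (a sketch; the script name and file paths are
# placeholders for whatever this file and your logs are actually called):
#
#   python login_analysis.py --input auth.bin --db logins.db \
#       --sql analysis.sql --output results.json --verbose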