Scripts

Binary Parser

import struct
import socket
import datetime
from collections import defaultdict

def parse_binary_logs(file_path):
    """Parse binary log file according to the specified format."""
    login_attempts = []
    
    with open(file_path, 'rb') as f:
        data = f.read()
        
    offset = 0
    while offset < len(data):
        # Stop cleanly if the remaining bytes cannot hold another length field
        if offset + 4 > len(data):
            break
        # Read username length (4-byte integer, big-endian)
        username_length = struct.unpack('>I', data[offset:offset+4])[0]
        offset += 4
        
        # Remaining record: username + IPv4 (4) + timestamp (4) + success flag (1)
        if offset + username_length + 9 > len(data):
            break
        
        # Read username (variable-length UTF-8 string)
        username = data[offset:offset+username_length].decode('utf-8')
        offset += username_length
        
        # Read IPv4 address (4 bytes)
        ip_bytes = data[offset:offset+4]
        ip_address = socket.inet_ntoa(ip_bytes)
        offset += 4
        
        # Read timestamp (4-byte Unix timestamp)
        timestamp = struct.unpack('>I', data[offset:offset+4])[0]
        datetime_obj = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        offset += 4
        
        # Read success flag (1-byte boolean)
        success = bool(data[offset])
        offset += 1
        
        # Store the parsed login attempt
        login_attempts.append({
            'username': username,
            'ip_address': ip_address,
            'timestamp': timestamp,
            'datetime': datetime_obj,
            'success': success
        })
    
    return login_attempts

def analyze_logs(login_attempts):
    """Basic analysis of the log data to extract key metrics."""
    # Get the earliest timestamp (start date of the log)
    earliest_timestamp = min(login_attempts, key=lambda x: x['timestamp'])['timestamp']
    start_date_utc = datetime.datetime.fromtimestamp(earliest_timestamp, tz=datetime.timezone.utc)
    
    # Count unique usernames
    unique_usernames = set(attempt['username'] for attempt in login_attempts)
    
    # Count unique IP addresses
    unique_ips = set(attempt['ip_address'] for attempt in login_attempts)
    
    # Count total login attempts
    total_attempts = len(login_attempts)
    
    return {
        'start_date_utc': start_date_utc,
        'total_attempts': total_attempts,
        'unique_usernames': len(unique_usernames),
        'unique_ips': len(unique_ips),
        'usernames': unique_usernames,
        'ip_addresses': unique_ips
    }

def analyze_login_patterns(login_attempts):
    """Analyze login patterns to identify potentially compromised users."""
    # Track login data per user
    user_data = defaultdict(lambda: {
        'ips': set(),
        'successful_logins': 0,
        'failed_logins': 0,
        'login_times': [],
        'success_after_failure': 0,
        'unique_ips_count': 0
    })
    
    # Group login attempts by user
    user_attempts = defaultdict(list)
    for attempt in login_attempts:
        user = attempt['username']
        user_attempts[user].append(attempt)
        
        # Track IPs used
        user_data[user]['ips'].add(attempt['ip_address'])
        
        # Track login success/failure
        if attempt['success']:
            user_data[user]['successful_logins'] += 1
        else:
            user_data[user]['failed_logins'] += 1
            
        # Track login times
        user_data[user]['login_times'].append(attempt['timestamp'])
    
    # Calculate additional metrics
    for user, attempts in user_attempts.items():
        # Sort attempts by timestamp
        sorted_attempts = sorted(attempts, key=lambda x: x['timestamp'])
        
        # Check for successful login after failures
        for i in range(1, len(sorted_attempts)):
            if (not sorted_attempts[i-1]['success'] and 
                sorted_attempts[i]['success'] and
                sorted_attempts[i]['ip_address'] != sorted_attempts[i-1]['ip_address']):
                user_data[user]['success_after_failure'] += 1
                
        # Count unique IPs
        user_data[user]['unique_ips_count'] = len(user_data[user]['ips'])
    
    # Identify suspicious users based on multiple criteria
    suspicious_users = []
    for user, data in user_data.items():
        suspicion_score = 0
        
        # Multiple IPs used (especially if significantly more than other users)
        if data['unique_ips_count'] > 3:
            suspicion_score += data['unique_ips_count']
        
        # High number of failed logins followed by successful ones from different IPs
        if data['success_after_failure'] > 0:
            suspicion_score += data['success_after_failure'] * 10
            
        # High ratio of failed to successful logins
        if data['successful_logins'] > 0:
            failure_ratio = data['failed_logins'] / data['successful_logins']
            if failure_ratio > 3:
                suspicion_score += failure_ratio
                
        if suspicion_score > 10:
            suspicious_users.append((user, suspicion_score, data))
    
    # Sort by suspicion score
    suspicious_users.sort(key=lambda x: x[1], reverse=True)
    
    return suspicious_users, user_data

def main():
    file_path = input("Enter the path to the binary log file: ")
    
    try:
        login_attempts = parse_binary_logs(file_path)
        print(f"Successfully parsed {len(login_attempts)} login attempts.")
        
        # Basic log analysis for the requested metrics
        log_metrics = analyze_logs(login_attempts)
        
        print("\n===== LOG METRICS =====")
        print(f"Start date of the log (UTC): {log_metrics['start_date_utc'].strftime('%Y-%m-%d %H:%M:%S UTC')}")
        print(f"Total login attempts recorded: {log_metrics['total_attempts']}")
        print(f"Number of unique usernames: {log_metrics['unique_usernames']}")
        print(f"Number of unique IP addresses: {log_metrics['unique_ips']}")
        
        # Advanced pattern analysis for suspicious activity
        suspicious_users, user_data = analyze_login_patterns(login_attempts)
        
        print("\n===== SUSPICIOUS ACTIVITY ANALYSIS =====")
        print(f"Total users analyzed: {len(user_data)}")
        
        if suspicious_users:
            print("\nPotentially compromised users (sorted by suspicion score):")
            for user, score, data in suspicious_users:
                print(f"\nUsername: {user} (Suspicion Score: {score:.2f})")
                print(f"  Unique IPs: {data['unique_ips_count']}")
                print(f"  Successful logins: {data['successful_logins']}")
                print(f"  Failed logins: {data['failed_logins']}")
                print(f"  Successful logins after failures from different IPs: {data['success_after_failure']}")
                print(f"  IP addresses used: {', '.join(data['ips'])}")
        else:
            print("\nNo suspicious users identified.")
        
    except Exception as e:
        print(f"Error processing the file: {e}")

if __name__ == "__main__":
    main()
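
To exercise the parser without a real capture, the snippet below writes a small binary log in the record layout the parser expects: big-endian 4-byte username length, UTF-8 username, 4-byte IPv4, big-endian 4-byte Unix timestamp, and a 1-byte success flag. The usernames, addresses, and timestamps are invented purely for illustration.

import struct
import socket

def write_sample_log(path):
    """Write a few made-up records in the parser's expected binary format."""
    records = [
        ('alice', '10.0.0.5', 1700000000, True),
        ('bob', '192.168.1.20', 1700000060, False),
        ('bob', '203.0.113.7', 1700000120, True),
    ]
    with open(path, 'wb') as f:
        for username, ip, ts, success in records:
            name_bytes = username.encode('utf-8')
            f.write(struct.pack('>I', len(name_bytes)))    # username length
            f.write(name_bytes)                            # username
            f.write(socket.inet_aton(ip))                  # IPv4 address (4 bytes)
            f.write(struct.pack('>I', ts))                 # Unix timestamp
            f.write(struct.pack('B', 1 if success else 0)) # success flag

write_sample_log('sample.log')  # then point parse_binary_logs at 'sample.log'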

Compromised User Detector

import struct
import socket
import datetime
from collections import defaultdict

def parse_binary_logs(file_path):
    """Parse binary log file according to the specified format."""
    login_attempts = []
    
    with open(file_path, 'rb') as f:
        data = f.read()
        
    offset = 0
    while offset < len(data):
        # Stop cleanly if the remaining bytes cannot hold another length field
        if offset + 4 > len(data):
            break
        # Read username length (4-byte integer, big-endian)
        username_length = struct.unpack('>I', data[offset:offset+4])[0]
        offset += 4
        
        # Remaining record: username + IPv4 (4) + timestamp (4) + success flag (1)
        if offset + username_length + 9 > len(data):
            break
        
        # Read username (variable-length UTF-8 string)
        username = data[offset:offset+username_length].decode('utf-8')
        offset += username_length
        
        # Read IPv4 address (4 bytes)
        ip_bytes = data[offset:offset+4]
        ip_address = socket.inet_ntoa(ip_bytes)
        offset += 4
        
        # Read timestamp (4-byte Unix timestamp)
        timestamp = struct.unpack('>I', data[offset:offset+4])[0]
        datetime_obj = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
        offset += 4
        
        # Read success flag (1-byte boolean)
        success = bool(data[offset])
        offset += 1
        
        # Store the parsed login attempt
        login_attempts.append({
            'username': username,
            'ip_address': ip_address,
            'timestamp': timestamp,
            'datetime': datetime_obj,
            'success': success
        })
    
    return login_attempts

def analyze_logs(login_attempts):
    """Basic analysis of the log data to extract key metrics."""
    # Get the earliest timestamp (start date of the log)
    earliest_timestamp = min(login_attempts, key=lambda x: x['timestamp'])['timestamp']
    start_date_utc = datetime.datetime.fromtimestamp(earliest_timestamp, tz=datetime.timezone.utc)
    
    # Count unique usernames
    unique_usernames = set(attempt['username'] for attempt in login_attempts)
    
    # Count unique IP addresses
    unique_ips = set(attempt['ip_address'] for attempt in login_attempts)
    
    # Count total login attempts
    total_attempts = len(login_attempts)
    
    return {
        'start_date_utc': start_date_utc,
        'total_attempts': total_attempts,
        'unique_usernames': len(unique_usernames),
        'unique_ips': len(unique_ips),
        'usernames': unique_usernames,
        'ip_addresses': unique_ips
    }

def analyze_login_patterns(login_attempts):
    """Analyze login patterns to identify potentially compromised users."""
    # Track login data per user
    user_data = defaultdict(lambda: {
        'ips': set(),
        'successful_logins': 0,
        'failed_logins': 0,
        'login_times': [],
        'success_after_failure': 0,
        'unique_ips_count': 0,
        'unusual_hour_logins': 0,
        'successful_after_multiple_failures': 0,
        'geographic_anomalies': 0
    })
    
    # Group login attempts by user
    user_attempts = defaultdict(list)
    for attempt in login_attempts:
        user = attempt['username']
        user_attempts[user].append(attempt)
        
        # Track IPs used
        user_data[user]['ips'].add(attempt['ip_address'])
        
        # Track login success/failure
        if attempt['success']:
            user_data[user]['successful_logins'] += 1
        else:
            user_data[user]['failed_logins'] += 1
            
        # Track login times
        user_data[user]['login_times'].append(attempt['timestamp'])
        
        # Track unusual-hour logins (01:00-05:59 UTC)
        hour = attempt['datetime'].hour
        if 1 <= hour <= 5:
            user_data[user]['unusual_hour_logins'] += 1
    
    # Calculate additional metrics
    for user, attempts in user_attempts.items():
        # Sort attempts by timestamp
        sorted_attempts = sorted(attempts, key=lambda x: x['timestamp'])
        
        # Check for successful login after failures
        consecutive_failures = 0
        for i in range(1, len(sorted_attempts)):
            current = sorted_attempts[i]
            previous = sorted_attempts[i-1]
            
            # Check for IP hopping (successful login from different IP)
            if (not previous['success'] and 
                current['success'] and
                current['ip_address'] != previous['ip_address']):
                user_data[user]['success_after_failure'] += 1
                
            # Count consecutive failures
            if not previous['success']:
                consecutive_failures += 1
            else:
                consecutive_failures = 0
                
            # Successful login after multiple consecutive failures
            if current['success'] and consecutive_failures >= 3:
                user_data[user]['successful_after_multiple_failures'] += 1
                consecutive_failures = 0
                
        # Count unique IPs
        user_data[user]['unique_ips_count'] = len(user_data[user]['ips'])
        
        # Calculate rough geographic anomalies based on IP
        # This is simplified - in reality, you'd use IP geolocation
        if user_data[user]['unique_ips_count'] >= 3:
            ips_first_octet = [int(ip.split('.')[0]) for ip in user_data[user]['ips']]
            unique_first_octets = len(set(ips_first_octet))
            if unique_first_octets >= 2:  # Different network classes suggest geographic spread
                user_data[user]['geographic_anomalies'] += unique_first_octets
    
    # Calculate comprehensive suspicion scores
    suspicious_users = []
    for user, data in user_data.items():
        # Initialize base suspicion score
        suspicion_score = 0
        
        # Factor 1: Multiple IPs used
        ip_anomaly_score = (data['unique_ips_count'] - 1) * 5  # Expect one normal IP
        suspicion_score += max(0, ip_anomaly_score)
        
        # Factor 2: Successful logins after failures from different IPs
        suspicion_score += data['success_after_failure'] * 15
        
        # Factor 3: Failed login ratio
        if data['successful_logins'] > 0:
            failure_ratio = data['failed_logins'] / data['successful_logins']
            suspicion_score += min(failure_ratio * 3, 20)  # Cap at 20 points
        elif data['failed_logins'] > 5:  # Only failures, no successes
            suspicion_score += 20
            
        # Factor 4: Unusual hour logins
        suspicion_score += data['unusual_hour_logins'] * 2
        
        # Factor 5: Successful after multiple failures
        suspicion_score += data['successful_after_multiple_failures'] * 25
        
        # Factor 6: Geographic anomalies
        suspicion_score += data['geographic_anomalies'] * 8
        
        # Add context for this score
        reasons = []
        if data['unique_ips_count'] > 1:
            reasons.append(f"Used {data['unique_ips_count']} different IPs")
        if data['success_after_failure'] > 0:
            reasons.append(f"Successful login after failure from different IP: {data['success_after_failure']} times")
        if data['failed_logins'] > 3:
            reasons.append(f"High number of failed logins: {data['failed_logins']}")
        if data['unusual_hour_logins'] > 0:
            reasons.append(f"Logins during unusual hours: {data['unusual_hour_logins']}")
        if data['successful_after_multiple_failures'] > 0:
            reasons.append(f"Successful login after multiple failures: {data['successful_after_multiple_failures']}")
        if data['geographic_anomalies'] > 0:
            reasons.append(f"Potential geographic anomalies detected")
            
        suspicious_users.append((user, suspicion_score, data, reasons))
    
    # Sort by suspicion score
    suspicious_users.sort(key=lambda x: x[1], reverse=True)
    
    # Identify the most likely compromised user
    most_likely_compromised = suspicious_users[0][0] if suspicious_users else None
    
    return suspicious_users, user_data, most_likely_compromised

def main():
    file_path = input("Enter the path to the binary log file: ")
    
    try:
        login_attempts = parse_binary_logs(file_path)
        print(f"Successfully parsed {len(login_attempts)} login attempts.")
        
        # Basic log analysis for the requested metrics
        log_metrics = analyze_logs(login_attempts)
        
        print("\n===== LOG METRICS =====")
        print(f"Start date of the log (UTC): {log_metrics['start_date_utc'].strftime('%Y-%m-%d %H:%M:%S UTC')}")
        print(f"Total login attempts recorded: {log_metrics['total_attempts']}")
        print(f"Number of unique usernames: {log_metrics['unique_usernames']}")
        print(f"Number of unique IP addresses: {log_metrics['unique_ips']}")
        
        # Advanced pattern analysis for suspicious activity
        suspicious_users, user_data, most_likely_compromised = analyze_login_patterns(login_attempts)
        
        print("\n===== COMPROMISED USER IDENTIFICATION =====")
        if most_likely_compromised:
            print(f"\n🚨 MOST LIKELY COMPROMISED USER: {most_likely_compromised} 🚨")
            
            # Find this user in the suspicious_users list
            for user, score, data, reasons in suspicious_users:
                if user == most_likely_compromised:
                    print(f"Suspicion Score: {score:.2f}")
                    print("\nReasons for suspicion:")
                    for i, reason in enumerate(reasons, 1):
                        print(f"  {i}. {reason}")
                    print("\nDetailed metrics:")
                    print(f"  Unique IPs: {data['unique_ips_count']}")
                    print(f"  Successful logins: {data['successful_logins']}")
                    print(f"  Failed logins: {data['failed_logins']}")
                    print(f"  Logins during unusual hours: {data['unusual_hour_logins']}")
                    print(f"  IP addresses used: {', '.join(data['ips'])}")
                    break
        else:
            print("No compromised user identified.")
        
        print("\n===== OTHER SUSPICIOUS USERS =====")
        if len(suspicious_users) > 1:
            for user, score, data, reasons in suspicious_users[1:6]:  # Show top 5 after the most suspicious
                if score > 10:  # Only show users with meaningful suspicion scores
                    print(f"\nUsername: {user} (Suspicion Score: {score:.2f})")
                    print("Reasons for suspicion:")
                    for i, reason in enumerate(reasons, 1):
                        print(f"  {i}. {reason}")
        else:
            print("No other suspicious users identified.")
        
    except Exception as e:
        print(f"Error processing the file: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()
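
The pattern analysis can also be sanity-checked without a binary file by feeding analyze_login_patterns hand-built dicts in the same shape parse_binary_logs produces. The records below are invented: three failures followed by a success from a new IP, which the scoring above should rank well ahead of a single clean login.

import datetime

def make_attempt(user, ip, ts, ok):
    """Build one login-attempt dict in the parser's output format."""
    return {
        'username': user,
        'ip_address': ip,
        'timestamp': ts,
        'datetime': datetime.datetime.fromtimestamp(ts, tz=datetime.timezone.utc),
        'success': ok,
    }

base = 1700000000  # arbitrary epoch for the fake records
attempts = [make_attempt('mallory', '198.51.100.9', base + i * 60, False) for i in range(3)]
attempts.append(make_attempt('mallory', '203.0.113.50', base + 240, True))
attempts.append(make_attempt('alice', '10.0.0.5', base, True))

suspicious, data, top = analyze_login_patterns(attempts)
print(top)  # expected: 'mallory'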

GPG Verify

#!/bin/bash

echo "Verifying signature files in the current directory..."

for sig_file in *.sig; do
  if [[ -f "$sig_file" ]]; then
    original_file="${sig_file%.sig}"
    if [[ ! -f "$original_file" ]]; then
      echo "[WARN] No matching file \"$original_file\" found for \"$sig_file\"; skipping."
      continue
    fi
    echo ""
    echo "Verifying signature for \"$original_file\" using \"$sig_file\"..."
    if gpg --verify "$sig_file" "$original_file"; then
      echo "[OK] Signature verification successful for \"$original_file\"."
    else
      echo "[ERROR] Signature verification failed for \"$original_file\". The file may have been tampered with or the signature is invalid."
    fi
  fi
done

echo ""
echo "Verification process complete."

Hash Identifier

#!/usr/bin/env python3

import re
import sys
from collections import defaultdict

def identify_hash(hash_string):
    """Identify the type of hash based on pattern, length, and character set."""
    
    # Clean the hash string
    hash_string = hash_string.strip()
    
    # Check for empty string
    if not hash_string:
        return "Empty string"
    
    # Check for common hash formats with special syntax
    if hash_string.startswith('$1$'):
        return "MD5 (Unix)"
    if hash_string.startswith('$2a$') or hash_string.startswith('$2b$') or hash_string.startswith('$2y$'):
        return "Bcrypt"
    if hash_string.startswith('$5$'):
        return "SHA-256 (Unix)"
    if hash_string.startswith('$6$'):
        return "SHA-512 (Unix)"
    if hash_string.startswith('$pbkdf2-sha256$'):
        return "PBKDF2-SHA256"
    if hash_string.startswith('$sha1$'):
        return "SHA-1 (Unix)"
    if hash_string.startswith('$pdf$'):
        return "PDF (Hashcat format)"
    if hash_string.startswith('$P$') or hash_string.startswith('$H$'):
        return "PHPass (WordPress/phpBB)"
    if hash_string.startswith('$apr1$'):
        return "APR1-MD5"
    if re.match(r'^[a-fA-F0-9]{32}:[a-fA-F0-9]{32}$', hash_string):
        return "MD5(Half:Salt)"
    
    # Check for common hash lengths
    hash_length = len(hash_string)
    possible_types = []
    
    # MySQL5 hashes are '*' followed by 40 hex chars; check this before the
    # hex-only test below, which the leading '*' would otherwise fail
    if re.match(r'^\*[a-fA-F0-9]{40}$', hash_string):
        return "MySQL5"
    
    # Check if the hash is hexadecimal
    if re.match(r'^[a-fA-F0-9]+$', hash_string):
        if hash_length == 32:
            possible_types.append("MD5")
            possible_types.append("MD4")
            possible_types.append("NTLM")
            possible_types.append("RIPEMD-128")
        elif hash_length == 40:
            possible_types.append("SHA-1")
            possible_types.append("RIPEMD-160")
        elif hash_length == 64:
            possible_types.append("SHA-256")
            possible_types.append("RIPEMD-256")
        elif hash_length == 96:
            possible_types.append("SHA-384")
        elif hash_length == 128:
            possible_types.append("SHA-512")
            possible_types.append("Whirlpool")
        elif hash_length == 16:
            possible_types.append("MySQL323")
            possible_types.append("DES(Oracle)")
        elif hash_length == 56:
            possible_types.append("SHA-224")
        elif hash_length == 8:
            possible_types.append("CRC32")
            possible_types.append("ADLER32")
    
    # Check for Base64 character set (with potential padding); skip if a hex
    # interpretation was already found, since hex strings also match this set
    if not possible_types and re.match(r'^[A-Za-z0-9+/]+={0,2}$', hash_string):
        if hash_length == 24:
            possible_types.append("MD5 (Base64)")
        elif hash_length == 28:
            possible_types.append("SHA-1 (Base64)")
        elif hash_length == 44:
            possible_types.append("SHA-256 (Base64)")
        elif hash_length == 88:
            possible_types.append("SHA-512 (Base64)")
        else:
            possible_types.append("Base64 encoded")
    
    # No specific hash type identified, give general suggestion
    if not possible_types:
        if re.match(r'^[a-fA-F0-9]+$', hash_string):
            return f"Unknown hash (Hexadecimal, {hash_length} chars)"
        else:
            return f"Unknown format (possibly not a standard hash, or custom format)"
    
    return " or ".join(possible_types)

def main():
    if len(sys.argv) != 2:
        print("Usage: python hash_identifier.py <hash_file>")
        sys.exit(1)
    
    hash_file = sys.argv[1]
    
    try:
        with open(hash_file, 'r') as f:
            lines = f.readlines()
        
        print(f"Analyzing {len(lines)} hashes from {hash_file}...\n")
        
        hash_types = defaultdict(int)
        
        for i, line in enumerate(lines, 1):
            hash_string = line.strip()
            if not hash_string or hash_string.startswith('#'):
                continue
                
            hash_type = identify_hash(hash_string)
            hash_types[hash_type] += 1
            
            # Print the first few and last few hash identifications
            if i <= 3 or i > len(lines) - 3:
                print(f"Line {i}: {hash_string[:40]}{'...' if len(hash_string) > 40 else ''} -> {hash_type}")
            elif i == 4 and len(lines) > 6:
                print(f"... ({len(lines) - 6} more hashes) ...")
        
        print("\nSummary of hash types:")
        for hash_type, count in sorted(hash_types.items(), key=lambda x: x[1], reverse=True):
            print(f"  {hash_type}: {count}")
        
    except FileNotFoundError:
        print(f"Error: File '{hash_file}' not found.")
        sys.exit(1)
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
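
A quick way to sanity-check the identifier is to generate digests of known types with hashlib and feed them in as a file. The expected labels follow from the length table above (32, 40, and 64 hex characters); the file name is arbitrary.

import hashlib

samples = [
    hashlib.md5(b'test').hexdigest(),     # 32 hex chars -> MD5/MD4/NTLM/RIPEMD-128
    hashlib.sha1(b'test').hexdigest(),    # 40 hex chars -> SHA-1/RIPEMD-160
    hashlib.sha256(b'test').hexdigest(),  # 64 hex chars -> SHA-256/RIPEMD-256
]

with open('sample_hashes.txt', 'w') as f:
    f.write('\n'.join(samples) + '\n')

# Then: python hash_identifier.py sample_hashes.txt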

HMAC Verifier

#!/usr/bin/env python3
"""
Simple HMAC Verification Script

This script verifies HMAC signatures for message files by:
1. Finding all message_#.txt and message_#.hmac file pairs
2. Checking each line to verify the HMAC integrity
3. Reporting only basic verification results without additional analysis

Usage:
    python simple_hmac_verify.py --directory <logs_directory> --key <hmac_key>
"""

import hmac
import hashlib
import os
import sys
import re
import glob
import argparse
from datetime import datetime

# The valid signing key
VALID_KEY = 'ciCloud-API-20240315-4f7b9c'

def calculate_hmac(message, key):
    """Calculate HMAC signature for a message."""
    key_bytes = key.encode('utf-8')
    message_bytes = message.encode('utf-8')
    signature = hmac.new(key_bytes, message_bytes, hashlib.sha256)
    return signature.hexdigest()

def verify_hmac(message, signature, key):
    """Verify if a message's HMAC signature is valid."""
    calculated_signature = calculate_hmac(message, key)
    return hmac.compare_digest(calculated_signature, signature)

def read_file(file_path):
    """Read a file and return its lines."""
    with open(file_path, 'r') as f:
        return [line.rstrip() for line in f]

def find_file_pairs(directory):
    """Find matching message/HMAC file pairs in the directory."""
    file_pairs = []
    
    # Find all message_*.txt files
    message_files = glob.glob(os.path.join(directory, "message_*.txt"))
    
    for message_file in message_files:
        # Extract the number part
        match = re.search(r'message_(\d+)\.txt$', message_file)
        if match:
            number = match.group(1)
            hmac_file = os.path.join(directory, f"message_{number}.hmac")
            
            # Check if the corresponding HMAC file exists
            if os.path.exists(hmac_file):
                file_pairs.append((message_file, hmac_file))
    
    return file_pairs

def process_file_pair(message_file, hmac_file, key, verbose=False):
    """Process a single message/HMAC file pair."""
    # Extract file number for identification
    match = re.search(r'message_(\d+)\.txt$', message_file)
    file_id = match.group(1) if match else os.path.basename(message_file)
    
    try:
        # Read files
        message_lines = read_file(message_file)
        hmac_lines = read_file(hmac_file)
        
        total_lines = min(len(message_lines), len(hmac_lines))
        valid_lines = 0
        invalid_lines = 0
        mismatched_entries = []
        
        print(f"Processing file {file_id}: {os.path.basename(message_file)}")
        print(f"  - Total lines: {total_lines}")
        
        # Process each line
        for i in range(total_lines):
            message = message_lines[i]
            signature = hmac_lines[i]
            
            # Skip empty lines
            if not message or not signature:
                continue
                
            # With --verbose, print the first few message/signature pairs for debugging
            if verbose and i < 3:
                print(f"  - Line {i+1} check:")
                print(f"    Message: {message[:30]}{'...' if len(message) > 30 else ''}")
                print(f"    Signature: {signature[:30]}{'...' if len(signature) > 30 else ''}")
                print(f"    Calculated: {calculate_hmac(message, key)[:30]}...")
            
            # Verify HMAC
            is_valid = verify_hmac(message, signature, key)
            
            if is_valid:
                valid_lines += 1
            else:
                invalid_lines += 1
                mismatched_entries.append({
                    'line': i + 1,
                    'message': message,
                    'provided_signature': signature,
                    'calculated_signature': calculate_hmac(message, key)
                })
        
        result = {
            'file_id': file_id,
            'message_file': message_file,
            'hmac_file': hmac_file,
            'total_lines': total_lines,
            'valid_lines': valid_lines,
            'invalid_lines': invalid_lines,
            'mismatched_entries': mismatched_entries[:10]  # Only include first 10 for brevity
        }
        
        print(f"  - Valid lines: {valid_lines}")
        print(f"  - Invalid lines: {invalid_lines}")
        print(f"  - Integrity: {'INTACT' if invalid_lines == 0 else 'COMPROMISED'}")
        print()
        
        return result
        
    except Exception as e:
        print(f"Error processing file pair ({message_file}, {hmac_file}): {e}")
        return {
            'file_id': file_id,
            'message_file': message_file,
            'hmac_file': hmac_file,
            'error': str(e)
        }

def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description='Simple HMAC Verification')
    parser.add_argument('--directory', '-d', required=True, help='Directory containing log files')
    parser.add_argument('--key', '-k', default=VALID_KEY, help=f'HMAC signing key (default: {VALID_KEY})')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
    
    args = parser.parse_args()
    
    try:
        start_time = datetime.now()
        print(f"Starting HMAC verification in {args.directory}")
        print(f"Using key: {args.key}")
        print(f"Started at: {start_time.isoformat()}")
        print("-" * 60)
        
        # Find all file pairs
        file_pairs = find_file_pairs(args.directory)
        
        if not file_pairs:
            print(f"No matching message/HMAC file pairs found in {args.directory}")
            sys.exit(1)
        
        print(f"Found {len(file_pairs)} file pairs")
        print("-" * 60)
        
        # Process each file pair
        results = []
        total_files = len(file_pairs)
        files_with_errors = 0
        files_with_mismatches = 0
        total_lines_processed = 0
        total_mismatched_lines = 0
        
        for message_file, hmac_file in file_pairs:
            result = process_file_pair(message_file, hmac_file, args.key, verbose=args.verbose)
            results.append(result)
            
            if 'error' in result:
                files_with_errors += 1
            else:
                total_lines_processed += result['total_lines']
                total_mismatched_lines += result['invalid_lines']
                
                if result['invalid_lines'] > 0:
                    files_with_mismatches += 1
        
        # Summary
        print("-" * 60)
        print("VERIFICATION SUMMARY")
        print("-" * 60)
        print(f"Total file pairs processed: {total_files}")
        print(f"Files with errors: {files_with_errors}")
        print(f"Files with mismatched HMACs: {files_with_mismatches}")
        print(f"Total lines processed: {total_lines_processed}")
        print(f"Total mismatched lines: {total_mismatched_lines}")
        
        end_time = datetime.now()
        duration = end_time - start_time
        print(f"Duration: {duration.total_seconds():.2f} seconds")
        
        # List files with mismatches
        if files_with_mismatches > 0:
            print("\nFiles with mismatched HMACs:")
            for result in results:
                if 'invalid_lines' in result and result['invalid_lines'] > 0:
                    print(f"- {os.path.basename(result['message_file'])}: {result['invalid_lines']} mismatched lines")
                    
                    # Show example of first mismatched entry
                    if args.verbose and result['mismatched_entries']:
                        first_mismatch = result['mismatched_entries'][0]
                        print(f"  Example (line {first_mismatch['line']}):")
                        print(f"  Message: {first_mismatch['message'][:50]}...")
                        print(f"  Provided HMAC: {first_mismatch['provided_signature']}")
                        print(f"  Calculated HMAC: {first_mismatch['calculated_signature']}")
                        print()
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()
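
To produce a file pair the verifier will accept, each message line needs a matching HMAC-SHA256 hex digest on the same line of the .hmac file. This sketch writes one such pair with made-up log lines, signed with the script's default key:

import hmac
import hashlib

KEY = 'ciCloud-API-20240315-4f7b9c'  # default key from the script above
lines = [
    '2024-03-15 10:00:01 user=alice action=login',
    '2024-03-15 10:00:09 user=bob action=logout',
]

with open('message_1.txt', 'w') as msg, open('message_1.hmac', 'w') as sig:
    for line in lines:
        msg.write(line + '\n')
        digest = hmac.new(KEY.encode('utf-8'), line.encode('utf-8'), hashlib.sha256).hexdigest()
        sig.write(digest + '\n')

# Then: python simple_hmac_verify.py --directory .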

HMAC Integrity Checker

#!/usr/bin/env python3
"""
DNS Subdomain Batch Integrity Checker

This script processes multiple message/HMAC file pairs in a directory, following the pattern:
message_#.txt and message_#.hmac

It automatically detects and verifies all matching pairs in the specified directory,
generating a comprehensive report of integrity issues across all files.

Usage:
    python dns_batch_integrity.py --directory <logs_directory> --output <output_dir>
"""

import hmac
import hashlib
import sys
import os
import re
import argparse
import json
import glob
from datetime import datetime
from typing import Dict, List, Any, Tuple

# The valid signing key
VALID_KEY = 'ciCloud-API-20240315-4f7b9c'

class DNSSubdomainBatchChecker:
    def __init__(self, key: str = VALID_KEY):
        """
        Initialize the DNS Subdomain Integrity Checker.
        
        Args:
            key: The HMAC signing key
        """
        self.key = key
        
        # Initialize common DNS patterns to check for tampering
        self.common_subdomains = {
            'www', 'mail', 'api', 'admin', 'portal', 'test', 'dev', 'staging', 
            'secure', 'vpn', 'internal', 'mx', 'smtp', 'pop', 'imap', 'webmail',
            'remote', 'cdn', 'dns', 'ns1', 'ns2', 'ldap', 'db', 'mysql', 'ftp'
        }
        
        # Suspicious TLDs often used in attacks
        self.suspicious_tlds = {
            'xyz', 'top', 'club', 'cyou', 'icu', 'rest', 'space', 'casa',
            'monster', 'bar', 'gq', 'tk', 'ml', 'cf', 'ga'
        }
        
        # Common character substitutions used in spoofing
        self.char_substitutions = {
            '0': 'o', 'o': '0',
            '1': 'l', 'l': '1', 'i': '1',
            '5': 's', 's': '5',
            '3': 'e', 'e': '3',
            '4': 'a', 'a': '4',
            '6': 'g', 'g': '6',
            '7': 't', 't': '7',
            '8': 'b', 'b': '8'
        }
    
    def calculate_hmac(self, message: str) -> str:
        """
        Calculate HMAC signature for a message.
        
        Args:
            message: The message to sign
            
        Returns:
            The HMAC signature (hex encoded)
        """
        key_bytes = self.key.encode('utf-8')
        message_bytes = message.encode('utf-8')
        signature = hmac.new(key_bytes, message_bytes, hashlib.sha256)
        return signature.hexdigest()
    
    def verify_hmac(self, message: str, signature: str) -> bool:
        """
        Verify if a message's HMAC signature is valid.
        
        Args:
            message: The message to verify
            signature: The provided HMAC signature
            
        Returns:
            True if signature is valid, False otherwise
        """
        calculated_signature = self.calculate_hmac(message)
        # Use constant-time comparison to prevent timing attacks
        return hmac.compare_digest(calculated_signature, signature)
    
    def read_file(self, file_path: str) -> List[str]:
        """
        Read a file and return its lines.
        
        Args:
            file_path: Path to the file
            
        Returns:
            List of lines from the file
        """
        with open(file_path, 'r') as f:
            return [line.rstrip() for line in f]
    
    def find_file_pairs(self, directory: str) -> List[Tuple[str, str]]:
        """
        Find matching message/HMAC file pairs in the directory.
        
        Args:
            directory: Directory to search for files
            
        Returns:
            List of tuples (message_file_path, hmac_file_path)
        """
        file_pairs = []
        
        # Find all message_*.txt files
        message_files = glob.glob(os.path.join(directory, "message_*.txt"))
        
        for message_file in message_files:
            # Extract the number part
            match = re.search(r'message_(\d+)\.txt$', message_file)
            if match:
                number = match.group(1)
                hmac_file = os.path.join(directory, f"message_{number}.hmac")
                
                # Check if the corresponding HMAC file exists
                if os.path.exists(hmac_file):
                    file_pairs.append((message_file, hmac_file))
        
        return file_pairs
    
    def extract_domain_info(self, log_entry: str) -> Dict[str, Any]:
        """
        Extract domain and subdomain information from a log entry.
        
        Args:
            log_entry: A log entry string
            
        Returns:
            Dictionary with extracted domain information
        """
        domain_info = {
            'has_domain': False,
            'domain': '',
            'subdomain': '',
            'tld': ''
        }
        
        # Try to find domain patterns in the log entry
        # This regex looks for domain.tld or subdomain.domain.tld patterns
        domain_matches = re.findall(r'([a-zA-Z0-9][-a-zA-Z0-9]*(\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)', log_entry)
        
        if domain_matches:
            domain_info['has_domain'] = True
            full_domain = domain_matches[0][0]
            domain_info['domain'] = full_domain
            
            # Split by dots to extract subdomain and TLD
            parts = full_domain.split('.')
            
            if len(parts) >= 2:
                domain_info['tld'] = parts[-1].lower()
                
                if len(parts) > 2:
                    domain_info['subdomain'] = '.'.join(parts[:-2])
        
        return domain_info
    
    def detect_tampering(self, log_entry: str) -> Dict[str, Any]:
        """
        Detect possible tampering in a DNS log entry.
        
        Args:
            log_entry: A log entry string
            
        Returns:
            Dictionary with tampering analysis
        """
        analysis = {
            'is_suspicious': False,
            'tampering_patterns': set(),
            'possible_original': '',
            'risk_level': 'low',
            'reasons': []
        }
        
        # Extract any domain information from the log entry
        domain_info = self.extract_domain_info(log_entry)
        
        if domain_info['has_domain']:
            # Check for suspicious TLDs
            if domain_info['tld'] in self.suspicious_tlds:
                analysis['is_suspicious'] = True
                analysis['tampering_patterns'].add('suspicious_tld')
                analysis['risk_level'] = 'medium'
                analysis['reasons'].append(f"Suspicious TLD found: .{domain_info['tld']}")
            
            # Check for subdomain issues
            if domain_info['subdomain']:
                subdomain = domain_info['subdomain']
                
                # Check for character substitutions
                for char in subdomain:
                    if char in self.char_substitutions:
                        analysis['is_suspicious'] = True
                        analysis['tampering_patterns'].add('character_substitution')
                        analysis['risk_level'] = 'high'
                        analysis['reasons'].append(f"Possible character substitution: '{char}' might be '{self.char_substitutions[char]}'")
                        
                        # Generate a possible original by replacing the character
                        possible_original = log_entry.replace(subdomain, 
                                                             subdomain.replace(char, self.char_substitutions[char]))
                        analysis['possible_original'] = possible_original
                
                # Check for similar but different subdomains
                for common_sub in self.common_subdomains:
                    if subdomain != common_sub and self.levenshtein_distance(subdomain, common_sub) <= 2:
                        analysis['is_suspicious'] = True
                        analysis['tampering_patterns'].add('similar_subdomain')
                        analysis['risk_level'] = 'high'
                        analysis['reasons'].append(f"Subdomain '{subdomain}' is suspiciously similar to common subdomain '{common_sub}'")
                        
                        # Generate a possible original version
                        possible_original = log_entry.replace(subdomain, common_sub)
                        analysis['possible_original'] = possible_original
                
                # Check for unusually long subdomains (potential data exfiltration)
                if len(subdomain) > 30:
                    analysis['is_suspicious'] = True
                    analysis['tampering_patterns'].add('exfiltration_subdomain')
                    analysis['risk_level'] = 'high'
                    analysis['reasons'].append(f"Unusually long subdomain (length: {len(subdomain)}) may indicate data exfiltration")
        
        # Check for IP address patterns
        ip_matches = re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', log_entry)
        if ip_matches:
            # Check for suspicious IP ranges
            for ip in ip_matches:
                octets = [int(octet) for octet in ip.split('.')]
                
                # Check for loopback or private IP misuse
                if octets[0] == 127 or (octets[0] == 10) or \
                   (octets[0] == 172 and 16 <= octets[1] <= 31) or \
                   (octets[0] == 192 and octets[1] == 168):
                    analysis['is_suspicious'] = True
                    analysis['tampering_patterns'].add('internal_ip_exposure')
                    analysis['risk_level'] = 'critical'
                    analysis['reasons'].append(f"Internal IP address exposed: {ip}")
        
        # Check for DNS record types and modifications
        record_types = ['A', 'AAAA', 'MX', 'CNAME', 'TXT', 'NS', 'SOA', 'SRV', 'PTR']
        for record_type in record_types:
            # Look for record type followed by manipulation indicators
            pattern = r'\b' + record_type + r'\s+(?:changed|modified|updated|deleted|removed|added)\b'
            if re.search(pattern, log_entry, re.IGNORECASE):
                analysis['is_suspicious'] = True
                analysis['tampering_patterns'].add('dns_record_modification')
                analysis['risk_level'] = 'high'
                analysis['reasons'].append(f"DNS {record_type} record modification detected")
        
        # Look for DNS amplification or reflection attack patterns
        if re.search(r'\b(?:amplification|reflection|flood|ddos)\b', log_entry, re.IGNORECASE) and domain_info['has_domain']:
            analysis['is_suspicious'] = True
            analysis['tampering_patterns'].add('dns_amplification')
            analysis['risk_level'] = 'critical'
            analysis['reasons'].append(f"Possible DNS amplification attack signature")
        
        # Update risk level based on number of patterns
        if len(analysis['tampering_patterns']) >= 3:
            analysis['risk_level'] = 'critical'
        elif len(analysis['tampering_patterns']) == 2:
            analysis['risk_level'] = 'high' if analysis['risk_level'] != 'critical' else 'critical'
        
        return analysis
    
    @staticmethod
    def levenshtein_distance(s1: str, s2: str) -> int:
        """
        Calculate the Levenshtein distance between two strings.
        
        Args:
            s1: First string
            s2: Second string
            
        Returns:
            The Levenshtein distance
        """
        if len(s1) < len(s2):
            return DNSSubdomainBatchChecker.levenshtein_distance(s2, s1)
        
        if len(s2) == 0:
            return len(s1)
        
        previous_row = range(len(s2) + 1)
        for i, c1 in enumerate(s1):
            current_row = [i + 1]
            for j, c2 in enumerate(s2):
                insertions = previous_row[j + 1] + 1
                deletions = current_row[j] + 1
                substitutions = previous_row[j] + (c1 != c2)
                current_row.append(min(insertions, deletions, substitutions))
            previous_row = current_row
        
        return previous_row[-1]
    
    def process_file_pair(self, message_file: str, hmac_file: str) -> Dict[str, Any]:
        """
        Process a single message/HMAC file pair.
        
        Args:
            message_file: Path to the message file
            hmac_file: Path to the HMAC file
            
        Returns:
            Dictionary with processing results
        """
        # Extract file number for identification
        match = re.search(r'message_(\d+)\.txt$', message_file)
        file_id = match.group(1) if match else os.path.basename(message_file)
        
        # Read files
        try:
            message_content = self.read_file(message_file)
            hmac_content = self.read_file(hmac_file)
            
            # Verify each line
            results = {
                'file_id': file_id,
                'message_file': message_file,
                'hmac_file': hmac_file,
                'total_lines': min(len(message_content), len(hmac_content)),
                'valid_lines': 0,
                'invalid_lines': 0,
                'suspicious_lines': 0,
                'invalid_entries': [],
                'tampering_summary': {
                    'patterns': {},
                    'risk_levels': {
                        'low': 0,
                        'medium': 0,
                        'high': 0,
                        'critical': 0
                    }
                }
            }
            
            # Process lines
            for i in range(min(len(message_content), len(hmac_content))):
                message = message_content[i]
                signature = hmac_content[i]
                
                # Skip empty lines
                if not message or not signature:
                    continue
                
                # Verify HMAC
                is_valid = self.verify_hmac(message, signature)
                
                if is_valid:
                    results['valid_lines'] += 1
                else:
                    results['invalid_lines'] += 1
                    
                    # Generate correct signature
                    correct_signature = self.calculate_hmac(message)
                    
                    # Check for tampering
                    tampering_analysis = self.detect_tampering(message)
                    
                    invalid_entry = {
                        'line_number': i + 1,
                        'message': message,
                        'provided_signature': signature,
                        'correct_signature': correct_signature,
                        'tampering_analysis': tampering_analysis
                    }
                    
                    results['invalid_entries'].append(invalid_entry)
                    
                    # Update tampering statistics
                    if tampering_analysis['is_suspicious']:
                        results['suspicious_lines'] += 1
                        results['tampering_summary']['risk_levels'][tampering_analysis['risk_level']] += 1
                        
                        # Count pattern occurrences
                        for pattern in tampering_analysis['tampering_patterns']:
                            if pattern not in results['tampering_summary']['patterns']:
                                results['tampering_summary']['patterns'][pattern] = 0
                            results['tampering_summary']['patterns'][pattern] += 1
            
            return results
            
        except Exception as e:
            print(f"Error processing file pair ({message_file}, {hmac_file}): {e}")
            return {
                'file_id': file_id,
                'message_file': message_file,
                'hmac_file': hmac_file,
                'error': str(e)
            }
    
    def process_directory(self, directory: str) -> Dict[str, Any]:
        """
        Process all matching file pairs in a directory.
        
        Args:
            directory: Directory containing message_*.txt and message_*.hmac files
            
        Returns:
            Dictionary with processing results for all files
        """
        # Find all matching file pairs
        file_pairs = self.find_file_pairs(directory)
        
        if not file_pairs:
            print(f"No matching message/HMAC file pairs found in {directory}")
            return {'error': 'No matching file pairs found'}
        
        # Process each file pair
        results = {
            'directory': directory,
            'total_files': len(file_pairs),
            'processed_files': 0,
            'files_with_errors': 0,
            'total_lines_processed': 0,
            'total_invalid_lines': 0,
            'total_suspicious_lines': 0,
            'file_results': [],
            'overall_tampering_summary': {
                'patterns': {},
                'risk_levels': {
                    'low': 0,
                    'medium': 0,
                    'high': 0,
                    'critical': 0
                }
            }
        }
        
        for message_file, hmac_file in file_pairs:
            print(f"Processing file pair: {os.path.basename(message_file)} and {os.path.basename(hmac_file)}")
            
            # Process file pair
            file_result = self.process_file_pair(message_file, hmac_file)
            results['file_results'].append(file_result)
            
            # Update overall statistics
            if 'error' in file_result:
                results['files_with_errors'] += 1
            else:
                results['processed_files'] += 1
                results['total_lines_processed'] += file_result['total_lines']
                results['total_invalid_lines'] += file_result['invalid_lines']
                results['total_suspicious_lines'] += file_result['suspicious_lines']
                
                # Aggregate tampering patterns
                for pattern, count in file_result['tampering_summary']['patterns'].items():
                    if pattern not in results['overall_tampering_summary']['patterns']:
                        results['overall_tampering_summary']['patterns'][pattern] = 0
                    results['overall_tampering_summary']['patterns'][pattern] += count
                
                # Aggregate risk levels
                for level in ['low', 'medium', 'high', 'critical']:
                    results['overall_tampering_summary']['risk_levels'][level] += \
                        file_result['tampering_summary']['risk_levels'][level]
        
        return results
    
    def save_corrected_hmac_files(self, results: Dict[str, Any], output_dir: str) -> None:
        """
        Save corrected HMAC files for each processed file pair.
        
        Args:
            results: Overall processing results
            output_dir: Output directory
        """
        corrected_dir = os.path.join(output_dir, 'corrected_hmac_files')
        os.makedirs(corrected_dir, exist_ok=True)
        
        for file_result in results['file_results']:
            if 'error' in file_result:
                continue
            
            # Get original file content
            message_file = file_result['message_file']
            hmac_file = file_result['hmac_file']
            
            try:
                # Read original message file
                message_content = self.read_file(message_file)
                
                # Create corrected HMAC file
                corrected_hmac_path = os.path.join(corrected_dir, os.path.basename(hmac_file))
                
                with open(corrected_hmac_path, 'w') as f:
                    for message in message_content:
                        if message:  # Skip empty lines
                            correct_signature = self.calculate_hmac(message)
                            f.write(f"{correct_signature}\n")
                
                print(f"Created corrected HMAC file: {corrected_hmac_path}")
                
            except Exception as e:
                print(f"Error creating corrected HMAC file for {os.path.basename(hmac_file)}: {e}")
    
    def save_results(self, results: Dict[str, Any], output_dir: str) -> None:
        """
        Save processing results to output files.
        
        Args:
            results: Overall processing results
            output_dir: Output directory
        """
        os.makedirs(output_dir, exist_ok=True)
        
        # Save overall JSON results
        with open(os.path.join(output_dir, 'batch_results.json'), 'w') as f:
            # Sets are not JSON-serializable; anything else unexpected falls back to str()
            f.write(json.dumps(results, indent=2,
                               default=lambda x: list(x) if isinstance(x, set) else str(x)))
        
        # Save detailed report
        with open(os.path.join(output_dir, 'integrity_report.txt'), 'w') as f:
            f.write(f"DNS Subdomain Batch Integrity Report\n")
            f.write(f"==================================\n\n")
            f.write(f"Generated: {datetime.now().isoformat()}\n\n")
            
            f.write(f"Overall Summary:\n")
            f.write(f"---------------\n")
            f.write(f"Directory processed: {results['directory']}\n")
            f.write(f"Total file pairs: {results['total_files']}\n")
            f.write(f"Successfully processed: {results['processed_files']}\n")
            f.write(f"Files with errors: {results['files_with_errors']}\n")
            f.write(f"Total log lines processed: {results['total_lines_processed']}\n")
            f.write(f"Total invalid lines: {results['total_invalid_lines']}\n")
            f.write(f"Total suspicious lines: {results['total_suspicious_lines']}\n\n")
            
            # Risk level summary
            if results['total_suspicious_lines'] > 0:
                f.write(f"Risk Level Distribution:\n")
                for level in ['low', 'medium', 'high', 'critical']:
                    count = results['overall_tampering_summary']['risk_levels'][level]
                    indicator = '!' * (1 if level == 'low' else 2 if level == 'medium' else 3 if level == 'high' else 4)
                    f.write(f"  {indicator} {level.upper()}: {count}\n")
                
                f.write(f"\nTampering Patterns Detected:\n")
                for pattern, count in sorted(results['overall_tampering_summary']['patterns'].items(), 
                                           key=lambda x: x[1], reverse=True):
                    f.write(f"  - {pattern}: {count}\n")
            
            # Per-file summary
            f.write(f"\nPer-File Summary:\n")
            f.write(f"----------------\n")
            for file_result in results['file_results']:
                if 'error' in file_result:
                    f.write(f"File {file_result['file_id']}: ERROR - {file_result['error']}\n")
                else:
                    integrity_status = "COMPROMISED" if file_result['invalid_lines'] > 0 else "INTACT"
                    risk_level = "HIGH RISK" if (file_result['tampering_summary']['risk_levels']['high'] > 0 or 
                                              file_result['tampering_summary']['risk_levels']['critical'] > 0) else \
                                 "MEDIUM RISK" if file_result['tampering_summary']['risk_levels']['medium'] > 0 else \
                                 "LOW RISK" if file_result['suspicious_lines'] > 0 else "SAFE"
                    
                    f.write(f"File {file_result['file_id']}: {integrity_status} - {risk_level}\n")
                    f.write(f"  Message file: {os.path.basename(file_result['message_file'])}\n")
                    f.write(f"  Lines: {file_result['total_lines']} total, {file_result['invalid_lines']} invalid, {file_result['suspicious_lines']} suspicious\n")
                    
                    if file_result['suspicious_lines'] > 0:
                        # Show the first few suspicious entries
                        suspicious_entries = [entry for entry in file_result['invalid_entries'] 
                                           if entry['tampering_analysis']['is_suspicious']]
                        
                        f.write(f"  Top suspicious entries ({min(3, len(suspicious_entries))} of {len(suspicious_entries)}):\n")
                        for i, entry in enumerate(suspicious_entries[:3]):
                            f.write(f"    Line {entry['line_number']}: {entry['message'][:50]}{'...' if len(entry['message']) > 50 else ''}\n")
                            f.write(f"      Risk: {entry['tampering_analysis']['risk_level'].upper()}\n")
                            f.write(f"      Patterns: {', '.join(entry['tampering_analysis']['tampering_patterns'])}\n")
                    
                    f.write("\n")
        
        # Create a file with high-risk entries for immediate attention
        high_risk_entries = []
        for file_result in results['file_results']:
            if 'error' in file_result:
                continue
                
            file_id = file_result['file_id']
            for entry in file_result['invalid_entries']:
                if entry['tampering_analysis']['is_suspicious'] and \
                   entry['tampering_analysis']['risk_level'] in ['high', 'critical']:
                    entry_copy = entry.copy()
                    entry_copy['file_id'] = file_id
                    high_risk_entries.append(entry_copy)
        
        if high_risk_entries:
            with open(os.path.join(output_dir, 'high_risk_entries.txt'), 'w') as f:
                f.write(f"HIGH RISK DNS LOG ENTRIES - IMMEDIATE ATTENTION REQUIRED\n")
                f.write(f"======================================================\n\n")
                f.write(f"Generated: {datetime.now().isoformat()}\n")
                f.write(f"Total high-risk entries: {len(high_risk_entries)}\n\n")
                
                # Sort by risk level (critical first)
                high_risk_entries.sort(key=lambda x: 0 if x['tampering_analysis']['risk_level'] == 'critical' else 1)
                
                for entry in high_risk_entries:
                    f.write(f"File {entry['file_id']}, Line {entry['line_number']} - [{entry['tampering_analysis']['risk_level'].upper()}]\n")
                    f.write(f"  Message: {entry['message']}\n")
                    f.write(f"  Provided signature: {entry['provided_signature']}\n")
                    f.write(f"  Correct signature: {entry['correct_signature']}\n")
                    f.write(f"  Tampering patterns: {', '.join(entry['tampering_analysis']['tampering_patterns'])}\n")
                    f.write(f"  Reasons:\n")
                    for reason in entry['tampering_analysis']['reasons']:
                        f.write(f"    - {reason}\n")
                    
                    if entry['tampering_analysis']['possible_original']:
                        f.write(f"  Possible original: {entry['tampering_analysis']['possible_original']}\n")
                    
                    f.write("\n")
        
        # Save corrected HMAC files
        self.save_corrected_hmac_files(results, output_dir)

def main():
    """Main entry point for the script."""
    parser = argparse.ArgumentParser(description='DNS Subdomain Batch Integrity Checker')
    parser.add_argument('--directory', '-d', required=True, help='Directory containing log files')
    parser.add_argument('--output', '-o', default='batch_output', help='Output directory (default: batch_output)')
    parser.add_argument('--key', '-k', default=VALID_KEY, help=f'HMAC signing key (default: {VALID_KEY})')
    
    args = parser.parse_args()
    
    checker = DNSSubdomainBatchChecker(key=args.key)
    
    try:
        start_time = datetime.now()
        print(f"Starting batch processing of DNS log files in {args.directory}")
        print(f"Started at: {start_time.isoformat()}")
        
        results = checker.process_directory(args.directory)
        
        if 'error' in results:
            print(f"Error: {results['error']}")
            sys.exit(1)
        
        # Save results
        checker.save_results(results, args.output)
        
        end_time = datetime.now()
        duration = end_time - start_time
        
        print(f"\nBatch processing completed!")
        print(f"Duration: {duration.total_seconds():.2f} seconds")
        print(f"Files processed: {results['processed_files']} of {results['total_files']}")
        print(f"Total lines checked: {results['total_lines_processed']}")
        print(f"Invalid lines detected: {results['total_invalid_lines']}")
        print(f"Suspicious lines detected: {results['total_suspicious_lines']}")
        print(f"Results saved to: {args.output}")
        
        if results['total_suspicious_lines'] > 0:
            print(f"\n⚠️ WARNING: {results['total_suspicious_lines']} suspicious log entries detected!")
            high_risk = results['overall_tampering_summary']['risk_levels']['high'] + \
                       results['overall_tampering_summary']['risk_levels']['critical']
            
            if high_risk > 0:
                print(f"❗ CRITICAL: {high_risk} high or critical risk entries found!")
                print(f"Check {os.path.join(args.output, 'high_risk_entries.txt')} for details")
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()

Liber8tion Cracker

#!/usr/bin/env python3

import os
import argparse
import subprocess
import sys
import tempfile
import shutil

def run_hashcat(cmd, description):
    """Run a hashcat command with proper logging"""
    print(f"[+] {description}")
    print(f"[+] Command: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, check=False)
    except Exception as e:
        print(f"[-] Error running hashcat: {e}")

def main():
    parser = argparse.ArgumentParser(description='Crack hashes using the Liber8ion Passphrase Standard')
    parser.add_argument('--hash-file', required=True, help='File containing hashes to crack')
    parser.add_argument('--hash-type', required=True, help='Hashcat hash type (e.g. 0 for MD5, 100 for SHA1)')
    parser.add_argument('--wordlist', default='/usr/share/wordlists/rockyou.txt', help='Dictionary wordlist')
    parser.add_argument('--output', default='cracked_passwords.txt', help='Output file for cracked passwords')
    args = parser.parse_args()

    # Create temporary directory
    temp_dir = tempfile.mkdtemp(prefix="liber8ion_")
    print(f"[+] Using temporary directory: {temp_dir}")

    # Path for the potfile
    potfile = os.path.join(temp_dir, "liber8ion.potfile")
    
    # Create a smaller dictionary with lowercase words
    print(f"[+] Creating optimized wordlist from {args.wordlist}...")
    lowercase_dict = os.path.join(temp_dir, "lowercase_dict.txt")
    uppercase_dict = os.path.join(temp_dir, "uppercase_dict.txt")
    
    try:
        # Take a reasonable subset to avoid memory issues
        with open(args.wordlist, 'r', encoding='latin-1', errors='ignore') as infile, \
             open(lowercase_dict, 'w') as lower_out, \
             open(uppercase_dict, 'w') as upper_out:
            for i, line in enumerate(infile):
                if i >= 100000:  # Limit to first 100k words
                    break
                word = line.strip()
                if word and len(word) >= 3 and len(word) <= 10:  # Filter reasonable word lengths
                    lower_out.write(f"{word.lower()}\n")
                    upper_out.write(f"{word.upper()}\n")
    except Exception as e:
        print(f"[-] Error processing wordlist: {e}")
        sys.exit(1)

    # Create file with digits
    digits_dict = os.path.join(temp_dir, "digits.txt")
    with open(digits_dict, 'w') as f:
        for i in range(10):
            f.write(f"{i}\n")
    
    # Create special character dictionaries
    hyphen_dict = os.path.join(temp_dir, "hyphen.txt")
    with open(hyphen_dict, 'w') as f:
        f.write("-\n")
    
    special_chars_dict = os.path.join(temp_dir, "special_chars.txt")
    with open(special_chars_dict, 'w') as f:
        for c in "!@#$%^&*()-_=+[]{}|;:,.<>?/":
            f.write(f"{c}\n")
    
    # Create liber8 file
    liber8_dict = os.path.join(temp_dir, "liber8.txt")
    with open(liber8_dict, 'w') as f:
        f.write("liber8\n")
    
    # Generate specific pattern dictionaries for each type
    print("[+] Generating pattern dictionaries...")
    
    # For Type 1 (All lowercase, hyphen separator)
    type1_patterns = os.path.join(temp_dir, "type1_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type1_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:1000]:  # Limit to first 1000 words for efficient processing
                out_file.write(f"{word}-liber8-\n")
    except Exception as e:
        print(f"[-] Error generating Type 1 patterns: {e}")
    
    # Generate all types of patterns with special characters
    # For Types 2, 3, and 4
    special_chars = "!@#$%^&*()-_=+[]{}|;:,.<>?/"
    
    # Type 2 (All lowercase, any special char)
    type2_patterns = os.path.join(temp_dir, "type2_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type2_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 2 patterns: {e}")
    
    # Type 3 lower patterns (lowercase first word, any special char)
    type3_lower_patterns = os.path.join(temp_dir, "type3_lower_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type3_lower_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 3 lower patterns: {e}")
    
    # Type 3 upper patterns (uppercase first word, any special char)
    type3_upper_patterns = os.path.join(temp_dir, "type3_upper_patterns.txt")
    try:
        with open(uppercase_dict, 'r') as word_file, open(type3_upper_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:500]:  # Limit to 500 words
                for special_char in special_chars:
                    out_file.write(f"{word}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 3 upper patterns: {e}")
    
    # Type 4 digit patterns - with digits at end of first word
    type4_first_digit_patterns = os.path.join(temp_dir, "type4_first_digit_patterns.txt")
    try:
        with open(lowercase_dict, 'r') as word_file, open(type4_first_digit_patterns, 'w') as out_file:
            words = [w.strip() for w in word_file.readlines()]
            for word in words[:300]:  # Limit words
                for digit in range(10):
                    for special_char in special_chars[:5]:  # Limit special chars
                        out_file.write(f"{word}{digit}{special_char}liber8{special_char}\n")
    except Exception as e:
        print(f"[-] Error generating Type 4 first word digit patterns: {e}")
    
    print("\n[+] Starting hash cracking with Liber8ion Passphrase Standard...")

    # Type 1: word1-liber8-word2 (all lowercase, hyphen separators)
    print("\n[+] Cracking Type 1 passphrases...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type1_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 1 patterns: word1-liber8-word2 (all lowercase)")

    # Type 2: word1<special>liber8<special>word2 (all lowercase)
    print("\n[+] Cracking Type 2 passphrases...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type2_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 2 patterns: word1<special>liber8<special>word2 (all lowercase)")

    # Type 3: Each word all lowercase OR all uppercase
    print("\n[+] Cracking Type 3 passphrases - lowercase first word...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: lower<special>liber8<special>lower")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: lower<special>liber8<special>UPPER")
    
    print("\n[+] Cracking Type 3 passphrases - uppercase first word...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: UPPER<special>liber8<special>lower")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 3 patterns: UPPER<special>liber8<special>UPPER")

    # Type 4: One word with digit appended
    print("\n[+] Cracking Type 4 passphrases - first word with digit...")
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type4_first_digit_patterns, lowercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1+digit<special>liber8<special>word2")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type4_first_digit_patterns, uppercase_dict,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1+digit<special>liber8<special>WORD2")
    
    # Type 4 with second word with digit
    # For this, we'll use the Type 3 patterns but with a rule to append a digit
    print("\n[+] Cracking Type 4 passphrases - second word with digit...")
    
    # Create a digit append rule file
    append_digit_rule = os.path.join(temp_dir, "append_digit.rule")
    with open(append_digit_rule, 'w') as f:
        for i in range(10):
            f.write(f"${i}\n")
    
    # For lowercase second word with digit
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, lowercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1<special>liber8<special>word2+digit")
    
    # For uppercase second word with digit
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_lower_patterns, uppercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: word1<special>liber8<special>WORD2+digit")
    
    # Same for uppercase first words
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, lowercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: WORD1<special>liber8<special>word2+digit")
    
    cmd = [
        "hashcat", "-a0", f"-m{args.hash_type}", args.hash_file,
        type3_upper_patterns, uppercase_dict,
        "-r", append_digit_rule,
        "--potfile-path", potfile
    ]
    run_hashcat(cmd, "Trying Type 4 patterns: WORD1<special>liber8<special>WORD2+digit")

    # Process results to the output file
    print(f"\n[+] Processing results to {args.output}...")
    with open(potfile, 'r') as pot, open(args.output, 'w') as out:
        for line in pot:
            if ':' in line:
                hash_val, plaintext = line.strip().split(':', 1)
                out.write(f"{hash_val}:{plaintext}\n")
    
    print(f"\n[+] Cracking complete! Results saved to {args.output}")
    print(f"[+] To show your cracked passwords: cat {args.output}")

    # Ask if user wants to remove temp files
    response = input(f"\n[?] Remove temporary files in {temp_dir}? (y/n): ")
    if response.lower() == 'y':
        try:
            shutil.rmtree(temp_dir)
            print(f"[+] Temporary directory {temp_dir} removed")
        except Exception as e:
            print(f"[-] Error removing temporary directory: {e}")
    else:
        print(f"[+] Temporary files kept in {temp_dir}")

if __name__ == "__main__":
    main()

PDF to Hashcat

#!/usr/bin/env python

# Copyright (c) 2013 Shane Quigley, < shane at softwareontheside.info >

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# modified to only output hash for hashcat by philsmd, 2015

import re
import sys
import os
from xml.dom import minidom

PY3 = sys.version_info[0] == 3

class PdfParser:
    def __init__(self, file_name):
        self.file_name = file_name
        f = open(file_name, 'rb')
        self.encrypted = f.read()
        f.close()
        self.process = True
        psr = re.compile(b'PDF-\d\.\d')
        try:
            self.pdf_spec = psr.findall(self.encrypted)[0]
        except IndexError:
            sys.stderr.write("%s is not a PDF file!\n" % file_name)
            self.process = False

    def parse(self):
        if not self.process:
            return

        try:
            trailer = self.get_trailer()
        except RuntimeError:
            e = sys.exc_info()[1]
            sys.stderr.write("%s : %s\n" % (self.file_name, str(e)))
            return
        # print >> sys.stderr, trailer
        object_id = self.get_object_id(b'Encrypt', trailer)
        # print >> sys.stderr, object_id
        if(len(object_id) == 0):
            raise RuntimeError("Could not find object id")
        encryption_dictionary = self.get_encryption_dictionary(object_id)
        # print >> sys.stderr, encryption_dictionary
        dr = re.compile(b'\d+')
        vr = re.compile(b'\/V \d')
        rr = re.compile(b'\/R \d')
        try:
            v = dr.findall(vr.findall(encryption_dictionary)[0])[0]
        except IndexError:
            raise RuntimeError("Could not find /V")
        r = dr.findall(rr.findall(encryption_dictionary)[0])[0]
        lr = re.compile(b'\/Length \d+')
        longest = 0
        # According to the docs:
        # Length : (Optional; PDF 1.4; only if V is 2 or 3). Default value: 40
        length = b'40'
        for le in lr.findall(encryption_dictionary):
            if(int(dr.findall(le)[0]) > longest):
                longest = int(dr.findall(le)[0])
                length = dr.findall(le)[0]
        pr = re.compile(b'\/P -?\d+')
        try:
            p = pr.findall(encryption_dictionary)[0]
        except IndexError:
                # print >> sys.stderr, "** dict:", encryption_dictionary
                raise RuntimeError("Could not find /P")
        pr = re.compile(b'-?\d+')
        p = pr.findall(p)[0]
        meta = '1' if self.is_meta_data_encrypted(encryption_dictionary) else '0'
        idr = re.compile(b'\/ID\s*\[\s*<\w+>\s*<\w+>\s*\]')
        try:
            i_d = idr.findall(trailer)[0] # id key word
        except IndexError:
            # some pdf files use () instead of <>
            idr = re.compile(b'\/ID\s*\[\s*\(\w+\)\s*\(\w+\)\s*\]')
            try:
                i_d = idr.findall(trailer)[0] # id key word
            except IndexError:
                # print >> sys.stderr, "** idr:", idr
                # print >> sys.stderr, "** trailer:", trailer
                raise RuntimeError("Could not find /ID tag")
                return
        idr = re.compile(b'<\w+>')
        try:
            i_d = idr.findall(trailer)[0]
        except IndexError:
            idr = re.compile(b'\(\w+\)')
            i_d = idr.findall(trailer)[0]
        i_d = i_d.replace(b'<',b'')
        i_d = i_d.replace(b'>',b'')
        i_d = i_d.lower()
        passwords = self.get_passwords_for_JtR(encryption_dictionary)
        output = '$pdf$'+v.decode('ascii')+'*'+r.decode('ascii')+'*'+length.decode('ascii')+'*'
        output += p.decode('ascii')+'*'+meta+'*'
        output += str(int(len(i_d)/2))+'*'+i_d.decode('ascii')+'*'+passwords
        sys.stdout.write("%s\n" % output.encode('UTF-8'))

    def get_passwords_for_JtR(self, encryption_dictionary):
        output = ""
        letters = [b"U", b"O"]
        if(b"1.7" in self.pdf_spec):
            letters = [b"U", b"O", b"UE", b"OE"]
        for let in letters:
            pr_str = b'\/' + let + b'\s*\([^)]+\)'
            pr = re.compile(pr_str)
            pas = pr.findall(encryption_dictionary)
            if(len(pas) > 0):
                pas = pr.findall(encryption_dictionary)[0]
                # because regexs in python suck <=== LOL
                while(pas[-2] == b'\\'):
                    pr_str += b'[^)]+\)'
                    pr = re.compile(pr_str)
                    # print >> sys.stderr, "pr_str:", pr_str
                    # print >> sys.stderr, encryption_dictionary
                    try:
                        pas = pr.findall(encryption_dictionary)[0]
                    except IndexError:
                        break
                output += self.get_password_from_byte_string(pas)+"*"
            else:
                pr = re.compile(let + b'\s*<\w+>')
                pas = pr.findall(encryption_dictionary)
                if not pas:
                    continue
                pas = pas[0]
                pr = re.compile(b'<\w+>')
                pas = pr.findall(pas)[0]
                pas = pas.replace(b"<",b"")
                pas = pas.replace(b">",b"")
                if PY3:
                    output += str(int(len(pas)/2))+'*'+str(pas.lower(),'ascii')+'*'
                else:
                    output += str(int(len(pas)/2))+'*'+pas.lower()+'*'
        return output[:-1]

    def is_meta_data_encrypted(self, encryption_dictionary):
        mr = re.compile(b'\/EncryptMetadata\s\w+')
        if(len(mr.findall(encryption_dictionary)) > 0):
            wr = re.compile(b'\w+')
            is_encrypted = wr.findall(mr.findall(encryption_dictionary)[0])[-1]
            if(is_encrypted == b"false"):
                return False
            else:
                return True
        else:
            return True

    def parse_meta_data(self, trailer):
        root_object_id = self.get_object_id(b'Root', trailer)
        root_object = self.get_pdf_object(root_object_id)
        object_id = self.get_object_id(b'Metadata', root_object)
        xmp_metadata_object = self.get_pdf_object(object_id)
        return self.get_xmp_values(xmp_metadata_object)

    def get_xmp_values(self, xmp_metadata_object):
        xmp_metadata_object = xmp_metadata_object.partition(b"stream")[2]
        xmp_metadata_object = xmp_metadata_object.partition(b"endstream")[0]
        try:
            xml_metadata = minidom.parseString(xmp_metadata_object)
        except:
            return ""
        values = []
        values.append(self.get_dc_value("title", xml_metadata))
        values.append(self.get_dc_value("creator", xml_metadata))
        values.append(self.get_dc_value("description", xml_metadata))
        values.append(self.get_dc_value("subject", xml_metadata))
        created_year = xml_metadata.getElementsByTagName("xmp:CreateDate")
        if(len(created_year) > 0):
            created_year = created_year[0].firstChild.data[0:4]
            values.append(str(created_year))
        return " ".join(values).replace(":", "")

    def get_dc_value(self, value, xml_metadata):
        output = xml_metadata.getElementsByTagName("dc:"+value)
        if(len(output) > 0):
            output = output[0]
            output = output.getElementsByTagName("rdf:li")[0]
            if(output.firstChild):
                output = output.firstChild.data
                return output
        return ""

    def get_encryption_dictionary(self, object_id):
        encryption_dictionary = self.get_pdf_object(object_id)
        for o in encryption_dictionary.split(b"endobj"):
            if(object_id+b" obj" in o):
                encryption_dictionary = o
        return encryption_dictionary

    def get_object_id(self, name , trailer):
        oir = re.compile(b'\/' + name + b'\s\d+\s\d\sR')
        try:
            object_id = oir.findall(trailer)[0]
        except IndexError:
            # print >> sys.stderr, " ** get_object_id: name \"", name, "\", trailer ", trailer
            return ""
        oir = re.compile(b'\d+ \d')
        object_id = oir.findall(object_id)[0]
        return object_id

    def get_pdf_object(self, object_id):
        output = object_id+b" obj" + \
            self.encrypted.partition(b"\r"+object_id+b" obj")[2]
        if(output == object_id+b" obj"):
            output = object_id+b" obj" + \
            self.encrypted.partition(b"\n"+object_id+b" obj")[2]
        output = output.partition(b"endobj")[0] + b"endobj"
        # print >> sys.stderr, output
        return output

    def get_trailer(self):
        trailer = self.get_data_between(b"trailer", b">>", b"/ID")
        if(trailer == b""):
            trailer = self.get_data_between(b"DecodeParms", b"stream", b"")
            if(trailer == ""):
                raise RuntimeError("Can't find trailer")
        if(trailer != "" and trailer.find(b"Encrypt") == -1):
            # print >> sys.stderr, trailer
            raise RuntimeError("File not encrypted")
        return trailer

    def get_data_between(self, s1, s2, tag):
        output = b""
        inside_first = False
        lines = re.split(b'\n|\r', self.encrypted)
        for line in lines:
            inside_first = inside_first or line.find(s1) != -1
            if(inside_first):
                output += line
                if(line.find(s2) != -1):
                    if(tag == b"" or output.find(tag) != -1):
                        break
                    else:
                        output = b""
                        inside_first = False
        return output

    def get_hex_byte(self, o_or_u, i):
        if PY3:
            return hex(o_or_u[i]).replace('0x', '')
        else:
            return hex(ord(o_or_u[i])).replace('0x', '')

    def get_password_from_byte_string(self, o_or_u):
        pas = ""
        escape_seq = False
        escapes = 0
        excluded_indexes = [0, 1, 2]
        #For UE & OE in 1.7 spec
        if not PY3:
            if(o_or_u[2] != '('):
                excluded_indexes.append(3)
        else:
            if(o_or_u[2] != 40):
                excluded_indexes.append(3)
        for i in range(len(o_or_u)):
            if(i not in excluded_indexes):
                if(len(self.get_hex_byte(o_or_u, i)) == 1 \
                   and o_or_u[i] != "\\"[0]):
                    pas += "0"  # need to be 2 digit hex numbers
                is_back_slash = True
                if not PY3:
                    is_back_slash = o_or_u[i] != "\\"[0]
                else:
                    is_back_slash = o_or_u[i] != 92
                if(is_back_slash or escape_seq):
                    if(escape_seq):
                        if not PY3:
                            esc = "\\"+o_or_u[i]
                        else:
                            esc = "\\"+chr(o_or_u[i])
                        esc = self.unescape(esc)
                        if(len(hex(ord(esc[0])).replace('0x', '')) == 1):
                            pas += "0"
                        pas += hex(ord(esc[0])).replace('0x', '')
                        escape_seq = False
                    else:
                        pas += self.get_hex_byte(o_or_u, i)
                else:
                    escape_seq = True
                    escapes += 1
        output = len(o_or_u)-(len(excluded_indexes)+1)-escapes
        return str(output)+'*'+pas[:-2]

    def unescape(self, esc):
        escape_seq_map = {'\\n':"\n", '\\s':"\s", '\\e':"\e",
                '\\r':"\r", '\\t':"\t", '\\v':"\v", '\\f':"\f",
                '\\b':"\b", '\\a':"\a", "\\)":")",
                "\\(":"(", "\\\\":"\\" }

        return escape_seq_map[esc]

if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <PDF file(s)>\n" % \
                         os.path.basename(sys.argv[0]))
        sys.exit(-1)
    for j in range(1, len(sys.argv)):
        if not PY3:
            filename = sys.argv[j].decode('UTF-8')
        else:
            filename = sys.argv[j]
        # sys.stderr.write("Analyzing %s\n" % sys.argv[j].decode('UTF-8'))
        parser = PdfParser(filename)
        try:
            parser.parse()
        except RuntimeError:
            e = sys.exc_info()[1]
            sys.stderr.write("%s : %s\n" % (filename, str(e)))

PDF to John

#!/usr/bin/env python3

# This software is Copyright (c) 2023 Benjamin Dornel <benjamindornel@gmail.com>
# and it is hereby released to the general public under the following terms:
# Redistribution and use in source and binary forms, with or without
# modification, are permitted.


import argparse
import logging

try:
    from pyhanko.pdf_utils.misc import PdfReadError
    from pyhanko.pdf_utils.reader import PdfFileReader
except ImportError:
    print("pyhanko is missing, run 'pip install --user pyhanko==0.20.1' to install it!")
    exit(1)

logger = logging.getLogger(__name__)


class SecurityRevision:
    """Represents Standard Security Handler Revisions
    and the corresponding key length for the /O and /U entries

    In Revision 5, the /O and /U entries were extended to 48 bytes,
    with three logical parts -- a 32 byte verification hash,
    an 8 byte validation salt, and an 8 byte key salt."""

    revisions = {
        2: 32,  # RC4_BASIC
        3: 32,  # RC4_EXTENDED
        4: 32,  # RC4_OR_AES128
        5: 48,  # AES_R5_256
        6: 48,  # AES_256
    }

    @classmethod
    def get_key_length(cls, revision):
        """
        Get the key length for a given revision,
        defaults to 48 if no revision is specified.
        """
        return cls.revisions.get(revision, 48)


class PdfHashExtractor:
    """
    Extracts hash and encryption information from a PDF file

    Attributes:
    - `file_name`: PDF file path.
    - `strict`: Boolean that controls whether an error is raised, if a PDF
        has problems e.g. Multiple definitions in encryption dictionary
        for a specific key. Defaults to `False`.
    - `algorithm`: Encryption algorithm used by the standard security handler
    - `length`: The length of the encryption key, in bits. Defaults to 40.
    - `permissions`: User access permissions
    - `revision`: Revision of the standard security handler
    """

    def __init__(self, file_name: str, strict: bool = False):
        self.file_name = file_name

        with open(file_name, "rb") as doc:
            self.pdf = PdfFileReader(doc, strict=strict)
            self.encrypt_dict = self.pdf._get_encryption_params()

            if not self.encrypt_dict:
                raise RuntimeError("File not encrypted")

            self.algorithm: int = self.encrypt_dict.get("/V")
            self.length: int = self.encrypt_dict.get("/Length", 40)
            self.permissions: int = self.encrypt_dict["/P"]
            self.revision: int = self.encrypt_dict["/R"]

    @property
    def document_id(self) -> bytes:
        return self.pdf.document_id[0]

    @property
    def encrypt_metadata(self) -> str:
        """
        Get a string representation of whether metadata is encrypted.

        Returns "1" if metadata is encrypted, "0" otherwise.
        """
        return str(int(self.pdf.security_handler.encrypt_metadata))

    def parse(self) -> str:
        """
        Parse PDF encryption information into a formatted string for John
        """
        passwords = self.get_passwords()
        fields = [
            f"$pdf${self.algorithm}",
            self.revision,
            self.length,
            self.permissions,
            self.encrypt_metadata,
            len(self.document_id),
            self.document_id.hex(),
            passwords,
        ]
        return "*".join(map(str, fields))

    def get_passwords(self) -> str:
        """
        Creates a string consisting of the hexidecimal string of the
        /U, /O, /UE and /OE entries and their corresponding byte string length
        """
        passwords = []
        keys = ("udata", "odata", "oeseed", "ueseed")
        max_key_length = SecurityRevision.get_key_length(self.revision)

        for key in keys:
            if data := getattr(self.pdf.security_handler, key):
                data: bytes = data[:max_key_length]
                passwords.extend([str(len(data)), data.hex()])

        return "*".join(passwords)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="PDF Hash Extractor")
    parser.add_argument(
        "pdf_files", nargs="+", help="PDF file(s) to extract information from"
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Print the encryption dictionary"
    )
    args = parser.parse_args()

    for filename in args.pdf_files:
        try:
            extractor = PdfHashExtractor(filename)
            pdf_hash = extractor.parse()
            print(pdf_hash)

            if args.debug:
                if extractor.encrypt_dict:
                    print("Encryption Dictionary:")
                    for key, value in extractor.encrypt_dict.items():
                        print(f"{key}: {value}")
                else:
                    print("No encryption dictionary found in the PDF.")

        except PdfReadError as error:
            logger.error("%s : %s", filename, error, exc_info=True)

Steg

#!/usr/bin/env python3
"""
Steganography Extraction Tool

This script extracts hidden data from images using various steganography techniques.
Usage: python steg_extract.py <image_file>
"""

import sys
import os
import numpy as np
from PIL import Image
import binascii
import re
import zlib
import struct
from bitstring import BitArray

def extract_lsb(image_path, bit_depth=1):
    """Extract data hidden using LSB (Least Significant Bit) steganography."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Flatten the pixel array and extract LSBs
        flat_pixels = pixels.flatten()
        
        # Get the least significant bits
        bits = ""
        for pixel in flat_pixels:
            # Extract the specified number of least significant bits
            for i in range(bit_depth):
                bits += str((pixel >> i) & 1)
        
        # Convert bits to bytes
        bytes_data = BitArray(bin=bits).bytes
        
        # Try to find printable text
        printable_data = ""
        for i in range(len(bytes_data)):
            char = bytes_data[i:i+1]
            if 32 <= ord(char) <= 126 or ord(char) in (10, 13, 9):  # Printable ASCII or newline/tab
                printable_data += char.decode('ascii', errors='ignore')
            else:
                printable_data += '.'
        
        return {
            'raw_bits': bits[:100] + "...",  # First 100 bits
            'raw_bytes': binascii.hexlify(bytes_data[:50]).decode('ascii') + "...",  # First 50 bytes
            'possible_text': printable_data[:1000]  # First 1000 printable chars
        }
    except Exception as e:
        return {'error': f"LSB extraction failed: {str(e)}"}

def extract_metadata(image_path):
    """Extract metadata from the image that might contain hidden information."""
    try:
        img = Image.open(image_path)
        metadata = {}
        
        # Extract EXIF data if available
        if hasattr(img, '_getexif') and img._getexif():
            metadata['exif'] = str(img._getexif())
        
        # Extract other metadata
        metadata['format'] = img.format
        metadata['mode'] = img.mode
        metadata['info'] = str(img.info)
        
        return metadata
    except Exception as e:
        return {'error': f"Metadata extraction failed: {str(e)}"}

def extract_color_plane(image_path):
    """Extract data from color planes separately to find potential hidden information."""
    try:
        img = Image.open(image_path)
        if img.mode != 'RGB' and img.mode != 'RGBA':
            return {'error': "Not an RGB/RGBA image"}
        
        planes = {}
        pixels = np.array(img)
        
        # Extract red, green, blue planes
        if img.mode == 'RGB' or img.mode == 'RGBA':
            planes['red'] = pixels[:,:,0]
            planes['green'] = pixels[:,:,1]
            planes['blue'] = pixels[:,:,2]
        
        # Check for unusual patterns in each plane
        results = {}
        for plane_name, plane_data in planes.items():
            # Look for unusual distributions (e.g., even/odd patterns)
            even_count = np.sum(plane_data % 2 == 0)
            odd_count = np.sum(plane_data % 2 == 1)
            
            # If there's a significant imbalance, it might indicate steganography
            results[f"{plane_name}_analysis"] = {
                'even_pixels': even_count,
                'odd_pixels': odd_count,
                'imbalance': abs(even_count - odd_count) / (even_count + odd_count)
            }
            
            # Extract LSB from this color plane only
            bits = "".join([str(p & 1) for p in plane_data.flatten()])
            results[f"{plane_name}_lsb_sample"] = bits[:100] + "..."
        
        return results
    except Exception as e:
        return {'error': f"Color plane extraction failed: {str(e)}"}

def extract_hidden_files(image_path):
    """Look for embedded files using common signatures/headers."""
    try:
        with open(image_path, 'rb') as f:
            data = f.read()
        
        # Common file signatures to look for
        file_signatures = {
            b'\x50\x4B\x03\x04': 'ZIP',
            b'\x52\x61\x72\x21\x1A\x07': 'RAR',
            b'\x25\x50\x44\x46': 'PDF',
            b'\xFF\xD8\xFF': 'JPG',
            b'\x89\x50\x4E\x47': 'PNG',
            b'\x47\x49\x46\x38': 'GIF',
            b'\x7F\x45\x4C\x46': 'ELF',
            b'\xD0\xCF\x11\xE0': 'MS Office',
            b'\x50\x4B\x05\x06': 'ZIP (empty)',
            b'\x1F\x8B\x08': 'GZIP',
            b'\x42\x5A\x68': 'BZ2',
            b'\x75\x73\x74\x61\x72': 'TAR',
            b'\x49\x44\x33': 'MP3',
            b'\x4D\x5A': 'EXE',
        }
        
        found_files = []
        for signature, filetype in file_signatures.items():
            # Find all occurrences of the signature
            offsets = [m.start() for m in re.finditer(re.escape(signature), data)]
            for offset in offsets:
                found_files.append({
                    'type': filetype,
                    'offset': offset,
                    'signature': binascii.hexlify(signature).decode('ascii')
                })
        
        return found_files
    except Exception as e:
        return {'error': f"Hidden file extraction failed: {str(e)}"}

def extract_parity_steganography(image_path):
    """Check for parity-based steganography."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Count the parity of pixels in each row and column
        row_parity = np.sum(pixels.sum(axis=2) % 2, axis=1) % 2
        col_parity = np.sum(pixels.sum(axis=2) % 2, axis=0) % 2
        
        # Convert to binary strings (potentially hidden messages)
        row_message = "".join([str(int(bit)) for bit in row_parity])
        col_message = "".join([str(int(bit)) for bit in col_parity])
        
        return {
            'row_parity_bits': row_message,
            'col_parity_bits': col_message
        }
    except Exception as e:
        return {'error': f"Parity steganography extraction failed: {str(e)}"}

def extract_hidden_text(image_path):
    """Extract text from the image using several methods."""
    try:
        with open(image_path, 'rb') as f:
            data = f.read()
        
        # Look for ASCII/UTF-8 text patterns
        possible_strings = []
        ascii_regex = rb'[ -~\r\n\t]{8,}'  # 8+ printable ASCII chars
        for match in re.finditer(ascii_regex, data):
            possible_strings.append(match.group(0).decode('ascii', errors='ignore'))
        
        return {
            'possible_strings': possible_strings[:20]  # Return first 20 found strings
        }
    except Exception as e:
        return {'error': f"Text extraction failed: {str(e)}"}

def analyze_bit_distribution(image_path):
    """Analyze bit distribution for statistical anomalies."""
    try:
        img = Image.open(image_path)
        pixels = np.array(img)
        
        # Analyze distribution of each bit position
        bit_counts = []
        for bit_pos in range(8):
            mask = 1 << bit_pos
            bit_count = np.sum((pixels & mask) > 0)
            bit_counts.append(bit_count)
        
        total_bits = pixels.size * 8
        bit_frequencies = [count / total_bits for count in bit_counts]
        
        # Calculate deviation from expected 0.5 frequency
        deviations = [abs(freq - 0.5) for freq in bit_frequencies]
        
        return {
            'bit_frequencies': bit_frequencies,
            'deviations': deviations,
            'analysis': "High deviation in LSBs may indicate steganography"
        }
    except Exception as e:
        return {'error': f"Bit distribution analysis failed: {str(e)}"}

def extract_stegano_data(image_path):
    """Main function to extract steganographic data from an image."""
    results = {
        'filename': os.path.basename(image_path),
        'filesize': os.path.getsize(image_path)
    }
    
    # Run all extraction methods
    results['lsb_extraction'] = extract_lsb(image_path)
    results['lsb_extraction_2bit'] = extract_lsb(image_path, bit_depth=2)
    results['metadata'] = extract_metadata(image_path)
    results['color_planes'] = extract_color_plane(image_path)
    results['hidden_files'] = extract_hidden_files(image_path)
    results['parity_data'] = extract_parity_steganography(image_path)
    results['text_data'] = extract_hidden_text(image_path)
    results['bit_distribution'] = analyze_bit_distribution(image_path)
    
    return results

def save_extracted_data(results, original_image_path):
    """Save extracted data to files."""
    base_name = os.path.splitext(os.path.basename(original_image_path))[0]
    output_dir = f"{base_name}_extracted"
    
    # Create directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Save main results as text
    with open(f"{output_dir}/results.txt", 'w') as f:
        f.write(f"Steganography Extraction Results for {results['filename']}\n")
        f.write(f"File size: {results['filesize']} bytes\n\n")
        
        # Write metadata
        f.write("=== METADATA ===\n")
        for k, v in results['metadata'].items():
            f.write(f"{k}: {v}\n")
        
        # Write LSB extraction results
        f.write("\n=== LSB EXTRACTION ===\n")
        f.write(f"LSB sample: {results['lsb_extraction']['raw_bits']}\n")
        f.write(f"Bytes: {results['lsb_extraction']['raw_bytes']}\n")
        f.write(f"Possible text:\n{results['lsb_extraction']['possible_text']}\n")
        
        # Write 2-bit LSB extraction
        f.write("\n=== 2-BIT LSB EXTRACTION ===\n")
        f.write(f"LSB sample: {results['lsb_extraction_2bit']['raw_bits']}\n")
        f.write(f"Bytes: {results['lsb_extraction_2bit']['raw_bytes']}\n")
        f.write(f"Possible text:\n{results['lsb_extraction_2bit']['possible_text']}\n")
        
        # Write color plane analysis
        f.write("\n=== COLOR PLANE ANALYSIS ===\n")
        for k, v in results['color_planes'].items():
            f.write(f"{k}: {v}\n")
        
        # Write hidden files
        f.write("\n=== POSSIBLE HIDDEN FILES ===\n")
        for file_info in results['hidden_files']:
            f.write(f"Type: {file_info['type']}, Offset: {file_info['offset']}, Signature: {file_info['signature']}\n")
        
        # Write parity data
        f.write("\n=== PARITY STEGANOGRAPHY ===\n")
        f.write(f"Row parity: {results['parity_data']['row_parity_bits']}\n")
        f.write(f"Column parity: {results['parity_data']['col_parity_bits']}\n")
        
        # Write found text strings
        f.write("\n=== POSSIBLE HIDDEN TEXT ===\n")
        for s in results['text_data']['possible_strings']:
            f.write(f"{s}\n")
            f.write("---\n")
        
        # Write bit distribution analysis
        f.write("\n=== BIT DISTRIBUTION ANALYSIS ===\n")
        f.write("Bit position frequencies (0-7, LSB to MSB):\n")
        for i, freq in enumerate(results['bit_distribution']['bit_frequencies']):
            f.write(f"Bit {i}: {freq:.4f} (deviation: {results['bit_distribution']['deviations'][i]:.4f})\n")
    
    # If we found potential embedded files, try to extract them
    if results['hidden_files']:
        with open(original_image_path, 'rb') as f:
            data = f.read()
        
        for i, file_info in enumerate(results['hidden_files']):
            # Create a name for the extracted file
            ext = file_info['type'].lower().split()[0]  # Use the first word of the type as extension
            output_file = f"{output_dir}/extracted_file_{i}.{ext}"
            
            # Get start position from offset
            start_pos = file_info['offset']
            
            # Write the data to a file, up to 10MB maximum
            with open(output_file, 'wb') as out_f:
                out_f.write(data[start_pos:start_pos + 10*1024*1024])
    
    return output_dir

def main():
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <image_file>")
        sys.exit(1)
    
    image_path = sys.argv[1]
    if not os.path.exists(image_path):
        print(f"Error: File '{image_path}' not found.")
        sys.exit(1)
    
    print(f"Analyzing {image_path} for steganographic data...")
    results = extract_stegano_data(image_path)
    
    # Save results to files
    output_dir = save_extracted_data(results, image_path)
    print(f"Analysis complete. Results saved to {output_dir}/")

if __name__ == "__main__":
    main()

Binary Log Parser and Anomaly Detector

#!/usr/bin/env python3
"""
Binary Log Parser and Anomaly Detector

This script parses a custom binary format for login attempt logs and identifies 
potentially compromised accounts based on anomalous behavior.

Format:
- username_length: 4-byte integer (big-endian)
- username: variable-length string
- ip: 4-byte IPv4 address
- timestamp: 4-byte Unix timestamp (big-endian)
- success: 1-byte boolean

Usage:
    python log_analyzer.py --input <log_file> [--output <output_file>] [--sql <sql_file>]
"""

import argparse
import struct
import socket
import sqlite3
import json
import os
import sys
from datetime import datetime
from collections import defaultdict

def parse_binary_log(file_path):
    """
    Parse the binary log file according to the specified format.
    
    Args:
        file_path: Path to the binary log file
        
    Returns:
        List of login attempt records
    """
    logs = []
    
    try:
        with open(file_path, 'rb') as f:
            data = f.read()
            
        offset = 0
        while offset < len(data):
            # Read username length (4-byte integer, big-endian)
            username_length = struct.unpack('>I', data[offset:offset+4])[0]
            offset += 4
            
            # Read username (variable length string)
            username = data[offset:offset+username_length].decode('utf-8')
            offset += username_length
            
            # Read IP address (4-byte IPv4 address)
            ip_bytes = data[offset:offset+4]
            ip_address = socket.inet_ntoa(ip_bytes)
            offset += 4
            
            # Read timestamp (4-byte Unix timestamp, big-endian)
            timestamp = struct.unpack('>I', data[offset:offset+4])[0]
            login_time = datetime.fromtimestamp(timestamp)
            offset += 4
            
            # Read success flag (1-byte boolean)
            success = data[offset] == 1
            offset += 1
            
            # Add the parsed entry to our array
            logs.append({
                'username': username,
                'ip_address': ip_address,
                'timestamp': timestamp,
                'login_time': login_time,
                'success': success
            })
            
        print(f"Successfully parsed {len(logs)} login attempts")
        return logs
        
    except Exception as e:
        print(f"Error parsing log file: {str(e)}")
        sys.exit(1)

def detect_anomalies(logs):
    """
    Analyze logs to identify potentially compromised accounts.
    
    Args:
        logs: List of parsed login attempt records
        
    Returns:
        List of users with anomaly scores and suspicious behavior details
    """
    # Group logs by username
    user_logs = defaultdict(list)
    for log in logs:
        user_logs[log['username']].append(log)
    
    anomalies = []
    
    # Business hours (assuming 9 AM to 5 PM)
    business_start_hour = 9
    business_end_hour = 17
    
    # Time threshold for rapid location changes (in seconds)
    location_change_threshold = 3600  # 1 hour
    
    # Analyze each user's login patterns
    for username, user_log in user_logs.items():
        # Sort logs by timestamp
        user_log.sort(key=lambda x: x['timestamp'])
        
        # Calculate anomaly indicators
        unique_ips = set(log['ip_address'] for log in user_log)
        failed_attempts = sum(1 for log in user_log if not log['success'])
        successful_attempts = sum(1 for log in user_log if log['success'])
        
        # Check for rapid location changes
        rapid_location_changes = 0
        for i in range(1, len(user_log)):
            current_log = user_log[i]
            previous_log = user_log[i-1]
            
            if current_log['ip_address'] != previous_log['ip_address']:
                time_diff = current_log['timestamp'] - previous_log['timestamp']
                if time_diff < location_change_threshold:
                    rapid_location_changes += 1
        
        # Calculate after-hours logins
        after_hours_logins = sum(
            1 for log in user_log 
            if log['login_time'].hour < business_start_hour or log['login_time'].hour >= business_end_hour
        )
        
        # Calculate anomaly score based on these factors
        # Weights can be adjusted based on the relative importance of each factor
        anomaly_score = (
            (len(unique_ips) * 10) + 
            (failed_attempts * 5) + 
            (rapid_location_changes * 20) + 
            (after_hours_logins * 3)
        )
        
        anomalies.append({
            'username': username,
            'anomaly_score': anomaly_score,
            'unique_ips': len(unique_ips),
            'ip_addresses': list(unique_ips),
            'failed_attempts': failed_attempts,
            'successful_attempts': successful_attempts,
            'rapid_location_changes': rapid_location_changes,
            'after_hours_logins': after_hours_logins,
            'total_attempts': len(user_log)
        })
    
    # Sort by anomaly score (descending)
    anomalies.sort(key=lambda x: x['anomaly_score'], reverse=True)
    
    return anomalies

def create_database(logs, db_path=':memory:'):
    """
    Create a SQLite database with the login data
    
    Args:
        logs: List of parsed login attempt records
        db_path: Path to save the SQLite database (default: in-memory)
        
    Returns:
        SQLite connection
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    # Create table
    cursor.execute('''
    CREATE TABLE login_attempts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        username TEXT NOT NULL,
        ip_address TEXT NOT NULL,
        timestamp INTEGER NOT NULL,
        login_time TEXT NOT NULL,
        success INTEGER NOT NULL
    )
    ''')
    
    # Create indexes
    cursor.execute('CREATE INDEX idx_username ON login_attempts(username)')
    cursor.execute('CREATE INDEX idx_ip_address ON login_attempts(ip_address)')
    cursor.execute('CREATE INDEX idx_timestamp ON login_attempts(timestamp)')
    cursor.execute('CREATE INDEX idx_success ON login_attempts(success)')
    
    # Insert data
    for log in logs:
        cursor.execute(
            'INSERT INTO login_attempts (username, ip_address, timestamp, login_time, success) VALUES (?, ?, ?, ?, ?)',
            (
                log['username'],
                log['ip_address'],
                log['timestamp'],
                log['login_time'].isoformat(),
                1 if log['success'] else 0
            )
        )
    
    conn.commit()
    return conn

def generate_sql_script():
    """
    Generate a SQL script for creating the table and analyzing login data
    
    Returns:
        SQL script as a string
    """
    return '''-- Create a table to store login attempts
CREATE TABLE login_attempts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    username TEXT NOT NULL,
    ip_address TEXT NOT NULL,
    timestamp INTEGER NOT NULL,
    login_time TEXT NOT NULL,
    success INTEGER NOT NULL
);

-- Create indexes for efficient searching
CREATE INDEX idx_username ON login_attempts(username);
CREATE INDEX idx_ip_address ON login_attempts(ip_address);
CREATE INDEX idx_timestamp ON login_attempts(timestamp);
CREATE INDEX idx_success ON login_attempts(success);

-- Query to find users with multiple IP addresses
SELECT 
    username,
    COUNT(DISTINCT ip_address) AS unique_ip_count
FROM 
    login_attempts
GROUP BY 
    username
HAVING 
    unique_ip_count > 1
ORDER BY 
    unique_ip_count DESC;

-- Query to find failed login attempts followed by successful ones
SELECT 
    a.username,
    COUNT(*) AS suspicious_patterns
FROM 
    login_attempts a
JOIN 
    login_attempts b 
ON 
    a.username = b.username
    AND a.timestamp < b.timestamp
    AND a.success = 0
    AND b.success = 1
    AND (b.timestamp - a.timestamp) < 300 -- Within 5 minutes
GROUP BY 
    a.username
ORDER BY 
    suspicious_patterns DESC;

-- Query to find rapid login attempts from different locations
SELECT 
    a.username,
    a.ip_address AS ip1,
    b.ip_address AS ip2,
    datetime(a.login_time) AS time1,
    datetime(b.login_time) AS time2,
    (julianday(b.login_time) - julianday(a.login_time)) * 24 * 60 AS minutes_between
FROM 
    login_attempts a
JOIN 
    login_attempts b 
ON 
    a.username = b.username
    AND a.ip_address != b.ip_address
    AND a.id < b.id
    AND (julianday(b.login_time) - julianday(a.login_time)) * 24 * 60 < 60 -- Less than 60 minutes apart
ORDER BY 
    minutes_between ASC;

-- Query to find users with after-hours login activity
SELECT 
    username,
    COUNT(*) AS after_hours_logins
FROM 
    login_attempts
WHERE 
    (strftime('%H', login_time) < '09' OR strftime('%H', login_time) >= '17')
GROUP BY 
    username
ORDER BY 
    after_hours_logins DESC;

-- Comprehensive anomaly detection query
WITH 
    unique_ips AS (
        SELECT 
            username, 
            COUNT(DISTINCT ip_address) AS ip_count
        FROM 
            login_attempts
        GROUP BY 
            username
    ),
    failed_logins AS (
        SELECT 
            username, 
            SUM(CASE WHEN success = 0 THEN 1 ELSE 0 END) AS failed_count
        FROM 
            login_attempts
        GROUP BY 
            username
    ),
    after_hours AS (
        SELECT 
            username, 
            COUNT(*) AS after_hours_count
        FROM 
            login_attempts
        WHERE 
            (strftime('%H', login_time) < '09' OR strftime('%H', login_time) >= '17')
        GROUP BY 
            username
    ),
    rapid_location_changes AS (
        SELECT 
            a.username,
            COUNT(*) AS rapid_changes
        FROM 
            login_attempts a
        JOIN 
            login_attempts b 
        ON 
            a.username = b.username
            AND a.ip_address != b.ip_address
            AND a.id < b.id
            AND (b.timestamp - a.timestamp) < 3600 -- Less than 1 hour apart
        GROUP BY 
            a.username
    )
SELECT 
    u.username,
    COALESCE(u.ip_count, 0) AS unique_ip_count,
    COALESCE(f.failed_count, 0) AS failed_logins,
    COALESCE(a.after_hours_count, 0) AS after_hours_logins,
    COALESCE(r.rapid_changes, 0) AS rapid_location_changes,
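    -- Weighted composite score (weights are heuristic): rapid IP changes
    -- weigh heaviest (20), then unique IPs (10), failed logins (5),
    -- and after-hours logins (3)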
    (COALESCE(u.ip_count, 0) * 10) + 
    (COALESCE(f.failed_count, 0) * 5) + 
    (COALESCE(r.rapid_changes, 0) * 20) + 
    (COALESCE(a.after_hours_count, 0) * 3) AS anomaly_score
FROM 
    unique_ips u
LEFT JOIN 
    failed_logins f ON u.username = f.username
LEFT JOIN 
    after_hours a ON u.username = a.username
LEFT JOIN 
    rapid_location_changes r ON u.username = r.username
ORDER BY 
    anomaly_score DESC
LIMIT 10;
'''
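
# Example workflow for the generated script (a sketch; 'analysis.sql' and
# 'logins.db' are placeholder paths): write it out and feed it to the sqlite3
# command-line shell, which prints SELECT results. Python's executescript()
# would run the statements but discard query output. With IF NOT EXISTS on
# the CREATE statements, the script can be pointed at a database previously
# populated via create_database() (e.g. one saved with --db).
#
#   import subprocess
#   with open('analysis.sql', 'w') as f:
#       f.write(generate_sql_script())
#   with open('analysis.sql') as sql_in:
#       subprocess.run(['sqlite3', 'logins.db'], stdin=sql_in, check=True)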

def analyze_compromised_user(conn, username):
    """
    Perform detailed analysis on a potentially compromised user
    
    Args:
        conn: SQLite connection
        username: Username to analyze
        
    Returns:
        Dictionary with detailed analysis
    """
    cursor = conn.cursor()
    
    # Get all login attempts for this user
    cursor.execute(
        '''
        SELECT 
            timestamp,
            login_time,
            ip_address,
            success
        FROM 
            login_attempts
        WHERE 
            username = ?
        ORDER BY 
            timestamp ASC
        ''',
        (username,)
    )
    
    logins = cursor.fetchall()
    
    # Analyze suspicious patterns
    suspicious_events = []
    previous_ip = None
    previous_time = None
    
    for timestamp, login_time, ip_address, success in logins:
        if previous_ip and previous_ip != ip_address:
            time_diff = timestamp - previous_time
            if time_diff < 3600:  # Less than 1 hour
                suspicious_events.append({
                    'event_type': 'rapid_location_change',
                    'login_time': login_time,  # when the change was observed
                    'previous_ip': previous_ip,
                    'new_ip': ip_address,
                    'minutes_between': time_diff / 60
                })
        
        previous_ip = ip_address
        previous_time = timestamp
    
    # Get login success rate
    cursor.execute(
        '''
        SELECT 
            COUNT(*) AS total,
            SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) AS successful
        FROM 
            login_attempts
        WHERE 
            username = ?
        ''',
        (username,)
    )
    
    total, successful = cursor.fetchone()
    success_rate = (successful / total) * 100 if total > 0 else 0
    
    return {
        'username': username,
        'login_count': total,
        'success_rate': success_rate,
        'suspicious_events': suspicious_events,
        'login_history': [
            {
                'timestamp': timestamp,
                'login_time': login_time,
                'ip_address': ip_address,
                'success': bool(success)
            }
            for timestamp, login_time, ip_address, success in logins
        ]
    }
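
# Example usage (a sketch; 'alice' is a placeholder username and `logs`
# comes from the binary parser):
#
#   conn = create_database(logs, ':memory:')
#   report = analyze_compromised_user(conn, 'alice')
#   print(f"{report['login_count']} attempts, "
#         f"{report['success_rate']:.1f}% successful")
#   for event in report['suspicious_events']:
#       print(f"{event['previous_ip']} -> {event['new_ip']} "
#             f"({event['minutes_between']:.1f} minutes apart)")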

def main():
    """
    Main function to process arguments and run the analysis
    """
    parser = argparse.ArgumentParser(description='Analyze binary login logs for compromised accounts')
    parser.add_argument('--input', '-i', required=True, help='Path to binary log file')
    parser.add_argument('--output', '-o', help='Path to save analysis results (JSON)')
    parser.add_argument('--sql', '-s', help='Path to save SQL script')
    parser.add_argument('--db', '-d', help='Path to save SQLite database')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
    
    args = parser.parse_args()
    
    # Parse the binary log file
    print(f"Parsing binary log file: {args.input}")
    logs = parse_binary_log(args.input)
    
    # Analyze for anomalies
    print("Analyzing for suspicious behavior...")
    anomalies = detect_anomalies(logs)
    
    # Print top suspicious users
    print("\nTop potentially compromised accounts:")
    for i, anomaly in enumerate(anomalies[:5], start=1):
        print(f"{i}. Username: {anomaly['username']}")
        print(f"   Anomaly Score: {anomaly['anomaly_score']}")
        print(f"   Unique IPs: {anomaly['unique_ips']}")
        print(f"   Failed/Successful Logins: {anomaly['failed_attempts']}/{anomaly['successful_attempts']}")
        print(f"   Rapid Location Changes: {anomaly['rapid_location_changes']}")
        print(f"   After-Hours Logins: {anomaly['after_hours_logins']}")
        print()
    
    # Identify the most likely compromised user
    if anomalies:
        compromised_user = anomalies[0]['username']
        print(f"RESULT: The most likely compromised account is: {compromised_user}")
        
        # Create database for SQL analysis
        db_path = args.db if args.db else ':memory:'
        conn = create_database(logs, db_path)
        
        # Get detailed analysis for the compromised user
        detailed_analysis = analyze_compromised_user(conn, compromised_user)
        
        if args.verbose:
            print("\nDetailed analysis for the compromised account:")
            print(f"Login history for {compromised_user}:")
            for entry in detailed_analysis['login_history']:
                status = "SUCCESS" if entry['success'] else "FAILED"
                print(f"{entry['login_time']} | {entry['ip_address']} | {status}")
            
            if detailed_analysis['suspicious_events']:
                print("\nSuspicious events:")
                for event in detailed_analysis['suspicious_events']:
                    print(f"IP changed from {event['previous_ip']} to {event['new_ip']} "
                          f"in {event['minutes_between']:.1f} minutes")
    else:
        print("No anomalies detected in the log data")
    
    # Save results to output file
    if args.output:
        with open(args.output, 'w') as f:
            json.dump({
                'summary': {
                    'total_logs': len(logs),
                    'total_users': len({log['username'] for log in logs}),
                    'compromised_user': compromised_user if anomalies else None
                },
                'anomalies': anomalies,
                'detailed_analysis': detailed_analysis if anomalies else None
            }, f, indent=4, default=str)
        print(f"Analysis results saved to {args.output}")
    
    # Save SQL script
    if args.sql:
        with open(args.sql, 'w') as f:
            f.write(generate_sql_script())
        print(f"SQL script saved to {args.sql}")
    
    # Report if database was saved
    if args.db:
        print(f"SQLite database saved to {args.db}")

if __name__ == "__main__":
    main()
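
# Example invocation (a sketch; the script name and file paths are
# placeholders for whatever this file and your logs are actually called):
#
#   python login_analysis.py --input auth.bin --db logins.db \
#       --sql analysis.sql --output results.json --verbose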