Steg
#!/usr/bin/env python3
"""
Steganography Extraction Tool
This script extracts hidden data from images using various steganography techniques.
Usage: python steg_extract.py <image_file>
"""
import sys
import os
import numpy as np
from PIL import Image
import binascii
import re
import zlib
import struct
from bitstring import BitArray
def extract_lsb(image_path, bit_depth=1):
"""Extract data hidden using LSB (Least Significant Bit) steganography."""
try:
img = Image.open(image_path)
pixels = np.array(img)
# Flatten the pixel array and extract LSBs
flat_pixels = pixels.flatten()
# Get the least significant bits
bits = ""
for pixel in flat_pixels:
# Extract the specified number of least significant bits
for i in range(bit_depth):
bits += str((pixel >> i) & 1)
# Convert bits to bytes
bytes_data = BitArray(bin=bits).bytes
# Try to find printable text
printable_data = ""
for i in range(len(bytes_data)):
char = bytes_data[i:i+1]
if 32 <= ord(char) <= 126 or ord(char) in (10, 13, 9): # Printable ASCII or newline/tab
printable_data += char.decode('ascii', errors='ignore')
else:
printable_data += '.'
return {
'raw_bits': bits[:100] + "...", # First 100 bits
'raw_bytes': binascii.hexlify(bytes_data[:50]).decode('ascii') + "...", # First 50 bytes
'possible_text': printable_data[:1000] # First 1000 printable chars
}
except Exception as e:
return {'error': f"LSB extraction failed: {str(e)}"}
def extract_metadata(image_path):
"""Extract metadata from the image that might contain hidden information."""
try:
img = Image.open(image_path)
metadata = {}
# Extract EXIF data if available
if hasattr(img, '_getexif') and img._getexif():
metadata['exif'] = str(img._getexif())
# Extract other metadata
metadata['format'] = img.format
metadata['mode'] = img.mode
metadata['info'] = str(img.info)
return metadata
except Exception as e:
return {'error': f"Metadata extraction failed: {str(e)}"}
def extract_color_plane(image_path):
"""Extract data from color planes separately to find potential hidden information."""
try:
img = Image.open(image_path)
if img.mode != 'RGB' and img.mode != 'RGBA':
return {'error': "Not an RGB/RGBA image"}
planes = {}
pixels = np.array(img)
# Extract red, green, blue planes
if img.mode == 'RGB' or img.mode == 'RGBA':
planes['red'] = pixels[:,:,0]
planes['green'] = pixels[:,:,1]
planes['blue'] = pixels[:,:,2]
# Check for unusual patterns in each plane
results = {}
for plane_name, plane_data in planes.items():
# Look for unusual distributions (e.g., even/odd patterns)
even_count = np.sum(plane_data % 2 == 0)
odd_count = np.sum(plane_data % 2 == 1)
# If there's a significant imbalance, it might indicate steganography
results[f"{plane_name}_analysis"] = {
'even_pixels': even_count,
'odd_pixels': odd_count,
'imbalance': abs(even_count - odd_count) / (even_count + odd_count)
}
# Extract LSB from this color plane only
bits = "".join([str(p & 1) for p in plane_data.flatten()])
results[f"{plane_name}_lsb_sample"] = bits[:100] + "..."
return results
except Exception as e:
return {'error': f"Color plane extraction failed: {str(e)}"}
def extract_hidden_files(image_path):
"""Look for embedded files using common signatures/headers."""
try:
with open(image_path, 'rb') as f:
data = f.read()
# Common file signatures to look for
file_signatures = {
b'\x50\x4B\x03\x04': 'ZIP',
b'\x52\x61\x72\x21\x1A\x07': 'RAR',
b'\x25\x50\x44\x46': 'PDF',
b'\xFF\xD8\xFF': 'JPG',
b'\x89\x50\x4E\x47': 'PNG',
b'\x47\x49\x46\x38': 'GIF',
b'\x7F\x45\x4C\x46': 'ELF',
b'\xD0\xCF\x11\xE0': 'MS Office',
b'\x50\x4B\x05\x06': 'ZIP (empty)',
b'\x1F\x8B\x08': 'GZIP',
b'\x42\x5A\x68': 'BZ2',
b'\x75\x73\x74\x61\x72': 'TAR',
b'\x49\x44\x33': 'MP3',
b'\x4D\x5A': 'EXE',
}
found_files = []
for signature, filetype in file_signatures.items():
# Find all occurrences of the signature
offsets = [m.start() for m in re.finditer(re.escape(signature), data)]
for offset in offsets:
found_files.append({
'type': filetype,
'offset': offset,
'signature': binascii.hexlify(signature).decode('ascii')
})
return found_files
except Exception as e:
return {'error': f"Hidden file extraction failed: {str(e)}"}
def extract_parity_steganography(image_path):
"""Check for parity-based steganography."""
try:
img = Image.open(image_path)
pixels = np.array(img)
# Count the parity of pixels in each row and column
row_parity = np.sum(pixels.sum(axis=2) % 2, axis=1) % 2
col_parity = np.sum(pixels.sum(axis=2) % 2, axis=0) % 2
# Convert to binary strings (potentially hidden messages)
row_message = "".join([str(int(bit)) for bit in row_parity])
col_message = "".join([str(int(bit)) for bit in col_parity])
return {
'row_parity_bits': row_message,
'col_parity_bits': col_message
}
except Exception as e:
return {'error': f"Parity steganography extraction failed: {str(e)}"}
def extract_hidden_text(image_path):
"""Extract text from the image using several methods."""
try:
with open(image_path, 'rb') as f:
data = f.read()
# Look for ASCII/UTF-8 text patterns
possible_strings = []
ascii_regex = rb'[ -~\r\n\t]{8,}' # 8+ printable ASCII chars
for match in re.finditer(ascii_regex, data):
possible_strings.append(match.group(0).decode('ascii', errors='ignore'))
return {
'possible_strings': possible_strings[:20] # Return first 20 found strings
}
except Exception as e:
return {'error': f"Text extraction failed: {str(e)}"}
def analyze_bit_distribution(image_path):
"""Analyze bit distribution for statistical anomalies."""
try:
img = Image.open(image_path)
pixels = np.array(img)
# Analyze distribution of each bit position
bit_counts = []
for bit_pos in range(8):
mask = 1 << bit_pos
bit_count = np.sum((pixels & mask) > 0)
bit_counts.append(bit_count)
total_bits = pixels.size * 8
bit_frequencies = [count / total_bits for count in bit_counts]
# Calculate deviation from expected 0.5 frequency
deviations = [abs(freq - 0.5) for freq in bit_frequencies]
return {
'bit_frequencies': bit_frequencies,
'deviations': deviations,
'analysis': "High deviation in LSBs may indicate steganography"
}
except Exception as e:
return {'error': f"Bit distribution analysis failed: {str(e)}"}
def extract_stegano_data(image_path):
"""Main function to extract steganographic data from an image."""
results = {
'filename': os.path.basename(image_path),
'filesize': os.path.getsize(image_path)
}
# Run all extraction methods
results['lsb_extraction'] = extract_lsb(image_path)
results['lsb_extraction_2bit'] = extract_lsb(image_path, bit_depth=2)
results['metadata'] = extract_metadata(image_path)
results['color_planes'] = extract_color_plane(image_path)
results['hidden_files'] = extract_hidden_files(image_path)
results['parity_data'] = extract_parity_steganography(image_path)
results['text_data'] = extract_hidden_text(image_path)
results['bit_distribution'] = analyze_bit_distribution(image_path)
return results
def save_extracted_data(results, original_image_path):
"""Save extracted data to files."""
base_name = os.path.splitext(os.path.basename(original_image_path))[0]
output_dir = f"{base_name}_extracted"
# Create directory if it doesn't exist
if not os.path.exists(output_dir):
os.makedirs(output_dir)
# Save main results as text
with open(f"{output_dir}/results.txt", 'w') as f:
f.write(f"Steganography Extraction Results for {results['filename']}\n")
f.write(f"File size: {results['filesize']} bytes\n\n")
# Write metadata
f.write("=== METADATA ===\n")
for k, v in results['metadata'].items():
f.write(f"{k}: {v}\n")
# Write LSB extraction results
f.write("\n=== LSB EXTRACTION ===\n")
f.write(f"LSB sample: {results['lsb_extraction']['raw_bits']}\n")
f.write(f"Bytes: {results['lsb_extraction']['raw_bytes']}\n")
f.write(f"Possible text:\n{results['lsb_extraction']['possible_text']}\n")
# Write 2-bit LSB extraction
f.write("\n=== 2-BIT LSB EXTRACTION ===\n")
f.write(f"LSB sample: {results['lsb_extraction_2bit']['raw_bits']}\n")
f.write(f"Bytes: {results['lsb_extraction_2bit']['raw_bytes']}\n")
f.write(f"Possible text:\n{results['lsb_extraction_2bit']['possible_text']}\n")
# Write color plane analysis
f.write("\n=== COLOR PLANE ANALYSIS ===\n")
for k, v in results['color_planes'].items():
f.write(f"{k}: {v}\n")
# Write hidden files
f.write("\n=== POSSIBLE HIDDEN FILES ===\n")
for file_info in results['hidden_files']:
f.write(f"Type: {file_info['type']}, Offset: {file_info['offset']}, Signature: {file_info['signature']}\n")
# Write parity data
f.write("\n=== PARITY STEGANOGRAPHY ===\n")
f.write(f"Row parity: {results['parity_data']['row_parity_bits']}\n")
f.write(f"Column parity: {results['parity_data']['col_parity_bits']}\n")
# Write found text strings
f.write("\n=== POSSIBLE HIDDEN TEXT ===\n")
for s in results['text_data']['possible_strings']:
f.write(f"{s}\n")
f.write("---\n")
# Write bit distribution analysis
f.write("\n=== BIT DISTRIBUTION ANALYSIS ===\n")
f.write("Bit position frequencies (0-7, LSB to MSB):\n")
for i, freq in enumerate(results['bit_distribution']['bit_frequencies']):
f.write(f"Bit {i}: {freq:.4f} (deviation: {results['bit_distribution']['deviations'][i]:.4f})\n")
# If we found potential embedded files, try to extract them
if results['hidden_files']:
with open(original_image_path, 'rb') as f:
data = f.read()
for i, file_info in enumerate(results['hidden_files']):
# Create a name for the extracted file
ext = file_info['type'].lower().split()[0] # Use the first word of the type as extension
output_file = f"{output_dir}/extracted_file_{i}.{ext}"
# Get start position from offset
start_pos = file_info['offset']
# Write the data to a file, up to 10MB maximum
with open(output_file, 'wb') as out_f:
out_f.write(data[start_pos:start_pos + 10*1024*1024])
return output_dir
def main():
if len(sys.argv) != 2:
print(f"Usage: {sys.argv[0]} <image_file>")
sys.exit(1)
image_path = sys.argv[1]
if not os.path.exists(image_path):
print(f"Error: File '{image_path}' not found.")
sys.exit(1)
print(f"Analyzing {image_path} for steganographic data...")
results = extract_stegano_data(image_path)
# Save results to files
output_dir = save_extracted_data(results, image_path)
print(f"Analysis complete. Results saved to {output_dir}/")
if __name__ == "__main__":
main()