import json
import base64
import re
import string

# HAR capture to read; it is opened as UTF-8 text and parsed as JSON below.
input_file = 'd:\\NEW\\kingjoki\\play.ninjasage.har'
# Plain-text report that receives the strings extracted from each HAR entry.
output_file = 'd:\\NEW\\kingjoki\\play_ninjasage_decoded_strings.txt'

def extract_strings(data_bytes, min_length=4):
    """Extract runs of printable ASCII from a byte sequence.

    A byte counts as printable when it lies in the range space (0x20)
    through tilde (0x7E). Every other byte, including all control and
    whitespace-control bytes (tab, LF, CR, vertical tab, form feed),
    terminates the current run.

    Args:
        data_bytes: An iterable of integer byte values, e.g. ``bytes`` or
            ``bytearray``.
        min_length: Minimum number of consecutive printable bytes a run
            must contain to be reported.

    Returns:
        A single ``str`` holding every qualifying run followed by a newline,
        in order of appearance; the empty string when nothing qualifies.
        If *data_bytes* cannot be iterated as integers, a bracketed
        ``"[Error extracting strings: ...]"`` message is returned instead of
        raising.
    """
    found = []
    run = []
    try:
        for b in data_bytes:
            if 0x20 <= b <= 0x7E:
                run.append(chr(b))
                continue
            # Non-printable byte: close the current run, keeping it only if
            # it is long enough.
            if len(run) >= min_length:
                found.append("".join(run))
            run = []
        # Flush a run that extends to the end of the input.
        if len(run) >= min_length:
            found.append("".join(run))
    except (TypeError, ValueError) as e:
        # Best-effort helper: report unusable input as text rather than raise.
        return f"[Error extracting strings: {e}]"
    return "".join(s + "\n" for s in found)

def clean_amf_strings(data_bytes):
    """Return the runs of printable ASCII in *data_bytes*, like Unix ``strings``.

    Args:
        data_bytes: A ``bytes`` or ``bytearray`` object to scan.

    Returns:
        A list of ``str``, one per run of four or more consecutive characters
        in the range space (0x20) through tilde (0x7E), in order of
        appearance. An empty list is returned when nothing matches or when
        *data_bytes* is not a decodable bytes object (e.g. ``None`` or ``str``).
    """
    try:
        # latin-1 maps each byte to the code point of the same value, so the
        # decode cannot fail on real bytes and the regex sees raw byte values.
        text = data_bytes.decode('latin-1')
    except (AttributeError, TypeError):
        # Not a bytes-like object with .decode(); best effort: report nothing.
        return []
    return re.findall(r'[ -~]{4,}', text)

# Script body: parse the HAR capture and write, for every entry, its URL,
# start time and the printable strings found in the request and response
# bodies. Any failure is reported on stdout instead of raising.
try:
    with open(input_file, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    # `or` fallbacks cover keys that are present in the JSON but set to null,
    # which a plain .get(key, default) would pass through as None.
    entries = (har_data.get('log') or {}).get('entries') or []

    with open(output_file, 'w', encoding='utf-8') as out:
        out.write(f"Extracted Strings from {input_file}\n")
        out.write("=" * 50 + "\n\n")

        for i, entry in enumerate(entries):
            req = entry.get('request') or {}
            res = entry.get('response') or {}

            out.write(f"Entry #{i+1}\n")
            out.write(f"URL: {req.get('url')}\n")
            out.write(f"Time: {entry.get('startedDateTime')}\n")

            # Request body.
            # NOTE(review): assumes an exporter that base64-encodes the body
            # flags it with postData.encoding == 'base64' (mirroring response
            # content) -- confirm against the capture tool. Anything else is
            # treated as raw text.
            post_data = req.get('postData') or {}
            post_text = post_data.get('text')
            if post_text:
                out.write("Request Data (Strings):\n")
                try:
                    if post_data.get('encoding') == 'base64':
                        request_bytes = base64.b64decode(post_text)
                    else:
                        # Non-ASCII characters become bytes >= 0x80, which the
                        # string scanner treats as separators.
                        request_bytes = post_text.encode('utf-8', errors='replace')
                except (ValueError, TypeError, AttributeError) as e:
                    # binascii.Error (bad base64) is a ValueError subclass.
                    out.write(f"  [Error decoding request body: {e}]\n")
                else:
                    request_strings = clean_amf_strings(request_bytes)
                    if request_strings:
                        for s in request_strings:
                            out.write(f"  {s}\n")
                    else:
                        out.write("  [No readable strings found]\n")

            # Response body: only base64-encoded content is decoded and scanned.
            content = res.get('content') or {}
            if content.get('encoding') == 'base64' and content.get('text'):
                try:
                    decoded_bytes = base64.b64decode(content['text'])
                except (ValueError, TypeError) as e:
                    out.write(f"  [Error decoding Base64: {e}]\n")
                else:
                    strings = clean_amf_strings(decoded_bytes)
                    if strings:
                        out.write("Response Data (Strings):\n")
                        for s in strings:
                            out.write(f"  {s}\n")
                    else:
                        out.write("  [No readable strings found]\n")

            out.write("-" * 30 + "\n")

    print(f"Extraction complete. Results saved to {output_file}")

except Exception as e:
    # Top-level boundary of the script: report the failure and stop.
    print(f"Error processing file: {e}")
