Python urllib Insecure HTTP urlretrieve Vulnerability

Medium Risk Insecure Transport

Python · urllib · HTTP · urlretrieve · File Download · Insecure Transport

What it is

Application uses urllib.request.urlretrieve() with HTTP URLs to download files, exposing data transmission to eavesdropping and potential file tampering during transfer.

Language:

import urllib.request
import os
from flask import request

@app.route('/download_update')
def download_update():
    """Insecure example: fetch a fixed update archive over plain HTTP.

    Downloads ``http://updates.example.com/app-update.zip`` to
    ``/tmp/update.zip`` with ``urllib.request.urlretrieve`` and returns a
    fixed confirmation string. The transfer is unencrypted and the file is
    never verified; this handler illustrates the vulnerability and is not
    meant to be copied.
    """
    # Vulnerable: HTTP download without integrity check
    update_url = 'http://updates.example.com/app-update.zip'
    local_file = '/tmp/update.zip'
    
    # Insecure: File downloaded over HTTP
    urllib.request.urlretrieve(update_url, local_file)
    return 'Update downloaded'

@app.route('/fetch_resource')
def fetch_resource():
    """Insecure example: download a caller-supplied URL to a caller-supplied path.

    Reads ``url`` and ``filename`` (default ``'download.tmp'``) from
    ``request.args``, builds the destination as ``/tmp/<filename>`` and hands
    both straight to ``urllib.request.urlretrieve``. Neither value is
    validated; this handler illustrates the vulnerability and is not meant
    to be copied.
    """
    # Vulnerable: User-controlled download URL
    resource_url = request.args.get('url')
    filename = request.args.get('filename', 'download.tmp')
    
    # Dangerous: No validation of URL or destination
    # (the f-string below lets `filename` steer the write location)
    local_path = f'/tmp/{filename}'
    urllib.request.urlretrieve(resource_url, local_path)
    return f'Downloaded to {local_path}'

import urllib.request
import urllib.parse
import ssl
import hashlib
import os
from flask import request

def validate_secure_download_url(url):
    """Validate that *url* is an HTTPS URL on an allowlisted download host.

    The host is checked via ``ParseResult.hostname`` (which ``urlparse``
    lower-cases) instead of the raw ``netloc``, so host names are matched
    case-insensitively and an explicit default port (``:443``) is accepted.
    Any other port and any embedded ``user:password@`` credentials are
    still rejected.

    Args:
        url: The URL to check.

    Returns:
        The ``urllib.parse.ParseResult`` for *url*.

    Raises:
        ValueError: If the scheme is not ``https``, the port is malformed or
            not the HTTPS default, credentials are embedded, or the host is
            not in the allowlist.
    """
    parsed = urllib.parse.urlparse(url)

    if parsed.scheme != 'https':
        raise ValueError('Only HTTPS downloads allowed')

    # Allowlist of permitted download domains
    allowed_domains = {
        'downloads.trusted.com',
        'secure.example.com',
        'cdn.verified.com',
    }

    # Accessing .port raises ValueError by itself for a malformed or
    # out-of-range port, which is the error type callers already expect.
    port = parsed.port
    has_credentials = (
        parsed.username is not None or parsed.password is not None
    )

    if (
        parsed.hostname not in allowed_domains
        or port not in (None, 443)
        or has_credentials
    ):
        raise ValueError(f'Domain {parsed.netloc} not in allowlist')

    return parsed

class _AllowlistRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that re-validates every redirect target."""

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        # urllib follows redirects automatically, including to other hosts
        # and from HTTPS down to HTTP; validating only the initial URL would
        # let an allowlisted host bounce the download anywhere.
        validate_secure_download_url(newurl)
        return super().redirect_request(req, fp, code, msg, headers, newurl)


def secure_urlretrieve(url, filename, expected_hash=None, max_size=50*1024*1024):
    """Download *url* into the secure download directory with validation.

    The URL (and every redirect target) must pass
    ``validate_secure_download_url``. Data is streamed to a uniquely named
    ``.part`` file next to the destination and only moved into place once
    the size limit and, if given, the SHA-256 check have passed, so a failed
    download never leaves partial content at the final path and never
    deletes a file that was already there.

    Args:
        url: HTTPS URL to download.
        filename: Bare file name (no directory components) to store the
            download under ``/var/secure_downloads``.
        expected_hash: Optional SHA-256 hex digest the content must match
            (compared case-insensitively).
        max_size: Maximum number of bytes to accept.

    Returns:
        A ``(local_path, sha256_hexdigest)`` tuple.

    Raises:
        ValueError: If the URL or the file name is rejected before any
            network access happens.
        RuntimeError: If the download, size check or integrity check fails;
            the original exception is attached as ``__cause__``.
    """
    # Validate URL
    validate_secure_download_url(url)

    # Validate filename: only a plain name is accepted. An empty name or '.'
    # would resolve to the download directory itself.
    if (
        not filename
        or filename == '.'
        or '..' in filename
        or '/' in filename
        or '\\' in filename
        or '\x00' in filename
    ):
        raise ValueError('Invalid filename')

    # Create secure download directory
    download_dir = '/var/secure_downloads'
    os.makedirs(download_dir, exist_ok=True)

    local_path = os.path.join(download_dir, filename)
    # Random suffix keeps concurrent downloads of the same name apart.
    partial_path = f'{local_path}.{os.urandom(8).hex()}.part'

    # Verified TLS plus redirect re-validation
    context = ssl.create_default_context()
    opener = urllib.request.build_opener(
        urllib.request.HTTPSHandler(context=context),
        _AllowlistRedirectHandler(),
    )

    try:
        hash_obj = hashlib.sha256()
        downloaded = 0

        with opener.open(url, timeout=30) as response:
            # Cheap early rejection when the server announces the size
            content_length = response.headers.get('Content-Length')
            if content_length and int(content_length) > max_size:
                raise ValueError(f'File too large: {content_length} bytes')

            # 'x' mode: never write through a pre-existing file of that name
            with open(partial_path, 'xb') as out:
                while True:
                    chunk = response.read(8192)
                    if not chunk:
                        break

                    # Enforce the limit on actual bytes; Content-Length may
                    # be absent or wrong.
                    downloaded += len(chunk)
                    if downloaded > max_size:
                        raise ValueError(f'Download exceeded size limit: {max_size} bytes')

                    hash_obj.update(chunk)
                    out.write(chunk)

        file_hash = hash_obj.hexdigest()

        # Verify hash if provided (hexdigest() is lower-case)
        if expected_hash and file_hash != expected_hash.strip().lower():
            raise ValueError('File integrity check failed')

        # Publish the verified file in one step
        os.replace(partial_path, local_path)
        return local_path, file_hash

    except Exception as e:
        raise RuntimeError(f'Secure download failed: {str(e)}') from e

    finally:
        # Best-effort cleanup of the partial file; after a successful
        # os.replace it no longer exists, which is fine.
        try:
            os.remove(partial_path)
        except OSError:
            pass

@app.route('/download_update')
def download_update():
    """Fetch the application update over HTTPS and verify its SHA-256.

    Returns a success payload with the stored path and digest, or an error
    payload with HTTP status 500 when the download helper raises
    ``ValueError`` or ``RuntimeError``.
    """
    source = 'https://secure.example.com/app-update.zip'
    known_digest = 'a1b2c3d4e5f6...'  # Known good hash

    try:
        # Secure download with known hash
        saved_path, digest = secure_urlretrieve(
            source, 'app-update.zip', expected_hash=known_digest
        )
    except (ValueError, RuntimeError) as err:
        return {'error': str(err)}, 500

    payload = {'status': 'success'}
    payload['file'] = saved_path
    payload['hash'] = digest
    return payload

@app.route('/fetch_resource')
def fetch_resource():
    """Download a caller-specified resource through the secure helper.

    Both ``url`` and ``filename`` query parameters are required, and the
    file name may contain only alphanumerics plus ``-``, ``_`` and ``.``.
    Responds with 400 for bad input and 500 when the download helper raises
    ``ValueError`` or ``RuntimeError``.
    """
    params = request.args
    resource_url = params.get('url', '')
    filename = params.get('filename', '')

    if not (resource_url and filename):
        return {'error': 'URL and filename required'}, 400

    # Validate filename format: drop the permitted punctuation, then the
    # remainder must be non-empty and purely alphanumeric.
    core = filename
    for punct in ('-', '_', '.'):
        core = core.replace(punct, '')
    if not core.isalnum():
        return {'error': 'Invalid filename format'}, 400

    ten_mib = 10*1024*1024  # 10MB limit
    try:
        saved_path, digest = secure_urlretrieve(
            resource_url, filename, max_size=ten_mib
        )
        result = {'status': 'downloaded'}
        result['file'] = os.path.basename(saved_path)
        result['size'] = os.path.getsize(saved_path)
        result['hash'] = digest
        return result
    except (ValueError, RuntimeError) as err:
        return {'error': str(err)}, 500

# Additional security: File type validation
def validate_file_type(filename, allowed_types):
    """Raise ``ValueError`` unless *filename*'s extension is permitted.

    The extension is taken with ``os.path.splitext`` and lower-cased before
    being looked up in *allowed_types*. Returns ``None`` on success.
    """
    _, suffix = os.path.splitext(filename)
    ext = suffix.lower()
    if ext in allowed_types:
        return
    raise ValueError(f'File type {ext} not allowed')

@app.route('/download_document')
def download_document():
    """Download a document after checking parameters and file type.

    Reads ``url`` and ``filename`` from the query string. Missing values
    are reported as a 400 client error instead of falling through to the
    file-type check, where an empty name produced a misleading
    ``'File type  not allowed'`` 500 response. Only ``.pdf``, ``.txt``,
    ``.docx`` and ``.xlsx`` names are accepted, and the download is capped
    at 5 MB.

    Returns:
        A payload with the stored document name and its SHA-256 digest, or
        an ``{'error': ...}`` payload with status 400 (missing parameters)
        or 500 (validation or download failure).
    """
    doc_url = request.args.get('url', '')
    filename = request.args.get('filename', '')

    # A missing parameter is a client error, not a server failure.
    if not doc_url or not filename:
        return {'error': 'URL and filename required'}, 400

    try:
        # Validate file type
        allowed_types = ['.pdf', '.txt', '.docx', '.xlsx']
        validate_file_type(filename, allowed_types)

        local_file, file_hash = secure_urlretrieve(
            doc_url,
            filename,
            max_size=5*1024*1024  # 5MB limit for documents
        )

        return {
            'status': 'downloaded',
            'document': os.path.basename(local_file),
            'hash': file_hash
        }

    except (ValueError, RuntimeError) as e:
        return {'error': str(e)}, 500

💡 Why This Fix Works

See fix suggestions for detailed explanation.

Why it happens

Code downloads files via HTTP: urllib.request.urlretrieve('http://example.com/file.zip', 'local.zip'). HTTP transmits file contents unencrypted, so downloaded files can be intercepted or modified by network attackers, and a man-in-the-middle can inject malicious content. Plain HTTP provides no confidentiality or integrity protection.

Root causes

Using urllib.request.urlretrieve() with HTTP URLs

URLs from Configuration Using HTTP Protocol

File URLs from config: DOWNLOAD_URL = os.environ['FILE_URL']; urlretrieve(DOWNLOAD_URL, filename). Configuration contains http:// URLs. Environment variables or config files not validated. Legacy settings or external sources may specify HTTP. Missing URL validation allows insecure downloads.

Using urlretrieve() for Software or Update Downloads

Downloading application updates: urlretrieve(update_url, 'update.tar.gz'). Software packages, plugins, or dependencies over HTTP. No integrity verification. Attackers inject malicious code during download. HTTP for software distribution creates supply chain attack vector. Critical for self-updating applications.

Not Validating File Integrity After Download

Downloading without verification: urlretrieve(url, filename); extract(filename). No checksum or signature validation. Even with HTTPS, integrity checks recommended. HTTP downloads without verification completely unprotected. File corruption or tampering undetected. Combined with HTTP, enables trivial content modification.

Legacy urlretrieve Usage Instead of Modern Alternatives

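Using the legacy urlretrieve interface: import urllib.request; urllib.request.urlretrieve(). urlretrieve() is a legacy function ported from Python 2; it is not deprecated yet, but the documentation warns it may become so. It offers limited error handling and progress monitoring, and it has no timeout or SSL-context parameter. Modern alternatives provide better APIs: the requests library is commonly preferred for HTTP downloads, and urllib.request.urlopen() with explicit file writes is more flexible than urlretrieve().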

Fixes

Always Use HTTPS URLs for urlretrieve() Downloads

Use HTTPS exclusively: urllib.request.urlretrieve('https://example.com/file.zip', 'local.zip'). Replace all http:// with https://. For file downloads, HTTPS mandatory. Provides encryption and server authentication. Certificate validation prevents man-in-the-middle attacks during downloads.

Validate URL Schemes Before Downloading Files

Check protocol before download: from urllib.parse import urlparse; if urlparse(url).scheme != 'https': raise ValueError('HTTPS required'); urlretrieve(url, filename). Validate URLs from all sources. Reject HTTP and other insecure schemes. Fail fast on protocol violations.

Use requests Library for Modern HTTP Downloads

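Replace urlretrieve with requests: import requests; with requests.get('https://example.com/file', stream=True, verify=True, timeout=30) as r: r.raise_for_status(); with open('file', 'wb') as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk). Iterate over the response in chunks; reading r.content would load the whole file into memory and defeat stream=True. Better error handling. Progress monitoring. Connection pooling. Explicit certificate verification. Modern API.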

Always Verify File Integrity with Checksums or Signatures

Download and verify: urlretrieve(url, file); import hashlib; hash = hashlib.sha256(open(file, 'rb').read()).hexdigest(); if hash != expected: raise ValueError('Invalid hash'). Download checksums separately. Use GPG signatures for software. Integrity verification detects tampering.

Use TLS Context for Certificate Pinning on Critical Downloads

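Pin the trusted CA for sensitive files: import ssl, shutil; context = ssl.create_default_context(cafile='/path/to/cert.pem'); with urllib.request.urlopen(url, context=context) as r, open(file, 'wb') as f: shutil.copyfileobj(r, f). urlretrieve() does not accept a context argument, so use urlopen() (or install an opener built with HTTPSHandler(context=context)) instead. Trusting only a specific CA certificate prevents man-in-the-middle attacks that rely on certificates from other, compromised CAs. Critical for update systems or configuration downloads.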

Migrate to Object Storage APIs for File Distribution

Use cloud storage SDKs: boto3.client('s3').download_file('bucket', 'key', 'local'). HTTPS by default. Built-in integrity checking. Access control and encryption. Versioning and lifecycle management. Modern file distribution infrastructure with superior security to HTTP downloads.

Detect This Vulnerability in Your Code

Sourcery automatically identifies the Python urllib insecure HTTP urlretrieve vulnerability and many other security issues in your codebase.

Scan Your Code for Free Explore More Vulnerabilities

Python urllib Insecure HTTP urlretrieve Vulnerability

What it is

💡 Why This Fix Works

Why it happens

Root causes

Using urllib.request.urlretrieve() with HTTP URLs

URLs from Configuration Using HTTP Protocol

Using urlretrieve() for Software or Update Downloads

Not Validating File Integrity After Download

Legacy urlretrieve Usage Instead of Modern Alternatives

Fixes

Always Use HTTPS URLs for urlretrieve() Downloads

Validate URL Schemes Before Downloading Files

Use requests Library for Modern HTTP Downloads

Always Verify File Integrity with Checksums or Signatures

Use TLS Context for Certificate Pinning on Critical Downloads

Migrate to Object Storage APIs for File Distribution

Arbitrary file write vulnerability from unvalidated user-controlled file paths

Python urllib Insecure HTTP urlopen Vulnerability

Python urllib Insecure FTP urlretrieve Vulnerability

Python Requests Disabled Certificate Validation Vulnerability

Detect This Vulnerability in Your Code

Python urllib Insecure HTTP urlretrieve Vulnerability

What it is

💡 Why This Fix Works

Why it happens

Root causes

Using urllib.request.urlretrieve() with HTTP URLs

URLs from Configuration Using HTTP Protocol

Using urlretrieve() for Software or Update Downloads

Not Validating File Integrity After Download

Legacy urlretrieve Usage Instead of Modern Alternatives

Fixes

Always Use HTTPS URLs for urlretrieve() Downloads

Validate URL Schemes Before Downloading Files

Use requests Library for Modern HTTP Downloads

Always Verify File Integrity with Checksums or Signatures

Use TLS Context for Certificate Pinning on Critical Downloads

Migrate to Object Storage APIs for File Distribution

Related Vulnerabilities

Arbitrary file write vulnerability from unvalidated user-controlled file paths

Python urllib Insecure HTTP urlopen Vulnerability

Python urllib Insecure FTP urlretrieve Vulnerability

Python Requests Disabled Certificate Validation Vulnerability

Detect This Vulnerability in Your Code