October 5, 2025
82 Views
Welcome

JSON Parser AFL++ Fuzzing Tutorial

Fuzzing has become one of the most effective techniques for discovering security vulnerabilities in software. In this hands-on tutorial, we'll build a deliberately vulnerable JSON parser and use AFL++ (American Fuzzy Lop) to automatically discover five different types of memory corruption vulnerabilities.

Fuzzing a Vulnerable JSON Parser: A Complete Guide to Finding Memory Corruption Bugs

Introduction

Fuzzing has become one of the most effective techniques for discovering security vulnerabilities in software. In this hands-on tutorial, we'll build a deliberately vulnerable JSON parser and use AFL++ (American Fuzzy Lop) to automatically discover five different types of memory corruption vulnerabilities.

This guide demonstrates:

  • How to design effective fuzzing test cases (seeds)
  • Understanding AFL++ fuzzing workflow
  • Analyzing crashes with AddressSanitizer (ASAN)
  • Real-world vulnerability patterns in parsers

The Vulnerable JSON Parser

Our target is a simple JSON parser (json_parser.c) that supports three data types:

  • Strings: "name": "value"
  • Numbers: "port": 8080
  • Arrays: "array": [1, 2, 3]

Complete Source Code

c
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <ctype.h>
5
6#define MAX_KEY_LEN 64
7#define MAX_VALUE_LEN 256
8#define MAX_ARRAY_SIZE 1024
9
10// JSON key-value pair structure
11typedef struct {
12    char *key;
13    char *value;
14    int type;  // 0=string, 1=number, 2=array
15} json_pair_t;
16
17// Global array buffer
18int *global_array = NULL;
19size_t global_array_size = 0;
20
21// Vulnerability 1: Stack buffer overflow
22void process_key(char *key) {
23    char local_key[MAX_KEY_LEN];
24    strcpy(local_key, key);  // Dangerous! No bounds check
25    printf("Processing key: %s\n", local_key);
26}
27
28// Vulnerability 2: Format string vulnerability
29void log_value(char *value) {
30    fprintf(stderr, "Value: ");
31    fprintf(stderr, value);  // Dangerous! User input as format string
32    fprintf(stderr, "\n");
33}
34
35// Vulnerability 3: Integer overflow
36size_t calculate_array_size(unsigned int count, unsigned int item_size) {
37    // Dangerous! Possible integer overflow
38    size_t total = count * item_size;
39    return total;
40}
41
42// Vulnerability 4: Heap buffer overflow
43void process_array(int *data, size_t data_count) {
44    if (!global_array) {
45        fprintf(stderr, "Error: Array buffer not initialized\n");
46        return;
47    }
48
49    // Dangerous! No check if data_count exceeds global_array_size
50    memcpy(global_array, data, data_count * sizeof(int));
51
52    printf("Processed %zu array elements\n", data_count);
53}
54
55// Cleanup function
56void cleanup_array() {
57    if (global_array) {
58        free(global_array);
59        global_array = NULL;
60    }
61}
62
63// Vulnerability 5: Use-After-Free
64void print_array_info() {
65    // Dangerous! May access freed memory
66    if (global_array_size > 0) {
67        printf("Array size: %zu\n", global_array_size);
68        if (global_array) {
69            printf("First element: %d\n", global_array[0]);  // UAF!
70        }
71    }
72}
73
74// Simple JSON parser
75char* skip_whitespace(char *json) {
76    while (*json && isspace(*json)) json++;
77    return json;
78}
79
80char* parse_string(char *json, char **result) {
81    json = skip_whitespace(json);
82
83    if (*json != '"') {
84        return NULL;
85    }
86
87    json++; // skip opening "
88    char *start = json;
89
90    while (*json && *json != '"') json++;
91
92    if (*json != '"') {
93        return NULL;
94    }
95
96    size_t len = json - start;
97    *result = malloc(len + 1);
98    strncpy(*result, start, len);
99    (*result)[len] = '\0';
100
101    json++; // skip closing "
102    return json;
103}
104
105char* parse_number(char *json, int *result) {
106    json = skip_whitespace(json);
107
108    *result = atoi(json);
109
110    while (*json && (isdigit(*json) || *json == '-')) json++;
111
112    return json;
113}
114
115char* parse_array(char *json, int **array, size_t *count) {
116    json = skip_whitespace(json);
117
118    if (*json != '[') {
119        return NULL;
120    }
121
122    json++; // skip [
123
124    // Parse array length
125    unsigned int capacity = 10;
126    *array = malloc(capacity * sizeof(int));
127    *count = 0;
128
129    json = skip_whitespace(json);
130
131    while (*json && *json != ']') {
132        int num;
133        json = parse_number(json, &num);
134
135        if (*count >= capacity) {
136            capacity *= 2;
137            *array = realloc(*array, capacity * sizeof(int));
138        }
139
140        (*array)[(*count)++] = num;
141
142        json = skip_whitespace(json);
143        if (*json == ',') {
144            json++;
145            json = skip_whitespace(json);
146        }
147    }
148
149    if (*json != ']') {
150        free(*array);
151        return NULL;
152    }
153
154    json++; // skip ]
155    return json;
156}
157
158int parse_json_pair(char *json) {
159    char *key = NULL;
160    char *value = NULL;
161    int num_value;
162    int *array_data = NULL;
163    size_t array_count = 0;
164
165    json = skip_whitespace(json);
166
167    if (*json != '{') {
168        fprintf(stderr, "Error: Expected '{'\n");
169        return -1;
170    }
171
172    json++; // skip {
173
174    while (1) {
175        json = skip_whitespace(json);
176
177        if (*json == '}') {
178            break;
179        }
180
181        // Parse key
182        json = parse_string(json, &key);
183        if (!json) {
184            fprintf(stderr, "Error: Failed to parse key\n");
185            return -1;
186        }
187
188        // Trigger stack overflow
189        process_key(key);
190
191        json = skip_whitespace(json);
192
193        if (*json != ':') {
194            fprintf(stderr, "Error: Expected ':'\n");
195            free(key);
196            return -1;
197        }
198
199        json++; // skip :
200        json = skip_whitespace(json);
201
202        // Parse value based on type
203        if (*json == '"') {
204            // String value
205            json = parse_string(json, &value);
206            if (!json) {
207                free(key);
208                return -1;
209            }
210
211            // Trigger format string vulnerability
212            log_value(value);
213
214            free(value);
215        }
216        else if (*json == '[') {
217            // Array value
218            json = parse_array(json, &array_data, &array_count);
219            if (!json) {
220                free(key);
221                return -1;
222            }
223
224            // Trigger integer overflow and heap overflow
225            global_array_size = calculate_array_size(array_count, sizeof(int));
226
227            if (global_array_size > 0) {
228                global_array = malloc(global_array_size);
229                if (global_array) {
230                    process_array(array_data, array_count);
231                }
232            }
233
234            free(array_data);
235        }
236        else if (isdigit(*json) || *json == '-') {
237            // Number value
238            json = parse_number(json, &num_value);
239            printf("Number: %d\n", num_value);
240        }
241
242        free(key);
243
244        json = skip_whitespace(json);
245        if (*json == ',') {
246            json++;
247        }
248    }
249
250    return 0;
251}
252
253int main(int argc, char *argv[]) {
254    if (argc != 2) {
255        printf("Usage: %s <json_file>\n", argv[0]);
256        return 1;
257    }
258
259    FILE *fp = fopen(argv[1], "r");
260    if (!fp) {
261        fprintf(stderr, "Cannot open file: %s\n", argv[1]);
262        return 1;
263    }
264
265    // Read entire file
266    fseek(fp, 0, SEEK_END);
267    long file_size = ftell(fp);
268    fseek(fp, 0, SEEK_SET);
269
270    char *json_content = malloc(file_size + 1);
271    fread(json_content, 1, file_size, fp);
272    json_content[file_size] = '\0';
273    fclose(fp);
274
275    printf("Parsing JSON...\n");
276
277    // Parse JSON
278    int result = parse_json_pair(json_content);
279
280    free(json_content);
281
282    if (result == 0) {
283        printf("JSON parsed successfully\n");
284
285        // Cleanup resources
286        cleanup_array();
287
288        // Trigger UAF
289        print_array_info();
290    }
291
292    return result;
293}

Intentional Vulnerabilities

We've embedded five classic memory safety vulnerabilities:

1. Stack Buffer Overflow (Critical)

c
1void process_key(char *key) {
2    char local_key[MAX_KEY_LEN];  // 64 bytes
3    strcpy(local_key, key);       // No bounds check!
4    printf("Processing key: %s\n", local_key);
5}

Trigger: JSON key exceeding 64 bytes
Impact: Stack corruption, potential RIP/RBP overwrite

2. Format String Vulnerability (High)

c
1void log_value(char *value) {
2    fprintf(stderr, "Value: ");
3    fprintf(stderr, value);  // Dangerous!
4    fprintf(stderr, "\n");
5}

Trigger: JSON value containing format specifiers (%s, %p, %n)
Impact: Information disclosure, arbitrary memory write

3. Integer Overflow (Medium)

c
1size_t calculate_array_size(unsigned int count, unsigned int item_size) {
2    size_t total = count * item_size;  // Can overflow!
3    return total;
4}

Trigger: Extremely large array element count
Impact: Small buffer allocation followed by large copy → heap overflow

4. Heap Buffer Overflow (Critical)

c
1void process_array(int *data, size_t data_count) {
2    if (!global_array) return;
3
4    // No bounds check against global_array_size!
5    memcpy(global_array, data, data_count * sizeof(int));
6}

Trigger: Array data exceeds allocated buffer
Exploitation chain: Integer overflow → small buffer → large data → heap overflow

5. Use-After-Free (Medium)

c
1// In main():
2cleanup_array();        // Frees global_array
3print_array_info();     // Accesses freed memory!
4
5void print_array_info() {
6    if (global_array_size > 0) {
7        if (global_array) {
8            printf("First element: %d\n", global_array[0]);  // UAF!
9        }
10    }
11}

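Trigger: Normal JSON parsing with arrays
Impact: Access to freed memory, potential information leak

Note: As written, cleanup_array() resets global_array to NULL after freeing it, so the if (global_array) guard in print_array_info() skips the dereference and only the "Array size" line is printed (as the basic-seed output below shows). The freed memory is actually read only if the pointer is left dangling, i.e. not cleared after free().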

Compilation

Compile multiple versions for different purposes:

bash
1# 1. AFL++ instrumented version (for fuzzing)
2afl-clang-fast -o json_parser_fuzz json_parser.c
3
4# 2. AddressSanitizer version (for vulnerability detection)
5afl-clang-fast -fsanitize=address -fsanitize=undefined -g -O1 \
6    -o json_parser_asan json_parser.c
7
8# 3. Debug version (for exploitation)
9gcc -g -fno-stack-protector -z execstack -no-pie \
10    -o json_parser_debug json_parser.c
11
12# 4. Normal version (for comparison)
13gcc -o json_parser_normal json_parser.c

Testing the Basic Seed

Basic seed: seed.json

json
1{
2  "name": "test",
3  "port": 8080,
4  "array": [1, 2, 3]
5}
bash
1root@softsec2:/opt/json_fuzzing_exercise# ./json_parser_normal seed.json
2Parsing JSON...
3Processing key: name
4Value: test
5Processing key: port
6Number: 8080
7Processing key: array
8Processed 3 array elements
9JSON parsed successfully
10Array size: 12

Seed Design Strategy

Effective fuzzing starts with well-designed seed files. Our strategy focuses on:

  • Coverage of all 3 data types (string/number/array)
  • Boundary value testing (extra-long keys/values, huge arrays)
  • Malformed formats (missing quotes, unmatched brackets)
  • Attack payloads (format strings, overflow data)

Starting from the basic seed (Seed 1), we'll create nine variants (Seeds 2–10), for ten seeds in total:

Seed 2: Long Key (Stack Overflow)

bash
1cat > seeds/02_long_key.json << 'EOF'
2{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa": "value"}
3EOF

Purpose: Trigger strcpy overflow in process_key()

Seed 3: Format String

bash
1cat > seeds/03_format_string.json << 'EOF'
2{"name": "%s%s%s%n"}
3EOF

Purpose: Trigger format string vulnerability in log_value()

Seed 4: Large Array

bash
1# Generate array with 1000 elements
2echo -n '{"array": [' > seeds/04_large_array.json
3for i in {1..1000}; do
4    echo -n "$i"
5    [ $i -lt 1000 ] && echo -n ","
6done >> seeds/04_large_array.json
7echo ']}' >> seeds/04_large_array.json

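Purpose: Exercise the integer-overflow and heap-overflow code paths (calculate_array_size() and process_array()) with a large array. Note that 1000 elements does not overflow anything by itself: the unsigned int multiplication only wraps for element counts around 2^30, and the buffer is sized to match the data. The seed gives the fuzzer a starting point for mutating array inputs.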

Seed 5: Missing Quote

bash
1cat > seeds/05_invalid_quote.json << 'EOF'
2{"name: "test"}
3EOF

Purpose: Test error handling

Seed 6: Unmatched Bracket

bash
1cat > seeds/06_unmatched_bracket.json << 'EOF'
2{"name": "test"
3EOF

Purpose: Test parser robustness

Seed 7: Nested Structure

bash
1cat > seeds/07_nested.json << 'EOF'
2{"obj": {"key": "value"}}
3EOF

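Note: The current parser doesn't support nested objects, so this input exercises the error-handling path (the inner '{' is not a valid value, and the next key fails to parse).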

Seed 8: Empty JSON

bash
1echo '{}' > seeds/08_empty.json

Purpose: Boundary condition testing

Seed 9: Negative Number

bash
1cat > seeds/09_negative.json << 'EOF'
2{"port": -1}
3EOF

Seed 10: Long Value

bash
1printf '{"msg": "%0500d"}' 1 > seeds/10_long_value.json

AFL++ Fuzzing

Start the fuzzer:

bash
1# Launch AFL++
2afl-fuzz -i seeds -o output \
3    -M fuzzer01 \
4    -- ./json_parser_fuzz @@

Parameters explained:

  • -i seeds: Input seed directory
  • -o output: Output directory
  • -M fuzzer01: Main fuzzer instance
  • @@: AFL++ replaces this with test case path

With well-designed seeds, results appear within minutes:

AFL Fuzzing Results

bash
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ls
2id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8
3id:000001,sig:11,src:000002,time:3815,execs:15601,op:havoc,rep:16
4id:000002,sig:11,src:000002+000008,time:6913,execs:33715,op:splice,rep:4
5id:000003,sig:11,src:000006,time:52490,execs:37465,op:havoc,rep:8
6id:000004,sig:11,src:000006,time:103749,execs:40472,op:havoc,rep:16
7README.txt

Real Crash Analysis

Initial Testing with Normal Binary

bash
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ../../../json_parser_normal "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
2Parsing JSON...
3Processing key: pa
4Value: 0x561fbb39a8b0    # ⚠️ Memory address leaked!
5Processing key: b
6Value: bb39a8b0          # ⚠️ Address fragment leaked!
7Processing key: c
8Value:
9JSON parsed successfully

ASAN Analysis

bash
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ../../../json_parser_asan "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
2Parsing JSON...
3Processing key: pa
4Value: 0xffffffff
5Processing key: b
6Value: ffffffff
7Processing key: c
8Value: AddressSanitizer:DEADLYSIGNAL
9=================================================================
10==1749957==ERROR: AddressSanitizer: SEGV on unknown address 0x00009fff7fff (pc 0x0000004344a2 bp 0x7ffe31b6b460 sp 0x7ffe31b6abd8 T0)
11==1749957==The signal is caused by a READ memory access.
12    #0 0x4344a2 in __asan::QuickCheckForUnpoisonedRegion(unsigned long, unsigned long) asan_interceptors.cpp.o
13    #1 0x43fc2e in printf_common(void*, char const*, __va_list_tag*) asan_interceptors.cpp.o
14    #2 0x4414b9 in fprintf (/opt/json_fuzzing_exercise/json_parser_asan+0x4414b9)
15    #3 0x4cf1bf in log_value /opt/json_fuzzing_exercise/json_parser.c:31:5
16    #4 0x4d0f38 in parse_json_pair /opt/json_fuzzing_exercise/json_parser.c:212:13
17    #5 0x4d1ab4 in main /opt/json_fuzzing_exercise/json_parser.c:278:18
18    #6 0x7fcec770bd8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
19    #7 0x7fcec770be3f in __libc_start_main csu/../csu/libc-start.c:392:3
20    #8 0x420394 in _start (/opt/json_fuzzing_exercise/json_parser_asan+0x420394)
21
22AddressSanitizer can not provide additional info.
23SUMMARY: AddressSanitizer: SEGV asan_interceptors.cpp.o in __asan::QuickCheckForUnpoisonedRegion(unsigned long, unsigned long)
24==1749957==ABORTING

Examining Crash Content

bash
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# hexdump -C "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
200000000  7b 22 70 61 22 3a 20 22  25 70 22 2c 20 22 62 22  |{"pa": "%p", "b"|
300000010  3a 20 22 25 78 22 2c 20  22 63 22 3a 20 22 25 6e  |: "%x", "c": "%n|
400000020  22 7d 6f 3e 74 22 35 20  73 08                    |"}o>t"5 s.|
50000002a

AFL++ generated payload:

json
1{"pa": "%p", "b": "%x", "c": "%n"}o>t"5 s.

Format specifiers found:

  • %p - Pointer leak
  • %x - Hexadecimal value leak
  • %n - Memory write format specifier (dangerous!)

Vulnerability Assessment

  1. Trigger location: log_value() function (json_parser.c:31)
  2. Vulnerability type: Format string injection (%n attempts a memory write; the invalid target address causes the crash)

Security impact:

  1. Information disclosure - %p/%x leak memory addresses → ASLR bypass
  2. Arbitrary memory write - %n can write to memory → Control program flow
  3. Exploitation chain - Address leak + crafted %n → RCE (Remote Code Execution)

Results

Within just a few minutes, we discovered 8 crashes:

AFL Progress


After 12 minutes:

afl3


After 20 minutes:
afl4

Key Takeaways

  1. Seed design is critical - Well-crafted seeds accelerate vulnerability discovery
  2. ASAN is essential - Reveals detailed crash information for analysis
  3. Format strings are dangerous - User input must never be used directly as format strings

Fixes

Fix Format String Vulnerability

Before:

c
1void log_value(char *value) {
2    fprintf(stderr, value);  // Dangerous!
3}

After:

c
1void log_value(char *value) {
2    fprintf(stderr, "%s", value);  // Safe!
3}

Remember: Use these techniques responsibly and only on systems you're authorized to test.

References

  1. AFLplusplus. (n.d.). AFL++ (American Fuzzy Lop plus plus). GitHub. https://github.com/AFLplusplus/AFLplusplus
  2. OWASP Foundation. (n.d.-a). Format string attack. https://owasp.org/www-community/attacks/Format_string_attack
  3. OWASP Foundation. (n.d.-b). Secure coding practices quick reference guide. https://owasp.org/www-project-secure-coding-practices-quick-reference-guide/
  4. Software Engineering Institute. (n.d.). SEI CERT C coding standard. Carnegie Mellon University. https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard

Enjoyed this article?

Share it with your friends and colleagues!

Welcome
Last updated: October 5, 2025
相关文章
正在检查服务状态...
JSON Parser AFL++ Fuzzing Tutorial - ICTRUN