Fuzzing a Vulnerable JSON Parser: A Complete Guide to Finding Memory Corruption Bugs
Introduction
Fuzzing has become one of the most effective techniques for discovering security vulnerabilities in software. In this hands-on tutorial, we'll build a deliberately vulnerable JSON parser and use AFL++ (American Fuzzy Lop) to automatically discover five different types of memory corruption vulnerabilities.
This guide demonstrates:
- How to design effective fuzzing test cases (seeds)
- Understanding AFL++ fuzzing workflow
- Analyzing crashes with AddressSanitizer (ASAN)
- Real-world vulnerability patterns in parsers
The Vulnerable JSON Parser
Our target is a simple JSON parser (json_parser.c) that supports three data types:
- Strings: "name": "value"
- Numbers: "port": 8080
- Arrays: "array": [1, 2, 3]
Complete Source Code
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <ctype.h>
5
6#define MAX_KEY_LEN 64
7#define MAX_VALUE_LEN 256
8#define MAX_ARRAY_SIZE 1024
9
10// JSON key-value pair structure
11typedef struct {
12 char *key;
13 char *value;
14 int type; // 0=string, 1=number, 2=array
15} json_pair_t;
16
17// Global array buffer
18int *global_array = NULL;
19size_t global_array_size = 0;
20
21// Vulnerability 1: Stack buffer overflow
22void process_key(char *key) {
23 char local_key[MAX_KEY_LEN];
24 strcpy(local_key, key); // Dangerous! No bounds check
25 printf("Processing key: %s\n", local_key);
26}
27
28// Vulnerability 2: Format string vulnerability
29void log_value(char *value) {
30 fprintf(stderr, "Value: ");
31 fprintf(stderr, value); // Dangerous! User input as format string
32 fprintf(stderr, "\n");
33}
34
35// Vulnerability 3: Integer overflow
36size_t calculate_array_size(unsigned int count, unsigned int item_size) {
37 // Dangerous! Possible integer overflow
38 size_t total = count * item_size;
39 return total;
40}
41
42// Vulnerability 4: Heap buffer overflow
43void process_array(int *data, size_t data_count) {
44 if (!global_array) {
45 fprintf(stderr, "Error: Array buffer not initialized\n");
46 return;
47 }
48
49 // Dangerous! No check if data_count exceeds global_array_size
50 memcpy(global_array, data, data_count * sizeof(int));
51
52 printf("Processed %zu array elements\n", data_count);
53}
54
55// Cleanup function
56void cleanup_array() {
57 if (global_array) {
58 free(global_array);
59 global_array = NULL;
60 }
61}
62
63// Vulnerability 5: Use-After-Free
64void print_array_info() {
65 // Dangerous! May access freed memory
66 if (global_array_size > 0) {
67 printf("Array size: %zu\n", global_array_size);
68 if (global_array) {
69 printf("First element: %d\n", global_array[0]); // UAF!
70 }
71 }
72}
73
74// Simple JSON parser
75char* skip_whitespace(char *json) {
76 while (*json && isspace(*json)) json++;
77 return json;
78}
79
80char* parse_string(char *json, char **result) {
81 json = skip_whitespace(json);
82
83 if (*json != '"') {
84 return NULL;
85 }
86
87 json++; // skip opening "
88 char *start = json;
89
90 while (*json && *json != '"') json++;
91
92 if (*json != '"') {
93 return NULL;
94 }
95
96 size_t len = json - start;
97 *result = malloc(len + 1);
98 strncpy(*result, start, len);
99 (*result)[len] = '\0';
100
101 json++; // skip closing "
102 return json;
103}
104
105char* parse_number(char *json, int *result) {
106 json = skip_whitespace(json);
107
108 *result = atoi(json);
109
110 while (*json && (isdigit(*json) || *json == '-')) json++;
111
112 return json;
113}
114
115char* parse_array(char *json, int **array, size_t *count) {
116 json = skip_whitespace(json);
117
118 if (*json != '[') {
119 return NULL;
120 }
121
122 json++; // skip [
123
124 // Parse array length
125 unsigned int capacity = 10;
126 *array = malloc(capacity * sizeof(int));
127 *count = 0;
128
129 json = skip_whitespace(json);
130
131 while (*json && *json != ']') {
132 int num;
133 json = parse_number(json, &num);
134
135 if (*count >= capacity) {
136 capacity *= 2;
137 *array = realloc(*array, capacity * sizeof(int));
138 }
139
140 (*array)[(*count)++] = num;
141
142 json = skip_whitespace(json);
143 if (*json == ',') {
144 json++;
145 json = skip_whitespace(json);
146 }
147 }
148
149 if (*json != ']') {
150 free(*array);
151 return NULL;
152 }
153
154 json++; // skip ]
155 return json;
156}
157
158int parse_json_pair(char *json) {
159 char *key = NULL;
160 char *value = NULL;
161 int num_value;
162 int *array_data = NULL;
163 size_t array_count = 0;
164
165 json = skip_whitespace(json);
166
167 if (*json != '{') {
168 fprintf(stderr, "Error: Expected '{'\n");
169 return -1;
170 }
171
172 json++; // skip {
173
174 while (1) {
175 json = skip_whitespace(json);
176
177 if (*json == '}') {
178 break;
179 }
180
181 // Parse key
182 json = parse_string(json, &key);
183 if (!json) {
184 fprintf(stderr, "Error: Failed to parse key\n");
185 return -1;
186 }
187
188 // Trigger stack overflow
189 process_key(key);
190
191 json = skip_whitespace(json);
192
193 if (*json != ':') {
194 fprintf(stderr, "Error: Expected ':'\n");
195 free(key);
196 return -1;
197 }
198
199 json++; // skip :
200 json = skip_whitespace(json);
201
202 // Parse value based on type
203 if (*json == '"') {
204 // String value
205 json = parse_string(json, &value);
206 if (!json) {
207 free(key);
208 return -1;
209 }
210
211 // Trigger format string vulnerability
212 log_value(value);
213
214 free(value);
215 }
216 else if (*json == '[') {
217 // Array value
218 json = parse_array(json, &array_data, &array_count);
219 if (!json) {
220 free(key);
221 return -1;
222 }
223
224 // Trigger integer overflow and heap overflow
225 global_array_size = calculate_array_size(array_count, sizeof(int));
226
227 if (global_array_size > 0) {
228 global_array = malloc(global_array_size);
229 if (global_array) {
230 process_array(array_data, array_count);
231 }
232 }
233
234 free(array_data);
235 }
236 else if (isdigit(*json) || *json == '-') {
237 // Number value
238 json = parse_number(json, &num_value);
239 printf("Number: %d\n", num_value);
240 }
241
242 free(key);
243
244 json = skip_whitespace(json);
245 if (*json == ',') {
246 json++;
247 }
248 }
249
250 return 0;
251}
252
253int main(int argc, char *argv[]) {
254 if (argc != 2) {
255 printf("Usage: %s <json_file>\n", argv[0]);
256 return 1;
257 }
258
259 FILE *fp = fopen(argv[1], "r");
260 if (!fp) {
261 fprintf(stderr, "Cannot open file: %s\n", argv[1]);
262 return 1;
263 }
264
265 // Read entire file
266 fseek(fp, 0, SEEK_END);
267 long file_size = ftell(fp);
268 fseek(fp, 0, SEEK_SET);
269
270 char *json_content = malloc(file_size + 1);
271 fread(json_content, 1, file_size, fp);
272 json_content[file_size] = '\0';
273 fclose(fp);
274
275 printf("Parsing JSON...\n");
276
277 // Parse JSON
278 int result = parse_json_pair(json_content);
279
280 free(json_content);
281
282 if (result == 0) {
283 printf("JSON parsed successfully\n");
284
285 // Cleanup resources
286 cleanup_array();
287
288 // Trigger UAF
289 print_array_info();
290 }
291
292 return result;
293}
Intentional Vulnerabilities
We've embedded five classic memory safety vulnerabilities:
1. Stack Buffer Overflow (Critical)
1void process_key(char *key) {
2 char local_key[MAX_KEY_LEN]; // 64 bytes
3 strcpy(local_key, key); // No bounds check!
4 printf("Processing key: %s\n", local_key);
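5}
Trigger: JSON key of 64 or more characters (strcpy also writes the terminating NUL, so a 64-character key already overflows the 64-byte buffer)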
Impact: Stack corruption, potential RIP/RBP overwrite
2. Format String Vulnerability (High)
1void log_value(char *value) {
2 fprintf(stderr, "Value: ");
3 fprintf(stderr, value); // Dangerous!
4 fprintf(stderr, "\n");
5}
Trigger: JSON string value containing format specifiers (%s, %p, %n)
Impact: Information disclosure, arbitrary memory write
3. Integer Overflow (Medium)
1size_t calculate_array_size(unsigned int count, unsigned int item_size) {
2 size_t total = count * item_size; // Can overflow!
3 return total;
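4}
Trigger: An array with more than 2^30 elements. Both operands are unsigned int, so count * item_size is evaluated in 32 bits (on typical platforms) and wraps before the result is widened to size_t.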
Impact: Small buffer allocation followed by large copy → heap overflow
4. Heap Buffer Overflow (Critical)
1void process_array(int *data, size_t data_count) {
2 if (!global_array) return;
3
4 // No bounds check against global_array_size!
5 memcpy(global_array, data, data_count * sizeof(int));
6}
Trigger: Array data exceeds allocated buffer
Exploitation chain: Integer overflow → small buffer → large data → heap overflow
5. Use-After-Free (Medium)
1// In main():
2cleanup_array(); // Frees global_array
3print_array_info(); // Accesses freed memory!
4
5void print_array_info() {
6 if (global_array_size > 0) {
7 if (global_array) {
8 printf("First element: %d\n", global_array[0]); // UAF!
9 }
10 }
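11}
Trigger: Normal JSON parsing with arrays
Note: In the listing above, cleanup_array() also sets global_array to NULL, so the if (global_array) guard in print_array_info() skips the dereference and the seed run prints only "Array size: 12". The stale read is reached only if the pointer is left dangling after free().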
Impact: Access to freed memory, potential information leak
Compilation
Compile multiple versions for different purposes:
1# 1. AFL++ instrumented version (for fuzzing)
2afl-clang-fast -o json_parser_fuzz json_parser.c
3
4# 2. AddressSanitizer version (for vulnerability detection)
5afl-clang-fast -fsanitize=address -fsanitize=undefined -g -O1 \
6 -o json_parser_asan json_parser.c
7
8# 3. Debug version (for exploitation)
9gcc -g -fno-stack-protector -z execstack -no-pie \
10 -o json_parser_debug json_parser.c
11
12# 4. Normal version (for comparison)
13gcc -o json_parser_normal json_parser.c
Testing the Basic Seed
basic seed: seed.json
1{
2 "name": "test",
3 "port": 8080,
4 "array": [1, 2, 3]
5}
1root@softsec2:/opt/json_fuzzing_exercise# ./json_parser_normal seed.json
2Parsing JSON...
3Processing key: name
4Value: test
5Processing key: port
6Number: 8080
7Processing key: array
8Processed 3 array elements
9JSON parsed successfully
10Array size: 12
Seed Design Strategy
Effective fuzzing starts with well-designed seed files. Our strategy focuses on:
- Coverage of all 3 data types (string/number/array)
- Boundary value testing (extra-long keys/values, huge arrays)
- Malformed formats (missing quotes, unmatched brackets)
- Attack payloads (format strings, overflow data)
We'll create nine variants of the basic seed (seeds 2-10), for ten seeds in total:
Seed 2: Long Key (Stack Overflow)
1cat > seeds/02_long_key.json << 'EOF'
2{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa": "value"}
3EOF
Purpose: Trigger strcpy overflow in process_key()
Seed 3: Format String
1cat > seeds/03_format_string.json << 'EOF'
2{"name": "%s%s%s%n"}
3EOF
Purpose: Trigger format string vulnerability in log_value()
Seed 4: Large Array
1# Generate array with 1000 elements
2echo -n '{"array": [' > seeds/04_large_array.json
3for i in {1..1000}; do
4 echo -n "$i"
5 [ $i -lt 1000 ] && echo -n ","
6done >> seeds/04_large_array.json
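7echo ']}' >> seeds/04_large_array.json
Purpose: Exercise the array path that contains the integer overflow and heap overflow. 1000 elements is far below the roughly 2^30 needed to wrap the size calculation, so this seed does not overflow by itself; it gives the fuzzer a starting point for array mutations.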
Seed 5: Missing Quote
1cat > seeds/05_invalid_quote.json << 'EOF'
2{"name: "test"}
3EOF
Purpose: Test error handling
Seed 6: Unmatched Bracket
1cat > seeds/06_unmatched_bracket.json << 'EOF'
2{"name": "test"
3EOF
Purpose: Test parser robustness
Seed 7: Nested Structure
1cat > seeds/07_nested.json << 'EOF'
2{"obj": {"key": "value"}}
3EOF
Note: The current parser doesn't support nesting, so this seed will trigger error handling
Seed 8: Empty JSON
1echo '{}' > seeds/08_empty.json
Purpose: Boundary condition testing
Seed 9: Negative Number
1cat > seeds/09_negative.json << 'EOF'
2{"port": -1}
3EOF
Seed 10: Long Value
1printf '{"msg": "%0500d"}' 1 > seeds/10_long_value.json
AFL++ Fuzzing
Start the fuzzer:
1# Launch AFL++
2afl-fuzz -i seeds -o output \
3 -M fuzzer01 \
4 -- ./json_parser_fuzz @@
Parameters explained:
- -i seeds: Input seed directory
- -o output: Output directory
- -M fuzzer01: Main fuzzer instance
- @@: AFL++ replaces this with the test case path
With well-designed seeds, results appear within minutes:

1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ls
2id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8
3id:000001,sig:11,src:000002,time:3815,execs:15601,op:havoc,rep:16
4id:000002,sig:11,src:000002+000008,time:6913,execs:33715,op:splice,rep:4
5id:000003,sig:11,src:000006,time:52490,execs:37465,op:havoc,rep:8
6id:000004,sig:11,src:000006,time:103749,execs:40472,op:havoc,rep:16
7README.txt
Real Crash Analysis
Initial Testing with Normal Binary
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ../../../json_parser_normal "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
2Parsing JSON...
3Processing key: pa
4Value: 0x561fbb39a8b0 # ⚠️ Memory address leaked!
5Processing key: b
6Value: bb39a8b0 # ⚠️ Address fragment leaked!
7Processing key: c
8Value:
9JSON parsed successfully
ASAN Analysis
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# ../../../json_parser_asan "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
2Parsing JSON...
3Processing key: pa
4Value: 0xffffffff
5Processing key: b
6Value: ffffffff
7Processing key: c
8Value: AddressSanitizer:DEADLYSIGNAL
9=================================================================
10==1749957==ERROR: AddressSanitizer: SEGV on unknown address 0x00009fff7fff (pc 0x0000004344a2 bp 0x7ffe31b6b460 sp 0x7ffe31b6abd8 T0)
11==1749957==The signal is caused by a READ memory access.
12 #0 0x4344a2 in __asan::QuickCheckForUnpoisonedRegion(unsigned long, unsigned long) asan_interceptors.cpp.o
13 #1 0x43fc2e in printf_common(void*, char const*, __va_list_tag*) asan_interceptors.cpp.o
14 #2 0x4414b9 in fprintf (/opt/json_fuzzing_exercise/json_parser_asan+0x4414b9)
15 #3 0x4cf1bf in log_value /opt/json_fuzzing_exercise/json_parser.c:31:5
16 #4 0x4d0f38 in parse_json_pair /opt/json_fuzzing_exercise/json_parser.c:212:13
17 #5 0x4d1ab4 in main /opt/json_fuzzing_exercise/json_parser.c:278:18
18 #6 0x7fcec770bd8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16
19 #7 0x7fcec770be3f in __libc_start_main csu/../csu/libc-start.c:392:3
20 #8 0x420394 in _start (/opt/json_fuzzing_exercise/json_parser_asan+0x420394)
21
22AddressSanitizer can not provide additional info.
23SUMMARY: AddressSanitizer: SEGV asan_interceptors.cpp.o in __asan::QuickCheckForUnpoisonedRegion(unsigned long, unsigned long)
24==1749957==ABORTING
Examining Crash Content
1root@softsec2:/opt/json_fuzzing_exercise/output/fuzzer01/crashes# hexdump -C "id:000000,sig:11,src:000002,time:3113,execs:9537,op:havoc,rep:8"
200000000 7b 22 70 61 22 3a 20 22 25 70 22 2c 20 22 62 22 |{"pa": "%p", "b"|
300000010 3a 20 22 25 78 22 2c 20 22 63 22 3a 20 22 25 6e |: "%x", "c": "%n|
400000020 22 7d 6f 3e 74 22 35 20 73 08 |"}o>t"5 s.|
50000002a
AFL++ generated payload:
1{"pa": "%p", "b": "%x", "c": "%n"}o>t"5 s.
Format specifiers found:
- %p - Pointer leak
- %x - Hexadecimal value leak
- %n - Memory write format specifier (dangerous!)
Vulnerability Assessment
- Trigger location: log_value() function (json_parser.c:31)
- Vulnerability type: Format string injection (%n attempts a memory write; the invalid address causes the crash)
Security impact:
- Information disclosure - %p/%x leak memory addresses → ASLR bypass
- Arbitrary memory write - %n can write to memory → Control program flow
- Exploitation chain - Address leak + crafted %n → RCE (Remote Code Execution)
Results
Within just a few minutes, we discovered 8 crashes:

After 12 minutes

After 20 minutes

Key Takeaways
- Seed design is critical - Well-crafted seeds accelerate vulnerability discovery
- ASAN is essential - Reveals detailed crash information for analysis
- Format strings are dangerous - User input must never be used directly as format strings
Fixes
Fix Format String Vulnerability
Before:
1void log_value(char *value) {
2 fprintf(stderr, value); // Dangerous!
3}
After:
1void log_value(char *value) {
2 fprintf(stderr, "%s", value); // Safe!
3}
Remember: Use these techniques responsibly and only on systems you're authorized to test.
References
- AFLplusplus. (n.d.). AFL++ (American Fuzzy Lop plus plus). GitHub. https://github.com/AFLplusplus/AFLplusplus
- OWASP Foundation. (n.d.-a). Format string attack. https://owasp.org/www-community/attacks/Format_string_attack
- OWASP Foundation. (n.d.-b). Secure coding practices quick reference guide. https://owasp.org/www-project-secure-coding-practices-quick-reference-guide/
- Software Engineering Institute. (n.d.). SEI CERT C coding standard. Carnegie Mellon University. https://wiki.sei.cmu.edu/confluence/display/c/SEI+CERT+C+Coding+Standard