The test code:
use rand::Rng;
use std::{
sync::{Arc, Mutex},
thread,
time::Instant,
vec,
};
/// Size in bytes that one array element is assumed to occupy (a 64-bit integer).
const INT_SIZE: usize = 8;
/// Total number of elements across all per-thread arrays, sized so that the arrays add up to
/// 4096 MiB when every element takes `INT_SIZE` bytes.
const ARRAY_SIZE: usize = 4 * 1024 * 1024 * 1024 / INT_SIZE;
/// Total number of lock-and-read iterations, divided between the worker threads.
const ITERATIONS: usize = 100_000_000;
/// Benchmarks random array reads that are guarded by a pool of mutexes.
///
/// For every entry in `thread_counts`, `ARRAY_SIZE` elements are split into one private chunk per
/// thread. Each thread runs `ITERATIONS / thread_count` iterations; in each one it locks a mutex
/// from the shared pool (selected round-robin by iteration index), reads a random element of its
/// own chunk, and releases the lock. The elapsed wall-clock time and the sum of all values read
/// are printed per thread count.
///
/// # Panics
///
/// Panics if `num_of_mutex` is not a non-zero power of two, if a worker thread panics, or if a
/// mutex is poisoned.
fn main() {
    // Change it in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024].
    let num_of_mutex: usize = 16;
    // The hot loop picks a mutex with `i & mask`. That only cycles through every mutex when the
    // count is a power of two (e.g. 36 would use just 8 of them), and `0 - 1` would underflow.
    assert!(
        num_of_mutex.is_power_of_two(),
        "num_of_mutex must be a non-zero power of two, got {num_of_mutex}"
    );
    let mutex_mask = num_of_mutex - 1;
    let thread_counts = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128];

    for thread_count in thread_counts {
        // One private chunk per thread. The element type is spelled out as `u64` so that each
        // element really occupies `INT_SIZE` (8) bytes; an untyped `1` literal would default to
        // `i32` and halve the memory footprint.
        let chunk_len = ARRAY_SIZE / thread_count;
        let chunks: Vec<Vec<u64>> = (0..thread_count).map(|_| vec![1; chunk_len]).collect();

        // Build the mutex pool before starting the timer so setup cost is not measured.
        // Every mutex keeps its own `Arc` allocation, as in the original layout.
        let global_mutexes: Vec<Arc<Mutex<()>>> =
            (0..num_of_mutex).map(|_| Arc::new(Mutex::new(()))).collect();

        let iterations_per_thread = ITERATIONS / thread_count;
        let start_time = Instant::now();

        let handles: Vec<_> = chunks
            .into_iter()
            .map(|chunk| {
                // Each worker gets its own vector of `Arc` handles to the same mutexes.
                let mutexes = global_mutexes.clone();
                thread::spawn(move || {
                    let mut rng = rand::thread_rng();
                    let mut sum = 0_u64;
                    for i in 0..iterations_per_thread {
                        let guard = mutexes[i & mutex_mask].lock().expect("mutex poisoned");
                        let index = rng.gen_range(0..chunk.len());
                        sum += chunk[index];
                        // Release explicitly so the critical section covers exactly one read.
                        drop(guard);
                    }
                    sum
                })
            })
            .collect();

        // Joining inside the timed region makes `elapsed` cover the slowest worker.
        let sum: u64 = handles
            .into_iter()
            .map(|handle| handle.join().expect("worker thread panicked"))
            .sum();
        let elapsed = start_time.elapsed();

        println!("threads: {}, elapsed: {:?}", thread_count, elapsed);
        // Printing the sum keeps the reads from being optimized away.
        println!("sum : {sum}");
    }
}
The results (elapsed time in seconds; each row is a number of mutexes, each column a number of threads):
mutexes/threads | 1 | 2 | 4 | 8 | 12 | 16 | 24 | 32 | 48 | 64 | 128 |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 5.980172444 | 2.899437975 | 1.447906311 | 0.828731566 | 0.689618066 | 0.612355429 | 0.589401394 | 0.587380871 | 0.525477567 | 0.578456362 | 0.552132325 |
1 | 7.970250774 | 17.29034894 | 22.60164692 | 25.97284605 | 28.12352579 | 33.31359697 | 31.18786342 | 31.61139126 | 29.23225856 | 30.94094675 | 31.59191497 |
2 | 7.883931727 | 15.97845738 | 16.11107368 | 18.73377898 | 20.34614133 | 23.02624802 | 22.69439808 | 23.15802647 | 21.80570219 | 22.48815498 | 22.98585238 |
4 | 7.975676415 | 10.25364766 | 11.88074538 | 15.40198137 | 15.51024255 | 16.35328034 | 15.46874828 | 15.7982897 | 15.48703267 | 15.67227903 | 15.35829948 |
8 | 8.058803258 | 8.138193999 | 7.619081588 | 7.936418179 | 7.654288652 | 7.901945312 | 7.642439744 | 7.861542054 | 7.730389506 | 7.821229611 | 7.748344488 |
16 | 9.797308994 | 6.213334839 | 4.455407945 | 4.496371955 | 4.291254249 | 4.130849346 | 4.347601475 | 4.294096757 | 3.990391527 | 4.028562691 | 4.059085994 |
32 | 8.742854719 | 4.847656612 | 3.301780829 | 2.578327826 | 2.480488617 | 2.331294827 | 2.388718271 | 2.306257478 | 2.421350161 | 2.278177495 | 2.26569423 |
64 | 8.042672888 | 4.963568223 | 3.012473492 | 2.08243512 | 1.828237002 | 1.653421053 | 1.550811454 | 1.536452054 | 1.519761769 | 1.618966043 | 1.48010674 |
128 | 8.62801309 | 4.978525185 | 2.637936755 | 1.777546296 | 1.549096849 | 1.359814529 | 1.43875245 | 1.385468038 | 1.238832309 | 1.249940559 | 1.248131329 |
256 | 8.584906215 | 4.591742459 | 2.441556366 | 1.504790937 | 1.335449235 | 1.169191715 | 1.115906268 | 1.230570609 | 1.075581823 | 1.048285585 | 1.02977064 |
512 | 8.171549127 | 4.182283461 | 2.37535305 | 1.54202412 | 1.1690348 | 1.054650104 | 1.015366906 | 1.153238581 | 0.993319168 | 0.998864737 | 0.981392837 |
1024 | 8.533398132 | 4.175120792 | 2.209645233 | 1.412410651 | 1.055442085 | 0.938202817 | 1.122801927 | 0.940661156 | 0.888767412 | 0.914867532 | 0.92237305 |
The drawing script:
import matplotlib.pyplot as plt
# Elapsed time in seconds: one row per lock count (see row_labels),
# one column per thread count (see column_labels).
data = [
    [5.980172444, 2.899437975, 1.447906311, 0.828731566, 0.689618066, 0.612355429, 0.589401394, 0.587380871, 0.525477567, 0.578456362, 0.552132325],
    [7.970250774, 17.29034894, 22.60164692, 25.97284605, 28.12352579, 33.31359697, 31.18786342, 31.61139126, 29.23225856, 30.94094675, 31.59191497],
    [7.883931727, 15.97845738, 16.11107368, 18.73377898, 20.34614133, 23.02624802, 22.69439808, 23.15802647, 21.80570219, 22.48815498, 22.98585238],
    [7.975676415, 10.25364766, 11.88074538, 15.40198137, 15.51024255, 16.35328034, 15.46874828, 15.7982897, 15.48703267, 15.67227903, 15.35829948],
    # NOTE(review): the results table lists 8.138193999 for 8 locks / 2 threads,
    # but this row has 8.363385774 - confirm which value is correct.
    [8.058803258, 8.363385774, 7.619081588, 7.936418179, 7.654288652, 7.901945312, 7.642439744, 7.861542054, 7.730389506, 7.821229611, 7.748344488],
    [9.797308994, 6.213334839, 4.455407945, 4.496371955, 4.291254249, 4.130849346, 4.347601475, 4.294096757, 3.990391527, 4.028562691, 4.059085994],
    [8.742854719, 4.847656612, 3.301780829, 2.578327826, 2.480488617, 2.331294827, 2.388718271, 2.306257478, 2.421350161, 2.278177495, 2.26569423],
    [8.042672888, 4.963568223, 3.012473492, 2.08243512, 1.828237002, 1.653421053, 1.550811454, 1.536452054, 1.519761769, 1.618966043, 1.48010674],
    [8.62801309, 4.978525185, 2.637936755, 1.777546296, 1.549096849, 1.359814529, 1.43875245, 1.385468038, 1.238832309, 1.249940559, 1.248131329],
    [8.584906215, 4.591742459, 2.441556366, 1.504790937, 1.335449235, 1.169191715, 1.115906268, 1.230570609, 1.075581823, 1.048285585, 1.02977064],
    [8.171549127, 4.182283461, 2.37535305, 1.54202412, 1.1690348, 1.054650104, 1.015366906, 1.153238581, 0.993319168, 0.998864737, 0.981392837],
    [8.533398132, 4.175120792, 2.209645233, 1.412410651, 1.055442085, 0.938202817, 1.122801927, 0.940661156, 0.888767412, 0.914867532, 0.92237305],
]
# Number of locks used for each row of `data`.
row_labels = [0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
# Number of threads used for each column of `data`.
column_labels = [1, 2, 4, 8, 12, 16, 24, 32, 48, 64, 128]

# Draw one line per lock count on a single set of axes.
fig, ax = plt.subplots()
for lock_count, timings in zip(row_labels, data):
    ax.plot(column_labels, timings, marker='o', label=f"Locks: {lock_count}")
ax.set_xlabel("Threads")
ax.set_ylabel("Time (s)")
ax.legend()
plt.show()
The chart:
All tests were conducted on Linux on a 16-core x86-64 CPU.
lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 16
On-line CPU(s) list: 0-15
Thread(s) per core: 1
Core(s) per socket: 16
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
BIOS Vendor ID: Smdbmds
CPU family: 6
Model: 94
Model name: Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz
BIOS Model name: 3.0
Stepping: 3
CPU MHz: 2499.998
BogoMIPS: 4999.99
Hypervisor vendor: KVM
Virtualization type: full
L1d cache: 32K
L1i cache: 32K
L2 cache: 4096K
L3 cache: 28160K
NUMA node0 CPU(s): 0-15