Hash分批采样

由于实现比较简单，这里直接上代码：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package cn.idea360.mpdemo;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * Hash-based batch sampling demo.
 *
 * <p>Every user id is hashed into one of {@code MAX_BATCH} buckets, and a rolling batch index
 * selects one bucket per round. Over {@code MAX_BATCH} consecutive rounds each bucket is selected
 * exactly once, so the per-round samples add up to the whole population.
 *
 * @author cuishiying
 * @date 2022-09-16
 */
public class SampleTest {

    /** Rolling batch index; always kept inside {@code [0, MAX_BATCH)}. */
    private final AtomicInteger index = new AtomicInteger(0);

    /** Number of buckets the population is split into (one bucket is sampled per round). */
    private static final int MAX_BATCH = 10;

    /**
     * Returns the current batch index and advances it, wrapping back to 0 after
     * {@code MAX_BATCH - 1}.
     *
     * <p>The read, increment and wrap-around happen in one atomic update. A separate
     * "check for MAX_BATCH, then increment" sequence would let two concurrent callers both skip the
     * reset and push the counter past {@code MAX_BATCH}, after which it would never wrap again.
     *
     * @return the batch index for this round, in {@code [0, MAX_BATCH)}
     */
    private int getAndIncrementIndex() {
        return index.getAndUpdate(current -> (current + 1) % MAX_BATCH);
    }

    /**
     * Computes a non-negative hash for the given string.
     *
     * @param data the value to hash; must not be {@code null}
     * @return {@code data.hashCode()} with the sign bit cleared
     */
    private int hash(String data) {
        // Clearing the sign bit keeps the later modulo result non-negative.
        return data.hashCode() & Integer.MAX_VALUE;
    }

    /**
     * Maps a user id to its bucket.
     *
     * @param userId the user id to classify
     * @return the bucket number in {@code [0, MAX_BATCH)}
     */
    private int bucketOf(String userId) {
        return hash(userId) % MAX_BATCH;
    }

    /**
     * Generates random user ids, samples them over {@code MAX_BATCH} rounds and prints the
     * population size, the number of rounds and the total number of sampled ids.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        int sampleCount = 10000;
        List<String> userIds = new ArrayList<>(sampleCount);
        for (int i = 0; i < sampleCount; i++) {
            userIds.add(UUID.randomUUID().toString().replace("-", ""));
        }

        int sum = 0;
        SampleTest sampleTest = new SampleTest();
        for (int i = 0; i < MAX_BATCH; i++) {
            int currentIndex = sampleTest.getAndIncrementIndex();
            // Only the ids whose bucket matches this round's index are sampled.
            Set<String> batch = userIds.stream()
                    .filter(userId -> sampleTest.bucketOf(userId) == currentIndex)
                    .collect(Collectors.toSet());
            sum += batch.size();
        }
        System.out.printf("样本数: [%d], 采样批次: [%d], 总采样数: [%d]%n", sampleCount, MAX_BATCH, sum);
    }
}