Java批量导入时如何去除重复数据并返回结果

概述

本篇是 POI系列 的最后一篇。传送门 Java开发中如何用POI优雅的导出Excel文件, Java开发中如何用POI优雅的导入Excel文件.

场景分析

大多数开发中是不需要重复的数据的, 所以后端开发中需要做去重操作, 而且为了更加友好的交互, 我们需要将导入失败的数据返回给用户。一般数据重复有以下几个场景:

  1. Excel中本身存在重复数据, 即本次导入存在重复数据;
  2. 数据库中已经存在了该条数据, 即历史导入存在重复数据;

为了减轻数据库的压力, 这里在设计中引入缓存 Redis

整体思路如下:

  1. bitmap判断是否存在;
  2. 内存中数据是否重复;
  3. redis和mysql批量插入;
  4. 数据库中插入失败处理;

代码实现

为简化无聊的 CRUD 编写, 引入了 mybatis-plus 的逆向 generator 插件。

pom.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.0.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>cn.idea360</groupId>
<artifactId>idc-mp</artifactId>
<version>0.0.1</version>
<name>idc-mp</name>
<description>Demo project for Spring Boot</description>

<properties>
<java.version>1.8</java.version>
</properties>

<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-devtools</artifactId>
<scope>runtime</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-boot-starter</artifactId>
<version>3.3.1</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<scope>runtime</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/druid -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.1.22</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.baomidou/mybatis-plus-generator -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-generator</artifactId>
<version>3.3.1</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-freemarker</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/io.springfox/springfox-swagger2 -->
<dependency>
<groupId>io.springfox</groupId>
<artifactId>springfox-swagger2</artifactId>
<version>2.9.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/io.springfox/springfox-swagger-ui -->
<dependency>
<groupId>io.springfox</groupId>
<artifactId>springfox-swagger-ui</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-pool2 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-pool2</artifactId>
<version>2.8.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.10</version>
</dependency>

</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>

</project>

application.yml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
server:
port: 8888
spring:
datasource:
type: com.alibaba.druid.pool.DruidDataSource
driverClassName: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://localhost:3306/mp_base?allowMultiQueries=true&useUnicode=true&characterEncoding=UTF-8&useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true
username: root
password: root

redis:
database: 0
host: localhost
port: 6379
password: # 密码(默认为空)
timeout: 6000ms # 连接超时时长(毫秒)
lettuce:
pool:
max-active: 1000 # 连接池最大连接数(使用负值表示没有限制)
max-wait: -1ms # 连接池最大阻塞等待时间(使用负值表示没有限制)
max-idle: 10 # 连接池中的最大空闲连接
min-idle: 5 # 连接池中的最小空闲连接

mysql-schema

1
2
3
4
5
6
7
8
9
10
DROP TABLE IF EXISTS user;

CREATE TABLE user
(
id BIGINT(20) NOT NULL AUTO_INCREMENT COMMENT '主键ID',
name VARCHAR(30) DEFAULT NULL UNIQUE COMMENT '姓名',
age INT(11) DEFAULT NULL COMMENT '年龄',
email VARCHAR(50) DEFAULT NULL COMMENT '邮箱',
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

generator

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
public class MysqlGenerator {

/**
* RUN THIS
*/
public static void main(String[] args) {
// 代码生成器
AutoGenerator mpg = new AutoGenerator();

// 全局配置
GlobalConfig gc = new GlobalConfig();
String projectPath = System.getProperty("user.dir");
gc.setOutputDir(projectPath + "/idc-mp/src/main/java");
gc.setAuthor("当我遇上你");
gc.setOpen(false);
gc.setFileOverride(true);// 是否覆盖文件
gc.setBaseResultMap(true);// XML ResultMap
gc.setBaseColumnList(true);// XML columList
gc.setDateType(DateType.ONLY_DATE);
gc.setSwagger2(true); // 实体属性 Swagger2 注解
gc.setIdType(IdType.AUTO);
gc.setMapperName("%sMapper");
gc.setXmlName("%sMapper");
gc.setServiceName("%sService");
gc.setServiceImplName("%sServiceImpl");
gc.setControllerName("%sController");
mpg.setGlobalConfig(gc);

// 数据源配置
DataSourceConfig dsc = new DataSourceConfig();
dsc.setUrl("jdbc:mysql://localhost:3306/mp_base?useUnicode=true&serverTimezone=GMT&useSSL=false&characterEncoding=utf8");
// dsc.setSchemaName("public");
dsc.setDriverName("com.mysql.cj.jdbc.Driver");
dsc.setUsername("root");
dsc.setPassword("root");
mpg.setDataSource(dsc);

// 包配置
PackageConfig pc = new PackageConfig();
pc.setModuleName("mp");
pc.setParent("cn.idea360.demo.modules");
mpg.setPackageInfo(pc);

// 自定义配置
InjectionConfig cfg = new InjectionConfig() {
@Override
public void initMap() {
// to do nothing
}
};
List<FileOutConfig> focList = new ArrayList<>();
focList.add(new FileOutConfig("/templates/mapper.xml.ftl") {
@Override
public String outputFile(TableInfo tableInfo) {
// 自定义输入文件名称
return projectPath + "/idc-mp/src/main/resources/mapper/" + pc.getModuleName()
+ "/" + tableInfo.getEntityName() + "Mapper" + StringPool.DOT_XML;
}
});
cfg.setFileOutConfigList(focList);
mpg.setCfg(cfg);
mpg.setTemplate(new TemplateConfig().setXml(null));

// 策略配置
StrategyConfig strategy = new StrategyConfig();
strategy.setNaming(NamingStrategy.underline_to_camel);
strategy.setColumnNaming(NamingStrategy.underline_to_camel);
// strategy.setSuperEntityClass("com.baomidou.mybatisplus.samples.generator.common.BaseEntity");
strategy.setEntityLombokModel(true);
// strategy.setSuperControllerClass("com.baomidou.mybatisplus.samples.generator.common.BaseController");
strategy.setInclude(new String[]{"user"});
strategy.setRestControllerStyle(true);
strategy.setSuperEntityColumns("id");
strategy.setControllerMappingHyphenStyle(true);
// strategy.setTablePrefix(pc.getModuleName() + "_");
mpg.setStrategy(strategy);
// 选择 freemarker 引擎需要指定如下加,注意 pom 依赖必须有!
mpg.setTemplateEngine(new FreemarkerTemplateEngine());
mpg.execute();
}

}

Redis

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
@Configuration
public class RedisConfig {

@Bean
public RedisTemplate<String, Serializable> redisTemplate(LettuceConnectionFactory connectionFactory) {
RedisTemplate<String, Serializable> redisTemplate = new RedisTemplate<>();
redisTemplate.setKeySerializer(new StringRedisSerializer());
redisTemplate.setValueSerializer(new GenericJackson2JsonRedisSerializer());
redisTemplate.setConnectionFactory(connectionFactory);
return redisTemplate;
}

// @Bean
// public HashOperations<String, String, Object> hashOperations(RedisTemplate<String, Serializable> redisTemplate) {
// return redisTemplate.opsForHash();
// }
//
// @Bean
// public ValueOperations<String, Serializable> valueOperations(RedisTemplate<String, Serializable> redisTemplate) {
// return redisTemplate.opsForValue();
// }
//
// @Bean
// public ListOperations<String, Serializable> listOperations(RedisTemplate<String, Serializable> redisTemplate) {
// return redisTemplate.opsForList();
// }
//
// @Bean
// public SetOperations<String, Serializable> setOperations(RedisTemplate<String, Serializable> redisTemplate) {
// return redisTemplate.opsForSet();
// }
//
// @Bean
// public ZSetOperations<String, Serializable> zSetOperations(RedisTemplate<String, Serializable> redisTemplate) {
// return redisTemplate.opsForZSet();
// }
}

User.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@Data
@EqualsAndHashCode(callSuper = false)
@Accessors(chain = true)
@ApiModel(value="User对象", description="")
public class User implements Serializable {

private static final long serialVersionUID = 1L;

@TableId(value = "id", type = IdType.AUTO)
private Long id;

@ApiModelProperty(value = "姓名")
private String name;

@ApiModelProperty(value = "年龄")
private Integer age;

@ApiModelProperty(value = "邮箱")
private String email;

public User(String name) {
this.name = name;
}

/**
* 因为会在List中判断user是否存在, 所以需要重写equals和hashCode方法
* @param o
* @return
*/
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
User user = (User) o;
return Objects.equals(name, user.name);
}

@Override
public int hashCode() {
return Objects.hash(name);
}
}

HashUtils.java

1
2
3
4
5
6
public class HashUtils {

public static int hash(String data) {
return data.hashCode() & Integer.MAX_VALUE;
}
}

核心逻辑

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/**
* <p>
* 服务实现类
* </p>
*
* @author 当我遇上你
* @since 2020-05-19
*/
@Slf4j
@Service
public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements UserService {

@Autowired
private RedisTemplate redisTemplate;

/**
* 1. bitmap判断是否存在
* 2. 内存中数据是否重复
* 3. redis和mysql批量插入
* 4. 数据库中是否插入失败
* @param list
* @return
*/
@Override
public JSONObject importBatch(List<User> list) {
if (CollectionUtils.isEmpty(list)) {
throw new NullPointerException("数据为空");
}
CopyOnWriteArrayList<User> importFailList = new CopyOnWriteArrayList<>();
CopyOnWriteArrayList<User> importSuccessList = new CopyOnWriteArrayList<>();

list.stream().forEach(user -> {
Boolean exist = redisTemplate.opsForValue().getBit("user", HashUtils.hash(user.getName()));
if (exist) {
log.error("Redis中name={}的用户已存在", user.getName());
// 数据已存在,数据放入失败集合
importFailList.add(user);
return;
}
if (importSuccessList.contains(user)) {
log.error("内存中name={}的用户已存在", user.getName());
importFailList.add(user);
return;
}
importSuccessList.add(user);
});



if (!CollectionUtils.isEmpty(importSuccessList)) {
try {
// 批量插入数据库
this.saveBatch(importSuccessList);
} catch (Exception e) {
log.error("MySQL写入冲突:{}", e.getMessage());
Iterator<User> iterator = importSuccessList.iterator();
while (iterator.hasNext()) {
User user = iterator.next();
if (user.getId() == null) {
log.error("MySQL中name={}的用户已存在", user.getName());
importFailList.add(user);
importSuccessList.remove(user);
}
}
}
// 将导入成功的数据批量写入bitmap
redisTemplate.executePipelined(new RedisCallback<String>() {
@Override
public String doInRedis(RedisConnection redisConnection) throws DataAccessException {
importSuccessList.stream().forEach(user -> {
redisConnection.setBit("user".getBytes(), HashUtils.hash(user.getName()), true);
});
return null;
}
});
}

JSONObject result = new JSONObject();
result.put("success", importSuccessList);
result.put("failure", importFailList);
return result;
}

}

场景测试

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
@Slf4j
@SpringBootTest
class UserServiceImplTest {

@Autowired
UserService userService;

/**
* 模拟内存中存在重复数据
*
* 2020-05-19 15:18:10.468 ERROR 6612 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : 内存中name=张三的用户已存在
* 2020-05-19 15:18:10.475 WARN 6612 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@304e1e4e] was not registered for synchronization because DataSource is not transactional
* 2020-05-19 15:18:10.533 INFO 6612 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited
* 2020-05-19 15:18:10.794 INFO 6612 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[{"id":1,"name":"张三"}],"failure":[{"name":"张三"}]}
*/
@Test
void importBatch1() {
User user1 = new User("张三");
User user2 = new User("张三");
List<User> userList = Arrays.asList(user1, user2);
JSONObject result = userService.importBatch(userList);
log.info(result.toJSONString());
}

/**
* 模拟Redis中存在重复数据
*
* 2020-05-19 15:18:40.700 ERROR 13352 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=张三的用户已存在
* 2020-05-19 15:18:40.708 WARN 13352 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@60251ddb] was not registered for synchronization because DataSource is not transactional
* 2020-05-19 15:18:40.768 INFO 13352 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited
* 2020-05-19 15:18:41.043 INFO 13352 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[{"id":2,"name":"李四"}],"failure":[{"name":"张三"}]}
*/
@Test
void importBatch2() {
User user1 = new User("张三");
User user2 = new User("李四");
List<User> userList = Arrays.asList(user1, user2);
JSONObject result = userService.importBatch(userList);
log.info(result.toJSONString());
}

/**
* 手动在MySQL中添加1条数据, 模拟MySQL中存在重复数据
*
* 2020-05-19 15:19:22.337 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=张三的用户已存在
* 2020-05-19 15:19:22.339 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=李四的用户已存在
* 2020-05-19 15:19:22.347 WARN 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@69fe8c75] was not registered for synchronization because DataSource is not transactional
* 2020-05-19 15:19:22.405 INFO 14128 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited
* 2020-05-19 15:19:22.609 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : MySQL写入冲突:cn.idea360.demo.modules.mp.mapper.UserMapper.insert (batch index #1) failed. Cause: java.sql.BatchUpdateException: Duplicate entry '王五' for key 'name'
* ; Duplicate entry '王五' for key 'name'; nested exception is java.sql.BatchUpdateException: Duplicate entry '王五' for key 'name'
* 2020-05-19 15:19:22.609 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : MySQL中name=王五的用户已存在
* 2020-05-19 15:19:22.697 INFO 14128 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[],"failure":[{"name":"张三"},{"name":"李四"},{"name":"王五"}]}
*/
@Test
void importBatch3() {
User user1 = new User("张三");
User user2 = new User("李四");
User user3 = new User("王五");
List<User> userList = Arrays.asList(user1, user2, user3);
JSONObject result = userService.importBatch(userList);
log.info(result.toJSONString());
}
}

最后

本文到此结束,感谢阅读。如果您觉得不错,请关注公众号【当我遇上你】支持一下。