初始化项目

master
曹世达 5 months ago
parent 3d461e1c69
commit 9cd4e9b6d8

38
.gitignore vendored

@ -0,0 +1,38 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### IntelliJ IDEA ###
.idea/modules.xml
.idea/jarRepositories.xml
.idea/compiler.xml
.idea/libraries/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>space.caoshd</groupId>
<artifactId>text-tools</artifactId>
<version>1.0.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>2.2.224</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

@ -0,0 +1,15 @@
package space.caoshd.text_tools;
import space.caoshd.text_tools.model.Delimiter;
import space.caoshd.text_tools.service.DBAccessService;
import space.caoshd.text_tools.service.LoadFileToDBService;
/**
 * Command-line entry point: wires a {@link LoadFileToDBService} to a {@link DBAccessService}
 * and loads one comma-separated file through it.
 */
public class Main {

    /** Sample file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "E:\\workspace\\text-tools\\src\\main\\resources\\data_more.csv";

    /**
     * Loads a comma-separated file.
     *
     * @param args optional; when present, {@code args[0]} is the path of the file to load,
     *             otherwise the built-in sample path is used
     */
    public static void main(String[] args) {
        // Prefer a caller-supplied path so the tool is usable outside the original workstation.
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        DBAccessService dbAccessService = new DBAccessService();
        LoadFileToDBService loadFileToDBService = new LoadFileToDBService();
        loadFileToDBService.setDbAccessService(dbAccessService);
        loadFileToDBService.load(inputPath, Delimiter.COMMA);
    }
}

@ -0,0 +1,41 @@
package space.caoshd.text_tools.config;
import space.caoshd.text_tools.util.FileUtils;
/**
 * Resolves (and creates on demand) the working directories used by the tools.
 *
 * <p>Layout: {@code <base>/work/{merge,slice,sort,data}}, where {@code <base>} is the
 * directory returned by {@link #getBaseDir()}.
 */
public class ProjectConfig {

    /** Name of the top-level working directory created under the base directory. */
    private static final String WORK_DIR_NAME = "work";

    /**
     * Ensures {@code <base>/work} exists.
     *
     * @return the absolute path of the work directory
     */
    public static String getAndCreateWorkDir() {
        return FileUtils.mkdir(getBaseDir(), WORK_DIR_NAME);
    }

    /**
     * Ensures {@code <base>/work/merge} exists.
     *
     * @return the absolute path of the merge directory
     */
    public static String getAndCreateMergeDir() {
        return workSubDir("merge");
    }

    /**
     * Ensures {@code <base>/work/slice} exists.
     *
     * @return the absolute path of the slice directory
     */
    public static String getAndCreateSliceDir() {
        return workSubDir("slice");
    }

    /**
     * Ensures {@code <base>/work/sort} exists.
     *
     * @return the absolute path of the sort directory
     */
    public static String getAndCreateSortDir() {
        return workSubDir("sort");
    }

    /**
     * Ensures {@code <base>/work/data} exists.
     *
     * @return the absolute path of the data directory
     */
    public static String getAndCreateDataDir() {
        return workSubDir("data");
    }

    /**
     * Returns the directory every working directory is created under.
     *
     * @return the JVM's current working directory
     */
    public static String getBaseDir() {
        // "user.dir" is a JVM system property, not an environment variable;
        // System.getenv("user.dir") is normally unset and would yield null.
        return System.getProperty("user.dir");
    }

    /** Creates (if needed) the named directory inside the work directory and returns its path. */
    private static String workSubDir(String name) {
        return FileUtils.mkdir(getAndCreateWorkDir(), name);
    }
}

@ -0,0 +1,32 @@
package space.caoshd.text_tools.model;
/**
 * Field separators supported when splitting and joining text lines.
 *
 * <p>Each constant carries two spellings of the same separator: a regular expression for
 * splitting and the plain character sequence for joining.
 */
public enum Delimiter {

    /** The ESC control character (code point 27). */
    ESCAPE("\\u001b", "\u001b"),

    /** The horizontal tab character. */
    TAB("\\t", "\t"),

    /** A comma. */
    COMMA(",", ",");

    /** The separator as written into output text. */
    private final String literal;

    /** Regular expression that matches the separator. */
    private final String regex;

    Delimiter(String pattern, String text) {
        literal = text;
        regex = pattern;
    }

    /**
     * @return the regular expression matching this separator
     */
    public String getRegex() {
        return this.regex;
    }

    /**
     * @return the separator's literal text
     */
    public String getLiteral() {
        return this.literal;
    }
}

@ -0,0 +1,207 @@
package space.caoshd.text_tools.service;
import space.caoshd.text_tools.config.ProjectConfig;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Small JDBC helper that holds one H2 connection and offers convenience methods for inserts,
 * updates and queries. Query rows are returned as maps from upper-cased column label to the
 * column's string value.
 *
 * <p>Typical use: {@link #setFilename(String)}, optionally {@link #setMemory(boolean)},
 * {@link #init()}, run statements, then {@link #close()}. Every SQL failure is rethrown as a
 * {@link RuntimeException} wrapping the original exception.
 */
public class DBAccessService {

    /** Number of rows accumulated before a batch is sent to the database. */
    private static final int BATCH_SIZE = 500;

    private String filename;
    private boolean memory;
    private Connection connection;

    public Connection getConnection() {
        return connection;
    }

    public void setConnection(Connection connection) {
        this.connection = connection;
    }

    public boolean isMemory() {
        return memory;
    }

    public void setMemory(boolean memory) {
        this.memory = memory;
    }

    public String getFilename() {
        return filename;
    }

    public void setFilename(String filename) {
        this.filename = filename;
    }

    /**
     * Loads the H2 driver and opens the connection described by the current
     * {@code filename}/{@code memory} settings.
     *
     * @throws RuntimeException if the driver is missing or the connection cannot be opened
     */
    public void init() {
        try {
            String username = "root";
            String password = "root";
            String className = "org.h2.Driver";
            String url = computeUrl();
            Class.forName(className);
            this.connection = DriverManager.getConnection(url, username, password);
        } catch (ClassNotFoundException | SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the connection if one is open; does nothing otherwise.
     *
     * @throws RuntimeException if closing fails
     */
    public void close() {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /** Builds the JDBC URL: an in-memory database, or a file inside the project data directory. */
    private String computeUrl() {
        if (memory) {
            return "jdbc:h2:mem:" + filename;
        }
        String dataDir = ProjectConfig.getAndCreateDataDir();
        return "jdbc:h2:file:" + dataDir + "/" + filename;
    }

    /**
     * Executes a parameterized statement on this service's connection.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     */
    public void inert(String sql, List<String> params) {
        inert(requireConnection(), sql, params);
    }

    /**
     * Executes a parameterized statement on the given connection.
     *
     * @param connection connection to run the statement on
     * @param sql        statement text with {@code ?} placeholders
     * @param params     values bound to the placeholders, in order
     */
    public void inert(Connection connection, String sql, List<String> params) {
        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            bind(statement, params);
            statement.execute();
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Executes the same statement once per parameter row, sending rows to the database in
     * batches of {@value #BATCH_SIZE}.
     *
     * @param sql         statement text with {@code ?} placeholders
     * @param batchParams one list of placeholder values per execution
     */
    public void batchInert(String sql, List<List<String>> batchParams) {
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            int pending = 0;
            for (List<String> rowParams : batchParams) {
                bind(statement, rowParams);
                statement.addBatch(); // queue this row in the statement's batch
                pending++;
                // Flush only once a full batch has accumulated (the previous index-modulo
                // test flushed after the very first row).
                if (pending == BATCH_SIZE) {
                    flushBatch(statement);
                    pending = 0;
                }
            }
            if (pending > 0) {
                flushBatch(statement);
            }
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Executes an update statement that has no parameters.
     *
     * @param sql statement text
     */
    public void execute(String sql) {
        execute(sql, new ArrayList<>());
    }

    /**
     * Executes a parameterized update statement.
     *
     * @param sql    statement text with {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     */
    public void execute(String sql, List<String> params) {
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            bind(statement, params);
            statement.executeUpdate();
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a query that has no parameters.
     *
     * @param sql query text
     * @return all rows, in result order
     */
    public List<Map<String, String>> list(String sql) {
        return list(sql, new ArrayList<>());
    }

    /**
     * Runs a parameterized query and materializes every row.
     *
     * @param sql    query text with {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     * @return all rows, in result order; each row maps upper-cased column label to value,
     *         in column order
     */
    public List<Map<String, String>> list(String sql, List<?> params) {
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            bind(statement, params);
            try (ResultSet resultSet = statement.executeQuery()) {
                ResultSetMetaData metaData = resultSet.getMetaData();
                List<Map<String, String>> rows = new ArrayList<>();
                while (resultSet.next()) {
                    rows.add(readRow(resultSet, metaData));
                }
                return rows;
            }
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs a parameterized query and returns its first row.
     *
     * @param sql    query text with {@code ?} placeholders
     * @param params values bound to the placeholders, in order
     * @return the first row as a map from upper-cased column label to value, or {@code null}
     *         when the query yields no rows
     */
    public Map<String, String> one(String sql, List<?> params) {
        try (PreparedStatement statement = requireConnection().prepareStatement(sql)) {
            bind(statement, params);
            try (ResultSet resultSet = statement.executeQuery()) {
                if (!resultSet.next()) {
                    return null;
                }
                return readRow(resultSet, resultSet.getMetaData());
            }
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the open connection, failing clearly when {@link #init()} was never called. */
    private Connection requireConnection() {
        if (connection == null) {
            throw new IllegalStateException("No connection: call init() or setConnection() first");
        }
        return connection;
    }

    /** Binds the values to the statement's placeholders; JDBC indexes start at 1. */
    private static void bind(PreparedStatement statement, List<?> params) throws SQLException {
        for (int i = 0; i < params.size(); i++) {
            statement.setObject(i + 1, params.get(i));
        }
    }

    /** Sends the queued batch to the database and empties it for the next round. */
    private static void flushBatch(PreparedStatement statement) throws SQLException {
        statement.executeBatch();
        statement.clearBatch();
    }

    /** Copies the current result-set row into an ordered label-to-value map. */
    private static Map<String, String> readRow(ResultSet resultSet, ResultSetMetaData metaData)
            throws SQLException {
        Map<String, String> row = new LinkedHashMap<>();
        int columnCount = metaData.getColumnCount();
        for (int i = 1; i <= columnCount; i++) {
            row.put(metaData.getColumnLabel(i).toUpperCase(), resultSet.getString(i));
        }
        return row;
    }
}

@ -0,0 +1,127 @@
package space.caoshd.text_tools.service;
import space.caoshd.text_tools.model.Delimiter;
import space.caoshd.text_tools.util.FileUtils;
import space.caoshd.text_tools.util.IdUtils;
import space.caoshd.text_tools.util.StrUtils;
import java.io.BufferedReader;
import java.util.List;
import java.util.Map;
/**
 * Loads a delimited text file into a freshly created in-memory table (one
 * {@code VARCHAR(100)} column per field of the first line), then prints every row ordered by
 * all columns.
 */
public class LoadFileToDBService {

    private DBAccessService dbAccessService;

    public DBAccessService getDbAccessService() {
        return dbAccessService;
    }

    public void setDbAccessService(DBAccessService dbAccessService) {
        this.dbAccessService = dbAccessService;
    }

    /**
     * Imports the file and prints its rows in sorted order to standard output.
     *
     * <p>The first line only determines the number of columns; it is imported as data like
     * every other line. The database connection and the file reader are released even when
     * the import fails part-way.
     *
     * @param filePath  path of the file to load
     * @param delimiter separator between the fields of a line
     * @throws IllegalArgumentException if the file contains no lines
     */
    public void load(String filePath, Delimiter delimiter) {
        String firstLine = FileUtils.readFirstLine(filePath);
        if (firstLine == null) {
            throw new IllegalArgumentException("Cannot load an empty file: " + filePath);
        }
        List<String> firstLineItems = StrUtils.split(firstLine, delimiter);

        // NOTE(review): the file's base name is used verbatim as the table name, so names that
        // are not valid unquoted SQL identifiers (e.g. containing '-') will fail at CREATE TABLE.
        String filename = FileUtils.getBaseName(filePath);
        String tableCreateSql = getTableCreateSql(filename, firstLineItems);
        String tableImportSql = getTableImportSql(filename, firstLineItems);
        String tableSortSql = getTableSortSql(filename, firstLineItems);

        String taskId = IdUtils.generate();
        dbAccessService.setFilename(taskId + "_" + filename);
        dbAccessService.setMemory(true);
        dbAccessService.init();
        try {
            dbAccessService.execute(tableCreateSql);
            importRows(filePath, delimiter, tableImportSql);
            printRows(dbAccessService.list(tableSortSql));
        } finally {
            dbAccessService.close();
        }
    }

    /** Inserts every line of the file as one row, closing the reader when done or on failure. */
    private void importRows(String filePath, Delimiter delimiter, String tableImportSql) {
        BufferedReader reader = FileUtils.getReader(filePath);
        try {
            String line;
            while (null != (line = FileUtils.readOneLine(reader))) {
                List<String> lineItems = StrUtils.split(line, delimiter);
                dbAccessService.inert(tableImportSql, lineItems);
            }
        } finally {
            FileUtils.close(reader);
        }
    }

    /** Prints each row on its own line, every value followed by a comma. */
    private static void printRows(List<Map<String, String>> rows) {
        for (Map<String, String> row : rows) {
            for (Map.Entry<String, String> column : row.entrySet()) {
                System.out.print(column.getValue() + ",");
            }
            System.out.println();
        }
    }

    /** Builds {@code SELECT * FROM <table> ORDER BY COLUMN1, COLUMN2, ...}. */
    private static String getTableSortSql(String filename, List<String> firstLineItems) {
        StringBuilder sqlBuilder = new StringBuilder();
        sqlBuilder.append("SELECT * FROM ");
        sqlBuilder.append(filename);
        sqlBuilder.append(" ORDER BY ");
        int size = firstLineItems.size();
        for (int i = 0; i < size; i++) {
            sqlBuilder.append("COLUMN");
            sqlBuilder.append(i + 1);
            if (i != size - 1) {
                sqlBuilder.append(", ");
            }
        }
        return sqlBuilder.toString();
    }

    /** Builds {@code INSERT INTO <table> VALUES (?,?,...)} and echoes it to standard output. */
    private static String getTableImportSql(String filename, List<String> firstLineItems) {
        StringBuilder sqlBuilder = new StringBuilder();
        sqlBuilder.append("INSERT INTO ");
        sqlBuilder.append(filename);
        sqlBuilder.append(" VALUES (");
        int size = firstLineItems.size();
        for (int i = 0; i < size; i++) {
            sqlBuilder.append("?");
            if (i != size - 1) {
                sqlBuilder.append(",");
            }
        }
        sqlBuilder.append(")");
        String tableImportSql = sqlBuilder.toString();
        System.out.println(sqlBuilder);
        return tableImportSql;
    }

    /** Builds the {@code CREATE TABLE} statement and echoes it to standard output. */
    private static String getTableCreateSql(String filename, List<String> firstLineItems) {
        StringBuilder sqlBuilder = new StringBuilder();
        sqlBuilder.append("CREATE TABLE ");
        sqlBuilder.append(filename);
        sqlBuilder.append(" (\n");
        int size = firstLineItems.size();
        for (int i = 0; i < size; i++) {
            sqlBuilder.append(" COLUMN");
            sqlBuilder.append(i + 1);
            sqlBuilder.append(" VARCHAR(100)");
            if (i != size - 1) {
                sqlBuilder.append(",");
            }
            sqlBuilder.append("\n");
        }
        sqlBuilder.append(")");
        String tableCreateSql = sqlBuilder.toString();
        System.out.println(sqlBuilder);
        return tableCreateSql;
    }
}

@ -0,0 +1,163 @@
package space.caoshd.text_tools.util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers for line-oriented file access and path manipulation. Checked I/O exceptions
 * are rethrown as {@link RuntimeException} wrapping the original exception.
 */
public class FileUtils {

    /**
     * Counts the lines of a file.
     *
     * @param filePath path of the file to read
     * @return number of lines
     */
    public static long countLine(String filePath) {
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            long count = 0;
            while (reader.readLine() != null) {
                count++;
            }
            return count;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a buffered reader on the file. The caller is responsible for closing it.
     *
     * @param filePath path of the file to read
     * @return an open reader
     */
    public static BufferedReader getReader(String filePath) {
        try {
            return new BufferedReader(new FileReader(filePath));
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Opens a buffered writer that appends to the file. The caller is responsible for closing it.
     *
     * @param filePath path of the file to write
     * @return an open writer in append mode
     */
    public static BufferedWriter getWriter(String filePath) {
        return getWriter(filePath, true);
    }

    /**
     * Opens a buffered writer on the file. The caller is responsible for closing it.
     *
     * @param filePath path of the file to write
     * @param append   {@code true} to append, {@code false} to overwrite
     * @return an open writer
     */
    public static BufferedWriter getWriter(String filePath, boolean append) {
        try {
            return new BufferedWriter(new FileWriter(filePath, append));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the last element of the path.
     *
     * @param filePath a file path
     * @return the file name including its extension, or {@code ""} when the path has no name
     *         element (a filesystem root)
     */
    public static String getFileName(String filePath) {
        Path name = Paths.get(filePath).getFileName();
        // Path.getFileName() is null for a root such as "/".
        return name == null ? "" : name.toString();
    }

    /**
     * Returns the file name without its last extension. A name whose only dot is the leading
     * one (such as {@code .gitignore}) is returned unchanged.
     *
     * @param filePath a file path
     * @return the file name up to, but excluding, the last dot
     */
    public static String getBaseName(String filePath) {
        String fileName = getFileName(filePath);
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex > 0) {
            return fileName.substring(0, dotIndex);
        } else {
            return fileName;
        }
    }

    /**
     * Returns the extension of the file name, including the leading dot.
     *
     * @param filePath a file path
     * @return the extension such as {@code ".txt"}, or {@code ""} when the file name has no
     *         extension, ends with a dot, or starts with its only dot
     */
    public static String getExtName(String filePath) {
        // Look only at the last path element so a dot in a directory name is not mistaken
        // for the start of the extension.
        String fileName = getFileName(filePath);
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex > 0 && dotIndex < fileName.length() - 1) {
            return fileName.substring(dotIndex);
        } else {
            return "";
        }
    }

    /**
     * Returns the absolute path of the directory containing the file, optionally extended by
     * a child directory name.
     *
     * @param filePath a file path
     * @param childDir name appended to the containing directory, or {@code null} for none
     * @return the resulting absolute directory path
     */
    public static String getDirPath(String filePath, String childDir) {
        // Resolve against the working directory first so a bare file name still has a parent.
        File absoluteFile = new File(filePath).getAbsoluteFile();
        File dir = absoluteFile.getParentFile();
        if (dir == null) {
            // Only a filesystem root has no parent; use the root itself.
            dir = absoluteFile;
        }
        if (childDir != null) {
            return new File(dir, childDir).getAbsolutePath();
        } else {
            return dir.getAbsolutePath();
        }
    }

    /**
     * Reads the next line from the reader.
     *
     * @param reader an open reader
     * @return the line without its terminator, or {@code null} at end of input
     */
    public static String readOneLine(BufferedReader reader) {
        try {
            return reader.readLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the whole file into memory.
     *
     * @param filePath path of the file to read
     * @return every line of the file, in order
     */
    public static List<String> readLines(String filePath) {
        List<String> result = new ArrayList<>();
        try (Reader reader = new FileReader(filePath);
             BufferedReader bufferReader = new BufferedReader(reader)) {
            String line;
            while (null != (line = bufferReader.readLine())) {
                result.add(line);
            }
            return result;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes the line followed by a line separator.
     *
     * @param writer an open writer
     * @param line   text to write
     */
    public static void writeLine(BufferedWriter writer, String line) {
        try {
            writer.write(line);
            writer.newLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Appends the lines to the file, closing the file even when a write fails.
     *
     * @param filePath path of the file to append to
     * @param lines    lines to write, in order
     */
    public static void writeLines(String filePath, List<String> lines) {
        try (BufferedWriter writer = getWriter(filePath)) {
            for (String line : lines) {
                writeLine(writer, line);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes the resource; does nothing for {@code null}.
     *
     * @param cloneable resource to close, may be {@code null}
     */
    public static void close(Closeable cloneable) {
        if (cloneable != null) {
            try {
                cloneable.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates the directory, including missing parents, if it does not exist yet.
     *
     * @param dirPath directory to create
     */
    public static void mkdir(String dirPath) {
        mkdir(dirPath, null);
    }

    /**
     * Creates {@code dirPath/childDirPath} (or just {@code dirPath} when the child is
     * {@code null}), including missing parents, if it does not exist yet.
     *
     * @param dirPath      parent directory
     * @param childDirPath child directory name, or {@code null}
     * @return the absolute path of the directory, whether or not it had to be created
     */
    public static String mkdir(String dirPath, String childDirPath) {
        File dirFile = (childDirPath == null) ? new File(dirPath) : new File(dirPath, childDirPath);
        if (!dirFile.isDirectory() && dirFile.mkdirs()) {
            System.out.println("mkdir: " + dirFile.getAbsolutePath());
        }
        return dirFile.getAbsolutePath();
    }

    /**
     * Reads only the first line of the file, closing the file even when the read fails.
     *
     * @param filePath path of the file to read
     * @return the first line, or {@code null} when the file is empty
     */
    public static String readFirstLine(String filePath) {
        try (BufferedReader reader = getReader(filePath)) {
            return reader.readLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

@ -0,0 +1,16 @@
package space.caoshd.text_tools.util;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
/**
 * Generates identifiers made of a timestamp and a random suffix.
 */
public class IdUtils {

    /**
     * Builds an identifier from the current time formatted as {@code yyyyMMddHHmmssSSS}
     * followed by a random number between 0 and 9999, zero-padded to four digits.
     *
     * @return the timestamp immediately followed by the four-digit suffix
     */
    public static String generate() {
        StringBuilder id = new StringBuilder();
        id.append(new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()));
        // Pad with leading zeros so the suffix is always exactly four digits wide.
        id.append(String.format("%04d", new Random().nextInt(10000)));
        return id.toString();
    }
}

@ -0,0 +1,114 @@
package space.caoshd.text_tools.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
/**
 * Typed read access to a {@code .properties} source. All entries are loaded once and kept
 * both as the raw {@link Properties} object and as a string map.
 */
public class PropHelper {

    private Map<String, String> propertyMap;
    private Properties properties;

    /**
     * Loads properties from the stream. The stream is not closed by this constructor.
     *
     * @param stream source in {@code .properties} format; must not be {@code null}
     * @throws RuntimeException if reading fails
     */
    public PropHelper(InputStream stream) {
        loadProperties(Objects.requireNonNull(stream, "stream"));
        rebuildPropertyMap();
    }

    /**
     * Loads properties from a classpath resource and closes the underlying stream afterwards.
     *
     * @param filename classpath-relative resource name
     * @throws IllegalArgumentException if the resource does not exist
     * @throws RuntimeException         if reading fails
     */
    public PropHelper(String filename) {
        try (InputStream stream = ResUtils.getClassPathFileStream(filename)) {
            if (stream == null) {
                throw new IllegalArgumentException("Classpath resource not found: " + filename);
            }
            loadProperties(stream);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        rebuildPropertyMap();
    }

    private void loadProperties(InputStream stream) {
        Properties loaded = new Properties();
        try {
            loaded.load(stream);
            this.properties = loaded;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Rebuilds the string map from the current contents of the properties object. */
    public void parseProperties() {
        rebuildPropertyMap();
    }

    /** Copies every property into a fresh map; private so constructors never call an overridable method. */
    private void rebuildPropertyMap() {
        Map<String, String> result = new HashMap<>();
        properties.forEach((k, v) -> result.put((String) k, (String) v));
        propertyMap = result;
    }

    /**
     * @param key property name
     * @return the value, or {@code ""} when the key is absent
     */
    public String getString(String key) {
        return getString(key, "");
    }

    /**
     * @param key          property name
     * @param defaultValue value returned when the key is absent
     * @return the value, or {@code defaultValue} when the key is absent
     */
    public String getString(String key, String defaultValue) {
        return Optional.ofNullable(propertyMap.get(key)).orElse(defaultValue);
    }

    /**
     * @param key        property name
     * @param defaultInt value returned when the key is absent or blank
     * @return the value parsed as an integer, ignoring surrounding whitespace
     * @throws NumberFormatException if the value is not a valid integer
     */
    public Integer getInt(String key, Integer defaultInt) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultInt;
        }
        // Properties keeps trailing whitespace in values, so trim before parsing.
        return Integer.valueOf(value.trim());
    }

    /**
     * @param key property name
     * @return the value parsed as an integer, or {@code 0} when the key is absent or blank
     */
    public Integer getInt(String key) {
        return getInt(key, 0);
    }

    /**
     * @param key            property name
     * @param defaultBoolean value returned when the key is absent or blank
     * @return {@code true} when the value, ignoring case and surrounding whitespace, is
     *         {@code "true"}; {@code false} for any other non-blank value
     */
    public Boolean getBoolean(String key, Boolean defaultBoolean) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultBoolean;
        }
        return Boolean.valueOf(value.trim());
    }

    /**
     * @param key property name
     * @return the parsed boolean, or {@code false} when the key is absent or blank
     */
    public Boolean getBoolean(String key) {
        return getBoolean(key, false);
    }

    /**
     * @param key         property name
     * @param defaultLong value returned when the key is absent or blank
     * @return the value parsed as a long, ignoring surrounding whitespace
     * @throws NumberFormatException if the value is not a valid long
     */
    public Long getLong(String key, Long defaultLong) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultLong;
        }
        return Long.valueOf(value.trim());
    }

    /**
     * @param key property name
     * @return the value parsed as a long, or {@code 0} when the key is absent or blank
     */
    public Long getLong(String key) {
        return getLong(key, 0L);
    }

    /**
     * Splits a comma-separated value into trimmed, non-empty items.
     *
     * @param key               property name
     * @param defaultStringList value returned when the key is absent
     * @return the items in order, or {@code defaultStringList} when the key is absent
     */
    public List<String> getStringList(String key, List<String> defaultStringList) {
        String values = propertyMap.get(key);
        if (Objects.isNull(values)) {
            return defaultStringList;
        }
        List<String> result = new ArrayList<>();
        for (String value : values.split(",")) {
            String item = value.trim();
            if (!item.isEmpty()) {
                result.add(item);
            }
        }
        return result;
    }

    /**
     * @param key property name
     * @return the comma-separated items, or an empty list when the key is absent
     */
    public List<String> getStringList(String key) {
        return getStringList(key, new ArrayList<>());
    }

    public Map<String, String> getPropertyMap() {
        return propertyMap;
    }

    public Properties getProperties() {
        return properties;
    }
}

@ -0,0 +1,19 @@
package space.caoshd.text_tools.util;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
/**
 * Helpers for locating resources through this class's class loader.
 */
public class ResUtils {

    /**
     * Opens a classpath resource as a stream. The caller is responsible for closing it.
     *
     * @param filename classpath-relative resource name
     * @return an open stream, or {@code null} when the resource does not exist
     */
    public static InputStream getClassPathFileStream(String filename) {
        return ResUtils.class.getClassLoader().getResourceAsStream(filename);
    }

    /**
     * Resolves a classpath resource to a {@link File}.
     *
     * @param filename classpath-relative resource name
     * @return a file built from the resource's URL
     * @throws IllegalArgumentException if the resource does not exist
     */
    public static File getClassPathFile(String filename) {
        URL resource = ResUtils.class.getClassLoader().getResource(filename);
        // An explicit check: 'assert' is a no-op unless the JVM runs with -ea.
        if (resource == null) {
            throw new IllegalArgumentException("Classpath resource not found: " + filename);
        }
        if ("file".equals(resource.getProtocol())) {
            try {
                // Going through the URI decodes percent-escapes (e.g. %20 for a space),
                // which URL.getFile() leaves in place.
                return new File(resource.toURI());
            } catch (java.net.URISyntaxException | IllegalArgumentException ignored) {
                // The URL is not a well-formed file URI; fall back to its raw path below.
            }
        }
        return new File(resource.getFile());
    }
}

@ -0,0 +1,44 @@
package space.caoshd.text_tools.util;
import space.caoshd.text_tools.model.Delimiter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * String helpers for splitting delimited lines into fields and joining them back.
 */
public class StrUtils {

    /**
     * Splits a comma-separated line into its fields.
     *
     * @param line text to split
     * @return the fields in order, including empty ones; the list is fixed-size
     */
    public static List<String> split(String line) {
        return split(line, Delimiter.COMMA);
    }

    /**
     * Splits a line into its fields.
     *
     * @param line      text to split
     * @param delimiter separator between fields
     * @return the fields in order, including empty ones; the list is fixed-size
     */
    public static List<String> split(String line, Delimiter delimiter) {
        // A negative limit keeps trailing empty fields, so "a,b," yields three fields and
        // every line of a file produces the same number of columns.
        return Arrays.asList(line.split(delimiter.getRegex(), -1));
    }

    /**
     * Splits every line on commas.
     *
     * @param lines lines to split
     * @return one field list per line, in order
     */
    public static List<List<String>> splitAll(List<String> lines) {
        return splitAll(lines, Delimiter.COMMA);
    }

    /**
     * Splits every line on the delimiter.
     *
     * @param lines     lines to split
     * @param delimiter separator between fields
     * @return one field list per line, in order
     */
    public static List<List<String>> splitAll(List<String> lines, Delimiter delimiter) {
        return lines.stream().map(line -> StrUtils.split(line, delimiter)).collect(Collectors.toList());
    }

    /**
     * Joins fields into one line.
     *
     * @param lineItem  fields to join
     * @param delimiter text placed between consecutive fields
     * @return the joined line
     */
    public static String join(List<String> lineItem, String delimiter) {
        return String.join(delimiter, lineItem);
    }

    /**
     * Joins every field list into a line.
     *
     * @param lineItems field lists to join
     * @param delimiter text placed between consecutive fields
     * @return one joined line per field list, in order
     */
    public static List<String> joinAll(List<List<String>> lineItems, String delimiter) {
        return lineItems.stream().map(lineItem -> StrUtils.join(lineItem, delimiter)).collect(Collectors.toList());
    }

    /**
     * Parses a decimal number leniently.
     *
     * @param str text to parse, may be {@code null}
     * @return the parsed value, or {@code null} when the text is {@code null} or not a number
     */
    public static Double parseDouble(String str) {
        // Double.parseDouble(null) throws NullPointerException rather than
        // NumberFormatException, so it has to be handled separately.
        if (str == null) {
            return null;
        }
        try {
            return Double.parseDouble(str);
        } catch (NumberFormatException e) {
            return null;
        }
    }
}

@ -0,0 +1,14 @@
5,A,C
1,F,C
4,A,C
1,d,C
6,A,C
3,A,C
1,A,C
8,A,C
1,E,C
5,A,C
1,A,E
2,A,C
1,A,C
9,A,C
1 5 A C
2 1 F C
3 4 A C
4 1 d C
5 6 A C
6 3 A C
7 1 A C
8 8 A C
9 1 E C
10 5 A C
11 1 A E
12 2 A C
13 1 A C
14 9 A C

File diff suppressed because it is too large Load Diff

@ -0,0 +1,47 @@
package space.caoshd.text_tools.slice;
import org.junit.Test;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;
/**
 * Generates a large random CSV fixture rather than asserting behavior: running it writes
 * {@code data_more.csv} into the current working directory.
 */
public class SlicerTest {

    /**
     * Writes 1,000,000 rows of 10 comma-separated fields; each field is either a random
     * number between 0 and 999 or a single random alphanumeric character.
     *
     * @throws IOException if the file cannot be written, so that a failed write fails the
     *                     test instead of being logged and ignored
     */
    @Test
    public void execute() throws IOException {
        String characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
        int characterCount = characters.length();
        Random random = new Random();
        long numberOfRows = 1000000;
        int columnsPerRow = 10;
        String fileName = "data_more.csv";

        try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
            for (long row = 0; row < numberOfRows; row++) {
                for (int column = 0; column < columnsPerRow; column++) {
                    // Decide whether this field is a number or a character.
                    if (random.nextBoolean()) {
                        // Random number between 0 and 999.
                        int randomNumber = random.nextInt(1000);
                        writer.write(String.valueOf(randomNumber));
                    } else {
                        // Random character from the allowed alphabet.
                        char randomChar = characters.charAt(random.nextInt(characterCount));
                        writer.write(String.valueOf(randomChar));
                    }
                    // Separate fields with a comma, except after the last one.
                    if (column < columnsPerRow - 1) {
                        writer.write(",");
                    }
                }
                // Finish the row.
                writer.newLine();
            }
        }
    }
}
Loading…
Cancel
Save