优化文件内容检索

master
曹世达 5 months ago
parent 9cd4e9b6d8
commit bbe877f386

@ -15,6 +15,17 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.groovy</groupId>
<artifactId>groovy</artifactId>
<version>4.0.21</version>
</dependency>
<dependency>
<groupId>com.ibm.icu</groupId>
<artifactId>icu4j</artifactId>
<version>59.1</version>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>

@ -6,7 +6,11 @@ import space.caoshd.text_tools.service.LoadFileToDBService;
public class Main {
public static void main(String[] args) {
String inputPath = "E:\\workspace\\text-tools\\src\\main\\resources\\data_more.csv";
// h2Sort();
}
static void h2Sort() {
String inputPath = "d:\\workspace\\text-tools\\data_more.csv";
LoadFileToDBService loadFileToDBService = new LoadFileToDBService();
DBAccessService dbAccessService = new DBAccessService();
loadFileToDBService.setDbAccessService(dbAccessService);

@ -0,0 +1,47 @@
package space.caoshd.text_tools.config
import space.caoshd.text_tools.util.FileUtils
/**
 * Resolves the directories the tool works in and creates them on demand.
 * <p>
 * Everything lives below {@code <base>/work}, where {@code <base>} is the value of the
 * {@code user.dir} system property. Each {@code create*Dir} method returns the absolute
 * path reported by {@code FileUtils.mkdir}.
 */
class GlobalConfig {

    /** Ensures {@code <base>/work} exists and returns its absolute path. */
    static String createWorkDir() {
        return FileUtils.mkdir(getBaseDir(), "work")
    }

    /** Ensures {@code <base>/work/search} exists and returns its absolute path. */
    static String createSearchDir() {
        return createWorkSubDir("search")
    }

    /** Ensures {@code <base>/work/merge} exists and returns its absolute path. */
    static String createMergeDir() {
        return createWorkSubDir("merge")
    }

    /** Ensures {@code <base>/work/slice} exists and returns its absolute path. */
    static String createSliceDir() {
        return createWorkSubDir("slice")
    }

    /** Ensures {@code <base>/work/sort} exists and returns its absolute path. */
    static String createSortDir() {
        return createWorkSubDir("sort")
    }

    /** Ensures {@code <base>/work/data} exists and returns its absolute path. */
    static String createDataDir() {
        return createWorkSubDir("data")
    }

    /** Returns the value of the {@code user.dir} system property. */
    static String getBaseDir() {
        return System.getProperty("user.dir")
    }

    /** Creates the work directory first, then the named child directory inside it. */
    private static String createWorkSubDir(String childName) {
        String workDir = createWorkDir()
        return FileUtils.mkdir(workDir, childName)
    }
}

@ -1,41 +0,0 @@
package space.caoshd.text_tools.config;
import space.caoshd.text_tools.util.FileUtils;
/**
 * Resolves the directories the tool works in and creates them on demand.
 * <p>
 * Everything lives below {@code <base>/work}; each method returns the absolute path reported
 * by {@code FileUtils.mkdir}.
 */
public class ProjectConfig {

    /** Ensures {@code <base>/work} exists and returns its absolute path. */
    public static String getAndCreateWorkDir() {
        String baseDir = getBaseDir();
        String tempDir = "work";
        return FileUtils.mkdir(baseDir, tempDir);
    }

    /** Ensures {@code <base>/work/merge} exists and returns its absolute path. */
    public static String getAndCreateMergeDir() {
        String baseDir = getAndCreateWorkDir();
        String tempDir = "merge";
        return FileUtils.mkdir(baseDir, tempDir);
    }

    /** Ensures {@code <base>/work/slice} exists and returns its absolute path. */
    public static String getAndCreateSliceDir() {
        String baseDir = getAndCreateWorkDir();
        String tempDir = "slice";
        return FileUtils.mkdir(baseDir, tempDir);
    }

    /** Ensures {@code <base>/work/sort} exists and returns its absolute path. */
    public static String getAndCreateSortDir() {
        String baseDir = getAndCreateWorkDir();
        String tempDir = "sort";
        return FileUtils.mkdir(baseDir, tempDir);
    }

    /** Ensures {@code <base>/work/data} exists and returns its absolute path. */
    public static String getAndCreateDataDir() {
        String baseDir = getAndCreateWorkDir();
        String tempDir = "data";
        return FileUtils.mkdir(baseDir, tempDir);
    }

    /**
     * Returns the JVM working directory.
     * <p>
     * {@code user.dir} is a system property, not an environment variable, so it has to be read
     * with {@link System#getProperty(String)}; {@code System.getenv("user.dir")} yields
     * {@code null} unless an environment variable of that name happens to be set.
     */
    public static String getBaseDir() {
        return System.getProperty("user.dir");
    }
}

@ -0,0 +1,12 @@
package space.caoshd.text_tools.model
/**
 * Describes one file taken into a search run, together with the lines that matched in it.
 * The path fields are filled by {@code KeywordSearchService.scanFile}.
 */
class FileItem {

    /** Path of the file relative to the configured base path. */
    String relativePath

    /** Absolute path of the file, with backslashes converted to forward slashes. */
    String absolutePath

    /** Parent directory of {@link #relativePath}. */
    String relativeDir

    /** Parent directory of {@link #absolutePath}. */
    String absoluteDir

    /** File name including its extension. */
    String name

    /** File name with the part after the last dot removed. */
    String baseName

    /** Free-form attributes attached to the file; starts out as an empty insertion-ordered map. */
    Map<String, Object> context = new LinkedHashMap<>()

    /** Matching lines collected for this file; starts out empty. */
    List<LineItem> lineItems = new ArrayList<>()
}

@ -0,0 +1,7 @@
package space.caoshd.text_tools.model
/**
 * One line of a scanned file that was accepted by the search predicate.
 */
class LineItem {
    // Line number as text; KeywordSearchService.scanLine assigns the int counter it receives from eachLine.
    String lineNum
    // The full text of the matching line.
    String lineContent
    // NOTE(review): not assigned anywhere in the visible code - presumably the keyword that matched; confirm.
    String matchKeyword
}

@ -0,0 +1,14 @@
package space.caoshd.text_tools.model
import java.util.function.Predicate
/**
 * Parameters of a keyword search run, consumed by {@code KeywordSearchService}.
 */
class SearchOptions {

    /** Root against which relative paths of scanned files are computed. */
    String basePath

    /**
     * What to scan: a directory is walked recursively; a regular file is used as the list of
     * file paths to scan (one path per line).
     */
    String scanPath

    /** NOTE(review): not read by the visible service code - presumably the target report file; confirm. */
    String outputFile

    /** Per-run work directory; assigned by {@code KeywordSearchService.execute()}. */
    String scanWorkDir

    /** Tested against every line of every scanned file; accepted lines are recorded. */
    Predicate<String> predicate

    /** Consulted by {@code KeywordSearchService.isScanFile} when selecting files; empty by default. */
    List<String> includeFilenameSuffixes = new ArrayList<>()

    /** Consulted by {@code KeywordSearchService.isScanFile} when rejecting files; empty by default. */
    List<String> excludeFilenameSuffixes = new ArrayList<>()

    /** When true (the default) {@code execute()} reads the files and writes the result. */
    Boolean scanContent = true
}

@ -1,6 +1,6 @@
package space.caoshd.text_tools.service;
import space.caoshd.text_tools.config.ProjectConfig;
import space.caoshd.text_tools.config.GlobalConfig;
import java.sql.Connection;
import java.sql.DriverManager;
@ -9,6 +9,7 @@ import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@ -74,7 +75,7 @@ public class DBAccessService {
if (memory) {
url = "jdbc:h2:mem:" + filename;
} else {
String dataDir = ProjectConfig.getAndCreateDataDir();
String dataDir = GlobalConfig.createDataDir();
url = "jdbc:h2:file:" + dataDir + "/" + filename;
}
return url;
@ -155,6 +156,7 @@ public class DBAccessService {
public List<Map<String, String>> list(String sql, List<?> params) {
try {
System.out.println(new Date());
List<Map<String, String>> result = new ArrayList<>();
PreparedStatement preparedStatement = connection.prepareStatement(sql);
for (int i = 0; i < params.size(); i++) {
@ -168,8 +170,11 @@ public class DBAccessService {
for (int i = 1; i <= columnCount; i++) {
String columnLabel = metaData.getColumnLabel(i);
String columnValue = resultSet.getString(i);
row.put(columnLabel.toUpperCase(), columnValue);
System.out.print(columnValue + ",");
// row.put(columnLabel.toUpperCase(), columnValue);
}
System.out.println(new Date());
result.add(row);
}
return result;

@ -0,0 +1,140 @@
package space.caoshd.text_tools.service
import groovy.io.FileType
import space.caoshd.text_tools.config.GlobalConfig
import space.caoshd.text_tools.model.FileItem
import space.caoshd.text_tools.model.LineItem
import space.caoshd.text_tools.model.SearchOptions
import space.caoshd.text_tools.util.FileUtils
import space.caoshd.text_tools.util.IdUtils
import java.util.stream.Collectors
/**
 * Scans a set of files for lines accepted by {@code searchOptions.predicate} and writes the
 * hits to {@code detail.tsv} inside a freshly created per-run work directory.
 */
class KeywordSearchService {

    /** Search parameters; must be assigned before {@link #execute()} is called. */
    SearchOptions searchOptions

    /** Text file that lists the files to scan, one path per line. */
    File scanConfigFile

    /** One entry per listed file; filled by {@link #scanFileByConfigList()}. */
    List<FileItem> scanFileItems

    /**
     * Runs a complete search: creates the work directory, determines the list of files and,
     * when {@code searchOptions.scanContent} is true, scans them and writes the result.
     */
    void execute() {
        def searchDir = GlobalConfig.createSearchDir()
        def taskDir = IdUtils.generate()
        def scanWorkDir = FileUtils.mkdir(searchDir, taskDir)
        searchOptions.scanWorkDir = scanWorkDir

        // A regular file is taken as a ready-made list of paths; anything else is walked.
        def scanTarget = new File(searchOptions.scanPath)
        scanConfigFile = scanTarget.isFile() ? scanTarget : createScanConfigFile()

        if (searchOptions.scanContent) {
            scanFileByConfigList()
            scanLineByFileItems()
            outputByFileItems()
        }
    }

    /**
     * Walks {@code searchOptions.scanPath} recursively and writes the absolute path of every
     * file accepted by {@link #isScanFile(File)} to {@code ScanFilePath.txt} in the work directory.
     *
     * @return the list file that was written
     */
    File createScanConfigFile() {
        def result = new File(searchOptions.scanWorkDir, "ScanFilePath.txt")
        result.withWriter { writer ->
            new File(searchOptions.scanPath).eachFileRecurse(FileType.FILES, { file ->
                if (isScanFile(file)) {
                    writer.write(file.absolutePath)
                    writer.newLine()
                }
            })
        }
        return result
    }

    /**
     * Decides whether a file takes part in the scan. Matching is a case-insensitive test on the
     * end of the absolute path.
     * <ul>
     *   <li>With a non-empty include list the path must end with at least one include suffix;
     *       an empty include list accepts every file.</li>
     *   <li>A path ending with any exclude suffix is rejected; an empty exclude list rejects
     *       nothing.</li>
     * </ul>
     */
    boolean isScanFile(File file) {
        def path = file.absolutePath
        def includes = searchOptions.includeFilenameSuffixes ?: []
        def excludes = searchOptions.excludeFilenameSuffixes ?: []

        if (!includes.isEmpty() && !includes.any { String suffix -> path.endsWithIgnoreCase(suffix) }) {
            return false
        }
        return !excludes.any { String suffix -> path.endsWithIgnoreCase(suffix) }
    }

    /** Reads {@link #scanConfigFile} and builds a {@link FileItem} for every non-blank line. */
    void scanFileByConfigList() {
        List<FileItem> items = []
        scanConfigFile.eachLine { String filePath ->
            // A blank line (e.g. a trailing newline in a hand-written list) is not a file.
            if (!filePath.trim().isEmpty()) {
                items.add(scanFile(new File(filePath)))
            }
        }
        this.scanFileItems = items
    }

    /** Builds the {@link FileItem} (names and normalized paths) for one file; does not read it. */
    FileItem scanFile(File file) {
        def result = new FileItem()
        result.name = file.getName()
        result.absolutePath = FileUtils.convertPathToLinux(file.absolutePath)
        result.relativePath = FileUtils.computeRelativePath(searchOptions.basePath, file.absolutePath)
        result.absoluteDir = parentDirOf(result.absolutePath)
        result.relativeDir = parentDirOf(result.relativePath)
        result.baseName = FileUtils.getBaseName(result.absolutePath)
        return result
    }

    /**
     * Returns the parent directory of a path with forward slashes, or an empty string when the
     * path has no parent (a file located directly in the base directory).
     */
    private static String parentDirOf(String path) {
        def parent = new File(path).getParent()
        return parent == null ? "" : FileUtils.convertPathToLinux(parent)
    }

    /**
     * Gives all files of the same directory one shared context map, then reads every file and
     * collects the lines accepted by {@link #scanLine(String, int)}.
     */
    void scanLineByFileItems() {
        def groupedByAbsoluteDirFileItems = scanFileItems.stream().collect(Collectors.groupingBy { FileItem fileItem -> fileItem.absoluteDir })
        groupedByAbsoluteDirFileItems.each { absoluteDir, fileItems ->
            def context = [:] as LinkedHashMap<String, Object>
            // NOTE(review): every put overwrites the previous one, so "sibling" ends up holding
            // only the last file of the directory - confirm whether a list was intended.
            fileItems.each { fileItem -> context.put("sibling", fileItem) }
            fileItems.each { fileItem -> fileItem.context = context }
        }
        scanFileItems.each { fileItem ->
            def file = new File(fileItem.absolutePath)
            // noinspection GroovyMissingReturnStatement
            file.eachLine { line, lineNum ->
                def lineItem = scanLine(line, lineNum)
                if (lineItem) {
                    fileItem.lineItems.add(lineItem)
                }
            }
        }
    }

    /**
     * Tests one line against the configured predicate.
     *
     * @return a {@link LineItem} carrying the text and number of the line, or {@code null}
     *         when the predicate rejects it
     */
    LineItem scanLine(String line, int lineNum) {
        def predicate = searchOptions.predicate
        if (!predicate.test(line)) {
            return null
        }
        def result = new LineItem()
        result.lineContent = line
        result.lineNum = lineNum
        return result
    }

    /** Writes the hits of all scanned files to {@code detail.tsv}, replacing an earlier result. */
    void outputByFileItems() {
        // output() appends, so start from a clean file to keep a repeated call from duplicating rows.
        new File(searchOptions.scanWorkDir, "detail.tsv").delete()
        scanFileItems.each { fileItem -> output(fileItem) }
    }

    /**
     * Appends one row per matching line of the given file to {@code detail.tsv}: the file name
     * and the line text (tabs removed), separated by a tab. Files without hits write nothing.
     */
    void output(FileItem fileItem) {
        def lineItems = fileItem.lineItems
        if (lineItems.isEmpty()) {
            return
        }
        def outputFile = new File(searchOptions.scanWorkDir, "detail.tsv")
        // Append: reopening with withWriter would truncate the rows written for earlier files.
        outputFile.withWriterAppend { writer ->
            lineItems.each { lineItem ->
                def columns = [
                        fileItem.name,
                        lineItem.lineContent.replace("\t", "")
                ]
                writer.write(columns.join("\t"))
                writer.newLine()
            }
        }
    }
}

@ -33,7 +33,7 @@ public class LoadFileToDBService {
String taskId = IdUtils.generate();
String finalName = taskId + "_" + filename;
dbAccessService.setFilename(finalName);
dbAccessService.setMemory(true);
dbAccessService.setMemory(false);
dbAccessService.init();
dbAccessService.execute(tableCreateSql);

@ -0,0 +1,167 @@
package space.caoshd.text_tools.util
import java.nio.file.Path
import java.nio.file.Paths
/**
 * File and path helpers. I/O failures are rethrown as {@link RuntimeException} with the
 * original exception as cause.
 */
class FileUtils {

    /** Replaces every backslash in the path with a forward slash. */
    static String convertPathToLinux(String windowsPath) {
        return windowsPath.replaceAll("\\\\", "/")
    }

    /**
     * Returns {@code absolutePath} relative to {@code basePath}, using forward slashes.
     * <p>
     * Only a leading {@code basePath} that ends on a path-segment boundary is removed; a base
     * path that merely occurs somewhere inside the path is left alone. One leading slash of
     * the result is dropped, also when the base path did not match.
     */
    static String computeRelativePath(String basePath, String absolutePath) {
        String base = convertPathToLinux(basePath)
        String target = convertPathToLinux(absolutePath)

        String relative = target
        if (!base.isEmpty() && target.startsWith(base)) {
            String rest = target.substring(base.length())
            // "/a/b" is a parent of "/a/b/c" but not of "/a/bc".
            if (rest.isEmpty() || base.endsWith("/") || rest.startsWith("/")) {
                relative = rest
            }
        }
        if (relative.startsWith("/")) {
            relative = relative.substring(1)
        }
        return relative
    }

    /** Counts the lines of the file. */
    static long countLine(String filePath) {
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            long count = 0
            while (reader.readLine() != null) {
                count++
            }
            return count
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Opens a buffered reader on the file; the caller is responsible for closing it. */
    static BufferedReader getReader(String filePath) {
        try {
            return new BufferedReader(new FileReader(filePath))
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e)
        }
    }

    /** Opens a buffered writer in append mode; the caller is responsible for closing it. */
    static BufferedWriter getWriter(String filePath) {
        return getWriter(filePath, true)
    }

    /**
     * Opens a buffered writer on the file; the caller is responsible for closing it.
     *
     * @param append true to add to the end of the file, false to overwrite it
     */
    static BufferedWriter getWriter(String filePath, boolean append) {
        try {
            return new BufferedWriter(new FileWriter(filePath, append))
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Returns the last element of the path (the file name with extension). */
    static String getFileName(String filePath) {
        Path path = Paths.get(filePath)
        return path.getFileName().toString()
    }

    /**
     * Returns the file name without the part after its last dot. A name without a dot, or
     * whose only dot is the first character, is returned unchanged.
     */
    static String getBaseName(String filePath) {
        String fileName = getFileName(filePath)
        int dotIndex = fileName.lastIndexOf('.')
        if (dotIndex > 0) {
            return fileName.substring(0, dotIndex)
        } else {
            return fileName
        }
    }

    /**
     * Returns the extension of the file name including the leading dot, or an empty string
     * when the name has no extension, starts with its only dot, or ends with a dot.
     * Dots in directory names are ignored.
     */
    static String getExtName(String filePath) {
        // Look for the dot only inside the last path segment.
        int nameStart = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1
        int dotIndex = filePath.lastIndexOf('.')
        if (dotIndex > nameStart && dotIndex < filePath.length() - 1) {
            return filePath.substring(dotIndex)
        } else {
            return ""
        }
    }

    /**
     * Returns the absolute path of the directory containing {@code filePath}, or of
     * {@code childDir} inside that directory when {@code childDir} is not null.
     */
    static String getDirPath(String filePath, String childDir) {
        File file = new File(filePath)
        File dir = file.getParentFile()
        if (childDir != null) {
            return new File(dir, childDir).getAbsolutePath()
        } else {
            return dir.getAbsolutePath()
        }
    }

    /** Reads the next line from the reader; null at end of input. */
    static String readOneLine(BufferedReader reader) {
        try {
            return reader.readLine()
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Reads the whole file into a list of lines. */
    static List<String> readLines(String filePath) {
        List<String> result = new ArrayList<>()
        try (BufferedReader bufferReader = new BufferedReader(new FileReader(filePath))) {
            String line
            while (null != (line = bufferReader.readLine())) {
                result.add(line)
            }
            return result
        } catch (Exception e) {
            throw new RuntimeException(e)
        }
    }

    /** Writes the line followed by a line separator. */
    static void writeLine(BufferedWriter writer, String line) {
        try {
            writer.write(line)
            writer.newLine()
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Appends the lines to the file. The writer is closed even when a write fails. */
    static void writeLines(String filePath, List<String> lines) {
        try (BufferedWriter writer = getWriter(filePath)) {
            for (String line : lines) {
                writeLine(writer, line)
            }
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Closes the resource if it is not null. */
    static void close(Closeable cloneable) {
        if (cloneable != null) {
            try {
                cloneable.close()
            } catch (IOException e) {
                throw new RuntimeException(e)
            }
        }
    }

    /** Creates the directory (including missing parents) if it does not exist yet. */
    static void mkdir(String dirPath) {
        mkdir(dirPath, null)
    }

    /**
     * Creates {@code childDirPath} inside {@code dirPath} (or {@code dirPath} itself when the
     * child is null), including missing parents, and prints a message when something was created.
     *
     * @return the absolute path of the directory, whether or not it had to be created
     */
    static String mkdir(String dirPath, String childDirPath) {
        File dirFile
        if (childDirPath == null) {
            dirFile = new File(dirPath)
        } else {
            dirFile = new File(dirPath, childDirPath)
        }
        if (dirFile.isDirectory()) {
            return dirFile.getAbsolutePath()
        }
        if (dirFile.mkdirs()) {
            System.out.println("mkdir: " + dirFile.getAbsolutePath())
        }
        return dirFile.getAbsolutePath()
    }

    /** Returns the first line of the file, or null for an empty file. The reader is always closed. */
    static String readFirstLine(String filePath) {
        try (BufferedReader reader = getReader(filePath)) {
            return readOneLine(reader)
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }
}

@ -1,163 +0,0 @@
package space.caoshd.text_tools.util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * File and path helpers. I/O failures are rethrown as {@link RuntimeException} with the
 * original exception as cause.
 */
public class FileUtils {

    /** Counts the lines of the file. */
    public static long countLine(String filePath) {
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            long count = 0;
            while (reader.readLine() != null) {
                count++;
            }
            return count;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a buffered reader on the file; the caller is responsible for closing it. */
    public static BufferedReader getReader(String filePath) {
        try {
            return new BufferedReader(new FileReader(filePath));
        } catch (FileNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    /** Opens a buffered writer in append mode; the caller is responsible for closing it. */
    public static BufferedWriter getWriter(String filePath) {
        return getWriter(filePath, true);
    }

    /**
     * Opens a buffered writer on the file; the caller is responsible for closing it.
     *
     * @param append true to add to the end of the file, false to overwrite it
     */
    public static BufferedWriter getWriter(String filePath, boolean append) {
        try {
            return new BufferedWriter(new FileWriter(filePath, append));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the last element of the path (the file name with extension). */
    public static String getFileName(String filePath) {
        Path path = Paths.get(filePath);
        return path.getFileName().toString();
    }

    /**
     * Returns the file name without the part after its last dot. A name without a dot, or
     * whose only dot is the first character, is returned unchanged.
     */
    public static String getBaseName(String filePath) {
        String fileName = getFileName(filePath);
        int dotIndex = fileName.lastIndexOf('.');
        if (dotIndex > 0) {
            return fileName.substring(0, dotIndex);
        } else {
            return fileName;
        }
    }

    /**
     * Returns the extension of the file name including the leading dot, or an empty string
     * when the name has no extension, starts with its only dot, or ends with a dot.
     * Dots in directory names are ignored.
     */
    public static String getExtName(String filePath) {
        // Look for the dot only inside the last path segment.
        int nameStart = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1;
        int dotIndex = filePath.lastIndexOf('.');
        if (dotIndex > nameStart && dotIndex < filePath.length() - 1) {
            return filePath.substring(dotIndex);
        } else {
            return "";
        }
    }

    /**
     * Returns the absolute path of the directory containing {@code filePath}, or of
     * {@code childDir} inside that directory when {@code childDir} is not null.
     */
    public static String getDirPath(String filePath, String childDir) {
        File file = new File(filePath);
        File dir = file.getParentFile();
        if (childDir != null) {
            return new File(dir, childDir).getAbsolutePath();
        } else {
            return dir.getAbsolutePath();
        }
    }

    /** Reads the next line from the reader; null at end of input. */
    public static String readOneLine(BufferedReader reader) {
        try {
            return reader.readLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Reads the whole file into a list of lines. */
    public static List<String> readLines(String filePath) {
        List<String> result = new ArrayList<>();
        try (Reader reader = new FileReader(filePath);
             BufferedReader bufferReader = new BufferedReader(reader)) {
            String line;
            while (null != (line = bufferReader.readLine())) {
                result.add(line);
            }
            return result;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Writes the line followed by a line separator. */
    public static void writeLine(BufferedWriter writer, String line) {
        try {
            writer.write(line);
            writer.newLine();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Appends the lines to the file. The writer is closed even when a write fails. */
    public static void writeLines(String filePath, List<String> lines) {
        try (BufferedWriter writer = getWriter(filePath)) {
            for (String line : lines) {
                writeLine(writer, line);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Closes the resource if it is not null. */
    public static void close(Closeable cloneable) {
        if (cloneable != null) {
            try {
                cloneable.close();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /** Creates the directory (including missing parents) if it does not exist yet. */
    public static void mkdir(String dirPath) {
        mkdir(dirPath, null);
    }

    /**
     * Creates {@code childDirPath} inside {@code dirPath} (or {@code dirPath} itself when the
     * child is null), including missing parents, and prints a message when something was created.
     *
     * @return the absolute path of the directory, whether or not it had to be created
     */
    public static String mkdir(String dirPath, String childDirPath) {
        File dirFile;
        if (childDirPath == null) {
            dirFile = new File(dirPath);
        } else {
            dirFile = new File(dirPath, childDirPath);
        }
        if (dirFile.isDirectory()) {
            return dirFile.getAbsolutePath();
        }
        if (dirFile.mkdirs()) {
            System.out.println("mkdir: " + dirFile.getAbsolutePath());
        }
        return dirFile.getAbsolutePath();
    }

    /** Returns the first line of the file, or null for an empty file. The reader is always closed. */
    public static String readFirstLine(String filePath) {
        try (BufferedReader reader = getReader(filePath)) {
            return readOneLine(reader);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

@ -0,0 +1,12 @@
package space.caoshd.text_tools.util
import java.text.SimpleDateFormat
/**
 * Generates identifiers for tasks and work directories.
 */
class IdUtils {

    /**
     * Builds an id from the current time formatted as {@code yyyyMMddHHmmssSSS} followed by a
     * random number between 0 and 9999, zero-padded to four digits.
     * <p>
     * Ids created within the same millisecond can collide, because only the four random
     * digits tell them apart.
     */
    static String generate() {
        def currentTime = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date())
        def randomNumberStr = String.format("%04d", new Random().nextInt(10000))
        return currentTime + randomNumberStr
    }
}

@ -1,16 +0,0 @@
package space.caoshd.text_tools.util;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
/**
 * Generates identifiers for tasks and work directories.
 */
public class IdUtils {

    /**
     * Builds an id from the current time formatted as {@code yyyyMMddHHmmssSSS} followed by a
     * random number between 0 and 9999, zero-padded to four digits.
     */
    public static String generate() {
        StringBuilder id = new StringBuilder();
        // Timestamp part, down to the millisecond.
        id.append(new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()));
        // Random part: 0..9999, left-padded with zeros to four digits.
        id.append(String.format("%04d", new Random().nextInt(10000)));
        return id.toString();
    }
}

@ -0,0 +1,104 @@
package space.caoshd.text_tools.util
/**
 * Typed, default-aware access to the entries of a {@code .properties} source.
 */
class PropHelper {
    private Map<String, String> propertyMap
    private Properties properties

    /**
     * Loads the properties from the given stream. The stream is not closed here; it stays
     * under the control of the caller.
     */
    PropHelper(InputStream stream) {
        loadProperties(stream)
        parseProperties()
    }

    /**
     * Loads the properties from a classpath resource and closes the resource afterwards.
     *
     * @throws NullPointerException if no resource with that name is on the classpath
     */
    PropHelper(String filename) {
        try (InputStream stream = Objects.requireNonNull(
                ResUtils.getClassPathFileStream(filename),
                "classpath resource not found: " + filename)) {
            loadProperties(stream)
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
        parseProperties()
    }

    private void loadProperties(InputStream stream) {
        Properties loaded = new Properties()
        try {
            loaded.load(stream)
            this.properties = loaded
        } catch (IOException e) {
            throw new RuntimeException(e)
        }
    }

    /** Copies the loaded properties into the string map used by the getters. */
    void parseProperties() {
        Map<String, String> result = new HashMap<>()
        for (String name : properties.stringPropertyNames()) {
            result.put(name, properties.getProperty(name))
        }
        propertyMap = result
    }

    /** Returns the value for the key, or an empty string when the key is absent. */
    String getString(String key) {
        return Optional.ofNullable(propertyMap.get(key)).orElse("")
    }

    /** Returns the value for the key, or {@code defaultValue} when the key is absent. */
    String getString(String key, String defaultValue) {
        return Optional.ofNullable(propertyMap.get(key)).orElse(defaultValue)
    }

    /**
     * Returns the value parsed as an integer, ignoring surrounding whitespace, or
     * {@code defaultInt} when the key is absent or blank.
     *
     * @throws NumberFormatException if the value is present but not an integer
     */
    Integer getInt(String key, Integer defaultInt) {
        String value = propertyMap.get(key)
        if (value == null || value.trim().isEmpty()) {
            return defaultInt
        }
        // Parse the trimmed text: " 5 " passes the blank check and must not fail here.
        return Integer.valueOf(value.trim())
    }

    /** Same as {@link #getInt(String, Integer)} with a default of 0. */
    Integer getInt(String key) {
        return getInt(key, 0)
    }

    /**
     * Returns true when the value, ignoring surrounding whitespace and case, is "true";
     * {@code defaultBoolean} when the key is absent or blank; false otherwise.
     */
    Boolean getBoolean(String key, Boolean defaultBoolean) {
        String value = propertyMap.get(key)
        if (value == null || value.trim().isEmpty()) {
            return defaultBoolean
        }
        return Boolean.valueOf(value.trim())
    }

    /** Same as {@link #getBoolean(String, Boolean)} with a default of false. */
    Boolean getBoolean(String key) {
        return getBoolean(key, false)
    }

    /**
     * Returns the value parsed as a long, ignoring surrounding whitespace, or
     * {@code defaultLong} when the key is absent or blank.
     *
     * @throws NumberFormatException if the value is present but not a long
     */
    Long getLong(String key, Long defaultLong) {
        String value = propertyMap.get(key)
        if (value == null || value.trim().isEmpty()) {
            return defaultLong
        }
        return Long.valueOf(value.trim())
    }

    /** Same as {@link #getLong(String, Long)} with a default of 0. */
    Long getLong(String key) {
        return getLong(key, 0L)
    }

    /**
     * Splits the value on commas and returns the trimmed, non-empty parts, or
     * {@code defaultStringList} when the key is absent.
     */
    List<String> getStringList(String key, List<String> defaultStringList) {
        String values = propertyMap.get(key)
        if (Objects.isNull(values)) {
            return defaultStringList
        }
        List<String> result = new ArrayList<>()
        for (String value : values.split(",")) {
            String trimmed = value.trim()
            if (!trimmed.isEmpty()) {
                result.add(trimmed)
            }
        }
        return result
    }

    /** Same as {@link #getStringList(String, List)} with an empty list as default. */
    List<String> getStringList(String key) {
        return getStringList(key, new ArrayList<>())
    }

    /** Returns the internal key/value map (not a copy). */
    Map<String, String> getPropertyMap() {
        return propertyMap
    }

    /** Returns the loaded {@link Properties} object. */
    Properties getProperties() {
        return properties
    }
}

@ -1,114 +0,0 @@
package space.caoshd.text_tools.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
/**
 * Typed, default-aware access to the entries of a {@code .properties} source.
 */
public class PropHelper {
    private Map<String, String> propertyMap;
    private Properties properties;

    /**
     * Loads the properties from the given stream. The stream is not closed here; it stays
     * under the control of the caller.
     */
    public PropHelper(InputStream stream) {
        loadProperties(stream);
        parseProperties();
    }

    /**
     * Loads the properties from a classpath resource and closes the resource afterwards.
     *
     * @throws NullPointerException if no resource with that name is on the classpath
     */
    public PropHelper(String filename) {
        try (InputStream stream = Objects.requireNonNull(
                ResUtils.getClassPathFileStream(filename),
                "classpath resource not found: " + filename)) {
            loadProperties(stream);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        parseProperties();
    }

    private void loadProperties(InputStream stream) {
        Properties loaded = new Properties();
        try {
            loaded.load(stream);
            this.properties = loaded;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Copies the loaded properties into the string map used by the getters. */
    public void parseProperties() {
        Map<String, String> result = new HashMap<>();
        for (String name : properties.stringPropertyNames()) {
            result.put(name, properties.getProperty(name));
        }
        propertyMap = result;
    }

    /** Returns the value for the key, or an empty string when the key is absent. */
    public String getString(String key) {
        return Optional.ofNullable(propertyMap.get(key)).orElse("");
    }

    /** Returns the value for the key, or {@code defaultValue} when the key is absent. */
    public String getString(String key, String defaultValue) {
        return Optional.ofNullable(propertyMap.get(key)).orElse(defaultValue);
    }

    /**
     * Returns the value parsed as an integer, ignoring surrounding whitespace, or
     * {@code defaultInt} when the key is absent or blank.
     *
     * @throws NumberFormatException if the value is present but not an integer
     */
    public Integer getInt(String key, Integer defaultInt) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultInt;
        }
        // Parse the trimmed text: " 5 " passes the blank check and must not fail here.
        return Integer.valueOf(value.trim());
    }

    /** Same as {@link #getInt(String, Integer)} with a default of 0. */
    public Integer getInt(String key) {
        return getInt(key, 0);
    }

    /**
     * Returns true when the value, ignoring surrounding whitespace and case, is "true";
     * {@code defaultBoolean} when the key is absent or blank; false otherwise.
     */
    public Boolean getBoolean(String key, Boolean defaultBoolean) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultBoolean;
        }
        return Boolean.valueOf(value.trim());
    }

    /** Same as {@link #getBoolean(String, Boolean)} with a default of false. */
    public Boolean getBoolean(String key) {
        return getBoolean(key, false);
    }

    /**
     * Returns the value parsed as a long, ignoring surrounding whitespace, or
     * {@code defaultLong} when the key is absent or blank.
     *
     * @throws NumberFormatException if the value is present but not a long
     */
    public Long getLong(String key, Long defaultLong) {
        String value = propertyMap.get(key);
        if (value == null || value.trim().isEmpty()) {
            return defaultLong;
        }
        return Long.valueOf(value.trim());
    }

    /** Same as {@link #getLong(String, Long)} with a default of 0. */
    public Long getLong(String key) {
        return getLong(key, 0L);
    }

    /**
     * Splits the value on commas and returns the trimmed, non-empty parts, or
     * {@code defaultStringList} when the key is absent.
     */
    public List<String> getStringList(String key, List<String> defaultStringList) {
        String values = propertyMap.get(key);
        if (Objects.isNull(values)) {
            return defaultStringList;
        }
        List<String> result = new ArrayList<>();
        for (String value : values.split(",")) {
            String trimmed = value.trim();
            if (!trimmed.isEmpty()) {
                result.add(trimmed);
            }
        }
        return result;
    }

    /** Same as {@link #getStringList(String, List)} with an empty list as default. */
    public List<String> getStringList(String key) {
        return getStringList(key, new ArrayList<>());
    }

    /** Returns the internal key/value map (not a copy). */
    public Map<String, String> getPropertyMap() {
        return propertyMap;
    }

    /** Returns the loaded {@link Properties} object. */
    public Properties getProperties() {
        return properties;
    }
}

@ -0,0 +1,15 @@
package space.caoshd.text_tools.util
/**
 * Access to resources on the classpath.
 */
class ResUtils {

    /** Opens the named classpath resource; returns null when it does not exist. */
    static InputStream getClassPathFileStream(String filename) {
        return ResUtils.class.getClassLoader().getResourceAsStream(filename)
    }

    /**
     * Returns the named classpath resource as a {@link File}.
     * <p>
     * For {@code file:} URLs the file is built from the URI, so escaped characters in the
     * location (for example {@code %20} for a space) are decoded. For any other protocol the
     * raw URL path is used as before.
     * The assertion fails when the resource does not exist.
     */
    static File getClassPathFile(String filename) {
        URL resource = ResUtils.class.getClassLoader().getResource(filename)
        assert resource != null : "classpath resource not found: " + filename
        if ("file" == resource.getProtocol()) {
            try {
                return new File(resource.toURI())
            } catch (URISyntaxException | IllegalArgumentException ignored) {
                // Not convertible to a file URI - fall back to the raw URL path below.
            }
        }
        return new File(resource.getFile())
    }
}

@ -1,19 +0,0 @@
package space.caoshd.text_tools.util;
import java.io.File;
import java.io.InputStream;
import java.net.URL;
/**
 * Access to resources on the classpath.
 */
public class ResUtils {

    /** Opens the named classpath resource; returns null when it does not exist. */
    public static InputStream getClassPathFileStream(String filename) {
        return ResUtils.class.getClassLoader().getResourceAsStream(filename);
    }

    /**
     * Returns the named classpath resource as a {@link File}.
     * <p>
     * For {@code file:} URLs the file is built from the URI, so escaped characters in the
     * location (for example {@code %20} for a space) are decoded. For any other protocol the
     * raw URL path is used as before.
     *
     * @throws IllegalArgumentException if no resource with that name is on the classpath
     */
    public static File getClassPathFile(String filename) {
        URL resource = ResUtils.class.getClassLoader().getResource(filename);
        // An explicit check: Java assertions are disabled unless the JVM runs with -ea.
        if (resource == null) {
            throw new IllegalArgumentException("classpath resource not found: " + filename);
        }
        if ("file".equals(resource.getProtocol())) {
            try {
                return new File(resource.toURI());
            } catch (java.net.URISyntaxException | IllegalArgumentException ignored) {
                // Not convertible to a file URI - fall back to the raw URL path below.
            }
        }
        return new File(resource.getFile());
    }
}

@ -0,0 +1,41 @@
package space.caoshd.text_tools.util
import space.caoshd.text_tools.model.Delimiter
import java.util.stream.Collectors
/**
 * Helpers for splitting delimited lines into columns and joining them back.
 */
class StrUtils {

    /** Splits the line using the regex of {@code Delimiter.COMMA}. */
    static List<String> split(String line) {
        String[] columns = line.split(Delimiter.COMMA.getRegex())
        return Arrays.asList(columns)
    }

    /** Splits the line using the regex of the given delimiter. */
    static List<String> split(String line, Delimiter delimiter) {
        String[] columns = line.split(delimiter.getRegex())
        return Arrays.asList(columns)
    }

    /** Splits every line using {@code Delimiter.COMMA}. */
    static List<List<String>> splitAll(List<String> lines) {
        return splitAll(lines, Delimiter.COMMA)
    }

    /** Splits every line using the given delimiter; one column list per input line. */
    static List<List<String>> splitAll(List<String> lines, Delimiter delimiter) {
        def splitter = Collectors.mapping({ String line -> split(line, delimiter) }, Collectors.toList())
        return lines.stream().collect(splitter)
    }

    /** Joins the columns with the delimiter text between them. */
    static String join(List<String> lineItem, String delimiter) {
        return String.join(delimiter, lineItem)
    }

    /** Joins every column list into one line; one line per input list. */
    static List<String> joinAll(List<List<String>> lineItems, String delimiter) {
        def joiner = Collectors.mapping({ List<String> lineItem -> join(lineItem, delimiter) }, Collectors.toList())
        return lineItems.stream().collect(joiner)
    }

    /** Parses the text as a double; returns null when it is not a valid number. */
    static Double parseDouble(String str) {
        Double parsed
        try {
            parsed = Double.parseDouble(str)
        } catch (NumberFormatException ignored) {
            parsed = null
        }
        return parsed
    }
}

@ -1,44 +0,0 @@
package space.caoshd.text_tools.util;
import space.caoshd.text_tools.model.Delimiter;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Helpers for splitting delimited lines into columns and joining them back.
 */
public class StrUtils {

    /** Splits the line using the regex of {@code Delimiter.COMMA}. */
    public static List<String> split(String line) {
        String[] columns = line.split(Delimiter.COMMA.getRegex());
        return Arrays.asList(columns);
    }

    /** Splits the line using the regex of the given delimiter. */
    public static List<String> split(String line, Delimiter delimiter) {
        String[] columns = line.split(delimiter.getRegex());
        return Arrays.asList(columns);
    }

    /** Splits every line using {@code Delimiter.COMMA}. */
    public static List<List<String>> splitAll(List<String> lines) {
        return splitAll(lines, Delimiter.COMMA);
    }

    /** Splits every line using the given delimiter; one column list per input line. */
    public static List<List<String>> splitAll(List<String> lines, Delimiter delimiter) {
        return lines.stream()
                .collect(Collectors.mapping(line -> split(line, delimiter), Collectors.toList()));
    }

    /** Joins the columns with the delimiter text between them. */
    public static String join(List<String> lineItem, String delimiter) {
        return String.join(delimiter, lineItem);
    }

    /** Joins every column list into one line; one line per input list. */
    public static List<String> joinAll(List<List<String>> lineItems, String delimiter) {
        return lineItems.stream()
                .collect(Collectors.mapping(columns -> join(columns, delimiter), Collectors.toList()));
    }

    /** Parses the text as a double; returns null when it is not a valid number. */
    public static Double parseDouble(String str) {
        Double parsed;
        try {
            parsed = Double.parseDouble(str);
        } catch (NumberFormatException notNumeric) {
            parsed = null;
        }
        return parsed;
    }
}

@ -0,0 +1,70 @@
package space.caoshd.text_tools.service
import com.ibm.icu.text.CharsetDetector
import com.ibm.icu.text.CharsetMatch
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
/**
 * Small command-line check that prints the character encoding ICU4J detects for a file.
 */
class EncodingCheck {

    /**
     * Prints the detected encoding of the file named by the first argument, or of the
     * built-in sample path when no argument is given.
     */
    static void main(String[] args) {
        String target = args ? args[0] : "C:\\\\Users\\\\caoshd\\\\Desktop\\\\经纬度.txt"
        getFileCharsetByICU4J(new File(target))
    }

    /**
     * Reads the whole file, lets ICU4J's {@link CharsetDetector} guess its encoding and prints
     * the result to standard output.
     * <p>
     * Prints {@code 默认} when the detector finds no match, and {@code null} when the file
     * could not be read (the read error itself is reported on standard error).
     */
    static void getFileCharsetByICU4J(File file) {
        String encoding = null
        try {
            byte[] data = Files.readAllBytes(Paths.get(file.getPath()))
            CharsetDetector detector = new CharsetDetector()
            detector.setText(data)
            CharsetMatch match = detector.detect()
            // detect() may return null; only ask a real match for its name.
            encoding = (match == null) ? "默认" : match.getName()
        } catch (IOException e) {
            System.err.println("Failed to read " + file.getPath() + ": " + e.getMessage())
        }
        System.out.println(encoding)
    }
}

@ -0,0 +1,29 @@
package space.caoshd.text_tools.service
import org.junit.Before
import org.junit.Test
import space.caoshd.text_tools.model.SearchOptions
/**
 * Smoke test that runs a keyword search over the project sources.
 * It only checks that {@code execute()} completes; the output is not asserted.
 */
class KeywordSearchServiceTest {

    KeywordSearchService keywordSearchService

    /** Configures a search for import lines in .java files, leaving out *Test.java. */
    @Before
    void setUp() {
        def options = new SearchOptions(
                includeFilenameSuffixes: [".java"],
                excludeFilenameSuffixes: ["Test.java"],
                scanPath: "D:\\workspace\\text-tools",
                basePath: "D:\\workspace\\text-tools"
        )
        options.predicate = { String line -> line.trim().startsWith("import") }

        keywordSearchService = new KeywordSearchService(searchOptions: options)
    }

    @Test
    void execute() {
        keywordSearchService.execute()
    }
}

@ -15,7 +15,7 @@ public class SlicerTest {
String CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
int CHARACTER_COUNT = CHARACTERS.length();
Random RANDOM = new Random();
long numberOfRows = 1000000;
long numberOfRows = 10000000*3;
String fileName = "data_more.csv";
try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
for (int i = 0; i < numberOfRows; i++) {

@ -0,0 +1,4 @@
D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\Main.java
D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\model\Delimiter.java
D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\service\DBAccessService.java
D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\service\LoadFileToDBService.java
Loading…
Cancel
Save