diff --git a/pom.xml b/pom.xml index 4ad1e43..b8e01a1 100644 --- a/pom.xml +++ b/pom.xml @@ -15,6 +15,17 @@ UTF-8 + + org.apache.groovy + groovy + 4.0.21 + + + com.ibm.icu + icu4j + 59.1 + + com.h2database h2 diff --git a/src/main/java/space/caoshd/text_tools/Main.java b/src/main/java/space/caoshd/text_tools/Main.java index ecc6ee1..05f43af 100644 --- a/src/main/java/space/caoshd/text_tools/Main.java +++ b/src/main/java/space/caoshd/text_tools/Main.java @@ -6,7 +6,11 @@ import space.caoshd.text_tools.service.LoadFileToDBService; public class Main { public static void main(String[] args) { - String inputPath = "E:\\workspace\\text-tools\\src\\main\\resources\\data_more.csv"; + // h2Sort(); + } + + static void h2Sort() { + String inputPath = "d:\\workspace\\text-tools\\data_more.csv"; LoadFileToDBService loadFileToDBService = new LoadFileToDBService(); DBAccessService dbAccessService = new DBAccessService(); loadFileToDBService.setDbAccessService(dbAccessService); diff --git a/src/main/java/space/caoshd/text_tools/config/GlobalConfig.groovy b/src/main/java/space/caoshd/text_tools/config/GlobalConfig.groovy new file mode 100644 index 0000000..54bb7c8 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/config/GlobalConfig.groovy @@ -0,0 +1,47 @@ +package space.caoshd.text_tools.config + +import space.caoshd.text_tools.util.FileUtils + +class GlobalConfig { + + static String createWorkDir() { + String baseDir = getBaseDir() + String tempDir = "work" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String createSearchDir() { + String baseDir = createWorkDir() + String tempDir = "search" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String createMergeDir() { + String baseDir = createWorkDir() + String tempDir = "merge" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String createSliceDir() { + String baseDir = createWorkDir() + String tempDir = "slice" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String createSortDir() { + String baseDir = createWorkDir() + String tempDir = "sort" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String createDataDir() { + String baseDir = createWorkDir() + String tempDir = "data" + return FileUtils.mkdir(baseDir, tempDir) + } + + static String getBaseDir() { + return System.getProperty("user.dir") + } + +} diff --git a/src/main/java/space/caoshd/text_tools/config/ProjectConfig.java b/src/main/java/space/caoshd/text_tools/config/ProjectConfig.java deleted file mode 100644 index fffb8ff..0000000 --- a/src/main/java/space/caoshd/text_tools/config/ProjectConfig.java +++ /dev/null @@ -1,41 +0,0 @@ -package space.caoshd.text_tools.config; - -import space.caoshd.text_tools.util.FileUtils; - -public class ProjectConfig { - - public static String getAndCreateWorkDir() { - String baseDir = getBaseDir(); - String tempDir = "work"; - return FileUtils.mkdir(baseDir, tempDir); - } - - public static String getAndCreateMergeDir() { - String baseDir = getAndCreateWorkDir(); - String tempDir = "merge"; - return FileUtils.mkdir(baseDir, tempDir); - } - - public static String getAndCreateSliceDir() { - String baseDir = getAndCreateWorkDir(); - String tempDir = "slice"; - return FileUtils.mkdir(baseDir, tempDir); - } - - public static String getAndCreateSortDir() { - String baseDir = getAndCreateWorkDir(); - String tempDir = "sort"; - return FileUtils.mkdir(baseDir, tempDir); - } - - public static String getAndCreateDataDir() { - String baseDir = getAndCreateWorkDir(); - String tempDir = "data"; - return FileUtils.mkdir(baseDir, tempDir); - } - - public static String getBaseDir() { - return System.getenv("user.dir"); - } - -} diff --git a/src/main/java/space/caoshd/text_tools/model/FileItem.groovy b/src/main/java/space/caoshd/text_tools/model/FileItem.groovy new file mode 100644 index 0000000..2ae3c22 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/model/FileItem.groovy @@ -0,0 +1,12 @@ +package space.caoshd.text_tools.model + +class FileItem { + String relativePath + String absolutePath + String relativeDir + String absoluteDir + String name + String baseName + Map context = [:] + List lineItems = [] +} diff --git a/src/main/java/space/caoshd/text_tools/model/LineItem.groovy b/src/main/java/space/caoshd/text_tools/model/LineItem.groovy new file mode 100644 index 0000000..f296058 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/model/LineItem.groovy @@ -0,0 +1,7 @@ +package space.caoshd.text_tools.model + +class LineItem { + String lineNum + String lineContent + String matchKeyword +} diff --git a/src/main/java/space/caoshd/text_tools/model/SearchOptions.groovy b/src/main/java/space/caoshd/text_tools/model/SearchOptions.groovy new file mode 100644 index 0000000..7620acf --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/model/SearchOptions.groovy @@ -0,0 +1,14 @@ +package space.caoshd.text_tools.model + +import java.util.function.Predicate + +class SearchOptions { + String basePath + String scanPath + String outputFile + String scanWorkDir + Predicate predicate + List includeFilenameSuffixes = [] + List excludeFilenameSuffixes = [] + Boolean scanContent = Boolean.TRUE +} diff --git a/src/main/java/space/caoshd/text_tools/service/DBAccessService.java b/src/main/java/space/caoshd/text_tools/service/DBAccessService.java index 2481a8b..57a305b 100644 --- a/src/main/java/space/caoshd/text_tools/service/DBAccessService.java +++ b/src/main/java/space/caoshd/text_tools/service/DBAccessService.java @@ -1,6 +1,6 @@ package space.caoshd.text_tools.service; -import space.caoshd.text_tools.config.ProjectConfig; +import space.caoshd.text_tools.config.GlobalConfig; import java.sql.Connection; import java.sql.DriverManager; @@ -9,6 +9,7 @@ import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -74,7 +75,7 @@ public class DBAccessService { if (memory) { url = "jdbc:h2:mem:" + filename; } else { - String dataDir = ProjectConfig.getAndCreateDataDir(); + String dataDir = GlobalConfig.createDataDir(); url = "jdbc:h2:file:" + dataDir + "/" + filename; } return url; @@ -155,6 +156,7 @@ public class DBAccessService { public List> list(String sql, List params) { try { + System.out.println(new Date()); List> result = new ArrayList<>(); PreparedStatement preparedStatement = connection.prepareStatement(sql); for (int i = 0; i < params.size(); i++) { @@ -168,8 +170,11 @@ public class DBAccessService { for (int i = 1; i <= columnCount; i++) { String columnLabel = metaData.getColumnLabel(i); String columnValue = resultSet.getString(i); - row.put(columnLabel.toUpperCase(), columnValue); + System.out.print(columnValue + ","); + // row.put(columnLabel.toUpperCase(), columnValue); } + System.out.println(new Date()); + result.add(row); } return result; diff --git a/src/main/java/space/caoshd/text_tools/service/KeywordSearchService.groovy b/src/main/java/space/caoshd/text_tools/service/KeywordSearchService.groovy new file mode 100644 index 0000000..eb950bb --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/service/KeywordSearchService.groovy @@ -0,0 +1,140 @@ +package space.caoshd.text_tools.service + +import groovy.io.FileType +import space.caoshd.text_tools.config.GlobalConfig +import space.caoshd.text_tools.model.FileItem +import space.caoshd.text_tools.model.LineItem +import space.caoshd.text_tools.model.SearchOptions +import space.caoshd.text_tools.util.FileUtils +import space.caoshd.text_tools.util.IdUtils + +import java.util.stream.Collectors + +class KeywordSearchService { + + SearchOptions searchOptions + + File scanConfigFile + + List scanFileItems + + void execute() { + def searchDir = GlobalConfig.createSearchDir() + def taskDir = IdUtils.generate() + def scanWorkDir = FileUtils.mkdir(searchDir, taskDir) + searchOptions.scanWorkDir = scanWorkDir + + if (new File(searchOptions.scanPath).isFile()) { + scanConfigFile = new File(searchOptions.scanPath) + } else { + scanConfigFile = createScanConfigFile() + } + + if (searchOptions.scanContent) { + scanFileByConfigList() + scanLineByFileItems() + outputByFileItems() + } + } + + File createScanConfigFile() { + def result = new File(searchOptions.scanWorkDir, "ScanFilePath.txt") + result.withWriter { writer -> + new File(searchOptions.scanPath).eachFileRecurse(FileType.FILES, { file -> + if (isScanFile(file)) { + writer.write(file.absolutePath) + writer.newLine() + } + }) + } + + return result + } + + boolean isScanFile(File file) { + def include = searchOptions.includeFilenameSuffixes.stream().allMatch { suffix -> return file.absolutePath.containsIgnoreCase(suffix) + } + if (!include) return false + + def exclude = searchOptions.excludeFilenameSuffixes.stream().allMatch { suffix -> return file.absolutePath.containsIgnoreCase(suffix) + } + return !exclude + } + + void scanFileByConfigList() { + List scanFileItems = [] + scanConfigFile.eachLine { filePath -> scanFileItems.add(scanFile(new File(filePath))) + } + this.scanFileItems = scanFileItems + } + + FileItem scanFile(File file) { + def result = new FileItem() + result.name = file.getName() + result.absolutePath = FileUtils.convertPathToLinux(file.absolutePath) + result.relativePath = FileUtils.computeRelativePath(searchOptions.basePath, file.absolutePath) + result.absoluteDir = FileUtils.convertPathToLinux(new File(result.absolutePath).getParent()) + result.relativeDir = FileUtils.convertPathToLinux(new File(result.relativePath).getParent()) + result.baseName = FileUtils.getBaseName(result.absolutePath) + return result + } + + void scanLineByFileItems() { + def groupedByAbsoluteDirFileItems = scanFileItems.stream().collect(Collectors.groupingBy { FileItem fileItem -> fileItem.absoluteDir }) + + groupedByAbsoluteDirFileItems.each { absoluteDir, fileItems -> + def context = [:] as LinkedHashMap + fileItems.each { fileItem -> context.put("sibling", fileItem) } + fileItems.each { fileItem -> fileItem.context = context } + } + + scanFileItems.each { fileItem -> + def absolutePath = fileItem.absolutePath + def file = new File(absolutePath) + // noinspection GroovyMissingReturnStatement + file.eachLine { line, lineNum -> + def lineItem = scanLine(line, lineNum) + if (lineItem) { + fileItem.lineItems.add(lineItem) + } + } + } + } + + LineItem scanLine(String line, int lineNum) { + def predicate = searchOptions.predicate + + if (!predicate.test(line)) { + return null + } + + def result = new LineItem() + result.lineContent = line + result.lineNum = lineNum + return result + } + + void outputByFileItems() { + scanFileItems.each { fileItem -> output(fileItem) } + } + + void output(FileItem fileItem) { + def lineItems = fileItem.lineItems + if (lineItems.isEmpty()) { + return + } + + def outputFile = new File(searchOptions.scanWorkDir, "detail.tsv") + outputFile.withWriter { writer -> + fileItem.lineItems.each { lineItem -> + def columns = [ + fileItem.name, + lineItem.lineContent.replace("\t", "") + ] + writer.write(columns.join("\t")) + writer.newLine() + } + } + } + +} diff --git a/src/main/java/space/caoshd/text_tools/service/LoadFileToDBService.java b/src/main/java/space/caoshd/text_tools/service/LoadFileToDBService.java index 123ea25..add143e 100644 --- a/src/main/java/space/caoshd/text_tools/service/LoadFileToDBService.java +++ b/src/main/java/space/caoshd/text_tools/service/LoadFileToDBService.java @@ -33,7 +33,7 @@ public class LoadFileToDBService { String taskId = IdUtils.generate(); String finalName = taskId + "_" + filename; dbAccessService.setFilename(finalName); - dbAccessService.setMemory(true); + dbAccessService.setMemory(false); dbAccessService.init(); dbAccessService.execute(tableCreateSql); diff --git a/src/main/java/space/caoshd/text_tools/util/FileUtils.groovy b/src/main/java/space/caoshd/text_tools/util/FileUtils.groovy new file mode 100644 index 0000000..e5dcf54 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/util/FileUtils.groovy @@ -0,0 +1,167 @@ +package space.caoshd.text_tools.util + + +import java.nio.file.Path +import java.nio.file.Paths + +class FileUtils { + + static String convertPathToLinux(String windowsPath) { + return windowsPath.replaceAll("\\\\", "/") + } + + static String computeRelativePath(String basePath, String absolutePath) { + basePath = convertPathToLinux(basePath) + absolutePath = convertPathToLinux(absolutePath) + def filePath = absolutePath.replace(basePath, "") + if (filePath.startsWith("/")) { + filePath = filePath.substring(1) + } + filePath + } + + static long countLine(String filePath) { + try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) { + long count = 0 + while (reader.readLine() != null) { + count++ + } + return count + } catch (IOException e) { + throw new RuntimeException(e) + } + } + + static BufferedReader getReader(String filePath) { + try { + return new BufferedReader(new FileReader(filePath)) + } catch (FileNotFoundException e) { + throw new RuntimeException(e) + } + } + + static BufferedWriter getWriter(String filePath) { + return getWriter(filePath, true) + } + + static BufferedWriter getWriter(String filePath, boolean append) { + try { + return new BufferedWriter(new FileWriter(filePath, append)) + } catch (IOException e) { + throw new RuntimeException(e) + } + } + + static String getFileName(String filePath) { + Path path = Paths.get(filePath) + return path.getFileName().toString() + } + + static String getBaseName(String filePath) { + String fileName = getFileName(filePath) + int dotIndex = fileName.lastIndexOf('.') + if (dotIndex > 0) { + return fileName.substring(0, dotIndex) + } else { + return fileName + } + } + + static String getExtName(String filePath) { + int dotIndex = filePath.lastIndexOf('.') + if (dotIndex > 0 && dotIndex < filePath.length() - 1) { + return filePath.substring(dotIndex) + } else { + return "" + } + } + + static String getDirPath(String filePath, String childDir) { + File file = new File(filePath) + File dir = file.getParentFile() + if (childDir != null) { + return new File(dir, childDir).getAbsolutePath() + } else { + return dir.getAbsolutePath() + } + } + + static String readOneLine(BufferedReader reader) { + try { + return reader.readLine() + } catch (IOException e) { + throw new RuntimeException(e) + } + } + + static List readLines(String filePath) { + List result = new ArrayList<>() + try (Reader reader = new FileReader(filePath) + BufferedReader bufferReader = new BufferedReader(reader)) { + String line + while (null != (line = bufferReader.readLine())) { + result.add(line) + } + return result + } catch (Exception e) { + throw new RuntimeException(e) + } + } + + static void writeLine(BufferedWriter writer, String line) { + try { + writer.write(line) + writer.newLine() + } catch (IOException e) { + throw new RuntimeException(e) + } + } + + static void writeLines(String filePath, List lines) { + BufferedWriter writer = getWriter(filePath) + for (String line : lines) { + writeLine(writer, line) + } + close(writer) + } + + static void close(Closeable cloneable) { + if (cloneable != null) { + try { + cloneable.close() + } catch (IOException e) { + throw new RuntimeException(e) + } + } + } + + static void mkdir(String dirPath) { + mkdir(dirPath, null) + } + + static String mkdir(String dirPath, String childDirPath) { + File dirFile + + if (childDirPath == null) { + dirFile = new File(dirPath) + } else { + dirFile = new File(dirPath, childDirPath) + } + + if (dirFile.isDirectory()) { + return dirFile.getAbsolutePath() + } + if (dirFile.mkdirs()) { + System.out.println("mkdir: " + dirFile.getAbsolutePath()) + } + return dirFile.getAbsolutePath() + } + + static String readFirstLine(String filePath) { + BufferedReader reader = getReader(filePath) + String result = readOneLine(reader) + close(reader) + return result + } + +} diff --git a/src/main/java/space/caoshd/text_tools/util/FileUtils.java b/src/main/java/space/caoshd/text_tools/util/FileUtils.java deleted file mode 100644 index 1e8aaae..0000000 --- a/src/main/java/space/caoshd/text_tools/util/FileUtils.java +++ /dev/null @@ -1,163 +0,0 @@ -package space.caoshd.text_tools.util; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.Closeable; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.Reader; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; - -public class FileUtils { - - public static long countLine(String filePath) { - try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) { - long count = 0; - - while (reader.readLine() != null) { - count++; - } - return count; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static BufferedReader getReader(String filePath) { - try { - return new BufferedReader(new FileReader(filePath)); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - } - - public static BufferedWriter getWriter(String filePath) { - return getWriter(filePath, true); - } - - public static BufferedWriter getWriter(String filePath, boolean append) { - try { - return new BufferedWriter(new FileWriter(filePath, append)); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static String getFileName(String filePath) { - Path path = Paths.get(filePath); - return path.getFileName().toString(); - } - - public static String getBaseName(String filePath) { - String fileName = getFileName(filePath); - int dotIndex = fileName.lastIndexOf('.'); - if (dotIndex > 0) { - return fileName.substring(0, dotIndex); - } else { - return fileName; - } - } - - public static String getExtName(String filePath) { - int dotIndex = filePath.lastIndexOf('.'); - if (dotIndex > 0 && dotIndex < filePath.length() - 1) { - return filePath.substring(dotIndex); - } else { - return ""; - } - } - - public static String getDirPath(String filePath, String childDir) { - File file = new File(filePath); - File dir = file.getParentFile(); - if (childDir != null) { - return new File(dir, childDir).getAbsolutePath(); - } else { - return dir.getAbsolutePath(); - } - } - - public static String readOneLine(BufferedReader reader) { - try { - return reader.readLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static List readLines(String filePath) { - List result = new ArrayList<>(); - try (Reader reader = new FileReader(filePath); - BufferedReader bufferReader = new BufferedReader(reader)) { - String line; - while (null != (line = bufferReader.readLine())) { - result.add(line); - } - return result; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public static void writeLine(BufferedWriter writer, String line) { - try { - writer.write(line); - writer.newLine(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static void writeLines(String filePath, List lines) { - BufferedWriter writer = getWriter(filePath); - for (String line : lines) { - writeLine(writer, line); - } - close(writer); - } - - public static void close(Closeable cloneable) { - if (cloneable != null) { - try { - cloneable.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - public static void mkdir(String dirPath) { - mkdir(dirPath, null); - } - - public static String mkdir(String dirPath, String childDirPath) { - File dirFile; - - if (childDirPath == null) { - dirFile = new File(dirPath); - } else { - dirFile = new File(dirPath, childDirPath); - } - - if (dirFile.isDirectory()) { - return dirFile.getAbsolutePath(); - } - if (dirFile.mkdirs()) { - System.out.println("mkdir: " + dirFile.getAbsolutePath()); - } - return dirFile.getAbsolutePath(); - } - - public static String readFirstLine(String filePath) { - BufferedReader reader = getReader(filePath); - String result = readOneLine(reader); - close(reader); - return result; - } -} diff --git a/src/main/java/space/caoshd/text_tools/util/IdUtils.groovy b/src/main/java/space/caoshd/text_tools/util/IdUtils.groovy new file mode 100644 index 0000000..ded964d --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/util/IdUtils.groovy @@ -0,0 +1,12 @@ +package space.caoshd.text_tools.util + +import java.text.SimpleDateFormat + +class IdUtils { + static String generate() { + FileUtils.convertPathToLinux("") + def currentTime = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()) + def randomNumberStr = String.format("%04d", new Random().nextInt(10000)) + return currentTime + randomNumberStr + } +} diff --git a/src/main/java/space/caoshd/text_tools/util/IdUtils.java b/src/main/java/space/caoshd/text_tools/util/IdUtils.java deleted file mode 100644 index ff9935b..0000000 --- a/src/main/java/space/caoshd/text_tools/util/IdUtils.java +++ /dev/null @@ -1,16 +0,0 @@ -package space.caoshd.text_tools.util; - -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Random; - -public class IdUtils { - public static String generate() { - SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS"); - String currentTime = sdf.format(new Date()); - Random random = new Random(); - int randomNumber = random.nextInt(10000); // 生成0到9999之间的随机数 - String randomNumberStr = String.format("%04d", randomNumber); // 格式化为四位数的字符串,不足四位前面补0 - return currentTime + randomNumberStr; - } -} diff --git a/src/main/java/space/caoshd/text_tools/util/PropHelper.groovy b/src/main/java/space/caoshd/text_tools/util/PropHelper.groovy new file mode 100644 index 0000000..d881b82 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/util/PropHelper.groovy @@ -0,0 +1,104 @@ +package space.caoshd.text_tools.util + +class PropHelper { + + private Map propertyMap + + private Properties properties + + PropHelper(InputStream stream) { + loadProperties(stream) + parseProperties() + } + + PropHelper(String filename) { + this(ResUtils.getClassPathFileStream(filename)) + } + + private void loadProperties(InputStream stream) { + Properties properties = new Properties() + try { + properties.load(stream) + this.properties = properties + } catch (IOException e) { + throw new RuntimeException(e) + } + } + + void parseProperties() { + Map result = new HashMap<>() + properties.forEach((k, v) -> result.put((String) k, (String) v)) + propertyMap = result + } + + String getString(String key) { + return Optional.ofNullable(propertyMap.get(key)).orElse("") + } + + String getString(String key, String defaultValue) { + return Optional.ofNullable(propertyMap.get(key)).orElse(defaultValue) + } + + Integer getInt(String key, Integer defaultInt) { + String value = propertyMap.get(key) + if (value == null || value.trim().isEmpty()) { + return defaultInt + } + return Integer.valueOf(propertyMap.get(key)) + } + + Integer getInt(String key) { + return getInt(key, 0) + } + + Boolean getBoolean(String key, Boolean defaultBoolean) { + String value = propertyMap.get(key) + if (value == null || value.trim().isEmpty()) { + return defaultBoolean + } + return Boolean.valueOf(value) + } + + Boolean getBoolean(String key) { + return getBoolean(key, false) + } + + Long getLong(String key, Long defaultLong) { + String value = propertyMap.get(key) + if (value == null || value.trim().isEmpty()) { + return defaultLong + } + return Long.valueOf(propertyMap.get(key)) + } + + Long getLong(String key) { + return getLong(key, 0L) + } + + List getStringList(String key, List defaultStringList) { + String values = propertyMap.get(key) + if (Objects.isNull(values)) { + return defaultStringList + } + List result = new ArrayList<>() + for (String value : values.split(",")) { + if (!value.trim().isEmpty()) { + result.add(value.trim()) + } + } + return result + } + + List getStringList(String key) { + return getStringList(key, new ArrayList<>()) + } + + Map getPropertyMap() { + return propertyMap + } + + Properties getProperties() { + return properties + } + +} diff --git a/src/main/java/space/caoshd/text_tools/util/PropHelper.java b/src/main/java/space/caoshd/text_tools/util/PropHelper.java deleted file mode 100644 index 09550f1..0000000 --- a/src/main/java/space/caoshd/text_tools/util/PropHelper.java +++ /dev/null @@ -1,114 +0,0 @@ -package space.caoshd.text_tools.util; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Properties; - -public class PropHelper { - - private Map propertyMap; - - private Properties properties; - - public PropHelper(InputStream stream) { - loadProperties(stream); - parseProperties(); - } - - public PropHelper(String filename) { - this(ResUtils.getClassPathFileStream(filename)); - } - - private void loadProperties(InputStream stream) { - Properties properties = new Properties(); - try { - properties.load(stream); - this.properties = properties; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public void parseProperties() { - Map result = new HashMap<>(); - properties.forEach((k, v) -> result.put((String) k, (String) v)); - propertyMap = result; - } - - public String getString(String key) { - return Optional.ofNullable(propertyMap.get(key)).orElse(""); - } - - public String getString(String key, String defaultValue) { - return Optional.ofNullable(propertyMap.get(key)).orElse(defaultValue); - } - - public Integer getInt(String key, Integer defaultInt) { - String value = propertyMap.get(key); - if (value == null || value.trim().isEmpty()) { - return defaultInt; - } - return Integer.valueOf(propertyMap.get(key)); - } - - public Integer getInt(String key) { - return getInt(key, 0); - } - - public Boolean getBoolean(String key, Boolean defaultBoolean) { - String value = propertyMap.get(key); - if (value == null || value.trim().isEmpty()) { - return defaultBoolean; - } - return Boolean.valueOf(value); - } - - public Boolean getBoolean(String key) { - return getBoolean(key, false); - } - - public Long getLong(String key, Long defaultLong) { - String value = propertyMap.get(key); - if (value == null || value.trim().isEmpty()) { - return defaultLong; - } - return Long.valueOf(propertyMap.get(key)); - } - - public Long getLong(String key) { - return getLong(key, 0L); - } - - public List getStringList(String key, List defaultStringList) { - String values = propertyMap.get(key); - if (Objects.isNull(values)) { - return defaultStringList; - } - List result = new ArrayList<>(); - for (String value : values.split(",")) { - if (!value.trim().isEmpty()) { - result.add(value.trim()); - } - } - return result; - } - - public List getStringList(String key) { - return getStringList(key, new ArrayList<>()); - } - - public Map getPropertyMap() { - return propertyMap; - } - - public Properties getProperties() { - return properties; - } - -} diff --git a/src/main/java/space/caoshd/text_tools/util/ResUtils.groovy b/src/main/java/space/caoshd/text_tools/util/ResUtils.groovy new file mode 100644 index 0000000..23d7372 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/util/ResUtils.groovy @@ -0,0 +1,15 @@ +package space.caoshd.text_tools.util + +class ResUtils { + + static InputStream getClassPathFileStream(String filename) { + return ResUtils.class.getClassLoader().getResourceAsStream(filename) + } + + static File getClassPathFile(String filename) { + URL resource = ResUtils.class.getClassLoader().getResource(filename) + assert resource != null + return new File(resource.getFile()) + } + +} diff --git a/src/main/java/space/caoshd/text_tools/util/ResUtils.java b/src/main/java/space/caoshd/text_tools/util/ResUtils.java deleted file mode 100644 index 42ba4f6..0000000 --- a/src/main/java/space/caoshd/text_tools/util/ResUtils.java +++ /dev/null @@ -1,19 +0,0 @@ -package space.caoshd.text_tools.util; - -import java.io.File; -import java.io.InputStream; -import java.net.URL; - -public class ResUtils { - - public static InputStream getClassPathFileStream(String filename) { - return ResUtils.class.getClassLoader().getResourceAsStream(filename); - } - - public static File getClassPathFile(String filename) { - URL resource = ResUtils.class.getClassLoader().getResource(filename); - assert resource != null; - return new File(resource.getFile()); - } - -} diff --git a/src/main/java/space/caoshd/text_tools/util/StrUtils.groovy b/src/main/java/space/caoshd/text_tools/util/StrUtils.groovy new file mode 100644 index 0000000..2e28799 --- /dev/null +++ b/src/main/java/space/caoshd/text_tools/util/StrUtils.groovy @@ -0,0 +1,41 @@ +package space.caoshd.text_tools.util + +import space.caoshd.text_tools.model.Delimiter + +import java.util.stream.Collectors + +class StrUtils { + + static List split(String line) { + return Arrays.asList(line.split(Delimiter.COMMA.getRegex())) + } + + static List split(String line, Delimiter delimiter) { + return Arrays.asList(line.split(delimiter.getRegex())) + } + + static List> splitAll(List lines) { + return splitAll(lines, Delimiter.COMMA) + } + + static List> splitAll(List lines, Delimiter delimiter) { + return lines.stream().map(line -> split(line, delimiter)).collect(Collectors.toList()) + } + + static String join(List lineItem, String delimiter) { + return String.join(delimiter, lineItem) + } + + static List joinAll(List> lineItems, String delimiter) { + return lineItems.stream().map(lineItem -> join(lineItem, delimiter)).collect(Collectors.toList()) + } + + static Double parseDouble(String str) { + try { + return Double.parseDouble(str) + } catch (NumberFormatException ignored) { + return null + } + } + +} diff --git a/src/main/java/space/caoshd/text_tools/util/StrUtils.java b/src/main/java/space/caoshd/text_tools/util/StrUtils.java deleted file mode 100644 index 12a46a6..0000000 --- a/src/main/java/space/caoshd/text_tools/util/StrUtils.java +++ /dev/null @@ -1,44 +0,0 @@ -package space.caoshd.text_tools.util; - -import space.caoshd.text_tools.model.Delimiter; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -public class StrUtils { - - - public static List split(String line) { - return Arrays.asList(line.split(Delimiter.COMMA.getRegex())); - } - - public static List split(String line, Delimiter delimiter) { - return Arrays.asList(line.split(delimiter.getRegex())); - } - - public static List> splitAll(List lines) { - return splitAll(lines, Delimiter.COMMA); - } - - public static List> splitAll(List lines, Delimiter delimiter) { - return lines.stream().map(line -> StrUtils.split(line, delimiter)).collect(Collectors.toList()); - } - - public static String join(List lineItem, String delimiter) { - return String.join(delimiter, lineItem); - } - - public static List joinAll(List> lineItems, String delimiter) { - return lineItems.stream().map(lineItem -> StrUtils.join(lineItem, delimiter)).collect(Collectors.toList()); - } - - public static Double parseDouble(String str) { - try { - return Double.parseDouble(str); - } catch (NumberFormatException e) { - return null; - } - } - -} diff --git a/src/test/java/space/caoshd/text_tools/service/EncodingCheck.groovy b/src/test/java/space/caoshd/text_tools/service/EncodingCheck.groovy new file mode 100644 index 0000000..af78d2c --- /dev/null +++ b/src/test/java/space/caoshd/text_tools/service/EncodingCheck.groovy @@ -0,0 +1,70 @@ +package space.caoshd.text_tools.service + +import com.ibm.icu.text.CharsetDetector +import com.ibm.icu.text.CharsetMatch + +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.Paths + +class EncodingCheck { + + + static void main(String[] args) { +// // 文件路径 +// def filePath = "C:\\Users\\caoshd\\Desktop\\经纬度.txt" +// String encoding = null +// try { +// Path path = Paths.get(filePath) +// byte[] data = Files.readAllBytes(path) +// CharsetDetector charsetDetector = new CharsetDetector() +// charsetDetector.setText(data) +// CharsetMatch charsetMatch = charsetDetector.detect() +// encoding = charsetMatch.getName() +// println encoding +// } catch (IOException e) { +// // なし +// } + + + getFileCharsetByICU4J(new File("C:\\\\Users\\\\caoshd\\\\Desktop\\\\经纬度.txt")) + } + +// static void getFileEncode(File file){ +// String charsetName = null +// try{ +// CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance() +// detector.add(new ParsingDetector(false)) +// detector.add(new ByteOrderMarkDetector()) +// detector.add(JChardetFacade.getInstance()) +// detector.add(ASCIIDetector.getInstance()) +// detector.add(UnicodeDetector.getInstance()) +// java.nio.charset.Charset charset = detector.detectCodepage(file.toURL()) +// if (charset != null) { +// charsetName = charset.name() +// }else{ +// charsetName = "GB2312" +// } +// }catch (Exception e){ +// e.printStackTrace() +// } +// System.out.println("charsetName====="+charsetName) +// } + + static void getFileCharsetByICU4J(File file) { + String encoding = null + try { + Path path = Paths.get(file.getPath()) + byte[] data = Files.readAllBytes(path) + CharsetDetector detector = new CharsetDetector() + detector.setText(data) + CharsetMatch match = detector.detect() + if (match == null) { + encoding = "默认" + } + encoding = match.getName() + } catch (IOException var6) { + } + System.out.println(encoding) + } +} diff --git a/src/test/java/space/caoshd/text_tools/service/KeywordSearchServiceTest.groovy b/src/test/java/space/caoshd/text_tools/service/KeywordSearchServiceTest.groovy new file mode 100644 index 0000000..c6b508a --- /dev/null +++ b/src/test/java/space/caoshd/text_tools/service/KeywordSearchServiceTest.groovy @@ -0,0 +1,29 @@ +package space.caoshd.text_tools.service + +import org.junit.Before +import org.junit.Test +import space.caoshd.text_tools.model.SearchOptions + +class KeywordSearchServiceTest { + + KeywordSearchService keywordSearchService + + @Before + void setUp() { + def searchOptions = new SearchOptions() + searchOptions.includeFilenameSuffixes = [".java"] + searchOptions.excludeFilenameSuffixes = ["Test.java"] + searchOptions.scanPath = "D:\\workspace\\text-tools" + searchOptions.basePath = "D:\\workspace\\text-tools" + searchOptions.predicate = { String line -> line.trim().startsWith("import") } + + keywordSearchService = new KeywordSearchService() + keywordSearchService.searchOptions = searchOptions + } + + @Test + void execute() { + keywordSearchService.execute() + } + +} \ No newline at end of file diff --git a/src/test/java/space/caoshd/text_tools/slice/SlicerTest.java b/src/test/java/space/caoshd/text_tools/slice/SlicerTest.java index 6520779..932c283 100644 --- a/src/test/java/space/caoshd/text_tools/slice/SlicerTest.java +++ b/src/test/java/space/caoshd/text_tools/slice/SlicerTest.java @@ -15,7 +15,7 @@ public class SlicerTest { String CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; int CHARACTER_COUNT = CHARACTERS.length(); Random RANDOM = new Random(); - long numberOfRows = 1000000; + long numberOfRows = 10000000*3; String fileName = "data_more.csv"; try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) { for (int i = 0; i < numberOfRows; i++) { diff --git a/work/search/202404211937540945990/ScanFilePath.txt b/work/search/202404211937540945990/ScanFilePath.txt new file mode 100644 index 0000000..44fb9ff --- /dev/null +++ b/work/search/202404211937540945990/ScanFilePath.txt @@ -0,0 +1,4 @@ +D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\Main.java +D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\model\Delimiter.java +D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\service\DBAccessService.java +D:\workspace\text-tools\src\main\java\space\caoshd\text_tools\service\LoadFileToDBService.java