feat(ip): 添加区域数据转换工具
- 实现 AreaCity 数据格式转换功能
- 支持将 ok_data_level3.csv 转换为 area.csv 格式
- 添加数据转换规则处理(id、name、type、parentId 映射)
- 集成 CsvUtil 进行 CSV 文件读写操作
- 实现数据过滤(排除国外数据)
- 添加数据排序功能以方便对比变更
- 提供完整的转换工具使用说明

pull/247/head
parent
c4385080cc
commit
dc956c30cc
|
|
@ -0,0 +1,115 @@
|
|||
package cn.iocoder.yudao.framework.ip.core.utils;
|
||||
|
||||
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.text.csv.CsvData;
import cn.hutool.core.text.csv.CsvRow;
import cn.hutool.core.text.csv.CsvUtil;
import cn.iocoder.yudao.framework.ip.core.Area;
import org.junit.jupiter.api.Test;

import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
|
||||
|
||||
/**
|
||||
* AreaCity 数据转换工具
|
||||
*
|
||||
* 将 AreaCity-JsSpider-StatsGov 的 ok_data_level3.csv 格式转换为 yudao-cloud 的 area.csv 格式
|
||||
*
|
||||
* 新格式列:id,pid,deep,name,pinyin_prefix,pinyin,ext_id,ext_name
|
||||
* 旧格式列:id,name,type,parentId
|
||||
*
|
||||
* 转换规则:
|
||||
* 1. id = id (完整行政区划代码,需要补码)
|
||||
* 2. name = ext_name (完整名称)
|
||||
* 3. type = deep + 2 (0->2省, 1->3市, 2->4区)
|
||||
* 4. parentId = 父节点的id (省级 parentId=1)
|
||||
*
|
||||
* 使用方法:
|
||||
* 1. 下载 ok_data_level3-4.csv.7z: https://github.com/xiangyuecn/AreaCity-JsSpider-StatsGov/releases
|
||||
* 2. 解压获取 ok_data_level3.csv 放到 src/test/resources/ok_data_level3.csv
|
||||
* 3. 运行本工具转换
|
||||
* 4. 替换 area_new.csv
|
||||
*
|
||||
* @author 芋道源码
|
||||
* @see https://github.com/YunaiV/yudao-cloud/issues/299
|
||||
*/
|
||||
public class AreaDataConverterTest {
|
||||
|
||||
@Test
|
||||
public void ok_data_level3_to_area() {
|
||||
String inputFile = "ok_data_level3.csv";
|
||||
String outputFile = "src/test/resources/area_new.csv";
|
||||
|
||||
System.out.println("开始转换 AreaCity 数据...");
|
||||
System.out.println("输入文件: " + inputFile);
|
||||
System.out.println("输出文件: " + outputFile);
|
||||
|
||||
try {
|
||||
convert(inputFile, outputFile);
|
||||
System.out.println("转换完成!");
|
||||
} catch (Exception e) {
|
||||
System.err.println("转换失败: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行数据转换
|
||||
*/
|
||||
public void convert(String inputFile, String outputFile) throws IOException {
|
||||
// 读取新格式文件
|
||||
CsvData csvData = CsvUtil.getReader().read(ResourceUtil.getUtf8Reader(inputFile));
|
||||
List<CsvRow> rows = csvData.getRows();
|
||||
|
||||
// 跳过 header
|
||||
rows.remove(0);
|
||||
|
||||
List<AreaObj> areas = new ArrayList<>();
|
||||
areas.add(new AreaObj(Area.ID_CHINA, "中国", 1, Area.ID_GLOBAL));
|
||||
// 转换每一行数据
|
||||
for (CsvRow row : rows) {
|
||||
|
||||
// id
|
||||
String idStr = String.format("%-6s", Integer.parseInt(row.get(0))).replace(' ', '0');
|
||||
|
||||
// 转换层级
|
||||
int deep = Integer.parseInt(row.get(2));
|
||||
int type = deep + 2; // 0->2省, 1->3市, 2->4区
|
||||
|
||||
//名称
|
||||
String name = row.get(7);
|
||||
if (name.contains("国外")) { //排除国外数据
|
||||
continue;
|
||||
}
|
||||
|
||||
//父id
|
||||
String pidStr = String.format("%-6s", Integer.parseInt(row.get(1))).replace(' ', '0');
|
||||
if ("000000".equals(pidStr)) {
|
||||
pidStr = "1"; // 省级,父节点为中国
|
||||
}
|
||||
|
||||
areas.add(new AreaObj(Integer.parseInt(idStr), name, type, Integer.parseInt(pidStr)));
|
||||
}
|
||||
|
||||
//按照yudao原始规则排序【方便对比变更】
|
||||
areas.sort(Comparator.comparingInt(AreaObj::type).thenComparing(AreaObj::id));
|
||||
// 写入转换后的文件
|
||||
FileWriter writer = new FileWriter(outputFile);
|
||||
// 写入 header
|
||||
writer.write("id,name,type,parentId\n");
|
||||
// 写入行
|
||||
for (AreaObj area : areas) {
|
||||
writer.write(String.format("%d,%s,%d,%d\n", area.id, area.name, area.type, area.parentId));
|
||||
}
|
||||
writer.close();
|
||||
System.out.println("转换完成,共处理 " + areas.size() + " 行数据");
|
||||
|
||||
}
|
||||
|
||||
record AreaObj(int id, String name, int type, int parentId) {
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue