(爬虫抓取)最新省市区数据

Posted on 2022-07-20 09:17:38
Author: 可乐小可爱メ
1. 背景

需要省市区数据, 搜索了一些 发现都是没有更新过的旧数据。


2. 思路

从民政部官网找了找数据, 有最新的, 但是接口很复杂, 数据也经过了一些处理, 所以用爬虫抓下来用。

3. 代码

city.agent.js

const superagent = require("superagent");
const cheerio = require("cheerio");
require("superagent-charset")(superagent);
const encode = require("./gbk.1");

/**
 * Turn the HTML of a province query page into a nested list of entries.
 *
 * Every `.name_left` cell inside `.info_table` is visited. The entry's id is
 * read from the text of the 10th child (index 9) of the cell's parent row;
 * rows without that text are skipped. A cell whose second child has no
 * `attribs` starts a new top-level entry (name taken from the first child's
 * `value` attribute); any other cell is appended to the most recent top-level
 * entry (name taken from the second child's `alt` attribute).
 * NOTE(review): top-level rows presumably are prefecture-level cities and the
 * others their districts/counties — confirm against the live page.
 *
 * @param {string} html - Page markup, already decoded to a JS string.
 * @returns {Array<{ssqid: string, ssqname: string, cities: Array<{ssqid: string, ssqname: string}>}>}
 * @throws {Error} If a sub-entry row appears before any top-level row.
 */
function parseCityRows(html) {
  const $ = cheerio.load(html);
  const cells = $(".info_table tbody tr .name_left");
  const entries = [];
  for (let i = 0; i < cells.length; i++) {
    const cell = cells[i];
    const ssqid = cell.parent.children[9].children[0]?.data;
    if (!ssqid) {
      continue;
    }
    const marker = cell.children[1].attribs;
    if (!marker) {
      entries.push({
        ssqid,
        ssqname: cell.children[0].attribs.value,
        cities: [],
      });
      continue;
    }
    const parentEntry = entries[entries.length - 1];
    if (!parentEntry) {
      // Previously this indexed arr[-1] and crashed with an opaque TypeError.
      throw new Error(
        `Sub-entry ${ssqid} appeared before any top-level entry`
      );
    }
    parentEntry.cities.push({
      ssqid,
      ssqname: marker.alt,
    });
  }
  return entries;
}

/**
 * Fetch and parse the administrative-division page for one province.
 *
 * The province name is encoded with `encode` and sent as the `shengji` query
 * parameter (`diji` and `xianji` are fixed to -1); the response body is
 * decoded as GBK.
 *
 * @param {string} city - Province name exactly as the site expects it.
 * @returns {Promise<Array<{ssqid: string, ssqname: string, cities: Array<{ssqid: string, ssqname: string}>}>>}
 *   Resolves with the parsed entries. Rejects with the request error, with an
 *   Error when the response carries no text body, or with whatever error the
 *   page parsing raises.
 */
function getCityData(city) {
  return new Promise((resolve, reject) => {
    superagent
      .get(
        `http://xzqh.mca.gov.cn/defaultQuery?shengji=${encode(
          city
        )}&diji=-1&xianji=-1`
      )
      .charset("gbk")
      .set("accept", "html")
      .end((err, res) => {
        if (err) {
          // Must stop here: `res` may be undefined when the request failed.
          reject(err);
          return;
        }
        if (typeof res?.text !== "string") {
          // Settle the promise instead of leaving the caller waiting forever.
          reject(new Error(`No text body in response for "${city}"`));
          return;
        }
        try {
          resolve(parseCityRows(res.text));
        } catch (parseErr) {
          // Surface parsing failures as a rejection rather than an exception
          // thrown inside the request callback.
          reject(parseErr);
        }
      });
  });
}
module.exports = getCityData;


gbk.1.js    // GBK 编码(用于 URL 参数)   感谢   https://github.com/cnwhy/GBK.js

const GBK = require("./gbk.min.js");        // gbk 文件链接
/**
 * URI-encode a string via the bundled GBK library.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Whatever `GBK.URI.encodeURI` returns for `str`.
 */
function encode(str) {
  const { URI } = GBK;
  const encoded = URI.encodeURI(str);
  return encoded;
}

module.exports = encode;


city.js

const fs = require("fs");
const getCityData = require("./city.agent");
// Province-level names used as the query value for getCityData, one request
// per entry. Each entry is the full name followed by an abbreviation in
// parentheses; InsertData keeps only the part before "(" as the display name.
const provinceArr = [
"北京市(京)",
"天津市(津)",
"河北省(冀)",
"山西省(晋)",
"内蒙古自治区(内蒙古)",
"辽宁省(辽)",
"吉林省(吉)",
"黑龙江省(黑)",
"上海市(沪)",
"江苏省(苏)",
"浙江省(浙)",
"安徽省(皖)",
"福建省(闽)",
"江西省(赣)",
"山东省(鲁)",
"河南省(豫)",
"湖北省(鄂)",
"湖南省(湘)",
"广东省(粤)",
"广西壮族自治区(桂)",
"海南省(琼)",
"重庆市(渝)",
"四川省(川、蜀)",
"贵州省(黔、贵)",
"云南省(滇、云)",
"西藏自治区(藏)",
"陕西省(陕、秦)",
"甘肃省(甘、陇)",
"青海省(青)",
"宁夏回族自治区(宁)",
"新疆维吾尔自治区(新)",
"香港特别行政区(港)",
"澳门特别行政区(澳)",
"台湾省(台)",
];
/**
 * Derive a province-level id from the id of any entry inside that province.
 *
 * Assumes ids are 6-digit administrative division codes whose first two
 * digits identify the province (so the province code is those two digits
 * followed by "0000") — TODO confirm against the scraped data. This replaces
 * the earlier "first id minus 100" rule, which was only right when the first
 * entry happened to be an xx0100 code.
 *
 * @param {string|undefined} code - Id of an entry belonging to the province.
 * @returns {string} The province id, or "" when `code` is missing or not a
 *   positive integer.
 */
function toProvinceId(code) {
  const numeric = Number(code);
  if (!code || !Number.isInteger(numeric) || numeric <= 0) {
    return "";
  }
  return String(Math.floor(numeric / 10000) * 10000);
}

/**
 * Scrape every province in `provinceArr` and assemble the full data set.
 *
 * Provinces are fetched one after another (not in parallel), so the remote
 * site only ever sees a single in-flight request.
 *
 * @returns {Promise<Array<{ssqname: string, city: Array, ssqid: string}>>}
 *   One record per province: `ssqname` is the name without the parenthesised
 *   abbreviation, `city` is the result of getCityData, and `ssqid` is derived
 *   from the first scraped entry ("" when nothing was scraped).
 */
async function InsertData() {
  const data = [];
  for (const pro of provinceArr) {
    const city = await getCityData(pro);
    data.push({
      ssqname: pro.slice(0, pro.indexOf("(")),
      city,
      ssqid: toProvinceId(city[0]?.ssqid),
    });
  }
  return data;
}
// Entry point: scrape all provinces and dump the result to ./city.agent.json.
// Both scraping and writing are awaited inside one try/catch so a failed
// request no longer surfaces as an unhandled promise rejection, and any
// failure is reflected in the process exit status.
(async () => {
  try {
    const provinces = await InsertData();
    await fs.promises.writeFile("./city.agent.json", JSON.stringify(provinces));
    console.log("write success!");
  } catch (err) {
    console.log("err: ", err);
    process.exitCode = 1;
  }
})();


4. 说明

1. 数据来自民政部官网的公开数据, 脚本只是为了获取最新省市区数据。

2. ssqid 直辖市 特区 还要手动改一下

3. 其它数据根据需要 可以自己添加一下

当前评论 (0) 登录后评论

暂无评论