(爬虫抓取)最新省市区数据

Posted on 2022-07-20 09:17:38
Comments: 0
Author: 可乐小可爱メ
1. 背景

需要省市区数据, 搜索了一些 发现都是没有更新过的旧数据。


2. 思路

从民政部官网找了找数据, 有最新的,但是接口很复杂,也有过一些处理, 那么用爬虫抓一下来用..

3. 代码

city.agent.js

const superagent = require("superagent");
const cheerio = require("cheerio");
require("superagent-charset")(superagent);
const encode = require("./gbk.1");

/**
 * Turn the matched `.name_left` cells of the result table into a nested list.
 *
 * A cell whose second child has no `attribs` starts a new top-level entry
 * (name read from the first child's `value` attribute); any other cell is
 * appended to the `cities` of the most recent top-level entry (name read from
 * the second child's `alt` attribute). Rows whose tenth sibling cell carries
 * no text are skipped.
 * NOTE(review): `ssqid` is presumably the administrative division code shown
 * in that column — confirm against the live page.
 *
 * @param {Function} $ - cheerio instance loaded with the response HTML.
 * @returns {Array<{ssqid: string, ssqname: string, cities: Array<{ssqid: string, ssqname: string}>}>}
 * @throws {TypeError} when the markup does not have the expected structure.
 */
function parseDivisionRows($) {
  const nameCells = $(".info_table tbody tr .name_left");
  const prefectures = [];
  for (let i = 0; i < nameCells.length; i++) {
    const cell = nameCells[i];
    const ssqid = cell.parent.children[9].children[0]?.data;
    if (!ssqid) {
      continue;
    }
    const areaAttribs = cell.children[1].attribs;
    if (!areaAttribs) {
      prefectures.push({
        ssqid,
        ssqname: cell.children[0].attribs.value,
        cities: [],
      });
    } else {
      prefectures[prefectures.length - 1].cities.push({
        ssqid,
        ssqname: areaAttribs.alt,
      });
    }
  }
  return prefectures;
}

/**
 * Query xzqh.mca.gov.cn for one province and scrape its divisions.
 *
 * @param {string} city - Province label sent (after `encode`) as the `shengji`
 *   query parameter, e.g. "河北省(冀)".
 * @returns {Promise<Array>} Resolves with the list built by `parseDivisionRows`.
 *   Rejects on a request error, on a response without a text body, or when the
 *   page markup cannot be parsed — the promise always settles.
 */
function getCityData(city) {
  return new Promise((resolve, reject) => {
    superagent
      .get(
        `http://xzqh.mca.gov.cn/defaultQuery?shengji=${encode(
          city
        )}&diji=-1&xianji=-1`
      )
      .charset("gbk")
      .set("accept", "html")
      .end((err, res) => {
        if (err) {
          // Stop here: `res` may be undefined when the request failed.
          reject(err);
          return;
        }
        if (typeof res?.text !== "string") {
          reject(new Error(`Unexpected non-text response for "${city}"`));
          return;
        }
        try {
          resolve(parseDivisionRows(cheerio.load(res.text)));
        } catch (parseErr) {
          // Surface markup changes as a rejection instead of an uncaught throw.
          reject(parseErr);
        }
      });
  });
}
module.exports = getCityData;


gbk.1.js    // gbk编码   感谢   https://github.com/cnwhy/GBK.js

const GBK = require("./gbk.min.js");        // gbk 文件链接
/**
 * Encode `str` by delegating to `GBK.URI.encodeURI` from the bundled GBK.js.
 *
 * @param {string} str - Text to encode.
 * @returns {string} Whatever `GBK.URI.encodeURI` returns for `str`.
 */
function encode(str) {
  const { URI } = GBK;
  const encoded = URI.encodeURI(str);
  return encoded;
}

module.exports = encode;


city.js

const fs = require("fs");
const getCityData = require("./city.agent");
// Province-level labels, each written as "<name>(<abbreviation>)".
// InsertData passes every label unchanged to getCityData and uses the text
// before the first "(" as the province name.
const provinceArr = [
"北京市(京)",
"天津市(津)",
"河北省(冀)",
"山西省(晋)",
"内蒙古自治区(内蒙古)",
"辽宁省(辽)",
"吉林省(吉)",
"黑龙江省(黑)",
"上海市(沪)",
"江苏省(苏)",
"浙江省(浙)",
"安徽省(皖)",
"福建省(闽)",
"江西省(赣)",
"山东省(鲁)",
"河南省(豫)",
"湖北省(鄂)",
"湖南省(湘)",
"广东省(粤)",
"广西壮族自治区(桂)",
"海南省(琼)",
"重庆市(渝)",
"四川省(川、蜀)",
"贵州省(黔、贵)",
"云南省(滇、云)",
"西藏自治区(藏)",
"陕西省(陕、秦)",
"甘肃省(甘、陇)",
"青海省(青)",
"宁夏回族自治区(宁)",
"新疆维吾尔自治区(新)",
"香港特别行政区(港)",
"澳门特别行政区(澳)",
"台湾省(台)",
];
/**
 * Fetch the divisions of every entry in `provinceArr`, one request at a time.
 *
 * Each result object holds the province name (label text before "("), the
 * list returned by `getCityData`, and an `ssqid` derived from the first
 * returned entry's `ssqid` minus 100 ("-100" when nothing was returned).
 *
 * @returns {Promise<Array<{ssqname: string, city: Array, ssqid: string}>>}
 */
async function InsertData() {
  const provinces = [];
  for (const label of provinceArr) {
    const ssqname = label.slice(0, label.indexOf("("));
    const cities = await getCityData(label);
    const firstCityId = Number(cities[0]?.ssqid || 0);
    provinces.push({
      ssqname,
      city: cities,
      ssqid: String(firstCityId - 100),
    });
  }
  return provinces;
}
// Entry point: scrape every province and dump the result to ./city.agent.json.
// Both scraping and writing failures are reported here, so a failed request no
// longer surfaces as an unhandled promise rejection.
(async () => {
  try {
    const provinces = await InsertData();
    await fs.promises.writeFile("./city.agent.json", JSON.stringify(provinces));
    console.log("write success!");
  } catch (err) {
    console.log("err: ", err);
    // Signal the failure to the calling shell / scheduler.
    process.exitCode = 1;
  }
})();


4. 说明

1. 民政部公开数据, 脚本只是为了获取最新省市区数据。

2. ssqid 直辖市 特区 还要手动改一下

3. 其它数据根据需要 可以自己添加一下

chainList transform to renderTree(Array)

Posted on 2022-05-16 22:42:01
Comments: 0
Author: 可乐小可爱メ

1. renderTree to chain

const renderTree = [
{
name: "A",
parent: "step1",
child: "Aa",
siblings: "B",
children: [
{
name: "Aa",
parent: "A",
child: "Aa1",
siblings: "Ab",
children: [
{ name: "Aa1", parent: "Aa", child: null, siblings: "Ab1" },
{
name: "Ab1",
parent: "Aa",
child: "Ab1-1",
siblings: "Ac1",
children: [
{
name: "Ab1-1",
parent: "Ab1",
child: null,
siblings: "Ab1-2",
},
{
name: "Ab1-2",
parent: "Ab1",
child: "Ab1-2-1",
siblings: "Ab1-3",
children: [
{
name: "Ab1-2-1",
parent: "Ab1-2",
child: null,
siblings: null,
},
],
},
{ name: "Ab1-3", parent: "Ab1", child: null, siblings: null },
],
},
{
name: "Ac1",
parent: "Aa",
child: "Ac1-1",
siblings: null,
children: [
{
name: "Ac1-1",
parent: "Ac1",
child: null,
siblings: "Ac1-2",
},
{
name: "Ac1-2",
parent: "Ac1",
child: "Ac1-2-1",
siblings: null,
children: [
{
name: "Ac1-2-1",
parent: "Ac1-2",
child: null,
siblings: null,
},
],
},
],
},
],
},
{ name: "Ab", parent: "A", child: null, siblings: "Ac" },
{ name: "Ac", parent: "A", child: null, siblings: "Ad" },
{
name: "Ad",
parent: "A",
child: "Ad1",
siblings: null,
children: [
{ name: "Ad1", parent: "Ad", child: null, siblings: "Ad2" },
{ name: "Ad2", parent: "Ad", child: null, siblings: null },
],
},
],
},
{
name: "B",
parent: "step1",
child: "B1",
siblings: "C",
children: [{ name: "B1", parent: "B", child: null, siblings: null }],
},
{ name: "C", parent: "step1", child: null, siblings: null },
];

/**
 * Shorthand deep copy via a JSON round trip.
 * Only JSON-serialisable data survives (e.g. `undefined` properties are dropped).
 *
 * @param {*} obj - Value to copy.
 * @returns {*} A structurally equal value sharing no references with `obj`.
 */
function deepClone(obj) {
  const serialized = JSON.stringify(obj);
  return JSON.parse(serialized);
}

// Flat lookup filled by arrayToChain: node name -> node without `children`.
const obj = {};

/**
 * Flatten a render tree into `obj`, keyed by each node's `name`.
 *
 * Every node is stored as a copy without its `children` array, and the
 * function then descends into those children, so nodes at every depth end up
 * in `obj`. The input tree is not modified.
 *
 * @param {Array<Object>} arr - Nodes of one tree level; each may carry `children`.
 * @returns {void}
 */
function arrayToChain(arr) {
  for (const node of arr) {
    // Split `children` off before copying: the earlier version deleted it from
    // the copy and then tested the copy, so it never recursed.
    const { children, ...fields } = node;
    obj[fields.name] = deepClone(fields);
    if (Array.isArray(children) && children.length > 0) {
      arrayToChain(children);
    }
  }
}
arrayToChain(renderTree);
console.log("obj:: ", obj);



2. chain to renderTree

const chainedList = {
A: {
name: "A",
parent: "step1",
child: "Aa",
siblings: "B",
},
B: {
name: "B",
parent: "step1",
child: "B1",
siblings: "C",
},
C: {
name: "C",
parent: "step1",
child: null,
siblings: null,
},
B1: {
name: "B1",
parent: "B",
child: null,
siblings: null,
},
Aa: {
name: "Aa",
parent: "A",
child: "Aa1",
siblings: "Ab",
},
Aa1: {
name: "Aa1",
parent: "Aa",
child: null,
siblings: "Ab1",
},
Ab1: {
name: "Ab1",
parent: "Aa",
child: "Ab1-1",
siblings: "Ac1",
},
"Ab1-1": {
name: "Ab1-1",
parent: "Ab1",
child: null,
siblings: "Ab1-2",
},
"Ab1-2": {
name: "Ab1-2",
parent: "Ab1",
child: "Ab1-2-1",
siblings: "Ab1-3",
},
"Ab1-3": {
name: "Ab1-3",
parent: "Ab1",
child: null,
siblings: null,
},
"Ab1-2-1": {
name: "Ab1-2-1",
parent: "Ab1-2",
child: null,
siblings: null,
},
Ac1: {
name: "Ac1",
parent: "Aa",
child: "Ac1-1",
siblings: null,
},
Ab: {
name: "Ab",
parent: "A",
child: null,
siblings: "Ac",
},
Ac: {
name: "Ac",
parent: "A",
child: null,
siblings: "Ad",
},
Ad: {
name: "Ad",
parent: "A",
child: "Ad1",
siblings: null,
},
Ad1: {
name: "Ad1",
parent: "Ad",
child: null,
siblings: "Ad2",
},
Ad2: {
name: "Ad2",
parent: "Ad",
child: null,
siblings: null,
},
"Ac1-1": {
name: "Ac1-1",
parent: "Ac1",
child: null,
siblings: "Ac1-2",
},
"Ac1-2": {
name: "Ac1-2",
parent: "Ac1",
child: "Ac1-2-1",
siblings: null,
},
"Ac1-2-1": {
name: "Ac1-2-1",
parent: "Ac1-2",
child: null,
siblings: null,
},
};
// All nodes of `chainedList` as a plain array, in the object's key order.
const chainedListArray = Object.values(chainedList);
const treeList = [];
let parent = null;
let siblingsKey = "";

const uncleKeys = {};

/**
 * Rebuild the nested render tree from the flat `chainedList`.
 *
 * Starting at `currentKey`, the node and every node reachable through its
 * `siblings` links are appended to `treeList`; for each of them the `child`
 * link is followed the same way and the collected nodes are stored on the
 * node as `children` (leaf nodes get no `children` property). Nodes are the
 * objects held in `chainedList`, i.e. they are annotated in place.
 *
 * The walk keeps all of its state local, so it does not rely on module-level
 * cursors, recursion depth is bounded by tree depth rather than node count,
 * and unknown or already visited keys end the current chain instead of
 * throwing or looping.
 *
 * @param {string} currentKey - Key of the first root node in `chainedList`.
 * @returns {void}
 */
function chainToArrayTree(currentKey) {
  const visited = new Set();

  // Collect one tree level: the node at `firstKey` followed by its siblings.
  const collectLevel = (firstKey) => {
    const level = [];
    let key = firstKey;
    while (key) {
      const node = chainedList[key];
      if (!node || visited.has(key)) {
        break;
      }
      visited.add(key);
      const kids = collectLevel(node.child);
      if (kids.length > 0) {
        node.children = kids;
      }
      level.push(node);
      key = node.siblings;
    }
    return level;
  };

  treeList.push(...collectLevel(currentKey));
}

/**
 * Walk up the `parent` links from `current` and return the first ancestor's
 * `siblings` key that is still flagged truthy in `uncleKeys`.
 *
 * @param {Object|undefined|null} current - Node to start from.
 * @returns {string|undefined} The pending sibling key, or `undefined` when the
 *   chain of parents ends without finding one.
 */
function findAncestorSiblings(current) {
  const parentKey = current?.parent;
  if (!parentKey) {
    return undefined;
  }
  const parentNode = chainedList[parentKey];
  const pending = parentNode?.siblings;
  return pending && uncleKeys[pending]
    ? pending
    : findAncestorSiblings(parentNode);
}

chainToArrayTree("A");
console.log("treeList: ", treeList);


BL React Coding

Posted on 2022-03-25 12:10:12
Comments: 0
Author: 可乐小可爱メ

App.tsx

import { useState, useEffect } from "react";
import axios from "axios";

interface IUserInfo {
id: number;
name: string;
username: string;
email: string;
phone: string;
website: string;
}

/**
 * Page that loads one user from `/bl-api/users/1` on mount and renders a few
 * of its fields. When the request fails, a hard-coded sample user is shown.
 */
export default function IndexPage() {
  const url = "/bl-api/users/1";
  const [userData, setUserData] = useState({} as IUserInfo);
  useEffect(() => {
    axios
      .get<IUserInfo>(url)
      // axios resolves with a response wrapper; the user record is in `data`.
      .then(({ data }) => {
        if (data) {
          setUserData(data);
        }
      })
      .catch((e) => {
        // Keep the page usable with sample data, but do not hide the failure.
        console.error("Failed to load user, using fallback data:", e);
        const data = {
          id: 1,
          name: "Ray Yin",
          username: "rayyin",
          email: "ray@bridgelegal.biz",
          phone: "8727040355",
          website: "bridgelegal.com",
        };
        setUserData(data);
      });
  }, []);
  return (
    // No need to touch code below
    <div className="App">
      <h2>User Data</h2>
      <p>
        <strong>Name: </strong>{" "}
        {userData?.name || "(Need to populate name here)"}
      </p>
      <p>
        {" "}
        <strong>Website: </strong>
        {userData?.website || "(Need to populate website here)"}
      </p>
      <p>
        {" "}
        <strong>Email: </strong>
        {userData?.email || "(Need to populate email here)"}
      </p>
      <p>
        {" "}
        <strong>Phone: </strong>
        {userData?.phone || "(Need to populate phone here)"}
      </p>
    </div>
  );
}



count.tsx


import React from "react";
const { Component } = React;

interface IData {
id: number;
value: number;
}

// BL-Counter
/** Props accepted by BLCounter: the counter record plus change callbacks. */
interface IBLCounterProps extends IData {
  /** Called with `{ id, value: value + 1 }` when "+" is clicked. */
  onIncrement: (payload: IData) => void;
  /**
   * Called with `{ id, value: value - 1 }` when "-" is clicked.
   * Falls back to `onIncrement` when omitted.
   */
  onDecrement?: (payload: IData) => void;
}

/**
 * Single counter row: shows the current value with "-" and "+" buttons.
 * The component is stateless; the new value is reported to the parent.
 */
class BLCounter extends Component<IBLCounterProps> {
  render() {
    const { id, value, onIncrement, onDecrement = onIncrement } = this.props;
    return (
      <div className="bl-counter">
        <b>{value}</b>
        <div className="bl-counter-handler">
          <button
            className="button is-decrement bl-small"
            onClick={() => onDecrement({ id, value: value - 1 })}
          >
            -
          </button>
          <button
            className="button is-increment bl-small"
            onClick={() => onIncrement({ id, value: value + 1 })}
          >
            +
          </button>
        </div>
      </div>
    );
  }
}

/** Renders the given sum in a paragraph labelled "BLTotal". */
function BLTotal(props: { sum: number }) {
  const { sum } = props;
  return <p> BLTotal: {sum}</p>;
}

/**
 * Page holding the state for a list of BLCounter rows and their running total.
 * It starts with three counters and adds a fourth (id 4) after mount.
 */
export default function CountPage() {
  const [data, setData] = React.useState<Array<IData>>([
    { id: 1, value: 0 },
    { id: 2, value: 0 },
    { id: 3, value: 0 },
  ]);

  // Sum of all counter values, recomputed only when `data` changes.
  const totalNumber = React.useMemo(
    () => data.reduce((sum, { value }) => sum + value, 0),
    [data]
  );

  React.useEffect(() => {
    // Functional update: no stale `data` closure. The guard makes the effect
    // safe to run more than once without producing a duplicate id/key.
    setData((prev) =>
      prev.some(({ id }) => id === 4) ? prev : [...prev, { id: 4, value: 0 }]
    );
  }, []);

  /** Replace the counter with the same `id` by the reported payload. */
  function onChangeCount(payload: IData) {
    const { id, value } = payload;
    if (typeof id === "number" && typeof value === "number") {
      setData((prev) =>
        prev.map((elm) => (elm.id === id ? { id, value } : elm))
      );
    }
  }

  return (
    <div>
      {data.map((counter: IData) => (
        <BLCounter
          key={counter.id}
          id={counter.id}
          value={counter.value}
          onIncrement={onChangeCount}
          onDecrement={onChangeCount}
        />
      ))}
      <BLTotal sum={totalNumber} />
    </div>
  );
}



迁站后感想

Posted on 2021-11-24 20:06:51
Comments: 0
Author: 可乐小可爱メ

因为一些原因,又一次云服务器down掉,

再一次迁移,花了一些时间, 并且原来数据因为是手动定期备份,没有写定时任务备份,导致博客数据丢失了两篇,一篇是 关于 OAuth授权 包括Gitee Github等的实现, 一篇是Docker入门简单小结。 还有就是原来考虑不周全, 所有博文图片 选择保存到云服务器本地,这次直接全部没了,


经此一役, 还是选了七牛云的oss,

总结下了部署环境, nginx, mysql, redis, node及相关全局包以及权限 还有 node-sass 镜像下载依赖等问题,

再接再厉吧, 争取还是能保持每月一篇技术博客小结

Echarts生成canvas后转图片保存

Posted on 2021-08-29 17:12:37
Comments: 0
Author: 可乐小可爱メ
1.背景

业务场景是 动态邮件报表(含各种 echarts 图表) 定期发送给不同人员;

    1.1 难点是 如何动态生成图形化报表图片插入到 email发送.

2. 解决思路

     两个方向解决:

        方法A: node-echarts 服务端绘制生成 保存到本地

        方法B: 客户端生成 转化 插入到email

3. 初探

    方法A 首先被排除, 

    node-charts 包 当前状态 1.1.4 • Public • Published 

    并且实际操作后, 确实如一些帖子分享所说 相关依赖安装失败。 暂时放弃

    方法B 的思路, 是基于已经有的PC项目(已通过echarts 有了相关图表),保存相关图表的 canvas到image, 然后 image 到服务器本地 然后 插入Email.

4. 技术方案

 

核心点是 通过  puppeteer 模拟访问获取.

5. 代码实现

    5.1 Node Server

const Express = require("express");
const request = require("request");
const app = new Express();

app.get("/", (req, res) => {
res.send("20001 server.");
});
// GET /api?memberId=... — proxies the query to the business server and
// returns its payload; on an upstream error it answers with a sample ECharts
// option so the demo page can still render.
app.get("/api", (req, res) => {
  const { query } = req;
  if (!query || !query.memberId) {
    return res.json({ code: 10010, msg: "Request Error" });
  }
  // Serialise the parsed query back into a query string; concatenating the
  // object itself would produce "/api[object Object]".
  // NOTE(review): repeated parameters (?a=1&a=2) are joined as "1,2" here.
  const upstreamUrl = `http://java.server.com/api?${new URLSearchParams(query)}`;
  // Simulated request to the business server.
  request(
    {
      url: upstreamUrl,
      // Placeholder kept from the original demo — set the real HTTP method.
      method: "xxx",
    },
    (err, response, data) => {
      // Simulated response data.
      const option = {
        title: {
          text: "ECharts 入门示例",
        },
        tooltip: {},
        legend: {
          data: ["销量"],
        },
        xAxis: {
          data: ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"],
        },
        yAxis: {},
        series: [
          {
            name: "销量",
            type: "bar",
            data: [5, 20, 36, 10, 10, 20],
          },
        ],
      };
      if (err) {
        // Demo fallback: serve the sample option instead of an error payload.
        return res.json(option);
      }
      return res.json(data);
    }
  );
});

app.use("/static", Express.static("static"));

app.listen("20001", () => {
console.log("listen on: http://127.0.0.1:20001");
});

    5.2 Puppeteer.js

const puppeteer = require("puppeteer");
const target = "http://127.0.0.1:20001/static/?memberId=123";
const userAgent =
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36";
// Open the static chart page in Chrome, click the save button once it exists,
// wait briefly for the download to start, then close the browser.
puppeteer
  .launch({
    // Backslashes must be escaped, otherwise "\P", "\G", ... are swallowed.
    executablePath:
      "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
    headless: false,
  })
  .then(async (browser) => {
    const saveButton = 'button[title="save-btn"]';
    try {
      const page = await browser.newPage();
      await page.setUserAgent(userAgent);
      await page.goto(target);
      await page.waitForSelector(saveButton);
      await page.click(saveButton);
      // Give the browser time to write the downloaded file.
      await new Promise((resolve) => setTimeout(resolve, 2000));
    } finally {
      // Always release the browser, even when a step above fails.
      await browser.close();
    }
  })
  .catch((err) => {
    console.error("puppeteer run failed:", err);
    process.exitCode = 1;
  });

    5.3 静态Html index.html

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>canvas to image</title>
</head>

<body>
<div id="main" style="width: 600px; height: 400px"></div>
<button id="save-btn" title="save-btn">save</button>
</body>
<script src="https://cdn.jsdelivr.net/npm/echarts@5.1.2/dist/echarts.min.js"></script>
<script type="text/javascript">
// Load the chart option from the local API (forwarding this page's query
// string) and render it into #main.
const xhr = new XMLHttpRequest();
const canvasWrap = document.getElementById("main");
const btn = document.getElementById("save-btn");
const { search } = window.location;
xhr.open("GET", "http://127.0.0.1:20001/api" + search);
xhr.onreadystatechange = () => {
  if (xhr.readyState === 4 && xhr.status === 200) {
    const option = JSON.parse(xhr.response);
    const myChart = echarts.init(canvasWrap);
    myChart.setOption(option);
  }
};
xhr.send();

// Save the rendered chart canvas as a JPEG download.
btn.onclick = function () {
  // Convert a base64 data URL into a Blob carrying the same MIME type.
  function dataURLtoBlob(dataurl) {
    const arr = dataurl.split(",");
    const mime = arr[0].match(/:(.*?);/)[1];
    const bstr = atob(arr[1]);
    let n = bstr.length;
    const u8arr = new Uint8Array(n);
    while (n--) {
      u8arr[n] = bstr.charCodeAt(n);
    }
    return new Blob([u8arr], { type: mime });
  }

  const canvas = canvasWrap.getElementsByTagName("canvas")[0];
  if (!canvas) {
    // The chart has not been rendered yet, so there is nothing to export.
    console.warn("chart canvas not found, nothing to save");
    return;
  }
  const fileName = "xxxxs.jpg";

  // JPEG has no alpha channel: paint the chart onto a white background first
  // so transparent areas do not turn black.
  const flattened = document.createElement("canvas");
  flattened.width = canvas.width;
  flattened.height = canvas.height;
  const ctx = flattened.getContext("2d");
  ctx.fillStyle = "#ffffff";
  ctx.fillRect(0, 0, flattened.width, flattened.height);
  ctx.drawImage(canvas, 0, 0);

  // "image/jpeg" is the registered type; "image/jpg" is not recognised and
  // makes toDataURL fall back to PNG.
  const imgData = flattened.toDataURL("image/jpeg");
  const blob = dataURLtoBlob(imgData);
  const objurl = URL.createObjectURL(blob);
  const alink = document.createElement("a");
  alink.href = objurl;
  alink.download = fileName;
  alink.click();
};
</script>
</html>

    5.4 定时任务 crontab/node-schedule 请参考之前文章 Node.js 定时任务(node-schedule,pm2,shell脚本)

    5.5 业务接口(JAVA server)根据实际情况 返回当前要生成的 Echarts图表相关参数


6. 注意点

    6.1 以上是win环境代码, 实际到 centos后实测 需要修改 puppeteer.js 中 launch参数

.launch({
headless: false,
args: ["--no-sanbox"],
executablePath: "/usr/bin/chromium-browser",

})

 并且需要更换非root用户 启动puppeteer, 实际执行中 会报相关错误,


需要安装相关依赖 开启配置项。 CentOS解决xhost: unable to open display

    6.2 未完全解决问题    

    当前脚本执行通过log,确认没有报错, 执行完成。 但(centos中)找不到保存的文件




1
2
3
4