大众点评的店铺页面上有地图,但在 HTML 源码中却找不到坐标(经纬度)信息。
分析 JS 后发现,它把坐标(经纬度)信息做了编码转换(防采集),转换后的结果就是 HTML 中的 poi 参数。
以 http://www.dianping.com/shop/4101814 店铺为例。
页面源文件中包含 poi 参数(poi: 'HETSIFZVVHWATW')。
页面引用了下面这个脚本:
<script src="//www.dpfile.com/s/js/p.pak.min.1361abe883291540eae764799aa98b95.js" type="text/javascript"></script>
即:view-source:http://www.dpfile.com/s/js/p.pak.min.1361abe883291540eae764799aa98b95.js
该脚本中包含用 JavaScript 实现的 poi 解码算法:
checkPOI: function(a, b) {
a || (a = Array(b));
"array" != $type(a) && (a = Array(a));
for (var d = a.length,
c = 0; c < d; c++) {
var f = a[c] || "";
if (f.poi) {
var g = this.decode(f.poi);
f.lat = g.lat;
f.lng = g.lng;
delete f.poi
}
if (!f.lat || !f.lng) if (b) for (var e in b) f[e] = b[e];
else a.splice(c, 1),
d--
}
b && 1 > a.length && (a = Array(b));
return a
},
decode: function(a) {
var b = -1,
d = 0,
c = "",
f = a.length,
g = a.charCodeAt(f - 1),
a = a.substring(0, f - 1);
f--;
for (var e = 0; e < f; e++) {
var h = parseInt(a.charAt(e), this.options.settings.cha) - this.options.settings.add;
h >= this.options.settings.add && (h -= this.options.settings.plus);
c += h.toString(this.options.settings.cha);
h > d && (b = e, d = h)
}
a = parseInt(c.substring(0, b), this.options.settings.digi);
b = parseInt(c.substring(b + 1), this.options.settings.digi);
g = (a + b - parseInt(g)) / 2;
b = (b - g) / 1E5;
return {
lat: b,
lng: g / 1E5
}
},
参考资料
如何抓取(采集)大众点评网的坐标(经纬度)信息
http://www.site-digger.com/html/articles/20111110/18.html
一个进一步处理后的 JavaScript 演示脚本如下:
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Demo</title>
</head>
<body>
<script type="text/javascript">
/**
 * Decode an obfuscated Dianping `poi` token into latitude and longitude.
 *
 * Steps:
 *  1. The last character is split off; its character code is used as a
 *     numeric offset.
 *  2. Every remaining character is read as a base-36 digit, reduced by 10,
 *     and reduced by a further 7 if the result is still 10 or more. The
 *     values are written back out as base-36 digits.
 *  3. The position of the largest value marks the split point: the text
 *     before and after it is parsed as two hexadecimal numbers.
 *  4. From those two numbers and the offset, the coordinates are recovered
 *     and divided by 100000.
 *
 * @param {string} C - Encoded poi token, e.g. 'HETSIFZVVHWATW'.
 * @returns {{lat: number, lng: number}} Decoded coordinates (NaN values
 *     when the token cannot be split into two hexadecimal parts).
 */
function decode(C) {
    var HEX_RADIX = 16;
    var SYMBOL_RADIX = 36;
    var BASE_OFFSET = 10;
    var HIGH_OFFSET = 7;
    var SCALE = 100000;

    var lastIndex = C.length - 1;
    var checkCode = C.charCodeAt(lastIndex);
    var symbols = C.substring(0, lastIndex).split('');

    var markerIndex = -1; // index of the largest translated value seen so far
    var largest = 0;
    var pieces = [];
    for (var pos = 0; pos < symbols.length; pos += 1) {
        var value = parseInt(symbols[pos], SYMBOL_RADIX) - BASE_OFFSET;
        if (value >= BASE_OFFSET) {
            value -= HIGH_OFFSET;
        }
        pieces.push(value.toString(SYMBOL_RADIX));
        if (value > largest) {
            markerIndex = pos;
            largest = value;
        }
    }
    var digits = pieces.join('');

    // The marker character itself is dropped; both sides are hexadecimal.
    var head = parseInt(digits.substring(0, markerIndex), HEX_RADIX);
    var tail = parseInt(digits.substring(markerIndex + 1), HEX_RADIX);
    var scaledLng = (head + tail - parseInt(checkCode, 10)) / 2;
    var scaledLat = tail - scaledLng;

    return {
        lat: scaledLat / SCALE,
        lng: scaledLng / SCALE
    };
}
// Decode each sample token and print it as "lat,lng", one result per line.
(function () {
    var samples = ['HHDFJGZVVIHIJG', 'HETSIFZVVHWATW'];
    for (var i = 0; i < samples.length; i++) {
        if (i > 0) {
            document.write("<br/>");
        }
        var point = decode(samples[i]);
        document.write(point.lat+','+point.lng);
    }
})();
</script>
</body>
</html>