闲来无事,有个朋友需要抓取美团的商家信息,以便对商家数据进行分析。
话不多说,直接贴代码。本代码仅供学习使用,请勿用于任何非法用途。
代码中用到的工具类来自 hutool,请大家自行引入该依赖。
/**
 * Walks the Meituan Chongqing food listing page by page, starting at page 1, and passes
 * the HTML of each page to {@code listData}.
 *
 * <p>The walk ends when a page contains the "no matching merchants" marker text, or when
 * the server returns an empty body.
 *
 * @param param not read by this method; kept so existing callers keep compiling
 */
public void convert(String param) {
    String url = "https://cq.meituan.com/meishi/pn{index}/";
    for (int currentPage = 1; ; currentPage++) {
        String html = HttpUtil.get(url.replace("{index}", currentPage + ""));
        // An empty response can never contain the end-of-list marker below, so without
        // this guard the loop would keep requesting ever-higher page numbers forever
        // (and a null body would fail with a NullPointerException).
        if (html == null || html.isEmpty()) {
            break;
        }
        // The listing shows this message once the page number runs past the last page.
        if (html.contains("对不起,没有符合条件的商家")) {
            break;
        }
        listData(html);
    }
}
private void listData(String html) {
List<String> list = ReUtil.findAllGroup1("\"poiId\":(.*?),\"frontImg\"", html);
for (String item :
list) {
String url = "https://www.meituan.com/meishi/" + item + "/";
String detailHtml = HttpRequest.get(url)
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")
.header("Host", "www.meituan.com")
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
*ex.e**cute().body();
detailData(detailHtml);
}
System.out.printf("完成所有");
}
/** Mapper that {@code detailData} uses to insert each scraped merchant record; supplied through {@code @Autowired}. */
@Autowired
MeiTuanMeiShiMapper meiTuanMeiShiMapper;
/**
 * Extracts title, description, opening hours and phone number from a merchant detail page
 * and inserts them as one {@code MeiTuanMeiShi} row through {@code meiTuanMeiShiMapper}.
 *
 * <p>A field whose pattern does not match is stored as an empty string.
 *
 * @param html HTML of a merchant detail page
 */
private void detailData(String html) {
    String title = firstGroupOrEmpty("\"title\":\"(.*?)\"},\"pageId\"", html);
    String description = firstGroupOrEmpty("\"description\":\"(.*?)\",\"keyword\"", html);
    String opentime = firstGroupOrEmpty("\"openTime\":\"(.*?)\",\"extraInfos\"", html);
    String phone = firstGroupOrEmpty("\"phone\":\"(.*?)\",\"openTime\"", html);

    MeiTuanMeiShi meiTuanMeiShi = new MeiTuanMeiShi();
    meiTuanMeiShi.setTitle(title);
    meiTuanMeiShi.setDescription(description);
    meiTuanMeiShi.setOpentime(opentime);
    meiTuanMeiShi.setPhone(phone);
    meiTuanMeiShiMapper.insert(meiTuanMeiShi);

    // print, not printf: the title is scraped text and must not be interpreted as a format
    // string, otherwise a '%' in a merchant name makes printf throw.
    System.out.print("完成:" + title);
}

/** Returns capture group 1 of the first match of {@code regex} in {@code html}, or "" when there is none. */
private String firstGroupOrEmpty(String regex, String html) {
    List<String> matches = ReUtil.findAllGroup1(regex, html);
    return CollectionUtil.isEmpty(matches) ? "" : matches.get(0);
}