要爬取一个网站时遇到了极验的验证码,这周一直在想怎么破解它。在网上搜了很多资料,在知乎上看到有人问过这个问题,于是我按照其中的思路大概实现了一下。

1.使用htmlunit(这种方式我没成功,模拟鼠标拖拽后轨迹没生成,可以跳过)
我用的是java,首先想到的是直接用htmlunit,于是做了如下初始化:
/**
 * Lazily creates and configures the {@code webClient} field.
 *
 * <p>If {@code webClient} is already set this method returns immediately.
 * Otherwise it builds a Firefox-24-emulating HtmlUnit client, applies the
 * options below, and seeds it with the cookies currently held by
 * {@code client} so both clients share the same cookies at creation time.
 */
private void initWebClient() {
    if (webClient != null) {
        return;
    }
    webClient = new WebClient(BrowserVersion.FIREFOX_24);
    // Send all traffic through a proxy on localhost:8888.
    // NOTE(review): presumably a local debugging proxy (e.g. Fiddler) — confirm before production use.
    webClient.getOptions().setProxyConfig(new ProxyConfig("127.0.0.1", 8888));
    webClient.getOptions().setActiveXNative(true);
    // Accept any SSL certificate without validation (needed when traffic is intercepted by a proxy).
    webClient.getOptions().setUseInsecureSSL(true);
    webClient.getOptions().setJavaScriptEnabled(true);
    webClient.getOptions().setCssEnabled(true);
    webClient.setCssErrorHandler(new SilentCssErrorHandler());
    // Keep going when page scripts fail or the server answers with an error status.
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

    CookieManager cookieManager = new CookieManager();
    // Cookies obtained elsewhere (via the other HTTP client). The list must be
    // parameterized: iterating a raw List yields Object, which cannot be bound
    // to the typed loop variable below.
    // NOTE(review): assumes client.getCookies() returns Apache HttpClient cookies — confirm.
    List<org.apache.http.cookie.Cookie> httpCookies = client.getCookies();
    for (org.apache.http.cookie.Cookie cookie : httpCookies) {
        cookieManager.addCookie(new com.gargoylesoftware.htmlunit.util.Cookie(cookie));
    }
    webClient.setCookieManager(cookieManager);
}