• Rhino+envjs-1.2.js 在java运行网站js 工具类


    java爬虫遇到个页面加密的东西,找了些资料学习学习

    做了个在Java中运行js的工具类,希望对大家有用。其中用到的Client(用于下载js)可以自行换成自己的HTTP客户端。主要用到了以下几个组件:

    Rhino就是JavaScript引擎,它的目的就是实现Java与JavaScript的互操作性。rhino-1.7R1.jar

    Envjs一个纯js方式在无浏览器环境下模拟浏览器的行为。envjs-1.2.js

    一般网站的js中都会用到jquery,所以还加载了jquery.js

    import java.io.BufferedReader;
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.FileNotFoundException;
    import java.io.FileReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStream;
    import java.io.Reader;
    import java.lang.ref.SoftReference;
    import java.net.URI;
    import java.nio.charset.Charset;
    import java.util.Locale;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import java.util.zip.GZIPInputStream;
    
    import org.apache.commons.io.IOUtils;
    import org.apache.commons.lang.StringUtils;
    import org.apache.commons.lang.Validate;
    import org.apache.http.Header;
    import org.apache.http.HeaderElement;
    import org.apache.http.HttpEntity;
    import org.apache.http.ParseException;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.protocol.RequestAcceptEncoding;
    import org.apache.http.impl.DefaultConnectionReuseStrategy;
    import org.apache.http.impl.client.BasicCookieStore;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClientBuilder;
    import org.apache.http.util.Args;
    import org.apache.http.util.ByteArrayBuffer;
    import org.jsoup.Jsoup;
    import org.mozilla.javascript.Context;
    import org.mozilla.javascript.ContextFactory;
    import org.mozilla.javascript.Function;
    import org.mozilla.javascript.Scriptable;
    
    import com.ibm.icu.text.CharsetDetector;
    import com.ibm.icu.text.CharsetMatch;
    
    //import net.sourceforge.htmlunit.corejs.javascript.Context;
    //import net.sourceforge.htmlunit.corejs.javascript.ContextFactory;
    //import net.sourceforge.htmlunit.corejs.javascript.Function;
    //import net.sourceforge.htmlunit.corejs.javascript.Scriptable;
    
    /**
     * 参照http://mybeautiful.iteye.com/blog/1442839
     * http://m.oschina.net/blog/121347
     * http://blog.csdn.net/dwjmantou/article/details/45276967
     * http://lcllcl987.iteye.com/blog/87423
     * ***不可使用htmlunit的包******Cannot call method "setOptimizationLevel" of null
     * @author 5432
     *
     */
    public class RhinoScaper {
        private Context context;
        private Scriptable scriptable;
        /**
         * Initializes the Rhino context and global scope, then loads envjs and jQuery.
         * <p>
         * Must be called before any other method of this class. The context is entered
         * on the calling thread and is not exited by this class.
         */
        public void init(){
            context = ContextFactory.getGlobal().enterContext();
            scriptable = context.initStandardObjects(null);
            // -1 selects Rhino's interpreted mode (no bytecode generation).
            context.setOptimizationLevel(-1);
            context.setLanguageVersion(Context.VERSION_1_5);
            // Define print() (per the original author's note envjs-1.2.js expects it but
            // does not define it) plus a tiny function used as a start-up self-test.
            context.evaluateString(scriptable,
                    "var v='sssaass';"
                    + "var print = function(v) {"+
                          " java.lang.System.out.println(v);return v ;"+
                    " };function hah(){return v }",
                    "print",1,null);
            // Start-up self-test: call print("test") and hah() once.
            Function prf = (Function) scriptable.get("print", scriptable);
            prf.call(Context.getCurrentContext(), scriptable, prf, new Object[]{"test"});
            invokFunction("hah");

            String[] file = { resolveClasspathFile("envjs-1.2.js"), "./lib/jquery.js" };
            for (String f : file) {
                evaluateJs(f);
            }
        }

        /**
         * Builds a file-system path for a file located in the classpath root directory.
         * <p>
         * The classpath root is obtained as a URL; concatenating that URL with a file name
         * yields a "file:/..." string that cannot be opened as a file, so the URL is
         * converted to a real path here. When the root is unavailable or is not a plain
         * directory, the bare file name is returned.
         *
         * @param fileName name of the file expected in the classpath root
         * @return a path usable with file-based readers; always ends with {@code fileName}
         */
        private String resolveClasspathFile(String fileName) {
            java.net.URL root = getClass().getResource("/");
            if (root != null && "file".equals(root.getProtocol())) {
                try {
                    return new java.io.File(new java.io.File(root.toURI()), fileName).getPath();
                } catch (java.net.URISyntaxException ignored) {
                    // Malformed root URL: fall back to the bare file name below.
                } catch (IllegalArgumentException ignored) {
                    // Root URL is not a hierarchical file URI: fall back as well.
                }
            }
            return fileName;
        }
        /**
         * Calls a JavaScript function that is defined in the shared scope.
         *
         * @param functionName name of the function to look up in the scope
         * @param functionArags arguments passed to the function, in order
         * @return whatever the JavaScript function returns
         * @throws IllegalArgumentException if {@link #init()} has not been called, the name is
         *         null, or the name does not resolve to a function
         */
        public Object invokFunction(String functionName,Object... functionArags) {
            Validate.notNull(context, "context is null");
            Validate.notNull(scriptable, "scriptable is null");
            Validate.notNull(functionName, "functionName is null");
            // The lookup yields a non-Function marker for unknown names, so check the
            // type before casting to report the offending name instead of a ClassCastException.
            Object candidate = scriptable.get(functionName, scriptable);
            if (!(candidate instanceof Function)) {
                throw new IllegalArgumentException(
                        "'" + functionName + "' is not a function in the current scope");
            }
            Function function = (Function) candidate;
            return function.call(Context.getCurrentContext(), scriptable, function, functionArags);
        }
        
        /**
         * Loads a JavaScript file from the file system and evaluates it in the shared scope.
         * <p>
         * When the file cannot be found, a copy is downloaded instead:
         * <ul>
         * <li>path contains {@code envjs-1.2.js}:
         *     https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js</li>
         * <li>path contains {@code jquery.js}:
         *     http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js</li>
         * </ul>
         * Download and evaluation problems in the fallback are reported on stderr and do
         * not abort the caller (best effort, as before).
         *
         * @param f path of the script file
         * @throws RuntimeException if the file is missing and matches neither fallback
         */
        public void evaluateJs(String f) {
            Validate.notNull(context, "context is null");
            Validate.notNull(scriptable, "scriptable is null");
            Reader in = null;
            try {
                // Read as UTF-8 explicitly instead of relying on the platform default charset.
                in = new InputStreamReader(new java.io.FileInputStream(f), Charset.forName("UTF-8"));
                context.evaluateReader(scriptable, in, f, 1, null);
            } catch (FileNotFoundException e1) {
                if (f.contains("envjs-1.2.js")) {
                    evaluateRemoteJs("https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js");
                } else if (f.contains("jquery.js")) {
                    evaluateRemoteJs("http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js");
                } else {
                    throw new RuntimeException("unknown file "+f);
                }
            } catch (IOException e1) {
                e1.printStackTrace();
            } finally {
                IOUtils.closeQuietly(in);
            }
        }

        /**
         * Downloads a script and evaluates it in the shared scope, reporting (not throwing)
         * any failure.
         *
         * @param url location of the script; also used as its source name in error messages
         */
        private void evaluateRemoteJs(String url) {
            try {
                SoftReference<String> ref = Client.getHtmlString(url);
                String script = ref == null ? null : ref.get();
                if (script == null || script.length() == 0) {
                    // Nothing usable was downloaded; evaluating null would only raise an
                    // obscure error inside the engine.
                    System.err.println("no script content downloaded from " + url);
                    return;
                }
                context.evaluateString(scriptable, script, url, 1, null);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        public static void main(String[] args) {
            RhinoScaper rhinoScaper = new RhinoScaper();
            rhinoScaper.init();
    //        rhinoScaper.JSloadString("jsString", "jsname");
    //        rhinoScaper.evaluateJs("E:/Desktop/loginjs.js");
    //        rhinoScaper.loadJS("", classpathURI);
            
    //        电信登录加密测试
            String pwd="111";
            StringBuilder ascending = new StringBuilder();
            SoftReference<String> htmlString = null;
            try {
                htmlString = Client.getHtmlString("http://login.189.cn/bundles/jquery?v=h3Pl8XT8zdNkoI1VbV5sEZOBrSqsxRXX0TIQ9S_lAlM1");
            } catch (Exception e) {
                e.printStackTrace();
            }
            String jsStr =htmlString==null?"":htmlString.get();
            jsStr = jsStr.replaceAll("float:", "floats:").replaceAll("throws", "throwss");
            ascending.append(jsStr);
            ascending.append(";
     var input=document.createElement("input");input.value='"+pwd+"';;input.id= 'pass';input.type='password';");
            ascending.append("
     function getpassword(){ return $(input).valAesEncryptSet()}");
            rhinoScaper.JSloadString(ascending.toString(), "jsname");
            Object result = rhinoScaper.invokFunction("getpassword");
            System.out.println(result);
            try {
                htmlString = Client.getHtmlString("http://www.youdaili.net/Daili/");
                jsStr =htmlString==null?"":htmlString.get();
                String runScript = rhinoScaper.runScript(jsStr);
                System.out.println(runScript);
            } catch (Exception e) {
                e.printStackTrace();
            }
            
    
        }
        /**
         * 运行js
         * @param html
         * @return
         */
        private String runScript(String html) {
            String function = null;int jsfrom = 0;
            Pattern p = Pattern.compile("setTimeout\("(.*)\((.*)\)", 200\);");
            Matcher m = p.matcher(html);
            if(m.find()){
             function = m.group(1);//函数名
             jsfrom = Integer.parseInt(m.group(2));//参数
            }
            JSloadString(Jsoup.parse(html).select("script").html().replace("eval("qo=eval;qo(po);")", "return po"), "jsname");
            Object result = invokFunction(function, jsfrom);
            return result.toString();
        }
        /**
         * Loads a JavaScript classpath resource (read as UTF-8) and evaluates it in the
         * shared scope.
         *
         * @param sourceName name reported for the script in error messages
         * @param classpathURI resource path, resolved relative to this class
         * @throws IllegalArgumentException if {@link #init()} has not been called or the
         *         resource does not exist
         * @throws IllegalStateException if the resource cannot be read
         */
        public void loadJS(String sourceName, String classpathURI) {
            Validate.notNull(context, "context is null");
            Validate.notNull(scriptable, "scriptable is null");
            InputStream inputStream = getClass().getResourceAsStream(classpathURI);
            if (inputStream == null) {
                throw new IllegalArgumentException("classpath resource not found: " + classpathURI);
            }
            String js;
            try {
                js = IOUtils.toString(inputStream, "UTF-8");
            } catch (IOException e) {
                // Evaluating a null source would only fail later with a less useful error.
                throw new IllegalStateException("failed to read classpath resource " + classpathURI, e);
            } finally {
                IOUtils.closeQuietly(inputStream);
            }
            context.evaluateString(scriptable, js, sourceName, 1, null);
        }
        /**
         * Evaluates JavaScript source text in the shared scope.
         * <p>
         * Callers should first rewrite identifiers such as {@code throws} or {@code float}
         * that make the script fail to load.
         *
         * @param source JavaScript source text
         * @param sourceName name reported for the script in error messages
         */
        public void JSloadString(String source, String sourceName){
            Validate.notNull(context, "context is null");
            Validate.notNull(scriptable, "scriptable is null");
            final int firstLine = 1;
            final Object securityDomain = null;
            context.evaluateString(scriptable, source, sourceName, firstLine, securityDomain);
        }
    }
    class Client{
        /**
         * Closes a resource, tolerating {@code null}; a failure while closing is printed
         * and not propagated.
         *
         * @param close resource to close, may be null
         */
        public static void close(AutoCloseable close) {
            if (close == null) {
                return;
            }
            try {
                close.close();
            } catch (Exception closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        /**
         * Sends a GET request with a browser-like User-Agent and returns the raw response.
         * <p>
         * Automatic content decompression is disabled on the client while a
         * {@link RequestAcceptEncoding} interceptor is still added, so callers are
         * expected to decode the body themselves. A new client with its own cookie store
         * is built for every call; the caller must close the returned response.
         * NOTE(review): the client itself is never closed here — confirm whether that is
         * acceptable for the intended usage.
         *
         * @param url absolute URL to fetch
         * @return the open response
         * @throws IOException if the request fails
         * @throws ClientProtocolException on an HTTP protocol error
         */
        public static CloseableHttpResponse HttpGetResponse(String url) throws IOException, ClientProtocolException {
            final HttpGet request = new HttpGet(URI.create(url));
            final String userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36";

            final HttpClientBuilder clientBuilder = HttpClientBuilder.create();
            clientBuilder.disableContentCompression();
            clientBuilder.setConnectionReuseStrategy(new DefaultConnectionReuseStrategy());
            clientBuilder.setUserAgent(userAgent);
            clientBuilder.addInterceptorLast(new RequestAcceptEncoding());
            clientBuilder.setDefaultCookieStore(new BasicCookieStore());

            final CloseableHttpClient httpClient = clientBuilder.build();
            return httpClient.execute(request);
        }
        /**
         * Downloads a URL and returns its body as text.
         * <p>
         * The body is decoded according to its Content-Encoding, then converted to text
         * using the charset declared in the Content-Type header. When no usable charset is
         * declared, the charset is detected from the bytes. The status line and the
         * Content-Encoding header are printed to stdout.
         * <p>
         * The text is only softly referenced by the result, so {@code get()} on the
         * returned reference may yield {@code null} if memory runs low.
         *
         * @param url absolute URL to fetch
         * @return soft reference to the decoded text
         * @throws IllegalArgumentException if the response has no body
         * @throws Exception if the request fails or the body cannot be decoded
         */
        public static SoftReference<String> getHtmlString(String url)throws Exception {
            CloseableHttpResponse execute = null;
            HttpEntity entity;
            byte[] binary;
            try {
                execute = HttpGetResponse(url);
                entity = execute.getEntity();
                // A response without a body has no entity at all.
                binary = entity == null ? null : HttpEntityTOByte(entity);
            } finally {
                close(execute);
            }
            // Status line and headers remain readable after the response is closed.
            System.out.println(execute.getStatusLine().toString());
            System.out.println(entity == null ? null : entity.getContentEncoding());

            Args.notNull(binary, "binary");
            byte[] decode = decode(binary, entity);

            // The Content-Type header is optional; only consult it when present.
            Header contentType = entity.getContentType();
            String charsetName = contentType == null ? null : getContentCharSet(contentType.getValue());
            String html = null;
            if (charsetName != null) {
                try {
                    html = new String(decode, Charset.forName(charsetName.trim().replace("\"", "")));
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: fall back to detection below.
                }
            }
            if (html == null) {
                CharsetMatch match = new CharsetDetector().setText(decode).detect();
                html = match == null
                        ? new String(decode, Charset.forName("UTF-8"))
                        : match.getString();
            }
            return new SoftReference<String>(html);
        }
        /**
         * Downloads a URL and returns a reader over its decoded body.
         * <p>
         * The body is fully buffered and decoded according to its Content-Encoding. The
         * reader uses the charset declared in the Content-Type header, or UTF-8 when none
         * is declared or the declared one is unusable. The status line and the
         * Content-Encoding header are printed to stdout.
         * <p>
         * The reader is only softly referenced by the result, so {@code get()} on the
         * returned reference may yield {@code null} if memory runs low. The caller closes
         * the reader.
         *
         * @param url absolute URL to fetch
         * @return soft reference to a buffered reader over the body
         * @throws IllegalArgumentException if the response has no body
         * @throws Exception if the request fails or the body cannot be decoded
         */
        public static SoftReference<Reader> getHtmlReader(String url)throws Exception {
            CloseableHttpResponse execute = null;
            HttpEntity entity;
            byte[] binary;
            try {
                execute = HttpGetResponse(url);
                entity = execute.getEntity();
                // A response without a body has no entity at all.
                binary = entity == null ? null : HttpEntityTOByte(entity);
            } finally {
                close(execute);
            }
            System.out.println(execute.getStatusLine().toString());
            // Content-Encoding is absent for uncompressed responses, so print it null-safely.
            System.out.println(String.valueOf(entity == null ? null : entity.getContentEncoding()));

            Args.notNull(binary, "binary");
            byte[] decode = decode(binary, entity);

            Charset charset = Charset.forName("UTF-8");
            Header contentType = entity.getContentType();
            String declared = contentType == null ? null : getContentCharSet(contentType.getValue());
            if (declared != null) {
                try {
                    charset = Charset.forName(declared.trim().replace("\"", ""));
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: keep the UTF-8 default.
                }
            }
            Reader bufR = new BufferedReader(
                    new InputStreamReader(new ByteArrayInputStream(decode), charset));
            return new SoftReference<Reader>(bufR);
        }
        /**
         * Extracts the charset name from a Content-Type header value such as
         * {@code text/html; charset=utf-8}.
         * <p>
         * The parameter name is matched case-insensitively; surrounding whitespace and
         * double quotes around the value are removed.
         *
         * @param contentType Content-Type header value, may be null or empty
         * @return the charset name, or {@code null} when none is declared
         */
        private static String getContentCharSet(String contentType) throws ParseException {
            if (!StringUtils.isNotEmpty(contentType)) {
                return null;
            }
            for (String param : contentType.split(";")) {
                int eq = param.indexOf('=');
                if (eq < 0) {
                    continue;
                }
                String key = param.substring(0, eq).trim();
                if (!"charset".equalsIgnoreCase(key)) {
                    continue;
                }
                String value = param.substring(eq + 1).trim();
                // The value may be sent quoted, e.g. charset="utf-8".
                if (value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"")) {
                    value = value.substring(1, value.length() - 1).trim();
                }
                if (value.length() > 0) {
                    return value;
                }
            }
            return null;
        }
        public static final int BUFFER = 1024;
        /**
         * Decompresses a gzip-compressed byte array.
         *
         * @param data gzip-compressed bytes
         * @return the decompressed bytes
         * @throws Exception if the data cannot be decompressed
         * @author http://snowolf.iteye.com/blog/643010
         */
        public static byte[] decompress(byte[] data) throws Exception {
            final ByteArrayInputStream compressed = new ByteArrayInputStream(data);
            final ByteArrayOutputStream plain = new ByteArrayOutputStream();
            // Delegate to the stream variant, which does the actual gunzip.
            decompress(compressed, plain);
            final byte[] result = plain.toByteArray();
            plain.flush();
            close(plain);
            close(compressed);
            return result;
        }
        /**
         * Reads gzip-compressed data from one stream and writes the decompressed bytes to
         * another. The gzip wrapper created around {@code is} is closed on exit;
         * {@code os} is left open.
         *
         * @param is source of gzip-compressed bytes
         * @param os destination for the decompressed bytes
         * @throws Exception if reading, decompressing or writing fails
         */
        public static void decompress(InputStream is, OutputStream os)
                throws Exception {
            GZIPInputStream gzipIn = null;
            try {
                gzipIn = new GZIPInputStream(is);
                final byte[] chunk = new byte[BUFFER];
                for (int read = gzipIn.read(chunk, 0, BUFFER);
                        read != -1;
                        read = gzipIn.read(chunk, 0, BUFFER)) {
                    os.write(chunk, 0, read);
                }
            } finally {
                close(gzipIn);
            }
        }
      
        /**
         * Undoes the Content-Encoding of a downloaded body.
         * <p>
         * Only the first coding listed in the header is considered: {@code gzip} and
         * {@code x-gzip} are gunzipped, {@code deflate} is inflated, {@code identity} is
         * returned unchanged. A body without a Content-Encoding header, or with a declared
         * length of zero, is returned unchanged.
         *
         * @param binary raw body bytes as received
         * @param entity entity the bytes were read from; supplies the headers, may be null
         * @return the decoded bytes
         * @throws Exception if the coding is unsupported or the data is corrupt
         */
        public static byte[] decode(byte[] binary, final HttpEntity entity) throws Exception {
            if (entity == null || entity.getContentLength() == 0) {
                return binary;
            }
            final Header ceheader = entity.getContentEncoding();
            if (ceheader == null) {
                return binary;
            }
            for (final HeaderElement codec : ceheader.getElements()) {
                final String codecname = codec.getName().toLowerCase(Locale.US);
                if ("gzip".equals(codecname) || "x-gzip".equals(codecname)) {
                    return decompress(binary);
                } else if ("deflate".equals(codecname)) {
                    // Previously returned still compressed although the request advertises deflate.
                    return inflate(binary);
                } else if ("identity".equals(codecname)) {
                    /* Don't need to transform the content - no-op */
                    return binary;
                } else {
                    throw new Exception("Unsupported Content-Coding: "+codecname );
                }
            }
            return binary;
        }

        /**
         * Inflates a "deflate"-coded body, accepting both the zlib-wrapped form and the
         * raw deflate stream that some servers send.
         */
        private static byte[] inflate(byte[] data) throws IOException {
            if (data.length == 0) {
                return data;
            }
            try {
                return inflate(data, false);
            } catch (java.util.zip.ZipException notZlibWrapped) {
                // No valid zlib header: retry as a raw deflate stream.
                return inflate(data, true);
            }
        }

        /** Inflates {@code data}; {@code raw} selects header-less deflate instead of zlib. */
        private static byte[] inflate(byte[] data, boolean raw) throws IOException {
            final java.util.zip.Inflater inflater = new java.util.zip.Inflater(raw);
            // In raw mode the Inflater documentation asks for one extra dummy input byte.
            final byte[] input = raw ? java.util.Arrays.copyOf(data, data.length + 1) : data;
            final InputStream in =
                    new java.util.zip.InflaterInputStream(new ByteArrayInputStream(input), inflater);
            try {
                final ByteArrayOutputStream out = new ByteArrayOutputStream();
                final byte[] chunk = new byte[4096];
                int n;
                while ((n = in.read(chunk)) != -1) {
                    out.write(chunk, 0, n);
                }
                return out.toByteArray();
            } finally {
                close(in);
                // A caller-supplied Inflater is not released by closing the stream.
                inflater.end();
            }
        }
        /**
         * Reads the whole content of an HTTP entity into a byte array and closes the
         * content stream. Modelled on {@code EntityUtils.toByteArray(entity)}.
         *
         * @param entity entity to read
         * @return the content bytes, or {@code null} when the entity has no content stream
         * @throws IOException if reading fails
         * @throws IllegalArgumentException if the declared length exceeds {@code Integer.MAX_VALUE}
         */
        public static byte[] HttpEntityTOByte(HttpEntity entity) throws IOException{
            final InputStream content = entity.getContent();
            if (content == null) {
                return null;
            }
            try {
                final long declaredLength = entity.getContentLength();
                Args.check(declaredLength <= Integer.MAX_VALUE,
                        "HTTP entity too large to be buffered in memory");
                // A negative length means "unknown"; start with a 4 KiB buffer in that case.
                final int narrowed = (int) declaredLength;
                final int capacity = narrowed < 0 ? 4096 : narrowed;
                final ByteArrayBuffer collected = new ByteArrayBuffer(capacity);
                final byte[] chunk = new byte[4096];
                for (int read = content.read(chunk); read != -1; read = content.read(chunk)) {
                    collected.append(chunk, 0, read);
                }
                return collected.toByteArray();
            } finally {
                content.close();
            }
        }
    }
  • 相关阅读:
    CSS定位属性position相关介绍
    JavaScript 预解析机制
    使用 python 实现π的计算
    turtle库的学习
    关于使用MVVM模式在WPF的DataGrid控件中实现ComboBox编辑列
    关于wpf窗体中Allowtransparent和WindowsFormsHost的纠纷
    关于WPF中ItemsControl系列控件中Item不能继承父级的DataContext的解决办法
    Python简史
    敏捷开发原则
    线程池
  • 原文地址:https://www.cnblogs.com/wangly/p/5443565.html
Copyright © 2020-2023  润新知