方式一:
1 import java.net.MalformedURLException;
2 import java.net.URL;
3 import java.util.Arrays;
4 import java.util.HashSet;
5 import java.util.Set;
6 import java.util.regex.Pattern;
7
public class URLUtil {

    /**
     * Lower-case public suffixes recognised by {@link #getDomainName(URL)}.
     * This is a small hand-maintained list, not the complete Public Suffix List;
     * hosts under a suffix that is not listed are handled by the two-label
     * fallback in {@link #lastTwoLabels(String)}.
     */
    private static final Set<String> PUBLIC_SUFFIXES = new HashSet<String>(Arrays.asList(
            "com", "org", "net", "gov", "edu", "co", "tv", "mobi", "info", "asia", "xxx", "onion",
            "cn", "com.cn", "edu.cn", "gov.cn", "net.cn", "org.cn",
            "jp", "kr", "tw",
            "hk", "com.hk", "org.hk",
            "se", "com.se", "org.se"));

    /** Matches a dotted-quad literal such as {@code 192.168.1.10} (octet range is not checked). */
    private static final Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");

    /**
     * Returns the registrable domain of a URL: the recognised public suffix plus
     * the one label in front of it (for example {@code hexun.com} for
     * {@code news.hexun.com}).
     *
     * <p>Special cases:
     * <ul>
     *   <li>a single trailing dot on the host is removed first;</li>
     *   <li>a dotted-quad IPv4 host is returned unchanged;</li>
     *   <li>if no suffix from the built-in list matches, the last two labels of
     *       the host are returned (or the whole host when it has fewer than two).</li>
     * </ul>
     *
     * @param url the URL whose host is inspected; must not be {@code null}
     * @return the lower-cased domain name derived from the host
     */
    public static String getDomainName(URL url) {
        // Lower-case up front: the suffix set is lower-case and isSameDomainName
        // already compares case-insensitively, so lookup must not depend on case.
        String host = url.getHost().toLowerCase(java.util.Locale.ROOT);
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        if (IP_PATTERN.matcher(host).matches()) {
            return host;
        }

        // Strip one leading label at a time; as soon as what remains after the
        // first dot is a known suffix, the current candidate is the answer.
        String candidate = host;
        int dot = candidate.indexOf('.');
        while (dot >= 0) {
            String suffix = candidate.substring(dot + 1);
            if (PUBLIC_SUFFIXES.contains(suffix)) {
                return candidate;
            }
            candidate = suffix;
            dot = candidate.indexOf('.');
        }

        // No listed suffix matched. Returning only the last label would make every
        // host under an unlisted TLD compare equal, so keep the last two labels.
        return lastTwoLabels(host);
    }

    /**
     * Returns the registrable domain of a URL given as text.
     *
     * @param url the URL string to parse
     * @return the lower-cased domain name derived from the host
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     * @see #getDomainName(URL)
     */
    public static String getDomainName(String url) throws MalformedURLException {
        return getDomainName(new URL(url));
    }

    /**
     * Tells whether two URLs resolve to the same domain name.
     *
     * @param url1 the first URL
     * @param url2 the second URL
     * @return {@code true} if {@link #getDomainName(URL)} yields the same value
     *         for both, ignoring case
     */
    public static boolean isSameDomainName(URL url1, URL url2) {
        return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
    }

    /**
     * Tells whether two URL strings resolve to the same domain name.
     *
     * @param url1 the first URL string
     * @param url2 the second URL string
     * @return {@code true} if both URLs have the same domain name, ignoring case
     * @throws MalformedURLException if either argument cannot be parsed as a URL
     */
    public static boolean isSameDomainName(String url1, String url2)
            throws MalformedURLException {
        return isSameDomainName(new URL(url1), new URL(url2));
    }

    /** Returns the last two dot-separated labels of {@code host}, or {@code host} itself if it has fewer. */
    private static String lastTwoLabels(String host) {
        int last = host.lastIndexOf('.');
        if (last < 0) {
            return host;
        }
        int previous = host.lastIndexOf('.', last - 1);
        return previous < 0 ? host : host.substring(previous + 1);
    }

    /** Small demo: prints the domain name obtained through both overloads. */
    public static void main(String[] args) throws Exception {
        String urlStr = "http://news.hexun.com/2017-09-23/190978248.html";
        System.out.println(getDomainName(urlStr));
        System.out.println(getDomainName(new URL(urlStr)));
    }

}
方式二:
1 import java.net.MalformedURLException;
2 import java.net.URL;
3 import java.util.regex.Matcher;
4 import java.util.regex.Pattern;
5
public class DomainUtils {

    /**
     * Matches one label followed by a recognised suffix at the END of the host.
     * Two-level suffixes are listed before their one-level counterparts so that
     * {@code .com.cn} wins over {@code .cn}. The trailing {@code $} is required:
     * without it {@code .com} would match the beginning of {@code .company}.
     * The last three alternatives are the non-ASCII suffixes of the original
     * list, written as Unicode escapes so compilation does not depend on the
     * source-file encoding.
     */
    private static final Pattern TOP_DOMAIN_PATTERN = Pattern.compile(
            "[^\\.]+(\\.com\\.cn|\\.net\\.cn|\\.org\\.cn|\\.gov\\.cn|\\.com|\\.net|\\.cn|\\.org|\\.cc|\\.me"
            + "|\\.tel|\\.mobi|\\.asia|\\.biz|\\.info|\\.name|\\.tv|\\.hk"
            + "|\\.\u516c\u53f8|\\.\u4e2d\u56fd|\\.\u7f51\u7edc)$");

    /**
     * Extracts the registrable domain (one label plus a recognised suffix) from
     * a URL string, for example {@code hexun.com} from
     * {@code http://news.hexun.com/2017-09-23/190978248.html}.
     *
     * @param url the URL string to inspect
     * @return the lower-cased domain, or {@code null} when {@code url} is not a
     *         well-formed URL or its host does not end in a recognised suffix
     */
    public static String getTopDomain(String url) {
        try {
            // Locale.ROOT keeps the lower-casing independent of the default locale.
            String host = new URL(url).getHost().toLowerCase(java.util.Locale.ROOT);
            // Drop a trailing root dot so that "news.hexun.com." still matches the anchored pattern.
            if (host.endsWith(".")) {
                host = host.substring(0, host.length() - 1);
            }
            Matcher matcher = TOP_DOMAIN_PATTERN.matcher(host);
            if (matcher.find()) {
                return matcher.group();
            }
        } catch (MalformedURLException ignored) {
            // A malformed URL is reported to the caller through the null return below.
        }
        return null;
    }

    /** Small demo: prints the domain extracted from a sample URL. */
    public static void main(String[] args) {
        System.out.println(getTopDomain("http://news.hexun.com/2017-09-23/190978248.html")); // hexun.com
    }
}