搜索引擎中用的是中文和英文作为关键字,为了达到快速搜索的目的,常常需要排序,下面就说说如何对中英文字符串进行排序。
中文字符串按首字拼音排序
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import net.sourceforge.pinyin4j.PinyinHelper;
public class PinyinComparator implements Comparator {
private String concatPinyinStringArray(String[] pinyinArray) {
StringBuffer pinyinStrBuf = new StringBuffer();
if ((null != pinyinArray) && (pinyinArray.length > 0)) {
for (int i = 0; i < pinyinArray.length; i++) {
pinyinStrBuf.append(pinyinArray[i]);
}
}
String outputString = pinyinStrBuf.toString();
return outputString;
}
public int compare(Object o1, Object o2) {
char c1 = ((String) o1).charAt(0);
char c2 = ((String) o2).charAt(0);
// System.out.println("c1--------->"+c1+"----c2--------------->"+c2);
return concatPinyinStringArray(
PinyinHelper.toHanyuPinyinStringArray(c1)).compareTo(
concatPinyinStringArray(PinyinHelper
.toHanyuPinyinStringArray(c2)));
}
public static void main(String[] args) {
String[] data = { "孙为", "孟的", "宋个", "尹个", "廖好", "张大", "张就", "张你", "徐人", "昆刚",
"曹吃", "曾看", "怡非" };
List<String> list = Arrays.asList(data);
Arrays.sort(data, new PinyinComparator());
System.out.println(list);
}
}
Arrays中的sort算法
下面是java.util.Arrays中sort算法的源码,这里以对int数组排序的算法为例,分析一下jdk源码中的排序算法:
/**
 * Sorts the whole of the given int array in place by delegating to
 * {@code sort1} with a start offset of 0 and the array's full length.
 *
 * @param a the array to sort
 */
public static void sort(int[] a) {
    final int start = 0;
    final int count = a.length;
    sort1(a, start, count);
}
/**
 * Sorts the len elements of x starting at index off into ascending order,
 * in place.
 *
 * Ranges shorter than 7 elements are handled by an insertion sort. Longer
 * ranges are partitioned around a pivot value into "less than", "equal to"
 * and "greater than" sections, and the two unequal sections are then sorted
 * recursively.
 *
 * @param x   the array to sort
 * @param off index of the first element of the range
 * @param len number of elements in the range
 */
private static void sort1(int x[], int off, int len) {
// Insertion sort on the smallest ranges
if (len < 7) {
for (int i=off; i<len+off; i++)
for (int j=i; j>off && x[j-1]>x[j]; j--)
swap(x, j, j-1);
return;
}
// Larger ranges are sorted with quicksort. To keep quicksort from
// degenerating, choose a pivot that approximates the median of the range.
int m = off + (len >> 1); // Small arrays, middle element
if (len > 7) {
int l = off;
int n = off + len - 1;
if (len > 40) { // Big arrays, pseudomedian of 9
int s = len/8;
l = med3(x, l, l+s, l+2*s);
m = med3(x, m-s, m, m+s);
n = med3(x, n-2*s, n-s, n);
}
m = med3(x, l, m, n); // Mid-size, med of 3
}
// The pivot value
int v = x[m];
// Partition step of the quicksort: b scans upward and c scans downward.
// Elements equal to v are parked at the two ends of the range
// (x[off..a-1] and x[d+1..off+len-1]) while scanning.
int a = off, b = a, c = off + len - 1, d = c;
while(true) {
while (b <= c && x[b] <= v) {
if (x[b] == v) swap(x, a++, b);
b++;
}
while (c >= b && x[c] >= v) {
if (x[c] == v) swap(x, c, d--);
c--;
}
if (b > c) break;
swap(x, b++, c--);
}
// Swap partition elements back to middle
int s, n = off + len;
s = Math.min(a-off, b-a ); vecswap(x, off, b-s, s);
s = Math.min(d-c, n-d-1); vecswap(x, b, n-s, s);
// Recursively sort non-partition-elements
// (b-a elements are smaller than v, d-c elements are larger than v)
if ((s = b-a) > 1) sort1(x, off, s);
if ((s = d-c) > 1) sort1(x, n-s, s);
}
/**
 * Exchanges the elements stored at positions {@code a} and {@code b} of
 * {@code x}.
 *
 * @param x the array whose elements are exchanged
 * @param a index of the first element
 * @param b index of the second element
 */
private static void swap(int x[], int a, int b) {
    final int first = x[a];
    final int second = x[b];
    x[a] = second;
    x[b] = first;
}
/**
 * Exchanges the run x[a .. (a+n-1)] with the run x[b .. (b+n-1)], one pair
 * of elements at a time, starting from the lowest indices.
 *
 * @param x the array holding both runs
 * @param a index of the first element of the first run
 * @param b index of the first element of the second run
 * @param n number of elements to exchange; nothing happens when it is not positive
 */
private static void vecswap(int x[], int a, int b, int n) {
    int left = a;
    int right = b;
    for (int remaining = n; remaining > 0; remaining--) {
        swap(x, left, right);
        left++;
        right++;
    }
}
/**
 * Finds which of three positions in {@code x} holds the median of the three
 * values stored there.
 *
 * @param x the array being inspected
 * @param a first candidate index
 * @param b second candidate index
 * @param c third candidate index
 * @return whichever of {@code a}, {@code b}, {@code c} indexes the median value
 */
private static int med3(int x[], int a, int b, int c) {
    final int first = x[a];
    final int second = x[b];
    final int third = x[c];
    if (first < second) {
        if (second < third) {
            return b;
        }
        return first < third ? c : a;
    }
    if (second > third) {
        return b;
    }
    return first > third ? c : a;
}
--------------------------------------------------------------------
PS: 欢迎关注公众号"Devin说",会不定期更新Java相关技术知识。
--------------------------------------------------------------------