定义
package java.lang;
/**
 * Skeleton of the String class declaration; the class body is elided here.
 * The class is final and implements java.io.Serializable,
 * Comparable&lt;String&gt; and CharSequence.
 */
public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
// ...
}
- final 类,不能被继承
- 实现 Serializable 接口,可序列化
- 实现 Comparable 接口,可比较大小
- 实现 CharSequence 接口,StringBuffer 和 StringBuilder 同样实现该接口
属性
@Stable // never null
private final byte[] value; // JDK 11 stores the string contents in a byte array
private final byte coder; // encoding of value: LATIN1 or UTF16
static final boolean COMPACT_STRINGS; // whether compact string storage is enabled
static {
COMPACT_STRINGS = true;
}
@Native static final byte LATIN1 = 0;
@Native static final byte UTF16 = 1;
private int hash; // caches the hash code
public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator(); // instance of the nested comparator class
// ...
- hash:缓存hashcode。String经常被用作HashMap等哈希容器的键,hashCode会被频繁调用,将其缓存可以避免重复计算,提高效率。
- value:JDK 8及以前,value用char数组存储,然而很多时候,字符只需要1个字节来表示。因此从JDK 9以后,value使用byte数组存储,并添加了coder,COMPACT_STRINGS字段,帮助压缩字符串存储空间。
- coder:LATIN1表示1个字符占用1个byte;UTF16表示1个字符占用2个byte。
- COMPACT_STRINGS:默认值为true。当值为false时,字符串必然以UTF16的形式存储。
因此,当COMPACT_STRINGS=true并且每个字符都可用1个字节表示时,coder=LATIN1;否则coder=UTF16
内部类
/**
 * Comparator that orders strings while ignoring case. It picks the
 * comparison routine from the coders of the two operands: a same-coder
 * routine when both strings share a coder, a mixed-coder routine otherwise.
 */
private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable {
    // use serialVersionUID from JDK 1.2.2 for interoperability
    private static final long serialVersionUID = 8575799808933029326L;

    /**
     * Compares s1 and s2 ignoring case.
     *
     * @param s1 left operand
     * @param s2 right operand
     * @return the result of the coder-specific compareToCI routine
     */
    public int compare(String s1, String s2) {
        final byte[] left = s1.value;
        final byte[] right = s2.value;
        if (s1.coder() != s2.coder()) {
            // Mixed coders: the routine is chosen by the coder of the left operand.
            if (s1.isLatin1()) {
                return StringLatin1.compareToCI_UTF16(left, right);
            }
            return StringUTF16.compareToCI_Latin1(left, right);
        }
        // Same coder on both sides.
        if (s1.isLatin1()) {
            return StringLatin1.compareToCI(left, right);
        }
        return StringUTF16.compareToCI(left, right);
    }

    /** Replaces the de-serialized object with the shared CASE_INSENSITIVE_ORDER instance. */
    private Object readResolve() {
        return CASE_INSENSITIVE_ORDER;
    }
}
实现忽略大小写的字符串比较。
compareToIgnoreCase方法利用该内部类的方法实现。
构造方法
- 无参
public String() {
this.value = "".value;
this.coder = "".coder;
}
- char[]
/**
 * Creates a string from the whole char array by delegating to the
 * four-argument constructor with offset 0 and count value.length.
 * The trailing null is that constructor's Void marker argument.
 */
public String(char value[]) {
this(value, 0, value.length, null);
}
/**
 * Creates a string from count chars of value starting at offset.
 * rangeCheck(...) is evaluated as an argument of the delegating call, so the
 * range is validated before the four-argument constructor runs; its result
 * (always null) fills the Void marker parameter.
 */
public String(char value[], int offset, int count) {
this(value, offset, count, rangeCheck(value, offset, count));
}
/**
 * Validates offset/count against value.length and returns null, so the call
 * can be passed as the Void argument of a this(...) constructor call.
 */
private static Void rangeCheck(char[] value, int offset, int count) {
checkBoundsOffCount(offset, count, value.length); // static helper; throws StringIndexOutOfBoundsException when the range is out of bounds
return null;
}
String(char[] value, int off, int len, Void sig) { // sig与public方法区别开
if (len == 0) {
this.value = "".value;
this.coder = "".coder;
return;
}
if (COMPACT_STRINGS) {
byte[] val = StringUTF16.compress(value, off, len);
if (val != null) {
this.value = val;
this.coder = LATIN1;
return;
}
}
this.coder = UTF16;
this.value = StringUTF16.toBytes(value, off, len);
}
// StringUTF16.compress
/**
 * Tries to compress len chars of val, starting at off, into one byte per char.
 *
 * @return the compressed (LATIN1) bytes, or null when the chars cannot all be
 *         compressed
 */
public static byte[] compress(char[] val, int off, int len) {
    final byte[] latin1 = new byte[len];
    // The five-argument overload returns 0 when compression fails.
    final int written = compress(val, off, latin1, 0, len);
    return (written == len) ? latin1 : null;
}
// StringUTF16.compress
// compressedCopy char[] -> byte[]
@HotSpotIntrinsicCandidate
/**
 * Copies len chars from src (starting at srcOff) into dst (starting at
 * dstOff), truncating each char to a single byte.
 *
 * @return len when every char was at most 0xFF; 0 as soon as a char above
 *         0xFF is met (bytes copied before that char stay in dst)
 */
public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
    int copied = 0;
    while (copied < len) {
        final char ch = src[srcOff + copied];
        if (ch > 0xFF) {
            // Does not fit in one byte: report failure.
            return 0;
        }
        dst[dstOff + copied] = (byte) ch; // narrow the char to a byte
        copied++;
    }
    return len;
}
- byte[]
// Similar to the char[] constructors, with an additional charset decoding step.
// The charset can be given by name (a String, as here) or as a Charset object.
/**
 * Creates a string by decoding length bytes of bytes, starting at offset,
 * with the charset named charsetName.
 *
 * @throws NullPointerException if charsetName is null
 * @throws UnsupportedEncodingException declared by this constructor;
 *         presumably raised by the decoder for an unknown charset name
 */
public String(byte[] bytes, int offset, int length, String charsetName)
        throws UnsupportedEncodingException {
    if (charsetName == null) {
        throw new NullPointerException("charsetName");
    }
    checkBoundsOffCount(offset, length, bytes.length);
    final StringCoding.Result decoded = StringCoding.decode(charsetName, bytes, offset, length);
    this.value = decoded.value;
    this.coder = decoded.coder;
}
- StringBuffer/StringBuilder
// The contents are copied, so later changes to the StringBuffer/StringBuilder do not affect the String.
/** Creates a string from the buffer's toString() result via the String-argument constructor. */
public String(StringBuffer buffer) {
this(buffer.toString());
}
/**
 * Creates a string from a StringBuilder by delegating to a two-argument
 * constructor that is not shown in this excerpt; null is passed as its
 * second argument.
 */
public String(StringBuilder builder) {
this(builder, null);
}
方法
- length:返回长度
/**
 * Returns the length of this string in chars.
 * The byte count is shifted right by coder(): 0 for LATIN1 (unchanged),
 * 1 for UTF16 (halved).
 */
public int length() {
    final int byteCount = value.length;
    return byteCount >> coder();
}
/**
 * Returns the effective coder of this string (LATIN1 = 0, UTF16 = 1).
 * With compact strings disabled the answer is always UTF16.
 */
byte coder() {
    if (!COMPACT_STRINGS) {
        return UTF16;
    }
    return coder;
}
- isEmpty:长度是否为0
- charAt:某位置上的字符
- getChars:获取char数组
- getBytes:获取byte数组
- equals:字符串相等
/**
 * Tells whether anObject is a String with the same contents as this one.
 *
 * @param anObject the object to compare with
 * @return true for the same reference, or for a String with an equal coder
 *         and equal bytes; false otherwise
 */
public boolean equals(Object anObject) {
    if (this == anObject) {
        return true;
    }
    if (!(anObject instanceof String)) {
        return false;
    }
    final String other = (String) anObject;
    // Strings with different coders cannot be equal, because strings with
    // the same contents are always stored with the same coder.
    if (coder() != other.coder()) {
        return false;
    }
    if (isLatin1()) {
        return StringLatin1.equals(value, other.value);
    }
    return StringUTF16.equals(value, other.value);
}
- contentEquals:内容相同
/**
 * Tells whether this string holds the same char sequence as cs.
 *
 * @param cs the sequence to compare with
 * @return true when the lengths match and every char is equal
 */
public boolean contentEquals(CharSequence cs) {
    // StringBuffer / StringBuilder argument
    if (cs instanceof AbstractStringBuilder) {
        final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
        if (!(cs instanceof StringBuffer)) {
            return nonSyncContentEquals(builder);
        }
        // StringBuffer is thread-safe, so the comparison runs while holding its lock.
        synchronized (cs) {
            return nonSyncContentEquals(builder);
        }
    }
    // String argument
    if (cs instanceof String) {
        return equals(cs);
    }
    // Any other CharSequence
    final int n = cs.length();
    if (n != length()) {
        return false;
    }
    final byte[] bytes = this.value;
    if (!isLatin1()) {
        return StringUTF16.contentEquals(bytes, cs, n);
    }
    for (int idx = 0; idx < n; idx++) {
        // Mask to read the LATIN1 byte as an unsigned value.
        if ((bytes[idx] & 0xff) != cs.charAt(idx)) {
            return false;
        }
    }
    return true;
}
- equalsIgnoreCase:忽略大小写字符串相同
- compareTo:字符串比较,按字典序
- compareToIgnoreCase:忽略大小写的字符串比较
- regionMatches:字符串范围内相等
- startsWith:是否以字符串开头
- endsWith:是否以字符串结尾
/**
 * Tells whether this string ends with suffix, by checking whether suffix
 * starts at offset length() - suffix.length().
 */
public boolean endsWith(String suffix) {
    final int tailOffset = length() - suffix.length();
    return startsWith(suffix, tailOffset);
}
- indexOf:返回第一次出现的下标,未出现返回-1
// StringLatin1.indexOf
/**
 * Finds the first occurrence of str (its first strCount bytes) inside value
 * (its first valueCount bytes), searching from fromIndex.
 *
 * @return the index of the first match, or -1 when there is none
 */
public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
    final byte head = str[0];
    // Last index at which a full match could still start.
    final int lastStart = valueCount - strCount;
    int pos = fromIndex;
    while (pos <= lastStart) {
        // Skip ahead to the next byte equal to the first byte of str.
        while (pos <= lastStart && value[pos] != head) {
            pos++;
        }
        if (pos > lastStart) {
            break;
        }
        // First byte matches; compare the remaining bytes.
        int matched = 1;
        while (matched < strCount && value[pos + matched] == str[matched]) {
            matched++;
        }
        if (matched == strCount) {
            return pos;
        }
        pos++;
    }
    return -1;
}
- lastIndexOf:返回最后一次出现的下标,未出现返回-1
- substring:子串
- subSequence:子CharSequence
/**
 * Returns the chars between beginIndex and endIndex as a CharSequence;
 * the result is exactly what substring(beginIndex, endIndex) returns.
 */
public CharSequence subSequence(int beginIndex, int endIndex) {
    final String slice = substring(beginIndex, endIndex);
    return slice;
}
- concat:字符串拼接
/**
 * Returns this string followed by str.
 *
 * @param str the string to append
 * @return this instance when str is empty, otherwise a new string holding
 *         both contents
 */
public String concat(String str) {
    if (str.isEmpty()) {
        return this;
    }
    if (coder() != str.coder()) {
        // Different coders: write both strings into one UTF16 buffer.
        final int thisLen = length();
        final int otherLen = str.length();
        final byte[] merged = StringUTF16.newBytesFor(thisLen + otherLen);
        getBytes(merged, 0, UTF16);
        str.getBytes(merged, thisLen, UTF16);
        return new String(merged, UTF16);
    }
    // Same coder: join the two byte arrays and build the new string from them.
    final byte[] head = this.value;
    final byte[] tail = str.value;
    final byte[] joined = Arrays.copyOf(head, head.length + tail.length);
    System.arraycopy(tail, 0, joined, head.length, tail.length);
    return new String(joined, coder);
}
- replace:字符(串)替换,替换所有出现
- matches:正则匹配
- contains:包含
/**
 * Tells whether this string contains the char sequence s, i.e. whether
 * indexOf finds s.toString() at a non-negative index.
 */
public boolean contains(CharSequence s) {
    final String needle = s.toString();
    return indexOf(needle) >= 0;
}
- replaceFirst:字符串替换,替换第一次出现
- replaceAll:字符串正则替换
- split:字符串分割,可添加限制数量
- join:静态方法,将元素用delimiter连接起来,元素可以是CharSequence,或是迭代器中的元素
public static String join(CharSequence delimiter, CharSequence... elements);
public static String join(CharSequence delimiter, Iterable<? extends CharSequence> elements);
System.out.println(String.join(",", "ab", "c"));
System.out.println(String.join(",", Arrays.asList("ab", "c")));
// output:ab,c
- toLowerCase:转为小写
- toUpperCase:转为大写
- trim:去掉开头结尾所有码点小于等于U+0020(空格)的字符(无法删掉其他unicode空白字符)
/**
 * Strips leading and trailing bytes whose unsigned value is at most ' '
 * (0x20) from a LATIN1 byte array.
 *
 * @return a new string built from the remaining range, or null when there
 *         was nothing to strip
 */
public static String trim(byte[] value) {
    int end = value.length;
    int start = 0;
    for (; start < end && (value[start] & 0xff) <= ' '; start++) {
        // skip leading bytes up to and including the space character
    }
    for (; start < end && (value[end - 1] & 0xff) <= ' '; end--) {
        // drop trailing bytes up to and including the space character
    }
    if (start == 0 && end == value.length) {
        // Nothing was removed at either end.
        return null;
    }
    return newString(value, start, end - start);
}
- strip:去掉开头结尾的所有空白字符
- stripLeading:去掉开头空白
- stripTrailing:去掉结尾空白
- isBlank:是否只含有空白字符
- lines:返回Stream
System.out.println("1\n2\n3\n".lines().count());
// output:3
- toCharArray:返回char数组
- format:静态方法,字符串格式化
- valueOf:静态方法,转化为字符串
/**
 * Returns the string form of obj.
 *
 * @param obj any object, possibly null
 * @return the literal "null" for a null argument, otherwise obj.toString()
 */
public static String valueOf(Object obj) {
    if (obj == null) {
        return "null";
    }
    return obj.toString();
}
- copyValueOf:静态方法,将char[]复制为字符串
- intern:JDK7之后,可理解为:将首次遇到的字符串加载到常量池中,并返回常量池中的引用
- 常量池中有该字符串的引用,则返回常量池中的引用
- 常量池中没有字符串的引用,则将字符串加载到常量池中,并返回该字符串对象的引用
- repeat:重复字符串
/**
 * Returns a string made of this string's contents repeated count times.
 *
 * @param count number of repetitions
 * @return this instance when count is 1, "" when the string is empty or
 *         count is 0, otherwise a new string
 * @throws IllegalArgumentException if count is negative
 * @throws OutOfMemoryError if the resulting byte length would exceed Integer.MAX_VALUE
 */
public String repeat(int count) {
    if (count < 0) {
        throw new IllegalArgumentException("count is negative: " + count);
    }
    if (count == 1) {
        return this;
    }
    final int unit = value.length;
    if (unit == 0 || count == 0) {
        return "";
    }
    if (unit == 1) {
        // A single byte repeated: fill the array directly.
        final byte[] filled = new byte[count];
        Arrays.fill(filled, value[0]);
        return new String(filled, coder);
    }
    // A result longer than Integer.MAX_VALUE bytes is rejected.
    if (Integer.MAX_VALUE / count < unit) {
        throw new OutOfMemoryError("Repeating " + unit + " bytes String " + count +
                " times will produce a String exceeding maximum size.");
    }
    final int total = unit * count;
    final byte[] out = new byte[total];
    System.arraycopy(value, 0, out, 0, unit);
    // Double the filled prefix until less than a full doubling remains.
    int done = unit;
    while (done < total - done) {
        System.arraycopy(out, 0, out, done, done);
        done <<= 1;
    }
    // Copy the remaining part from the already filled prefix.
    System.arraycopy(out, 0, out, done, total - done);
    return new String(out, coder);
}
注意事项
1. 内存分配
- String s = "abc";
- 当常量池中不存在"abc"这个字符串的引用,在堆内存中new一个新的String对象,将这个对象的引用加入常量池。
- 当常量池中存在"abc"这个字符串的引用,s指向这个引用;
- String s = new String("abc"):在堆上new一个对象
- String s = a + b:在堆上new一个对象
- String s = "a" + "b":相当于String s = "ab"
2. equals
推荐使用 "常量字符串".equals(str),而不是 str.equals("常量字符串")。
若str为null,则后者会报异常,而前者是安全的。