最近项目中需要给出每一个字在 string 中的索引,但由于在 UTF-8 编码下中文字符与英文字符所占的字节数不同(英文 1 字节,中文通常 3 字节),需要分开处理,
在这里记录一下。
想要达到的效果如下:
将 “测试3.1415engEng” 按字符分割开,即得到 “测”、“试”、“3”、“.”、“1”、“4”、“1”、“5”、“e”、“n”、“g”、“E”、“n”、“g” 共 14 个元素。
代码:
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Number of bytes the UTF-8 sequence introduced by `lead` claims to occupy.
// ASCII bytes, stray continuation bytes (10xxxxxx) and bytes that can never
// start a sequence (0xF8..0xFF) all report 1 so the caller emits them alone.
static std::size_t utf8SequenceLength(unsigned char lead) {
    if ((lead & 0x80) == 0x00) return 1;  // 0xxxxxxx
    if ((lead & 0xE0) == 0xC0) return 2;  // 110xxxxx
    if ((lead & 0xF0) == 0xE0) return 3;  // 1110xxxx
    if ((lead & 0xF8) == 0xF0) return 4;  // 11110xxx
    return 1;
}

// True when `count` bytes starting at `start` fit inside `text` and every byte
// after the first has the continuation form 10xxxxxx.
static bool isCompleteSequence(const std::string& text, std::size_t start, std::size_t count) {
    if (count > text.size() - start) {
        return false;  // sequence is cut off by the end of the string
    }
    for (std::size_t k = 1; k < count; ++k) {
        const unsigned char byte = static_cast<unsigned char>(text[start + k]);
        if ((byte & 0xC0) != 0x80) {
            return false;
        }
    }
    return true;
}

// Label used by the demo output for a piece of `byteCount` bytes.
static const char* byteCountLabel(std::size_t byteCount) {
    switch (byteCount) {
        case 1: return "one";
        case 2: return "two";
        case 3: return "three";
        default: return "four";
    }
}

// Splits a UTF-8 encoded string into its individual characters.
//
// @param chars  Text to split; expected to be UTF-8.
// @return One string per character, in order. Concatenating the result always
//         reproduces `chars` byte for byte.
//
// Malformed input never aborts and never merges neighbouring characters: a
// lead byte whose continuation bytes are missing or invalid, a stray
// continuation byte, or a byte in 0xF8..0xFF is returned as a one-byte piece.
// Only the structural shape of each sequence is checked; overlong encodings
// and surrogate code points are not rejected.
std::vector<std::string> splitEachChar(const std::string& chars) {
    std::vector<std::string> words;
    const std::size_t len = chars.size();
    std::size_t i = 0;
    while (i < len) {
        std::size_t next = utf8SequenceLength(static_cast<unsigned char>(chars[i]));
        if (!isCompleteSequence(chars, i, next)) {
            // Do not swallow the following bytes; resynchronise on the next one.
            next = 1;
        }
        words.push_back(chars.substr(i, next));
        i += next;
    }
    return words;
}

// Interactive demo: reads lines from stdin until "exit" or end of input and
// prints, for each line, the byte width of every character, the line itself,
// and then every character on its own line.
void testtemp() {
    std::string input;
    // Testing the stream state ends the loop at EOF instead of spinning forever.
    while (std::getline(std::cin, input)) {
        if (input == "exit") {
            break;
        }
        std::cout << "--------------------------------" << '\n';
        const std::vector<std::string> ret = splitEachChar(input);
        for (const std::string& piece : ret) {
            std::cout << byteCountLabel(piece.size()) << " character: " << piece << '\n';
        }
        std::cout << input << '\n';
        for (const std::string& piece : ret) {
            std::cout << piece << '\n';
        }
        std::cout << "--------------------------------" << '\n';
    }
}

// Define SPLIT_EACH_CHAR_NO_MAIN to compile only the splitter, e.g. when this
// code is embedded in another program or a test harness with its own main().
#ifndef SPLIT_EACH_CHAR_NO_MAIN
int main() {
    testtemp();
    return 0;
}
#endif
参考: