字符串去重与排序

题意：给出一篇英文文章，找出文章中出现的所有单词，去掉重复的单词后按字典序输出。
解题思路：
1、通过strtok函数对文章进行单词的截取，第一个字符如果是大写要转化成小写；
2、通过向set容器中插入单词同时进行排序；
3、输出set容器中的单词；
相应代码如下：

#include<iostream>
#include<string>
#include<stdio.h>
#include<string.h>
#include<algorithm>
#include<set>
using namespace std ;
 
// Reads text from standard input line by line, splits each line into words
// with strtok, lower-cases the first letter of every word, and prints the
// distinct words in ascending order, one per line.
int main()  {
  //  freopen("in.txt","r",stdin) ;
  //  freopen("out.txt","w",stdout) ;
    // Word separators: space, comma, double quote, period and colon.
    // The double quote must be escaped inside the string literal.
    const char *delims = " ,\".:" ;
    std::set<std::string> se ;      // keeps the words unique and sorted
    std::string line ;
    // std::getline replaces gets(): it has no fixed buffer to overflow and
    // handles lines of any length.
    while (std::getline(std::cin, line))  {
        if (line.empty())
            continue ;
        // strtok tokenizes in place, so hand it the string's own mutable buffer.
        for (char *str = strtok(&line[0], delims) ; str ; str = strtok(NULL, delims)) {
            // Only the first character is normalized to lower case.
            if (str[0] >= 'A' && str[0] <= 'Z')
                str[0] = static_cast<char>(str[0] - 'A' + 'a') ;
            se.insert(str) ;
        }
    }
    for (std::set<std::string>::const_iterator iter = se.begin() ; iter != se.end() ; ++iter)
        std::cout << *iter << '\n' ;
    return 0 ;
}

下面观摩一下网上大神的代码，受益匪浅：

#include<algorithm>
#include<iostream>
#include<iterator>
#include<cassert>
#include<sstream>
#include<fstream>
#include<cstdlib>
#include<cstring>
#include<utility>
#include<complex>
#include<string>
#include<cctype>
#include<cstdio>
#include<vector>
#include<bitset>
#include<stack>
#include<queue>
#include<cmath>
#include<deque>
#include<list>
#include<set>
#include<map>
 
// Short-hand macros. Every expression-like macro is fully parenthesized so
// that it expands safely inside larger expressions (e.g. 1/pi or pi/pi).
#define ll long long
#define sc scanf
#define pf printf
#define pi (2*acos(0.0))
 
#define ft first
#define se second
// r(...) / w(...) redirect stdin/stdout to fixed file names; the macro
// argument is not used in the expansion.
#define r(input) freopen("input.txt","r",stdin)
#define w(output) freopen("output.txt","w",stdout)
#define maxall(v) (*max_element((v).begin(),(v).end()))
#define minall(v) (*min_element((v).begin(),(v).end()))
#define Sort(v) sort((v).begin(),(v).end())
// Sorts v and then erases adjacent duplicates, leaving only unique elements.
#define un(v) (Sort(v), (v).erase(unique((v).begin(),(v).end()),(v).end()))
#define cover(a,d) memset(a,d,sizeof(a))
 
using namespace std;
// Reads whitespace-separated tokens from standard input, extracts every
// maximal run of alphabetic characters from each token, lower-cases it, and
// prints the distinct words in ascending order, one per line.
int main()
{
    std::set<std::string> res;
    std::string s;
    while (std::cin >> s)
    {
        std::string p;  // word currently being assembled
        for (std::string::size_type i = 0; i < s.size(); ++i) {
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative plain char is undefined behaviour.
            const unsigned char ch = static_cast<unsigned char>(s[i]);
            if (std::isalpha(ch)) {
                p += static_cast<char>(std::tolower(ch));
            }
            else if (!p.empty()) {
                res.insert(p);
                p.clear();
            }
        }
        // Flush the word that runs up to the end of the token.
        if (!p.empty())
            res.insert(p);
    }
    for (std::set<std::string>::const_iterator it = res.begin(); it != res.end(); ++it)
        std::cout << *it << '\n';

    return 0;
}

还有一种不使用set容器、改用sort函数排序的方法：

#include<stdio.h>
    #include<string.h>
    #include<algorithm>
    using namespace std;
 
    #define N 5005
    #define M 201
 
    // One record holding a single word as a C string in a fixed buffer of
    // M characters.
    struct say{
        char str[M];
    };
 
    // Global table of word records filled and sorted by main();
    // it has room for N * M records.
    say tem[N * M];
 
    // Returns the lower-case form of an upper-case letter 'A'..'Z';
    // every other character is returned unchanged.
    char strl(char ch)
    {
        const bool is_upper = ('A' <= ch) && (ch <= 'Z');
        return is_upper ? static_cast<char>(ch + 32) : ch;
    }
 
    // Ordering predicate for sort(): yields 1 when a's string compares
    // before b's string under strcmp, and 0 otherwise.
    int cmp(const say &a, const say &b)
    {
        const int order = strcmp(a.str, b.str);
        return (order < 0) ? 1 : 0;
    }
 
    int main()
    {
       // freopen("in.txt","r",stdin) ;
      //  freopen("out.txt","w",stdout) ;
        int cnt = 0;
        memset(tem, 0, sizeof(tem));
        char ch;
        int m = 0, bo = 0;
        while ((ch = getchar()) != EOF)             //得到一个字符
        {
            ch = strl(ch);
            if (ch >= 'a' && ch <= 'z' && bo == 0)  //如果该字符是其所在单词的第一个字母
            {
                bo = 1;                              //下一个单词将不再是该单词的第一个字母
                m = 0;                              //记录该字符
                tem[cnt].str[m++] = ch;
            }
            else if (ch >= 'a' && ch <= 'z')        //记录一个单词，除第一个字母的其他字母
                tem[cnt].str[m++] = ch;
            else                                    //如果记录到一个单词的最后一位
            {
                bo = 0;                              //下一次将是第二个单词开始的字母
                tem[cnt].str[m] = '';               //该单词后加结束标志
                cnt++;                                 //统计下一个单词的开始
            }
        }
 
        sort(tem, tem + cnt, cmp);                     //调用sort函数进行排序
 
        for (int i = 1; i <= cnt; i++)
        {
            if(strcmp(tem[i].str, tem[i - 1].str) == 0)    //去掉重复的单词
                continue;
            puts(tem[i].str);
        }
 
        return 0;
    }

相关阅读:
idea打包jar部署Linux出现乱码
 HTML元素刷新方式
 Linux中Jar启动与停止
 Win10开机自启软件设置
 java后台数据传输到前端少一天，8小时
 mysql字符串提取数组排序
 maven 配置文件
 mac docker安装jupyter notebook镜像
 pycharm使用git
github使用命令
原文地址：https://www.cnblogs.com/NYNU-ACM/p/4236884.html