• 通过shell脚本批处理es数据


      

    #!/bin/bash
    # Walk a fixed set of website values and collect their search hits.
    #
    # For every value in `arr`, the my_index/my_doc search endpoint is queried
    # for documents whose `website` field matches the value, and the
    # pretty-printed response is appended to $res_file.
    # Bash is required: the script uses arrays and BASH_SOURCE.

    # Website values whose documents are to be deleted.
    arr=(6.0)
    cur="$(date +%Y%m%d%H%M%S)"
    res_file="${BASH_SOURCE[0]}.${cur}.json.txt"
    log_file="${BASH_SOURCE[0]}.${cur}.log"
    # Quoted so the shell never treats '?' as a glob character.
    es_url='testIP:9200/my_index/my_doc//_search?pretty=true'
    for v in "${arr[@]}"; do
      # Escape backslashes and double quotes so the value stays a valid JSON string.
      v_json=${v//\\/\\\\}
      v_json=${v_json//\"/\\\"}
      # The body is passed to curl as one argument instead of through eval:
      # eval removed the inner double quotes (producing invalid JSON) and would
      # execute any shell metacharacters contained in the value.
      # NOTE(review): "from": 1 skips the first hit of every query -- confirm
      # this is intended; "from": 0 would include it.
      es_body='{"_source": false,"query": {"match": {"website": "'"${v_json}"'"}},"from": 1,"size": 9999}'
      printf 'curl %s -d %s\n' "$es_url" "$es_body"
      curl "$es_url" -d "$es_body" >> "$res_file" \
        || printf 'search request failed for website value: %s\n' "$v" >&2
    done
    
    
    # Prefix and suffix that wrap a document id into one bulk "delete" action line.
    str_head='{"delete":{"_id":"'
    str_foot='"}}'

    split_file_dir='/data/xiaole_chk_url/domain_iask/'
    # Only the base name of the script is used: BASH_SOURCE may carry a directory
    # part (e.g. when the script is started with an absolute path), which would
    # otherwise be appended to split_file_dir and point to a nested path.
    bulk_file="${split_file_dir}${BASH_SOURCE[0]##*/}.${cur}.json"
    # Create (or truncate) the bulk file without writing a leading blank line,
    # so the file contains nothing but action lines.
    : > "$bulk_file"
    
    
    
    
    # Notes on shell quoting:
    # - Single quotes: every character is taken literally; variables are not
    #   expanded, and a single quote cannot appear inside the string (not even
    #   when escaped).
    # - Double quotes: variables and escape sequences may be used, e.g.
    #     your_name='qinjx'
    #     str="Hello, I know you are \"$your_name\"! \n"

    # Marker that precedes the document id in the pretty-printed search output.
    str_tag='"_id" : "'

    # Extract the document id from one line of search output.
    # Globals:   str_tag (read), es_id (written)
    # Arguments: $1 - a line of the search result
    # Returns:   0 and the id in es_id when the line contains str_tag,
    #            1 (es_id empty) otherwise
    es_id_from_line() {
      local line=$1 rest
      es_id=''
      [[ $line == *"$str_tag"* ]] || return 1
      # Drop everything up to and including the marker, then cut at the last
      # double quote; this works with or without a trailing comma.
      rest=${line#*"$str_tag"}
      es_id=${rest%\"*}
    }

    # Read the search output and append one delete action per id to the bulk file.
    # -r keeps backslashes of JSON-escaped ids intact; the extra test handles a
    # last line that has no trailing newline.
    while read -r line || [[ -n $line ]]; do
      printf '%s\n' "$line"
      if es_id_from_line "$line"; then
        printf '%s\n' "$es_id"
        printf '%s\n' "${str_head}${es_id}${str_foot}" >> "$bulk_file"
      else
        echo bbb
      fi
    done < "$res_file"
    
    # Example content of the generated bulk file (one delete action per line):
    #{"delete":{"_id":"website.com.cn/b/tpoNpaBlFx.html"}}
    #{"delete":{"_id":"website.com.cn/b/4W0xcTKZib.html"}}
    #{"delete":{"_id":"website.com.cn/b/5dptLwDEaD.html"}}
    #{"delete":{"_id":"website.com.cn/b/4OdzPUwb6X.html"}}
    #{"delete":{"_id":"website.com.cn/b/2baCMVRsAH.html"}}
    #{"delete":{"_id":"website.com.cn/b/2Nb6PnEt0T.html"}}
    #{"delete":{"_id":"website.com.cn/b/3GbeNhQvyP.html"}}
    #{"delete":{"_id":"website.com.cn/b/3z2wWJWhIf.html"}}
    #{"delete":{"_id":"website.com.cn/b/1id9c9K1MT.html"}}
    #{"delete":{"_id":"website.com.cn/b/2UYjsh1fcf.html"}}
    #{"delete":{"_id":"website.com.cn/b/66PtNs1vbt.html"}}

    # Send the bulk file to the _bulk endpoint and append the response to the log.
    # Nothing is sent when no delete action was generated.
    if [[ ! -s $bulk_file ]]; then
      printf 'no delete actions in %s, nothing to do\n' "$bulk_file" >&2
      exit 0
    fi

    curl -XPOST 'testIP:9200/my_index/my_doc//_bulk' --data-binary "@${bulk_file}" >> "$log_file"
    status=$?
    if (( status != 0 )); then
      printf 'bulk request failed (curl exit status %s)\n' "$status" >&2
    fi

    # Report curl's status to the caller instead of always claiming success.
    exit "$status"
    
    
    #检查结果
    
    #【返回指定id的查询结果】
    #curl 'testIP:9200/my_index/my_doc//_search?pretty=true'  -d '
    #{
    #"query" : {
    #"bool" : {
    #"should" : [
    #{ "match" : { "_id": "website.com.cn/b/KV4Lw3dAw1.html" } },
    #{ "match" : { "_id": "website.com.cn/b/KI9t2kvSlT.html" } },
    #{ "match" : { "_id": "website.com.cn/b/4Hdkz68Vox.html" } },
    #{ "match" : { "_id": "bbs.py168.com/xinxi/25975882.html" } }
    #            ]
    #           }
    #}
    #}'
    
    #【检查日志】
    
    #{"took":1980,"errors":false,"items":[{"delete":{"_index":"my_index","_type":"my_doc","_id":"website.com.cn/b/KzVLkQWh9b.html","_version":2,"_shards":{"total":2,"successful":2,"failed":0},"status":200,"found":true}},{"delete":{"_index":"my_index","_type":"my_doc","_id":"website.com.cn/b/Lc2SQpxEPP.html","_version":2,"_shards":{"total":2,"successful":2,"failed":0},"status":200,"found":true}},{"delete":{"_index":"my_index","_type":"my_doc","_id"
    
    
    
    
    
    
    
    #!/bin/bash
    # Enumerate candidate website values and collect the URLs that match them.
    # A further enumeration could be nested inside the loop to obtain the full
    # set of URLs that satisfy a given condition.
    # Bash is required: the script uses BASH_SOURCE and the arithmetic for loop.

    loop_step=1
    loop_stop=5000
    cur="$(date +%Y%m%d%H%M%S)"
    res_file="${BASH_SOURCE[0]}.${cur}.json.txt"
    log_file="${BASH_SOURCE[0]}.${cur}.log"
    # Quoted so the shell never treats '?' as a glob character.
    es_url='testIP:9200/my_index/my_doc//_search?pretty=true'
    for (( i = 0; i < loop_stop; i++ )); do
      loop_period_start=$(( i * loop_step ))
      loop_period_end=$(( loop_period_start + loop_step ))
      printf '%s\n' "$loop_period_start" "$loop_period_end" "$i"
      # To look up a domain, size 1 is enough; to look for sub-patterns of a
      # domain (e.g. whether /m/ URLs, suspected mobile pages, exist) the size
      # is set to the system maximum, 9999.
      #
      # The body is passed to curl as one argument instead of through eval,
      # which removed the inner double quotes and produced invalid JSON.
      # NOTE(review): "from":1 skips the first hit of every query -- confirm
      # this is intended.
      es_body='{"query": {"match": {"website": "'"${loop_period_start}"'"}},"_source":true,"from":1,"size":9999}'

      printf 'curl %s -d %s\n' "$es_url" "$es_body"
      # Append the response to the result file.
      curl "$es_url" -d "$es_body" >> "$res_file" \
        || printf 'search request failed for website value: %s\n' "$loop_period_start" >&2
    done
    
    
    # Prefix and suffix that wrap a document id into one bulk "delete" action line.
    str_head='{"delete":{"_id":"'
    str_foot='"}}'

    split_file_dir='/data/xiaole_chk_url/url_mobile/'
    # Only the base name of the script is used: BASH_SOURCE may carry a directory
    # part, which would otherwise be appended to split_file_dir.
    bulk_file="${split_file_dir}${BASH_SOURCE[0]##*/}.${cur}.json"

    # Marker that precedes the document id in the pretty-printed search output.
    str_tag='"_id" : "'
    # Substring an id must contain to be selected (suspected mobile page).
    str_tag_mobile='/m/'
    
    # Classify one line of search output and extract a mobile document id.
    # Globals:   str_tag, str_tag_mobile (read), es_id (written)
    # Arguments: $1 - a line of the search result
    # Returns:   0 - the line holds an id and contains str_tag_mobile; the id
    #                is stored in es_id
    #            1 - the line holds an id but not str_tag_mobile
    #            2 - the line holds no id
    mobile_id_from_line() {
      local line=$1 rest
      es_id=''
      [[ $line == *"$str_tag"* ]] || return 2
      [[ $line == *"$str_tag_mobile"* ]] || return 1
      # Drop everything up to and including the marker, then cut at the last
      # double quote; this works with or without a trailing comma.
      rest=${line#*"$str_tag"}
      es_id=${rest%\"*}
    }

    # Read the search output and append one delete action per mobile id to the
    # bulk file. -r keeps backslashes of JSON-escaped ids intact; the extra test
    # handles a last line that has no trailing newline.
    while read -r line || [[ -n $line ]]; do
      mobile_id_from_line "$line"
      case $? in
        0)
          printf '%s\n' "$line"
          printf '%s\n' "${str_head}${es_id}${str_foot}" >> "$bulk_file"
          ;;
        1)
          echo 'filter_1'
          ;;
        *)
          echo 'filter_o'
          ;;
      esac
    done < "$res_file"
    
    # The bulk request below is commented out, so this script sends no delete
    # request; it ends after the bulk file has been written.
    #curl -XPOST testIP:9200/my_index/my_doc//_bulk --data-binary @$bulk_file >> $log_file
    
    
    exit 0
    

      

    shell 文件读取 if else 分支  字符串查找 模糊匹配  字符串截取 

    es 批处理 批删除

  • 相关阅读:
    pycharm下同存Python2和Python3的运行方式问题
    Python3安装requests库
    jmeter实现接口传参为一个文件的测试场景
    解决jmeter添加HTTP Cookie Manager后,还是cookie错误的问题
    jmeter添加webSocket请求
    sublime集成Python环境
    jmeter实现排除登录对单个事务的压测
    JavaScript语法
    JavaScript数据
    javaScript基础
  • 原文地址:https://www.cnblogs.com/rsapaper/p/9051092.html
Copyright © 2020-2023  润新知