• Net6 对接 Skywalking


    # Single-host SkyWalking stack: Elasticsearch (storage), OAP (backend) and the web UI.
    version: '3.3'
    services:
      # Storage backend that the OAP server writes to.
      elasticsearch:
        image: docker.elastic.co/elasticsearch/elasticsearch:7.5.0
        container_name: elasticsearch
        restart: always
        ports:
          - "9200:9200"
        environment:
          - discovery.type=single-node
          - bootstrap.memory_lock=true
          - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
        # Unlimited memlock so that bootstrap.memory_lock=true can take effect.
        ulimits:
          memlock:
            soft: -1
            hard: -1
      # SkyWalking backend (OAP server).
      oap:
        image: apache/skywalking-oap-server:8.8.0
        container_name: oap
        depends_on:
          - elasticsearch
        links:
          - elasticsearch
        restart: always
        ports:
          # 11800: gRPC endpoint the agents report to (see "gRPC.Servers" in skyapm.json).
          - "11800:11800"
          # 12800: HTTP endpoint queried by the UI (see SW_OAP_ADDRESS below).
          - "12800:12800"
        environment:
          SW_STORAGE: elasticsearch
          SW_STORAGE_ES_CLUSTER_NODES: elasticsearch:9200
      # SkyWalking web UI.
      ui:
        # Pinned to the same release as the OAP server; an untagged image floats to
        # `latest`, which may no longer match an 8.8.0 backend.
        image: apache/skywalking-ui:8.8.0
        container_name: ui
        depends_on:
          - oap
        links:
          - oap
        restart: always
        ports:
          - "8080:8080"
        environment:
          SW_OAP_ADDRESS: http://oap:12800

    2添加依赖

    nuget 包  SkyAPM.Agent.AspNetCore

    3编辑Skywalking配置文件skyapm.json

    {
      "SkyWalking": {
        "ServiceName": "Cnsns.SiteStarV6",
        "Namespace": "",
        "HeaderVersions": [
          "sw8"
        ],
        "Sampling": {
          "SamplePer3Secs": -1,
          "Percentage": -1.0
        },
        "Logging": {
          "Level": "Information",
          "FilePath": "logs\\skyapm-{Date}.log"
        },
        "Transport": {
          "Interval": 3000,
          "ProtocolVersion": "v8",
          "QueueSize": 30000,
          "BatchSize": 3000,
          "gRPC": {
            "Servers": "111.111.13.11:11800",
            "Timeout": 10000,
            "ConnectTimeout": 10000,
            "ReportTimeout": 600000,
            "Authentication": ""
          }
        }
      }
    }
    配置文件也可以通过 CLI 自动生成,步骤如下:
    1、安装CLI(SkyAPM.DotNet.CLI):dotnet tool install -g SkyAPM.DotNet.CLI
    2、自动生成skyapm.json文件:dotnet skyapm config [service name] [server]:11800,例如:dotnet skyapm config MySkyWalking_OrderService 111.111.13.11:11800。其中 service name 指的就是您配置的 SKYWALKING__SERVICENAME,server 指的是您 SkyWalking 的 ip 地址。

    4 在launchSettings.json文件配置SkyWalking

    "profiles": { // launch profiles of the project
        "IIS Express": { // profile that launches under IIS Express
          "commandName": "IISExpress",
          "launchBrowser": true,
          "launchUrl": "weatherforecast",
          "environmentVariables": {
            "ASPNETCORE_ENVIRONMENT": "Development",
            "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore",
            "SKYWALKING__SERVICENAME": "MySkyWalkingDemoTest"
          }
        },
        "SkyWalkingDemo": { // profile run via the "Project" command (original note said "castrol"; presumably Kestrel - confirm)
          "commandName": "Project",
          "launchBrowser": true,
          "launchUrl": "weatherforecast",
          "applicationUrl": "http://localhost:5000",
          "environmentVariables": {
            "ASPNETCORE_ENVIRONMENT": "Development",
            "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore", // required
            "SKYWALKING__SERVICENAME": "MySkyWalkingDemoTest" // required; identifies this service in SkyWalking
          }
        }
      }

     5 在Program.cs  注册 

    #region 注册Skywalking
    // Register the SkyAPM extension services in the DI container so that
    // SkyWalking-related types can be injected into application code.
    builder.Services.AddSkyApmExtensions(); // adds the SkyWalking-related configuration
    #endregion 

     6 调用获取链路追踪的Id

    // Accessor for the entry segment context; supplied through constructor injection.
    private readonly IEntrySegmentContextAccessor _segContext;

    public SkywalkingController(IEntrySegmentContextAccessor segContext)
    {
        _segContext = segContext;
    }

    /// <summary>
    /// Returns the trace id held by the current entry segment context.
    /// </summary>
    /// <returns>A content result whose body is the trace id as text.</returns>
    public IActionResult GetSkywalkingTraceId()
    {
        // Read through the injected field; the original referenced an undeclared
        // `_segContextAccessor` and therefore did not compile.
        return Content(_segContext.Context.TraceId.ToString());
    }

     7自定义链路追踪

    /// <summary>
    /// Demo action for custom tracing: prints the current trace id and attaches two
    /// log events to the entry span, roughly one second apart.
    /// </summary>
    /// <returns>An OK result whose text contains the trace id.</returns>
    [HttpGet]
    public async Task<IActionResult> SkywalkingTest()
    {
        // Capture the segment context once so the same span is used before and after the await.
        var segment = _segContext.Context;

        // Trace id of the current segment context.
        var traceId = segment.TraceId;
        Console.WriteLine($"TraceId={traceId}");
        segment.Span.AddLog(LogEvent.Message($"SkywalkingTest---Worker running at: {DateTime.Now}"));

        // Simulate one second of work without blocking the request thread
        // (the method is async, so await a delay instead of Thread.Sleep).
        await Task.Delay(1000);

        segment.Span.AddLog(LogEvent.Message($"SkywalkingTest---Worker running at--end: {DateTime.Now}"));

        return Ok($"Ok,SkywalkingTest-TraceId={traceId} ");
    }

    网关和服务之间调用,同上配置。

    有时候我们需要发通知(比如哪个服务实例出问题了)

    8  配置告警规则

    进入容器
    docker exec -it 12f053748e85 /bin/sh

    通过cat alarm-settings.yml可以查阅文件内容,如下:
    
    docker cp 12f053748e85:/skywalking/config/alarm-settings.yml .
    # Example alarm rule set.
    rules:
      # Every rule key must be unique and must end with `_rule`.
      service_resp_time_rule:
        metrics-name: service_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 3
        silence-period: 5
        message: 'Response time of service {name} is more than 1000ms in 3 minutes of last 10 minutes.'
      service_sla_rule:
        # The metric value has to be a long, double or int.
        metrics-name: service_sla
        op: '<'
        threshold: 8000
        # Length of the time window over which the metric is evaluated.
        period: 10
        # Number of times the condition must match before the alarm is triggered.
        count: 2
        # Number of checks the alarm stays silent after triggering; defaults to the period.
        silence-period: 3
        message: 'Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes'
      service_resp_time_percentile_rule:
        # The metric value has to be a long, double or int.
        metrics-name: service_percentile
        op: '>'
        threshold: '1000,1000,1000,1000,1000'
        period: 10
        count: 3
        silence-period: 5
        message: 'Percentile response time of service {name} alarm in 3 minutes of last 10 minutes, due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000, p95 > 1000, p99 > 1000'
      service_instance_resp_time_rule:
        metrics-name: service_instance_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        silence-period: 5
        message: 'Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes'
      database_access_resp_time_rule:
        metrics-name: database_access_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        message: 'Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes'
      endpoint_relation_resp_time_rule:
        metrics-name: endpoint_relation_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        message: 'Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes'
    # Endpoint-level metric alarms are left commented out: they cost more memory than
    # service and service-instance alarms, because there are far more endpoints than
    # services and instances.
    #
    #  endpoint_avg_rule:
    #    metrics-name: endpoint_avg
    #    op: ">"
    #    threshold: 1000
    #    period: 10
    #    count: 2
    #    silence-period: 5
    #    message: Response time of endpoint {name} is more than 1000ms in 2 minutes of last 10 minutes

    webhooks:
    #  - http://127.0.0.1/notify/
    #  - http://127.0.0.1/go-wechat/

    规则常用指标解读
    rule name: 规则名称,必须唯一,必须以 **_rule**结尾;
    metrics name: oal(Observability Analysis Language)脚本中的度量名;名称在SkyWalking后端服务中已经定义,进入容器skywalking-oap之后,进入如下目录就可以找到。

    
    

    include-names: 本规则告警生效的实体名称,如服务名,终端名;
    exclude-names:将此规则作用于不匹配的实体名称上,如服务名,终端名;
    threshold: 阈值,可以是一个数组,即可以配置多个值;
    op: 操作符, 可以设定 >, <, =;
    period: 评估指标数据的时间窗口长度(即在最近多长时间内检查指标是否符合告警规则);以分钟为单位
    count: 在该时间窗口内超过阈值条件,达到**count**次数,触发告警;
    silence-period:告警触发后,在指定的**silence-period**时间内,忽略相同的告警消息;
    更多告警规则详情,请参照这个地址:https://github.com/apache/skywalking/blob/master/docs/en/setup/backend/backend-alarm.md

    
    
    修改告警规则
    rules:
      # Rule key: must be unique and end with `_rule`.
      service_test_sal_rule:
        # Metric the rule evaluates.
        # NOTE(review): `service_test_sal` differs from the `service_sla` metric used by
        # the stock rules; confirm a metric with this name is defined in the OAL scripts.
        metrics-name: service_test_sal
        # Alarm when the metric value is below the threshold.
        op: "<"
        # Threshold value; the message below describes it as 80%.
        threshold: 8000
        # Length of the evaluation window, in minutes.
        period: 2
        # Number of matches within the window required to trigger the alarm.
        count: 1
        # Do not repeat the same alarm within 3 minutes.
        silence-period: 3
        # Alarm text; wording matches period (2) and count (1) configured above.
        message: Successful rate of service {name} is lower than 80% in 1 minute of last 2 minutes
    概要:服务成功率在过去2分钟内低于80%
    告警API编写
    这个本质还是SkyWalking根据规则进行检查,如果符合规则条件,就通过**WebHook、gRPCHook、WeChat Hook、Dingtalk Hook**等方式进行消息通知;接收到告警数据信息之后,可以自行处理消息。这里为了方便,就采用**WebHook**的方式进行演示,即触发告警条件之后,SkyWalking会调用配置的WebHook 接口,并传递对应的告警信息;
    定义数据模型
    
    /// <summary>
    /// One alarm entry as received by the webhook endpoint. Property names are
    /// lower camel case, presumably to mirror the JSON fields SkyWalking posts
    /// (TODO confirm against the backend alarm documentation).
    /// </summary>
    public class AlarmMsg
    {
        // Numeric scope identifier - exact meaning not shown here; see SkyWalking docs.
        public int scopeId { get; set; }
        // Scope name - presumably the entity kind the alarm refers to; verify.
        public string? scope { get; set; }
        // Name of the entity the alarm refers to - presumably the value substituted for {name} in rule messages; verify.
        public string? name { get; set; }
        // Entity identifiers - semantics not visible in this file.
        public string? id0 { get; set; }
        public string? id1 { get; set; }
        // Presumably the key of the rule that fired (e.g. a `*_rule` entry); verify.
        public string? ruleName { get; set; }
        // Alarm text; this is the value the webhook action prints and mails.
        public string? alarmMessage { get; set; }
    }
    定义WebHook调用API
    
    /// <summary>
    /// Alarm webhook endpoint: writes the received alarm messages to the console
    /// and forwards them through <c>SendMail</c>.
    /// </summary>
    /// <param name="msgs">Alarm entries posted to the webhook; may be null or empty.</param>
    /// <returns></returns>
    [HttpPost("AlarmMsg")]
    public void AlarmMsg(List<AlarmMsg> msgs)
    {
        // Nothing to report: avoids a null dereference and an empty notification mail.
        if (msgs == null || msgs.Count == 0)
        {
            return;
        }

        // Report every alarm in the batch; previously only the first entry was used
        // and the remaining alarms were silently dropped.
        var texts = msgs
            .Select(m => m?.alarmMessage)
            .Where(text => !string.IsNullOrEmpty(text));

        string msg = "触发告警:" + string.Join(";", texts);
        Console.WriteLine(msg);
        SendMail(msg);
    }
    配置webHook
    
    http://192.168.3.105:7900/api/Skywalking/AlarmMsg
    # Example alarm rule set.
    rules:
      # Every rule key must be unique and must end with `_rule`.
      service_resp_time_rule:
        metrics-name: service_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 3
        silence-period: 5
        message: 'Response time of service {name} is more than 1000ms in 3 minutes of last 10 minutes.'
      service_sla_rule:
        # The metric value has to be a long, double or int.
        metrics-name: service_sla
        op: '<'
        threshold: 8000
        # Length of the time window over which the metric is evaluated.
        period: 10
        # Number of times the condition must match before the alarm is triggered.
        count: 2
        # Number of checks the alarm stays silent after triggering; defaults to the period.
        silence-period: 3
        message: 'Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes'
      service_resp_time_percentile_rule:
        # The metric value has to be a long, double or int.
        metrics-name: service_percentile
        op: '>'
        threshold: '1000,1000,1000,1000,1000'
        period: 10
        count: 3
        silence-period: 5
        message: 'Percentile response time of service {name} alarm in 3 minutes of last 10 minutes, due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000, p95 > 1000, p99 > 1000'
      service_instance_resp_time_rule:
        metrics-name: service_instance_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        silence-period: 5
        message: 'Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes'
      database_access_resp_time_rule:
        metrics-name: database_access_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        message: 'Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes'
      endpoint_relation_resp_time_rule:
        metrics-name: endpoint_relation_resp_time
        op: '>'
        threshold: 1000
        period: 10
        count: 2
        message: 'Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes'
    # Endpoint-level metric alarms are left commented out: they cost more memory than
    # service and service-instance alarms, because there are far more endpoints than
    # services and instances.
    #
    #  endpoint_avg_rule:
    #    metrics-name: endpoint_avg
    #    op: ">"
    #    threshold: 1000
    #    period: 10
    #    count: 2
    #    silence-period: 5
    #    message: Response time of endpoint {name} is more than 1000ms in 2 minutes of last 10 minutes

    # Alarm notifications are posted to the endpoint listed here.
    webhooks:
      - http://192.168.3.105:7900/api/Skywalking/AlarmMsg
    #  - http://127.0.0.1/go-wechat/
    rules:
      # Rule key: must be unique and end with `_rule`.
      service_sla_rule:
        # Metric the rule evaluates.
        metrics-name: service_sla
        # Alarm when the value is below the threshold.
        op: '<'
        # Threshold value.
        threshold: 8000
        # Evaluation window of 10 minutes.
        period: 10
        # Trigger once the condition has matched 2 times.
        count: 2
        # Do not repeat the same alarm within 3 minutes.
        silence-period: 3
        # Text sent with the alarm.
        message: 'Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes'
    webhooks:
      - http://192.168.3.105:7900/api/Skywalking/AlarmMsg

    SkyWalking 为什么能做到无侵入?因为通过 ASPNETCORE_HOSTINGSTARTUPASSEMBLIES 配置的 SkyAPM.Agent.AspNetCore 在应用自身的 IoC 注册之前就已经注册上了

  • 相关阅读:
    [LeetCode] Rabbits in Forest 森林里的兔子
    [LeetCode] 780. Reaching Points 到达指定点
    [LeetCode] Swim in Rising Water 在上升的水中游泳
    [LeetCode] 777. Swap Adjacent in LR String 交换LR字符串中的相邻项
    [LeetCode] Split BST 分割二叉搜索树
    [LeetCode] Global and Local Inversions 全局与局部的倒置
    [LeetCode] Minimize Max Distance to Gas Station 最小化去加油站的最大距离
    [LeetCode] Sliding Puzzle 滑动拼图
    [LeetCode] Basic Calculator IV 基本计算器之四
    [LeetCode] Jewels and Stones 珠宝和石头
  • 原文地址:https://www.cnblogs.com/jasontarry/p/15891675.html
Copyright © 2020-2023  润新知