In this post we will learn how to monitor your AVA node using Prometheus.

The AVA binary exposes its metrics at http://localhost:9650/ext/metrics

You can get an idea of what they look like by running this command on your node:

curl -X POST localhost:9650/ext/metrics


The list of metrics is really long, and we are still trying to figure out which of them are useful for sysadmins. One that is always useful to a blockchain node operator is the "number of peers". That is what you can see in this post's header.

Warning:
You should never expose your node's ports publicly. If you do, you leave a large attack surface open. My advice: use a firewall that allows your monitoring server, and only your monitoring server, to query your node's metrics.

Adding prometheus job

As we mentioned in previous posts, each node that you are interested in monitoring needs its own job block in the config. To add one, edit the main Prometheus config file, which usually lives at /etc/prometheus/prometheus.yml. This is what mine looks like:

  # Scrape job for one AVA node; the dashboard's "job" variable selects it by this name.
  - job_name: 'ava-test-node'
    # The AVA binary serves its metrics under /ext/metrics.
    metrics_path: '/ext/metrics'
    static_configs:
    # Replace the IP with your node's IP; 9650 is the port used by the node's metrics URL.
    - targets: ['123.123.123.123:9650']
      # Labels attached to this target; the Grafana panels filter on "group" and "job".
      labels:
        network: 'ava'
        group: 'ava'

Replace the IP with your node's IP and name the job whatever you want. I find it useful to use labels. We will use them later in our Grafana panels.

Once this is done, just restart your Prometheus service and it should start collecting your metrics.

To check it, go to your Prometheus URL and search for your new metrics with this query:


As you can see, there are 1342 different metrics that match this simple query. To plot the number of peers, as in this post's header, you can use the following query:

gecko_peers{job="YOUR-JOB-NAME"}

Execute the query and go to the Graph tab to see the result. Once this works, you can start creating your first Grafana dashboard.

This is the beginning of my dashboard, and you can find the JSON code below. With that JSON you can import the dashboard directly into your Grafana and build on it.

Simple AVA Dashboard (json)

If you used the job label we mentioned before, you can just import this JSON into your Grafana and start tuning it.

Download the latest version:

https://raw.githubusercontent.com/Colm3na/avalanche/master/monitoring/grafana/ava-dashboard.json
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": 35,
  "iteration": 1591874928400,
  "links": [
    {
      "icon": "external link",
      "tags": [],
      "title": "Blog de ColmenaLabs_SVQ",
      "type": "link",
      "url": "https://blog.colmenalabs.org"
    }
  ],
  "panels": [
    {
      "datasource": null,
      "fieldConfig": {
        "defaults": {
          "custom": {
            "align": null
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "green",
                "value": 1
              }
            ]
          }
        },
        "overrides": []
      },
      "gridPos": {
        "h": 3,
        "w": 23,
        "x": 0,
        "y": 0
      },
      "id": 3,
      "options": {
        "displayMode": "lcd",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "last"
          ],
          "values": false
        },
        "showUnfilled": true
      },
      "pluginVersion": "7.0.0",
      "targets": [
        {
          "expr": "up{group=\"[[group]]\",job=\"[[job]]\"}",
          "format": "table",
          "instant": false,
          "interval": "",
          "legendFormat": "{{job}}",
          "refId": "A"
        }
      ],
      "timeFrom": null,
      "timeShift": null,
      "title": "UP",
      "type": "bargauge"
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": null,
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 1,
      "fillGradient": 0,
      "gridPos": {
        "h": 12,
        "w": 23,
        "x": 0,
        "y": 3
      },
      "hiddenSeries": false,
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "nullPointMode": "null",
      "options": {
        "dataLinks": []
      },
      "percentage": false,
      "pointradius": 2,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "gecko_peers{group=\"[[group]]\",job=\"[[job]]\"}",
          "interval": "",
          "legendFormat": "{{job}}",
          "refId": "A"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Peers",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "schemaVersion": 25,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "allValue": null,
        "current": {
          "selected": false,
          "text": "ava",
          "value": "ava"
        },
        "datasource": "DS_PROMETHEUS",
        "definition": "label_values(group)",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": false,
        "name": "group",
        "options": [
          {
            "selected": true,
            "text": "ava",
            "value": "ava"
          },
          {
            "selected": false,
            "text": "kusama",
            "value": "kusama"
          },
          {
            "selected": false,
            "text": "polkadot",
            "value": "polkadot"
          },
          {
            "selected": false,
            "text": "prysm",
            "value": "prysm"
          }
        ],
        "query": "label_values(group)",
        "refresh": 0,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      },
      {
        "allValue": null,
        "current": {
          "selected": true,
          "text": "ava-test-node",
          "value": "ava-test-node"
        },
        "datasource": "DS_PROMETHEUS",
        "definition": "label_values(job)",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": false,
        "name": "job",
        "options": [
          {
            "selected": true,
            "text": "ava-test-node",
            "value": "ava-test-node"
          },
          {
            "selected": false,
            "text": "ava_avavirt",
            "value": "ava_avavirt"
          }
        ],
        "query": "label_values(job)",
        "refresh": 0,
        "regex": "/^ava/",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ]
  },
  "timezone": "",
  "title": "AVA Dashboard",
  "uid": "ColmenaLabs_SVQ",
  "version": 10
}