Skip to content

Commit

Permalink
config error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
matjaz99 committed Dec 14, 2022
1 parent f648415 commit 341c0af
Show file tree
Hide file tree
Showing 16 changed files with 138 additions and 69 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

* [CHANGE] Wording *psync* is everywhere replaced with *sync*
* [CHANGE] Metrics now contain additional `provider` label
* [FEATURE] Added environment variable `ALERTMONITOR_DATAPROVIDERS_CONFIG_FILE` to set configuration file path.
* [CHANGE] Metric `alertmonitor_webhook_messages_received_total` renamed to `alertmonitor_webhook_requests_received_total`
* [CHANGE] Metric `alertmonitor_journal_messages_total` renamed to `alertmonitor_journal_events_total`
* [FEATURE] Added environment variables `ALERTMONITOR_DATAPROVIDERS_CONFIG_FILE` to set configuration file path
and `ALERTMONITOR_HTTP_CLIENT_CONNECT_TIMEOUT_SEC` to configure connection establishment timeout.

## 2.4.2-SNAPSHOT

Expand Down
29 changes: 15 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,20 +226,21 @@ are configured in `providers.yml`.

A list of supported environment variables:

| EnvVar | Description |
|-------------------------------------------|--------------------------------------------------------------------------------------------------------|
| ALERTMONITOR_DATAPROVIDERS_CONFIG_FILE | Filepath to providers configuration file. Default: /opt/providers.yml |
| ALERTMONITOR_DATA_RETENTION_DAYS | History data in days. Default: 7 |
| ALERTMONITOR_PSYNC_INTERVAL_SEC | Periodic synchronisation interval in seconds. Default: 900 |
| ALERTMONITOR_PROMETHEUS_SERVER | The URL of Prometheus server. Default: http://localhost:9090 |
| ALERTMONITOR_PROMETHEUS_CLIENT_POOL_SIZE | Pool size of http clients for communication with Prometheus API. Default: 1 |
| ALERTMONITOR_HTTP_CLIENT_READ_TIMEOUT_SEC | Timeout of http client requests. Default: 120 |
| ALERTMONITOR_DATE_FORMAT | Date format for displaying in GUI. Default: yyyy/MM/dd H:mm:ss |
| ALERTMONITOR_KAFKA_ENABLED | Enable or disable publishing to Kafka. This is experimental feature! Default: false |
| ALERTMONITOR_KAFKA_SERVER | Hostname and port for Kafka. Default: hostname:9092 |
| ALERTMONITOR_KAFKA_TOPIC | Name of topic. Default: alertmonitor_notifications |
| ALERTMONITOR_MONGODB_ENABLED | Enable or disable storing data to MongoDB. If disabled, data is stored in memory only. Default: false |
| ALERTMONITOR_MONGODB_CONNECTION_STRING | The connection string for MongoDB (username, password and host). |
| EnvVar | Description |
|-----------------------------------------------|--------------------------------------------------------------------------------------------------------|
| ALERTMONITOR_DATAPROVIDERS_CONFIG_FILE | File path to providers.yml configuration file. Default: /opt/alertmonitor/providers.yml |
| ALERTMONITOR_DATA_RETENTION_DAYS | Number of days to retain history data. Default: 7 |
| ALERTMONITOR_PSYNC_INTERVAL_SEC | Periodic synchronisation interval in seconds. Default: 900 |
| ALERTMONITOR_PROMETHEUS_SERVER | The URL of Prometheus server. Default: http://localhost:9090 |
| ALERTMONITOR_PROMETHEUS_CLIENT_POOL_SIZE | Pool size of http clients for communication with Prometheus API. Default: 1 |
| ALERTMONITOR_HTTP_CLIENT_READ_TIMEOUT_SEC | Timeout of http client requests. Default: 120 |
| ALERTMONITOR_HTTP_CLIENT_CONNECT_TIMEOUT_SEC | Connection timeout of http client. Default: 10 |
| ALERTMONITOR_DATE_FORMAT | Date format for displaying in GUI. Default: yyyy/MM/dd H:mm:ss |
| ALERTMONITOR_KAFKA_ENABLED | Enable or disable publishing to Kafka. This is an experimental feature! Default: false |
| ALERTMONITOR_KAFKA_SERVER | Hostname and port for Kafka. Default: hostname:9092 |
| ALERTMONITOR_KAFKA_TOPIC | Name of topic. Default: alertmonitor_notifications |
| ALERTMONITOR_MONGODB_ENABLED | Enable or disable storing data to MongoDB. If disabled, data is stored in memory only. Default: false |
| ALERTMONITOR_MONGODB_CONNECTION_STRING | The connection string for MongoDB (username, password and host). |

### Environment variable substitution

Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ services:
- ALERTMONITOR_PSYNC_INTERVAL_SEC=900
- ALERTMONITOR_PROMETHEUS_SERVER=http://localhost:9090
- ALERTMONITOR_PROMETHEUS_CLIENT_POOL_SIZE=1
- ALERTMONITOR_HTTP_CLIENT_CONNECT_TIMEOUT_SEC=10
- ALERTMONITOR_HTTP_CLIENT_READ_TIMEOUT_SEC=120
- ALERTMONITOR_DATE_FORMAT="yyyy/MM/dd H:mm:ss"
- ALERTMONITOR_KAFKA_ENABLED=false
Expand Down
19 changes: 11 additions & 8 deletions providers.yml
Original file line number Diff line number Diff line change
@@ -1,31 +1,34 @@
version: v1

providers:
- name: prom1
- name: promvm
type: prometheus
uri: /alertmonitor/webhook/prom1
uri: /alertmonitor/webhook/promvm
params:
server: http://prom-1:9090
server: https://promvm/prometheus
clientPoolSize: 1
clientConnectTimeout: 10
clientReadTimeout: 120
syncInterval: 120
- name: prom2
- name: swarm1
type: prometheus
uri: /alertmonitor/webhook/prom2
uri: /alertmonitor/webhook/swarm1
params:
server: http://prom-2:9090
server: https://swarm1/prometheus
clientPoolSize: 1
clientConnectTimeout: 10
clientReadTimeout: 120
syncInterval: 300
data:
retention: 7d
sync:
interval: 60s
- name: cluster-1
- name: eventlogger
type: eventlogger
uri: /alertmonitor/webhook/eventlogger
params:
server: http://prom-1:9090
server: http://swarm1:7073/eventlogger
clientPoolSize: 1
clientConnectTimeout: 10
clientReadTimeout: 120
syncInterval: 0
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package si.matjazcerkvenik.alertmonitor.data;

import si.matjazcerkvenik.alertmonitor.model.config.ConfigReader;
import si.matjazcerkvenik.alertmonitor.model.config.ProviderConfig;
import si.matjazcerkvenik.alertmonitor.providers.AbstractDataProvider;
import si.matjazcerkvenik.alertmonitor.providers.EventloggerDataProvider;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
Copyright 2021 Matjaž Cerkvenik
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package si.matjazcerkvenik.alertmonitor.model.config;

/**
 * Checked exception signalling that a configuration could not be accepted,
 * for example because a mandatory parameter is missing.
 */
public class ConfigException extends Exception {

    /** Explicit version id so the serialized form does not depend on compiler-generated values. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with a description of the configuration problem.
     *
     * @param message description of the problem
     */
    public ConfigException(String message) {
        super(message);
    }

    /**
     * Creates an exception with a description of the configuration problem and its cause.
     *
     * @param message description of the problem
     * @param cause   the underlying exception that triggered this one
     */
    public ConfigException(String message, Throwable cause) {
        super(message, cause);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,15 @@
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import org.yaml.snakeyaml.representer.Representer;
import si.matjazcerkvenik.alertmonitor.providers.PrometheusDataProvider;
import si.matjazcerkvenik.alertmonitor.util.LogFactory;

import java.io.*;
import java.util.List;

public class ConfigReader {

public static void main(String... args) {
ConfigReader.loadYaml("providers.yml");
}

public static YamlConfig loadYaml(String path) {
public static YamlConfig loadProvidersYaml(String path) {
Representer representer = new Representer();
representer.getPropertyUtils().setSkipMissingProperties(true);
Yaml yaml = new Yaml(new Constructor(YamlConfig.class), representer);
Expand All @@ -37,14 +35,49 @@ public static YamlConfig loadYaml(String path) {
try {
inputStream = new FileInputStream(f);
YamlConfig config = yaml.load(inputStream);
LogFactory.getLogger().info("providers config loaded: " + path);
LogFactory.getLogger().info("providers config loaded: " + f.getAbsolutePath());
verifyConfigAndSetDefaults(config.getProviders());
return config;
} catch (FileNotFoundException e) {
LogFactory.getLogger().warn("no providers.yml found at " + path);
LogFactory.getLogger().warn("ConfigReader: no file providers.yml found at " + path);
} catch (ConfigException e) {
LogFactory.getLogger().error("ConfigReader: Exception loading providers.yml: " + e.getMessage());
} catch (Exception e) {
LogFactory.getLogger().error("Exception reading providers.yml", e);
LogFactory.getLogger().error("ConfigReader:", e);
}
return null;
}

/**
 * Verifies every provider configuration and fills in default values where they are missing.
 *
 * <p>For each provider a missing name is replaced by a generated one, while a missing
 * uri or type is rejected. For providers of type {@code prometheus} a missing or empty
 * client connect timeout parameter is set to the default value {@code "10"}.
 *
 * @param configs provider configurations to verify; the elements are modified in place
 * @return the same list that was passed in, after defaults have been applied
 * @throws ConfigException if {@code configs} is null, or a provider has no uri, no type,
 *         or (for prometheus providers) no params section
 */
public static List<ProviderConfig> verifyConfigAndSetDefaults(List<ProviderConfig> configs) throws ConfigException {
    if (configs == null) {
        throw new ConfigException("no providers configured");
    }

    for (ProviderConfig pc : configs) {

        if (pc.getName() == null) {
            pc.setName("Provider_" + pc.hashCode());
        }
        if (pc.getUri() == null) {
            throw new ConfigException("missing uri parameter");
        }
        if (pc.getType() == null) {
            throw new ConfigException("missing type parameter");
        }

        if (pc.getType().equalsIgnoreCase("prometheus")) {
            if (pc.getParams() == null) {
                throw new ConfigException("missing params");
            }
            // TODO check remaining params
            Object connTim = pc.getParams().get(PrometheusDataProvider.DP_PARAM_KEY_CLIENT_CONNECT_TIMEOUT_SEC);
            // String concatenation is null-safe; calling toString() here would throw
            // before the default below could ever be applied.
            LogFactory.getLogger().info("connT: " + connTim);
            if (connTim == null || connTim.toString().length() == 0) {
                pc.setParam(PrometheusDataProvider.DP_PARAM_KEY_CLIENT_CONNECT_TIMEOUT_SEC, "10");
                LogFactory.getLogger().info("default 10 set");
            }
            // TODO check if the configured value is a number
        }
        // Other provider types (e.g. "eventlogger") currently have no type-specific checks.

    }

    return configs;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ public class PrometheusApiClient {
private static long requestCount;

private String server;

private boolean secureClient = false;
private int connectTimeout = 10;
private int readTimeout = 120;
Expand Down Expand Up @@ -77,7 +76,7 @@ public void setName(String name) {
*/
public PQueryMessage query(String query) throws PrometheusApiException {

logger.info("PrometheusApi: query: " + query);
logger.info("PrometheusApi[" + name + "]: query: " + query);

RequestBody formBody = new FormBody.Builder()
// add url encoded parameters
Expand Down Expand Up @@ -106,7 +105,7 @@ public PQueryMessage query(String query) throws PrometheusApiException {
*/
public PQueryMessage queryRange(String query, long start, long end, String step) throws PrometheusApiException {

logger.info("PrometheusApi: queryRange: " + query);
logger.info("PrometheusApi[" + name + "]: queryRange: " + query);

RequestBody formBody = new FormBody.Builder()
// add url encoded parameters
Expand Down Expand Up @@ -259,43 +258,43 @@ private String execute(Request request) throws PrometheusApiException {
// TODO new env var connect timeout
OkHttpClient httpClient = HttpClientFactory.instantiateHttpClient(secureClient, connectTimeout, readTimeout);

logger.info("PrometheusApi: request[" + requestCount + "] " + request.method().toUpperCase() + " " + request.url().toString());
logger.info("PrometheusApi[" + name + "]: request[" + requestCount + "] " + request.method().toUpperCase() + " " + request.url().toString());
Response response = httpClient.newCall(request).execute();
logger.info("PrometheusApi: request[" + requestCount + "] code=" + response.code() + ", success=" + response.isSuccessful());
logger.info("PrometheusApi[" + name + "]: request[" + requestCount + "] code=" + response.code() + ", success=" + response.isSuccessful());

code = Integer.toString(response.code());

if (response.body() != null) {
responseBody = response.body().string();
logger.debug("PrometheusApi: request[" + requestCount + "] body: " + responseBody);
logger.debug("PrometheusApi[" + name + "]: request[" + requestCount + "] body: " + responseBody);
}

response.close();

DAO.getInstance().removeWarning("prom_api");

} catch (UnknownHostException e) {
logger.error("PrometheusApi: request[" + requestCount + "] failed: UnknownHostException: " + e.getMessage());
logger.error("PrometheusApi[" + name + "]: request[" + requestCount + "] failed: UnknownHostException: " + e.getMessage());
code = "0";
DAO.getInstance().addWarning("prom_api", "Prometheus API not reachable");
throw new PrometheusApiException("Unknown Host");
} catch (SocketTimeoutException e) {
logger.error("PrometheusApi: request[" + requestCount + "] failed: SocketTimeoutException: " + e.getMessage());
logger.error("PrometheusApi[" + name + "]: request[" + requestCount + "] failed: SocketTimeoutException: " + e.getMessage());
code = "0";
DAO.getInstance().addWarning("prom_api", "Prometheus API not reachable");
throw new PrometheusApiException("Timeout");
} catch (SocketException e) {
logger.error("PrometheusApi: request[" + requestCount + "] failed: SocketException: " + e.getMessage());
logger.error("PrometheusApi[" + name + "]: request[" + requestCount + "] failed: SocketException: " + e.getMessage());
code = "0";
DAO.getInstance().addWarning("prom_api", "Prometheus API not reachable");
throw new PrometheusApiException("Socket Error");
} catch (SSLException e) {
logger.error("PrometheusApi: request[" + requestCount + "] failed: SSLException: " + e.getMessage());
logger.error("PrometheusApi[" + name + "]: request[" + requestCount + "] failed: SSLException: " + e.getMessage());
code = "0";
DAO.getInstance().addWarning("prom_api", "Prometheus API not reachable");
throw new PrometheusApiException("SSL Exception");
} catch (Exception e) {
logger.error("PrometheusApi: request[" + requestCount + "] failed: Exception: ", e);
logger.error("PrometheusApi[" + name + "]: request[" + requestCount + "] failed: Exception: ", e);
code = "0";
DAO.getInstance().addWarning("prom_api", "Prometheus API not reachable");
throw new PrometheusApiException("Unknown Exception");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import si.matjazcerkvenik.alertmonitor.data.DAO;
import si.matjazcerkvenik.alertmonitor.model.*;
import si.matjazcerkvenik.alertmonitor.model.config.ProviderConfig;
import si.matjazcerkvenik.alertmonitor.model.prometheus.PrometheusApiClient;
import si.matjazcerkvenik.alertmonitor.model.prometheus.PrometheusApiClientPool;
import si.matjazcerkvenik.alertmonitor.util.*;
import si.matjazcerkvenik.alertmonitor.util.Formatter;
Expand All @@ -45,15 +44,11 @@ public abstract class AbstractDataProvider {
protected Timer syncTimer = null;

protected long webhookMessagesReceivedCount = 0;

protected long journalReceivedCount = 0;

protected long lastEventTimestamp = 0;

protected long lastSyncTimestamp = 0;
protected int syncSuccessCount = 0;
protected int syncFailedCount = 0;

protected long raisingEventCount = 0;
protected long clearingEventCount = 0;

Expand All @@ -70,10 +65,11 @@ public void setProviderConfig(ProviderConfig providerConfig) {
*/
public void init() {
// TODO error handling!
LogFactory.getLogger().info(providerConfig.toString());
String server = providerConfig.getParam(PrometheusDataProvider.DP_PARAM_KEY_SERVER);
Boolean secure = server.startsWith("https");
Integer poolSize = Integer.parseInt(providerConfig.getParam(PrometheusDataProvider.DP_PARAM_KEY_CLIENT_POOL_SIZE));
Integer connTimeout = 10; // TODO param, env var
Integer connTimeout = Integer.parseInt(providerConfig.getParam(PrometheusDataProvider.DP_PARAM_KEY_CLIENT_CONNECT_TIMEOUT_SEC));
Integer readTimeout = Integer.parseInt(providerConfig.getParam(PrometheusDataProvider.DP_PARAM_KEY_CLIENT_READ_TIMEOUT_SEC));

prometheusApiClientPool = new PrometheusApiClientPool(providerConfig.getName(), poolSize, secure, connTimeout, readTimeout, server);
Expand All @@ -87,14 +83,14 @@ public List<WebhookMessage> getWebhookMessages() {
}

/**
* Add new notification to journal. Also delete oldest notifications.
* Add new notification to journal. Also delete the oldest notifications.
* @param events notifications
*/
public void addToJournal(List<DEvent> events) {
DAO.getInstance().getDataManager().addToJournal(events);
journalReceivedCount++;
for (DEvent e : events) {
AmMetrics.alertmonitor_journal_messages_total.labels(providerConfig.getName(), e.getSeverity()).inc();
AmMetrics.alertmonitor_journal_events_total.labels(providerConfig.getName(), e.getSeverity()).inc();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class PrometheusDataProvider extends AbstractDataProvider {
public static final String DP_PARAM_KEY_SERVER = "server";
public static final String DP_PARAM_KEY_SYNC_INTERVAL_SEC = "syncInterval";
public static final String DP_PARAM_KEY_CLIENT_POOL_SIZE = "clientPoolSize";
public static final String DP_PARAM_KEY_CLIENT_CONNECT_TIMEOUT_SEC = "clientConnectTimeout";
public static final String DP_PARAM_KEY_CLIENT_READ_TIMEOUT_SEC = "clientReadTimeout";

public PrometheusDataProvider() {
Expand All @@ -44,7 +45,7 @@ public void processIncomingEvent(WebhookMessage m) {

DAO.getInstance().getDataManager().addWebhookMessage(m);
webhookMessagesReceivedCount++;
AmMetrics.alertmonitor_webhook_messages_received_total.labels(providerConfig.getName(), m.getRemoteHost(), m.getMethod().toUpperCase()).inc();
AmMetrics.alertmonitor_webhook_requests_received_total.labels(providerConfig.getName(), m.getRemoteHost(), m.getMethod().toUpperCase()).inc();

try {
AmAlertMessage amAlertMessage = AlertmanagerProcessor.processWebhookMessage(m);
Expand Down
Loading

0 comments on commit 341c0af

Please sign in to comment.