We have updated our Terms of Service, Code of Conduct, and Addendum.

Is it possible to POST to the API to trigger a collector?

Options

Is it possible to use the REST API to run a collector job?

Best Answer

  • Brendan Dalpe
    Brendan Dalpe Posts: 201 mod
    Answer ✓
    Options

    It is possible! You can find more details on the Cribl API Documentation, but I've listed the general steps below.

    You will need to obtain the JSON configuration of the pre-configured collector. NOTE: If the collector doesn't already exist, you will need to create it before running these steps!

    GET /api/v1/m/<worker-group-name>/lib/jobs returns the JSON of the collector configurations. Find the one with the corresponding id field.

    {
      "items": [
        {
          "type": "collection",
          "ttl": "4h",
          "removeFields": [],
          "resumeOnBoot": false,
          "schedule": {},
          "collector": {
            "conf": {
              "discovery": {
                "discoverType": "http",
                "discoverMethod": "get",
                "itemList": [],
                "discoverDataField": "entry",
                "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
                "discoverRequestParams": [
                  {
                    "name": "output_mode",
                    "value": "`json`"
                  },
                  {
                    "name": "search",
                    "value": "`\"search index=_internal\"`"
                  }
                ]
              },
              "collectMethod": "get",
              "pagination": {
                "type": "none"
              },
              "authentication": "login",
              "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
              "loginBody": "`username=${username}&password=${password}`",
              "tokenRespAttribute": "sessionKey",
              "authHeaderExpr": "`Splunk ${token}`",
              "username": "admin",
              "password": "redacted",
              "collectUrl": "`${id}/results`",
              "collectRequestHeaders": [],
              "collectRequestParams": [
                {
                  "name": "output_mode",
                  "value": "`json`"
                }
              ]
            },
            "destructive": false,
            "type": "rest"
          },
          "input": {
            "type": "collection",
            "staleChannelFlushMs": 10000,
            "sendToRoutes": true,
            "preprocess": {
              "disabled": true
            },
            "throttleRatePerSec": "0",
            "breakerRulesets": [
              "splunk_test"
            ]
          },
          "id": "splunk",
          "history": []
        },
        ...
      ]
    }
    

    Use this data to POST back to /api/v1/m/<worker-group-name>/jobs with an added run field with the configuration.

    For example, I want to run this collector in preview mode:

    {
      "type": "collection",
      "ttl": "4h",
      "removeFields": [],
      "resumeOnBoot": false,
      "schedule": {},
      "collector": {
        "conf": {
          "discovery": {
            "discoverType": "http",
            "discoverMethod": "get",
            "itemList": [],
            "discoverDataField": "entry",
            "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
            "discoverRequestParams": [
              {
                "name": "output_mode",
                "value": "`json`"
              },
              {
                "name": "search",
                "value": "`\"search index=_internal\"`"
              }
            ]
          },
          "collectMethod": "get",
          "pagination": {
            "type": "none"
          },
          "authentication": "login",
          "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
          "loginBody": "`username=${username}&password=${password}`",
          "tokenRespAttribute": "sessionKey",
          "authHeaderExpr": "`Splunk ${token}`",
          "username": "admin",
          "password": "redacted",
          "collectUrl": "`${id}/results`",
          "collectRequestHeaders": [],
          "collectRequestParams": [
            {
              "name": "output_mode",
              "value": "`json`"
            }
          ]
        },
        "destructive": false,
        "type": "rest"
      },
      "input": {
        "type": "collection",
        "staleChannelFlushMs": 10000,
        "sendToRoutes": true,
        "preprocess": {
          "disabled": true
        },
        "throttleRatePerSec": "0",
        "breakerRulesets": [
          "splunk_test"
        ]
      },
      "id": "splunk",
      "history": [],
      "run": {
        "rescheduleDroppedTasks": true,
        "maxTaskReschedule": 1,
        "logLevel": "info",
        "jobTimeout": "0",
        "mode": "preview",
        "timeRangeType": "relative",
        "expression": "true",
        "minTaskSize": "1MB",
        "maxTaskSize": "10MB",
        "capture": {
          "duration": 60,
          "maxEvents": 100,
          "level": "0"
        }
      }
    }
    

    This returns a JSON response with a Job id: {"items":["1621367040.54.adhoc.splunk"],"count":1}

    You can then query the jobs endpoint to get the status of the job.

    GET /api/v1/m/<worker-group-name>/jobs/1621367040.54.adhoc.splunk

    Which provides a JSON response (check the status.state field for more information):

    {
      "items": [
        {
          "id": "1621367040.54.adhoc.splunk",
          "args": {
            "type": "collection",
            "ttl": "60s",
            "removeFields": [],
            "resumeOnBoot": false,
            "schedule": {},
            "collector": {
              "conf": {
                "discovery": {
                  "discoverType": "http",
                  "discoverMethod": "get",
                  "itemList": [],
                  "discoverDataField": "entry",
                  "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
                  "discoverRequestParams": [
                    {
                      "name": "output_mode",
                      "value": "`json`"
                    },
                    {
                      "name": "search",
                      "value": "`\"search index=_internal\"`"
                    }
                  ]
                },
                "collectMethod": "get",
                "pagination": {
                  "type": "none"
                },
                "authentication": "login",
                "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
                "loginBody": "`username=${username}&password=${password}`",
                "tokenRespAttribute": "sessionKey",
                "authHeaderExpr": "`Splunk ${token}`",
                "username": "admin",
                "password": "redacted",
                "collectUrl": "`${id}/results`",
                "collectRequestHeaders": [],
                "collectRequestParams": [
                  {
                    "name": "output_mode",
                    "value": "`json`"
                  }
                ],
                "filter": "(true)",
                "discoverToRoutes": false,
                "collectorId": "splunk",
                "removeFields": []
              },
              "destructive": false,
              "type": "rest"
            },
            "input": {
              "type": "collection",
              "staleChannelFlushMs": 10000,
              "sendToRoutes": false,
              "preprocess": {
                "disabled": true
              },
              "throttleRatePerSec": "0",
              "breakerRulesets": [
                "splunk_test"
              ],
              "output": "devnull",
              "pipeline": "passthru",
              "filter": "(true)"
            },
            "id": "splunk",
            "history": [],
            "run": {
              "rescheduleDroppedTasks": true,
              "maxTaskReschedule": 1,
              "logLevel": "info",
              "jobTimeout": "0",
              "mode": "preview",
              "timeRangeType": "relative",
              "expression": "true",
              "minTaskSize": "1MB",
              "maxTaskSize": "10MB",
              "capture": {
                "duration": 60,
                "maxEvents": 100,
                "level": "0"
              },
              "type": "adhoc",
              "taskHeartbeatPeriod": 60
            },
            "initialState": 3,
            "groupId": "default"
          },
          "status": {
            "state": "finished"
          },
          "stats": {
            "tasks": {
              "finished": 1,
              "failed": 0,
              "cancelled": 0,
              "orphaned": 0,
              "inFlight": 0,
              "count": 1,
              "totalExecutionTime": 80,
              "minExecutionTime": 80,
              "maxExecutionTime": 80
            },
            "discoveryComplete": 1,
            "state": {
              "initializing": 1621367040902,
              "paused": 1621367040903,
              "pending": 1621367041736,
              "running": 1621367041738,
              "finished": 1621367041852
            }
          },
          "keep": false
        }
      ],
      "count": 1
    }
    

Answers

  • Brendan Dalpe
    Brendan Dalpe Posts: 201 mod
    Answer ✓
    Options

    It is possible! You can find more details on the Cribl API Documentation, but I've listed the general steps below.

    You will need to obtain the JSON configuration of the pre-configured collector. NOTE: If the collector doesn't already exist, you will need to create it before running these steps!

    GET /api/v1/m/<worker-group-name>/lib/jobs returns the JSON of the collector configurations. Find the one with the corresponding id field.

    {
      "items": [
        {
          "type": "collection",
          "ttl": "4h",
          "removeFields": [],
          "resumeOnBoot": false,
          "schedule": {},
          "collector": {
            "conf": {
              "discovery": {
                "discoverType": "http",
                "discoverMethod": "get",
                "itemList": [],
                "discoverDataField": "entry",
                "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
                "discoverRequestParams": [
                  {
                    "name": "output_mode",
                    "value": "`json`"
                  },
                  {
                    "name": "search",
                    "value": "`\"search index=_internal\"`"
                  }
                ]
              },
              "collectMethod": "get",
              "pagination": {
                "type": "none"
              },
              "authentication": "login",
              "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
              "loginBody": "`username=${username}&password=${password}`",
              "tokenRespAttribute": "sessionKey",
              "authHeaderExpr": "`Splunk ${token}`",
              "username": "admin",
              "password": "redacted",
              "collectUrl": "`${id}/results`",
              "collectRequestHeaders": [],
              "collectRequestParams": [
                {
                  "name": "output_mode",
                  "value": "`json`"
                }
              ]
            },
            "destructive": false,
            "type": "rest"
          },
          "input": {
            "type": "collection",
            "staleChannelFlushMs": 10000,
            "sendToRoutes": true,
            "preprocess": {
              "disabled": true
            },
            "throttleRatePerSec": "0",
            "breakerRulesets": [
              "splunk_test"
            ]
          },
          "id": "splunk",
          "history": []
        },
        ...
      ]
    }
    

    Use this data to POST back to /api/v1/m/<worker-group-name>/jobs with an added run field with the configuration.

    For example, I want to run this collector in preview mode:

    {
      "type": "collection",
      "ttl": "4h",
      "removeFields": [],
      "resumeOnBoot": false,
      "schedule": {},
      "collector": {
        "conf": {
          "discovery": {
            "discoverType": "http",
            "discoverMethod": "get",
            "itemList": [],
            "discoverDataField": "entry",
            "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
            "discoverRequestParams": [
              {
                "name": "output_mode",
                "value": "`json`"
              },
              {
                "name": "search",
                "value": "`\"search index=_internal\"`"
              }
            ]
          },
          "collectMethod": "get",
          "pagination": {
            "type": "none"
          },
          "authentication": "login",
          "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
          "loginBody": "`username=${username}&password=${password}`",
          "tokenRespAttribute": "sessionKey",
          "authHeaderExpr": "`Splunk ${token}`",
          "username": "admin",
          "password": "redacted",
          "collectUrl": "`${id}/results`",
          "collectRequestHeaders": [],
          "collectRequestParams": [
            {
              "name": "output_mode",
              "value": "`json`"
            }
          ]
        },
        "destructive": false,
        "type": "rest"
      },
      "input": {
        "type": "collection",
        "staleChannelFlushMs": 10000,
        "sendToRoutes": true,
        "preprocess": {
          "disabled": true
        },
        "throttleRatePerSec": "0",
        "breakerRulesets": [
          "splunk_test"
        ]
      },
      "id": "splunk",
      "history": [],
      "run": {
        "rescheduleDroppedTasks": true,
        "maxTaskReschedule": 1,
        "logLevel": "info",
        "jobTimeout": "0",
        "mode": "preview",
        "timeRangeType": "relative",
        "expression": "true",
        "minTaskSize": "1MB",
        "maxTaskSize": "10MB",
        "capture": {
          "duration": 60,
          "maxEvents": 100,
          "level": "0"
        }
      }
    }
    

    This returns a JSON response with a Job id: {"items":["1621367040.54.adhoc.splunk"],"count":1}

    You can then query the jobs endpoint to get the status of the job.

    GET /api/v1/m/<worker-group-name>/jobs/1621367040.54.adhoc.splunk

    Which provides a JSON response (check the status.state field for more information):

    {
      "items": [
        {
          "id": "1621367040.54.adhoc.splunk",
          "args": {
            "type": "collection",
            "ttl": "60s",
            "removeFields": [],
            "resumeOnBoot": false,
            "schedule": {},
            "collector": {
              "conf": {
                "discovery": {
                  "discoverType": "http",
                  "discoverMethod": "get",
                  "itemList": [],
                  "discoverDataField": "entry",
                  "discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
                  "discoverRequestParams": [
                    {
                      "name": "output_mode",
                      "value": "`json`"
                    },
                    {
                      "name": "search",
                      "value": "`\"search index=_internal\"`"
                    }
                  ]
                },
                "collectMethod": "get",
                "pagination": {
                  "type": "none"
                },
                "authentication": "login",
                "loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
                "loginBody": "`username=${username}&password=${password}`",
                "tokenRespAttribute": "sessionKey",
                "authHeaderExpr": "`Splunk ${token}`",
                "username": "admin",
                "password": "redacted",
                "collectUrl": "`${id}/results`",
                "collectRequestHeaders": [],
                "collectRequestParams": [
                  {
                    "name": "output_mode",
                    "value": "`json`"
                  }
                ],
                "filter": "(true)",
                "discoverToRoutes": false,
                "collectorId": "splunk",
                "removeFields": []
              },
              "destructive": false,
              "type": "rest"
            },
            "input": {
              "type": "collection",
              "staleChannelFlushMs": 10000,
              "sendToRoutes": false,
              "preprocess": {
                "disabled": true
              },
              "throttleRatePerSec": "0",
              "breakerRulesets": [
                "splunk_test"
              ],
              "output": "devnull",
              "pipeline": "passthru",
              "filter": "(true)"
            },
            "id": "splunk",
            "history": [],
            "run": {
              "rescheduleDroppedTasks": true,
              "maxTaskReschedule": 1,
              "logLevel": "info",
              "jobTimeout": "0",
              "mode": "preview",
              "timeRangeType": "relative",
              "expression": "true",
              "minTaskSize": "1MB",
              "maxTaskSize": "10MB",
              "capture": {
                "duration": 60,
                "maxEvents": 100,
                "level": "0"
              },
              "type": "adhoc",
              "taskHeartbeatPeriod": 60
            },
            "initialState": 3,
            "groupId": "default"
          },
          "status": {
            "state": "finished"
          },
          "stats": {
            "tasks": {
              "finished": 1,
              "failed": 0,
              "cancelled": 0,
              "orphaned": 0,
              "inFlight": 0,
              "count": 1,
              "totalExecutionTime": 80,
              "minExecutionTime": 80,
              "maxExecutionTime": 80
            },
            "discoveryComplete": 1,
            "state": {
              "initializing": 1621367040902,
              "paused": 1621367040903,
              "pending": 1621367041736,
              "running": 1621367041738,
              "finished": 1621367041852
            }
          },
          "keep": false
        }
      ],
      "count": 1
    }
    
  • Andrew Luke
    Options

    Isn't there a better way of doing this? Sending a huge body in an API request is just asking for errors. I've been trying to use this setup to run some script collectors via API and having no luck. I also have around a dozen script collectors I want to kick off programmatically via the API rather than put on a schedule. Sending a huge, ungainly API command for each one is a bit untenable. If there isn't a better way of doing this, I suggest the addition of a RunJob API command which has a minimal set of inputs, and it's generally better to have those inputs be parameters rather than error-prone JSON bodies.

    Current error being worked, though I might drop this and find a better way:

    {    "status": "error",    "message": "invalid config jobs: [{\"keyword\":\"required\",\"dataPath\":\"/ohkgZv\",\"schemaPath\":\"#/definitions/collection/required\",\"params\":{\"missingProperty\":\"collector\"},\"message\":\"should have required property 'collector'\"},{\"keyword\":\"if\",\"dataPath\":\"/ohkgZv\",\"schemaPath\":\"#/patternProperties/.*/if\",\"params\":{\"failingKeyword\":\"then\"},\"message\":\"should match \\\"then\\\" schema\"}]"}