Is it possible to use the REST API to run a collector job?
It is possible! You can find more details in the Cribl API documentation, but I’ve listed the general steps below.
You will need to obtain the JSON configuration of the pre-configured collector. NOTE: If the collector doesn’t already exist, you will need to create it before running these steps!
`GET /api/v1/m/<worker-group-name>/lib/jobs` returns the JSON of the collector configurations. Find the entry whose `id` field matches the collector you want to run.
{
"items": [
{
"type": "collection",
"ttl": "4h",
"removeFields": [],
"resumeOnBoot": false,
"schedule": {},
"collector": {
"conf": {
"discovery": {
"discoverType": "http",
"discoverMethod": "get",
"itemList": [],
"discoverDataField": "entry",
"discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
"discoverRequestParams": [
{
"name": "output_mode",
"value": "`json`"
},
{
"name": "search",
"value": "`\"search index=_internal\"`"
}
]
},
"collectMethod": "get",
"pagination": {
"type": "none"
},
"authentication": "login",
"loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
"loginBody": "`username=${username}&password=${password}`",
"tokenRespAttribute": "sessionKey",
"authHeaderExpr": "`Splunk ${token}`",
"username": "admin",
"password": "redacted",
"collectUrl": "`${id}/results`",
"collectRequestHeaders": [],
"collectRequestParams": [
{
"name": "output_mode",
"value": "`json`"
}
]
},
"destructive": false,
"type": "rest"
},
"input": {
"type": "collection",
"staleChannelFlushMs": 10000,
"sendToRoutes": true,
"preprocess": {
"disabled": true
},
"throttleRatePerSec": "0",
"breakerRulesets": [
"splunk_test"
]
},
"id": "splunk",
"history": []
},
...
]
}
Use this data to POST back to `/api/v1/m/<worker-group-name>/jobs`, adding a `run` field that contains the run configuration.
For example, I want to run this collector in preview mode:
{
"type": "collection",
"ttl": "4h",
"removeFields": [],
"resumeOnBoot": false,
"schedule": {},
"collector": {
"conf": {
"discovery": {
"discoverType": "http",
"discoverMethod": "get",
"itemList": [],
"discoverDataField": "entry",
"discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
"discoverRequestParams": [
{
"name": "output_mode",
"value": "`json`"
},
{
"name": "search",
"value": "`\"search index=_internal\"`"
}
]
},
"collectMethod": "get",
"pagination": {
"type": "none"
},
"authentication": "login",
"loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
"loginBody": "`username=${username}&password=${password}`",
"tokenRespAttribute": "sessionKey",
"authHeaderExpr": "`Splunk ${token}`",
"username": "admin",
"password": "redacted",
"collectUrl": "`${id}/results`",
"collectRequestHeaders": [],
"collectRequestParams": [
{
"name": "output_mode",
"value": "`json`"
}
]
},
"destructive": false,
"type": "rest"
},
"input": {
"type": "collection",
"staleChannelFlushMs": 10000,
"sendToRoutes": true,
"preprocess": {
"disabled": true
},
"throttleRatePerSec": "0",
"breakerRulesets": [
"splunk_test"
]
},
"id": "splunk",
"history": [],
"run": {
"rescheduleDroppedTasks": true,
"maxTaskReschedule": 1,
"logLevel": "info",
"jobTimeout": "0",
"mode": "preview",
"timeRangeType": "relative",
"expression": "true",
"minTaskSize": "1MB",
"maxTaskSize": "10MB",
"capture": {
"duration": 60,
"maxEvents": 100,
"level": "0"
}
}
}
This returns a JSON response containing the job ID: {"items":["1621367040.54.adhoc.splunk"],"count":1}
You can then query the jobs endpoint to get the status of the job.
GET /api/v1/m/<worker-group-name>/jobs/1621367040.54.adhoc.splunk
This returns a JSON response (check the `status.state` field for the job's current state):
{
"items": [
{
"id": "1621367040.54.adhoc.splunk",
"args": {
"type": "collection",
"ttl": "60s",
"removeFields": [],
"resumeOnBoot": false,
"schedule": {},
"collector": {
"conf": {
"discovery": {
"discoverType": "http",
"discoverMethod": "get",
"itemList": [],
"discoverDataField": "entry",
"discoverUrl": "`https://1.2.3.4:8089/services/search/jobs`",
"discoverRequestParams": [
{
"name": "output_mode",
"value": "`json`"
},
{
"name": "search",
"value": "`\"search index=_internal\"`"
}
]
},
"collectMethod": "get",
"pagination": {
"type": "none"
},
"authentication": "login",
"loginUrl": "`https://1.2.3.4:8089/services/auth/login?output_mode=json`",
"loginBody": "`username=${username}&password=${password}`",
"tokenRespAttribute": "sessionKey",
"authHeaderExpr": "`Splunk ${token}`",
"username": "admin",
"password": "redacted",
"collectUrl": "`${id}/results`",
"collectRequestHeaders": [],
"collectRequestParams": [
{
"name": "output_mode",
"value": "`json`"
}
],
"filter": "(true)",
"discoverToRoutes": false,
"collectorId": "splunk",
"removeFields": []
},
"destructive": false,
"type": "rest"
},
"input": {
"type": "collection",
"staleChannelFlushMs": 10000,
"sendToRoutes": false,
"preprocess": {
"disabled": true
},
"throttleRatePerSec": "0",
"breakerRulesets": [
"splunk_test"
],
"output": "devnull",
"pipeline": "passthru",
"filter": "(true)"
},
"id": "splunk",
"history": [],
"run": {
"rescheduleDroppedTasks": true,
"maxTaskReschedule": 1,
"logLevel": "info",
"jobTimeout": "0",
"mode": "preview",
"timeRangeType": "relative",
"expression": "true",
"minTaskSize": "1MB",
"maxTaskSize": "10MB",
"capture": {
"duration": 60,
"maxEvents": 100,
"level": "0"
},
"type": "adhoc",
"taskHeartbeatPeriod": 60
},
"initialState": 3,
"groupId": "default"
},
"status": {
"state": "finished"
},
"stats": {
"tasks": {
"finished": 1,
"failed": 0,
"cancelled": 0,
"orphaned": 0,
"inFlight": 0,
"count": 1,
"totalExecutionTime": 80,
"minExecutionTime": 80,
"maxExecutionTime": 80
},
"discoveryComplete": 1,
"state": {
"initializing": 1621367040902,
"paused": 1621367040903,
"pending": 1621367041736,
"running": 1621367041738,
"finished": 1621367041852
}
},
"keep": false
}
],
"count": 1
}