nixpkgs/ci/github-script/withRateLimit.js
Wolfgang Walther 810b9ba51d
ci/github-script/bot: improve parallelism
We used to employ the worst possible strategy for parallelism: the rate
limiter capped us at one concurrent request per second, while 100+ items
were handled in parallel. This led to every item taking the full
duration of the job to complete, making the data fetched at the
beginning of the job stale by the end. That in turn caused small hiccups
when labeling, or the merge-bot posting comments after the PR had
already been closed.

GitHub allows 100 concurrent requests, but considers it a best practice
to serialize them. Since serializing all of them causes problems for us,
we should try to go higher.

Since other jobs are running in parallel, we use a conservative value of
20 concurrent requests here. We also introduce the same number of
workers going through the list of items (sketched below), so that each
item is handled from start to finish in the shortest possible time
before the worker moves on to the next one. This gives us roughly 2.5
seconds per individual item, but speeds up the overall execution of the
scheduled job from 3-4 minutes to 20-30 seconds.
2025-11-04 16:13:40 +01:00
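
A minimal sketch of the worker-pool idea described in the commit message; the names processAll, items, handleItem and numWorkers are illustrative and not taken from the bot code:

async function processAll(items, handleItem, numWorkers = 20) {
  const queue = [...items]
  // Each worker takes one item at a time and finishes it completely before
  // pulling the next, so no item sits half-processed until the end of the job.
  const workers = Array.from({ length: numWorkers }, async () => {
    while (queue.length > 0) await handleItem(queue.shift())
  })
  await Promise.all(workers)
}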


module.exports = async ({ github, core, maxConcurrent = 1 }, callback) => {
  const Bottleneck = require('bottleneck')
  const stats = {
    issues: 0,
    prs: 0,
    requests: 0,
    artifacts: 0,
  }
  // Rate-Limiting and Throttling, see for details:
  // https://github.com/octokit/octokit.js/issues/1069#throttling
  // https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api
  const allLimits = new Bottleneck({
    // Limit the number of concurrent requests (default: 1)
    maxConcurrent,
    // Will be updated with first `updateReservoir()` call below.
    reservoir: 0,
  })
  // Pause between mutative requests
  const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits)
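  // Note: .chain() also submits write jobs to allLimits, so mutative requests
  // respect both the 1s spacing here and the shared concurrency/reservoir limits.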
  github.hook.wrap('request', async (request, options) => {
    // Requests to a different host do not count against the rate limit.
    if (options.url.startsWith('https://github.com')) return request(options)
    // Requests to the /rate_limit endpoint do not count against the rate limit.
    if (options.url === '/rate_limit') return request(options)
    // Search requests are in a different resource group, which allows 30 requests / minute.
    // We do less than a handful each run, so not implementing throttling for now.
    if (options.url.startsWith('/search/')) return request(options)
    stats.requests++
    if (['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method))
      return writeLimits.schedule(request.bind(null, options))
    else return allLimits.schedule(request.bind(null, options))
  })
  async function updateReservoir() {
    let response
    try {
      response = await github.rest.rateLimit.get()
    } catch (err) {
      core.error(`Failed updating reservoir:\n${err}`)
      // Keep retrying on failed rate limit requests instead of exiting the script early.
      return
    }
    // Always keep 1000 spare requests for other jobs to do their regular duty.
    // They normally use below 100, so 1000 is *plenty* of room to work with.
    const reservoir = Math.max(0, response.data.resources.core.remaining - 1000)
    core.info(`Updating reservoir to: ${reservoir}`)
    allLimits.updateSettings({ reservoir })
  }
  await updateReservoir()
  // Update remaining requests every minute to account for other jobs running in parallel.
  const reservoirUpdater = setInterval(updateReservoir, 60 * 1000)
  try {
    await callback(stats)
  } finally {
    clearInterval(reservoirUpdater)
    core.notice(
      `Processed ${stats.prs} PRs, ${stats.issues} Issues, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`,
    )
  }
}
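
A rough usage sketch, assuming the module is called from an actions/github-script step where `github` and `core` are in scope; the repository coordinates and PR number are illustrative only:

const withRateLimit = require('./withRateLimit.js')

await withRateLimit({ github, core, maxConcurrent: 20 }, async (stats) => {
  // Every request made through `github` now passes through the limiters above.
  const { data: pr } = await github.rest.pulls.get({
    owner: 'NixOS',
    repo: 'nixpkgs',
    pull_number: 1,
  })
  stats.prs++
  core.info(`Fetched PR #${pr.number}: ${pr.title}`)
})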