shad-go/distbuild/pkg/scheduler/scheduler.go

289 lines
5.7 KiB
Go
Raw Normal View History

2020-03-28 21:34:09 +00:00
package scheduler
import (
2020-04-05 11:29:46 +00:00
"context"
2020-03-28 21:34:09 +00:00
"sync"
"time"
"go.uber.org/zap"
2020-03-29 16:03:07 +00:00
"gitlab.com/slon/shad-go/distbuild/pkg/api"
2020-03-28 21:34:09 +00:00
"gitlab.com/slon/shad-go/distbuild/pkg/build"
)
type PendingJob struct {
2020-04-04 21:49:25 +00:00
Job *api.JobSpec
2020-03-28 21:34:09 +00:00
Finished chan struct{}
2020-04-05 11:29:46 +00:00
Result *api.JobResult
2020-03-28 21:34:09 +00:00
mu sync.Mutex
pickedUp chan struct{}
}
2020-03-29 16:03:07 +00:00
func (p *PendingJob) finish(res *api.JobResult) {
2020-03-28 21:34:09 +00:00
p.Result = res
close(p.Finished)
}
// pickUp tries to claim the job. It returns true for the first caller,
// closing pickedUp as a side effect, and false for every later caller.
func (p *PendingJob) pickUp() bool {
	p.mu.Lock()
	defer p.mu.Unlock()

	// Non-blocking probe: a closed pickedUp means somebody already claimed
	// the job.
	select {
	case <-p.pickedUp:
		return false
	default:
	}

	close(p.pickedUp)
	return true
}
2020-04-05 11:29:46 +00:00
func (p *PendingJob) enqueue(q chan *PendingJob) {
select {
case q <- p:
case <-p.pickedUp:
2020-03-28 21:34:09 +00:00
}
2020-04-05 11:29:46 +00:00
}
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
type workerQueue struct {
cacheQueue chan *PendingJob
depQueue chan *PendingJob
2020-03-28 21:34:09 +00:00
}
// Config carries the timeouts that control how long a job waits in the
// more specific queues before falling through to the next one.
type Config struct {
	// CacheTimeout is how long doScheduleJob waits for a worker that
	// already has the job cached to pick it up.
	CacheTimeout time.Duration
	// DepsTimeout is how long doScheduleJob waits on the dep-local queues
	// before the job is offered on the global queue.
	DepsTimeout time.Duration
}
type Scheduler struct {
l *zap.Logger
config Config
mu sync.Mutex
2020-04-05 11:29:46 +00:00
cachedJobs map[build.ID]map[api.WorkerID]struct{}
pendingJobs map[build.ID]*PendingJob
pendingJobDeps map[build.ID]map[*PendingJob]struct{}
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
workerQueue map[api.WorkerID]*workerQueue
globalQueue chan *PendingJob
2020-03-28 21:34:09 +00:00
}
func NewScheduler(l *zap.Logger, config Config) *Scheduler {
return &Scheduler{
l: l,
config: config,
2020-04-05 11:29:46 +00:00
cachedJobs: make(map[build.ID]map[api.WorkerID]struct{}),
pendingJobs: make(map[build.ID]*PendingJob),
pendingJobDeps: make(map[build.ID]map[*PendingJob]struct{}),
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
workerQueue: make(map[api.WorkerID]*workerQueue),
globalQueue: make(chan *PendingJob),
2020-03-28 21:34:09 +00:00
}
}
2020-04-05 12:00:33 +00:00
// LocateArtifact reports a worker that has completed the job with the
// given id. When several workers qualify, which one is returned depends on
// map iteration order. The boolean is false when no worker has the job.
func (c *Scheduler) LocateArtifact(id build.ID) (api.WorkerID, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Returning from the first iteration picks an arbitrary member of the
	// set; an empty or missing set skips the loop entirely.
	for workerID := range c.cachedJobs[id] {
		return workerID, true
	}
	return "", false
}
2020-03-29 16:03:07 +00:00
func (c *Scheduler) RegisterWorker(workerID api.WorkerID) {
2020-03-28 21:34:09 +00:00
c.mu.Lock()
defer c.mu.Unlock()
2020-04-05 11:29:46 +00:00
_, ok := c.workerQueue[workerID]
2020-03-28 21:34:09 +00:00
if ok {
return
}
2020-04-05 11:29:46 +00:00
c.workerQueue[workerID] = &workerQueue{
cacheQueue: make(chan *PendingJob),
depQueue: make(chan *PendingJob),
}
2020-03-28 21:34:09 +00:00
}
2020-03-29 16:03:07 +00:00
func (c *Scheduler) OnJobComplete(workerID api.WorkerID, jobID build.ID, res *api.JobResult) bool {
2020-03-28 21:34:09 +00:00
c.l.Debug("job completed", zap.String("worker_id", workerID.String()), zap.String("job_id", jobID.String()))
c.mu.Lock()
pendingJob, pendingFound := c.pendingJobs[jobID]
if pendingFound {
delete(c.pendingJobs, jobID)
}
job, ok := c.cachedJobs[jobID]
if !ok {
2020-03-29 16:03:07 +00:00
job = make(map[api.WorkerID]struct{})
2020-03-28 21:34:09 +00:00
c.cachedJobs[jobID] = job
}
job[workerID] = struct{}{}
2020-04-05 11:29:46 +00:00
workerQueue := c.workerQueue[workerID]
for waiter := range c.pendingJobDeps[jobID] {
go waiter.enqueue(workerQueue.depQueue)
}
2020-03-28 21:34:09 +00:00
c.mu.Unlock()
if !pendingFound {
return false
}
c.l.Debug("finishing pending job", zap.String("job_id", jobID.String()))
pendingJob.finish(res)
return true
}
2020-04-05 11:29:46 +00:00
func (c *Scheduler) enqueueCacheLocal(job *PendingJob) bool {
cached := false
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
for workerID := range c.cachedJobs[job.Job.ID] {
cached = true
go job.enqueue(c.workerQueue[workerID].cacheQueue)
2020-03-28 21:34:09 +00:00
}
2020-04-05 11:29:46 +00:00
return cached
2020-03-28 21:34:09 +00:00
}
// timeAfter is the timer source used by doScheduleJob; it defaults to
// time.After. NOTE(review): presumably a package-level variable so tests
// can substitute a fake clock — confirm against the test files.
var timeAfter func(time.Duration) <-chan time.Time = time.After
2020-04-05 11:29:46 +00:00
// putDepQueue registers job as waiting on dependency dep, creating the
// waiter set for dep on first use.
//
// It modifies pendingJobDeps without locking; doScheduleJob calls it with
// c.mu held.
func (c *Scheduler) putDepQueue(job *PendingJob, dep build.ID) {
	if _, exists := c.pendingJobDeps[dep]; !exists {
		c.pendingJobDeps[dep] = make(map[*PendingJob]struct{})
	}
	c.pendingJobDeps[dep][job] = struct{}{}
}
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
// deleteDepQueue removes job from the waiter set of dependency dep and
// drops the set itself once it is empty.
//
// It modifies pendingJobDeps without locking; doScheduleJob calls it with
// c.mu held.
func (c *Scheduler) deleteDepQueue(job *PendingJob, dep build.ID) {
	waiters := c.pendingJobDeps[dep]

	// delete on a nil map is a no-op, so a missing entry is harmless.
	delete(waiters, job)

	if len(waiters) > 0 {
		return
	}
	delete(c.pendingJobDeps, dep)
}
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
func (c *Scheduler) doScheduleJob(job *PendingJob, cached bool) {
if cached {
2020-03-28 21:34:09 +00:00
select {
case <-job.pickedUp:
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
return
case <-timeAfter(c.config.CacheTimeout):
}
}
2020-04-05 11:29:46 +00:00
c.mu.Lock()
workers := make(map[api.WorkerID]struct{})
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
for _, dep := range job.Job.Deps {
c.putDepQueue(job, dep)
for workerID := range c.cachedJobs[dep] {
if _, ok := workers[workerID]; ok {
return
}
go job.enqueue(c.workerQueue[workerID].depQueue)
workers[workerID] = struct{}{}
2020-03-28 21:34:09 +00:00
}
}
2020-04-05 11:29:46 +00:00
c.mu.Unlock()
defer func() {
c.mu.Lock()
defer c.mu.Unlock()
for _, dep := range job.Job.Deps {
c.deleteDepQueue(job, dep)
}
}()
c.l.Debug("job is put into dep-local queues", zap.String("job_id", job.Job.ID.String()))
2020-03-28 21:34:09 +00:00
select {
case <-job.pickedUp:
2020-04-05 11:29:46 +00:00
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
return
case <-timeAfter(c.config.DepsTimeout):
2020-03-28 21:34:09 +00:00
}
2020-04-05 11:29:46 +00:00
go job.enqueue(c.globalQueue)
c.l.Debug("job is put into global queue", zap.String("job_id", job.Job.ID.String()))
<-job.pickedUp
2020-03-28 21:34:09 +00:00
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
}
2020-04-04 21:49:25 +00:00
func (c *Scheduler) ScheduleJob(job *api.JobSpec) *PendingJob {
2020-04-05 11:29:46 +00:00
var cached bool
2020-03-28 21:34:09 +00:00
c.mu.Lock()
pendingJob, running := c.pendingJobs[job.ID]
if !running {
pendingJob = &PendingJob{
Job: job,
Finished: make(chan struct{}),
pickedUp: make(chan struct{}),
}
c.pendingJobs[job.ID] = pendingJob
2020-04-05 11:29:46 +00:00
cached = c.enqueueCacheLocal(pendingJob)
2020-03-28 21:34:09 +00:00
}
c.mu.Unlock()
if !running {
c.l.Debug("job is scheduled", zap.String("job_id", job.ID.String()))
2020-04-05 11:29:46 +00:00
go c.doScheduleJob(pendingJob, cached)
2020-03-28 21:34:09 +00:00
} else {
c.l.Debug("job is pending", zap.String("job_id", job.ID.String()))
}
return pendingJob
}
2020-04-05 11:29:46 +00:00
func (c *Scheduler) PickJob(ctx context.Context, workerID api.WorkerID) *PendingJob {
2020-03-28 21:34:09 +00:00
c.l.Debug("picking next job", zap.String("worker_id", workerID.String()))
c.mu.Lock()
2020-04-05 11:29:46 +00:00
local := c.workerQueue[workerID]
2020-03-28 21:34:09 +00:00
c.mu.Unlock()
2020-04-05 11:29:46 +00:00
var pg *PendingJob
var queue string
2020-03-28 21:34:09 +00:00
for {
2020-04-05 11:29:46 +00:00
select {
case pg = <-c.globalQueue:
queue = "global"
case pg = <-local.depQueue:
queue = "dep"
case pg = <-local.cacheQueue:
queue = "cache"
case <-ctx.Done():
return nil
2020-03-28 21:34:09 +00:00
}
2020-04-05 11:29:46 +00:00
if pg.pickUp() {
break
2020-03-28 21:34:09 +00:00
}
}
2020-04-05 11:29:46 +00:00
c.l.Debug("picked job",
zap.String("worker_id", workerID.String()),
zap.String("job_id", pg.Job.ID.String()),
zap.String("queue", queue))
2020-03-28 21:34:09 +00:00
2020-04-05 11:29:46 +00:00
return pg
2020-03-28 21:34:09 +00:00
}