Add scheduler

This commit is contained in:
Fedor Korotkiy 2020-03-29 00:34:09 +03:00
parent 8a0fd66a89
commit a60b6dfad1
18 changed files with 614 additions and 183 deletions

View file

@ -15,6 +15,7 @@ import (
"gitlab.com/slon/shad-go/distbuild/pkg/client" "gitlab.com/slon/shad-go/distbuild/pkg/client"
"gitlab.com/slon/shad-go/distbuild/pkg/dist" "gitlab.com/slon/shad-go/distbuild/pkg/dist"
"gitlab.com/slon/shad-go/distbuild/pkg/filecache" "gitlab.com/slon/shad-go/distbuild/pkg/filecache"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
"gitlab.com/slon/shad-go/distbuild/pkg/worker" "gitlab.com/slon/shad-go/distbuild/pkg/worker"
"gitlab.com/slon/shad-go/tools/testtool" "gitlab.com/slon/shad-go/tools/testtool"
@ -80,6 +81,9 @@ func newEnv(t *testing.T) (e *env, cancel func()) {
coordinatorCache, coordinatorCache,
) )
router := http.NewServeMux()
router.Handle("/coordinator/", http.StripPrefix("/coordinator", env.Coordinator))
for i := 0; i < nWorkers; i++ { for i := 0; i < nWorkers; i++ {
workerName := fmt.Sprintf("worker%d", i) workerName := fmt.Sprintf("worker%d", i)
workerDir := filepath.Join(env.RootDir, workerName) workerDir := filepath.Join(env.RootDir, workerName)
@ -92,7 +96,11 @@ func newEnv(t *testing.T) (e *env, cancel func()) {
artifacts, err = artifact.NewCache(filepath.Join(workerDir, "artifacts")) artifacts, err = artifact.NewCache(filepath.Join(workerDir, "artifacts"))
require.NoError(t, err) require.NoError(t, err)
workerPrefix := fmt.Sprintf("/worker/%d", i)
workerID := proto.WorkerID("http://" + addr + workerPrefix)
w := worker.New( w := worker.New(
workerID,
coordinatorEndpoint, coordinatorEndpoint,
env.Logger.Named(workerName), env.Logger.Named(workerName),
fileCache, fileCache,
@ -100,19 +108,13 @@ func newEnv(t *testing.T) (e *env, cancel func()) {
) )
env.Workers = append(env.Workers, w) env.Workers = append(env.Workers, w)
}
mux := http.NewServeMux() router.Handle(workerPrefix+"/", http.StripPrefix(workerPrefix, w))
mux.Handle("/coordinator/", http.StripPrefix("/coordinator", env.Coordinator))
for i, w := range env.Workers {
workerPrefix := fmt.Sprintf("/worker/%d", i)
mux.Handle(workerPrefix+"/", http.StripPrefix(workerPrefix, w))
} }
env.HTTP = &http.Server{ env.HTTP = &http.Server{
Addr: addr, Addr: addr,
Handler: mux, Handler: router,
} }
lsn, err := net.Listen("tcp", env.HTTP.Addr) lsn, err := net.Listen("tcp", env.HTTP.Addr)

View file

@ -1,46 +1,5 @@
package build package build
import (
"crypto/sha1"
"encoding"
"encoding/hex"
"fmt"
"path/filepath"
)
type ID [sha1.Size]byte
var (
_ = encoding.TextMarshaler(ID{})
_ = encoding.TextUnmarshaler(&ID{})
)
func (id ID) String() string {
return hex.EncodeToString(id[:])
}
func (id ID) Path() string {
return filepath.Join(hex.EncodeToString(id[:1]), hex.EncodeToString(id[:]))
}
func (id ID) MarshalText() ([]byte, error) {
return []byte(hex.EncodeToString(id[:])), nil
}
func (id *ID) UnmarshalText(b []byte) error {
raw, err := hex.DecodeString(string(b))
if err != nil {
return err
}
if len(raw) != len(id) {
return fmt.Errorf("invalid id size: %q", b)
}
copy(id[:], raw)
return nil
}
// Job описывает одну вершину графа сборки. // Job описывает одну вершину графа сборки.
type Job struct { type Job struct {
// ID задаёт уникальный идентификатор джоба. // ID задаёт уникальный идентификатор джоба.

52
distbuild/pkg/build/id.go Normal file
View file

@ -0,0 +1,52 @@
package build
import (
"crypto/rand"
"crypto/sha1"
"encoding"
"encoding/hex"
"fmt"
"path/filepath"
)
type ID [sha1.Size]byte
var (
_ = encoding.TextMarshaler(ID{})
_ = encoding.TextUnmarshaler(&ID{})
)
func (id ID) String() string {
return hex.EncodeToString(id[:])
}
func (id ID) Path() string {
return filepath.Join(hex.EncodeToString(id[:1]), hex.EncodeToString(id[:]))
}
func (id ID) MarshalText() ([]byte, error) {
return []byte(hex.EncodeToString(id[:])), nil
}
func (id *ID) UnmarshalText(b []byte) error {
raw, err := hex.DecodeString(string(b))
if err != nil {
return err
}
if len(raw) != len(id) {
return fmt.Errorf("invalid id size: %q", b)
}
copy(id[:], raw)
return nil
}
func NewID() ID {
var id ID
_, err := rand.Read(id[:])
if err != nil {
panic(fmt.Sprintf("crypto/rand is unavailable: %v", err))
}
return id
}

View file

@ -0,0 +1,31 @@
package build
// TopSort sorts jobs in topological order assuming dependency graph contains no cycles.
func TopSort(jobs []Job) []Job {
var sorted []Job
visited := make([]bool, len(jobs))
jobIDIndex := map[ID]int{}
for i, j := range jobs {
jobIDIndex[j.ID] = i
}
var visit func(jobIndex int)
visit = func(jobIndex int) {
if visited[jobIndex] {
return
}
visited[jobIndex] = true
for _, dep := range jobs[jobIndex].Deps {
visit(jobIDIndex[dep])
}
sorted = append(sorted, jobs[jobIndex])
}
for i := range jobs {
visit(i)
}
return sorted
}

View file

@ -0,0 +1,29 @@
package build
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestTopSort(t *testing.T) {
jobs := []Job{
{
ID: ID{'a'},
Deps: []ID{{'b'}},
},
{
ID: ID{'b'},
Deps: []ID{{'c'}},
},
{
ID: ID{'c'},
},
}
sorted := TopSort(jobs)
require.Equal(t, 3, len(sorted))
require.Equal(t, ID{'c'}, sorted[0].ID)
require.Equal(t, ID{'b'}, sorted[1].ID)
require.Equal(t, ID{'a'}, sorted[2].ID)
}

36
distbuild/pkg/dist/build.go vendored Normal file
View file

@ -0,0 +1,36 @@
package dist
import (
"context"
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
)
type Build struct {
ID build.ID
Graph *build.Graph
coordinator *Coordinator
uploadComplete chan struct{}
}
func NewBuild(graph *build.Graph, coordinator *Coordinator) *Build {
id := build.NewID()
return &Build{
ID: id,
Graph: graph,
coordinator: coordinator,
uploadComplete: make(chan struct{}),
}
}
func (b *Build) Run(ctx context.Context, onStatusUpdate func(update proto.StatusUpdate) error) error {
panic("implement me")
}
func (b *Build) UploadComplete() {
close(b.uploadComplete)
}

View file

@ -13,19 +13,22 @@ import (
"gitlab.com/slon/shad-go/distbuild/pkg/build" "gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/filecache" "gitlab.com/slon/shad-go/distbuild/pkg/filecache"
"gitlab.com/slon/shad-go/distbuild/pkg/proto" "gitlab.com/slon/shad-go/distbuild/pkg/proto"
"gitlab.com/slon/shad-go/distbuild/pkg/scheduler"
) )
type Build struct {
}
type Coordinator struct { type Coordinator struct {
log *zap.Logger log *zap.Logger
mux *http.ServeMux mux *http.ServeMux
fileCache *filecache.Cache fileCache *filecache.Cache
mu sync.Mutex mu sync.Mutex
scheduledJobs map[build.ID]*scheduledJob builds map[build.ID]*Build
queue []*scheduledJob scheduler *scheduler.Scheduler
}
var defaultConfig = scheduler.Config{
CacheTimeout: time.Millisecond * 10,
DepsTimeout: time.Millisecond * 100,
} }
func NewCoordinator( func NewCoordinator(
@ -37,10 +40,12 @@ func NewCoordinator(
mux: http.NewServeMux(), mux: http.NewServeMux(),
fileCache: fileCache, fileCache: fileCache,
scheduledJobs: make(map[build.ID]*scheduledJob), builds: make(map[build.ID]*Build),
scheduler: scheduler.NewScheduler(log, defaultConfig),
} }
c.mux.HandleFunc("/build", c.Build) c.mux.HandleFunc("/build", c.Build)
c.mux.HandleFunc("/signal", c.Signal)
c.mux.HandleFunc("/heartbeat", c.Heartbeat) c.mux.HandleFunc("/heartbeat", c.Heartbeat)
return c return c
} }
@ -69,12 +74,17 @@ func (c *Coordinator) doBuild(w http.ResponseWriter, r *http.Request) error {
for _, job := range g.Jobs { for _, job := range g.Jobs {
job := job job := job
s := c.scheduleJob(&job) s := c.scheduler.ScheduleJob(&job)
<-s.done
select {
case <-r.Context().Done():
return r.Context().Err()
case <-s.Finished:
}
c.log.Debug("job finished", zap.String("job_id", job.ID.String())) c.log.Debug("job finished", zap.String("job_id", job.ID.String()))
update := proto.StatusUpdate{JobFinished: s.finished} update := proto.StatusUpdate{JobFinished: s.Result}
if err := enc.Encode(update); err != nil { if err := enc.Encode(update); err != nil {
return err return err
} }
@ -84,6 +94,18 @@ func (c *Coordinator) doBuild(w http.ResponseWriter, r *http.Request) error {
return enc.Encode(update) return enc.Encode(update)
} }
func (c *Coordinator) Signal(w http.ResponseWriter, r *http.Request) {
c.log.Debug("build signal started")
if err := c.doHeartbeat(w, r); err != nil {
c.log.Error("build signal failed", zap.Error(err))
w.WriteHeader(http.StatusBadRequest)
_, _ = w.Write([]byte(err.Error()))
return
}
c.log.Debug("build signal finished")
}
func (c *Coordinator) Build(w http.ResponseWriter, r *http.Request) { func (c *Coordinator) Build(w http.ResponseWriter, r *http.Request) {
if err := c.doBuild(w, r); err != nil { if err := c.doBuild(w, r); err != nil {
c.log.Error("build failed", zap.Error(err)) c.log.Error("build failed", zap.Error(err))
@ -100,34 +122,21 @@ func (c *Coordinator) doHeartbeat(w http.ResponseWriter, r *http.Request) error
return fmt.Errorf("invalid request: %w", err) return fmt.Errorf("invalid request: %w", err)
} }
c.scheduler.RegisterWorker(req.WorkerID)
for _, job := range req.FinishedJob { for _, job := range req.FinishedJob {
job := job job := job
scheduled, ok := c.lookupJob(job.ID) c.scheduler.OnJobComplete(req.WorkerID, job.ID, &job)
if !ok {
continue
} }
c.log.Debug("job finished") rsp := proto.HeartbeatResponse{
scheduled.finish(&job) JobsToRun: map[build.ID]proto.JobSpec{},
} }
var rsp proto.HeartbeatResponse job := c.scheduler.PickJob(req.WorkerID, r.Context().Done())
if job != nil {
var job *build.Job rsp.JobsToRun[job.Job.ID] = proto.JobSpec{Job: *job.Job}
for i := 0; i < 10; i++ {
var ok bool
job, ok = c.pickJob()
if ok {
rsp.JobsToRun = map[build.ID]proto.JobSpec{
job.ID: {Job: *job},
}
break
}
time.Sleep(time.Millisecond)
} }
if err := json.NewEncoder(w).Encode(rsp); err != nil { if err := json.NewEncoder(w).Encode(rsp); err != nil {

View file

@ -1,68 +0,0 @@
package dist
import (
"sync"
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
)
type scheduledJob struct {
job *build.Job
finished *proto.JobResult
mu sync.Mutex
done chan struct{}
}
func newScheduledJob(job *build.Job) *scheduledJob {
return &scheduledJob{
job: job,
done: make(chan struct{}),
}
}
func (s *scheduledJob) finish(f *proto.JobResult) {
s.mu.Lock()
defer s.mu.Unlock()
if s.finished == nil {
s.finished = f
close(s.done)
}
}
func (c *Coordinator) scheduleJob(job *build.Job) *scheduledJob {
c.mu.Lock()
defer c.mu.Unlock()
if scheduled, ok := c.scheduledJobs[job.ID]; ok {
return scheduled
}
scheduled := newScheduledJob(job)
c.scheduledJobs[job.ID] = scheduled
c.queue = append(c.queue, scheduled)
return scheduled
}
func (c *Coordinator) pickJob() (*build.Job, bool) {
c.mu.Lock()
defer c.mu.Unlock()
if len(c.queue) == 0 {
return nil, false
}
job := c.queue[0].job
c.queue = c.queue[1:]
return job, true
}
func (c *Coordinator) lookupJob(id build.ID) (*scheduledJob, bool) {
c.mu.Lock()
defer c.mu.Unlock()
scheduled, ok := c.scheduledJobs[id]
return scheduled, ok
}

View file

@ -1,15 +0,0 @@
package dist
import (
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
)
type Cluster struct {
sourceFiles map[build.ID]map[proto.WorkerID]struct{}
artifacts map[build.ID]map[proto.WorkerID]struct{}
}
func (c *Cluster) FindOptimalWorkers(task build.ID, sources, deps []build.ID) []proto.WorkerID {
panic("implement me")
}

View file

@ -9,6 +9,7 @@ type MissingSources struct {
} }
type StatusUpdate struct { type StatusUpdate struct {
SourcesMissing *MissingSources
JobFinished *JobResult JobFinished *JobResult
BuildFailed *BuildFailed BuildFailed *BuildFailed
BuildFinished *BuildFinished BuildFinished *BuildFinished

View file

@ -20,6 +20,10 @@ type JobResult struct {
type WorkerID string type WorkerID string
func (w WorkerID) String() string {
return string(w)
}
type HeartbeatRequest struct { type HeartbeatRequest struct {
// WorkerID задаёт персистентный идентификатор данного воркера. // WorkerID задаёт персистентный идентификатор данного воркера.
// //

View file

@ -0,0 +1,276 @@
package scheduler
import (
"sync"
"time"
"go.uber.org/zap"
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
)
type PendingJob struct {
Job *build.Job
Result *proto.JobResult
Finished chan struct{}
mu sync.Mutex
pickedUp chan struct{}
}
func (p *PendingJob) finish(res *proto.JobResult) {
p.Result = res
close(p.Finished)
}
func (p *PendingJob) pickUp() bool {
p.mu.Lock()
defer p.mu.Unlock()
select {
case <-p.pickedUp:
return false
default:
close(p.pickedUp)
return true
}
}
type jobQueue struct {
mu sync.Mutex
jobs []*PendingJob
}
func (q *jobQueue) put(job *PendingJob) {
q.mu.Lock()
defer q.mu.Unlock()
q.jobs = append(q.jobs, job)
}
func (q *jobQueue) pop() *PendingJob {
q.mu.Lock()
defer q.mu.Unlock()
if len(q.jobs) == 0 {
return nil
}
job := q.jobs[0]
q.jobs = q.jobs[1:]
return job
}
type Config struct {
CacheTimeout time.Duration
DepsTimeout time.Duration
}
type Scheduler struct {
l *zap.Logger
config Config
mu sync.Mutex
cachedJobs map[build.ID]map[proto.WorkerID]struct{}
pendingJobs map[build.ID]*PendingJob
cacheLocalQueue map[proto.WorkerID]*jobQueue
depLocalQueue map[proto.WorkerID]*jobQueue
globalQueue chan *PendingJob
}
func NewScheduler(l *zap.Logger, config Config) *Scheduler {
return &Scheduler{
l: l,
config: config,
cachedJobs: make(map[build.ID]map[proto.WorkerID]struct{}),
pendingJobs: make(map[build.ID]*PendingJob),
cacheLocalQueue: make(map[proto.WorkerID]*jobQueue),
depLocalQueue: make(map[proto.WorkerID]*jobQueue),
globalQueue: make(chan *PendingJob),
}
}
func (c *Scheduler) RegisterWorker(workerID proto.WorkerID) {
c.mu.Lock()
defer c.mu.Unlock()
_, ok := c.cacheLocalQueue[workerID]
if ok {
return
}
c.cacheLocalQueue[workerID] = new(jobQueue)
c.depLocalQueue[workerID] = new(jobQueue)
}
func (c *Scheduler) OnJobComplete(workerID proto.WorkerID, jobID build.ID, res *proto.JobResult) bool {
c.l.Debug("job completed", zap.String("worker_id", workerID.String()), zap.String("job_id", jobID.String()))
c.mu.Lock()
pendingJob, pendingFound := c.pendingJobs[jobID]
if pendingFound {
delete(c.pendingJobs, jobID)
}
job, ok := c.cachedJobs[jobID]
if !ok {
job = make(map[proto.WorkerID]struct{})
c.cachedJobs[jobID] = job
}
job[workerID] = struct{}{}
c.mu.Unlock()
if !pendingFound {
return false
}
c.l.Debug("finishing pending job", zap.String("job_id", jobID.String()))
pendingJob.finish(res)
return true
}
func (c *Scheduler) findOptimalWorkers(jobID build.ID, deps []build.ID) (cacheLocal, depLocal []proto.WorkerID) {
depLocalSet := map[proto.WorkerID]struct{}{}
c.mu.Lock()
defer c.mu.Unlock()
for workerID := range c.cachedJobs[jobID] {
cacheLocal = append(cacheLocal, workerID)
}
for _, dep := range deps {
for workerID := range c.cachedJobs[dep] {
if _, ok := depLocalSet[workerID]; !ok {
depLocal = append(depLocal, workerID)
depLocalSet[workerID] = struct{}{}
}
}
}
return
}
var timeAfter = time.After
func (c *Scheduler) doScheduleJob(job *PendingJob) {
cacheLocal, depLocal := c.findOptimalWorkers(job.Job.ID, job.Job.Deps)
if len(cacheLocal) != 0 {
c.mu.Lock()
for _, workerID := range cacheLocal {
c.cacheLocalQueue[workerID].put(job)
}
c.mu.Unlock()
c.l.Debug("job is put into cache-local queues", zap.String("job_id", job.Job.ID.String()))
select {
case <-job.pickedUp:
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
return
case <-timeAfter(c.config.CacheTimeout):
}
}
if len(depLocal) != 0 {
c.mu.Lock()
for _, workerID := range depLocal {
c.depLocalQueue[workerID].put(job)
}
c.mu.Unlock()
c.l.Debug("job is put into dep-local queues", zap.String("job_id", job.Job.ID.String()))
select {
case <-job.pickedUp:
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
return
case <-timeAfter(c.config.DepsTimeout):
}
}
c.l.Debug("job is put into global queue", zap.String("job_id", job.Job.ID.String()))
select {
case c.globalQueue <- job:
case <-job.pickedUp:
}
c.l.Debug("job picked", zap.String("job_id", job.Job.ID.String()))
}
func (c *Scheduler) ScheduleJob(job *build.Job) *PendingJob {
c.mu.Lock()
pendingJob, running := c.pendingJobs[job.ID]
if !running {
pendingJob = &PendingJob{
Job: job,
Finished: make(chan struct{}),
pickedUp: make(chan struct{}),
}
c.pendingJobs[job.ID] = pendingJob
}
c.mu.Unlock()
if !running {
c.l.Debug("job is scheduled", zap.String("job_id", job.ID.String()))
go c.doScheduleJob(pendingJob)
} else {
c.l.Debug("job is pending", zap.String("job_id", job.ID.String()))
}
return pendingJob
}
func (c *Scheduler) PickJob(workerID proto.WorkerID, canceled <-chan struct{}) *PendingJob {
c.l.Debug("picking next job", zap.String("worker_id", workerID.String()))
var cacheLocal, depLocal *jobQueue
c.mu.Lock()
cacheLocal = c.cacheLocalQueue[workerID]
depLocal = c.depLocalQueue[workerID]
c.mu.Unlock()
for {
job := cacheLocal.pop()
if job == nil {
break
}
if job.pickUp() {
c.l.Debug("picked job from cache-local queue", zap.String("worker_id", workerID.String()), zap.String("job_id", job.Job.ID.String()))
return job
}
}
for {
job := depLocal.pop()
if job == nil {
break
}
if job.pickUp() {
c.l.Debug("picked job from dep-local queue", zap.String("worker_id", workerID.String()), zap.String("job_id", job.Job.ID.String()))
return job
}
}
for {
select {
case job := <-c.globalQueue:
if job.pickUp() {
c.l.Debug("picked job from global queue", zap.String("worker_id", workerID.String()), zap.String("job_id", job.Job.ID.String()))
return job
}
case <-canceled:
return nil
}
}
}

View file

@ -0,0 +1,113 @@
package scheduler
import (
"testing"
"time"
"github.com/jonboulle/clockwork"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"go.uber.org/zap/zaptest"
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
)
const (
workerID0 proto.WorkerID = "w0"
)
func TestScheduler(t *testing.T) {
defer goleak.VerifyNone(t)
clock := clockwork.NewFakeClock()
timeAfter = clock.After
defer func() { timeAfter = time.After }()
config := Config{
CacheTimeout: time.Second,
DepsTimeout: time.Minute,
}
t.Run("SingleJob", func(t *testing.T) {
s := NewScheduler(zaptest.NewLogger(t), config)
job0 := &build.Job{ID: build.NewID()}
pendingJob0 := s.ScheduleJob(job0)
s.RegisterWorker(workerID0)
pickerJob := s.PickJob(workerID0, nil)
require.Equal(t, pendingJob0, pickerJob)
result := &proto.JobResult{ID: job0.ID, ExitCode: 0}
s.OnJobComplete(workerID0, job0.ID, result)
select {
case <-pendingJob0.Finished:
require.Equal(t, pendingJob0.Result, result)
default:
t.Fatalf("job0 is not finished")
}
})
t.Run("PickJobTimeout", func(t *testing.T) {
s := NewScheduler(zaptest.NewLogger(t), config)
canceled := make(chan struct{})
close(canceled)
s.RegisterWorker(workerID0)
require.Nil(t, s.PickJob(workerID0, canceled))
})
t.Run("CacheLocalScheduling", func(t *testing.T) {
s := NewScheduler(zaptest.NewLogger(t), config)
job0 := &build.Job{ID: build.NewID()}
job1 := &build.Job{ID: build.NewID()}
s.RegisterWorker(workerID0)
s.OnJobComplete(workerID0, job0.ID, &proto.JobResult{})
pendingJob1 := s.ScheduleJob(job1)
pendingJob0 := s.ScheduleJob(job0)
// job0 scheduling should be blocked on CacheTimeout
clock.BlockUntil(1)
pickedJob := s.PickJob(workerID0, nil)
require.Equal(t, pendingJob0, pickedJob)
pickedJob = s.PickJob(workerID0, nil)
require.Equal(t, pendingJob1, pickedJob)
clock.Advance(time.Hour)
})
t.Run("DependencyLocalScheduling", func(t *testing.T) {
s := NewScheduler(zaptest.NewLogger(t), config)
job0 := &build.Job{ID: build.NewID()}
job1 := &build.Job{ID: build.NewID(), Deps: []build.ID{job0.ID}}
job2 := &build.Job{ID: build.NewID()}
s.RegisterWorker(workerID0)
s.OnJobComplete(workerID0, job0.ID, &proto.JobResult{})
pendingJob2 := s.ScheduleJob(job2)
pendingJob1 := s.ScheduleJob(job1)
// job1 should be blocked on DepsTimeout
clock.BlockUntil(1)
pickedJob := s.PickJob(workerID0, nil)
require.Equal(t, pendingJob1, pickedJob)
pickedJob = s.PickJob(workerID0, nil)
require.Equal(t, pendingJob2, pickedJob)
clock.Advance(time.Hour)
})
}

View file

@ -32,7 +32,9 @@ func (w *Worker) getJobFromCache(jobID build.ID) (*proto.JobResult, error) {
} }
defer unlock() defer unlock()
res := &proto.JobResult{} res := &proto.JobResult{
ID: jobID,
}
exitCodeStr, err := ioutil.ReadFile(filepath.Join(aRoot, exitCodeFileName)) exitCodeStr, err := ioutil.ReadFile(filepath.Join(aRoot, exitCodeFileName))
if err != nil { if err != nil {

View file

@ -1,8 +1,6 @@
package worker package worker
import ( import (
"go.uber.org/zap"
"gitlab.com/slon/shad-go/distbuild/pkg/proto" "gitlab.com/slon/shad-go/distbuild/pkg/proto"
) )
@ -11,6 +9,7 @@ func (w *Worker) buildHeartbeat() *proto.HeartbeatRequest {
defer w.mu.Unlock() defer w.mu.Unlock()
req := &proto.HeartbeatRequest{ req := &proto.HeartbeatRequest{
WorkerID: w.id,
FinishedJob: w.finishedJobs, FinishedJob: w.finishedJobs,
} }
@ -19,8 +18,6 @@ func (w *Worker) buildHeartbeat() *proto.HeartbeatRequest {
} }
func (w *Worker) jobFinished(job *proto.JobResult) { func (w *Worker) jobFinished(job *proto.JobResult) {
w.log.Debug("job finished", zap.String("job_id", job.ID.String()))
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()

View file

@ -18,6 +18,7 @@ import (
) )
type Worker struct { type Worker struct {
id proto.WorkerID
coordinatorEndpoint string coordinatorEndpoint string
log *zap.Logger log *zap.Logger
@ -34,12 +35,14 @@ type Worker struct {
} }
func New( func New(
workerID proto.WorkerID,
coordinatorEndpoint string, coordinatorEndpoint string,
log *zap.Logger, log *zap.Logger,
fileCache *filecache.Cache, fileCache *filecache.Cache,
artifacts *artifact.Cache, artifacts *artifact.Cache,
) *Worker { ) *Worker {
return &Worker{ return &Worker{
id: workerID,
coordinatorEndpoint: coordinatorEndpoint, coordinatorEndpoint: coordinatorEndpoint,
log: log, log: log,
fileCache: fileCache, fileCache: fileCache,
@ -54,27 +57,19 @@ func (w *Worker) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
} }
func (w *Worker) recover() error { func (w *Worker) recover() error {
//err := w.fileCache.Range(func(file build.ID) error {
// w.newSources = append(w.newSources, file)
// return nil
//})
//if err != nil {
// return err
//}
return w.artifacts.Range(func(file build.ID) error { return w.artifacts.Range(func(file build.ID) error {
w.newArtifacts = append(w.newArtifacts, file) w.newArtifacts = append(w.newArtifacts, file)
return nil return nil
}) })
} }
func (w *Worker) sendHeartbeat(req *proto.HeartbeatRequest) (*proto.HeartbeatResponse, error) { func (w *Worker) sendHeartbeat(ctx context.Context, req *proto.HeartbeatRequest) (*proto.HeartbeatResponse, error) {
reqJS, err := json.Marshal(req) reqJS, err := json.Marshal(req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
httpReq, err := http.NewRequest("POST", w.coordinatorEndpoint+"/heartbeat", bytes.NewBuffer(reqJS)) httpReq, err := http.NewRequestWithContext(ctx, "POST", w.coordinatorEndpoint+"/heartbeat", bytes.NewBuffer(reqJS))
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -104,7 +99,7 @@ func (w *Worker) Run(ctx context.Context) error {
for { for {
w.log.Debug("sending heartbeat request") w.log.Debug("sending heartbeat request")
rsp, err := w.sendHeartbeat(w.buildHeartbeat()) rsp, err := w.sendHeartbeat(ctx, w.buildHeartbeat())
if err != nil { if err != nil {
if ctx.Err() != nil { if ctx.Err() != nil {
return ctx.Err() return ctx.Err()
@ -118,13 +113,18 @@ func (w *Worker) Run(ctx context.Context) error {
for _, spec := range rsp.JobsToRun { for _, spec := range rsp.JobsToRun {
spec := spec spec := spec
w.log.Debug("running job", zap.String("job_id", spec.Job.ID.String()))
result, err := w.runJob(ctx, &spec) result, err := w.runJob(ctx, &spec)
if err != nil { if err != nil {
errStr := fmt.Sprintf("job %s failed: %v", spec.Job.ID, err) errStr := fmt.Sprintf("job %s failed: %v", spec.Job.ID, err)
w.log.Debug("job failed", zap.String("job_id", spec.Job.ID.String()), zap.Error(err))
w.jobFinished(&proto.JobResult{ID: spec.Job.ID, Error: &errStr}) w.jobFinished(&proto.JobResult{ID: spec.Job.ID, Error: &errStr})
continue continue
} }
w.log.Debug("job finished", zap.String("job_id", spec.Job.ID.String()))
w.jobFinished(result) w.jobFinished(result)
} }
} }

1
go.mod
View file

@ -8,6 +8,7 @@ require (
github.com/golang/mock v1.4.1 github.com/golang/mock v1.4.1
github.com/gorilla/handlers v1.4.2 github.com/gorilla/handlers v1.4.2
github.com/gorilla/mux v1.7.4 github.com/gorilla/mux v1.7.4
github.com/jonboulle/clockwork v0.1.0
github.com/spf13/cobra v0.0.5 github.com/spf13/cobra v0.0.5
github.com/stretchr/testify v1.4.0 github.com/stretchr/testify v1.4.0
go.uber.org/goleak v1.0.0 go.uber.org/goleak v1.0.0

2
go.sum
View file

@ -35,6 +35,8 @@ github.com/gorilla/mux v1.7.4/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=