Woohoo! The new code runs as shown in the first listing below. Doing the topological sort only once per call to loss would reduce the allocations to 21 and also speed up the Chapel code.
Code:
$ ./go24 # Pi 4B at 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires (GOMAXPROCS=4)

Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37

Total execution time 5.212870886 seconds.

The optimized program now reads as
Code:
/*
Advent of Code 2024 Day 24 Crossed Wires
Written 2025 by Eric Olson

Parallel go version.
One topological sort per call to loss.
Increase esize if this produces wrong answers.
Increase ksize if it fails to produce any answers.
*/
package main

import (
	"bufio"
	"fmt"
	"math/rand"
	"os"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"
)

// tictime is the instant recorded by the most recent call to tic.
var tictime time.Time

// tic records the current time as the start of the timed interval.
func tic() {
	tictime = time.Now()
}

// toc returns the number of seconds elapsed since the last call to tic.
func toc() float64 {
	return time.Since(tictime).Seconds()
}

const (
	esize = 20 // number of random (x,y) pairs in the test ensemble
	ksize = 10 // number of lowest-loss swaps explored at each search depth
)

// Indices into the sorted wire table of the first x, y and z wire, and the
// number of wires in each of those registers.
var (
	xstart, ystart, zstart int
	xbits, ybits, zbits    int
)

var myrnd = rand.New(rand.NewSource(time.Now().UnixNano()))

// ncpus is the number of worker ranges tryswap splits its search into.
var ncpus = runtime.GOMAXPROCS(0)

// check reports whether p occurs in s at offset *ip.  On a match *ip is
// advanced past p; otherwise *ip is left unchanged.
func check(ip *int, s string, p string) bool {
	i := *ip
	if len(s) < i+len(p) || s[i:i+len(p)] != p {
		return false
	}
	*ip = i + len(p)
	return true
}

// scanint parses an optionally negative decimal integer from s starting at
// offset *ip and advances *ip past the characters consumed.  If *ip does
// not move, nothing was parsed.
func scanint(ip *int, s string) int {
	start := *ip
	i := start
	// The magnitude is accumulated as a negative number and the sign is
	// applied at the end via m (-1 for positive input, +1 after a '-').
	r, m := 0, -1
	for ; i < len(s); i++ {
		d := s[i]
		if i == start && d == '-' {
			m = 1
		} else if d >= '0' && d <= '9' {
			r = r*10 - int(d-'0')
		} else {
			break
		}
	}
	*ip = i
	return m * r
}

// wire2num returns the index of wire w in the sorted table stab, or -1 if
// w is not present (including when stab is empty).
func wire2num(stab []string, w string) int {
	i := sort.SearchStrings(stab, w)
	if i < len(stab) && stab[i] == w {
		return i
	}
	return -1
}

// setinput parses an input line of the form "x07: 1" and sets or clears the
// corresponding bit of *xp.  It also grows xbits or ybits so that they
// cover the bit number seen.  Malformed lines terminate the program.
func setinput(xp *int, s string) {
	i := 1 // skip the register letter
	iold := i
	j := scanint(&i, s)
	if iold == i {
		fmt.Printf("Can't find input bitnumber in %s\n", s)
		os.Exit(1)
	}
	if j < 0 {
		// A negative shift count below would panic.
		fmt.Printf("Bit number out of range in %s\n", s)
		os.Exit(1)
	}
	if !check(&i, s, ": ") {
		fmt.Printf("Missing : delimiter in %s\n", s)
		os.Exit(1)
	}
	iold = i
	b := scanint(&i, s)
	if iold == i {
		fmt.Printf("Can't find input bit value in %s\n", s)
		os.Exit(1)
	}
	if b < 0 || b > 1 {
		fmt.Printf("Bit value out of range in %s\n", s)
		os.Exit(1)
	}
	switch s[0] {
	case 'x':
		if xbits <= j {
			xbits = j + 1
		}
	case 'y':
		if ybits <= j {
			ybits = j + 1
		}
	}
	p := 1 << j
	if b == 1 {
		*xp |= p
	} else {
		*xp &^= p
	}
}

// optype is a two-input boolean gate function.
type optype = func(x bool, y bool) bool

func g_AND(x bool, y bool) bool {
	return x && y
}

func g_OR(x bool, y bool) bool {
	return x || y
}

func g_XOR(x bool, y bool) bool {
	return x != y
}

// g_OP and s_OP are parallel tables: s_OP[i] is the textual name of g_OP[i].
var g_OP = [3]optype{g_AND, g_OR, g_XOR}
var s_OP = [3]string{"AND", "OR", "XOR"}

// gspec describes one gate.  x1 and x2 are the wire-table indices of the
// inputs and y that of the output.  f1 and f2 are scratch flags owned by
// mkorder: true while the corresponding input has not yet been produced.
type gspec struct {
	x1, x2, y int
	f1, f2    bool
	op        optype
}

// getgate parses a gate line of the form "a AND b -> c" into a gspec whose
// wires are resolved against stab.  Malformed lines and unknown wire names
// terminate the program.
func getgate(stab []string, s string) gspec {
	v := strings.Split(s, " ")
	if len(v) != 5 {
		fmt.Printf("Gate %s doesn't have 5 fields!\n", s)
		os.Exit(1)
	}
	var r gspec
	for i := range s_OP {
		if v[1] == s_OP[i] {
			r.op = g_OP[i]
		}
	}
	if r.op == nil {
		fmt.Printf("Gate %s unrecognizable operation!\n", s)
		os.Exit(1)
	}
	if v[3] != "->" {
		fmt.Printf("Gate %s missing -> assignment!\n", s)
		os.Exit(1)
	}
	r.x1 = wire2num(stab, v[0])
	r.x2 = wire2num(stab, v[2])
	r.y = wire2num(stab, v[4])
	if r.x1 < 0 || r.x2 < 0 || r.y < 0 {
		// An index of -1 would panic later when used to index memory.
		fmt.Printf("Gate %s references unknown wire!\n", s)
		os.Exit(1)
	}
	return r
}

// mkorder topologically sorts the gates and returns the gate indices in an
// order in which every gate appears after the gates producing its inputs.
// Gates that can never become ready (for example because a swap created a
// cycle) are omitted, so the result may be shorter than gates.  The f1/f2
// fields of gates are overwritten as scratch space.
func mkorder(stab []string, gates []gspec) []int {
	order := make([]int, len(gates))
	op, oq := 0, 0 // order[op:oq] is the queue of scheduled, unprocessed gates

	// A wire is treated as immediately available when its name starts with
	// 'x' or 'y'.
	// NOTE(review): this assumes no gate output has a name starting with
	// 'x' or 'y' - confirm against the puzzle input.
	pending := func(w int) bool {
		c := stab[w][0]
		return c != 'x' && c != 'y'
	}

	// leaf[w] lists the gates that read wire w.
	leaf := make([][]int, len(stab))
	for i := range gates {
		g := &gates[i]
		g.f1 = pending(g.x1)
		g.f2 = pending(g.x2)
		if !g.f1 && !g.f2 {
			order[oq] = i
			oq++
		}
	}
	for i := range gates {
		g := &gates[i]
		leaf[g.x1] = append(leaf[g.x1], i)
		leaf[g.x2] = append(leaf[g.x2], i)
	}

	for ; op < oq; op++ {
		y := gates[order[op]].y
		for _, j := range leaf[y] {
			g := &gates[j]
			if !g.f1 && !g.f2 {
				continue // already scheduled; never enqueue a gate twice
			}
			// Clear both flags independently so that a gate reading the
			// same wire on both inputs still becomes ready.
			if g.x1 == y {
				g.f1 = false
			}
			if g.x2 == y {
				g.f2 = false
			}
			if !g.f1 && !g.f2 {
				order[oq] = j
				oq++
			}
		}
	}
	return order[:oq]
}

// rungates loads the bits of x and y onto the x and y wires, evaluates the
// gates in the given order and returns the integer formed by the z wires.
// Wires that are never written read as false.
func rungates(order []int, gates []gspec, x int, y int) int {
	mem := make([]bool, len(gates)+ybits+xbits)
	for j := 0; j < xbits; j++ {
		mem[xstart+j] = x&1 == 1
		x >>= 1
	}
	for j := 0; j < ybits; j++ {
		mem[ystart+j] = y&1 == 1
		y >>= 1
	}
	for _, gi := range order {
		g := &gates[gi]
		mem[g.y] = g.op(mem[g.x1], mem[g.x2])
	}
	z := 0
	for j := 0; j < zbits; j++ {
		if mem[zstart+j] {
			z += 1 << j
		}
	}
	return z
}

// part1 builds x and y from the input lines and returns the z register
// produced by the circuit.
func part1(stab []string, gates []gspec, inputs []string) int {
	x, y := 0, 0
	for _, s := range inputs {
		switch s[0] {
		case 'x':
			setinput(&x, s)
		case 'y':
			setinput(&y, s)
		default:
			fmt.Printf("Unknown register input in %s\n", s)
			os.Exit(1)
		}
	}
	return rungates(mkorder(stab, gates), gates, x, y)
}

// xyspec is one (x,y) test input for the adder.
type xyspec struct {
	x, y int
}

// loss returns the sum over the ensemble of |x+y-z|, where z is the circuit
// output for inputs x and y.  Zero means the circuit added every ensemble
// member correctly.  The gates are sorted once per call.
func loss(stab []string, gates []gspec, ensemble []xyspec) int {
	order := mkorder(stab, gates)
	total := 0
	for _, e := range ensemble {
		d := e.x + e.y - rungates(order, gates, e.x, e.y)
		if d < 0 {
			d = -d
		}
		total += d
	}
	return total
}

// lspec records the loss l obtained by swapping the outputs of gates i and j.
type lspec struct {
	l    int
	i, j int
}

// rspec is a pair of gate indices with i > j.
type rspec struct {
	i, j int
}

// tryswap searches for up to d output swaps that bring the loss to zero.
// It evaluates every pair of gates in parallel; if some single swap gives
// zero loss its two gate indices are returned.  Otherwise, when d > 1, the
// ksize lowest-loss swaps are each applied in turn and the search recurses
// with depth d-1.  The result lists the gate indices of all swaps found, or
// is empty if the search fails.  gates is modified during the search but
// restored before returning.
func tryswap(stab []string, gates []gspec, ensemble []xyspec, d int) []int {
	npairs := len(gates) * (len(gates) - 1) / 2
	losses := make([]lspec, npairs)

	// Pairs (i,j) with 0 <= j < i are enumerated row by row, so pair (i,j)
	// has linear index i*(i-1)/2+j.  Split that index space into ncpus
	// contiguous ranges; worker n handles [ij[n-1], ij[n]).
	ij := make([]rspec, ncpus+1)
	ij[0] = rspec{1, 0}
	for n := 1; n <= ncpus; n++ {
		i, j := ij[n-1].i, ij[n-1].j
		na := i*(i-1)/2 + j
		nb := n * npairs / ncpus
		delta := nb - na
		for {
			if i-j > delta {
				j += delta
				ij[n] = rspec{i, j}
				break
			}
			delta -= i - j
			i++
			j = 0
		}
	}

	// The first zero-loss pair found is shared between workers under mu so
	// that the others can stop early without a data race.
	var (
		mu     sync.Mutex
		rfound rspec
		found  bool
	)
	isFound := func() bool {
		mu.Lock()
		defer mu.Unlock()
		return found
	}

	mklosses := func(n int) {
		// Each worker swaps outputs on a private copy of the gates.
		gatesn := make([]gspec, len(gates))
		copy(gatesn, gates)
		i, j := ij[n-1].i, ij[n-1].j
		ib, jb := ij[n].i, ij[n].j
		k := i*(i-1)/2 + j
		// Test the range before each iteration so an empty range (more
		// workers than pairs) does no work at all.
		for !(i == ib && j == jb) && !isFound() {
			gatesn[i].y, gatesn[j].y = gatesn[j].y, gatesn[i].y
			ts := loss(stab, gatesn, ensemble)
			gatesn[i].y, gatesn[j].y = gatesn[j].y, gatesn[i].y
			if ts == 0 {
				mu.Lock()
				rfound = rspec{i, j}
				found = true
				mu.Unlock()
				return
			}
			losses[k] = lspec{l: ts, i: i, j: j}
			k++
			j++
			if j >= i {
				j = 0
				i++
			}
		}
	}

	var wg sync.WaitGroup
	for n := 1; n < ncpus; n++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			mklosses(n)
		}(n)
	}
	mklosses(ncpus) // the last range runs on the calling goroutine
	wg.Wait()

	if found {
		return []int{rfound.i, rfound.j}
	}
	if d <= 1 {
		// No swaps left to spend; skip the sort, its result is unused.
		return nil
	}

	sort.Slice(losses, func(a, b int) bool {
		return losses[a].l < losses[b].l
	})
	klen := len(losses)
	if klen > ksize {
		klen = ksize
	}
	for k := 0; k < klen; k++ {
		i, j := losses[k].i, losses[k].j
		gates[i].y, gates[j].y = gates[j].y, gates[i].y
		r := tryswap(stab, gates, ensemble, d-1)
		gates[i].y, gates[j].y = gates[j].y, gates[i].y
		if len(r) > 0 {
			return append(r, i, j)
		}
	}
	return nil
}

// mkensemble returns esize random (x,y) pairs, each masked to the width of
// the x and y registers.
func mkensemble() []xyspec {
	ensemble := make([]xyspec, esize)
	xmask := 1<<xbits - 1
	ymask := 1<<ybits - 1
	for i := range ensemble {
		x := myrnd.Int() & xmask
		y := myrnd.Int() & ymask
		ensemble[i] = xyspec{x, y}
	}
	return ensemble
}

// part2 searches for four output swaps that make the circuit add correctly
// on a random ensemble and returns the names of the swapped output wires,
// sorted and comma separated.  The result is empty if no solution is found.
func part2(stab []string, gates []gspec) string {
	ensemble := mkensemble()
	r := tryswap(stab, gates, ensemble, 4)
	wll := make([]string, len(r))
	for i := range wll {
		wll[i] = stab[gates[r[i]].y]
	}
	sort.Strings(wll)
	return strings.Join(wll, ",")
}

// dowork reads day24.txt, builds the sorted wire table and the gate list,
// and prints the answers to both parts.  The file holds the input lines, a
// blank line, then the gate lines.
func dowork() {
	raw, err := os.Open("day24.txt")
	if err != nil {
		fmt.Printf("Error opening input for reading!\n")
		os.Exit(1)
	}
	defer raw.Close()

	fp := bufio.NewScanner(raw)
	var inputs []string
	for fp.Scan() {
		s := fp.Text()
		if len(s) == 0 {
			break
		}
		if len(s) < 3 {
			fmt.Printf("Malformed input line %s\n", s)
			os.Exit(1)
		}
		inputs = append(inputs, s)
	}
	var assign []string
	for fp.Scan() {
		// Ignore blank lines so a trailing newline does not look like a gate.
		if s := fp.Text(); len(s) > 0 {
			assign = append(assign, s)
		}
	}
	if err := fp.Err(); err != nil {
		fmt.Printf("Error reading input: %v\n", err)
		os.Exit(1)
	}

	// The wire table holds every input wire and every gate output, sorted
	// so that wire2num can binary search it.
	stab := make([]string, 0, len(inputs)+len(assign))
	for _, s := range inputs {
		stab = append(stab, s[0:3])
	}
	for _, s := range assign {
		v := strings.Split(s, " ")
		if len(v) != 5 {
			fmt.Printf("Gate %s doesn't have 5 fields!\n", s)
			os.Exit(1)
		}
		stab = append(stab, v[4])
	}
	sort.Strings(stab)

	xstart = wire2num(stab, "x00")
	ystart = wire2num(stab, "y00")
	zstart = wire2num(stab, "z00")
	if xstart < 0 || ystart < 0 || zstart < 0 {
		fmt.Printf("Missing x00, y00 or z00 wire!\n")
		os.Exit(1)
	}
	// Everything from z00 to the end of the sorted table is taken to be a
	// z wire.
	zbits = len(stab) - zstart

	gates := make([]gspec, len(assign))
	for i, s := range assign {
		gates[i] = getgate(stab, s)
	}
	p1 := part1(stab, gates, inputs)
	p2 := part2(stab, gates)
	fmt.Printf("Part 1 The z wires output %d\n", p1)
	fmt.Printf("Part 2 Swap wires %s\n", p2)
}

func main() {
	tic()
	fmt.Printf("Advent of Code 2024 Day 24 Crossed Wires "+
		"(GOMAXPROCS=%d)\n\n", ncpus)
	dowork()
	t := toc()
	fmt.Printf("\nTotal execution time %g seconds.\n", t)
	os.Exit(0)
}

// Statistics: Posted by ejolson — Wed Apr 30, 2025 4:04 am