Quantcast
Channel: Raspberry Pi Forums
Viewing all articles
Browse latest Browse all 8621

Teaching and learning resources • Re: Advent of Code 2024

$
0
0
With three kittens working in parallel, a parallel solution to day 24 should be possible. I think computing the loss function over all wire swaps is a task that could easily be distributed across a large number of cores.
The parallel version is finished. On a Pi 4B the results were

Code:

$ ./day24 -nl1 # Pi 4B at 1500 MHz
Advent of Code 2024 Day 24 Crossed Wires

Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37

Total execution time 62.579 seconds.
for a parallel speedup of

230.506 / 62.579 = 3.683

which is 92 percent of the theoretical maximum. That's still longer than the 15 second cutoff, so I tried a 10-year-old 12-core dual-socket Xeon E5-2620 server.

Code:

$ export CHPL_RT_NUM_THREADS_PER_LOCALE=MAX_LOGICAL
$ ./day24 -nl1 # Xeon E5-2620 12c/24t
Advent of Code 2024 Day 24 Crossed Wires

Part 1 The z wires output 61495910098126
Part 2 Swap wires css,cwt,gdd,jmv,pqt,z05,z09,z37

Total execution time 6.65257 seconds.
For reference the code is

Code:

/*  Advent of Code 2024 Day 24 Crossed Wires
    Written 2025 by Eric Olson

    Parallel version.

    Increase esize if this produces wrong answers.  Increase ksize
    if it fails to produce any answers.
*/

use IO,Time,List,Sort,Random;

type byte=uint(8);

// esize: number of random (x,y) samples used by the loss function.
// ksize: number of lowest-loss swaps explored at each recursion level.
const esize=20;
const ksize=10;

// Indices of x00, y00 and z00 in the sorted wire table (set by dowork).
var xstart=0,ystart=0,zstart=0;
// Bit widths: xbits/ybits are raised by setinput, zbits is set by dowork.
var xbits=0,ybits=0,zbits=0;
var myrnd=new randomStream(int);

// If the bytes of s starting at offset i equal p, advance i past them and
// return true; otherwise leave i untouched and return false.
proc check(ref i:int,ref s:bytes,const p:bytes):bool {
    if s.size<i+p.size {
        return false;
    }
    for k in 0..<p.size {
        if s[k+i]!=p[k] {
            return false;
        }
    }
    i+=p.size;
    return true;
}

// Parse an optionally '-'-prefixed decimal integer from s starting at i.
// i is advanced past the characters consumed; if i is unchanged on return,
// nothing was parsed.
proc scanint(ref i:int,ref s:bytes):int {
    const d0=b"0"[0],d9=b"9"[0],minus=b"-"[0];
    // r accumulates the negated magnitude; m restores the sign at the end
    // (m=-1 for a positive number, m=1 once a leading '-' has been seen).
    var r=0,iold=i,m=-1;
    while i<s.size {
        var d=s[i];
        if iold==i&&d==minus {
            m=1;
        } else if d>=d0 && d<=d9 {
            r=r*10-(d-d0);
        } else {
            break;
        }
        i+=1;
    }
    return m*r;
}

// Binary search for wire name w in stab (which dowork sorts beforehand).
// Returns the index of w, or -1 if it is not present.
proc wire2num(ref stab:[]bytes,ref w:bytes):int {
    var ai=0,bi=stab.size,cold=-1;
    while true {
        var ci=(bi+ai)/2;
        // The midpoint stopped moving: the interval cannot shrink further.
        if ci==cold {
            break;
        }
        if stab[ci]<w {
            ai=ci;
        } else if stab[ci]>w {
            bi=ci;
        } else {
            return ci;
        }
        cold=ci;
    }
    return -1;
}

// Parse an input line of the form "<x|y><bitnumber>: <0|1>" and set or clear
// that bit in x.  Also raises xbits/ybits so they cover the highest bit seen.
// Prints a message and exits on malformed input.
proc setinput(ref x:int,ref s:bytes) {
    // Start scanning at offset 1, just after the register letter.
    var iold,i=1;
    iold=i;
    var j=scanint(i,s);
    if iold==i {
        writeln("Can't find input bitnumber in ",s);
        exit(1);
    }
    if !check(i,s,b": ") {
        writeln("Missing : delimiter in ",s);
        exit(1);
    }
    iold=i;
    var b=scanint(i,s);
    if iold==i {
        writeln("Can't find input bit value in ",s);
        exit(1);
    }
    if b<0||b>1 {
        writeln("Bit value out of range in ",s);
        exit(1);
    }
    var p=(1<<j);
    if s[0]==b"x"[0] {
        if xbits<=j {
            xbits=j+1;
        }
    }
    if s[0]==b"y"[0] {
        if ybits<=j {
            ybits=j+1;
        }
    }
    if b==1 {
        x|=p;
    } else {
        x&=~p;
    }
}

// Gate operations.  g_NUL is the placeholder stored in a gspec until a real
// operation has been recognized.
type optype=proc(x:bool,y:bool):bool;

proc g_AND(x:bool,y:bool):bool {
    return x&&y;
}

proc g_OR(x:bool,y:bool):bool {
    return x||y;
}

proc g_XOR(x:bool,y:bool):bool {
    return x!=y;
}

proc g_NUL(x:bool,y:bool):bool {
    return false;
}

// Parallel tables mapping an operation's name to its implementation.
const g_OP:[0..2]optype=[g_AND,g_OR,g_XOR];
const s_OP:[0..2]bytes=[b"AND",b"OR",b"XOR"];

// One gate: input wire indices x1,x2, output wire index y, the operation,
// and scratch flags f1,f2 that mkorder uses to mark inputs still pending.
record gspec {
    var x1,x2,y:int;
    var f1,f2:bool;
    var op:optype=g_NUL;
}

// Parse a gate line "<in1> <OP> <in2> -> <out>" into a gspec whose wires are
// indices into stab.  Prints a message and exits on malformed input.
proc getgate(ref stab:[]bytes,ref s:bytes):gspec {
    var v=s.split();
    if v.size!=5 {
        writeln("Gate ",s," doesn't have 5 fields!");
        exit(1);
    }
    var r:gspec;
    r.x1=wire2num(stab,v[0]);
    for i in s_OP.domain {
        if v[1]==s_OP[i] {
            r.op=g_OP[i];
        }
    }
    if r.op==g_NUL {
        writeln("Gate ",s," unrecognizable operation!");
        exit(1);
    }
    r.x2=wire2num(stab,v[2]);
    if v[3]!=b"->" {
        writeln("Gate ",s," missing -> assignment!");
        exit(1);
    }
    r.y=wire2num(stab,v[4]);
    return r;
}

// Compute an evaluation order for the gates: order[k] is the index of the
// k-th gate to run.  The f1/f2 flags of every gate are overwritten.
proc mkorder(ref stab:[]bytes,ref gates:[]gspec):[]int {
    var order:[0..<gates.size]int;
    // order doubles as a work queue: op is the read cursor, oq the write
    // cursor.
    var op=0,oq=0;
    // An input is pending (true) unless it is an x or y wire.
    proc setfn(ref fn:bool,c:byte){
        if c==b"x"[0]||c==b"y"[0] {
            fn=false;
        } else {
            fn=true;
        }
    }
    // Seed the queue with gates fed only by x/y wires.
    for i in gates.domain {
        setfn(gates[i].f1,stab[gates[i].x1][0]);
        setfn(gates[i].f2,stab[gates[i].x2][0]);
        if !gates[i].f1&&!gates[i].f2 {
            order[oq]=i;
            oq+=1;
        }
    }
    // leaf[w] lists the gates that read wire w.
    var leaf:[0..<stab.size]list(int);
    for i in gates.domain {
        ref r=gates[i];
        leaf[r.x1].pushBack(i);
        leaf[r.x2].pushBack(i);
    }
    // For each queued gate, mark its output as available to its consumers
    // and queue every consumer whose other input is already available.
    while op<oq {
        var i=order[op];
        ref r=gates[i];
        for j in leaf[r.y] {
            if gates[j].x1==r.y {
                gates[j].f1=false;
                if !gates[j].f2 {
                    order[oq]=j;
                    oq+=1;
                }
            } else if gates[j].x2==r.y {
                gates[j].f2=false;
                if !gates[j].f1 {
                    order[oq]=j;
                    oq+=1;
                }
            }
        }
        op+=1;
    }
    return order;
}

// Evaluate the circuit with xp loaded into the x wires and yp into the
// y wires, and return the number read back from the z wires.
proc rungates(ref stab:[]bytes,ref gates:[]gspec,xp:int,yp:int):int {
    ref order=mkorder(stab,gates);
    var x=xp,y=yp;
    var mem:[0..<stab.size]bool;
    // Load x bit by bit, lowest first, into consecutive wires from xstart.
    var j=xstart;
    while stab[j][0]==b"x"[0] {
        mem[j]=(x&1)==1;
        j+=1;
        x>>=1;
    }
    j=ystart;
    while stab[j][0]==b"y"[0] {
        mem[j]=(y&1)==1;
        j+=1;
        y>>=1;
    }
    for i in order.domain {
        ref r=gates[order[i]];
        var in1=mem[r.x1];
        var in2=mem[r.x2];
        mem[r.y]=r.op(in1,in2);
    }
    // Every wire from zstart to the end of the table is read as an output
    // bit, lowest first.
    j=zstart;
    var p=1;
    var p1=0;
    while j<mem.size {
        if mem[j] {
            p1+=p;
        }
        p*=2;
        j+=1;
    }
    return p1;
}

// Part 1: build x and y from the input lines and run the circuit once.
proc part1(ref stab:[]bytes,ref gates:[]gspec,
        ref inputs:list(bytes)):int {
    var x=0,y=0;
    for s in inputs {
        if s[0]==b"x"[0] {
            setinput(x,s);
        } else if s[0]==b"y"[0] {
            setinput(y,s);
        } else {
            writeln("Unknown register input in ",s);
            exit(1);
        }
    }
    return rungates(stab,gates,x,y);
}

// Sum of |x+y-z| over all (x,y) samples in ensemble, where z is the circuit
// output.  The result is zero when the circuit adds every sample correctly.
proc loss(ref stab:[]bytes,ref gates:[]gspec,
        ref ensemble:[](int,int)):int {
    var r=0;
    for i in ensemble.domain {
        var x=ensemble[i](0);
        var y=ensemble[i](1);
        var z=rungates(stab,gates,x,y);
        r+=abs(x+y-z);
    }
    return r;
}

// Loss l obtained by swapping the outputs of gates i and j.  The field
// order puts l first in the record.
record lspec {
    var l:int;
    var i,j:int;
}

// Search for up to d output swaps that bring the loss to zero.  Returns the
// gate indices involved (two per swap), or an empty list if nothing was
// found.  gates is modified during the search but every swap is undone
// before returning.
proc tryswap(ref stab:[]bytes,ref gates:[]gspec,
        ref ensemble:[](int,int),d:int):list(int) {
    // One slot per unordered pair (i,j) with j<i, at index i*(i-1)/2+j.
    var losses:[0..<gates.size*(gates.size-1)/2]lspec;
    var ncpus=here.maxTaskPar;
    // ij[n-1] is the first pair handled by task n and ij[n] is where it
    // stops; the pair sequence is cut into ncpus contiguous chunks.
    var ij:[0..ncpus](int,int);
    ij[0]=(1,0);
    for n in 1..ncpus {
        var (i,j)=ij[n-1];
        var na=i*(i-1)/2+j;
        var nb=n*losses.size/ncpus;
        var delta=nb-na;
        // Walk forward delta pairs, one row i at a time.
        while true {
            if i-j>delta {
                j+=delta;
                ij[n]=(i,j);
                break;
            }
            delta-=i-j;
            i+=1; j=0;
        }
    }
    record rspec {
        var i,j:int;
    }
    // rfound holds (0,0) until some task finds a zero-loss swap; the other
    // tasks poll it and stop once it changes.
    var rfound:sync rspec;
    var rfzero=new rspec(0,0);
    rfound.writeEF(rfzero);
    coforall n in 1..ncpus {
        // Each task swaps on its own copy of the gates.
        var gatesn=gates;
        var (i,j)=ij[n-1],(ib,jb)=ij[n];
        var k=i*(i-1)/2+j;
        while true {
            gatesn[i].y<=>gatesn[j].y;
            var ts=loss(stab,gatesn,ensemble);
            gatesn[i].y<=>gatesn[j].y;
            if ts==0 {
                rfound.writeFF(new rspec(i,j));
            } else {
                losses[k].i=i;
                losses[k].j=j;
                losses[k].l=ts;
                k+=1;
            }
            j+=1;
            if j>=i {
                j=0; i+=1;
            }
            if (i==ib&&j==jb)||rfound.readFF()!=rfzero {
                break;
            }
        }
    }
    var rf=rfound.readFF();
    if rf!=rfzero {
        var r:list(int);
        r.pushBack(rf.i);
        r.pushBack(rf.j);
        return r;
    }
    var klen=losses.size;
    sort(losses[0..<klen]);
    if klen>ksize {
        klen=ksize;
    }
    if d==1 {
        var r:list(int);
        return r;
    }
    // Apply each of the klen first-sorted swaps in turn and recurse with one
    // fewer swap remaining.
    for k in 0..<klen {
        var i=losses[k].i;
        var j=losses[k].j;
        gates[i].y<=>gates[j].y;
        var r=tryswap(stab,gates,ensemble,d-1);
        gates[i].y<=>gates[j].y;
        if r.size>0 {
            r.pushBack(i);
            r.pushBack(j);
            return r;
        }
    }
    var r:list(int);
    return r;
}

// Draw esize random (x,y) samples, with x limited to xbits bits and y to
// ybits bits.
proc mkensemble():[](int,int) {
    var ensemble:[0..<esize](int,int);
    // In Chapel << binds tighter than binary -, so this is (1<<bits)-1.
    var xmask=1<<xbits-1;
    var ymask=1<<ybits-1;
    for i in ensemble.domain {
        var x=myrnd.next()&xmask;
        var y=myrnd.next()&ymask;
        ensemble[i]=(x,y);
    }
    return ensemble;
}

// Part 2: search for four swaps and return the names of the gate output
// wires involved, sorted and comma separated.
proc part2(ref stab:[]bytes,ref gates:[]gspec):bytes {
    ref ensemble=mkensemble();
    var r=tryswap(stab,gates,ensemble,4);
    var wll:[0..<r.size]bytes;
    for i in wll.domain {
        wll[i]=stab[gates[r[i]].y];
    }
    sort(wll);
    var p2:bytes;
    for i in wll.domain {
        if i>0 {
            p2+=b","+wll[i];
        } else {
            p2=wll[i];
        }
    }
    return p2;
}

// Read day24.txt, build the sorted wire table and the gate list, then solve
// and print both parts.
proc dowork(){
    var io=open("day24.txt",ioMode.r);
    var fp=io.reader(locking=false);
    var s:bytes;
    // First section: initial wire values, terminated by an empty line.
    var inputs:list(bytes);
    while fp.readLine(s,stripNewline=true) {
        if s.size==0 {
            break;
        }
        inputs.pushBack(s);
    }
    // Second section: gate definitions, up to end of file.
    var assign:list(bytes);
    while fp.readLine(s,stripNewline=true) {
        assign.pushBack(s);
    }
    // The wire table holds every input wire plus every gate output wire.
    var slen=inputs.size+assign.size;
    var stab:[0..<slen]bytes;
    var i=0;
    for s in inputs {
        stab[i]=s[0..2];
        i+=1;
    }
    for s in assign {
        var v=s.split();
        stab[i]=v[4];
        i+=1;
    }
    sort(stab);
    var x00=b"x00";
    xstart=wire2num(stab,x00);
    var y00=b"y00";
    ystart=wire2num(stab,y00);
    var z00=b"z00";
    zstart=wire2num(stab,z00);
    zbits=stab.size-zstart;
    var gates:[0..<assign.size]gspec;
    i=0;
    for s in assign {
        gates[i]=getgate(stab,s);
        i+=1;
    }
    var p1=part1(stab,gates,inputs);
    var p2=part2(stab,gates);
    writeln("Part 1 The z wires output ",p1);
    writeln("Part 2 Swap wires ",p2);
}

// Entry point: run the solver and report the elapsed wall-clock time.
proc main(){
    var t:stopwatch;
    t.start();
    writeln("Advent of Code 2024 Day 24 Crossed Wires\n");
    dowork();
    t.stop();
    writeln("\nTotal execution time ",t.elapsed()," seconds.");
}
The main difficulty in parallelization was using a sync variable to cancel the other threads when a working swap of wires was found. Strangely, an (int,int) tuple is not allowed as a sync variable but a record containing two integers is fine. If I understood the internal workings of Chapel better, maybe that limitation would make more sense.

Statistics: Posted by ejolson — Sat Apr 26, 2025 5:27 am



Viewing all articles
Browse latest Browse all 8621

Trending Articles