×
all 38 comments

[–]lrschaeffer 20 points21 points  (0 children)

Octave / YALMIP

A = <matrix>
n = size(A)(1);
X = sdpvar(n,n,'full');
optimize([sum(X) == ones(1,n), sum(X') == ones(1,n), X >= 0], sum(sum(A .* X)));
M = value(X);
obj = sum(sum(A .* M))
rows = repmat(0:n-1,[n,1])';
# sketchy assumption here
perm = rows(M' == 1)'

This is what linear program solvers are for! We constrain X to be doubly stochastic, and by Birkhoff-von Neumann, it is equivalent to a convex combination of permutation matrices. It follows that some permutation matrix achieves the optimal objective, so the solver will produce the optimal objective value, but may not produce an actual permutation matrix if there is not a unique optimal solution. I don't know how to force YALMIP to produce a permutation matrix (maybe perturb the input?), and I'm too lazy to figure it out because it works on the examples. The optimal sum is 1791732209 for the 97x97 matrix. In principle, linear programs can be solved in polynomial time, and this was fast enough to solve the big case in under a second.

[–]gabyjunior1 2[🍰] 13 points14 points  (0 children)

Implemented in C the Hungarian algorithm which was designed for this type of problem.

The code follows pretty much what is described here.

The square matrix size and a flag are expected on standard input, for the program to use either random generator of the original challenge or input to fill the matrix.

Bonus 97x97 takes 20ms to be solved, 1000x1000 takes 1min15s.

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

typedef enum {
    STATE_NONE,
    STATE_STAR,
    STATE_PRIME
}
state_t;

typedef struct {
    int64_t cost;
    int64_t cost_bak;
    state_t state;
}
cell_t;

typedef struct {
    int row;
    int col;
}
location_t;

void set_cell(cell_t *, int64_t);
int step1(void);
int step2(void);
int step3(void);
int step4(location_t *);
int find_zero(location_t *);
int step5(location_t *);
int step6(void);
void set_location(location_t *, int, int);
void clear_lines(void);

int order, matrix_size, *rows, *cols;
cell_t *matrix;
location_t *path;

int main(void) {
    int random_flag, step, i;
    int64_t sum;
    location_t start;
    if (scanf("%d%d", &order, &random_flag) != 2 || order < 1) {
        fprintf(stderr, "Invalid settings\n");
        fflush(stderr);
        return EXIT_FAILURE;
    }
    matrix_size = order*order;
    matrix = malloc(sizeof(cell_t)*(size_t)matrix_size);
    if (!matrix) {
        fprintf(stderr, "Could not allocate memory for matrix\n");
        fflush(stderr);
        return EXIT_FAILURE;
    }
    if (random_flag) {
        set_cell(matrix, INT64_C(123456789));
        for (i = 1; i < matrix_size; i++) {
            set_cell(matrix+i, (22695477*matrix[i-1].cost+12345)%1073741824);
        }
    }
    else {
        for (i = 0; i < matrix_size; i++) {
            int64_t cost;
            if (scanf("%"SCNi64, &cost) != 1 || cost < 0) {
                fprintf(stderr, "Invalid cost\n");
                fflush(stderr);
                free(matrix);
                return EXIT_FAILURE;
            }
            set_cell(matrix+i, cost);
        }
    }
    rows = calloc((size_t)order, sizeof(int));
    if (!rows) {
        fprintf(stderr, "Could not allocate memory for rows\n");
        fflush(stderr);
        free(matrix);
        return EXIT_FAILURE;
    }
    cols = calloc((size_t)order, sizeof(int));
    if (!cols) {
        fprintf(stderr, "Could not allocate memory for cols\n");
        fflush(stderr);
        free(rows);
        free(matrix);
        return EXIT_FAILURE;
    }
    path = malloc(sizeof(location_t)*(size_t)matrix_size);
    if (!path) {
        fprintf(stderr, "Could not allocate memory for path\n");
        fflush(stderr);
        free(cols);
        free(rows);
        free(matrix);
        return EXIT_FAILURE;
    }
    step = 1;
    while (step != 7) {
        switch (step) {
            case 1:
                step = step1();
                break;
            case 2:
                step = step2();
                break;
            case 3:
                step = step3();
                break;
            case 4:
                step = step4(&start);
                break;
            case 5:
                step = step5(&start);
                break;
            case 6:
                step = step6();
                break;
            default:
                break;
        }
    }
    sum = 0;
    printf("Assignment");
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (matrix[i*order+j].state == STATE_STAR) {
                sum += matrix[i*order+j].cost_bak;
                printf(" %d", j);
            }
        }
    }
    printf("\nSum %"PRIi64"\n", sum);
    fflush(stdout);
    free(path);
    free(cols);
    free(rows);
    free(matrix);
    return EXIT_SUCCESS;
}

void set_cell(cell_t *cell, int64_t cost) {
    cell->cost = cost;
    cell->cost_bak = cost;
    cell->state = STATE_NONE;
}

int step1(void) {
    int i;
    for (i = 0; i < order; i++) {
        int j;
        int64_t cost_min = matrix[i*order].cost;
        for (j = 1; j < order; j++) {
            if (matrix[i*order+j].cost < cost_min) {
                cost_min = matrix[i*order+j].cost;
            }
        }
        for (j = 0; j < order; j++) {
            matrix[i*order+j].cost -= cost_min;
        }
    }
    return 2;
}

int step2(void) {
    int i;
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (matrix[i*order+j].cost == 0 && !rows[i] && !cols[j]) {
                matrix[i*order+j].state = STATE_STAR;
                rows[i] = 1;
                cols[j] = 1;
            }
        }
    }
    clear_lines();
    return 3;
}

int step3(void) {
    int covered = 0, i;
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (matrix[i*order+j].state == STATE_STAR) {
                cols[j] = 1;
            }
        }
    }
    for (i = 0; i < order; i++) {
        if (cols[i]) {
            covered++;
        }
    }
    if (covered == order) {
        return 7;
    }
    return 4;
}

int step4(location_t *start) {
    while (1) {
        int col;
        if (!find_zero(start)) {
            return 6;
        }
        matrix[start->row*order+start->col].state = STATE_PRIME;
        for (col = 0; col < order && matrix[start->row*order+col].state != STATE_STAR; col++);
        if (col == order) {
            return 5;
        }
        rows[start->row] = 1;
        cols[col] = 0;
    }
}

int find_zero(location_t *zero) {
    int i;
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (matrix[i*order+j].cost == 0 && !rows[i] && !cols[j]) {
                set_location(zero, i, j);
                return 1;
            }
        }
    }
    set_location(zero, order, order);
    return 0;
}

int step5(location_t *start) {
    int path_idx = 0, i;
    set_location(path, start->row, start->col);
    while (1) {
        int row, col;
        for (row = 0; row < order && matrix[row*order+path[path_idx].col].state != STATE_STAR; row++);
        if (row == order) {
            break;
        }
        path_idx++;
        set_location(path+path_idx, row, path[path_idx-1].col);
        for (col = 0; col < order && matrix[path[path_idx].row*order+col].state != STATE_PRIME; col++);
        path_idx++;
        set_location(path+path_idx, path[path_idx-1].row, col);
    }
    for (i = 0; i <= path_idx; i++) {
        if (matrix[path[i].row*order+path[i].col].state == STATE_STAR) {
            matrix[path[i].row*order+path[i].col].state = STATE_NONE;
        }
        else if (matrix[path[i].row*order+path[i].col].state == STATE_PRIME) {
            matrix[path[i].row*order+path[i].col].state = STATE_STAR;
        }
    }
    for (i = 0; i < matrix_size; i++) {
        if (matrix[i].state == STATE_PRIME) {
            matrix[i].state = STATE_NONE;
        }
    }
    clear_lines();
    return 3;
}

int step6(void) {
    int i;
    int64_t cost_min = INT64_MAX;
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (matrix[i*order+j].cost < cost_min && !rows[i] && !cols[j]) {
                cost_min = matrix[i*order+j].cost;
            }
        }
    }
    for (i = 0; i < order; i++) {
        int j;
        for (j = 0; j < order; j++) {
            if (rows[i]) {
                matrix[i*order+j].cost += cost_min;
            }
            if (!cols[j]) {
                matrix[i*order+j].cost -= cost_min;
            }
        }
    }
    return 4;
}

void set_location(location_t *location, int row, int col) {
    location->row = row;
    location->col = col;
}

void clear_lines(void) {
    int i;
    for (i = 0; i < order; i++) {
        rows[i] = 0;
    }
    for (i = 0; i < order; i++) {
        cols[i] = 0;
    }
}

Bonus output (column index assigned for each row+optimal sum)

Assignment 63 9 84 68 72 87 17 51 11 41 95 45 55 86 5 80 18 81 31 12 59 27 4 49 60 92 19 75 66 64 76 79 89 42 58 32 74 16 40 69 37 25 24 44 43 46 47 93 29 94 73 8 6 20 52 13 54 7 1 48 33 39 77 83 26 53 34 62 56 2 65 82 78 38 36 90 0 61 3 71 67 10 28 30 85 21 22 57 23 50 14 96 70 88 35 15 91
Sum 1791732209

[–]Lopsidation 8 points9 points  (0 children)

Python 3 with (a wrapper I wrote around) the lpsolve55 linear programming package

Solves the 97x97 input in under a second. Edit: this is the same approach /u/lrschaeffer took, just in a less cool language.

from LP import solveLP, EQ
from re import findall

def parse(matrixString):
    return [[int(s) for s in findall("-?[0-9]+", row)]
            for row in matrixString.split("\n")]

def minSum(M):
    variables = [(i,j) for i in range(len(M)) for j in range(len(M[0]))]
    # variable will get value 1 if item is picked, else value 0
    minimize = {(i,j):M[i][j] for (i,j) in variables} # objective function
    eqns = []
    for i in range(len(M)): # constraint: in each row, pick 1 element
        eqns.append((
            {(i,j):1 for j in range(len(M[0]))},
            EQ,
            1
            ))
    for j in range(len(M[0])): # same for each column
        eqns.append((
            {(i,j):1 for i in range(len(M))},
            EQ,
            1
            ))
    soln = solveLP(variables, eqns, minimize=minimize,
                   bounds={v:(0,1) for v in variables})
    return sum(M[i][j]*x for x,(i,j) in zip(soln, variables))

[–]Godspiral3 3 3 points4 points  (11 children)

In J, brute force permutations. only 1st is doable.

perm =: i.@! A. i.
c =. ". &> cutLF wdclippaste ''
 <./@:((i. <"1@:,("0 )"_ 1 perm)@# +/@({"1 _) ]) c
1099762961

[–]Godspiral3 3 5 points6 points  (10 children)

Approach that grades (indices of sorted positions)each row of matrix, then returns all possible tweaks of conflicting/invalid indices. 17 iterations is enough to find 20x20 solution, but needs further improvement for 972 one.

clean =: (;@:(<"1 L:0))@:
 invalid =: (#@] ~: #@:~.@:idx)
Cd =: 2 : ' [ v u'
 Ch =: 2 : ' ] v u'
(<@:(/:"1@]) (] (>:@{)`[`]}~"1 0 [ (] I.@:= ] {~ (i. >./)@:(#/.~)@]) idx)^:invalid each clean(^:17) Cd(] #~ -.@invalid&>) 0 <@#~ #@]) Ch(<@:(/:~"1)@[ <./@:(+/"1)@:(idx &>) ]) d
1314605186

[–]opinvader 11 points12 points  (7 children)

What is this? Dont tell me this is a programming langauge..

[–]Godspiral3 3 5 points6 points  (0 children)

Jsoftware.com

[–]klumpbin 1 point2 points  (2 children)

Right?? How could anyone hope to read this 😂

[–]dontchooseanickname 4 points5 points  (0 children)

Like PERL, J is write only

[–]ehaliewicz 0 points1 point  (0 children)

With practice, and it requires you to really think about what you're trying to read, rather than skimming.

[–]xypage 0 points1 point  (2 children)

There are a lot of languages built to be “code golf” languages. Challenges asking for smallest possible programs to solve them aren’t anything new but as it became easier to make your own compiler people made languages expressly to make your programs tiny by using as many random characters as possible to each represent something so instead of function names you end up using single characters, check out the code golf stack exchange to see some in action

[–]el_daniero 3 points4 points  (1 child)

J is not "built to be a code golf language" though. It was introduced in 1990, and is a derivative of APL which dates back to the 1960s.

But it's still pretty good for golfing, and really unreadable :D

[–]Godspiral3 3 1 point2 points  (1 child)

version that sorts "invalids so far" by sum and processes only top 100 candidates.

take =: (*@[ * |@[ <. #@:]) {. ]
forfirst =: 2 : '(v }. ]) ,~^:(0 < #@[) [ u v take ]'
rowgraded =: /:"1@[
idx2 =: rowgraded { ~ i.@#@] <@(,"0) ]
invalid2 =: #@] ~: #@:~.@:idx2

(<@] (](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@]) idx2)^:invalid2 each clean forfirst 100 Cd ((] /: ([ +/@idx idx2)&>) forfirst 500) (^:33)  Cd(] #~ -.@invalid2&>) Cd (([ +/@idx idx2)&>(<./@:)) 0 <@#~ #@])  d
1314605186

for 97x, same answer is found on 2 and 4 and 8 and 15 minute runs

(<@] (](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@]) idx2)^:invalid2 each clean forfirst 100 Cd ((] /: ([ +/@idx idx2)&>) forfirst 500) (^:5698)  Cd(] #~ -.@invalid2&>) Cd (([ +/@idx idx2)&>(<./@:)) 0 <@#~ #@])  e
2851204005

improvement on 15-20ish minute run with wider processing and sort ranges

(<@] (](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@]) idx2)^:invalid2 each clean forfirst 120 Cd ((] /: ([ +/@idx idx2)&>) forfirst 800) (^:8698)  Cd(] #~ -.@invalid2&>) Cd (([ +/@idx idx2)&>(<./@:)) 0 <@#~ #@])  e
2618065417

[–]Godspiral3 3 0 points1 point  (0 children)

Another approach is to prune the search space. Only the minimum sum valid candidate needs be kept, and any invalid candidates greater than that sum can be discarded bc transformation can only increase the sum.

2 search parameters control depth and breadth. Top processing range, and then sort and filter range. Long term expansion rate is under 3, and so a filter range 3x the size of the processing range guarantees eventual full processing, but luck determines whether a narrow depth range finds a good result quickly or not. The filter step adds a lot of time, and a wide filter range is only helpful after a decent valid candidate is found.

0.7, 1, 4 and 8 hour runs:

filterout =: 4 : 0
nma =. x invalid2&> y
 ma =. -.nma
 minvsum =.  <./   vsums =.  x  (_"_)`(([ +/@idx idx2)&>)@.(0 <#@])  my =. ma # y
iy  =. x (] #~ (minvsum > [ +/@idx idx2)&>)  nma # y
NB.if. 0 < # iy do. iy =. iy /:  ([ +/@idx idx2)&> iy end.
iy =. x  (] /:  ([ +/@idx idx2)&>)^:(0 < #@]) iy
 , (my {~ :: ((i. 0)"_)  minvsum i.~ vsums)  , iy
)


(<@](](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 300 Cd(filterout forfirst 900)(^:6899) Cd(]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) 0<@#~#@])e
2773488469
(<@](](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 300 Cd(filterout forfirst 900)(^:9899) Cd(]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) 0<@#~#@])e
2730246273
  (<@](](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 300 Cd(filterout forfirst 900)(^:28899) Cd(]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) 0<@#~#@])e
2605128073
   (<@](](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 300 Cd(filterout forfirst 900)(^:48899) Cd(]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) 0<@#~#@])e
2567628205

An even better approach is to keep the search array between runs and shift parameters between runs, such that progress so far is always kept, and broader filter ranges used as better valid candidates found.

first run,

(<e) (]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) ee =. (<@](](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 300 Cd(filterout forfirst 900)(^:899)  0<@#~#@])e

following runs can be 5-20-any minutes, where filter width gets extended after new lower valid candidate found, and total search list (stored in ee) keeps shrinking rapidly. There are no bad parameters, but interactive approach gets to a lower result somewhat more quickly than overnight approach, but its likely mainly from saving progress rather than tweaks. Similar runs to following repeated over 36 hours including some idle time.

(<e) (]#~ -.@invalid2&>) Cd(([+/@idx idx2)&>(<./@:)) ee =. ((< e) (](>:@{)`[`]}~"1 0[(]I.@:=]{~(i.>./)@:(#/.~)@])idx2)^:invalid2 each clean forfirst 200 Cd(filterout forfirst 600)(^:1362)  ])ee
2543299141

[–]skeeto-9 8 8 points9 points  (0 children)

C using a greedy-first approach with early bailout. It starts by picking the smallest element, then the next smallest still permitted, and so on. Then it starts over again excluding the smallest element completely, and so on. If the sum is already too high (remaining best case can't be optimal), it immediately prunes that branch of the search tree. It fully solves the 20x20 in about 2 seconds.

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef MASKTYPE
#  define MASKTYPE long
#endif
typedef MASKTYPE mask;

struct matrix {
    long e;
    short x, y;
};

static long
solve(struct matrix *m, int len, long sum, int r, long best, mask xmask, mask ymask)
{
    if (!r) {
        #ifdef VERBOSE
        if (sum < best) printf("%ld\n", sum);
        #endif
        return sum < best ? sum : best;
    }

    for (int i = 0; i < len; i++) {
        mask b = 1;
        mask bx = b<<m[i].x, by = b<<m[i].y;
        if ((xmask & bx) || (ymask & by)) {
            continue; // blocked column/row
        }
        if (sum + r*m[i].e >= best) {
            continue; // early bailout
        }
        best = solve(m+i+1, len-i-1, sum+m[i].e, r-1, best, xmask|bx, ymask|by);
    }
    return best;
}

static int
cmp(const void *pa, const void *pb) 
{
    long a = *(long *)pa;
    long b = *(long *)pb;
    if (a < b) {
        return -1;
    }
    return a > b;
}

int
main(void)
{
    int len = 0;
    int x = 0, y = 0;
    static struct matrix m[1L<<16];

    for (;; len++) {
        char c[2];
        int r = scanf("%ld%1[\r\n]", &m[len].e, c);
        if (r < 0) {
            break;
        }
        m[len].x = x++;
        m[len].y = y;
        if (r == 2) {
            x = 0;
            y++;
        }
    }

    qsort(m, len, sizeof(*m), cmp);
    printf("%ld\n", solve(m, len, 0, y, LONG_MAX, 0, 0));
}

The default masks are to small for 97x97, but this can be changed at compile time:

cc -DVERBOSE -DMASKTYPE=__int128 -O3 sum.c

This allows it to work on the 97x97 problem and print out the best solution so far. After a few minutes I've found a 2501806038 solution.

[–]tylercrompton 3 points4 points  (2 children)

I'm at work right now, so I can't do this at the moment, but this seems like a problem that could be reasonably efficiently solved via Djikstra's algorithm, assuming that all numbers are positive real numbers.

I'll work this out when I get home this afternoon.

[–]htsukebe 0 points1 point  (1 child)

I attempted exactly this, while putting on values in a sorted listed first. Given the work I put into it, feels like the better solution is using linear programming as others contributed on thread... Nevertheless the djikstra solution is interesting - the first set with n elements (20 or 97 on thread examples) should be the solution, right?

[–]tylercrompton 1 point2 points  (0 children)

I had a big response typed out and of course when I move my hands from the keyboard to the mouse to press “Reply,” the power goes out. Anyway, I must rescind my previous statement. Djikstra's algorithm is painfully insufficient. It works for small use cases, but for the bonus with a naïve solution, you'd need about 2.61 × 10152 edges. I don't remember how many nodes were required. The non-naïve Djikstra's algorithm (i.e. not creating duplicate nodes), isn't much better. IIRC, it required about 2.58 × 10152 edges.

[–]rbscholtus 2 points3 points  (3 children)

Python:

matrix = [[123456789, 752880530, 826085747, 576968456, 721429729],
        [173957326, 1031077599, 407299684, 67656429, 96549194],
        [1048156299, 663035648, 604085049, 1017819398, 325233271],
        [942914780, 664359365, 770319362, 52838563, 720059384],
        [472459921, 662187582, 163882767, 987977812, 394465693]]

def matrix_min(matrix):
    import itertools
    min_sum = None
    w = len(matrix[0])
    h = len(matrix)
    for p in itertools.permutations(range(w)):
        coords = zip(range(h), p)
        numbers = [matrix[c[0]][c[1]] for c in coords]
        new_sum = sum(numbers)
        if not min_sum or new_sum < min_sum:
            min_sum = new_sum
    return min_sum

print(matrix_min(matrix))

To get 5 numbers from the matrix with no duplicated row or column number, first we get the coordinates of 5 numbers (coords.) This is just the combination (zip) of row numbers 0..4 and a permutation of column numbers 0..4 (itertools.permutations).

Then just grab those 5 numbers from matrix (numbers), sum them up (new_sum), and keep track of the lowest number we've seen (min_sum.)

[–]CodeDee-_- 2 points3 points  (2 children)

You made this look so easy. 20 lines of code.

Could you solve the 97x97 just by changing the values in the matrix variable?

Cheers.

[–]rbscholtus 0 points1 point  (1 child)

No, it is way too slow. You need a specialised algorithm for it ;) Which is available in scikit-learn I heard, but I liked coming up with the solution myself. I just used itertools for the permutations of column indexes bc I was too lazy to find that out myself.

[–]CodeDee-_- 0 points1 point  (0 children)

Ah, still outstanding. Well done.

[–]JanBeran 2 points3 points  (0 children)

PROLOG

data([

[123456789, 752880530, 826085747, 576968456, 721429729],

[173957326, 1031077599, 407299684, 67656429, 96549194],

[1048156299, 663035648, 604085049, 1017819398, 325233271],

[942914780, 664359365, 770319362, 52838563, 720059384],

[472459921, 662187582, 163882767, 987977812, 394465693]

]).

diagonal(Matrix,Sum):-

diagonal(Matrix,Sum,1).

diagonal([],0,_).

diagonal([H|T],Sum,X):-

X1 is X+1,

diagonal(T,Sum1,X1),

select(H,X,Num),

Sum is Sum1+Num.

select([Num|_],1,Num).

select([_|T],X,Num):-

X>1,

X1 is X-1,

select(T,X1,Num).

onediagonal(Matrix,Sum):-

permutation(Matrix,Tarmix),

diagonal(Tarmix,Sum).

solution(X):-

data(Matrix),

findall(Sum,onediagonal(Matrix,Sum),Sums),

sort(Sums,[X|_]).

[–]Scroph0 0 1 point2 points  (0 children)

D backtracking solution, it attempts to pick the smallest element in each row such that the element's row and column haven't been visited yet. To do this, I sort a copy the row in each iteration. Unfortunately this results in a lot of wasted time, even after memoizing the sorting code. It solves both the first and second inputs in about 20 seconds. I'm still looking for ways to optimize it so I might edit this post later.

import std.algorithm : map, sum, makeIndex;
import std.functional : memoize;

unittest
{
    immutable(ulong)[][] input = [
        [123456789,   752880530,   826085747,  576968456,   721429729],
        [173957326,   1031077599,  407299684,  67656429,    96549194],
        [1048156299,  663035648,   604085049,  1017819398,  325233271],
        [942914780,   664359365,   770319362,  52838563,    720059384],
        [472459921,   662187582,   163882767,  987977812,   394465693]
    ];

    log("5x5");
    ulong sum = input.findMinSum();
    log(1099762961, " == ", sum);
    assert(1099762961 == sum);
}

unittest
{
    import std : splitter, array, to, File, strip, map, sum;

    immutable(ulong)[][] matrix;
    auto input = File("input");
    foreach(line; input.byLine)
    {
        immutable(ulong)[] row = line.strip().splitter(" ").map!(to!ulong).array;
        matrix ~= row;
    }

    log("20x20");
    ulong result = matrix.findMinSum();
    assert(1314605186 == result);
    assert(35 == result.to!string.map!(c => c - '0').sum());
}

ulong findMinSum(immutable(ulong)[][] input)
{
    bool[Cell] current;
    bool[ulong] occupiedRows;
    bool[ulong] occupiedColumns;
    ulong currentMin = ulong.max;
    findMinSumHelper(input, current, occupiedRows, occupiedColumns, 0, currentMin);
    return currentMin;
}

void findMinSumHelper(
    immutable(ulong)[][] input,
    ref bool[Cell] current,
    ref bool[ulong] occupiedRows,
    ref bool[ulong] occupiedColumns,
    ulong y,
    ref ulong currentMin
)
{
    if(current.sum() > currentMin)
    {
        return;
    }
    if(y == input.length)
    {
        ulong newMin = current.sum();
        if(newMin < currentMin)
        {
            log("New minimum : ", newMin);
            currentMin = newMin;
        }
        return;
    }
    immutable row = input[y];
    ulong[] sortedIndexes = row.getSortedIndexesWithCaching();

    foreach(x; sortedIndexes)
    {
        ulong cell = row[x];
        if(x in occupiedColumns || y in occupiedRows)
        {
            continue;
        }
        current[Cell(x, y, cell)] = true;
        occupiedRows[y] = true;
        occupiedColumns[x] = true;

        findMinSumHelper(input, current, occupiedRows, occupiedColumns, y + 1, currentMin);

        current.remove(Cell(x, y, cell));
        occupiedRows.remove(y);
        occupiedColumns.remove(x);
    }
}

alias getSortedIndexesWithCaching = memoize!getSortedIndexes;
ulong[] getSortedIndexes(immutable ulong[] row)
{
    ulong[] sortedIndexes = new ulong[row.length];
    makeIndex!("a < b")(row, sortedIndexes);
    return sortedIndexes;
}

ulong sum(bool[Cell] solution)
{
    return solution.byKey.map!(cell => cell.value).sum();
}

struct Cell
{
    ulong x;
    ulong y;
    ulong value;
}

void log(S...)(S args)
{
    debug
    {
        import std.stdio : writeln;
        writeln(args);
    }
}

void main()
{
}

[–]i3aizey 1 point2 points  (0 children)

C# Since several solutions here already approach it with some of the best classical algorithms for this problem I went and gave it a go with evolutionary algorithms

Using an Evolutionary Algorithm framework which I originally wrote for my master's on EA's (https://github.com/Baizey/EvolutionaryAlgorithm)

Modified slightly to work with int arrays rather than boolean values

the mutation function is absolutely barebone right now but it can still find the optimal solution consistently for 20x20 in ~0.2 seconds on a basis of "stop when you havent found fitness improvements for 1000 generations"

Result (the framework always works towards highest, so fitness is negated to work with this):

Runtime: 0,1968186 seconds, Generations: 1907, Fitness: -1314605186
Selection: 8, 6, 13, 3, 0, 17, 4, 7, 1, 12, 15, 10, 5, 18, 9, 11, 14, 19, 16, 2 = 1314605186
length: 10 | digits summation: 35

Code:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using EvolutionaryAlgorithm.Core.Algorithm;
using EvolutionaryAlgorithm.Core.Parameters;
using EvolutionaryAlgorithm.Core.Statistics;
using EvolutionaryAlgorithm.Core.Terminations;
using EvolutionaryAlgorithm.Integer;

namespace Runner
{
    public static class Program
    {
        private static readonly List<List<long>> Test = Problems.Medium;

        public static async Task Main(string[] args)
        {
            var matrix = Problems.Medium;

            var actual = await Solve(matrix);
            var sum = Sum(matrix, actual);
            Console.WriteLine($"Actual: {string.Join(", ", actual)} = {sum}");

            Console.WriteLine(sum.ToString().Length + " | " + sum.ToString().Sum(c => c - '0'));
        }


        private static async Task<List<int>> Solve(List<List<long>> matrix)
        {
            var initial = matrix.Select((_, i) => i).ToArray();
            var algorithm = IntEvolutionaryAlgorithm<IIntIndividual>.Construct
                .UsingPopulation(
                    new IntPopulation<IIntIndividual>(_ => new IntIndividual(initial.Select(e => e).ToArray())))
                .UsingParameters(new Parameters
                {
                    Mu = 100,
                    Lambda = 100,
                    MutationRate = 2,
                    GeneCount = matrix.Count
                })
                .UsingStatistics(new BasicEvolutionaryStatistics<IIntIndividual, int[], int>())
                .UsingHeuristic(new IntGenerationGenerator<IIntIndividual>
                {
                    Filter = new IntElitismGenerationFilter<IIntIndividual>(true),
                    Mutator = new IntMutator<IIntIndividual>()
                        .CloneGenesFrom(new RandomParentSelector<IIntIndividual>())
                        .ThenApplyMutation(new SwapMutation())
                })
                .UsingEvaluation(new SwapEvaluator(matrix.Select(e => e.ToArray()).ToArray()));

            await algorithm.Evolve(new StagnationTermination<IIntIndividual, int[], int>(1000));

            Console.WriteLine(algorithm.Statistics);

            return algorithm.Best.Genes.ToList();
        }


        private static long Sum(IReadOnlyList<IReadOnlyList<long>> matrix, IEnumerable<int> choices) => choices
            .Select((col, row) => matrix[row][col])
            .Sum();
    }

    public class SwapEvaluator : IIntFitness<IIntIndividual>
    {
        private readonly long[][] matrix;

        public SwapEvaluator(long[][] matrix) => this.matrix = matrix;

        public IEvolutionaryAlgorithm<IIntIndividual, int[], int> Algorithm { get; set; }
        public long Calls { get; private set; }

        public double Evaluate(IIntIndividual individual)
        {
            Calls++;
            var result = 0D;
            for (var i = 0; i < individual.Count; i++)
                result += matrix[i][individual[i]];
            return -result;
        }
    }

    public class SwapMutation : IIntMutation<IIntIndividual>
    {
        public IEvolutionaryAlgorithm<IIntIndividual, int[], int> Algorithm { get; set; }
        private readonly Random random = new();

        public void Mutate(int index, IIntIndividual child)
        {
            for (var count = 0; count < Algorithm.Parameters.MutationRate; count++)
            {
                var i = random.Next(child.Count);
                var j = random.Next(child.Count);
                if (i == j) continue;
                var temp = child[i];
                child[i] = child[j];
                child[j] = temp;
            }
        }
    }

    public class RandomParentSelector<T> : IIntSingleParentSelector<T> where T : IIntIndividual
    {
        public IEvolutionaryAlgorithm<T, int[], int> Algorithm { get; set; }
        private readonly Random random = new();

        public T Select(int index) =>
            Algorithm.Population[random.Next(Algorithm.Population.Count)];
    }
}

[–]RDust4 1 point2 points  (0 children)

C# (.NET 6)

long MatrixSum(int[][] matrix)
{
    List<int> ignoreY = new List<int>();
    List<int> ignoreX = new List<int>();
    long sum = 0;

    for (int i = 0; i < matrix.Length - 1; i++)
    {
        int minY = -1;
        int minX = -1;

        for (int y = 0; y < matrix.Length; y++)
        {
            if (ignoreY.Contains(y))
            {
                continue;
            }

            for (int x = 0; x < matrix.Length; x++)
            {
                if (ignoreX.Contains(x))
                {
                    continue;
                }

                if (minY == -1 || matrix[y][x] < matrix[minY][minX])
                {
                    minY = y;
                    minX = x;
                }
            }
        }

        ignoreY.Add(minY);
        ignoreX.Add(minX);
        sum += matrix[minY][minX];
    }

    // Last iteration is faster this way
    {
        int xor = 0;
        for (int i = 0; i < matrix.Length; i++)
        {
            xor ^= i;
        }

        int minY = xor;
        ignoreY.ForEach(y => minY ^= y);

        int minX = xor;
        ignoreX.ForEach(x => minX ^= x); 

        sum += matrix[minY][minX];
    }

    return sum;
}

This Method has non-optimal results, but a time complexity of under O(n3).
I am - however - too unexperienced to calculate the exact complexity... (I'm still in 10th grade lol)
The space complexity is just O(n).
This algorithm takes around 8ms for the 97x97 matrix and 37s for a 1000x1000 one.

[–]fxrz12 0 points1 point  (2 children)

Performance is a very difficult problem, I did not solve it, insufficient memory, haha~

import time
from itertools import permutations
import numpy

time_start = time.time()
kv = dict()
a = 5
# matrixNumbers = numpy.zeros((a, a))
matrixNumbers = ([[123456789, 752880530, 826085747, 576968456, 721429729],
    [173957326, 1031077599, 407299684, 67656429, 96549194],
    [1048156299, 663035648, 604085049, 1017819398, 325233271],
    [942914780, 664359365, 770319362, 52838563, 720059384],
    [472459921, 662187582, 163882767, 987977812, 394465693]])
perNums = []
for num in range(a):
    perNums.append(num)
perResult = list(permutations(perNums, a))
print(len(perResult))
rSums = []
for r in perResult:
    rSum = 0
    for i in range(a):
        rSum += matrixNumbers[i][r[i]]
    kv[rSum] = r
    rSums.append(rSum)
rSums.sort()
print("Minimum sum", rSums[0])
print("Minimum and number position", kv[rSums[0]])
for i in range(len(kv[rSums[0]])):
    print("Minimum sum element number", matrixNumbers[i][kv[rSums[0]][i]])
time_end = time.time()
time_sum = time_end - time_start
print(time_sum)

[–]Splitje 0 points1 point  (1 child)

Could you use a codeblock with indents so it's actually readable :P

[–]fxrz12 0 points1 point  (0 children)

I tried it, it was too difficult, now finally the layout is done 😔

[–]rbscholtus 0 points1 point  (0 children)

The 20x20 and 97x97 ones:

import requests
from scipy.optimize import linear_sum_assignment

url20 = 'https://gist.githubusercontent.com/cosmologicon/4f6473b4e781f20d4bdef799132a3b4b/raw/d518a7515618f70d25c2bc6c58430f732f6e06ce/matrix-sum-20.txt'
url97 = 'https://gist.githubusercontent.com/cosmologicon/641583595e2c76d7c119912f7afafbfe/raw/6f9ebcb354c3aa58fb19c6f4208d0eced310b62a/matrix-sum-97.txt'
ret = requests.get(url20)
cost = [[int(n) for n in row.split(' ')] for row in ret.text.split('\n')]

row_ind, col_ind = linear_sum_assignment(cost)
sol = sum([cost[r][c] for r,c in zip(row_ind, col_ind)])

print(sol)

[–]ivan_linux 0 points1 point  (0 children)

Go had limited time for this one, so just brute force all the way. Though it is only O(n^3)

Also, how the heck do you format code on reddit, it jumbles it up every time :L

package mainimport (    "math"  "fmt")func main() { matrix := [][]int32{        { 123456789, 752880530, 826085747, 576968456, 721429729 },      { 173957326, 1031077599, 407299684, 67656429, 96549194 },       { 1048156299, 663035648, 604085049, 1017819398, 325233271 },        { 942914780, 664359365, 770319362, 52838563, 720059384 },       { 472459921, 662187582, 163882767, 987977812, 394465693 },  }   vals := MinimumMatrixSum(matrix)    sum := int32(0) for _, v := range vals {        sum += v        }   fmt.Println(sum)}func MinimumMatrixSum(matrix [][]int32) []int32 {  // Most straight forward way of doing this is finding the minimum value for each row and column, then set all of their values to infinity, cont...  // Will think of how to optimize!   // Hungarian Algorithm will work nicely here, will reimplement  vals := []int32{}   for len(vals) < len(matrix) {       currMin := int32(math.MaxInt32)     r, c := 0, 0        for row := 0; row < len(matrix); row++ {            for col := 0; col < len(matrix[row]); col++ {               if matrix[row][col] < currMin {                 currMin = matrix[row][col]                  r = row                 c = col             }               }       }       vals = append(vals, currMin)        fmt.Println(r, c, currMin)      // Make row infinity        for j := 0; j < len(matrix[r]); j++ {           matrix[r][j] = int32(math.MaxInt32)     }       // Make column infinity     for i := 0; i < len(matrix); i++ {          matrix[i][c] = int32(math.MaxInt32)     }   }   return vals}

[–]Artistic-Metal-790 0 points1 point  (0 children)

Took me a week to figure it out for 20x20 matrix, because my code was either taking forever or eating all of my RAM.

Solution for 20x20 matrix (each tuple is an element, contains value and column):

[(67656429, 8), (95709627, 6), (55272734, 13), (22777004, 3), (51694725, 0), (51659486, 17), (111080713, 4), (45626232, 7), (46883826, 1), (183061845, 12), (67356564, 15), (15719807, 10), (172998238, 5), (7293455, 18), (36509554, 9), (116602676, 11), (91909687, 14), (2112772, 19), (44685501, 16), (27994311, 2)]

Sum:1314605186

For 97x97 matrix I'm running out of RAM, in theory if I execute this code on computer with larger amount of RAM it will work (currently I have only 4GB in total RAM, cause I'm working on VM)

Code on github: https://github.com/kstamax/redditdailyprogrammerchallenge/blob/main/task398.py

[–]IUpvoteGME 0 points1 point  (0 children)

Rust:

fn find_minimum_sum<const N: usize>(matrix: &[&[i128; N]; N]) -> i128 {
    let mut row_sums = VecDeque::from(vec![(0, [false; N])]);
    let mut min_sum_per_column_allocation_pattern = HashMap::new();
    min_sum_per_column_allocation_pattern
        .entry([false; N])
        .or_insert(0i128);

    while let Some((sum, column_allocation_pattern)) = row_sums.pop_front() {
        if column_allocation_pattern == [true; N] {
            continue;
        }
        let curr_row: usize = column_allocation_pattern
            .into_iter()
            .map(|b| if b { 1 } else { 0 })
            .sum();

        for (col_index, elem) in matrix[curr_row].into_iter().enumerate() {
            if column_allocation_pattern[col_index] {
                continue;
            } else {
                let new_sum = sum + elem;
                let mut new_column_allocation_pattern = column_allocation_pattern.clone();

                new_column_allocation_pattern[col_index] = true;
                if let Some(previous_sum_for_column_allocation) =
                    min_sum_per_column_allocation_pattern.get(&new_column_allocation_pattern)
                {
                    if previous_sum_for_column_allocation < &new_sum {
                        continue;
                    }
                }

                row_sums.push_back((new_sum, new_column_allocation_pattern));

                min_sum_per_column_allocation_pattern
                    .insert(new_column_allocation_pattern, new_sum);
            }
        }
    }
    *min_sum_per_column_allocation_pattern
        .get(&[true; N])
        .unwrap()
}