Consider the example case of computing determinants using expansion by minors. (I know this is not an efficient way to compute determinants, but it demonstrates a case that has lots of operations on scalar values).
I am trying to re-use the graph for this case and my method of doing so works for 2 by 2 matrices and fails for 3 by 3 matrices (see the pythong programs below) and do not know why ?
det_22.py
# Test reusing graph for derivatives of determinant of 2 by 2 matrix.
# The output generated by this program is below:
#
# First gradient passed check.
# Second gradient passed check.
#
# imports
import sys
import torch
import numpy
#
# check_grad
def check_grad(ax) :
# ok, eps99
ok = True
eps99 = 99.0 * numpy.finfo(float).eps
#
# ok
check = ax.data[1,1]
if abs( ax.grad[0,0] - check ) > eps99 :
ok = False
print( f'ax.grad[0,0] = {ax.grad[0,0]}, check = {check}' )
#
# ok
check = -ax.data[1,0]
if abs( ax.grad[0,1] - check ) > eps99 :
ok = False
print( f'ax.grad[0,1] = {ax.grad[0,1]}, check = {check}' )
#
# ok
check = -ax.data[0,1]
if abs( ax.grad[1,0] - check ) > eps99 :
ok = False
print( f'ax.grad[1,0] = {ax.grad[1,0]}, check = {check}' )
#
# ok
check = ax.data[0,0]
if abs( ax.grad[1,1] - check ) > eps99 :
ok = False
print( f'ax.grad[1,1] = {ax.grad[1,1]}, check = {check}' )
#
return ok
#
# main
def main() :
#
# ok
ok = True
#
# n
n = 2
#
# ax
x = numpy.random.uniform(0.0, 1.0, (n , n) )
ax = torch.tensor(x, requires_grad = True)
#
# az
az = ax[0,0] * ax[1,1] - ax[0,1] * ax[1,0]
#
# ax.grad
az.backward(retain_graph = True)
#
# check_grad
if check_grad(ax) :
print( 'First gradient passed check.' )
else :
print( 'First gradient failed check.' )
#
#
# ax.data
x = numpy.random.uniform(0.0, 1.0, (n, n) )
for i in range(n) :
for j in range(n) :
ax.data[i,j] = x[i,j]
#
# ax.grad
ax.grad.zero_()
az.backward(retain_graph = True)
#
# check_grad
if check_grad(ax) :
print( 'Second gradient passed check.' )
else :
print( 'Second gradient failed check.' )
#
main()
det_33.py:
# Test reusing graph for derivatives of determinant of 3 by 3 matrix.
# The output generated by this program is below. The actual numbers
# in the output will vary becasue a different random matrix is chosen
# for each evaluation.
#
# First gradient passed check.
# ax.grad[0,0] = 0.07585514040844837, check = 0.4295608074373773
# ax.grad[0,1] = -0.6133183512861293, check = -0.11782369019260797
# ax.grad[0,2] = 0.5337097801031835, check = 0.040633019648616306
# Second gradient failed check.
#
#
# imports
import torch
import numpy
#
# check_grad
def check_grad(ax) :
# ok, eps99
ok = True
eps99 = 99.0 * numpy.finfo(float).eps
#
# ok
check = ( ax[1,1] * ax[2,2] - ax[1,2] * ax[2,1] )
if abs( ax.grad[0,0] - check ) > eps99 :
ok = False
print( f'ax.grad[0,0] = {ax.grad[0,0]}, check = {check}' )
#
# ok
check = - ( ax[1,0] * ax[2,2] - ax[1,2] * ax[2,0] )
if abs( ax.grad[0,1] - check ) > eps99 :
ok = False
print( f'ax.grad[0,1] = {ax.grad[0,1]}, check = {check}' )
#
# ok
check = ( ax[1,0] * ax[2,1] - ax[1,1] * ax[2,0] )
if abs( ax.grad[0,2] - check ) > eps99 :
ok = False
print( f'ax.grad[0,2] = {ax.grad[0,2]}, check = {check}' )
#
return ok
#
# main
def main() :
#
# ok
ok = True
#
# n
n = 3
#
# ax
x = numpy.random.uniform(0.0, 1.0, (n, n))
ax = torch.tensor(x, requires_grad = True)
#
# ax[0,0] ax[0,1] ax[0,2]
# ax[1,0] ax[1,1] ax[1,2]
# ax[2,0] ax[2,1] ax[2,2]
#
# az
az = ax[0,0] * ( ax[1,1] * ax[2,2] - ax[1,2] * ax[2,1] )
az -= ax[0,1] * ( ax[1,0] * ax[2,2] - ax[1,2] * ax[2,0] )
az += ax[0,2] * ( ax[1,0] * ax[2,1] - ax[1,1] * ax[2,0] )
#
#
# ax.grad
az.backward(retain_graph = True)
#
# check_grad
if check_grad(ax) :
print( 'First gradient passed check.' )
else :
print( 'First gradient failed check.' )
#
#
# ax.data
x = numpy.random.uniform(0.0, 1.0, (n, n) )
for i in range(n) :
for j in range(n) :
ax.data[i,j] = x[i,j]
#
# ax.grad
ax.grad.zero_()
az.backward(retain_graph = True)
#
# check_grad
if check_grad(ax) :
print( 'Second gradient passed check.' )
else :
print( 'Second gradient failed check.' )
#
main()