API tutorial

Expression building

(note: may have old API in some cases)

In [ ]:
import dynet as dy

## ==== Create a new computation graph
# (it is a singleton, we have one at each stage.
# dy.renew_cg() clears the current one and starts anew)
dy.renew_cg()

## ==== Creating Expressions from user input / constants.
x = dy.scalarInput(value)

v = dy.vecInput(dimension)
v.set([1,2,3])

z = dy.matInput(dim1, dim2)

# for example:
z1 = dy.matInput(2, 2)
z1.set([1,2,3,4]) # Column major

# Or directly from a numpy array
z1 = dy.inputTensor([[1,2],[3,4]]) # Row major

## ==== We can take the value of an expression.
# For complex expressions, this will run forward propagation.
print z.value()
print z.npvalue()      # as numpy array
print v.vec_value()    # as vector, if vector
print x.scalar_value() # as scalar, if scalar
print x.value()        # choose the correct one

## ==== Parameters
# Parameters are things we tune during training.
# Usually a matrix or a vector.

# First we create a parameter collection and add the parameters to it.
m = ParameterCollection()
pW = m.add_parameters((8,8)) # an 8x8 matrix
pb = m.add_parameters(8)

# then we create an Expression out of the parameter collection's parameters
W = dy.parameter(pW)
b = dy.parameter(pb)

## ===== Lookup parameters
# Similar to parameters, but are representing a "lookup table"
# that maps numbers to vectors.
# These are used for embedding matrices.
# for example, this will have VOCAB_SIZE rows, each of DIM dimensions.
lp  = m.add_lookup_parameters((VOCAB_SIZE, DIM))

# lookup parameters can be initialized from an existing array, i.e:
# m["lookup"].init_from_array(wv)

e5  = dy.lookup(lp, 5)   # create an Expression from row 5.
e5  = lp[5]           # same
e5c = dy.lookup(lp, 5, update=False)  # as before, but don't update when optimizing.

e5  = dy.lookup_batch(lp, [4, 5])   # create a batched Expression from rows 4 and 5.
e5  = lp.batch([4, 5])           # same

e5.set(9)   # now the e5 expression contains row 9
e5c.set(9)  # ditto


## ===== Combine expression into complex expressions.

# Math
e = e1 + e2
e = e1 * e2   # for vectors/matrices: matrix multiplication (like e1.dot(e2) in numpy)
e = e1 - e2
e = -e1

e = dy.dot_product(e1, e2)
e = dy.cmult(e1, e2)           # component-wise multiply  (like e1*e2 in numpy)
e = dy.cdiv(e1, e2)            # component-wise divide
e = dy.colwise_add(e1, e2)     # column-wise addition

# Matrix Shapes
e = dy.reshape(e1, new_dimension)
e = dy.transpose(e1)

# Per-element unary functions.
e = dy.tanh(e1)
e = dy.exp(e1)
e = dy.log(e1)
e = dy.logistic(e1)   # Sigmoid(x)
e = dy.rectify(e1)    # Relu (= max(x,0))
e = dy.softsign(e1)    # x/(1+|x|)

# softmaxes
e = dy.softmax(e1)
e = dy.log_softmax(e1, restrict=[]) # restrict is a set of indices.
                                 # if not empty, only entries in restrict are part
                                 # of softmax computation, others get 0.


e = dy.sum_cols(e1)


# Picking values from vector expressions
e = dy.pick(e1, k)              # k is unsigned integer, e1 is vector. return e1[k]
e = e1[k]                    # same

e = dy.pickrange(e1, k, v)      # like python's e1[k:v] for lists. e1 is an Expression, k,v integers.
e = e1[k:v]                  # same

e = dy.pickneglogsoftmax(e1, k) # k is unsigned integer. equiv to: (pick(-log(dy.softmax(e1)), k))


# Neural net stuff
dy.noise(e1, stddev) # add a noise to each element from a gausian with standard-dev = stddev
dy.dropout(e1, p)    # apply dropout with probability p

# functions over lists of expressions
e = dy.esum([e1, e2, ...])            # sum
e = dy.average([e1, e2, ...])         # average
e = dy.concatenate_cols([e1, e2, ...])  # e1, e2,.. are column vectors. return a matrix. (sim to np.hstack([e1,e2,...])
e = dy.concatenate([e1, e2, ...])     # concatenate

e = dy.affine_transform([e0,e1,e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...)

## Loss functions
e = dy.squared_distance(e1, e2)
e = dy.l1_distance(e1, e2)
e = dy.huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = ty * log(e1) + (1 - ty) * log(1 - e1)
e = dy.binary_log_loss(e1, e2)

# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = dy.pairwise_rank_loss(e1, e2, m=1.0)

# Convolutions
# e1 \in R^{d x s} (input)
# e2 \in R^{d x m} (filter)
e = dy.conv1d_narrow(e1, e2) # e = e1 *conv e2
e = dy.conv1d_wide(e1, e2)   # e = e1 *conv e2
e = dy.filter1d_narrow(e1, e2) # e = e1 *filter e2

e = dy.kmax_pooling(e1, k) #  kmax-pooling operation (Kalchbrenner et al 2014)
e = dy.kmh_ngram(e1, k) #
e = dy.fold_rows(e1, nrows=2) #





Recipe

In [6]:
import dynet as dy

# create parameter collection
m = dy.ParameterCollection()

# add parameters to parameter collection
pW = m.add_parameters((10,30))
pB = m.add_parameters(10)
lookup = m.add_lookup_parameters((500, 10))
print "added"

# create trainer
trainer = dy.SimpleSGDTrainer(m)

# Regularization is set via the --dynet-l2 commandline flag.
# Learning rate parameters can be passed to the trainer:
# alpha = 0.1  # learning rate
# trainer = dy.SimpleSGDTrainer(m, e0=alpha)

# function for graph creation
def create_network_return_loss(inputs, expected_output):
    """
    inputs is a list of numbers
    """
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    emb_vectors = [lookup[i] for i in inputs]
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss

# function for prediction
def create_network_return_best(inputs):
    """
    inputs is a list of numbers
    """
    dy.renew_cg()
    W = dy.parameter(pW)
    b = dy.parameter(pB)
    emb_vectors = [lookup[i] for i in inputs]
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    return np.argmax(net_output.npvalue())


# train network
for epoch in xrange(5):
    for inp,lbl in ( ([1,2,3],1), ([3,2,4],2) ):
        print inp, lbl
        loss = create_network_return_loss(inp, lbl)
        print loss.value() # need to run loss.value() for the forward prop
        loss.backward()
        trainer.update()

print create_network_return_best([1,2,3])

added
[1, 2, 3] 1
2.71492385864
[3, 2, 4] 2
2.48228144646
[1, 2, 3] 1
2.00279903412
[3, 2, 4] 2
1.82602763176
[1, 2, 3] 1
1.44809651375
[3, 2, 4] 2
1.34181213379
[1, 2, 3] 1
1.03570735455
[3, 2, 4] 2
0.988352060318
[1, 2, 3] 1
0.744616270065
[3, 2, 4] 2
0.732948303223
1

Recipe (using classes)

In [4]:
import dynet as dy
# create parameter collection
m = dy.ParameterCollection()

# create a class encapsulating the network
class OurNetwork(object):
    # The init method adds parameters to the parameter collection.
    def __init__(self, pc):
        self.pW = pc.add_parameters((10,30))
        self.pB = pc.add_parameters(10)
        self.lookup = pc.add_lookup_parameters((500,10))

    # the __call__ method applies the network to an input
    def __call__(self, inputs):
        W = dy.parameter(self.pW)
        b = dy.parameter(self.pB)
        lookup = self.lookup
        emb_vectors = [lookup[i] for i in inputs]
        net_input = dy.concatenate(emb_vectors)
        net_output = dy.softmax( (W*net_input) + b)
        return net_output

    def create_network_return_loss(self, inputs, expected_output):
        dy.renew_cg()
        out = self(inputs)
        loss = -dy.log(dy.pick(out, expected_output))
        return loss

    def create_network_return_best(self, inputs):
        dy.renew_cg()
        out = self(inputs)
        return np.argmax(out.npvalue())


# create network
network = OurNetwork(m)

# create trainer
trainer = dy.SimpleSGDTrainer(m)

# train network
for epoch in xrange(5):
    for inp,lbl in ( ([1,2,3],1), ([3,2,4],2) ):
        print inp, lbl
        loss = network.create_network_return_loss(inp, lbl)
        print loss.value() # need to run loss.value() for the forward prop
        loss.backward()
        trainer.update()

print
print network.create_network_return_best([1,2,3])

[1, 2, 3] 1
2.5900914669
[3, 2, 4] 2
2.00347089767
[1, 2, 3] 1
1.98409461975
[3, 2, 4] 2
1.50869822502
[1, 2, 3] 1
1.50195622444
[3, 2, 4] 2
1.12316584587
[1, 2, 3] 1
1.12293696404
[3, 2, 4] 2
0.831095397472
[1, 2, 3] 1
0.833912611008
[3, 2, 4] 2
0.61754822731

1

or, alternatively, have the training outside of the network class

In [ ]:
# create network
network = OurNetwork(m)

# create trainer
trainer = dy.SimpleSGDTrainer(m)

# train network
for epoch in xrange(5):
    for inp,lbl in ( ([1,2,3],1), ([3,2,4],2) ):
        print inp, lbl
        dy.renew_cg()
        out = network(inp)
        loss = -dy.log(dy.pick(out, lbl))
        print loss.value() # need to run loss.value() for the forward prop
        loss.backward()
        trainer.update()

print
print np.argmax(network([1,2,3]).npvalue())
[1, 2, 3] 1
3.63615298271
[3, 2, 4] 2
3.29473733902
[1, 2, 3] 1
2.81605744362
[3, 2, 4] 2
2.46070289612
[1, 2, 3] 1
2.13946056366
[3, 2, 4] 2
1.77259361744
[1, 2, 3] 1
1.57904195786
[3, 2, 4] 2
1.2269589901
[1, 2, 3] 1
1.13014268875
[3, 2, 4] 2
0.830479979515

1