Hi everyone,
First of all, I would like to say that I do not know anything about Python. However, I got this program today, and I think I understand more or less what it does. From the results it gives me, I think that it does a similar thing twice. So at the end it provides me with two datasets. I would like to shorten the code so that it does just one thing at a time. For example, at the beginning it says
S=(2,3)
mocapy_seed(S[0], S[1]). Although I do not understand what this means, it gives two datasets at the output.
I cannot do it myself because it gives me errors that I do not understand at all. As I said, I do not know Python. Here is the code:
from numpy import *
from numpy.random import random, seed
import sys
from Mocapy import *
# Seed values passed to mocapy_seed (presumably Mocapy's random number
# generator seed -- confirm against the Mocapy documentation).
S = (2, 3)
mocapy_seed(S[0], S[1])

# Number of sequences to sample.
N = 15

# Length of each sampled sequence.
T = 15

# Arguments given to the first E-step of the EM loop.
MCMC_STEPS = 1
MCMC_BURN_IN = 1

# node_size used for every DiscreteNode and VMFNode.
H_SIZE = 2

# dim used for the VMF nodes; also the number of values written per line
# of the data file.
DIM = 3

# User-supplied mus and kappas for the target VMF node.
TUM = array(((1, 0, 0), (1, 0, 0)), 'd')
TUK = array([15] * H_SIZE, 'd')

# The model VMF node gets no user-supplied mus, only kappas.
# A 2x2 array used to be assigned to MUM and was overwritten with None on
# the very next line; that dead assignment (whose row length of 2 did not
# match DIM) has been dropped.
MUM = None
MUK = array([10] * H_SIZE, 'd')
# The target DBN: the network the training sequences are sampled from.
# Node construction order is kept as-is (it may consume seeded random
# numbers -- not verifiable from this file).
th0 = DiscreteNode(node_size=H_SIZE)
th1 = DiscreteNode(node_size=H_SIZE)
to0 = VMFNode(dim=DIM,
              node_size=H_SIZE,
              user_mus=TUM,
              user_kappas=TUK)

# Node lists handed to the DBN constructor; to0 appears in both.
start_nodes = [th0, to0]
end_nodes = [th1, to0]
node_list = [th0, th1, to0]

tdbn = DBN(start_nodes, end_nodes)

# Register the intra (0, 1) and inter (0, 0) connections, then build.
tdbn.add_intra(0, 1)
tdbn.add_inter(0, 0)
tdbn.construct()
# The model DBN: same layout as the target DBN; this is the network that
# is handed to the sampler and the EM engine.
# mh0 reuses the target's th0.cpd and is flagged fixed (presumably so that
# EM does not re-estimate it -- confirm against the Mocapy documentation).
mh0 = DiscreteNode(node_size=H_SIZE, user_cpd=th0.cpd)
mh0.fixed = 1
mh1 = DiscreteNode(node_size=H_SIZE)
mo0 = VMFNode(dim=DIM,
              node_size=H_SIZE,
              user_mus=MUM,
              user_kappas=MUK)

# Node lists handed to the DBN constructor; mo0 appears in both.
start_nodes = [mh0, mo0]
end_nodes = [mh1, mo0]
node_list = [mh0, mh1, mo0]

mdbn = DBN(start_nodes, end_nodes)

# Register the intra (0, 1) and inter (0, 0) connections, then build.
mdbn.add_intra(0, 1)
mdbn.add_inter(0, 0)
mdbn.construct()
# Generate the data.
# Every rank starts with empty lists; only rank 0 fills them before they
# are passed to mpi.scatter, so no separate branch is needed for the
# other ranks.
seq_list = []
mismask_list = []

if mpi.rank == 0:
    # Write the data to a file. try/finally guarantees the handle is
    # closed even if sampling or writing raises.
    fp = open("hmm_vmf.data", "w")
    try:
        for i in range(N):
            seq, ll = tdbn.sample_sequence(T)
            # One line per position: columns 1 onward of the sampled row
            # (DIM values); column 0 is not written.
            for row in range(T):
                fp.write(("%f " * DIM + "\n") % tuple(seq[row, 1:]))
            seq_list.append(seq)
            # T x 2 mask with column 0 set to 1 and column 1 left at 0
            # (presumably flags column 0 as unobserved -- confirm against
            # the Mocapy documentation).
            mismask = zeros((T, 2))
            mismask[:, 0] = 1
            mismask_list.append(mismask)
    finally:
        fp.close()

# Each rank keeps what mpi.scatter returns for it.
local_seq_list = mpi.scatter(seq_list)
local_mismask_list = mpi.scatter(mismask_list)
mcmc=GibbsRandom(mdbn)
em=EMEngine(mdbn, mcmc, local_seq_list, local_mismask_list)
iteration=1
print th0
print th1
print to0
print "-"*40
print mh0
print mh1
print mo0
while 1:
print "-"*40
if mpi.rank==0:
print "Iteration ", iteration
print "Running on %i nodes." % mpi.size
sys.stdout.flush()
if iteration==1:
em.do_E_step(MCMC_STEPS, MCMC_BURN_IN)
else:
em.do_E_step(MCMC_STEPS, 0, 0)
ll=em.get_loglik()
if mpi.rank==0:
print "LL ", ll
sys.stdout.flush()
print mh0
print mh1
print mo0
em.do_M_step()
sys.stdout.flush()
iteration+=1
Your help will be very much appreciated
Thanks
Regards
Kostas
First of all, I would like to say that I do not know anything about Python. However, I got this program today, and I think I understand more or less what it does. From the results it gives me, I think that it does a similar thing twice. So at the end it provides me with two datasets. I would like to shorten the code so that it does just one thing at a time. For example, at the beginning it says
S=(2,3)
mocapy_seed(S[0], S[1]). Although I do not understand what this means, it gives two datasets at the output.
I cannot do it myself because it gives me errors that I do not understand at all. As I said, I do not know Python. Here is the code:
from numpy import *
from numpy.random import random, seed
import sys
from Mocapy import *
# Seed values passed to mocapy_seed (presumably Mocapy's random number
# generator seed -- confirm against the Mocapy documentation).
S = (2, 3)
mocapy_seed(S[0], S[1])

# Number of sequences to sample.
N = 15

# Length of each sampled sequence.
T = 15

# Arguments given to the first E-step of the EM loop.
MCMC_STEPS = 1
MCMC_BURN_IN = 1

# node_size used for every DiscreteNode and VMFNode.
H_SIZE = 2

# dim used for the VMF nodes; also the number of values written per line
# of the data file.
DIM = 3

# User-supplied mus and kappas for the target VMF node.
TUM = array(((1, 0, 0), (1, 0, 0)), 'd')
TUK = array([15] * H_SIZE, 'd')

# The model VMF node gets no user-supplied mus, only kappas.
# A 2x2 array used to be assigned to MUM and was overwritten with None on
# the very next line; that dead assignment (whose row length of 2 did not
# match DIM) has been dropped.
MUM = None
MUK = array([10] * H_SIZE, 'd')
# The target DBN: the network the training sequences are sampled from.
# Node construction order is kept as-is (it may consume seeded random
# numbers -- not verifiable from this file).
th0 = DiscreteNode(node_size=H_SIZE)
th1 = DiscreteNode(node_size=H_SIZE)
to0 = VMFNode(dim=DIM,
              node_size=H_SIZE,
              user_mus=TUM,
              user_kappas=TUK)

# Node lists handed to the DBN constructor; to0 appears in both.
start_nodes = [th0, to0]
end_nodes = [th1, to0]
node_list = [th0, th1, to0]

tdbn = DBN(start_nodes, end_nodes)

# Register the intra (0, 1) and inter (0, 0) connections, then build.
tdbn.add_intra(0, 1)
tdbn.add_inter(0, 0)
tdbn.construct()
# The model DBN: same layout as the target DBN; this is the network that
# is handed to the sampler and the EM engine.
# mh0 reuses the target's th0.cpd and is flagged fixed (presumably so that
# EM does not re-estimate it -- confirm against the Mocapy documentation).
mh0 = DiscreteNode(node_size=H_SIZE, user_cpd=th0.cpd)
mh0.fixed = 1
mh1 = DiscreteNode(node_size=H_SIZE)
mo0 = VMFNode(dim=DIM,
              node_size=H_SIZE,
              user_mus=MUM,
              user_kappas=MUK)

# Node lists handed to the DBN constructor; mo0 appears in both.
start_nodes = [mh0, mo0]
end_nodes = [mh1, mo0]
node_list = [mh0, mh1, mo0]

mdbn = DBN(start_nodes, end_nodes)

# Register the intra (0, 1) and inter (0, 0) connections, then build.
mdbn.add_intra(0, 1)
mdbn.add_inter(0, 0)
mdbn.construct()
# Generate the data.
# Every rank starts with empty lists; only rank 0 fills them before they
# are passed to mpi.scatter, so no separate branch is needed for the
# other ranks.
seq_list = []
mismask_list = []

if mpi.rank == 0:
    # Write the data to a file. try/finally guarantees the handle is
    # closed even if sampling or writing raises.
    fp = open("hmm_vmf.data", "w")
    try:
        for i in range(N):
            seq, ll = tdbn.sample_sequence(T)
            # One line per position: columns 1 onward of the sampled row
            # (DIM values); column 0 is not written.
            for row in range(T):
                fp.write(("%f " * DIM + "\n") % tuple(seq[row, 1:]))
            seq_list.append(seq)
            # T x 2 mask with column 0 set to 1 and column 1 left at 0
            # (presumably flags column 0 as unobserved -- confirm against
            # the Mocapy documentation).
            mismask = zeros((T, 2))
            mismask[:, 0] = 1
            mismask_list.append(mismask)
    finally:
        fp.close()

# Each rank keeps what mpi.scatter returns for it.
local_seq_list = mpi.scatter(seq_list)
local_mismask_list = mpi.scatter(mismask_list)
mcmc=GibbsRandom(mdbn)
em=EMEngine(mdbn, mcmc, local_seq_list, local_mismask_list)
iteration=1
print th0
print th1
print to0
print "-"*40
print mh0
print mh1
print mo0
while 1:
print "-"*40
if mpi.rank==0:
print "Iteration ", iteration
print "Running on %i nodes." % mpi.size
sys.stdout.flush()
if iteration==1:
em.do_E_step(MCMC_STEPS, MCMC_BURN_IN)
else:
em.do_E_step(MCMC_STEPS, 0, 0)
ll=em.get_loglik()
if mpi.rank==0:
print "LL ", ll
sys.stdout.flush()
print mh0
print mh1
print mo0
em.do_M_step()
sys.stdout.flush()
iteration+=1
Your help will be very much appreciated
Thanks
Regards
Kostas