Low accuracy on MSCOCO #7

Open
ReneeZD opened this issue Jul 28, 2017 · 3 comments

ReneeZD commented Jul 28, 2017

Hi, I'm following your code and trying to train the network on MSCOCO.
Here is my code:

import os
import pickle
import random

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, LSTM, RepeatVector, TimeDistributed, Merge
from keras.optimizers import SGD, RMSprop
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import sequence

# Embedding_dim, path, step_size and v_step_size are defined elsewhere in my script.
# (Merge is the old Keras 1.x-style layer; it was removed in later Keras 2 releases.)

class Caption_Model:
    def __init__(self, char_to_int, int_to_char, vocab_size=26688, max_caption_len=20,
                 folder_path=path, epochs=10, batch_size=64):
        self.img_model = Sequential()
        self.text_model = Sequential()
        self.model = Sequential()
        self.vocab_size = vocab_size
        self.max_caption_len = max_caption_len
        self.folder_path = folder_path
        self.data = {}
        self.char_to_int = char_to_int
        self.int_to_char = int_to_char
        self.batch_size = batch_size
        self.epochs = epochs

    def get_image_model(self):
        # Project the 4096-d image feature into the embedding space and repeat it
        # once per time step so it can be concatenated with the text branch.
        self.img_model.add(Dense(Embedding_dim, input_dim=4096, activation='relu'))
        self.img_model.add(RepeatVector(self.max_caption_len + 1))
        # self.img_model.summary()
        return self.img_model

    def get_text_model(self):
        self.text_model.add(Embedding(self.vocab_size, 256, input_length=self.max_caption_len + 1))
        self.text_model.add(LSTM(512, return_sequences=True))
        # self.text_model.add(Dropout(0.2))
        self.text_model.add(TimeDistributed(Dense(Embedding_dim, activation='relu')))
        # self.text_model.summary()
        return self.text_model

    def get_caption_model(self, predict=False):
        # Concatenate the image and text branches and predict the next word.
        self.get_image_model()
        self.get_text_model()
        self.model.add(Merge([self.img_model, self.text_model], mode='concat'))
        self.model.add(LSTM(1000, return_sequences=False))
        self.model.add(Dense(self.vocab_size))
        self.model.add(Activation('softmax'))
        print "Now model.model"
        sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.99, nesterov=True)
        rms = RMSprop(lr=0.005)
        if predict:
            return
        else:
            # weight='/home/paperspace/Document/DeepLearning/ImageCaption/code/Models/checkpoint/weights-improvement-02-5.2473.hdf5'
            # self.model.load_weights(weight)
            self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    def load_data(self, set_type='train'):
        data = {}
        with open(self.folder_path + set_type + '.processed_img.2.pkl') as f:
            data['imgs'] = pickle.load(f)
        with open(os.path.join(self.folder_path, 'all%spartial_sentences_0.pkl' % set_type)) as f:
            data['partial_sentences'] = pickle.load(f)
        return data

    def data_generator(self, set_type='train'):
        # Yields ([image batch, padded partial-sentence batch], one-hot next words).
        data = self.load_data(set_type)
        j = 0
        temp = data['partial_sentences'].keys()
        partial_sentences, images = [], []
        next_words = np.zeros((self.batch_size, self.vocab_size)).astype(float)
        count = 0
        round_count = 0
        while True:
            round_count += 1
            random.shuffle(temp)
            print "the %d round!" % round_count
            for key in temp:
                image = data['imgs'][key]
                for sen in data['partial_sentences'][key]:
                    for k in range(len(sen)):
                        count += 1
                        partial = sen[:k + 1]
                        partial_sentences.append(partial)
                        images.append(image)
                        # print "index is: ", count-1
                        if k == len(sen) - 1:
                            next_words[count - 1][self.char_to_int['<end>']] = 1
                        else:
                            next_words[count - 1][sen[k + 1]] = 1
                        if count >= self.batch_size:
                            partial_sentences = sequence.pad_sequences(partial_sentences, maxlen=self.max_caption_len + 1, padding='post')
                            partial_sentences = np.asarray(partial_sentences)
                            images = np.asarray(images)
                            # partial_sentences=partial_sentences/float(self.vocab_size)
                            # print partial_sentences
                            count = 0
                            yield [images, partial_sentences], next_words
                            partial_sentences, images = [], []
                            next_words = np.zeros((self.batch_size, self.vocab_size)).astype(float)
            j += 1

    def train(self):
        self.get_caption_model()
        filepath = "Models/checkpoint/weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        callbacks_list = [checkpoint]

        self.model.fit_generator(self.data_generator('train'), steps_per_epoch=step_size / self.batch_size,
                                 epochs=self.epochs, validation_data=self.data_generator('val'),
                                 validation_steps=v_step_size / self.batch_size, callbacks=callbacks_list)
        # self.model.fit_generator(self.data_generator('train'),steps_per_epoch=step_size/self.batch_size,epochs=self.epochs,callbacks=callbacks_list)

        try:
            self.model.save('Models/WholeModel.h5', overwrite=True)
            self.model.save_weights('Models/Weights.h5', overwrite=True)
        except:
            print "Error in saving model."
        print "After training model...\n"

Accuracy plateaus at about 35% by the end and the training loss stays around 3.xxx.
I just can't figure out what's wrong with the code.
Could you please offer some help?
Thank you so much!
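
For reference, here is a minimal greedy-decoding sketch for eyeballing the generated captions rather than judging by next-word accuracy alone. It assumes the same two-input model built above and that char_to_int contains '<start>' and '<end>' tokens; the '<start>' name is an assumption, so adjust it to whatever the vocabulary actually uses.

import numpy as np
from keras.preprocessing import sequence

def greedy_caption(model, img_feature, char_to_int, int_to_char, max_caption_len=20):
    # img_feature: the 4096-d feature vector for one image.
    # '<start>' is an assumed seed token; replace it with the real start-of-sentence id.
    partial = [char_to_int['<start>']]
    img_batch = np.asarray([img_feature])
    for _ in range(max_caption_len):
        seq = sequence.pad_sequences([partial], maxlen=max_caption_len + 1, padding='post')
        probs = model.predict([img_batch, seq])[0]
        next_id = int(np.argmax(probs))
        if int_to_char[next_id] == '<end>':
            break
        partial.append(next_id)
    return ' '.join(int_to_char[i] for i in partial[1:])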

@MikhailovSergei

Hi, did you manage to solve this problem?

@ShixiangWan

@ReneeZD @MikhailovSergei Hi, did you fix this problem? I've tried many ways but still cannot get proper captions. :(

ajay9022 commented Dec 19, 2018

@ReneeZD Did you get the required results? If yes, then how?
