Après avoir regardé la vidéo d'Andrew Ng sur le score BLEU, j'ai voulu l'implémenter à partir de zéro en Python. J'ai écrit le code complet en Python, en utilisant numpy avec parcimonie. Voici le code complet :
# Reference check with NLTK: score a tokenised candidate against a list of
# tokenised references (here a single reference identical to the candidate).
from nltk.translate.bleu_score import sentence_bleu

reference = [['this', 'is', 'a', 'test']]
candidate = ['this', 'is', 'a', 'test']
score = sentence_bleu(reference, candidate)
print(score)
J'ai essayé de tester mon score avec nltk
import numpy as np
def n_gram_generator(sentence, n=2, n_gram=False):
    '''
    Build the word n-grams of a sentence.

    sentence: text to process; it is lower-cased and split on whitespace
    n: number of consecutive words per n-gram
    n_gram: when true, duplicate n-grams are dropped (the result then comes
            back in whatever order the intermediate set yields)

    Returns a list of strings, each one the words of an n-gram joined by a
    single space. A sentence with fewer than n words gives an empty list.
    '''
    tokens = np.array(sentence.lower().split())
    # `stop` is the exclusive end index of each window; the first complete
    # window ends at index n.
    windows = (tokens[stop - n:stop]
               for stop in range(max(n, 0), len(tokens) + 1))
    grams = [' '.join(window) for window in windows]
    return list(set(grams)) if n_gram else grams
def bleu_score(original, machine_translated):
    '''
    Score a machine translation against an original sentence, using the
    formula posted in the question.

    original: reference sentence (string)
    machine_translated: candidate sentence (string)

    Returns BP * exp(mean ratio), where BP is 1 when the candidate has more
    words than the original and exp(1 - candidate_len / original_len)
    otherwise, and the ratios are computed for every order from 0 up to
    candidate_len - 1. Two identical sentences give BP = 1 and ratios all
    equal to 1, i.e. exp(1) ~ 2.718.

    NOTE(review): this differs from the standard BLEU definition (no
    clipping, orders start at 0, exp of an arithmetic mean); the behaviour
    is kept exactly as posted.
    '''
    candidate_len = len(machine_translated.split())
    reference_len = len(original.split())

    # Length factor
    if candidate_len > reference_len:
        BP = 1
    else:
        BP = np.exp(1 - (candidate_len / reference_len))

    # One ratio per n-gram order
    ratios = []
    for order in range(candidate_len):
        reference_grams = n_gram_generator(original, order)
        candidate_grams = n_gram_generator(machine_translated, order)

        # For each distinct candidate n-gram, add up how often it occurs in
        # the candidate and how often it occurs in the original.
        candidate_total = 0
        reference_total = 0
        for gram in set(candidate_grams):
            candidate_total += candidate_grams.count(gram)
            reference_total += reference_grams.count(gram)

        ratios.append(reference_total / candidate_total)

    mean_ratio = np.array(ratios).sum() / candidate_len
    return BP * np.exp(mean_ratio)
if __name__ == "__main__":
    # Self-comparison: the sentence is scored against itself.
    original = "this is a test"
    bs = bleu_score(original, machine_translated=original)
    print("Bleu Score Original", bs)
Le problème est que mon score BLEU est d'environ 2.718281, alors que celui de nltk est de 1. Qu'est-ce que je fais de travers ?
Voici quelques raisons possibles:
1) J'ai calculé les ngrammes par rapport à la longueur de la phrase traduite automatiquement. Ici de 1 à 4
2) La fonction n_gram_generator, que j'ai écrite moi-même et dont je ne suis pas sûr de l'exactitude
3) D'une manière ou d'une autre, j'ai utilisé une mauvaise fonction ou mal calculé le score BLEU
Quelqu'un peut-il consulter mon code et me dire où j'ai commis l'erreur?
3 Réponses :
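Votre calcul du score BLEU est erroné. Problèmes : les comptes de n-grammes ne sont pas écrêtés (« clipped precision ») ; les ordres de n-grammes doivent aller de 1 à 4, et non de 0 à la longueur de la phrase moins un ; le score final est BP · exp(Σ wₙ · log pₙ), et non BP · exp de la moyenne des précisions.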
Code corrigé
0.27098211583470044 0.27098211583470044
Sortie :
def bleu_score(original,machine_translated):
    '''
    BLEU score of a machine translated sentence against one original
    (reference) sentence, using 1- to 4-gram clipped precisions with equal
    weights.

    original: reference sentence (string)
    machine_translated: candidate sentence (string)

    Returns a float: BP * exp(sum(0.25 * log(p_n))) for n = 1..4, where p_n
    is the clipped n-gram precision and BP the brevity penalty. Returns 0.0
    when the candidate is empty, has no n-grams of some order (fewer than
    four words) or has a clipped precision of zero, instead of raising
    ZeroDivisionError / "math domain error".
    '''
    # Imported locally so the function does not depend on module-level
    # imports of math / Counter being present.
    import math
    from collections import Counter

    mt_length = len(machine_translated.split())
    o_length = len(original.split())
    if mt_length == 0:
        return 0.0

    # Brevity penalty: 1 for a candidate longer than the reference,
    # exp(1 - reference_len / candidate_len) otherwise (never above 1).
    if mt_length > o_length:
        BP = 1.0
    else:
        BP = math.exp(1 - o_length / mt_length)

    # Clipped precision, one weighted log term per n-gram order
    weights = [0.25] * 4
    log_terms = []
    for order, weight in enumerate(weights, start=1):
        original_n_gram = Counter(n_gram_generator(original, order))
        machine_n_gram = Counter(n_gram_generator(machine_translated, order))
        total = sum(machine_n_gram.values())
        # Each candidate n-gram counts at most as often as it appears in
        # the reference (Counter returns 0 for n-grams it does not hold).
        clipped = sum(min(count, original_n_gram[gram])
                      for gram, count in machine_n_gram.items())
        if total == 0 or clipped == 0:
            # log(0) is undefined; the geometric mean is 0 in that case.
            return 0.0
        log_terms.append(weight * math.log(clipped / total))

    return BP * math.exp(math.fsum(log_terms))
# Reference sentence and candidate translation used for the comparison.
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"
# Print the hand-written score, then NLTK's score for the same pair
# (NLTK takes a list of tokenised references and a tokenised candidate).
print (bleu_score(original, machine_translated))
print (sentence_bleu([original.split()], machine_translated.split()))
J'ai obtenu l'erreur « ValueError: math domain error » (erreur de domaine mathématique).
Voici une version légèrement modifiée du code source réel de nltk :
0.18174699151949172 0.18174699151949172
Nous pouvons utiliser un exemple tiré de l'article original :
# Raw reference translations (three sentences).
rt_raw = [
'It is a guide to action that ensures that the military will forever heed Party commands',
'It is the guiding principle which guarantees the military forces always being under the command of the Party',
'It is the practical guide for the army always to heed the directions of the party'
]
# Raw candidate translations (two sentences).
ct_raw = [
'It is a guide to action which ensures that the military always obeys the commands of the party',
'It is to insure the troops forever hearing the activity guidebook that party direct'
]
def process_trans(t):
    """Lower-case ``t`` and split it on whitespace into a list of tokens."""
    lowered = t.lower()
    return lowered.split()
# Tokenise every reference and candidate sentence.
rt = [process_trans(t) for t in rt_raw]
ct = [process_trans(t) for t in ct_raw]
c1, c2 = ct[0], ct[1]
# Score the second candidate with both implementations; with weights
# (.5, .5, 0, 0) only the first two n-gram orders carry weight.
# The return values are neither stored nor printed here.
sentence_bleu_man(rt, c2, weights=(.5, .5, 0, 0))
sentence_bleu(rt, c2, weights=(.5, .5, 0, 0))
Sortie :
def sentence_bleu_man(
        references,
        hypothesis,
        weights=(0.25, 0.25, 0.25, 0.25)):
    """Sentence-level BLEU of ``hypothesis`` against ``references``.

    references: list of tokenised reference sentences
    hypothesis: tokenised candidate sentence
    weights: one weight per n-gram order, starting at unigrams

    Returns ``bp * exp(sum(w_n * log(p_n)))`` over the orders whose modified
    precision is non-zero, or 0 when not even a unigram matches.

    NOTE(review): ``modified_precision``, ``closest_ref_length`` and
    ``brevity_penalty`` are not defined in this snippet -- presumably the
    helpers of ``nltk.translate.bleu_score``; confirm they are imported.
    """
    # Modified precision for each n-gram order, kept as (numerator,
    # denominator) pairs so the private ``_normalize`` keyword of Fraction
    # (not part of its documented API) is not needed.
    precisions = []
    for order, _ in enumerate(weights, start=1):
        p_i = modified_precision(references, hypothesis, order)
        precisions.append((p_i.numerator, p_i.denominator))

    # compute brevity penalty
    hyp_len = len(hypothesis)
    ref_len = closest_ref_length(references, hyp_len)
    bp = brevity_penalty(ref_len, hyp_len)

    # Without a single matching unigram no higher order can match either;
    # the score is 0 rather than the bare brevity penalty.
    if precisions and precisions[0][0] == 0:
        return 0

    # compute final score; orders with a zero numerator are skipped because
    # log(0) is undefined, and each weight stays paired with its own order.
    log_terms = (
        w_i * math.log(numerator / denominator)
        for w_i, (numerator, denominator) in zip(weights, precisions)
        if numerator > 0
    )
    return bp * math.exp(math.fsum(log_terms))
Voici la solution révisée
# coding: utf-8
import numpy as np
from collections import Counter
import math
from nltk.translate.bleu_score import sentence_bleu
def n_gram_generator(sentence, n=2, n_gram=False):
    '''
    N-gram generator.

    sentence: text to process; it is lower-cased and split on whitespace
    n: number of words per n-gram
    n_gram: when true, repeated n-grams are removed (result order is then
            that of the intermediate set)

    Returns a list of strings, each n-gram's words joined by one space.
    '''
    words = sentence.lower().split()
    # `stop` is the exclusive end index of each window of n words.
    grams = [' '.join(words[stop - n:stop])
             for stop in range(max(n, 0), len(words) + 1)]
    if n_gram:
        grams = list(set(grams))
    return grams


def bleu_score(original, machine_translated):
    '''
    BLEU score of a machine translated sentence against one original
    (reference) sentence, using 1- to 4-gram clipped precisions with equal
    weights. Both sentences are lower-cased by n_gram_generator.

    original: reference sentence (string)
    machine_translated: candidate sentence (string)

    Returns a float: BP * exp(sum(0.25 * log(p_n))) for n = 1..4. Returns
    0.0 when the candidate is empty, is too short to contain an n-gram of
    some order, or has a clipped precision of zero (the geometric mean is 0
    in those cases), instead of raising ZeroDivisionError or
    "math domain error".
    '''
    mt_length = len(machine_translated.split())
    o_length = len(original.split())
    if mt_length == 0:
        return 0.0

    # Brevity penalty: 1 for a candidate longer than the reference,
    # exp(1 - reference_len / candidate_len) otherwise, so it never
    # exceeds 1. float() keeps true division whatever the interpreter.
    if mt_length > o_length:
        BP = 1.0
    else:
        BP = math.exp(1 - float(o_length) / mt_length)

    # Clipped precision, one weighted log term per n-gram order
    weights = [0.25] * 4
    log_terms = []
    for ngram_level, weight in enumerate(weights, start=1):  # 1-gram to 4-gram
        original_n_gram = Counter(n_gram_generator(original, ngram_level))
        machine_n_gram = Counter(n_gram_generator(machine_translated, ngram_level))
        num_ngrams_in_translation = sum(machine_n_gram.values())
        # CLIPPING - Counter intersection keeps, for every n-gram, the
        # smaller of its candidate count and its reference count.
        num_clipped = sum((machine_n_gram & original_n_gram).values())
        if num_ngrams_in_translation == 0 or num_clipped == 0:
            return 0.0
        precision = float(num_clipped) / num_ngrams_in_translation
        log_terms.append(weight * math.log(precision))

    return BP * math.exp(math.fsum(log_terms))
# Reference sentence and candidate translation used for the comparison.
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"
# Print the hand-written score, then NLTK's score for the same pair
# (NLTK takes a list of tokenised references and a tokenised candidate).
print (bleu_score(original, machine_translated))
print (sentence_bleu([original.split()], machine_translated.split()))