Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose suggestions scores #567

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ libaspell_la_SOURCES =\
lib/find_speller.cpp\
lib/speller-c.cpp\
lib/string_pair_enumeration-c.cpp\
lib/suggestions-c.cpp\
lib/new_checker.cpp

libaspell_la_LIBADD = $(LTLIBINTL) $(PTHREAD_LIB)
Expand Down
2 changes: 2 additions & 0 deletions auto/MkSrc/CcHelper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,11 @@ sub make_wide_version ( $ \@ $ ; \% ) {

#ifdef __cplusplus
# define aspell_cast_(type, expr) (static_cast<type>(expr))
# define aspell_rcast_(type, expr) (reinterpret_cast<type>(expr))
# define aspell_cast_from_wide_(str) (static_cast<const void *>(str))
#else
# define aspell_cast_(type, expr) ((type)(expr))
# define aspell_rcast_(type, expr) ((type)(expr))
# define aspell_cast_from_wide_(str) ((const char *)(str))
#endif
---
Expand Down
4 changes: 4 additions & 0 deletions auto/MkSrc/ProcImpl.pm
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ $info{class}{proc}{impl} = sub {
$ret .= " if (ret.data)\n";
$ret .= " const_cast<WordList *>(ret.data)->from_internal_ = ths->from_internal_;\n";
}
if ($ret_type->{type} eq 'suggestions') {
$ret .= " if (ret.data)\n";
$ret .= " ret.data->from_internal_ = ths->from_internal_;\n";
}
$ret .= " ";
$ret .= "return " unless $ret_type->{type} eq 'void';
$ret .= $exp;
Expand Down
3 changes: 3 additions & 0 deletions auto/MkSrc/Type.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,11 @@ sub finalized_type ( $ )

local $_ = $name;

s/^array size/unsigned int/;

s/^const // and $d->{const} = true;
s/^array (\d+) // and $d->{array} = $1;
s/ ?array$// and $d->{pointer} = true;
s/ ?pointer$// and $d->{pointer} = true;
s/ ?object$// and $d->{pointer} = false;

Expand Down
48 changes: 48 additions & 0 deletions auto/mk-src.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- indent-tabs-mode: t; fill-column: 96 -*-
#
# mk-src.in -- Input file for the Perl program to automatically
# generate interface code.
Expand Down Expand Up @@ -723,6 +724,14 @@ class: speller
const word list
encoded string: word

method: suggestions

posib err
/
suggestions
encoded string: word
void pointer: unused

method: store replacement

posib err
Expand Down Expand Up @@ -967,6 +976,45 @@ class: string enumeration
const void pointer
int: type_width
}
group: suggestions
{
/
class: suggestions
/
method: words
cc extra =>
\#define aspell_suggestions_words_w(type, ths, len) \\
aspell_rcast_(const type * *, aspell_suggestions_words(ths, len))
/
string array
array size pointer: len

# method: words wide
# c impl =>
# return reinterpret_cast<const void * *>(ths->words(len));
# const void pointer array
# array size pointer: len

method: normalized scores
desc => Returns the normalized scores. The values are between 0 and 1 inclusive
where higher numbers indicate a better match. The exact values may not
be stable between releases. The score is currently 1/(d+1) where d is
the distance as given by the 'distances' method, however this may change
in future releases. This number may also incorporate other information,
such as the frequency, in future releases.
/
double array
array size pointer: len

method: distances
desc => Returns the distances between the words and the correct spelling. This
number is closely related to the edit distance of the word where each
edit is around 1.0. Values less than 0.5 represent a typo or otherwise
very good match. Values less than around 1.2 represent a good match.
/
double array
array size pointer: len
}
group: info
{
/
Expand Down
2 changes: 0 additions & 2 deletions common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1517,8 +1517,6 @@ namespace acommon {
N_("reverse the order of the suggest list")}
, {"suggest", KeyInfoBool, "true",
N_("suggest possible replacements"), KEYINFO_MAY_CHANGE}
, {"time" , KeyInfoBool, "false",
N_("time load time and suggest time in pipe mode"), KEYINFO_MAY_CHANGE}
};

const KeyInfo * config_impl_keys_begin = config_keys;
Expand Down
3 changes: 3 additions & 0 deletions common/speller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "posib_err.hpp"
#include "parm_string.hpp"
#include "char_vector.hpp"
#include "suggestions.hpp"

namespace acommon {

Expand Down Expand Up @@ -126,6 +127,8 @@ namespace acommon {
// return null on error
// the word list returned by suggest is only valid until the next
// call to suggest

virtual PosibErr<Suggestions *> suggestions(MutableString, void *) = 0;

virtual PosibErr<void> store_replacement(MutableString,
MutableString) = 0;
Expand Down
9 changes: 8 additions & 1 deletion modules/speller/default/speller_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,14 @@ namespace aspeller {
return &suggest_->suggest(word);
}

bool SpellerImpl::check_simple (ParmString w, WordEntry & w0)
// Produce the scored suggestions for `word`.
//
// The result is stored in the speller-owned `suggestions_` member, which is
// reset and then pointed at the output of the suggestion engine; a pointer to
// that member is returned.  Because the same object is reused on every call,
// the previous result is overwritten each time this method runs.
//
// The second (unnamed) parameter is not used by this implementation.
PosibErr<Suggestions *> SpellerImpl::suggestions(MutableString word, void *)
{
  Suggestions & result = suggestions_;
  // Discard whatever the previous call left behind before refilling.
  result.reset();
  result.sugs_ = &suggest_->suggestions(word);
  return &result;
}

bool SpellerImpl::check_simple(ParmString w, WordEntry & w0)
{
w0.clear(); // FIXME: is this necessary?
const char * x = w;
Expand Down
7 changes: 6 additions & 1 deletion modules/speller/default/speller_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,12 @@ namespace aspeller {

PosibErr<const WordList *> suggest(MutableString word);
// the suggestion list and the elements in it are only
// valid until the next call to suggest.
// valid until the next call to suggest or suggestions

PosibErr<Suggestions *> suggestions(MutableString word, void *);
// the suggestions are only valid until the next call to suggest or
// suggestions

PosibErr<void> store_replacement(MutableString mis,
MutableString cor);

Expand All @@ -184,6 +188,7 @@ namespace aspeller {
//CopyPtr<DictCollection> wls_;
ClonePtr<Suggest> suggest_;
ClonePtr<Suggest> intr_suggest_;
Suggestions suggestions_;
unsigned int ignore_count;
bool ignore_repl;
String prev_mis_repl_;
Expand Down
97 changes: 62 additions & 35 deletions prog/aspell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "aspell.h"

#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>

Expand All @@ -41,7 +42,6 @@
#include "convert.hpp"
#include "document_checker.hpp"
#include "enumeration.hpp"
#include "errors.hpp"
#include "file_util.hpp"
#include "fstream.hpp"
#include "info.hpp"
Expand Down Expand Up @@ -117,15 +117,15 @@ void print_error(ParmString msg, ParmString str)
else {var=pe.data;}\
} while(false)
#define BREAK_ON_ERR(command) \
do{PosibErrBase pe(command);\
if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
} while(false)
PosibErrBase pe(command);\
if(pe.has_err()){print_error(pe.get_err()->mesg); break;} \
0 /* noop */
#define BREAK_ON_ERR_SET(command, type, var)\
type var;\
do{PosibErr< type > pe(command);\
PosibErr< type > pe(command);\
if(pe.has_err()){print_error(pe.get_err()->mesg); break;}\
else {var=pe.data;}\
} while(false)
else {var=pe.data;} \
0 /* noop */


/////////////////////////////////////////////////////////
Expand Down Expand Up @@ -253,10 +253,32 @@ static void line_buffer() {
Conv dconv;
Conv uiconv;

// Additional configuration keys declared by this program.  main() hands the
// table to the config object through set_extra().
//   "time"        - boolean, defaults to "false"
//   "show-scores" - string, defaults to "false"; the accepted values are
//                   interpreted by get_show_scores()
static const KeyInfo extra_config_keys[] = {
{"time" , KeyInfoBool, "false",
N_("time load time and suggest time in pipe mode"), KEYINFO_MAY_CHANGE},
{"show-scores", KeyInfoString, "false",
N_("one of: false, true, dist"), KEYINFO_MAY_CHANGE}
};

// Selects what, if anything, is printed after each suggestion in pipe mode:
// nothing, the normalized score, or the distance.
enum ShowScores {DontShowScores, ShowNormalized, ShowDistance};

// Read the "show-scores" option and translate it into a ShowScores value.
//
// Accepted spellings:
//   "false"              -> DontShowScores
//   "true" or ""         -> ShowNormalized
//   "dist" or "distance" -> ShowDistance
// Any other value yields a bad-value error naming the "show-scores" key.
PosibErr<ShowScores> get_show_scores() {
  String setting = options->retrieve("show-scores");

  if (setting == "false") {
    return DontShowScores;
  } else if (setting == "true" || setting == "") {
    // NOTE(review): the empty string presumably corresponds to the option
    // being given without a value -- confirm against the option parser.
    return ShowNormalized;
  } else if (setting == "dist" || setting == "distance") {
    return ShowDistance;
  }

  // Unrecognized value: report it together with the list of valid choices.
  return make_err(reinterpret_cast<const ErrorInfo *>(aerror_bad_value),
                  "show-scores", setting, _("one of false, true or dist"));
}

int main (int argc, const char *argv[])
{
options = new_config(); // this needs to be here because of a bug
// with static initializers on Darwin.
options->set_extra(extra_config_keys, extra_config_keys + sizeof(extra_config_keys)/sizeof(KeyInfo));
#ifdef USE_LOCALE
setlocale (LC_ALL, "");
#endif
Expand Down Expand Up @@ -705,6 +727,7 @@ void pipe()
bool do_time = options->retrieve_bool("time");
bool suggest = options->retrieve_bool("suggest");
bool include_guesses = options->retrieve_bool("guess");
EXIT_ON_ERR_SET(get_show_scores(), ShowScores, show_scores);
clock_t start,finish;

if (!options->have("mode") && !options->have("filter")) {
Expand Down Expand Up @@ -824,14 +847,19 @@ void pipe()
case 'c':
switch (line[3]) {
case 's':
if (get_word_pair(line + 4, word, word2))
if (get_word_pair(line + 4, word, word2)) {
BREAK_ON_ERR(err = config->replace(word, word2));
}
if (strcmp(word,"suggest") == 0)
suggest = config->retrieve_bool("suggest");
else if (strcmp(word,"time") == 0)
do_time = config->retrieve_bool("time");
else if (strcmp(word,"guess") == 0)
include_guesses = config->retrieve_bool("guess");
else if (strcmp(word,"show-scores") == 0) {
BREAK_ON_ERR_SET(get_show_scores(), ShowScores, val);
show_scores = val;
}
break;
case 'r':
word = trim_wspace(line + 4);
Expand Down Expand Up @@ -889,40 +917,39 @@ void pipe()
ci = ci->next;
}
start = clock();
const AspellWordList * suggestions = 0;
if (suggest)
suggestions = aspell_speller_suggest(speller, word, -1);
const char * * suggestions = 0;
unsigned num_suggestions = 0;
double * normalized_scores = 0;
double * distances = 0;
if (suggest) {
AspellSuggestions * sugs = aspell_speller_suggestions(speller, word, -1, 0);
suggestions = aspell_suggestions_words(sugs, &num_suggestions);
if (show_scores == ShowNormalized)
normalized_scores = aspell_suggestions_normalized_scores(sugs, NULL);
else if (show_scores == ShowDistance)
distances = aspell_suggestions_distances(sugs, NULL);
}
finish = clock();
unsigned offset = mb_len(line0, token.offset + ignore);
if (suggestions && !aspell_word_list_empty(suggestions))
if (num_suggestions > 0)
{
COUT.printf("& %s %u %u:", word,
aspell_word_list_size(suggestions), offset);
AspellStringEnumeration * els
= aspell_word_list_elements(suggestions);
if (options->retrieve_bool("reverse")) {
Vector<String> sugs;
sugs.reserve(aspell_word_list_size(suggestions));
while ( ( w = aspell_string_enumeration_next(els)) != 0)
sugs.push_back(w);
Vector<String>::reverse_iterator i = sugs.rbegin();
while (true) {
COUT.printf(" %s", i->c_str());
++i;
if (i == sugs.rend()) break;
COUT.put(',');
}
} else {
while ( ( w = aspell_string_enumeration_next(els)) != 0) {
COUT.printf(" %s%s", w,
aspell_string_enumeration_at_end(els) ? "" : ",");
}
}
delete_aspell_string_enumeration(els);
num_suggestions, offset);
if (options->retrieve_bool("reverse"))
std::reverse(suggestions, suggestions+num_suggestions);
for (unsigned i = 0; i < num_suggestions; ++i) {
char score[8] = {};
if (normalized_scores)
snprintf(score, 8, " %.0f", floor(normalized_scores[i] * 100.0));
else if (distances)
snprintf(score, 8, " %.2f", distances[i]);
COUT.printf(" %s%s%s", suggestions[i], score,
i == num_suggestions-1 ? "" : ",");
}
if (include_guesses)
COUT.put(guesses);
COUT.put('\n');
} else {
} else {
if (guesses.empty())
COUT.printf("# %s %u\n", word, offset);
else
Expand Down
10 changes: 9 additions & 1 deletion test/wide_test_valid.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,18 @@ int main() {
AspellStringEnumeration * elements = aspell_word_list_elements(suggestions);
const uint16_t * word = aspell_string_enumeration_next_w(uint16_t, elements);
if (memcmp(word, test_word, sizeof(test_incorrect)) != 0) {
fprintf(stderr, "%s", "fail: first suggesion is not what is expected\n");
fprintf(stderr, "%s", "fail: first suggesion is not what is expected (suggest)\n");
fail = 1;
}
delete_aspell_string_enumeration(elements);

AspellSuggestions * sugs = aspell_speller_suggestions_w(spell_checker, test_incorrect, -1, NULL);
unsigned len;
const uint16_t * * words = aspell_suggestions_words_w(uint16_t, sugs, &len);
if (len < 1 || memcmp(words[0], test_word, sizeof(test_incorrect)) != 0) {
fprintf(stderr, "%s", "fail: first suggesion is not what is expected (suggestions)\n");
fail = 1;
}

possible_err = new_aspell_document_checker(spell_checker);
if (aspell_error(possible_err) != 0) {
Expand Down