Browse Source

Initial code commit. Results are poor and it is hard to install

master
Macoy Madson 5 years ago
parent
commit
e98c644711
  1. 16
      .clang-format
  2. 5
      Jamfile
  3. 42
      Jamrules
  4. 2
      README.md
  5. 19
      README.org
  6. 189
      fuzzy.c
  7. 40
      fuzzy.h
  8. 203
      macoyFuzzy.c
  9. 81
      macoyFuzzy.el
  10. 36
      macoyFuzzyTests.el
  11. 56
      utils.c
  12. 14
      utils.h

16
.clang-format

@ -0,0 +1,16 @@
BasedOnStyle: Google
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: None
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
BreakBeforeBraces: Allman
BreakBeforeTernaryOperators: false
ColumnLimit: 100
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
IndentWidth: 4
Standard: Cpp03
TabWidth: 4
UseTab: ForIndentation
DerivePointerAlignment: false
PointerAlignment: Left

5
Jamfile

@ -0,0 +1,5 @@
SubDir . ;
Main macoyFuzzy$(SUFSHR) : fuzzy.c
utils.c
macoyFuzzy.c ;

42
Jamrules

@ -0,0 +1,42 @@
# Building Tests:
# jam clean
# jam -j4 -q
# (if building the same target repeatedly, don't clean, if that wasn't obvious)
##
## Compiler
##
CC = gcc ;
LINK = gcc ;
LINKFLAGS = -shared ;
if $(UNIX) { SUFSHR = .so ; }
else if $(NT) { SUFSHR = .dll ; }
##
## Compiler arguments
##
# Arguments used on all projects, regardless of any variables
CCFLAGS = -ggdb3 -Wall -fPIC ;
KEEPOBJS = true ; # This doesn't actually fix anything, though it seems like it should
NOARSCAN = true ; # This actually fixes the problem
#AR = ar rUu ; # I was thinking maybe the AR command was wrong (always outputting deterministically)
# It doesn't seem like this is the problem though
AR = ar cr ;
OPTIM = -O0 ;
HDRS = . ../../3rdParty/repositories/emacs/src ;
# Some helpful Jam commands
# -q : stop on failed target
# -jN : use N cores
# -sVAR=VAL : Set VAR to VAL. Note that setting WINDOWS=false is the same as setting UNREAL=true,
# frustratingly
# -dx : print commands being used
# -n : don't actually run commands

2
README.md

@ -1,2 +0,0 @@
# emacs-fuzzy-module
An emacs module for doing fuzzy filtering of a list

19
README.org

@ -0,0 +1,19 @@
* emacs-fuzzy-module
An emacs module for doing fuzzy filtering of a list.
I created this because I was encountering performance problems with ELisp-based solutions like flx-ido. I figure native module performance should be much better.
* Installation
1. To build, make sure you have Jam installed:
: sudo apt install jam
2. Open Jamrules and add the path to your ~emacs/src~ to the ~HDRS~ variable. Ensure there is a space between the ~;~ and your path
3. Run ~jam~ in emacs-fuzzy-module directory
4. Open macoyFuzzy.el and make the ~module-load~ path reference ~macoyFuzzy.so~ (wherever you built it)
5. Evaluate macoyFuzzy.el
6. Add the following to your .emacs:
#+BEGIN_SRC emacs-lisp
(require 'macoy-fuzzy-ido)
(ido-mode 1)
(ido-everywhere 1)
(macoy-fuzzy-ido-mode 1)
#+END_SRC

189
fuzzy.c

@ -0,0 +1,189 @@
#include "fuzzy.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
// Forward declaration of the recursive worker defined further down in this file.
// fuzzy_match_with_matches() calls it before its definition appears.
static bool fuzzy_match_recursive(const char* pattern, const char* str, int* outScore,
const char* strBegin, unsigned char const* srcMatches,
unsigned char* newMatches, int maxMatches, int nextMatch,
int* recursionCount, int recursionLimit);
// Public interface
// Returns true if every character of pattern occurs in str in the same order
// (case-insensitively, not necessarily adjacent). An empty pattern matches any str.
bool fuzzy_match_simple(char const* pattern, char const* str)
{
	while (*pattern != '\0' && *str != '\0')
	{
		// <ctype.h> functions require a value representable as unsigned char (or EOF);
		// passing a negative plain char (e.g. a UTF-8 byte) is undefined behavior.
		if (tolower((unsigned char)*pattern) == tolower((unsigned char)*str))
			++pattern;
		++str;
	}
	// The whole pattern was consumed only if we stopped on its terminator
	return *pattern == '\0';
}
// Scored match that discards the match positions: delegates to
// fuzzy_match_with_matches() using a throwaway index buffer on the stack.
bool fuzzy_match(char const* pattern, char const* str, int* outScore)
{
	unsigned char scratchIndices[256];
	const int scratchCapacity = (int)sizeof(scratchIndices);
	return fuzzy_match_with_matches(pattern, str, outScore, scratchIndices, scratchCapacity);
}
// Scored match that also reports where each pattern character matched.
// Starts the recursive search at the beginning of str with an empty match list and a
// fresh recursion counter capped at 10.
bool fuzzy_match_with_matches(char const* pattern, char const* str, int* outScore,
                              unsigned char* matches, int maxMatches)
{
	const int recursionLimit = 10;
	int recursionsUsed = 0;
	const bool found = fuzzy_match_recursive(pattern, str, outScore, str, NULL, matches,
	                                         maxMatches, 0, &recursionsUsed, recursionLimit);
	return found;
}
// Private implementation
// Recursive worker for the scored fuzzy match.
//
// pattern/str:     remaining pattern and remaining candidate text to scan
// outScore:        receives the score of the best match found (written only on success)
// strBegin:        start of the full candidate string; match indices are relative to it
// srcMatches:      match indices already found by the caller (NULL at the top level); they
//                  are copied into matches the first time this call finds a match
// matches:         output buffer of match indices, with room for maxMatches entries
// nextMatch:       number of entries already valid in srcMatches / next slot to fill
// recursionCount:  shared counter of calls made so far; the search gives up once it
//                  reaches recursionLimit
//
// Returns true if the whole pattern was matched, either by this call's own greedy scan or
// by a recursive alternative that skipped one of the matching characters.
//
// NOTE: indices are stored as unsigned char, so positions past 255 in str are truncated.
static bool fuzzy_match_recursive(const char* pattern, const char* str, int* outScore,
                                  const char* strBegin, unsigned char const* srcMatches,
                                  unsigned char* matches, int maxMatches, int nextMatch,
                                  int* recursionCount, int recursionLimit)
{
	// Count recursions
	++(*recursionCount);
	if (*recursionCount >= recursionLimit)
		return false;

	// Detect end of strings
	if (*pattern == '\0' || *str == '\0')
		return false;

	// Recursion params
	bool recursiveMatch = false;
	unsigned char bestRecursiveMatches[256];
	int bestRecursiveScore = 0;

	// Loop through pattern and str looking for a match
	bool first_match = true;
	while (*pattern != '\0' && *str != '\0')
	{
		// Found match. The casts keep the <ctype.h> calls defined for bytes >= 0x80 on
		// platforms where plain char is signed.
		if (tolower((unsigned char)*pattern) == tolower((unsigned char)*str))
		{
			// Supplied matches buffer was too short
			if (nextMatch >= maxMatches)
				return false;

			// Copy the caller's matches into our buffer the first time we need to write
			if (first_match && srcMatches)
			{
				memcpy(matches, srcMatches, (size_t)nextMatch);
				first_match = false;
			}

			// Recursive call that "skips" this match
			unsigned char recursiveMatches[256];
			int recursiveScore = 0;
			if (fuzzy_match_recursive(pattern, str + 1, &recursiveScore, strBegin, matches,
			                          recursiveMatches, sizeof(recursiveMatches), nextMatch,
			                          recursionCount, recursionLimit))
			{
				// Pick best recursive score
				if (!recursiveMatch || recursiveScore > bestRecursiveScore)
				{
					memcpy(bestRecursiveMatches, recursiveMatches, sizeof(bestRecursiveMatches));
					bestRecursiveScore = recursiveScore;
				}
				recursiveMatch = true;
			}

			// Advance
			matches[nextMatch++] = (unsigned char)(str - strBegin);
			++pattern;
		}
		++str;
	}

	// Determine if full pattern was matched
	const bool matched = *pattern == '\0';

	// Calculate score
	if (matched)
	{
		const int sequential_bonus = 15;    // bonus for adjacent matches
		const int separator_bonus = 30;     // bonus if match occurs after a separator
		const int camel_bonus = 30;         // bonus if match is uppercase and prev is lower
		const int first_letter_bonus = 15;  // bonus if the first letter is matched
		// penalty applied for every letter in str before the first match
		const int leading_letter_penalty = -5;
		const int max_leading_letter_penalty = -15;  // maximum penalty for leading letters
		const int unmatched_letter_penalty = -1;     // penalty for every letter that doesn't match

		// Iterate str to end
		while (*str != '\0')
			++str;

		// Initialize score
		*outScore = 100;

		// Apply leading letter penalty (penalties are negative, so clamp from below)
		int penalty = leading_letter_penalty * matches[0];
		if (penalty < max_leading_letter_penalty)
			penalty = max_leading_letter_penalty;
		*outScore += penalty;

		// Apply unmatched penalty
		int unmatched = (int)(str - strBegin) - nextMatch;
		*outScore += unmatched_letter_penalty * unmatched;

		// Apply ordering bonuses
		for (int i = 0; i < nextMatch; ++i)
		{
			unsigned char currIdx = matches[i];

			if (i > 0)
			{
				unsigned char prevIdx = matches[i - 1];

				// Sequential
				if (currIdx == (prevIdx + 1))
					*outScore += sequential_bonus;
			}

			// Check for bonuses based on neighbor character value
			if (currIdx > 0)
			{
				// Camel case
				unsigned char neighbor = (unsigned char)strBegin[currIdx - 1];
				unsigned char curr = (unsigned char)strBegin[currIdx];
				if (islower(neighbor) && isupper(curr))
					*outScore += camel_bonus;

				// Separator
				bool neighborSeparator = neighbor == '_' || neighbor == ' ';
				if (neighborSeparator)
					*outScore += separator_bonus;
			}
			else
			{
				// First letter
				*outScore += first_letter_bonus;
			}
		}
	}

	// Return best result
	if (recursiveMatch && (!matched || bestRecursiveScore > *outScore))
	{
		// Recursive score is better than "this".
		// Never copy more than the local buffer holds, even if the caller's buffer is larger.
		// maxMatches is positive here: a recursive match implies the capacity check passed.
		size_t copyLength = (size_t)maxMatches;
		if (copyLength > sizeof(bestRecursiveMatches))
			copyLength = sizeof(bestRecursiveMatches);
		memcpy(matches, bestRecursiveMatches, copyLength);
		*outScore = bestRecursiveScore;
		return true;
	}

	// Otherwise "this" scan is the answer: true if it matched, false if nothing matched
	return matched;
}

40
fuzzy.h

@ -0,0 +1,40 @@
#pragma once
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
//
// VERSION
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
// 0.1.0 (2016-03-28) Initial release
//
// AUTHOR
// Forrest Smith
//
// NOTES
// Compiling
// The implementation lives in fuzzy.c. Compile and link that file together with any code
// that includes this header.
//
// fuzzy_match_simple(...)
// Returns true if each character in pattern is found sequentially within str
//
// fuzzy_match(...)
// Returns true if pattern is found AND calculates a score.
// Performs exhaustive search via recursion to find all possible matches and match with highest
// score.
// Score values have no intrinsic meaning. The possible score range is not normalized and
// varies with the pattern.
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa"
// str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names
// all prefer different tuning.
#include <stdbool.h>
// Returns true if each character in pattern is found sequentially within str.
bool fuzzy_match_simple(char const* pattern, char const* str);
// Returns true if pattern is found in str; on a match the score is written to *outScore.
bool fuzzy_match(char const* pattern, char const* str, int* outScore);
// Same as fuzzy_match(), but also fills matches (capacity maxMatches) with the index in str
// of each matched pattern character.
bool fuzzy_match_with_matches(char const* pattern, char const* str, int* outScore,
unsigned char* matches, int maxMatches);

203
macoyFuzzy.c

@ -0,0 +1,203 @@
#include <emacs-module.h>
#include <stdlib.h>
#include "fuzzy.h"
#include "utils.h"
int plugin_is_GPL_compatible;
// Note that for now this will affect the quality of the results if e.g. the best result is actually
// match #2049. I'll have to make this good eventually
#define MAX_MATCHES 2048
// One candidate that matched the query, paired with the score fuzzy_match() gave it.
typedef struct MacoyFuzzyMatch
{
// The Emacs string value taken from the input vector (returned to Lisp as-is)
emacs_value string;
// Fuzzy score of that string against the query; results are ordered by this
int score;
} MacoyFuzzyMatch;
// Callback that returns the integer sort key of one array element
typedef int (*quicksort_GetValueFunc)(void* value);

// Sorts array in place (modifies array) into ascending order of getValue(element).
// array:    pointers to the elements to sort
// length:   number of pointers in array
// getValue: returns the sort key for an element
// Modified from https://rosettacode.org/wiki/Sorting_algorithms/Quicksort#C (GNU FDL license)
void quicksort(void** array, int length, quicksort_GetValueFunc getValue)
{
	if (length < 2)
		return;

	int pivot = getValue(array[length / 2]);

	int i, j;
	for (i = 0, j = length - 1;; i++, j--)
	{
		// Skip elements that are already on the correct side of the pivot
		while (getValue(array[i]) < pivot)
			i++;
		while (getValue(array[j]) > pivot)
			j--;

		if (i >= j)
			break;

		void* temp = array[i];
		array[i] = array[j];
		array[j] = temp;
	}

	quicksort(array, i, getValue);
	quicksort(array + i, length - i, getValue);
}

// Sorts array in place (modifies array) into descending order of getValue(element).
// Takes the same arguments as quicksort().
// Modified from https://rosettacode.org/wiki/Sorting_algorithms/Quicksort#C (GNU FDL license)
void quicksortReverse(void** array, int length, quicksort_GetValueFunc getValue)
{
	if (length < 2)
		return;

	int pivot = getValue(array[length / 2]);

	int i, j;
	for (i = 0, j = length - 1;; i++, j--)
	{
		// Mirror image of quicksort(): larger keys go to the front
		while (getValue(array[i]) > pivot)
			i++;
		while (getValue(array[j]) < pivot)
			j--;

		if (i >= j)
			break;

		void* temp = array[i];
		array[i] = array[j];
		array[j] = temp;
	}

	// Both partitions must also be sorted descending. Recursing into the ascending
	// quicksort() here would leave each partition in the wrong order.
	quicksortReverse(array, i, getValue);
	quicksortReverse(array + i, length - i, getValue);
}
int getFuzzyMatchValue(void* match)
{
return ((MacoyFuzzyMatch*)match)->score;
}
// Builds an array of pointers into matches, ordered by quicksortReverse() on score.
// The matches array itself is not reordered.
// Returns NULL if the pointer array could not be allocated.
// Make sure to free the returned array
MacoyFuzzyMatch** sortFuzzyMatches(MacoyFuzzyMatch matches[], ptrdiff_t numMatches)
{
	MacoyFuzzyMatch** sortedMatches = malloc(sizeof(*sortedMatches) * (size_t)numMatches);
	if (sortedMatches == NULL)
		return NULL;

	// Index with the same type as the count so large counts cannot overflow the index
	for (ptrdiff_t i = 0; i < numMatches; ++i)
		sortedMatches[i] = &matches[i];

	quicksortReverse((void**)sortedMatches, (int)numMatches, getFuzzyMatchValue);
	return sortedMatches;
}
// Builds a Lisp list holding the string of each match, in the order given.
// Returns nil when there is nothing to list (matches is NULL or numElems <= 0).
// If the temporary argument array cannot be allocated, signals memory-full and returns nil.
emacs_value makeListFromFuzzyMatches(emacs_env* env, MacoyFuzzyMatch** matches, ptrdiff_t numElems)
{
	emacs_value Qnil = env->intern(env, "nil");

	// (list) with no arguments is nil, so skip the allocation and the funcall
	if (matches == NULL || numElems <= 0)
		return Qnil;

	emacs_value* values = malloc(sizeof(*values) * (size_t)numElems);
	if (values == NULL)
	{
		env->non_local_exit_signal(env, env->intern(env, "memory-full"), Qnil);
		return Qnil;
	}

	for (ptrdiff_t i = 0; i < numElems; ++i)
		values[i] = matches[i]->string;

	emacs_value listObject = env->funcall(env, env->intern(env, "list"), numElems, values);
	free(values);
	return listObject;
}
// Takes a query string (args[0]) and a vector of strings (args[1]) and returns a list of the
// strings matching the query, ordered via sortFuzzyMatches(). Returns nil if nothing matched.
// At most MAX_MATCHES matching strings are collected; scanning stops once that many are found.
// If an argument cannot be copied out of Emacs, returns nil and leaves the pending Emacs
// error in place.
// TODO: Make the values cached instead of having to copy the strings over every time!
static emacs_value FmacoyFuzzyFilterVector_fun(emacs_env* env, ptrdiff_t nargs, emacs_value args[],
                                               void* data)
{
	(void)nargs;
	(void)data;

	emacs_value Qnil = env->intern(env, "nil");

	// Get the query string; bail out instead of passing NULL to fuzzy_match()
	char* queryBuffer = NULL;
	size_t queryBufferSize = 0;
	if (!copy_string_contents(env, args[0], &queryBuffer, &queryBufferSize))
		return Qnil;

	// TODO: Make this resizeable or come up with a way to toss out bad scores?
	MacoyFuzzyMatch matches[MAX_MATCHES];
	int numMatches = 0;

	// The vector length does not change while we scan it, so read it once
	const ptrdiff_t numCandidates = env->vec_size(env, args[1]);
	for (ptrdiff_t i = 0; i < numCandidates && numMatches < MAX_MATCHES; ++i)
	{
		emacs_value currentString = env->vec_get(env, args[1], i);

		char* stringToCheckBuffer = NULL;
		size_t stringToCheckBufferSize = 0;
		if (!copy_string_contents(env, currentString, &stringToCheckBuffer,
		                          &stringToCheckBufferSize))
		{
			free(queryBuffer);
			return Qnil;
		}

		int score = 0;
		bool isMatch = fuzzy_match(queryBuffer, stringToCheckBuffer, &score);
		free(stringToCheckBuffer);

		// The string didn't match at all; we won't include it in our results
		if (!isMatch)
			continue;

		// Add the value to our matches (the loop condition guarantees there is room)
		MacoyFuzzyMatch* currentMatch = &matches[numMatches++];
		currentMatch->string = currentString;
		currentMatch->score = score;
	}

	free(queryBuffer);

	// An empty result is the empty list, i.e. nil
	if (numMatches == 0)
		return Qnil;

	MacoyFuzzyMatch** sortedMatches = sortFuzzyMatches(matches, numMatches);
	if (sortedMatches == NULL)
	{
		env->non_local_exit_signal(env, env->intern(env, "memory-full"), Qnil);
		return Qnil;
	}

	emacs_value matchesList = makeListFromFuzzyMatches(env, sortedMatches, numMatches);
	free(sortedMatches);
	return matchesList;
}
// Takes a query string (args[0]) and a string to check (args[1]) and returns the fuzzy score
// of the string as an integer. The score stays 0 when the string does not match.
// If either argument cannot be copied out of Emacs, returns nil and leaves the pending
// Emacs error in place.
static emacs_value FmacoyFuzzyScore_fun(emacs_env* env, ptrdiff_t nargs, emacs_value args[],
                                        void* data)
{
	(void)nargs;
	(void)data;

	emacs_value Qnil = env->intern(env, "nil");

	// Get the string arguments; never hand a NULL buffer to fuzzy_match()
	char* queryBuffer = NULL;
	size_t queryBufferSize = 0;
	if (!copy_string_contents(env, args[0], &queryBuffer, &queryBufferSize))
		return Qnil;

	char* stringToCheckBuffer = NULL;
	size_t stringToCheckBufferSize = 0;
	if (!copy_string_contents(env, args[1], &stringToCheckBuffer, &stringToCheckBufferSize))
	{
		free(queryBuffer);
		return Qnil;
	}

	int outScore = 0;
	fuzzy_match(queryBuffer, stringToCheckBuffer, &outScore);

	free(queryBuffer);
	free(stringToCheckBuffer);

	return env->make_integer(env, outScore);
}
int emacs_module_init(struct emacs_runtime* ert)
{
emacs_env* env = ert->get_environment(ert);
bind_function(env, "macoy-filter-list-fuzzy",
env->make_function(env, 2, 2, FmacoyFuzzyFilterVector_fun,
"Filter vector items by query and sort by score.", NULL));
bind_function(env, "macoy-filter-fuzzy-score",
env->make_function(
env, 2, 2, FmacoyFuzzyScore_fun,
"(query, string). Returns the score of the string based on query", NULL));
provide(env, "macoy-fuzzy");
return 0;
}
/* Integration
Follow flx-ido defadvice for ido to replace (esp ido-set-matches-1)
*/

81
macoyFuzzy.el

@ -0,0 +1,81 @@
;;; macoyFuzzy.el --- fuzzy module integration for ido
;; Copyright © 2018 Macoy Madson
;; Author: Macoy Madson
;; Maintainer: Macoy Madson
;; Description: Module fuzzy search integration for ido
;; Version: 0.1
;; URL: https://github.com/makuto/emacs-fuzzy-module
;; This file is NOT part of GNU Emacs.
;;; License
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 3, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth
;; Floor, Boston, MA 02110-1301, USA.
;;; Commentary:
;; This package provides a native alternative to `ido-mode''s
;; built-in flex matching.
;;; Acknowledgments
;; Forrest Smith wrote the fuzzy matching algorithm.
;; flx-ido.el is what I looked at to know how to integrate my module into ido.
;;; Installation:
;; You must have compiled your Emacs with --with-modules for it to support modules.
;; Add the following code to your init file:
;;
;; (require 'macoy-fuzzy-ido)
;; (ido-mode 1)
;; (ido-everywhere 1)
;; (macoy-fuzzy-ido-mode 1)
;;; Code:
;; Load the compiled native module. Its init function provides the `macoy-fuzzy'
;; feature required on the next line.
;; TODO: Make relative (the absolute path below only exists on the author's machine)
(module-load "/home/macoy/Development/code/repositories/emacs-fuzzy-module/macoyFuzzy.so")
(require 'macoy-fuzzy)
;; Global switch consulted by the `ido-set-matches-1' advice in this file.
;; The lighter starts with a space so it does not run into the neighbouring
;; mode-line text.
(define-minor-mode macoy-fuzzy-ido-mode
  "Toggle Macoy fuzzy mode.
When enabled, ido matches are produced by the native fuzzy module
instead of ido's built-in matching."
  :init-value nil
  :lighter " MacoyFuzzy"
  :group 'ido
  :global t)
(defun macoy-filter-list-fuzzy-ido (query items)
  "Return ITEMS filtered by fuzzy match against QUERY.
When QUERY is empty, ITEMS is returned unchanged.  Otherwise ITEMS is
converted to a vector and passed to `macoy-filter-list-fuzzy'."
  (if (zerop (length query))
      items
    ;; Use the ITEMS argument itself rather than a variable that only
    ;; happens to be dynamically bound by the calling advice.
    (macoy-filter-list-fuzzy query (vconcat items))))
;; The advice carries this package's own name so it cannot replace, or be
;; replaced by, an identically named advice from another package (the
;; previous name carried an flx-ido prefix).
(defadvice ido-set-matches-1 (around macoy-fuzzy-ido-set-matches-1 activate compile)
  "Choose between the regular `ido-set-matches-1' and `macoy-filter-list-fuzzy-ido'.
The original function runs unless `macoy-fuzzy-ido-mode' is enabled."
  (if (not macoy-fuzzy-ido-mode)
      ad-do-it
    ;; `ido-text' is the text typed so far; argument 0 is the candidate list.
    (let* ((query ido-text)
           (original-items (ad-get-arg 0)))
      (setq ad-return-value (macoy-filter-list-fuzzy-ido query original-items)))))
(provide 'macoy-fuzzy-ido)
;;; macoyFuzzy.el ends here

36
macoyFuzzyTests.el

@ -0,0 +1,36 @@
;; Init: load the compiled module and require the `macoy-fuzzy' feature it provides
(module-load
"/home/macoy/Development/code/3rdParty/repositories/emacs/modules/macoyFuzzy/macoyFuzzy.so")
(require 'macoy-fuzzy)
;; Score: print the score of a single string against the query "test"
(message "%s" (macoy-filter-fuzzy-score "test" "blah-tsdfj-sfdasjes-st"))
;; Filter: print the matching strings from a vector of candidates
(message "%s" (macoy-filter-list-fuzzy "test" ["blah-tsdfj-sfdasjes-st" "test" "nomatch" "whew"]))
;; Mode
;; The lighter starts with a space so it does not run into the neighbouring
;; mode-line text.
(define-minor-mode macoy-fuzzy-ido-mode
  "Toggle Macoy fuzzy mode.
When enabled, ido matches are produced by the native fuzzy module
instead of ido's built-in matching."
  :init-value nil
  :lighter " MacoyFuzzy"
  :group 'ido
  :global t)
(defun macoy-filter-list-fuzzy-ido (query items)
  "Return ITEMS filtered by fuzzy match against QUERY.
When QUERY is empty, ITEMS is returned unchanged.  Otherwise ITEMS is
converted to a vector and passed to `macoy-filter-list-fuzzy'."
  (if (zerop (length query))
      items
    ;; Use the ITEMS argument itself rather than a variable that only
    ;; happens to be dynamically bound by the calling advice.
    (macoy-filter-list-fuzzy query (vconcat items))))
;; The advice carries this package's own name so it cannot replace, or be
;; replaced by, an identically named advice from another package (the
;; previous name carried an flx-ido prefix).
(defadvice ido-set-matches-1 (around macoy-fuzzy-ido-set-matches-1 activate compile)
  "Choose between the regular `ido-set-matches-1' and `macoy-filter-list-fuzzy-ido'.
The original function runs unless `macoy-fuzzy-ido-mode' is enabled."
  (if (not macoy-fuzzy-ido-mode)
      ad-do-it
    ;; `ido-text' is the text typed so far; argument 0 is the candidate list.
    (let* ((query ido-text)
           (original-items (ad-get-arg 0)))
      (setq ad-return-value (macoy-filter-list-fuzzy-ido query original-items)))))
;; Turn the mode on so the advice above routes ido matching through the module
(macoy-fuzzy-ido-mode 1)

56
utils.c

@ -0,0 +1,56 @@
#include "utils.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <emacs-module.h>
// From https://phst.github.io/emacs-modules#introduction
// Copies the contents of the Emacs string `value` into a freshly malloc'd buffer.
// The Emacs API is called twice: first with a NULL buffer to learn the required size, then
// again to fill the allocated buffer.
// On success returns true, stores the buffer in *buffer (caller frees it) and stores the
// reported size minus one in *size.
// On failure returns false; if the allocation failed, memory-full is signalled.
bool copy_string_contents(emacs_env* env, emacs_value value, char** buffer, size_t* size)
{
	// First pass: ask Emacs how many bytes are needed
	ptrdiff_t requiredBytes;
	const bool sizeKnown = env->copy_string_contents(env, value, NULL, &requiredBytes);
	if (!sizeKnown)
		return false;
	assert(env->non_local_exit_check(env) == emacs_funcall_exit_return);
	assert(requiredBytes > 0);

	char* storage = malloc((size_t)requiredBytes);
	*buffer = storage;
	if (storage == NULL)
	{
		env->non_local_exit_signal(env, env->intern(env, "memory-full"), env->intern(env, "nil"));
		return false;
	}

	// Second pass: copy the contents; Emacs reports how many bytes it wrote
	ptrdiff_t writtenBytes = requiredBytes;
	const bool copied = env->copy_string_contents(env, value, storage, &writtenBytes);
	if (!copied)
	{
		free(storage);
		*buffer = NULL;
		return false;
	}
	assert(env->non_local_exit_check(env) == emacs_funcall_exit_return);
	assert(writtenBytes == requiredBytes);

	*size = (size_t)(writtenBytes - 1);
	return true;
}
/* Bind NAME to FUN by calling (fset 'NAME FUN) in Emacs. */
void bind_function(emacs_env* env, const char* name, emacs_value Sfun)
{
	emacs_value fsetSymbol = env->intern(env, "fset");
	emacs_value fsetArgs[2];
	fsetArgs[0] = env->intern(env, name);
	fsetArgs[1] = Sfun;
	env->funcall(env, fsetSymbol, 2, fsetArgs);
}
/* Provide FEATURE to Emacs by calling (provide 'FEATURE). */
void provide(emacs_env* env, const char* feature)
{
	emacs_value provideArgs[1];
	provideArgs[0] = env->intern(env, feature);
	emacs_value provideSymbol = env->intern(env, "provide");
	env->funcall(env, provideSymbol, 1, provideArgs);
}

14
utils.h

@ -0,0 +1,14 @@
#pragma once
#include <stdbool.h>
#include <emacs-module.h>
// Copies the Emacs string `value` into a newly allocated *buffer and stores its size in *size
// (the implementation stores the size reported by Emacs minus one — presumably the length
// without the terminating NUL; confirm against the Emacs module API).
// Returns false on failure.
// Free() the buffer once you're done with it
bool copy_string_contents(emacs_env* env, emacs_value value, char** buffer, size_t* size);
/* Bind NAME to FUN. */
void bind_function(emacs_env* env, const char* name, emacs_value Sfun);
/* Provide FEATURE to Emacs. */
void provide(emacs_env* env, const char* feature);
Loading…
Cancel
Save