summaryrefslogtreecommitdiff
path: root/extra/libtextcat/libtextcat-2.2-OOo.patch
diff options
context:
space:
mode:
Diffstat (limited to 'extra/libtextcat/libtextcat-2.2-OOo.patch')
-rw-r--r--extra/libtextcat/libtextcat-2.2-OOo.patch634
1 files changed, 0 insertions, 634 deletions
diff --git a/extra/libtextcat/libtextcat-2.2-OOo.patch b/extra/libtextcat/libtextcat-2.2-OOo.patch
deleted file mode 100644
index 70f9d8d23..000000000
--- a/extra/libtextcat/libtextcat-2.2-OOo.patch
+++ /dev/null
@@ -1,634 +0,0 @@
-diff -ruN libtextcat-2.2.part1/src/constants.h libtextcat-2.2/src/constants.h
---- libtextcat-2.2.part1/src/constants.h 2007-07-25 10:46:49.000000000 +0100
-+++ libtextcat-2.2/src/constants.h 2007-07-25 10:47:25.000000000 +0100
-@@ -39,6 +39,8 @@
- */
- #include <limits.h>
-
-+#define _UTF8_
-+
- #define DESCRIPTION "out of place"
-
- /* Reported matches are those fingerprints with a score less than best
-@@ -59,14 +61,21 @@
- /* Maximum number of n-grams in a fingerprint */
- #define MAXNGRAMS 400
-
--/* Maximum size of an n-gram? */
--#define MAXNGRAMSIZE 5
-+/* Maximum number of character of an n-gram? */
-+#define MAXNGRAMSYMBOL 5
-+
-+/* Maximum size of the string representing an n-gram (must be greater than number of symbol) */
-+#ifdef _UTF8_
-+#define MAXNGRAMSIZE 20
-+#else
-+#define MAXNGRAMSIZE MAXNGRAMSYMBOL
-+#endif
-
- /* Which characters are not acceptable in n-grams? */
- #define INVALID(c) (isspace((int)c) || isdigit((int)c))
-
- /* Minimum size (in characters) for accepting a document */
--#define MINDOCSIZE 25
-+#define MINDOCSIZE 6
-
- /* Maximum penalty for missing an n-gram in fingerprint */
- #define MAXOUTOFPLACE 400
-@@ -76,4 +85,7 @@
-
- #define MAXSCORE INT_MAX
-
-+/* where the fingerprints files are stored */
-+#define DEFAULT_FINGERPRINTS_PATH ""
-+
- #endif
-diff -ruN libtextcat-2.2.part1/src/fingerprint.c libtextcat-2.2/src/fingerprint.c
---- libtextcat-2.2.part1/src/fingerprint.c 2007-07-25 10:46:49.000000000 +0100
-+++ libtextcat-2.2/src/fingerprint.c 2007-07-25 10:47:25.000000000 +0100
-@@ -63,6 +63,10 @@
- * - put table/heap datastructure in a separate file.
- */
-
-+#ifndef _UTF8_
-+#define _UTF8_
-+#endif
-+
- #include "config.h"
- #include <stdio.h>
- #ifdef HAVE_STDLIB_H
-@@ -80,10 +84,12 @@
- #include "wg_mempool.h"
- #include "constants.h"
-
-+#include "utf8misc.h"
-
- #define TABLESIZE (1<<TABLEPOW)
- #define TABLEMASK ((TABLESIZE)-1)
-
-+
- typedef struct {
-
- sint2 rank;
-@@ -134,29 +140,14 @@
- }
-
-
--/* checks if n-gram lex is a prefix of key and of length len */
--inline int issame( char *lex, char *key, int len )
--{
-- int i;
-- for (i=0; i<len; i++) {
-- if ( key[i] != lex[i] ) {
-- return 0;
-- }
-- }
-- if ( lex[i] != 0 ) {
-- return 0;
-- }
-- return 1;
--}
--
-
- /* increases frequency of ngram(p,len) */
--static inline int increasefreq( table_t *t, char *p, int len )
--{
-- uint4 hash = simplehash( p, len ) & TABLEMASK;
-+static int increasefreq( table_t *t, char *p, int len )
-+{
-+ uint4 hash = simplehash( p, len ) & TABLEMASK;
- entry_t *entry = t->table[ hash ];
--
-- while ( entry ) {
-+
-+ while ( entry ) {
- if ( issame( entry->str, p, len ) ) {
- /*** Found it! ***/
- entry->cnt++;
-@@ -168,7 +159,7 @@
- }
-
- /*** Not found, so create ***/
-- entry = wgmempool_alloc( t->pool, sizeof(entry_t) );
-+ entry = (entry_t*)(wgmempool_alloc( t->pool, sizeof(entry_t) ));
- strcpy( entry->str, p );
- entry->cnt = 1;
-
-@@ -181,12 +172,12 @@
- #if 0
-
- /* looks up ngram(p,len) */
--static entry_t *findfreq( table_t *t, char *p, int len )
--{
-- uint4 hash = simplehash( p, len ) & TABLEMASK;
-+static entry_t *findfreq( table_t *t, char *p, int len )
-+{
-+ uint4 hash = simplehash( p, len ) & TABLEMASK;
- entry_t *entry = t->table[ hash ];
--
-- while ( entry ) {
-+
-+ while ( entry ) {
- if ( issame( entry->str, p, len ) ) {
- return entry;
- }
-@@ -219,7 +210,7 @@
- #define GREATER(x,y) ((x).cnt > (y).cnt)
- #define LESS(x,y) ((x).cnt < (y).cnt)
-
--inline static void siftup( table_t *t, unsigned int child )
-+static void siftup( table_t *t, unsigned int child )
- {
- entry_t *heap = t->heap;
- unsigned int parent = (child-1) >> 1;
-@@ -241,7 +232,7 @@
- }
-
-
--inline static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
-+static void siftdown( table_t *t, unsigned int heapsize, uint4 parent )
- {
- entry_t *heap = t->heap;
- unsigned int child = parent*2 + 1;
-@@ -458,21 +449,27 @@
- return dest;
- }
-
--
-+/**
-+* this function extract all n-gram from past buffer and put them into the table "t"
-+* [modified] by Jocelyn Merand to accept utf-8 multi-character symbols to be used in OpenOffice
-+*/
- static void createngramtable( table_t *t, const char *buf )
- {
- char n[MAXNGRAMSIZE+1];
- const char *p = buf;
- int i;
-+ int pointer = 0;
-
- /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/
-- for (;;p++) {
-+ while(1) {
-
-- const char *q = p;
-+ const char *q = &p[pointer]; /*[modified] previously p++ above (for(;;p++)) now, it's pointer wich is increased so we have to get the new pointer on the buffer*/
- char *m = n;
-
- /*** First char may be an underscore ***/
-- *m++ = *q++;
-+ int decay = charcopy(q, m); /*[modified] previously *q++ = *m++*/
-+ q = &(p[pointer+decay]); /*[modified] the old copying method do not manage multi-character symbols*/
-+ m += decay; /*[modified]*/
- *m = '\0';
-
- increasefreq( t, n, 1 );
-@@ -482,19 +479,22 @@
- }
-
- /*** Let the compiler unroll this ***/
-- for ( i=2; i<=MAXNGRAMSIZE; i++) {
-+ for ( i=2; i<=MAXNGRAMSYMBOL; i++) {
-
-- *m++ = *q;
-+ decay = charcopy(q, m); /*[modified] like above*/
-+ m += decay;
- *m = '\0';
-
- increasefreq( t, n, i );
-
- if ( *q == '_' ) break;
-- q++;
-+ q += decay;
- if ( *q == '\0' ) {
- return;
- }
- }
-+
-+ pointer = nextcharstart(p,pointer); /*[modified] p[pointer] must point on the next start of symbol, but whith utf next start is not surely next char*/
- }
- return;
- }
-diff -ruN libtextcat-2.2.part1/src/fingerprint.h.orig libtextcat-2.2/src/fingerprint.h.orig
---- libtextcat-2.2.part1/src/fingerprint.h.orig 1970-01-01 01:00:00.000000000 +0100
-+++ libtextcat-2.2/src/fingerprint.h.orig 2007-07-25 10:47:22.000000000 +0100
-@@ -0,0 +1,55 @@
-+#ifndef _FINGERPRINT_H_
-+#define _FINGERPRINT_H_
-+/*
-+ * Copyright (C) 2003 WiseGuys Internet B.V.
-+ *
-+ * THE BSD LICENSE
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * - Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the
-+ * distribution.
-+ *
-+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
-+ * its contributors may be used to endorse or promote products derived
-+ * from this software without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ */
-+#include "common.h"
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+extern void *fp_Init(const char *name);
-+extern void fp_Done( void *handle );
-+extern int fp_Create( void *handle, const char *buffer, uint4 bufsize, uint4 maxngrams );
-+extern int fp_Read( void *handle, const char *fname, int maxngrams );
-+extern sint4 fp_Compare( void *cat, void *unknown, int cutoff );
-+extern void fp_Show( void *handle );
-+extern const char *fp_Name( void *handle );
-+extern void fp_Print( void *handle, FILE *fp );
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif
-diff -ruN libtextcat-2.2.part1/src/textcat.c libtextcat-2.2/src/textcat.c
---- libtextcat-2.2.part1/src/textcat.c 2007-07-25 10:46:49.000000000 +0100
-+++ libtextcat-2.2/src/textcat.c 2007-07-25 10:47:25.000000000 +0100
-@@ -74,6 +74,7 @@
- typedef struct {
-
- void **fprint;
-+ char *fprint_disable;
- uint4 size;
- uint4 maxsize;
-
-@@ -112,11 +113,21 @@
- fp_Done( h->fprint[i] );
- }
- wg_free( h->fprint );
-+ wg_free( h->fprint_disable );
- wg_free( h );
-
- }
-
--extern void *textcat_Init( const char *conffile )
-+/** Replaces older function */
-+extern void *textcat_Init( const char *conffile ){
-+ return special_textcat_Init( conffile, DEFAULT_FINGERPRINTS_PATH );
-+}
-+
-+/**
-+ * Originaly this function had only one parameter (conffile) it has been modified since OOo use
-+ * Basicaly prefix is the directory path where fingerprints are stored
-+ */
-+extern void *special_textcat_Init( const char *conffile, const char *prefix )
- {
- textcat_t *h;
- char line[1024];
-@@ -134,11 +145,13 @@
- h->size = 0;
- h->maxsize = 16;
- h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );
-+ h->fprint_disable = (char *)wg_malloc( sizeof(char*) * h->maxsize ); /*added to store the state of languages*/
-
- while ( wg_getline( line, 1024, fp ) ) {
- char *p;
- char *segment[4];
-- int res;
-+ char finger_print_file_name[512];
-+ int res;
-
- /*** Skip comments ***/
- #ifdef HAVE_STRCHR
-@@ -156,17 +169,23 @@
- /*** Ensure enough space ***/
- if ( h->size == h->maxsize ) {
- h->maxsize *= 2;
-- h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
-+ h->fprint = (void **)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
-+ h->fprint_disable = (char *)wg_realloc( h->fprint_disable, sizeof(char*) * h->maxsize );
- }
-
- /*** Load data ***/
- if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
- goto ERROR;
- }
-- if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) {
-+ finger_print_file_name[0] = '\0';
-+ strcat(finger_print_file_name, prefix);
-+ strcat(finger_print_file_name, segment[0]);
-+
-+ if ( fp_Read( h->fprint[h->size], finger_print_file_name, 400 ) == 0 ) {
- textcat_Done(h);
- goto ERROR;
-- }
-+ }
-+ h->fprint_disable[h->size] = 0xF0; /*0xF0 is the code for enabled languages, 0x0F is for disabled*/
- h->size++;
- }
-
-@@ -203,11 +222,18 @@
- result = _TEXTCAT_RESULT_SHORT;
- goto READY;
- }
--
-+
- /*** Calculate the score for each category. ***/
- for (i=0; i<h->size; i++) {
-- int score = fp_Compare( h->fprint[i], unknown, threshold );
-- candidates[i].score = score;
-+ int score;
-+ if(h->fprint_disable[i] & 0x0F){ /*if this language is disabled*/
-+ score = MAXSCORE;
-+ }
-+ else{
-+ score = fp_Compare( h->fprint[i], unknown, threshold );
-+ /*printf("Score for %s : %i\n", fp_Name(h->fprint[i]), score);*/
-+ }
-+ candidates[i].score = score;
- candidates[i].name = fp_Name( h->fprint[i] );
- if ( score < minscore ) {
- minscore = score;
-diff -ruN libtextcat-2.2.part1/src/textcat.h libtextcat-2.2/src/textcat.h
---- libtextcat-2.2.part1/src/textcat.h 2007-07-25 10:46:49.000000000 +0100
-+++ libtextcat-2.2/src/textcat.h 2007-07-25 10:48:18.000000000 +0100
-@@ -55,10 +54,19 @@
- * Returns: handle on success, NULL on error. (At the moment, the
- * only way errors can occur, is when the library cannot read the
- * conffile, or one of the fingerprint files listed in it.)
-+ *
-+ * Replace older function (and has exacly the same behaviour)
-+ * see below
- */
- extern void *textcat_Init( const char *conffile );
-
- /**
-+ * Originaly this function had only one parameter (conffile) it has been modified since OOo must be able to load alternativ DB
-+ * Basicaly prefix is the directory path where fingerprints are stored
-+ */
-+extern void *special_textcat_Init( const char *conffile, const char *prefix );
-+
-+/**
- * textcat_Done() - Free up resources for handle
- */
- extern void textcat_Done( void *handle );
-diff -ruN libtextcat-2.2.part1/src/utf8misc.c libtextcat-2.2/src/utf8misc.c
---- libtextcat-2.2.part1/src/utf8misc.c 1970-01-01 01:00:00.000000000 +0100
-+++ libtextcat-2.2/src/utf8misc.c 2007-07-25 10:48:57.000000000 +0100
-@@ -0,0 +1,132 @@
-+/***************************************************************************
-+ * Copyright (C) 2006 by Jocelyn Merand *
-+ * joc.mer@gmail.com *
-+ * *
-+ * THE BSD LICENSE
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * - Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the
-+ * distribution.
-+ *
-+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
-+ * its contributors may be used to endorse or promote products derived
-+ * from this software without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ ***************************************************************************/
-+
-+#ifndef _UTF8_MISC_H_
-+#include "utf8misc.h"
-+#endif
-+
-+
-+int nextcharstart(const char *str, int position){
-+ int pointer = position;
-+
-+ if(str[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/
-+
-+ /*then str[pointer] is an escape character*/
-+
-+ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*and we use it to count (by bit translation) following characters (only the weightest part)*/
-+
-+ while(escape_char & ESCAPE_MASK && str[pointer]){/*every step, we move the byte of 1 bit left, when first bit is 0, it's finished*/
-+ escape_char = escape_char <<1;
-+ ++pointer;
-+ }
-+ }
-+ if(str[pointer]){ /*finaly, if we are not on the \0 character, we jump to the next character*/
-+ ++pointer;
-+ }
-+ return pointer;
-+}
-+
-+
-+int charcopy(const char *str, char *dest){
-+
-+ int pointer = 0;
-+ if(str[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/
-+
-+ /*then str[pointer] is an escape character*/
-+
-+ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*and we use it to count following characters (only the weightest part)*/
-+
-+ while(escape_char & ESCAPE_MASK && str[pointer]){ /*every step, we move the byte of 1 bit left, when first bit is 0, it's finished*/
-+ dest[pointer] = str[pointer];
-+ escape_char = escape_char <<1;
-+ ++pointer;
-+ }
-+ }
-+ if(str[pointer]){
-+ dest[pointer] = str[pointer];
-+ ++pointer;
-+ }
-+
-+ return pointer;
-+}
-+
-+
-+int issame( char *lex, char *key, int len )
-+{
-+ /*printf("[%s] prefix of [%s] with length %i", lex, key, len);*/
-+ int char_counter = 0;
-+ int pointer = 0;
-+ while(char_counter < len) {
-+
-+ if(key[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/
-+
-+ /*then key[pointer] is an escap character*/
-+
-+ char escape_char = ((key[pointer] & WEIGHT_MASK) << 1); /*and we use it to count (only the weightest part)*/
-+
-+ while(escape_char & ESCAPE_MASK && key[pointer] == lex[pointer] ){
-+ escape_char = escape_char <<1;
-+ ++pointer;
-+ }
-+ }
-+ ++char_counter; /*and we are on a new utf8 character*/
-+ if ( key[pointer] != lex[pointer] ) {
-+ return 0;
-+ /*printf(" NO\n", lex, key, len);*/
-+ }
-+ ++pointer;
-+ }
-+ if ( lex[pointer] != '\0' ) {
-+ return 0;
-+ /*printf(" NO\n");*/
-+ }
-+
-+ /*printf(" YES\n");*/
-+
-+ return 1;
-+}
-+
-+
-+extern int utfstrlen(const char* str){
-+ int char_counter = 0;
-+ int pointer = 0;
-+ while(str[pointer]) {
-+ pointer = nextcharstart(str, pointer);
-+
-+ ++char_counter; /*and we are on a new utf8 character*/
-+ }
-+ return char_counter;
-+}
-+
-diff -ruN libtextcat-2.2.part1/src/utf8misc.h libtextcat-2.2/src/utf8misc.h
---- libtextcat-2.2.part1/src/utf8misc.h 1970-01-01 01:00:00.000000000 +0100
-+++ libtextcat-2.2/src/utf8misc.h 2007-07-25 10:48:57.000000000 +0100
-@@ -0,0 +1,88 @@
-+/***************************************************************************
-+ * Copyright (C) 2006 by Jocelyn Merand *
-+ * joc.mer@gmail.com *
-+ * *
-+ * THE BSD LICENSE
-+ *
-+ * Redistribution and use in source and binary forms, with or without
-+ * modification, are permitted provided that the following conditions
-+ * are met:
-+ *
-+ * - Redistributions of source code must retain the above copyright
-+ * notice, this list of conditions and the following disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above copyright
-+ * notice, this list of conditions and the following disclaimer in the
-+ * documentation and/or other materials provided with the
-+ * distribution.
-+ *
-+ * - Neither the name of the WiseGuys Internet B.V. nor the names of
-+ * its contributors may be used to endorse or promote products derived
-+ * from this software without specific prior written permission.
-+ *
-+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+ ***************************************************************************/
-+
-+#ifndef _UTF8_MISC_H_
-+#define _UTF8_MISC_H_
-+
-+/**
-+ * These variables are used in character processing functions
-+ * These have been added to manage utf-8 symbols, particularly escape chars
-+ */
-+#ifdef _UTF8_
-+#define ESCAPE_MASK 0x80
-+#define WEIGHT_MASK 0xF0
-+#else
-+#define ESCAPE_MASK 0xFF
-+#define WEIGHT_MASK 0x00
-+#endif
-+
-+
-+/*
-+ * Is used to jump to the next start of char
-+ * of course it's only usefull when encoding is utf-8
-+ * This function have been added by Jocelyn Merand to use libtextcat in OOo
-+ */
-+int nextcharstart(const char *str, int position);
-+
-+
-+/*Copy the char in str to dest
-+ * of course it's only usefull when encoding is utf8 and the symbol is encoded with more than 1 char
-+ * return the number of char jumped
-+ * This function have been added by Jocelyn Merand to use libtextcat in OOo
-+ */
-+int charcopy(const char *str, char *dest);
-+
-+
-+/* checks if n-gram lex is a prefix of key and of length len
-+* if _UTF8_ is defined, it uses escap characters and len is not realy the length of lex
-+* in this case, len is the number of utf-8 char strlen("€") == 3 but len == 1
-+*/
-+int issame( char *lex, char *key, int len );
-+
-+
-+/* Counts the number of characters
-+* if _UTF8_ is defined, it uses escap characters and the result is not realy the length of str
-+* in this case, the result is the number of utf-8 char strlen("€") == 3 but utfstrlen("€") == 1
-+*/
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+extern int utfstrlen(const char* str);
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif
-+
---- libtextcat-2.2.part2/src/Makefile.am 2007-07-25 10:55:02.000000000 +0100
-+++ libtextcat-2.2/src/Makefile.am 2007-07-25 10:55:52.000000000 +0100
-@@ -12,11 +12,11 @@
-
- libtextcat_includedir = $(includedir)/libtextcat
- libtextcat_include_HEADERS = \
-- common.h constants.h fingerprint.h textcat.h
-+ common.h constants.h fingerprint.h textcat.h utf8misc.h
-
- lib_LTLIBRARIES = libtextcat.la
- libtextcat_la_SOURCES = \
-- common.c fingerprint.c textcat.c wg_mempool.c
-+ common.c fingerprint.c textcat.c wg_mempool.c utf8misc.c
-
- bin_PROGRAMS = createfp
- createfp_SOURCES = createfp.c