diff options
Diffstat (limited to 'extra/libtextcat/libtextcat-2.2-OOo.patch')
-rw-r--r-- | extra/libtextcat/libtextcat-2.2-OOo.patch | 634 |
1 files changed, 0 insertions, 634 deletions
diff --git a/extra/libtextcat/libtextcat-2.2-OOo.patch b/extra/libtextcat/libtextcat-2.2-OOo.patch deleted file mode 100644 index 70f9d8d23..000000000 --- a/extra/libtextcat/libtextcat-2.2-OOo.patch +++ /dev/null @@ -1,634 +0,0 @@ -diff -ruN libtextcat-2.2.part1/src/constants.h libtextcat-2.2/src/constants.h ---- libtextcat-2.2.part1/src/constants.h 2007-07-25 10:46:49.000000000 +0100 -+++ libtextcat-2.2/src/constants.h 2007-07-25 10:47:25.000000000 +0100 -@@ -39,6 +39,8 @@ - */ - #include <limits.h> - -+#define _UTF8_ -+ - #define DESCRIPTION "out of place" - - /* Reported matches are those fingerprints with a score less than best -@@ -59,14 +61,21 @@ - /* Maximum number of n-grams in a fingerprint */ - #define MAXNGRAMS 400 - --/* Maximum size of an n-gram? */ --#define MAXNGRAMSIZE 5 -+/* Maximum number of character of an n-gram? */ -+#define MAXNGRAMSYMBOL 5 -+ -+/* Maximum size of the string representing an n-gram (must be greater than number of symbol) */ -+#ifdef _UTF8_ -+#define MAXNGRAMSIZE 20 -+#else -+#define MAXNGRAMSIZE MAXNGRAMSYMBOL -+#endif - - /* Which characters are not acceptable in n-grams? */ - #define INVALID(c) (isspace((int)c) || isdigit((int)c)) - - /* Minimum size (in characters) for accepting a document */ --#define MINDOCSIZE 25 -+#define MINDOCSIZE 6 - - /* Maximum penalty for missing an n-gram in fingerprint */ - #define MAXOUTOFPLACE 400 -@@ -76,4 +85,7 @@ - - #define MAXSCORE INT_MAX - -+/* where the fingerprints files are stored */ -+#define DEFAULT_FINGERPRINTS_PATH "" -+ - #endif -diff -ruN libtextcat-2.2.part1/src/fingerprint.c libtextcat-2.2/src/fingerprint.c ---- libtextcat-2.2.part1/src/fingerprint.c 2007-07-25 10:46:49.000000000 +0100 -+++ libtextcat-2.2/src/fingerprint.c 2007-07-25 10:47:25.000000000 +0100 -@@ -63,6 +63,10 @@ - * - put table/heap datastructure in a separate file. - */ - -+#ifndef _UTF8_ -+#define _UTF8_ -+#endif -+ - #include "config.h" - #include <stdio.h> - #ifdef HAVE_STDLIB_H -@@ -80,10 +84,12 @@ - #include "wg_mempool.h" - #include "constants.h" - -+#include "utf8misc.h" - - #define TABLESIZE (1<<TABLEPOW) - #define TABLEMASK ((TABLESIZE)-1) - -+ - typedef struct { - - sint2 rank; -@@ -134,29 +140,14 @@ - } - - --/* checks if n-gram lex is a prefix of key and of length len */ --inline int issame( char *lex, char *key, int len ) --{ -- int i; -- for (i=0; i<len; i++) { -- if ( key[i] != lex[i] ) { -- return 0; -- } -- } -- if ( lex[i] != 0 ) { -- return 0; -- } -- return 1; --} -- - - /* increases frequency of ngram(p,len) */ --static inline int increasefreq( table_t *t, char *p, int len ) --{ -- uint4 hash = simplehash( p, len ) & TABLEMASK; -+static int increasefreq( table_t *t, char *p, int len ) -+{ -+ uint4 hash = simplehash( p, len ) & TABLEMASK; - entry_t *entry = t->table[ hash ]; -- -- while ( entry ) { -+ -+ while ( entry ) { - if ( issame( entry->str, p, len ) ) { - /*** Found it! ***/ - entry->cnt++; -@@ -168,7 +159,7 @@ - } - - /*** Not found, so create ***/ -- entry = wgmempool_alloc( t->pool, sizeof(entry_t) ); -+ entry = (entry_t*)(wgmempool_alloc( t->pool, sizeof(entry_t) )); - strcpy( entry->str, p ); - entry->cnt = 1; - -@@ -181,12 +172,12 @@ - #if 0 - - /* looks up ngram(p,len) */ --static entry_t *findfreq( table_t *t, char *p, int len ) --{ -- uint4 hash = simplehash( p, len ) & TABLEMASK; -+static entry_t *findfreq( table_t *t, char *p, int len ) -+{ -+ uint4 hash = simplehash( p, len ) & TABLEMASK; - entry_t *entry = t->table[ hash ]; -- -- while ( entry ) { -+ -+ while ( entry ) { - if ( issame( entry->str, p, len ) ) { - return entry; - } -@@ -219,7 +210,7 @@ - #define GREATER(x,y) ((x).cnt > (y).cnt) - #define LESS(x,y) ((x).cnt < (y).cnt) - --inline static void siftup( table_t *t, unsigned int child ) -+static void siftup( table_t *t, unsigned int child ) - { - entry_t *heap = t->heap; - unsigned int parent = (child-1) >> 1; -@@ -241,7 +232,7 @@ - } - - --inline static void siftdown( table_t *t, unsigned int heapsize, uint4 parent ) -+static void siftdown( table_t *t, unsigned int heapsize, uint4 parent ) - { - entry_t *heap = t->heap; - unsigned int child = parent*2 + 1; -@@ -458,21 +449,27 @@ - return dest; - } - -- -+/** -+* this function extract all n-gram from past buffer and put them into the table "t" -+* [modified] by Jocelyn Merand to accept utf-8 multi-character symbols to be used in OpenOffice -+*/ - static void createngramtable( table_t *t, const char *buf ) - { - char n[MAXNGRAMSIZE+1]; - const char *p = buf; - int i; -+ int pointer = 0; - - /*** Get all n-grams where 1<=n<=MAXNGRAMSIZE. Allow underscores only at borders. ***/ -- for (;;p++) { -+ while(1) { - -- const char *q = p; -+ const char *q = &p[pointer]; /*[modified] previously p++ above (for(;;p++)) now, it's pointer wich is increased so we have to get the new pointer on the buffer*/ - char *m = n; - - /*** First char may be an underscore ***/ -- *m++ = *q++; -+ int decay = charcopy(q, m); /*[modified] previously *q++ = *m++*/ -+ q = &(p[pointer+decay]); /*[modified] the old copying method do not manage multi-character symbols*/ -+ m += decay; /*[modified]*/ - *m = '\0'; - - increasefreq( t, n, 1 ); -@@ -482,19 +479,22 @@ - } - - /*** Let the compiler unroll this ***/ -- for ( i=2; i<=MAXNGRAMSIZE; i++) { -+ for ( i=2; i<=MAXNGRAMSYMBOL; i++) { - -- *m++ = *q; -+ decay = charcopy(q, m); /*[modified] like above*/ -+ m += decay; - *m = '\0'; - - increasefreq( t, n, i ); - - if ( *q == '_' ) break; -- q++; -+ q += decay; - if ( *q == '\0' ) { - return; - } - } -+ -+ pointer = nextcharstart(p,pointer); /*[modified] p[pointer] must point on the next start of symbol, but whith utf next start is not surely next char*/ - } - return; - } -diff -ruN libtextcat-2.2.part1/src/fingerprint.h.orig libtextcat-2.2/src/fingerprint.h.orig ---- libtextcat-2.2.part1/src/fingerprint.h.orig 1970-01-01 01:00:00.000000000 +0100 -+++ libtextcat-2.2/src/fingerprint.h.orig 2007-07-25 10:47:22.000000000 +0100 -@@ -0,0 +1,55 @@ -+#ifndef _FINGERPRINT_H_ -+#define _FINGERPRINT_H_ -+/* -+ * Copyright (C) 2003 WiseGuys Internet B.V. -+ * -+ * THE BSD LICENSE -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * - Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the -+ * distribution. -+ * -+ * - Neither the name of the WiseGuys Internet B.V. nor the names of -+ * its contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ */ -+#include "common.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+extern void *fp_Init(const char *name); -+extern void fp_Done( void *handle ); -+extern int fp_Create( void *handle, const char *buffer, uint4 bufsize, uint4 maxngrams ); -+extern int fp_Read( void *handle, const char *fname, int maxngrams ); -+extern sint4 fp_Compare( void *cat, void *unknown, int cutoff ); -+extern void fp_Show( void *handle ); -+extern const char *fp_Name( void *handle ); -+extern void fp_Print( void *handle, FILE *fp ); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff -ruN libtextcat-2.2.part1/src/textcat.c libtextcat-2.2/src/textcat.c ---- libtextcat-2.2.part1/src/textcat.c 2007-07-25 10:46:49.000000000 +0100 -+++ libtextcat-2.2/src/textcat.c 2007-07-25 10:47:25.000000000 +0100 -@@ -74,6 +74,7 @@ - typedef struct { - - void **fprint; -+ char *fprint_disable; - uint4 size; - uint4 maxsize; - -@@ -112,11 +113,21 @@ - fp_Done( h->fprint[i] ); - } - wg_free( h->fprint ); -+ wg_free( h->fprint_disable ); - wg_free( h ); - - } - --extern void *textcat_Init( const char *conffile ) -+/** Replaces older function */ -+extern void *textcat_Init( const char *conffile ){ -+ return special_textcat_Init( conffile, DEFAULT_FINGERPRINTS_PATH ); -+} -+ -+/** -+ * Originaly this function had only one parameter (conffile) it has been modified since OOo use -+ * Basicaly prefix is the directory path where fingerprints are stored -+ */ -+extern void *special_textcat_Init( const char *conffile, const char *prefix ) - { - textcat_t *h; - char line[1024]; -@@ -134,11 +145,13 @@ - h->size = 0; - h->maxsize = 16; - h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize ); -+ h->fprint_disable = (char *)wg_malloc( sizeof(char*) * h->maxsize ); /*added to store the state of languages*/ - - while ( wg_getline( line, 1024, fp ) ) { - char *p; - char *segment[4]; -- int res; -+ char finger_print_file_name[512]; -+ int res; - - /*** Skip comments ***/ - #ifdef HAVE_STRCHR -@@ -156,17 +169,23 @@ - /*** Ensure enough space ***/ - if ( h->size == h->maxsize ) { - h->maxsize *= 2; -- h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize ); -+ h->fprint = (void **)wg_realloc( h->fprint, sizeof(void*) * h->maxsize ); -+ h->fprint_disable = (char *)wg_realloc( h->fprint_disable, sizeof(char*) * h->maxsize ); - } - - /*** Load data ***/ - if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) { - goto ERROR; - } -- if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) { -+ finger_print_file_name[0] = '\0'; -+ strcat(finger_print_file_name, prefix); -+ strcat(finger_print_file_name, segment[0]); -+ -+ if ( fp_Read( h->fprint[h->size], finger_print_file_name, 400 ) == 0 ) { - textcat_Done(h); - goto ERROR; -- } -+ } -+ h->fprint_disable[h->size] = 0xF0; /*0xF0 is the code for enabled languages, 0x0F is for disabled*/ - h->size++; - } - -@@ -203,11 +222,18 @@ - result = _TEXTCAT_RESULT_SHORT; - goto READY; - } -- -+ - /*** Calculate the score for each category. ***/ - for (i=0; i<h->size; i++) { -- int score = fp_Compare( h->fprint[i], unknown, threshold ); -- candidates[i].score = score; -+ int score; -+ if(h->fprint_disable[i] & 0x0F){ /*if this language is disabled*/ -+ score = MAXSCORE; -+ } -+ else{ -+ score = fp_Compare( h->fprint[i], unknown, threshold ); -+ /*printf("Score for %s : %i\n", fp_Name(h->fprint[i]), score);*/ -+ } -+ candidates[i].score = score; - candidates[i].name = fp_Name( h->fprint[i] ); - if ( score < minscore ) { - minscore = score; -diff -ruN libtextcat-2.2.part1/src/textcat.h libtextcat-2.2/src/textcat.h ---- libtextcat-2.2.part1/src/textcat.h 2007-07-25 10:46:49.000000000 +0100 -+++ libtextcat-2.2/src/textcat.h 2007-07-25 10:48:18.000000000 +0100 -@@ -55,10 +54,19 @@ - * Returns: handle on success, NULL on error. (At the moment, the - * only way errors can occur, is when the library cannot read the - * conffile, or one of the fingerprint files listed in it.) -+ * -+ * Replace older function (and has exacly the same behaviour) -+ * see below - */ - extern void *textcat_Init( const char *conffile ); - - /** -+ * Originaly this function had only one parameter (conffile) it has been modified since OOo must be able to load alternativ DB -+ * Basicaly prefix is the directory path where fingerprints are stored -+ */ -+extern void *special_textcat_Init( const char *conffile, const char *prefix ); -+ -+/** - * textcat_Done() - Free up resources for handle - */ - extern void textcat_Done( void *handle ); -diff -ruN libtextcat-2.2.part1/src/utf8misc.c libtextcat-2.2/src/utf8misc.c ---- libtextcat-2.2.part1/src/utf8misc.c 1970-01-01 01:00:00.000000000 +0100 -+++ libtextcat-2.2/src/utf8misc.c 2007-07-25 10:48:57.000000000 +0100 -@@ -0,0 +1,132 @@ -+/*************************************************************************** -+ * Copyright (C) 2006 by Jocelyn Merand * -+ * joc.mer@gmail.com * -+ * * -+ * THE BSD LICENSE -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * - Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the -+ * distribution. -+ * -+ * - Neither the name of the WiseGuys Internet B.V. nor the names of -+ * its contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ ***************************************************************************/ -+ -+#ifndef _UTF8_MISC_H_ -+#include "utf8misc.h" -+#endif -+ -+ -+int nextcharstart(const char *str, int position){ -+ int pointer = position; -+ -+ if(str[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/ -+ -+ /*then str[pointer] is an escape character*/ -+ -+ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*and we use it to count (by bit translation) following characters (only the weightest part)*/ -+ -+ while(escape_char & ESCAPE_MASK && str[pointer]){/*every step, we move the byte of 1 bit left, when first bit is 0, it's finished*/ -+ escape_char = escape_char <<1; -+ ++pointer; -+ } -+ } -+ if(str[pointer]){ /*finaly, if we are not on the \0 character, we jump to the next character*/ -+ ++pointer; -+ } -+ return pointer; -+} -+ -+ -+int charcopy(const char *str, char *dest){ -+ -+ int pointer = 0; -+ if(str[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/ -+ -+ /*then str[pointer] is an escape character*/ -+ -+ char escape_char = ((str[pointer] & WEIGHT_MASK) << 1); /*and we use it to count following characters (only the weightest part)*/ -+ -+ while(escape_char & ESCAPE_MASK && str[pointer]){ /*every step, we move the byte of 1 bit left, when first bit is 0, it's finished*/ -+ dest[pointer] = str[pointer]; -+ escape_char = escape_char <<1; -+ ++pointer; -+ } -+ } -+ if(str[pointer]){ -+ dest[pointer] = str[pointer]; -+ ++pointer; -+ } -+ -+ return pointer; -+} -+ -+ -+int issame( char *lex, char *key, int len ) -+{ -+ /*printf("[%s] prefix of [%s] with length %i", lex, key, len);*/ -+ int char_counter = 0; -+ int pointer = 0; -+ while(char_counter < len) { -+ -+ if(key[pointer] & ESCAPE_MASK){ /*if the first bit of the current char is 1*/ -+ -+ /*then key[pointer] is an escap character*/ -+ -+ char escape_char = ((key[pointer] & WEIGHT_MASK) << 1); /*and we use it to count (only the weightest part)*/ -+ -+ while(escape_char & ESCAPE_MASK && key[pointer] == lex[pointer] ){ -+ escape_char = escape_char <<1; -+ ++pointer; -+ } -+ } -+ ++char_counter; /*and we are on a new utf8 character*/ -+ if ( key[pointer] != lex[pointer] ) { -+ return 0; -+ /*printf(" NO\n", lex, key, len);*/ -+ } -+ ++pointer; -+ } -+ if ( lex[pointer] != '\0' ) { -+ return 0; -+ /*printf(" NO\n");*/ -+ } -+ -+ /*printf(" YES\n");*/ -+ -+ return 1; -+} -+ -+ -+extern int utfstrlen(const char* str){ -+ int char_counter = 0; -+ int pointer = 0; -+ while(str[pointer]) { -+ pointer = nextcharstart(str, pointer); -+ -+ ++char_counter; /*and we are on a new utf8 character*/ -+ } -+ return char_counter; -+} -+ -diff -ruN libtextcat-2.2.part1/src/utf8misc.h libtextcat-2.2/src/utf8misc.h ---- libtextcat-2.2.part1/src/utf8misc.h 1970-01-01 01:00:00.000000000 +0100 -+++ libtextcat-2.2/src/utf8misc.h 2007-07-25 10:48:57.000000000 +0100 -@@ -0,0 +1,88 @@ -+/*************************************************************************** -+ * Copyright (C) 2006 by Jocelyn Merand * -+ * joc.mer@gmail.com * -+ * * -+ * THE BSD LICENSE -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * - Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the -+ * distribution. -+ * -+ * - Neither the name of the WiseGuys Internet B.V. nor the names of -+ * its contributors may be used to endorse or promote products derived -+ * from this software without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ ***************************************************************************/ -+ -+#ifndef _UTF8_MISC_H_ -+#define _UTF8_MISC_H_ -+ -+/** -+ * These variables are used in character processing functions -+ * These have been added to manage utf-8 symbols, particularly escape chars -+ */ -+#ifdef _UTF8_ -+#define ESCAPE_MASK 0x80 -+#define WEIGHT_MASK 0xF0 -+#else -+#define ESCAPE_MASK 0xFF -+#define WEIGHT_MASK 0x00 -+#endif -+ -+ -+/* -+ * Is used to jump to the next start of char -+ * of course it's only usefull when encoding is utf-8 -+ * This function have been added by Jocelyn Merand to use libtextcat in OOo -+ */ -+int nextcharstart(const char *str, int position); -+ -+ -+/*Copy the char in str to dest -+ * of course it's only usefull when encoding is utf8 and the symbol is encoded with more than 1 char -+ * return the number of char jumped -+ * This function have been added by Jocelyn Merand to use libtextcat in OOo -+ */ -+int charcopy(const char *str, char *dest); -+ -+ -+/* checks if n-gram lex is a prefix of key and of length len -+* if _UTF8_ is defined, it uses escap characters and len is not realy the length of lex -+* in this case, len is the number of utf-8 char strlen("€") == 3 but len == 1 -+*/ -+int issame( char *lex, char *key, int len ); -+ -+ -+/* Counts the number of characters -+* if _UTF8_ is defined, it uses escap characters and the result is not realy the length of str -+* in this case, the result is the number of utf-8 char strlen("€") == 3 but utfstrlen("€") == 1 -+*/ -+#ifdef __cplusplus -+extern "C" { -+#endif -+extern int utfstrlen(const char* str); -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -+ ---- libtextcat-2.2.part2/src/Makefile.am 2007-07-25 10:55:02.000000000 +0100 -+++ libtextcat-2.2/src/Makefile.am 2007-07-25 10:55:52.000000000 +0100 -@@ -12,11 +12,11 @@ - - libtextcat_includedir = $(includedir)/libtextcat - libtextcat_include_HEADERS = \ -- common.h constants.h fingerprint.h textcat.h -+ common.h constants.h fingerprint.h textcat.h utf8misc.h - - lib_LTLIBRARIES = libtextcat.la - libtextcat_la_SOURCES = \ -- common.c fingerprint.c textcat.c wg_mempool.c -+ common.c fingerprint.c textcat.c wg_mempool.c utf8misc.c - - bin_PROGRAMS = createfp - createfp_SOURCES = createfp.c |