User-defined analyzer
A user-defined analyzer processes text into tokens according to a user-defined function.
You can write a user-defined function to process text into tokens according to your needs. To use the function, specify udr.function_name as the value of the analyzer option when you create a basic text search index.
Examples
The following function, which is written in C, processes alphabetical and numeric characters into tokens and ignores all special characters except underscore (_):
/*
 * tokenize_alnum - user-defined analyzer for a basic text search index.
 *
 * Splits the input text into tokens made of alphanumeric characters and
 * underscores. Every other character acts as a token separator and is
 * dropped. Each token is written to the result enclosed in square brackets,
 * and consecutive tokens are separated by one space, for example
 * "[quick] [Brown] [fox] [under_score]".
 *
 * string  text to tokenize
 * fparam  function parameter block; the return value is flagged as NULL
 *         through it when no result can be produced
 *
 * Returns a new mi_lvarchar holding the bracketed tokens, or NULL (with the
 * return value flagged as NULL) when converting the input, allocating the
 * work buffer, advancing the output pointer, or building the result fails.
 */
/*ARGSUSED*/
UDREXPORT
mi_lvarchar* tokenize_alnum(
mi_lvarchar* string,
MI_FPARAM* fparam)
{
    mi_integer status = MI_OK;
    mi_lvarchar* rtn = NULL;
    gl_mchar_t* src = NULL;     /* input text as a null-terminated string */
    gl_mchar_t* tgt = NULL;     /* output buffer for the bracketed tokens */
    mi_integer token = 0;       /* nonzero while a "[" is open */
    gl_mchar_t* s;              /* read position in src */
    gl_mchar_t* r;              /* write position in tgt */

    ifx_gl_init();

    /*
     * Sizing: every token adds at most "[", "]" and one separating space,
     * and two tokens need at least one separator byte between them in the
     * input, so four output bytes per input byte plus the terminator is
     * sufficient. src is cast for strlen() because gl_mchar_t* is not char*.
     */
    if (((src = (gl_mchar_t*)mi_lvarchar_to_string(string)) == NULL) ||
        ((tgt = (gl_mchar_t*)mi_alloc((strlen((char*)src) * 4) + 1)) == NULL)) {
        status = MI_ERROR;
        goto cleanup;
    }

    s = src;
    r = tgt;

    /*
     * Walk the source one (possibly multibyte) character at a time. The
     * scan ends at the terminator or when s can no longer be advanced.
     */
    while ((s != NULL) && (*s != '\0')) {
        if ((ifx_gl_ismalnum(s, IFX_GL_NO_LIMIT)) || (*s == '_')) {
            if (!token) {
                /* Open a new token; separate it from the previous one. */
                if (r != tgt) {
                    *r++ = ' ';
                }
                *r++ = '[';
                token = 1;
            }
            /* Copy one character, then step r past what was just written. */
            ifx_gl_mbsncpy(r, s, IFX_GL_NULL, 1);
            r = ifx_gl_mbsnext(r, IFX_GL_NO_LIMIT);
            if (r == NULL) {
                /* Do not write through a NULL output pointer below. */
                status = MI_ERROR;
                goto cleanup;
            }
        }
        else if (token) {
            /* A non-token character closes the token that is open. */
            *r++ = ']';
            token = 0;
        }
        s = ifx_gl_mbsnext(s, IFX_GL_NO_LIMIT);
    }

    /* Close a token that runs to the end of the input. */
    if (token) {
        *r++ = ']';
    }
    *r = '\0';

    if ((rtn = mi_string_to_lvarchar((char*)tgt)) == NULL) {
        status = MI_ERROR;
        goto cleanup;
    }

cleanup:
    if ((status != MI_OK) &&
        (rtn != NULL)) {
        mi_var_free(rtn);
        rtn = NULL;
    }
    if (tgt != NULL) {
        mi_free(tgt);
    }
    if (src != NULL) {
        mi_free(src);
    }
    /* Without a result, flag the return value as NULL. */
    if (rtn == NULL) {
        mi_fp_setreturnisnull(fparam, 0, MI_TRUE);
    }
    return rtn;
}
The following statement registers the function so that the database server can use it:
-- Register the C routine tokenize_alnum as an SQL function that takes one
-- lvarchar argument and returns an lvarchar.
CREATE FUNCTION tokenize_alnum (lvarchar)
RETURNS lvarchar
WITH (NOT VARIANT)
-- EXTERNAL NAME gives the path of the library file and, in parentheses,
-- the name of the entry point inside it.
EXTERNAL NAME "$ONEDB_HOME/extend/myblade/myblade.bld(tokenize_alnum)"
LANGUAGE C;
The following example shows how text is tokenized when an index is created with the analyzer="udr.tokenize_alnum" option. The first line is the input text and the second line shows the resulting tokens; no special characters except the underscore are indexed:
quick! #$%&^^$## Brown fox under_score
[quick] [Brown] [fox] [under_score]