input: add confidence (detection strength) to format_match()
When users don't specify the input format, applications can try to have the format auto-detected. Some of the tests were weak and could result in false positives. Add a 'confidence' parameter to the input modules' format_match() method. Claim high confidence (1) for those formats which can check magic strings or the presence of essential keywords (vcd, wav). Claim medium confidence (10) for those formats which happened to mis-detect inputs in the past (trace32_ad). Claim weak confidence (100) for those formats which totally lack reliable conditions and rather guess than detect (chronovu_la8). Prefer the best match in public scan routines. Return at most one module to callers even if multiple modules matched, to keep the current API. This addresses part of bug #1200.
This commit is contained in:
parent
7102443a4a
commit
54ee427df0
|
@ -37,16 +37,23 @@ struct context {
|
|||
uint64_t samplerate;
|
||||
};
|
||||
|
||||
static int format_match(GHashTable *metadata)
|
||||
static int format_match(GHashTable *metadata, unsigned int *confidence)
|
||||
{
|
||||
int size;
|
||||
|
||||
/*
|
||||
* In the absence of a reliable condition like magic strings,
|
||||
* we can only guess based on the file size. Since this is
|
||||
* rather weak a condition, signal "little confidence" and
|
||||
* optionally give precedence to better matches.
|
||||
*/
|
||||
size = GPOINTER_TO_INT(g_hash_table_lookup(metadata,
|
||||
GINT_TO_POINTER(SR_INPUT_META_FILESIZE)));
|
||||
if (size == CHRONOVU_LA8_FILESIZE)
|
||||
return SR_OK;
|
||||
if (size != CHRONOVU_LA8_FILESIZE)
|
||||
return SR_ERR;
|
||||
*confidence = 100;
|
||||
|
||||
return SR_ERR;
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
static int init(struct sr_input *in, GHashTable *options)
|
||||
|
|
|
@ -342,7 +342,9 @@ static gboolean check_required_metadata(const uint8_t *metadata, uint8_t *avail)
|
|||
* 128 bytes is normally enough.
|
||||
*
|
||||
* If an input module is found, an instance is created into *in.
|
||||
* Otherwise, *in contains NULL.
|
||||
* Otherwise, *in contains NULL. When multiple input moduless claim
|
||||
* support for the format, the one with highest confidence takes
|
||||
* precedence. Applications will see at most one input module spec.
|
||||
*
|
||||
* If an instance is created, it has the given buffer used for scanning
|
||||
* already submitted to it, to be processed before more data is sent.
|
||||
|
@ -353,9 +355,10 @@ static gboolean check_required_metadata(const uint8_t *metadata, uint8_t *avail)
|
|||
*/
|
||||
SR_API int sr_input_scan_buffer(GString *buf, const struct sr_input **in)
|
||||
{
|
||||
const struct sr_input_module *imod;
|
||||
const struct sr_input_module *imod, *best_imod;
|
||||
GHashTable *meta;
|
||||
unsigned int m, i;
|
||||
unsigned int conf, best_conf;
|
||||
int ret;
|
||||
uint8_t mitem, avail_metadata[8];
|
||||
|
||||
|
@ -364,7 +367,8 @@ SR_API int sr_input_scan_buffer(GString *buf, const struct sr_input **in)
|
|||
avail_metadata[1] = 0;
|
||||
|
||||
*in = NULL;
|
||||
ret = SR_ERR;
|
||||
best_imod = NULL;
|
||||
best_conf = ~0;
|
||||
for (i = 0; input_module_list[i]; i++) {
|
||||
imod = input_module_list[i];
|
||||
if (!imod->metadata[0]) {
|
||||
|
@ -388,45 +392,55 @@ SR_API int sr_input_scan_buffer(GString *buf, const struct sr_input **in)
|
|||
continue;
|
||||
}
|
||||
sr_spew("Trying module %s.", imod->id);
|
||||
ret = imod->format_match(meta);
|
||||
ret = imod->format_match(meta, &conf);
|
||||
g_hash_table_destroy(meta);
|
||||
if (ret == SR_ERR_DATA) {
|
||||
/* Module recognized this buffer, but cannot handle it. */
|
||||
break;
|
||||
continue;
|
||||
} else if (ret == SR_ERR) {
|
||||
/* Module didn't recognize this buffer. */
|
||||
continue;
|
||||
} else if (ret != SR_OK) {
|
||||
/* Can be SR_ERR_NA. */
|
||||
return ret;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Found a matching module. */
|
||||
sr_spew("Module %s matched.", imod->id);
|
||||
*in = sr_input_new(imod, NULL);
|
||||
g_string_insert_len((*in)->buf, 0, buf->str, buf->len);
|
||||
break;
|
||||
sr_spew("Module %s matched, confidence %u.", imod->id, conf);
|
||||
if (conf >= best_conf)
|
||||
continue;
|
||||
best_imod = imod;
|
||||
best_conf = conf;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (best_imod) {
|
||||
*in = sr_input_new(best_imod, NULL);
|
||||
g_string_insert_len((*in)->buf, 0, buf->str, buf->len);
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
return SR_ERR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to find an input module that can parse the given file.
|
||||
*
|
||||
* If an input module is found, an instance is created into *in.
|
||||
* Otherwise, *in contains NULL.
|
||||
* Otherwise, *in contains NULL. When multiple input moduless claim
|
||||
* support for the format, the one with highest confidence takes
|
||||
* precedence. Applications will see at most one input module spec.
|
||||
*
|
||||
*/
|
||||
SR_API int sr_input_scan_file(const char *filename, const struct sr_input **in)
|
||||
{
|
||||
int64_t filesize;
|
||||
FILE *stream;
|
||||
const struct sr_input_module *imod;
|
||||
const struct sr_input_module *imod, *best_imod;
|
||||
GHashTable *meta;
|
||||
GString *header;
|
||||
size_t count;
|
||||
unsigned int midx, i;
|
||||
unsigned int conf, best_conf;
|
||||
int ret;
|
||||
uint8_t avail_metadata[8];
|
||||
|
||||
|
@ -475,8 +489,8 @@ SR_API int sr_input_scan_file(const char *filename, const struct sr_input **in)
|
|||
avail_metadata[midx] = 0;
|
||||
/* TODO: MIME type */
|
||||
|
||||
ret = SR_ERR;
|
||||
|
||||
best_imod = NULL;
|
||||
best_conf = ~0;
|
||||
for (i = 0; input_module_list[i]; i++) {
|
||||
imod = input_module_list[i];
|
||||
if (!imod->metadata[0]) {
|
||||
|
@ -490,24 +504,30 @@ SR_API int sr_input_scan_file(const char *filename, const struct sr_input **in)
|
|||
|
||||
sr_dbg("Trying module %s.", imod->id);
|
||||
|
||||
ret = imod->format_match(meta);
|
||||
ret = imod->format_match(meta, &conf);
|
||||
if (ret == SR_ERR) {
|
||||
/* Module didn't recognize this buffer. */
|
||||
continue;
|
||||
} else if (ret != SR_OK) {
|
||||
/* Module recognized this buffer, but cannot handle it. */
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
/* Found a matching module. */
|
||||
sr_dbg("Module %s matched.", imod->id);
|
||||
|
||||
*in = sr_input_new(imod, NULL);
|
||||
break;
|
||||
sr_dbg("Module %s matched, confidence %u.", imod->id, conf);
|
||||
if (conf >= best_conf)
|
||||
continue;
|
||||
best_imod = imod;
|
||||
best_conf = conf;
|
||||
}
|
||||
g_hash_table_destroy(meta);
|
||||
g_string_free(header, TRUE);
|
||||
|
||||
return ret;
|
||||
if (best_imod) {
|
||||
*in = sr_input_new(best_imod, NULL);
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
return SR_ERR;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -156,13 +156,19 @@ static int init(struct sr_input *in, GHashTable *options)
|
|||
return SR_OK;
|
||||
}
|
||||
|
||||
static int format_match(GHashTable *metadata)
|
||||
static int format_match(GHashTable *metadata, unsigned int *confidence)
|
||||
{
|
||||
GString *buf;
|
||||
int rc;
|
||||
|
||||
buf = g_hash_table_lookup(metadata, GINT_TO_POINTER(SR_INPUT_META_HEADER));
|
||||
rc = process_header(buf, NULL);
|
||||
|
||||
return process_header(buf, NULL);
|
||||
if (rc != SR_OK)
|
||||
return rc;
|
||||
*confidence = 10;
|
||||
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
static int process_header(GString *buf, struct context *inc)
|
||||
|
|
|
@ -265,7 +265,7 @@ static gboolean parse_header(const struct sr_input *in, GString *buf)
|
|||
return status;
|
||||
}
|
||||
|
||||
static int format_match(GHashTable *metadata)
|
||||
static int format_match(GHashTable *metadata, unsigned int *confidence)
|
||||
{
|
||||
GString *buf, *tmpbuf;
|
||||
gboolean status;
|
||||
|
@ -283,7 +283,11 @@ static int format_match(GHashTable *metadata)
|
|||
g_free(name);
|
||||
g_free(contents);
|
||||
|
||||
return status ? SR_OK : SR_ERR;
|
||||
if (!status)
|
||||
return SR_ERR;
|
||||
*confidence = 1;
|
||||
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
/* Send all accumulated bytes from inc->buffer. */
|
||||
|
|
|
@ -124,7 +124,7 @@ static int parse_wav_header(GString *buf, struct context *inc)
|
|||
return SR_OK;
|
||||
}
|
||||
|
||||
static int format_match(GHashTable *metadata)
|
||||
static int format_match(GHashTable *metadata, unsigned int *confidence)
|
||||
{
|
||||
GString *buf;
|
||||
int ret;
|
||||
|
@ -143,6 +143,8 @@ static int format_match(GHashTable *metadata)
|
|||
if ((ret = parse_wav_header(buf, NULL)) != SR_OK)
|
||||
return ret;
|
||||
|
||||
*confidence = 1;
|
||||
|
||||
return SR_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -426,6 +426,8 @@ struct sr_input_module {
|
|||
* Check if this input module can load and parse the specified stream.
|
||||
*
|
||||
* @param[in] metadata Metadata the module can use to identify the stream.
|
||||
* @param[out] confidence "Strength" of the detection.
|
||||
* Specialized handlers can take precedence over generic/basic support.
|
||||
*
|
||||
* @retval SR_OK This module knows the format.
|
||||
* @retval SR_ERR_NA There wasn't enough data for this module to
|
||||
|
@ -434,8 +436,15 @@ struct sr_input_module {
|
|||
* it. This means the stream is either corrupt, or indicates a
|
||||
* feature that the module does not support.
|
||||
* @retval SR_ERR This module does not know the format.
|
||||
*
|
||||
* Lower numeric values of 'confidence' mean that the input module
|
||||
* stronger believes in its capability to handle this specific format.
|
||||
* This way, multiple input modules can claim support for a format,
|
||||
* and the application can pick the best match, or try fallbacks
|
||||
* in case of errors. This approach also copes with formats that
|
||||
* are unreliable to detect in the absence of magic signatures.
|
||||
*/
|
||||
int (*format_match) (GHashTable *metadata);
|
||||
int (*format_match) (GHashTable *metadata, unsigned int *confidence);
|
||||
|
||||
/**
|
||||
* Initialize the input module.
|
||||
|
|
Loading…
Reference in New Issue