Research @ Hekkas.Com

Linguistically Enhanced Information Retrieval of Structured Documents

Appendix

``Science must begin with myths, and with the criticism of myths.'' Karl Popper


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Chapter 9 - Appendix
%
% last change: 16.08.2004
% correction hamid: xx.xx.2004
% correction prof: xx.xx.2004
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\aphorism{Creativity is the power to connect the seemingly unconnected.}{William Plomer}%
\chapter{Appendix}
\label{chapter:appendix}%

 

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{Extracted Stopword Lists}
\label{app:sec:full_stoplist}

Tables~\ref{tab:final_full_stopword_list_functional}
to~\ref{tab:final_full_stopword_list_domain_specific} list all
extracted stopwords, grouped by word category and by stopword layer:
functional ($F$), content-related ($C$), and domain-specific ($D$).
The number in parentheses after each category name is the number of
terms listed for that category. The domain of these stopwords is
computer science and information technology.

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Final list of functional stopwords (English, INEX)}
\label{tab:final_full_stopword_list_functional}
\begin{tabular}{L{2cm}L{14cm}}
\rowcolor{tableheadcolor} \color{white} Category & \color{white} Terms \tabularnewline%
%category terms
F\_DET (10) & \texttt{a, an, here, some, that, the, there, these, this, those} \tabularnewline%
F\_AUX (33) & \texttt{am, are, be, became, become, becomes, becoming, been, being, can, cannot, could, did, do, does, doing, done, had, has, have, having, is, make, may, might, must, ought, shall, should, was, were, will, would} \tabularnewline%
F\_PREP (47) & \texttt{about, above, across, after, against, along, among, amongst, around, aside, at, before, beforehand, behind, below, beside, between, beyond, by, down, for, from, in, into, near, of, off, on, onto, out, outside, per, since, through, thru, to, toward, towards, under, until, unto, up, upon, via, with, within, without} \tabularnewline%
F\_PRON (70) & \texttt{another, anybody, anyhow, anyone, anything, anyway, anywhere, during, elsewhere, everybody, everyone, everything, everywhere, he, her, hers, herself, him, himself, his, how, i, it, its, itself, me, mine, my, myself, nobody, none, noone, nowhere, our, ours, ourselves, she, somebody, somehow, someone, something, sometime, somewhat, somewhere, such, that, their, theirs, them, themselves, they, us, we, what, when, whence, where, which, whither, who, whoever, whom, whomever, whose, why, you, your, yours, yourself, yourselves} \tabularnewline%
F\_PART (17) & \texttt{almost, as, down, even, just, no, off, out, over, quite, rather, so, to, too, up, very, yes} \tabularnewline%
F\_CONN (37) & \texttt{after, although, and, because, before, but, further, furthermore, hence, howbeit, if, insofar, instead, like, neither, nonetheless, nor, not, or, since, than, then, thence, therefore, though, thus, unless, until, whenever, whereafter, whereas, whereby, wherein, whereupon, wherever, whether, while} \tabularnewline%
F\_LOG\_OP (3) & \texttt{and, not, or} \tabularnewline%
F\_Q (43) & \texttt{all, billion, both, each, eight, eighty, eleven, every, few, fifteen, fifth, fifty, first, five, forty, four, many, million, much, multiple, nine, ninety, often, one, second, secondly, seven, seventy, six, sixty, some, ten, third, thirty, thousand, three, trillion, twelve, twenty, twice, two, various, zero} \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Final list of content-related stopwords (English, INEX)}
\label{tab:final_full_stopword_list_content_related}
\begin{tabular}{L{2cm}L{14cm}}
\rowcolor{tableheadcolor} \color{white} Category & \color{white} Terms \tabularnewline%
%category terms
CR\_ADV (139) & \texttt{accordingly, actually, actually, afterwards, again, along, already, also, always, any, apart, approximately, away, awfully, back, besides, certainly, clearly, clearly, closely, completely, consequently, currently, definitely, differently, directly, downwards, due, easily, either, else, entirely, especially, even, evenly, ever, exactly, explicitly, extremely, far, finally, first, formerly, frequently, fully, generally, hardly, hereafter, hereby, herein, hereupon, highly, hither, hopefully, however, immediately, inasmuch, increasingly, indeed, independently, inward, lately, latterly, less, likely, mainly, meanwhile, merely, more, moreover, most, mostly, much, namely, nearly, necessarily, never, nevertheless, non, normally, nothing, now, nowhere, obviously, often, once, only, otherwise, over, overall, particularly, perhaps, possibly, preferably, presumably, previously, primarily, probably, quickly, rather, really, reasonably, recently, relatively, respectively, seriously, significantly, similarly, simply, simultaneously, slightly, sometimes, soon, specifically, still, successfully, sure, thereafter, thereby, therein, thereof, thereto, thereupon, thorough, thoroughly, throughout, today, together, truly, typically, unfortunately, unfortunately, unlikely, usually, very, well, whatever, widely, yet} \tabularnewline%
CR\_ADJ (102) & \texttt{additional, alone, appropriate, available, basic, best, better, big, brief, certain, clear, common, complete, current, different, difficult, due, early, easy, enough, entire, except, former, forth, full, general, good, great, greater, greatest, high, higher, highest, immediate, important, independent, initial, inner, kind, large, largely, larger, last, later, latest, latter, least, little, long, longer, longest, low, lower, main, main, major, mean, near, necessary, new, newer, newest, next, novel, old, older, oldest, original, other, own, particular, possible, potential, previous, real, recent, regardless, right, same, sensible, serious, several, significant, similar, simple, single, small, smaller, smallest, sorry, special, specific, standard, suitable, thick, thin, total, useful, whole, young, younger, youngest} \tabularnewline%
CR\_N (76) & \texttt{ability, abstract, addition, address, advantage, amount, area, areas, basis, case, cases, change, changes, cost, curricula, degree, details, difference, effect, end, example, fact, field, focus, form, future, goal, group, hand, help, individual, interest, interests, issue, issues, key, level, means, need, needs, ones, order, others, paper, part, place, point, potential, problem, problems, project, range, regards, result, results, section, set, sets, size, solution, space, step, study, support, terms, time, times, type, types, use, view, vitae, way, ways, work, years} \tabularnewline%
CR\_V (189) & \texttt{able, according, achieve, achieved, added, adding, address, allow, allowing, allows, applied, apply, applying, associated, assume, based, began, beginning, being, building, called, came, change, changing, come, compared, consider, considered, considering, consists, contain, containing, contains, corresponding, create, created, creating, depending, depends, derived, describe, described, detailed, determine, determined, differ, discussed, effect, end, existing, expected, find, finding, fixed, focus, following, follows, form, found, gave, get, gets, getting, give, given, gives, go, goes, going, gone, got, gotten, group, hand, help, improve, improved, include, included, includes, including, increase, increased, increasing, interested, interesting, introduced, involved, issue, issues, keep, keeps, kept, knew, know, known, knows, leading, less, let, lets, like, liked, liked, likes, limited, made, makes, making, maybe, need, needed, needing, needs, obtain, obtained, okay, order, organized, part, place, point, preferred, present, presented, presenting, presents, produce, proposed, provide, provided, provides, providing, published, put, puts, received, reduce, reduced, reducing, regarding, related, remaining, require, required, requires, result, resulting, see, seeing, seem, seemed, seeming, seems, selected, self, set, show, shown, shows, specify, specifying, starting, step, study, sub, support, take, taken, takes, taking, took, type, underlying, understanding, unlike, use, used, uses, using, want, wanted, wanting, wants, went, work, worked, working, works} \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Final list of domain-specific stopwords (English, INEX)}
\label{tab:final_full_stopword_list_domain_specific}
\begin{tabular}{L{2cm}L{14cm}}
\rowcolor{tableheadcolor} \color{white} Category & \color{white} Terms \tabularnewline%
%category terms
D\_ADV (3) & \texttt{automatically, effectively, efficiently} \tabularnewline%
D\_ADJ (6) & \texttt{complex, effective, efficient, local, national, technical} \tabularnewline%
D\_N (58) & \texttt{access, algorithm, algorithms, analysis, application, applications, approach, approaches, architecture, complexity, components, computer, control, data, department, design, development, environment, features, function, functions, hardware, implementation, information, input, institute, knowledge, management, member, method, methods, model, models, network, number, operations, performance, process, professor, program, requirements, research, science, software, structure, system, systems, technique, techniques, technology, tools, trans, university, user, users, value, values, version} \tabularnewline%
D\_V (38) & \texttt{access, computing, control, define, defined, design, designed, develop, developed, developing, distributed, engineering, extended, function, generate, generated, implemented, input, integrated, model, modeling, operating, perform, performed, performing, process, processing, programming, represent, represented, representing, represents, run, running, sets, specified, supported, testing} \tabularnewline%
\end{tabular}
\end{table}

\FloatBarrier

 

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{Extracted Patterns Suited for Composite Nouns}
\label{app:sec:composite_nouns}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length two suited for composite nouns}
\label{tab:composite_nouns_len2_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 2
execution time & 6,870 & 1,405 \tabularnewline%
electrical engineering & 6,333 & 3,316 \tabularnewline%
response time & 3,803 & 804 \tabularnewline%
source code & 3,768 & 1,403 \tabularnewline%
fault tolerance & 3,758 & 1,024 \tabularnewline%
upper bound & 3,369 & 1,228 \tabularnewline%
worst case & 3,035 & 1,305 \tabularnewline%
experimental results & 2,930 & 1,517 \tabularnewline%
test cases & 2,839 & 511 \tabularnewline%
wide range & 2,657 & 1,969 \tabularnewline%
image processing & 2,563 & 1,307 \tabularnewline%
programming language & 2,347 & 1,294 \tabularnewline%
programming languages & 2,218 & 1,235 \tabularnewline%
neural networks & 2,167 & 868 \tabularnewline%
virtual channels & 2,156 & 135 \tabularnewline%
shared memory & 2,127 & 686 \tabularnewline%
load balancing & 2,073 & 561 \tabularnewline%
distributed computing & 2,022 & 1,237 \tabularnewline%
power consumption & 1,968 & 611 \tabularnewline%
pattern recognition & 1,947 & 917 \tabularnewline%
simulation results & 1,915 & 893 \tabularnewline%
fault coverage & 1,864 & 315 \tabularnewline%
state space & 1,839 & 381 \tabularnewline%
computation time & 1,838 & 750 \tabularnewline%
% artificial intelligence & 1776 & 1177 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length three suited for composite nouns}
\label{tab:composite_nouns_len3_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 3
digital signal processing & 385 & 301 \tabularnewline%
natural language processing & 303 & 186 \tabularnewline%
partial differential equations & 302 & 210 \tabularnewline%
cache hit ratio & 282 & 46 \tabularnewline%
directed acyclic graph & 251 & 198 \tabularnewline%
average response time & 251 & 121 \tabularnewline%
test pattern generation & 249 & 148 \tabularnewline%
distributed shared memory & 249 & 150 \tabularnewline%
computational fluid dynamics & 247 & 188 \tabularnewline%
finite state machine & 243 & 129 \tabularnewline%
personal digital assistants & 218 & 205 \tabularnewline%
dynamic load balancing & 209 & 90 \tabularnewline%
middle stage switches & 205 & 7 \tabularnewline%
path delay faults & 199 & 22 \tabularnewline%
cache coherence protocol & 193 & 68 \tabularnewline%
artificial neural networks & 191 & 144 \tabularnewline%
finite state machines & 186 & 111 \tabularnewline%
average execution time & 175 & 66 \tabularnewline%
intellectual property rights & 174 & 110 \tabularnewline%
statistical pattern recognition & 170 & 102 \tabularnewline%
cache line size & 170 & 69 \tabularnewline%
state transition diagram & 166 & 71 \tabularnewline%
consistent global checkpoint & 154 & 11 \tabularnewline%
false alarm rate & 150 & 46 \tabularnewline%
% average waiting time & 149 & 45 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length four suited for composite nouns}
\label{tab:composite_nouns_len4_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 4
automatic test pattern generation & 79 & 68 \tabularnewline%
extended channel dependency graph & 62 & 8 \tabularnewline%
dynamic buffer allocation scheme & 55 & 1 \tabularnewline%
linear feedback shift register & 51 & 38 \tabularnewline%
parallel task completion time & 48 & 1 \tabularnewline%
worst case response time & 40 & 7 \tabularnewline%
call channel occupancy time & 35 & 1 \tabularnewline%
worst case computation time & 31 & 9 \tabularnewline%
linear feedback shift registers & 30 & 24 \tabularnewline%
identically distributed random variables & 30 & 23 \tabularnewline%
solving partial differential equations & 29 & 26 \tabularnewline%
maximum average waiting time & 28 & 1 \tabularnewline%
state space explosion problem & 27 & 16 \tabularnewline%
optimal linear schedule vector & 26 & 3 \tabularnewline%
worst case execution time & 25 & 20 \tabularnewline%
optical flow constraint equation & 25 & 9 \tabularnewline%
hierarchical aggregate selection queries & 25 & 1 \tabularnewline%
distributed memory parallel computers & 25 & 16 \tabularnewline%
cumulative call variable usage & 24 & 1 \tabularnewline%
byte error correcting code & 24 & 5 \tabularnewline%
white box code inheritance & 23 & 2 \tabularnewline%
uniform leader election protocol & 23 & 1 \tabularnewline%
parallel discrete event simulation & 23 & 16 \tabularnewline%
timewheel atomic broadcast protocol & 22 & 2 \tabularnewline%
% field programmable gate arrays & 22 & 19 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length five suited for composite nouns}
\label{tab:composite_nouns_len5_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 5
handoff call channel occupancy time & 19 & 1 \tabularnewline%
submit queries concerning historical events & 10 & 10 \tabularnewline%
average message latency versus traffic & 10 & 4 \tabularnewline%
scholarly archival journals inform readers & 9 & 9 \tabularnewline%
procedurally generated partial product reduction & 9 & 1 \tabularnewline%
minimal cost distribution tree problem & 9 & 1 \tabularnewline%
generated partial product reduction tree & 9 & 1 \tabularnewline%
vertex versus maximal clique incidence & 8 & 1 \tabularnewline%
row shift invariant wavelet packet & 8 & 1 \tabularnewline%
robust path delay fault coverage & 8 & 2 \tabularnewline%
shift invariant wavelet packet transform & 7 & 1 \tabularnewline%
scheduling precedence constrained parallel tasks & 7 & 1 \tabularnewline%
recoverable distributed shared virtual memory & 7 & 1 \tabularnewline%
maximum average waiting time requirement & 7 & 1 \tabularnewline%
fault tolerant wormhole routing strategy & 7 & 7 \tabularnewline%
extended multicast channel dependency graph & 7 & 1 \tabularnewline%
adaptive row shift invariant wavelet & 7 & 1 \tabularnewline%
time varying flow field visualization & 6 & 1 \tabularnewline%
symmetric symbol error correcting codes & 6 & 1 \tabularnewline%
conditional steady state probability vector & 6 & 3 \tabularnewline%
versus maximal clique incidence matrices & 5 & 1 \tabularnewline%
spanning tree carry lookahead adder & 5 & 2 \tabularnewline%
solicits papers giving preliminary results & 5 & 5 \tabularnewline%
random field modeled textured images & 5 & 1 \tabularnewline%
% posterior class membership probability estimates & 5 & 1 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length six suited for composite nouns}
\label{tab:composite_nouns_len6_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 6
procedurally generated partial product reduction tree & 9 & 1 \tabularnewline%
row shift invariant wavelet packet transform & 7 & 1 \tabularnewline%
adaptive row shift invariant wavelet packet & 7 & 1 \tabularnewline%
vertex versus maximal clique incidence matrix & 4 & 1 \tabularnewline%
vertex versus maximal clique incidence matrices & 4 & 1 \tabularnewline%
produces dependency preserving nested database schemes & 4 & 1 \tabularnewline%
nonbalanced identically distributed binary random variables & 4 & 1 \tabularnewline%
beam addressed swept volume display unit & 4 & 1 \tabularnewline%
systolic redundant residue arithmetic error correction & 3 & 2 \tabularnewline%
redundant residue arithmetic error correction circuit & 3 & 2 \tabularnewline%
plot normalized deadlocks versus load rate & 3 & 1 \tabularnewline%
partial differential equations describing physical phenomena & 3 & 3 \tabularnewline%
interactive event service giving conference dates & 3 & 3 \tabularnewline%
handoff call channel occupancy time distribution & 3 & 1 \tabularnewline%
disk array controller signals service completion & 3 & 1 \tabularnewline%
database schema satisfies generalized entity integrity & 3 & 1 \tabularnewline%
coarse time scale traffic smoothing mode & 3 & 1 \tabularnewline%
cluster generative statistical dynamic time warping & 3 & 1 \tabularnewline%
wird etwas knapp bei mir sagen & 2 & 1 \tabularnewline%
unstructured sparse symmetric positive definite matrices & 2 & 1 \tabularnewline%
une courbe qui remplit toute une & 2 & 2 \tabularnewline%
ultimately simplify text composition tasks faced & 2 & 2 \tabularnewline%
trusted graphics server applet stored locally & 2 & 2 \tabularnewline%
time instant object component retrieval request & 2 & 1 \tabularnewline%
% test generator generates bad fault coverage & 2 & 1 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length seven suited for composite nouns}
\label{tab:composite_nouns_len7_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 7
adaptive row shift invariant wavelet packet transform & 7 & 1 \tabularnewline%
systolic redundant residue arithmetic error correction circuit & 3 & 2 \tabularnewline%
wird etwas knapp bei mir sagen wir & 2 & 1 \tabularnewline%
une courbe qui remplit toute une aire & 2 & 2 \tabularnewline%
temporal strata translate temporal query language statements & 2 & 1 \tabularnewline%
recommended practices define ethical standards define educational & 2 & 2 \tabularnewline%
practices define ethical standards define educational curricula & 2 & 2 \tabularnewline%
law enforcement agencies continually analyze vast amounts & 2 & 2 \tabularnewline%
knapp bei mir sagen wir lieber vierzehn & 2 & 1 \tabularnewline%
etwas knapp bei mir sagen wir lieber & 2 & 1 \tabularnewline%
courbe qui remplit toute une aire plaine & 2 & 2 \tabularnewline%
classification assumes locally constant class conditional probabilities & 2 & 2 \tabularnewline%
block sharing implies strong interprocess spatial locality & 2 & 2 \tabularnewline%
beam addressed swept volume display unit employing & 2 & 1 \tabularnewline%
authors propose task assignment effort adjustment factors & 2 & 2 \tabularnewline%
atlas anatomy removes individual anatomical shape variations & 2 & 2 \tabularnewline%
allocation strategies dramatically outperform contiguous allocation strategies & 2 & 1 \tabularnewline%
acyclic channel dependency graph guarantees deadlock freedom & 2 & 1 \tabularnewline%
zeroset copyset clrbit setbit tstbit xrealloc xmalloc & 1 & 1 \tabularnewline%
xrealloc freelist lex error enlist dfainit insert & 1 & 1 \tabularnewline%
xmalloc dfaerror addtok xrealloc freelist lex error & 1 & 1 \tabularnewline%
write aent ante acrt cart aent neat & 1 & 1 \tabularnewline%
woodword ted kaczynski competency birmingham islam blaze & 1 & 1 \tabularnewline%
wiring channels cause extra wiring path delays & 1 & 1 \tabularnewline%
% wireless ticketing wireless healthcare services mobile face & 1 & 1 \tabularnewline%
\end{tabular}
\end{table}

 

\FloatBarrier

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{Extracted Patterns Suited for Named Entities}
\label{app:sec:named_entities}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length two suited for named entities}
\label{tab:named_entities_len2_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 2
Pattern Recognition & 5,914 & 1,575 \tabularnewline%
Machine Intelligence & 5,670 & 1,568 \tabularnewline%
Artificial Intelligence & 4,802 & 1,819 \tabularnewline%
Distributed Computing & 3,885 & 1,749 \tabularnewline%
Parallel Processing & 3,057 & 1,227 \tabularnewline%
Image Processing & 2,797 & 1,200 \tabularnewline%
World Wide & 2,734 & 1,537 \tabularnewline%
Wide Web & 2,658 & 1,508 \tabularnewline%
Neural Networks & 2,230 & 715 \tabularnewline%
Signal Processing & 2,039 & 1,006 \tabularnewline%
Carnegie Mellon & 2,024 & 1,147 \tabularnewline%
International Conference & 1,889 & 1,144 \tabularnewline%
Electrical Engineering & 1,581 & 1,076 \tabularnewline%
Programming Languages & 1,509 & 810 \tabularnewline%
United States & 1,473 & 826 \tabularnewline%
Hong Kong & 1,403 & 511 \tabularnewline%
Reader Service & 1,381 & 303 \tabularnewline%
San Diego & 1,336 & 741 \tabularnewline%
Los Angeles & 1,332 & 888 \tabularnewline%
Parallel Computing & 1,283 & 723 \tabularnewline%
Machine Learning & 1,235 & 475 \tabularnewline%
Semantic Web & 1,231 & 179 \tabularnewline%
Monte Carlo & 1,216 & 397 \tabularnewline%
Air Force & 1,139 & 628 \tabularnewline%
% Petri Nets & 1059 & 174 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length three suited for named entities}
\label{tab:named_entities_len3_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 3
World Wide Web & 2,650 & 1,505 \tabularnewline%
Pattern Recognition Letters & 444 & 287 \tabularnewline%
Unified Modeling Language & 394 & 274 \tabularnewline%
Internet Engineering Task & 328 & 284 \tabularnewline%
Ad Hoc Networks & 311 & 74 \tabularnewline%
Object Request Broker & 300 & 207 \tabularnewline%
North Carolina State & 298 & 209 \tabularnewline%
Jet Propulsion Laboratory & 282 & 186 \tabularnewline%
Wide Web Consortium & 274 & 232 \tabularnewline%
Engineering Task Force & 274 & 238 \tabularnewline%
International Test Conference & 269 & 183 \tabularnewline%
Artificial Neural Networks & 269 & 180 \tabularnewline%
Extensible Markup Language & 264 & 217 \tabularnewline%
Upper Saddle River & 244 & 129 \tabularnewline%
Distributed Shared Memory & 237 & 102 \tabularnewline%
Stochastic Petri Nets & 234 & 43 \tabularnewline%
Java Virtual Machine & 234 & 138 \tabularnewline%
Guest Editors Introduction & 223 & 223 \tabularnewline%
Reader Interest Survey & 211 & 205 \tabularnewline%
Digital Signal Processing & 211 & 144 \tabularnewline%
Interest Survey Indicate & 204 & 204 \tabularnewline%
Wall Street Journal & 203 & 149 \tabularnewline%
Tau Beta Pi & 195 & 178 \tabularnewline%
Naval Postgraduate School & 194 & 134 \tabularnewline%
% Reader Service Card & 191 & 190 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length four suited for named entities}
\label{tab:named_entities_len4_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 4
World Wide Web Consortium & 272 & 230 \tabularnewline%
Internet Engineering Task Force & 272 & 237 \tabularnewline%
Reader Interest Survey Indicate & 204 & 204 \tabularnewline%
Goddard Space Flight Center & 166 & 118 \tabularnewline%
Virtual Reality Modeling Language & 150 & 136 \tabularnewline%
Mobile Ad Hoc Networks & 114 & 47 \tabularnewline%
San Diego Supercomputer Center & 90 & 63 \tabularnewline%
Ad Hoc Wireless Networks & 83 & 45 \tabularnewline%
Web Services Description Language & 77 & 61 \tabularnewline%
Field Programmable Gate Arrays & 73 & 50 \tabularnewline%
Wireless Ad Hoc Networks & 69 & 34 \tabularnewline%
Markov Chain Monte Carlo & 68 & 43 \tabularnewline%
Generalized Stochastic Petri Nets & 66 & 28 \tabularnewline%
Accelerated Strategic Computing Initiative & 62 & 55 \tabularnewline%
Synchronized Multimedia Integration Language & 61 & 45 \tabularnewline%
North Atlantic Test Workshop & 60 & 54 \tabularnewline%
San Jose Mercury News & 56 & 47 \tabularnewline%
Ordered Binary Decision Diagrams & 46 & 28 \tabularnewline%
International Parallel Processing Symposium & 46 & 42 \tabularnewline%
Mobile Ad Hoc Networking & 45 & 30 \tabularnewline%
Enterprise Distributed Object Computing & 43 & 42 \tabularnewline%
Digital Millennium Copyright Act & 40 & 36 \tabularnewline%
Imagery Pattern Recognition Workshop & 35 & 31 \tabularnewline%
World Intellectual Property Organization & 34 & 33 \tabularnewline%
% Parallel Discrete Event Simulation & 34 & 19 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length five suited for named entities}
\label{tab:named_entities_len5_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 5
Sara Reese Hedberg Sara Reese & 23 & 23 \tabularnewline%
Reese Hedberg Sara Reese Hedberg & 23 & 23 \tabularnewline%
Linda Dailey Paulson Linda Dailey & 23 & 23 \tabularnewline%
Dailey Paulson Linda Dailey Paulson & 23 & 23 \tabularnewline%
Markov Regenerative Stochastic Petri Nets & 22 & 8 \tabularnewline%
Iowa State College Statistical Laboratory & 22 & 1 \tabularnewline%
Upsilon Pi Epsilon Student Award & 19 & 11 \tabularnewline%
Unified Modeling Language Reference Manual & 19 & 19 \tabularnewline%
Neural Networks Outstanding Paper Award & 19 & 19 \tabularnewline%
Fault Tolerant Wormhole Routing Strategy & 19 & 19 \tabularnewline%
Lance Stafford Larson Student Scholarship & 18 & 13 \tabularnewline%
International World Wide Web Conference & 15 & 12 \tabularnewline%
Virtual Reality Annual International Symposium & 14 & 13 \tabularnewline%
Inverse Visual Problems Involving Discontinuities & 13 & 13 \tabularnewline%
Electronic Delay Storage Automatic Calculator & 13 & 8 \tabularnewline%
Air Force Flight Dynamics Laboratory & 12 & 12 \tabularnewline%
Mary Jean Harrold Mary Jean & 11 & 11 \tabularnewline%
Jean Harrold Mary Jean Harrold & 11 & 11 \tabularnewline%
Enterprise Distributed Object Computing Workshop & 11 & 11 \tabularnewline%
British Association Mathematical Tables Committee & 11 & 3 \tabularnewline%
Air Force Scientific Advisory Board & 11 & 10 \tabularnewline%
Ad Hoc Mobile Wireless Networks & 11 & 11 \tabularnewline%
Wright Patterson Air Force Base & 10 & 8 \tabularnewline%
Robot World Cup Soccer Games & 10 & 9 \tabularnewline%
% Multihop Wireless Ad Hoc Networks & 10 & 10 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length six suited for named entities}
\label{tab:named_entities_len6_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 6
Sara Reese Hedberg Sara Reese Hedberg & 23 & 23 \tabularnewline%
Linda Dailey Paulson Linda Dailey Paulson & 23 & 23 \tabularnewline%
Mary Jean Harrold Mary Jean Harrold & 11 & 11 \tabularnewline%
Shari Lawrence Pfleeger Shari Lawrence Pfleeger & 9 & 9 \tabularnewline%
Khaled El Emam Khaled El Emam & 9 & 9 \tabularnewline%
Alberto Del Bimbo Alberto Del Bimbo & 8 & 8 \tabularnewline%
Mo Kim Cheng Albert Mo Kim & 7 & 7 \tabularnewline%
Kim Cheng Albert Mo Kim Cheng & 7 & 7 \tabularnewline%
Hee Yong Youn Hee Yong Youn & 7 & 7 \tabularnewline%
Andy Hunt Dave Thomas Andy Hunt & 7 & 7 \tabularnewline%
Albert Mo Kim Cheng Albert Mo & 7 & 7 \tabularnewline%
Timothy Mark Pinkston Timothy Mark Pinkston & 6 & 6 \tabularnewline%
Sung Yong Shin Sung Yong Shin & 6 & 6 \tabularnewline%
Norris Parker Smith Norris Parker Smith & 6 & 6 \tabularnewline%
Lizy Kurian John Lizy Kurian John & 6 & 6 \tabularnewline%
Laxmi Narayan Bhuyan Laxmi Narayan Bhuyan & 6 & 6 \tabularnewline%
Giovanni De Micheli Giovanni De Micheli & 6 & 6 \tabularnewline%
Dik Lun Lee Dik Lun Lee & 6 & 6 \tabularnewline%
David Alan Grier David Alan Grier & 6 & 6 \tabularnewline%
Beng Chin Ooi Beng Chin Ooi & 6 & 6 \tabularnewline%
Song Chun Zhu Song Chun Zhu & 5 & 5 \tabularnewline%
Sang Lyul Min Sang Lyul Min & 5 & 5 \tabularnewline%
Reversible Jump Markov Chain Monte Carlo & 5 & 4 \tabularnewline%
Pam Frost Gorder Pam Frost Gorder & 5 & 5 \tabularnewline%
% Optimal Infinite Impulse Response Edge Detection & 5 & 5 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted patterns of length seven suited for named entities}
\label{tab:named_entities_len7_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 7
Mo Kim Cheng Albert Mo Kim Cheng & 7 & 7 \tabularnewline%
Albert Mo Kim Cheng Albert Mo Kim & 7 & 7 \tabularnewline%
Optimal Infinite Impulse Response Edge Detection Filters & 5 & 5 \tabularnewline%
Stochastic Petri Nets Representing Generalized Service Networks & 4 & 4 \tabularnewline%
Reversible Jump Markov Chain Monte Carlo Computation & 4 & 4 \tabularnewline%
Oak Ridge Association Junior Faculty Enhancement Award & 3 & 3 \tabularnewline%
International Test Conference Tutorials Washington Sheraton Hotel & 3 & 3 \tabularnewline%
Whitaker Jane Wilhelms Yves Willems Peter Williams & 2 & 2 \tabularnewline%
Victor De La Luz Victor De La & 2 & 2 \tabularnewline%
Time Warp Synchronized Parallel Discrete Event Simulation & 2 & 2 \tabularnewline%
Stereoscopic Image Pairs Assuming Piecewise Continuous Surfaces & 2 & 2 \tabularnewline%
Sillion Bruno Silva Claudio Silva Deborah Silver & 2 & 2 \tabularnewline%
San Diego Supercomputer Center Networked Volume Renderer & 2 & 2 \tabularnewline%
San Diego Supercomputer Center Creative Computing Award & 2 & 2 \tabularnewline%
Robertson Phil Robertson Alyn Rockwood Jon Rokne & 2 & 2 \tabularnewline%
Petri Nets Representing Generalized Service Networks Abstract & 2 & 2 \tabularnewline%
Nets Representing Generalized Service Networks Abstract Abstract & 2 & 2 \tabularnewline%
Natural Microbial Populations Reveals Tertiary Structural Elements & 2 & 2 \tabularnewline%
Marcello Pelillo Josiane Zerubia Guest Editors Curricula & 2 & 2 \tabularnewline%
Larry Aupperle Rick Avila Ron Azuma Norman & 2 & 2 \tabularnewline%
James Arvo Larry Aupperle Rick Avila Ron & 2 & 2 \tabularnewline%
Hee Beng Kuan Tan Hee Beng Kuan & 2 & 2 \tabularnewline%
Hans Hagen Bernd Hamann Pat Hanrahan Chuck & 2 & 2 \tabularnewline%
Hancock Marcello Pelillo Josiane Zerubia Guest Editors & 2 & 2 \tabularnewline%
% Hamann Pat Hanrahan Chuck Hansen Andrew Hanson & 2 & 2 \tabularnewline%
\end{tabular}
\end{table}

 

\FloatBarrier

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{Extracted Patterns Suited for Formulaic Speech}
\label{app:sec:formulaic_speech}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length two}
\label{tab:formulaic_speech_len2_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 2
computer science & 29,743 & 8,233 \tabularnewline%
research interests & 14,241 & 6,737 \tabularnewline%
et al & 12,946 & 3,645 \tabularnewline%
software engineering & 11,901 & 2,927 \tabularnewline%
interests include & 11,382 & 5,923 \tabularnewline%
vitae curricula & 11,048 & 11,048 \tabularnewline%
computer vision & 10,632 & 1,851 \tabularnewline%
other hand & 10,266 & 5,282 \tabularnewline%
data set & 8,604 & 1,525 \tabularnewline%
computer society & 8,379 & 3,859 \tabularnewline%
software development & 8,023 & 2,546 \tabularnewline%
distributed systems & 7,985 & 2,749 \tabularnewline%
computer graphics & 7,927 & 1,880 \tabularnewline%
data sets & 6,943 & 1,528 \tabularnewline%
information systems & 6,554 & 2,577 \tabularnewline%
operating system & 6,451 & 2,425 \tabularnewline%
other words & 6,054 & 3,301 \tabularnewline%
see figure & 6,043 & 2,201 \tabularnewline%
total number & 5,978 & 2,601 \tabularnewline%
pattern analysis & 5,691 & 1,546 \tabularnewline%
same time & 5,126 & 3,376 \tabularnewline%
operating systems & 5,106 & 2,434 \tabularnewline%
computer engineering & 5,020 & 2,477 \tabularnewline%
user interface & 4,892 & 1,963 \tabularnewline%
% large number & 4,841 & 2,988 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length three}
\label{tab:formulaic_speech_len3_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 3
vitae curricula vitae & 11,048 & 11,048 \tabularnewline%
curricula vitae curricula & 11,048 & 11,048 \tabularnewline%
research interests include & 10,260 & 5,479 \tabularnewline%
parallel and distributed & 6,937 & 2,158 \tabularnewline%
analysis and machine & 5,425 & 1,487 \tabularnewline%
degree in computer & 4,603 & 2,397 \tabularnewline%
institute of technology & 4,563 & 2,775 \tabularnewline%
shown in figure & 4,430 & 2,078 \tabularnewline%
number of processors & 3,559 & 793 \tabularnewline%
described in section & 3,545 & 1,819 \tabularnewline%
department of computer & 3,513 & 2,221 \tabularnewline%
national science foundation & 3,390 & 2,464 \tabularnewline%
university of california & 3,315 & 1,951 \tabularnewline%
science and engineering & 2,985 & 1,829 \tabularnewline%
shown in table & 2,901 & 1,570 \tabularnewline%
professor of computer & 2,663 & 2,010 \tabularnewline%
number of nodes & 2,652 & 932 \tabularnewline%
electrical and computer & 2,486 & 1,442 \tabularnewline%
hardware and software & 2,392 & 1,506 \tabularnewline%
discussed in section & 2,301 & 1,372 \tabularnewline%
point of view & 2,300 & 1,534 \tabularnewline%
organized as follows & 2,152 & 2,115 \tabularnewline%
degree in electrical & 2,112 & 1,339 \tabularnewline%
university of illinois & 2,058 & 1,151 \tabularnewline%
% paper is organized & 2,053 & 2,053 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length four}
\label{tab:formulaic_speech_len4_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 4
curricula vitae curricula vitae & 11,048 & 11,048 \tabularnewline%
pattern analysis and machine & 5,404 & 1,480 \tabularnewline%
analysis and machine intelligence & 5,384 & 1,478 \tabularnewline%
science from the university & 3,796 & 2,575 \tabularnewline%
degree in computer science & 3,765 & 2,125 \tabularnewline%
parallel and distributed systems & 3,052 & 1,344 \tabularnewline%
department of computer science & 2,938 & 1,934 \tabularnewline%
professor in the department & 2,342 & 1,796 \tabularnewline%
engineering from the university & 2,337 & 1,742 \tabularnewline%
electrical and computer engineering & 2,334 & 1,383 \tabularnewline%
professor of computer science & 2,287 & 1,739 \tabularnewline%
science at the university & 2,173 & 1,593 \tabularnewline%
parallel and distributed computing & 2,133 & 1,146 \tabularnewline%
vision and pattern recognition & 1,947 & 745 \tabularnewline%
computer vision and pattern & 1,937 & 737 \tabularnewline%
computer science and engineering & 1,917 & 1,225 \tabularnewline%
degree in electrical engineering & 1,814 & 1,220 \tabularnewline%
engineering at the university & 1,389 & 1,060 \tabularnewline%
degrees in computer science & 1,347 & 1,091 \tabularnewline%
presented in this paper & 1,280 & 917 \tabularnewline%
engineering and computer science & 1,259 & 898 \tabularnewline%
due to the fact & 1,176 & 851 \tabularnewline%
electrical engineering and computer & 1,149 & 808 \tabularnewline%
current research interests include & 1,060 & 878 \tabularnewline%
% vitae of curricula vitae & 1,024 & 564 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length five}
\label{tab:formulaic_speech_len5_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 5
pattern analysis and machine intelligence & 5,378 & 1,476 \tabularnewline%
computer science from the university & 3,602 & 2,455 \tabularnewline%
paper is organized as follows & 2,005 & 2,005 \tabularnewline%
computer science at the university & 1,998 & 1,465 \tabularnewline%
computer vision and pattern recognition & 1,918 & 733 \tabularnewline%
electrical engineering and computer science & 1,055 & 756 \tabularnewline%
curricula vitae of curricula vitae & 1,024 & 564 \tabularnewline%
electrical engineering from the university & 961 & 798 \tabularnewline%
authors would like to thank & 877 & 865 \tabularnewline%
department of electrical and computer & 822 & 599 \tabularnewline%
associate professor in the department & 771 & 691 \tabularnewline%
assistant professor in the department & 740 & 654 \tabularnewline%
member of the technical staff & 627 & 505 \tabularnewline%
university of texas at austin & 624 & 425 \tabularnewline%
computer engineering from the university & 599 & 522 \tabularnewline%
vitae curricula vitae of curricula & 550 & 550 \tabularnewline%
work was supported in part & 545 & 537 \tabularnewline%
state university of new york & 540 & 382 \tabularnewline%
computer engineering at the university & 537 & 429 \tabularnewline%
associate professor of computer science & 527 & 480 \tabularnewline%
lecture notes in computer science & 521 & 357 \tabularnewline%
knowledge discovery and data mining & 500 & 256 \tabularnewline%
engineering from the indian institute & 477 & 390 \tabularnewline%
programming languages and operating systems & 470 & 269 \tabularnewline%
% university of california at berkeley & 467 & 361 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length six}
\label{tab:formulaic_speech_len6_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 6
professor in the department of computer & 1,224 & 986 \tabularnewline%
department of electrical and computer engineering & 818 & 597 \tabularnewline%
vitae curricula vitae of curricula vitae & 550 & 550 \tabularnewline%
curricula vitae curricula vitae of curricula & 550 & 550 \tabularnewline%
science from the university of california & 542 & 459 \tabularnewline%
professor in the department of electrical & 525 & 457 \tabularnewline%
annals of the history of computing & 469 & 218 \tabularnewline%
department of computer science and engineering & 466 & 357 \tabularnewline%
vitae of curricula vitae of curricula & 457 & 241 \tabularnewline%
support for programming languages and operating & 453 & 259 \tabularnewline%
rest of the paper is organized & 452 & 452 \tabularnewline%
transactions on knowledge and data engineering & 358 & 257 \tabularnewline%
computer science department at the university & 356 & 305 \tabularnewline%
professor of electrical and computer engineering & 354 & 317 \tabularnewline%
rest of this paper is organized & 350 & 350 \tabularnewline%
professor in the computer science department & 347 & 309 \tabularnewline%
science from the university of illinois & 330 & 286 \tabularnewline%
research interests are in the areas & 328 & 294 \tabularnewline%
transactions on parallel and distributed systems & 312 & 263 \tabularnewline%
science at the university of california & 303 & 242 \tabularnewline%
science and engineering at the university & 296 & 233 \tabularnewline%
national institute of standards and technology & 294 & 253 \tabularnewline%
transactions on pattern analysis and machine & 290 & 197 \tabularnewline%
remainder of this paper is organized & 290 & 290 \tabularnewline%
% degree in computer science and engineering & 290 & 246 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted formulaic speech suited patterns of length seven}
\label{tab:formulaic_speech_len7_examples}
\begin{tabular}{L{10cm}cc}
\rowcolor{tableheadcolor} \color{white} Multi-term & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%multi-term example
% length 7
degree in computer science from the university & 1,139 & 857 \tabularnewline%
professor in the department of computer science & 1,001 & 833 \tabularnewline%
professor of computer science at the university & 624 & 548 \tabularnewline%
department of computer science at the university & 580 & 476 \tabularnewline%
curricula vitae curricula vitae of curricula vitae & 550 & 550 \tabularnewline%
computer science from the university of california & 515 & 434 \tabularnewline%
vitae of curricula vitae of curricula vitae & 457 & 241 \tabularnewline%
curricula vitae of curricula vitae of curricula & 457 & 241 \tabularnewline%
degrees in computer science from the university & 452 & 412 \tabularnewline%
support for programming languages and operating systems & 448 & 255 \tabularnewline%
architectural support for programming languages and operating & 441 & 258 \tabularnewline%
electrical and computer engineering at the university & 426 & 346 \tabularnewline%
associate professor in the department of computer & 417 & 391 \tabularnewline%
engineering from the indian institute of technology & 413 & 353 \tabularnewline%
assistant professor in the department of computer & 404 & 363 \tabularnewline%
degree in electrical engineering from the university & 351 & 311 \tabularnewline%
computer science from the university of illinois & 328 & 284 \tabularnewline%
computer science at the university of california & 292 & 232 \tabularnewline%
transactions on pattern analysis and machine intelligence & 288 & 196 \tabularnewline%
computer science and engineering at the university & 280 & 218 \tabularnewline%
engineering and computer science at the university & 246 & 197 \tabularnewline%
electrical and computer engineering from the university & 242 & 218 \tabularnewline%
department of electrical engineering and computer science & 242 & 166 \tabularnewline%
circling the appropriate number on the reader & 208 & 208 \tabularnewline%
% computer science from the university of maryland & 196 & 182 \tabularnewline%
\end{tabular}
\end{table}

 

 

\FloatBarrier

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{Extracted Acronyms}
\label{app:sec:acronyms}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted acronyms of length two}
\label{tab:acronyms_len2}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 2
IP & internet protocol & 90 & 2 \tabularnewline%
IP & intellectual property & 63 & 2 \tabularnewline%
VR & virtual reality & 58 & 2 \tabularnewline%
AI & artificial intelligence & 52 & 4 \tabularnewline%
ML & maximum likelihood & 45 & 1 \tabularnewline%
PE & processing element & 43 & 3 \tabularnewline%
CA & cellular automata & 43 & 2 \tabularnewline%
CT & computed tomography & 42 & 3 \tabularnewline%
RF & radio frequency & 41 & 2 \tabularnewline%
SA & simulated annealing & 39 & 1 \tabularnewline%
GA & genetic algorithm & 33 & 1 \tabularnewline%
IR & information retrieval & 32 & 3 \tabularnewline%
EM & expectation maximization & 31 & 2 \tabularnewline%
OS & operating system & 30 & 2 \tabularnewline%
IT & information technology & 26 & 2 \tabularnewline%
DP & dynamic programming & 23 & 2 \tabularnewline%
NN & nearest neighbor & 23 & 7 \tabularnewline%
PC & program counter & 22 & 2 \tabularnewline%
MR & magnetic resonance & 22 & 4 \tabularnewline%
VE & virtual environment & 20 & 1 \tabularnewline%
MD & molecular dynamics & 19 & 1 \tabularnewline%
SC & sequential consistency & 18 & 1 \tabularnewline%
EU & european union & 18 & 1 \tabularnewline%
NI & network interface & 17 & 2 \tabularnewline%
% AR & augmented reality & 17 & 1 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted acronyms of length three}
\label{tab:acronyms_len3}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 3
NSF & national science foundation & 207 & 16 \tabularnewline%
ATM & asynchronous transfer mode & 154 & 2 \tabularnewline%
UML & unified modeling language & 105 & 3 \tabularnewline%
RDF & resource description framework & 98 & 4 \tabularnewline%
GPS & global positioning system & 95 & 4 \tabularnewline%
PCA & principal component analysis & 90 & 1 \tabularnewline%
DAG & directed acyclic graph & 90 & 3 \tabularnewline%
API & application programming interface & 86 & 3 \tabularnewline%
MAP & maximum a posteriori & 86 & 3 \tabularnewline%
FFT & fast fourier transform & 83 & 3 \tabularnewline%
LRU & least recently used & 80 & 4 \tabularnewline%
SVD & singular value decomposition & 79 & 2 \tabularnewline%
MIT & massachusetts institute of technology & 74 & 2 \tabularnewline%
DCT & discrete cosine transform & 72 & 3 \tabularnewline%
MPI & message passing interface & 72 & 4 \tabularnewline%
IDL & interface definition language & 71 & 3 \tabularnewline%
GUI & graphical user interface & 70 & 2 \tabularnewline%
RMI & remote method invocation & 68 & 2 \tabularnewline%
CGI & common gateway interface & 67 & 2 \tabularnewline%
JVM & java virtual machine & 65 & 3 \tabularnewline%
SSL & secure sockets layer & 62 & 2 \tabularnewline%
MRI & magnetic resonance imaging & 59 & 7 \tabularnewline%
UDP & user datagram protocol & 59 & 2 \tabularnewline%
MDL & minimum description length & 58 & 3 \tabularnewline%
% SQL & structured query language & 56 & 2 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted acronyms of length four}
\label{tab:acronyms_len4}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 4
IETF & internet engineering task force & 97 & 4 \tabularnewline%
VRML & virtual reality modeling language & 92 & 3 \tabularnewline%
VLIW & very long instruction word & 69 & 2 \tabularnewline%
SOAP & simple object access protocol & 62 & 10 \tabularnewline%
ISCA & int'l symp. computer architecture & 56 & 1 \tabularnewline%
VLSI & very large scale integration & 55 & 3 \tabularnewline%
NIST & national institute of standards and technology & 54 & 3 \tabularnewline%
SGML & standard generalized markup language & 47 & 3 \tabularnewline%
WSDL & web services description language & 43 & 7 \tabularnewline%
PSTN & public switched telephone network & 42 & 3 \tabularnewline%
SNMP & simple network management protocol & 39 & 2 \tabularnewline%
VLDB & very large data bases & 39 & 2 \tabularnewline%
PARC & palo alto research center & 39 & 3 \tabularnewline%
LFSR & linear feedback shift register & 38 & 2 \tabularnewline%
SMIL & synchronized multimedia integration language & 38 & 2 \tabularnewline%
ARPA & advanced research projects agency & 38 & 3 \tabularnewline%
PODS & principles of database systems & 38 & 3 \tabularnewline%
ATPG & automatic test pattern generation & 36 & 2 \tabularnewline%
ICDE & int'l conf. data eng. & 36 & 2 \tabularnewline%
CDMA & code division multiple access & 35 & 3 \tabularnewline%
CVPR & computer vision and pattern recognition & 35 & 9 \tabularnewline%
MTTF & mean time to failure & 35 & 2 \tabularnewline%
ANSI & american national standards institute & 34 & 3 \tabularnewline%
LDAP & lightweight directory access protocol & 34 & 2 \tabularnewline%
% FDDI & fiber distributed data interface & 33 & 2 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted acronyms of length five}
\label{tab:acronyms_len5}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 5
DARPA & defense advanced research projects agency & 88 & 6 \tabularnewline%
CORBA & common object request broker architecture & 81 & 3 \tabularnewline%
KAIST & korea advanced institute of science and technology & 52 & 9 \tabularnewline%
CIPIC & center for image processing and integrated computing & 20 & 6 \tabularnewline%
ICDCS & int'l conf. distributed computing systems & 20 & 2 \tabularnewline%
ISSTA & int'l symp. software testing and analysis & 16 & 3 \tabularnewline%
EPSRC & engineering and physical sciences research council & 16 & 3 \tabularnewline%
TAPOS & theory and practice of object systems & 16 & 2 \tabularnewline%
ENIAC & electronic numerical integrator and computer & 16 & 2 \tabularnewline%
NSERC & natural sciences and engineering research council & 15 & 1 \tabularnewline%
RIACS & research institute for advanced computer science & 12 & 3 \tabularnewline%
ICASE & institute for computer applications in science and engineering & 11 & 5 \tabularnewline%
ICANN & internet corporation for assigned names and numbers & 11 & 2 \tabularnewline%
EDSAC & electronic delay storage automatic calculator & 10 & 2 \tabularnewline%
DARPA & defense advanced research project agency & 10 & 2 \tabularnewline%
IJCAI & int'l joint conf. artificial intelligence & 10 & 1 \tabularnewline%
ISCAS & international symposium on circuits and systems & 10 & 1 \tabularnewline%
ICDCS & international conference on distributed computing systems & 10 & 3 \tabularnewline%
TOSEM & transactions on software engineering and methodology & 10 & 3 \tabularnewline%
NPACI & national partnership for advanced computational infrastructure & 9 & 3 \tabularnewline%
AFIPS & american federation of information processing societies & 9 & 1 \tabularnewline%
PPOPP & principles and practice of parallel programming & 9 & 4 \tabularnewline%
HKUST & hong kong university of science and technology & 9 & 3 \tabularnewline%
NSERC & natural science and engineering research council & 8 & 1 \tabularnewline%
% IPDPS & int'l parallel and distributed processing symp. & 8 & 1 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 24 extracted acronyms of length six}
\label{tab:acronyms_len6}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 6
ASPLOS & architectural support for programming languages and operating systems & 25 & 3 \tabularnewline%
TAPADS & theoretical aspects of parallel and distributed systems & 11 & 5 \tabularnewline%
SIGMOD & special interest group on management of data & 10 & 1 \tabularnewline%
PECASE & presidential early career award for scientists and engineers & 9 & 3 \tabularnewline%
ICLASS & illinois computer laboratory for aerospace systems and software & 9 & 8 \tabularnewline%
DOCSIS & data over cable service interface specification & 7 & 2 \tabularnewline%
LSSDSV & large scientific and software data set visualization & 6 & 4 \tabularnewline%
UMIACS & university of maryland institute for advanced computer studies & 6 & 2 \tabularnewline%
ASPLOS & architecture support for programming languages and operating systems & 5 & 1 \tabularnewline%
CESDIS & center for excellence in space data and information sciences & 4 & 1 \tabularnewline%
DCGMRP & delay constrained group multicast routing problem & 4 & 1 \tabularnewline%
NCCUSL & national conference of commissioners on uniform state laws & 4 & 1 \tabularnewline%
TOMACS & transactions on modeling and computer systems & 3 & 3 \tabularnewline%
YUPPIE & yorktown ultra parallel polymorphic image engine & 3 & 1 \tabularnewline%
ESPRIT & european strategic programme of research in information technology & 3 & 3 \tabularnewline%
DOCSIS & data over cable system interface specification & 3 & 2 \tabularnewline%
EBCDIC & extended binary coded decimal interchange code & 3 & 1 \tabularnewline%
BARWAN & bay area research wireless access network & 3 & 1 \tabularnewline%
FMOODS & formal methods for open object-based distributed systems & 3 & 1 \tabularnewline%
PCMCIA & personal computer memory card international association & 3 & 1 \tabularnewline%
AHPCRC & army high performance computing research center & 3 & 1 \tabularnewline%
MICCAI & medical image computing and computer assisted intervention & 2 & 1 \tabularnewline%
TIPHON & telecommunications and internet protocol harmonization over networks & 2 & 1 \tabularnewline%
PECASE & presidential early career awards for scientists and engineers & 2 & 2 \tabularnewline%
% GUARDS & generic upgradable architecture for real-time dependable systems & 2 & 1 \tabularnewline%
\end{tabular}
\end{table}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{Top 13 extracted acronyms of length seven}
\label{tab:acronyms_len7}
\begin{tabular}{llcc}
\rowcolor{tableheadcolor} \color{white} Acronym & \color{white} Full form & \color{white} $tf$ & \color{white} $df$ \tabularnewline%
%acronym full form tf df
% length 7
EMMCVPR & energy minimization methods in computer vision and pattern recognition & 7 & 3 \tabularnewline%
WYSIWYG & what you see is what you get & 5 & 1 \tabularnewline%
SHOSLIF & self-organizing hierarchical optimal subspace learning and inference framework & 2 & 1 \tabularnewline%
YCAGWYS & you can always get what you see & 1 & 1 \tabularnewline%
ESORICS & european symposium on research in computer security & 1 & 1 \tabularnewline%
SIGCAPH & special interest group for computers and the physically handicapped & 1 & 1 \tabularnewline%
IKIWISI & i'll know it when i see it & 1 & 1 \tabularnewline%
EMERALD & event monitoring enabling responses to anomalous live disturbances & 1 & 1 \tabularnewline%
INSPASS & immigration and naturalization service passenger accelerated service system & 1 & 1 \tabularnewline%
ICIMADE & international conference on intelligent multimedia and distance education & 1 & 1 \tabularnewline%
ICANNGA & int'l conf. artificial neural networks and genetic algorithms & 1 & 1 \tabularnewline%
WYSIWYR & what you see is what you record & 1 & 1 \tabularnewline%
WYSIWYC & what you see is what you compute & 1 & 1 \tabularnewline%
\end{tabular}
\end{table}

 

 

 

 

 

 

 

\FloatBarrier

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\section{INEX Topics}
\label{app:sec:inex_topics}

\subsection{CO Topics}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{CO Topics used at INEX 2005}
\label{app:topics:co}
\begin{tabular}{cp{13cm}}
\rowcolor{tableheadcolor} \mcc{ID} & \mcc{Query} \tabularnewline%
%id topic
202 & ontologies case study \tabularnewline%
203 & code signing verification \tabularnewline%
204 & moldovan semantic networks \tabularnewline%
205 & marshall mcluhan \tabularnewline%
206 & problems physical limits miniaturization microprocessor \tabularnewline%
207 & DOM and SAX \tabularnewline%
208 & "Artificial Intelligence" history \tabularnewline%
209 & mining frequent pattern itemset sequence graph association \tabularnewline%
210 & +multimedia "document models" "content authoring" \tabularnewline%
211 & applications for mobile devices gps "global positioning system" \tabularnewline%
212 & HMM "hidden Markov model" equation \tabularnewline%
213 & Gibbs sampler \tabularnewline%
214 & "adaptive learning" and "interactive learning" in education \tabularnewline%
215 & Conference on Information and Knowledge Management CIKM \tabularnewline%
216 & multimedia retrieval system architecture \tabularnewline%
217 & user-centered design of web sites \tabularnewline%
218 & computer assisted composing music notes MIDI \tabularnewline%
219 & learning object granularity \tabularnewline%
220 & image annotation ontology \tabularnewline%
221 & capabilities limitations commercial speech recognition software \tabularnewline%
222 & eletronic commerce business strategies \tabularnewline%
223 & wireless ATM multimedia \tabularnewline%
224 & incomplete information database \tabularnewline%
225 & xml security \tabularnewline%
226 & corba java \tabularnewline%
227 & Adaboost Bagging "ensemble learning" \tabularnewline%
228 & "IPv6 deployment" "IPv6 support" \tabularnewline%
229 & "latent semantic anlysis" "latent semantic indexing" \tabularnewline%
230 & +brain research +"differential geometry" \tabularnewline%
231 & markov chains in graph related algorithms \tabularnewline%
232 & Dempster Shafer theory Database experiment \tabularnewline%
233 & Synthesizers for music creation \tabularnewline%
234 & "call for papers" conference workshop +multimedia \tabularnewline%
235 & "Central Intelligence Agency" "Federal Bureau of Investigation" personal privacy surveillance concerns +Carnivore \tabularnewline%
236 & machine translation approaches -programming \tabularnewline%
237 & "Natural Language Processing" techniques "Artificial Intelligence" "Intelligent Information Retrieval" +"Medical Informatics" \tabularnewline%
238 & neural network algorithm for chess \tabularnewline%
239 & quantum computation \tabularnewline%
240 & Software quality control and measurement \tabularnewline%
241 & Single sign on + LDAP \tabularnewline%
\end{tabular}
\end{table}

\FloatBarrier

 

\newpage
\subsection{COS Topics}

\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{COS Topics used at INEX 2005}
\label{app:topics:cos}
\begin{tabular}{cp{13cm}}
\rowcolor{tableheadcolor} \mcc{ID} & \mcc{Query} \tabularnewline%
%id topic
202 & //article[about(., ontologies)]//sec[about(., ontologies case study)] \tabularnewline%
203 & //sec[about(., code signing verification)] \tabularnewline%
204 & //*[about(.//au, moldovan) and about(., "semantic networks")] \tabularnewline%
205 & //bdy//*[about(., "Marshall McLuhan")] \tabularnewline%
207 & //*[about(., "DOM and SAX")] \tabularnewline%
208 & //article[about(., "Artificial Intelligence" history)] \tabularnewline%
210 & //article//(abs\textbar{}sec)[about(.,+multimedia "document models" "content authoring")] \tabularnewline%
211 & //article//sec[about(.//p, applications mobile devices gps "global positioning system")] \tabularnewline%
212 & //*[(about(., HMM equation) OR about(., "hidden Markov model" equation)) AND .//en \textgreater{} 0] \tabularnewline%
216 & //sec[about(., multimedia retrieval system architecture) or about(.//fig, multimedia retrieval architecture)] \tabularnewline%
219 & //sec[about(., learning object granularity)] \tabularnewline%
220 & //article[about(., image retrieval)]//sec[about(., annotation ontology)] \tabularnewline%
222 & //article[about(. , bussiness strategies)]//sec[about(. , eletronic commerce e-commerce)] \tabularnewline%
223 & //article[about(.//sec, wireless ATM multimedia)] \tabularnewline%
224 & //article[about(.//bb, Lipski)]//*[about(., incomplete information database)] \tabularnewline%
225 & //*[about(.//p, xml security)] \tabularnewline%
226 & //*[about(.//sec, corba java)] \tabularnewline%
228 & //article[about(.//abs, IPv6)]//sec[about(., "IPv6 deployment") or about(., "IPv6 support")] \tabularnewline%
229 & //article[about(.//bdy,"latent semantic analysis" "latent semantic indexing")] \tabularnewline%
230 & //article//sec[about(.,brain research "differential geometry")] \tabularnewline%
231 & //article//sec[about(.,+"markov chains" +algorithm +graphs)] \tabularnewline%
232 & //article[about(.//abs, Dempster-Shafer theory)]//sec[about(., Dempster Shafer database experiment)] \tabularnewline%
233 & //article[about (.//bdy, synthesizers) and about (.//bdy, music)] \tabularnewline%
234 & //article[about(.//atl,"upcoming events") OR about(.//atl,"call for papers")]//sec[about(., +multimedia conference workshop)] \tabularnewline%
236 & //article[about(., machine translation approaches -programming)] \tabularnewline%
238 & //article[about(.//bdy, "artificial intelligence") and .//yr\textless{}=2000]//bdy[about(., chess) and about(., algorithm)] \tabularnewline%
239 & //article[about(.//bdy//sec, quantum computation) and (.//yr=2000 or .//yr=2001) and about(.//(atl\textbar{}abs\textbar{}kwd), - mechanics)] \tabularnewline%
240 & //article[about(.//(abs\textbar{}kwd),quality control measure)]//sec[about(.//p,software quality)] \tabularnewline%
\end{tabular}
\end{table}

\FloatBarrier

 

\newpage
\subsection{CAS Topics}

\vspace{-0.1cm}
\begin{table}[ht]
\centering
\sffamily \footnotesize
\rowcolors{1}{tablerowcolorodd}{tablerowcoloreven}
\caption{CAS Topics used at INEX 2005}
\label{app:topics:cas}
\begin{tabular}{cp{13cm}}
\rowcolor{tableheadcolor} \mcc{ID} & \mcc{Query} \tabularnewline%
%id topic
242 & //article//sec[about(., web personalization approaches)] \tabularnewline%
243 & //article//bb[about(., Schafer Anand Mulvenna Riecken)] \tabularnewline%
244 & //article[about (.//fm, "query optimization")]//sec[about (., "join query optimization")] \tabularnewline%
245 & //article//fm[about(., "query optimization")] \tabularnewline%
246 & //article//sec[about (., "join query optimization")] \tabularnewline%
247 & //article[about(.//abs,clustering) or about(.//tig,clustering)]//sec[about(.,evaluation measure)] \tabularnewline%
248 & //article//abs[about(.,clustering)] \tabularnewline%
249 & //article//tig[about(.,clustering)] \tabularnewline%
250 & //article//sec[about(.//p, web retrieval) and about(.//p, link analysis)] \tabularnewline%
251 & //article//sec//p[about(., web retrieval)] \tabularnewline%
252 & //article//sec//p[about(., link analysis)] \tabularnewline%
253 & //article[about(.//abs,evaluation "usability experiment" "digital libraries")]//sec[about(., evaluation methodology measures "usability testing")] \tabularnewline%
254 & //article//abs[about(.,evaluation "usability experiment" "digital libraries")] \tabularnewline%
255 & //article//sec[about(., evaluation methodology measures "usability testing")] \tabularnewline%
256 & //article[about(.//p,"data embedding")]//p[about(.,watermarking)] \tabularnewline%
257 & //sec[about(.,free public licenses gnu Linux "open source")] \tabularnewline%
258 & //article[about(.,intellectual property)]//sec[about(., copyright law)] \tabularnewline%
259 & //article[about(.,intellectual property)] \tabularnewline%
260 & //bdy//*[about(., model checking state space explosion)] \tabularnewline%
261 & //article[about(., gesture recognition)]//sec[about(., application HMM "hidden Markov model")] \tabularnewline%
262 & //article[about(., gesture recognition)] \tabularnewline%
263 & //article//sec[about(., application HMM "hidden Markov model")] \tabularnewline%
264 & //article[about(., "machine learning") AND about(.//sec, "mutual information criterion")] \tabularnewline%
265 & //article[about(.//fm//atl, "digital libraries")]//sec[about(.,"information retrieval")] \tabularnewline%
266 & //article//bdy[about (., thread implementation)] \tabularnewline%
267 & //article//fm//atl[about(., "digital libraries")] \tabularnewline%
268 & //article//sec[about(., "information retrieval")] \tabularnewline%
269 & //article[about(.,interconnected networks)]//p[about(., Crossbar networks)] \tabularnewline%
270 & //article//sec[about( ., introduction information retrieval)] \tabularnewline%
271 & //article//p[about(.,watermarking)] \tabularnewline%
272 & //article//p[about(.,embedding data)] \tabularnewline%
273 & //article//sec[about(., "frequent itemsets")] \tabularnewline%
274 & //article//abs[about(., "data mining")] \tabularnewline%
275 & //article[about(.//abs, "data mining")]//sec[about(., "frequent itemsets")] \tabularnewline%
276 & //article//sec[about(.,evaluation measure)] \tabularnewline%
277 & //article//bb[about(., Baeza-Yates)] \tabularnewline%
278 & //sec[about(. , string matching)] \tabularnewline%
279 & //sec[about(.,approximate algorithm)] \tabularnewline%
280 & //article[ about(.//bb, Baeza-Yates) and about(.//sec , string matching)]//sec[about(., approximate algorithm)] \tabularnewline%
281 & //article//sec[about(., copyright law)] \tabularnewline%
282 & //article[about(., "machine learning")] \tabularnewline%
283 & //article//sec[about(., "mutual information criterion")] \tabularnewline%
284 & //article[about (.//bdy, thread implementation) and about (.//bdy, operating system)] \tabularnewline%
285 & //article//bdy[about(., operating system)] \tabularnewline%
286 & //article[about(.,interconnected networks)] \tabularnewline%
287 & //article//p[about(., Crossbar networks)] \tabularnewline%
288 & //article[about(.//bb, Schafer Anand Mulvenna Riecken)]//sec[about(., web personalization approaches)] \tabularnewline%
\end{tabular}
\end{table}

\FloatBarrier

 

%----------------------------------------------------
%----------------------------------------------------
%----------------------------------------------------
\newpage
\section{Evaluation Results -- $nxCG$ Performance of INEX Topics}
\label{app:sec:inex_eval}

This section provides the complete set of performance figures of
X-DOSE. For reasons of readability and limited space, the discussion
in the evaluation chapter concentrated on the strict $nxCG$ metric.
Here, the $nxCG$ results of all three quantization functions, gen,
strict, and genLifted, are given.

 

\subsection{Experiment I -- Single-Term Index Performance}

The colors in Figure~\ref{app:fig:exp1_1} refer to ST01 (blue), ST02
(purple), ST03 (green), ST04 (orange), and ST05 (cyan).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_1_tokenizer_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_1_tokenizer_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_1_tokenizer_genLifted}}
\caption{Tokenizer performance}
\label{app:fig:exp1_1}
\end{figure}

The colors in Figure~\ref{app:fig:exp1_2} refer to ST03 (blue), ST07
(purple), and ST09 (green).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_2_tagger_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_2_tagger_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_2_tagger_genLifted}}
\caption{Tagger performance}
\label{app:fig:exp1_2}
\end{figure}

\newpage
The colors in Figure~\ref{app:fig:exp1_3} refer to ST06 (blue), ST07
(purple), and ST09 (green).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_3_extractor_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_3_extractor_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_3_extractor_genLifted}}
\caption{Extractor performance}
\label{app:fig:exp1_3}
\end{figure}

The colors in Figure~\ref{app:fig:exp1_4} refer to ST07 (blue) and
ST08 (purple).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_4_stemmer_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_4_stemmer_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_4_stemmer_genLifted}}
\caption{Stemmer performance}
\label{app:fig:exp1_4}
\end{figure}

The colors in Figure~\ref{app:fig:exp1_5} refer to ST05 (blue), ST06
(purple), ST09 (green), ST10 (orange), ST11 (cyan), and ST12 (red).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_5_stoplist_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_5_stoplist_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp1_5_stoplist_genLifted}}
\caption{Stopword filtering performance}
\label{app:fig:exp1_5}
\end{figure}

 

 

\newpage
\subsection{Experiment II -- Multi-Term Index Performance}
\label{app:sec:inex_eval_exp2}

The colors in Figure~\ref{app:fig:exp2} refer to MT01 (blue), MT02
(purple), MT03 (green), MT04 (orange), and MT (cyan).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp2_multiterms_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp2_multiterms_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp2_multiterms_genLifted}}
\caption{Multi-term index performance}
\label{app:fig:exp2}
\end{figure}

 

 

\subsection{Experiment III -- Combined Single-Term and Multi-Term Index Performance}

The colors in Figure~\ref{app:fig:exp3} refer to ST09 (blue), MT
(purple), and TOP (green).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp3_combination_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp3_combination_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp3_combination_genLifted}}
\caption{Combined single-term and multi-term index performance}
\label{app:fig:exp3}
\end{figure}

 

 

\newpage
\subsection{Experiment IV -- Content and Structure}

The colors in Figure~\ref{app:fig:exp4_1} refer to CO.Thorough
(blue) and CO.Focused (purple).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CO_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CO_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CO_genLifted}}
\caption{Performance of CO topics}
\label{app:fig:exp4_1}
\end{figure}

The colors in Figure~\ref{app:fig:exp4_2} refer to COS.Thorough
(blue) and COS.Focused (purple).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_COS_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_COS_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_COS_genLifted}}
\caption{Performance of COS topics}
\label{app:fig:exp4_2}
\end{figure}

The color in Figure~\ref{app:fig:exp4_3} refers to SSCAS (blue).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CAS_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CAS_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp4_CAS_genLifted}}
\caption{Performance of SSCAS topics}
\label{app:fig:exp4_3}
\end{figure}

 

%\subsection{Experiment V - Static Term Space versus Dynamic Term Spaces}

 

\newpage
\subsection{Experiment VI -- The Effect of Content Importance $ci$}

In Figures~\ref{app:fig:exp6_1} to~\ref{app:fig:exp6_5}, colors
are used to encode the different $ci$ values of 0.0 (blue), 0.2
(purple), 0.5 (green), 0.8 (orange), and 1.0 (cyan).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Thorough_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Thorough_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Thorough_genLifted}}
\caption{CO.Thorough performance of $ci$}
\label{app:fig:exp6_1}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Focused_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Focused_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_CO_Focused_genLifted}}
\caption{CO.Focused performance of $ci$}
\label{app:fig:exp6_2}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Thorough_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Thorough_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Thorough_genLifted}}
\caption{COS.Thorough performance of $ci$}
\label{app:fig:exp6_3}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Focused_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Focused_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_COS_Focused_genLifted}}
\caption{COS.Focused performance of $ci$}
\label{app:fig:exp6_4}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_SSCAS_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_SSCAS_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp6_ci_SSCAS_genLifted}}
\caption{SSCAS performance of $ci$}
\label{app:fig:exp6_5}
\end{figure}

\FloatBarrier

 

\newpage
\subsection{Experiment VII -- The Effect of the Generality Factor $gf$}

In Figures~\ref{app:fig:exp7_1} to~\ref{app:fig:exp7_3}, colors
are used to encode the different $gf$ values of 0.0 (blue), 0.2
(purple), 0.5 (green), 0.8 (orange), and 1.0 (cyan).
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Thorough_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Thorough_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Thorough_genLifted}}
\caption{COS.Thorough performance of $gf$}
\label{app:fig:exp7_1}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Focused_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Focused_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_COS_Focused_genLifted}}
\caption{COS.Focused performance of $gf$}
\label{app:fig:exp7_2}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_SSCAS_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_SSCAS_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp7_gf_SSCAS_genLifted}}
\caption{SSCAS performance of $gf$}
\label{app:fig:exp7_3}
\end{figure}

\FloatBarrier

 

\newpage
\subsection{Experiment VIII -- INEX 2005 Comparison}

In Figures~\ref{app:fig:exp8_1} to~\ref{app:fig:exp8_5}, colors
are used to distinguish the performance of X-DOSE'09 (blue),
X-DOSE'05 (purple), and other INEX'05 systems.
%
\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Thorough_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Thorough_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Thorough_genLifted}}
\caption{CO.Thorough performance at INEX 2005}
\label{app:fig:exp8_1}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Focused_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Focused_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_CO_Focused_genLifted}}
\caption{CO.Focused performance at INEX 2005}
\label{app:fig:exp8_2}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Thorough_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Thorough_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Thorough_genLifted}}
\caption{COS.Thorough performance at INEX 2005}
\label{app:fig:exp8_3}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Focused_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Focused_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_COS_Focused_genLifted}}
\caption{COS.Focused performance at INEX 2005}
\label{app:fig:exp8_4}
\end{figure}

\begin{figure}[ht]
\centering
\subfloat[gen $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_SSCAS_gen}}
\subfloat[strict $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_SSCAS_strict}}
\subfloat[genLifted $nxCG$]{\includegraphics[width=0.33\textwidth]{10_evaluation/figures/exp8_SSCAS_genLifted}}
\caption{SSCAS performance at INEX 2005}
\label{app:fig:exp8_5}
\end{figure}