<!-- 
RSS generated by JIRA (1001.0.0-SNAPSHOT#100246-sha1:7a5c50119eb0633d306e14180817ddef5e80c75d) at Thu Feb 08 23:10:38 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary add field=key&field=summary to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>FOLIO Jira</title>
    <link>https://folio-org.atlassian.net</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>1001.0.0-SNAPSHOT</version>
        <build-number>100246</build-number>
        <build-date>07-02-2024</build-date>
    </build-info>

<item>
            <title>[UISE-70] Codex search results are taking Nonfiling characters into account when sorting</title>
                <link>https://folio-org.atlassian.net/browse/UISE-70</link>
                <project id="10191" key="UISE">ui-search</project>
                    <description>&lt;p&gt;&lt;b&gt;Overview:&lt;/b&gt; When conducting title-level searches in Codex, the sort algorithm does seem to take definite articles and other Nonfiling characters into consideration. This seems to be true for both Swedish and English. &lt;/p&gt;

&lt;p&gt;&lt;b&gt;Steps to Reproduce:&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Create a couple of records in Inventory with titles starting on a, &#229;, &#228; or similar&lt;br/&gt;
For example:&lt;br/&gt;
&quot;Den aktansv&#228;rda&quot;&lt;br/&gt;
&quot;Den &#228;kta varan&quot;&lt;br/&gt;
&quot;Den &#229;l&#228;ndska sk&#228;rg&#229;rden&quot;&lt;br/&gt;
&quot;The &#197;land archipelago&quot;&lt;br/&gt;
&quot;&#197;l&#246;ndska sk&#228;rg&#229;rden&quot;&lt;br/&gt;
&quot;The Aland archipelago&quot;&lt;/li&gt;
&lt;/ul&gt;


&lt;ul&gt;
	&lt;li&gt;Go to Codex and conduct a title search for &lt;em&gt;&#229;land&lt;/em&gt;&lt;/li&gt;
	&lt;li&gt;Sort the results on title in ascending order (arrow pointing up)&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;Expected Results:&lt;/b&gt; &lt;br/&gt;
Search results sorted in the following order:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;The Aland archipelago (&quot;The &quot; should be disregarded)&lt;/li&gt;
	&lt;li&gt;Northern Territories, Asia-Pacific Regional Conflicts and the A&#778;land...&lt;/li&gt;
	&lt;li&gt;A User&#8217;s Guide to the Nestle-Aland 28 Greek New Testament (&quot;A &quot; should be  disregarded)&lt;/li&gt;
	&lt;li&gt;Ware Conterfeyhung eines abscheulichen Aland Fisches...&lt;/li&gt;
	&lt;li&gt;The &#197;land archipelago (&quot;The &quot; should be disregarded)&lt;/li&gt;
	&lt;li&gt;Den &#229;l&#228;ndska sk&#228;rg&#229;rden (&quot;Den &quot; should be disregarded since it is a Swedish definite article)&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Note: Not all of these results (the result items themselves) are expected to emerge. Disregard that; the point is that the nonfiling characters have been taken into account in the sort.&lt;/p&gt;


&lt;p&gt;&lt;b&gt;Actual Results:&lt;/b&gt; &lt;br/&gt;
See attached image&lt;/p&gt;</description>
                <environment></environment>
        <key id="74813">UISE-70</key>
            <summary>Codex search results are taking Nonfiling characters into account when sorting</summary>
                <type id="10001" iconUrl="https://folio-org.atlassian.net/rest/api/2/universal_avatar/view/type/issuetype/avatar/10303?size=medium">Bug</type>
                                            <priority id="10003" iconUrl="https://dev.folio.org/assets/jira-priority/jira-p4.svg">P4</priority>
                        <status id="6" iconUrl="https://folio-org.atlassian.net/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10000">Won&apos;t Do</resolution>
                                                        <assignee accountid="-1">Unassigned</assignee>
                                                                <reporter accountid="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2">Theodor Tolstoy (One-Group.se)</reporter>
                                    <labels>
                            <label>chalmers</label>
                            <label>front-end</label>
                            <label>keep-bug</label>
                            <label>triaged</label>
                            <label>ui-only</label>
                    </labels>
                <created>Tue, 20 Feb 2018 18:00:49 +0000</created>
                <updated>Tue, 21 Dec 2021 18:53:08 +0000</updated>
                            <resolved>Tue, 21 Dec 2021 18:53:08 +0000</resolved>
                                                                        <due></due>
                            <votes>0</votes>
                                    <watches>4</watches>
                                                    <timespent seconds="3600">1 hour</timespent>
                                <comments>
                                                            <comment id="180823" author="5af5ed55244bc90a106063c7" created="Wed, 21 Feb 2018 08:58:28 +0000"  >&lt;p&gt;Tagging &lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=557058%3Af80403de-e149-421e-8750-af45c853402f&quot; class=&quot;user-hover&quot; rel=&quot;557058:f80403de-e149-421e-8750-af45c853402f&quot; data-account-id=&quot;557058:f80403de-e149-421e-8750-af45c853402f&quot; accountid=&quot;557058:f80403de-e149-421e-8750-af45c853402f&quot; rel=&quot;noreferrer&quot;&gt;Charlotte Whitt&lt;/a&gt; for awareness.&lt;/p&gt;</comment>
                                                            <comment id="180825" author="5bffed52a1b46046f530c8f7" created="Wed, 21 Feb 2018 11:42:10 +0000"  >&lt;p&gt;I can&apos;t come up with a rationale for why we might expect the &quot;Expected&quot; sort order. Surely &quot;Northern Territories, Asia-Pacific Regional Conflicts and the&quot; should not all be discarded so that the record sorts by &quot;A&#778;land&quot;?&lt;/p&gt;

&lt;p&gt;What am I missing?&lt;/p&gt;</comment>
                                                            <comment id="180827" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 12:37:33 +0000"  >&lt;p&gt;&lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=5bffed52a1b46046f530c8f7&quot; class=&quot;user-hover&quot; rel=&quot;5bffed52a1b46046f530c8f7&quot; data-account-id=&quot;5bffed52a1b46046f530c8f7&quot; accountid=&quot;5bffed52a1b46046f530c8f7&quot; rel=&quot;noreferrer&quot;&gt;Mike Taylor&lt;/a&gt;I added an explanation to the results in the expected example&lt;/p&gt;

&lt;p&gt;I am not sure that this is the way we want FOLIO to deal with Nonfiling characters, but I am pretty sure we must have a discussion on it since it emerged out of the initial impressions I received when visiting Chalmers, and since it is a thing in current systems.&lt;/p&gt;</comment>
                                                            <comment id="180829" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 12:56:15 +0000"  >&lt;p&gt;I added a screenshot from inside Sierra on a list of search results sorted alphabetically showcasing how it does not take the &quot;den&quot;  definite article into account.&lt;/p&gt;</comment>
                                                            <comment id="180830" author="5bffed52a1b46046f530c8f7" created="Wed, 21 Feb 2018 13:28:14 +0000"  >&lt;p&gt;Thanks for that explanation &amp;#8211; of course, it makes perfect sense.&lt;/p&gt;

&lt;p&gt;So just to clarify: the term &quot;nonfiling characters&quot; actually refers to &lt;em&gt;words&lt;/em&gt;, such as &quot;the&quot; and &quot;den&quot;, rather than to characters? I guess this is just one of those things where the standard term for the concept is wrong, but we&apos;re stuck with it.&lt;/p&gt;

&lt;p&gt;Here&apos;s another multilingual problem. We can&apos;t just strip &quot;den&quot; from the start of titles for sorting purposes, because then English titles like &quot;Den of Thieves&quot; will be sorted wrongly. So what is the &lt;em&gt;desired&lt;/em&gt; functionality? (Once we figure that out, we can start to think about whether it can actually be implemented.)&lt;/p&gt;</comment>
                                                            <comment id="180832" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 13:38:53 +0000"  >&lt;p&gt;For MARC21, this is being handled.(as far as I know).&lt;br/&gt;
Search for &quot;nonfiling characters&quot; on the middle of this page: &lt;a href=&quot;https://www.oclc.org/bibformats/en/2xx/245.html&quot; class=&quot;external-link&quot; rel=&quot;nofollow noreferrer&quot;&gt;https://www.oclc.org/bibformats/en/2xx/245.html&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Someone with more up-to-date knowledge should have a look into this.&lt;/p&gt;
</comment>
                                                            <comment id="180834" author="5bffed52a1b46046f530c8f7" created="Wed, 21 Feb 2018 13:45:33 +0000"  >&lt;p&gt;Yes, that&apos;s a good approach &amp;#8211; the 2nd indicator on the 245 field explicitly states how many leading characters to skip. But the Codex sources will not in general have that information.&lt;/p&gt;</comment>
                                                            <comment id="180835" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 21:31:53 +0000"  >&lt;p&gt;That is true, but I think there are more automatic approaches that could be used today that are more efficient. I think for example Solr and Elasticsearch could be taught to handle this. &lt;/p&gt;</comment>
                                                            <comment id="180836" author="5bffed52a1b46046f530c8f7" created="Wed, 21 Feb 2018 22:16:38 +0000"  >&lt;p&gt;I hope you&apos;re right &amp;#8211; but (A) mod-codex-ekb is not using either of these; (B) neither is mod-codex-inventory, it&apos;s using the RMB-mediated access to PostgreSQL; and (C) in any case, this can&apos;t be done correctly without knowing the language of each record &amp;#8211; otherwise we get the &quot;Den of Thieves&quot; problem I mentioned above.&lt;/p&gt;</comment>
                                                            <comment id="180837" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 22:25:14 +0000"  >&lt;p&gt;That is true, but I think you can come a long way using automated approaches. &lt;/p&gt;

&lt;p&gt;Maybe this is not the best place to ask this question, but why is there not a search engine in Codex?&lt;/p&gt;</comment>
                                                            <comment id="180838" author="5bffed52a1b46046f530c8f7" created="Wed, 21 Feb 2018 22:31:32 +0000"  >&lt;p&gt;There are basically two approaches to searching multiple sources at once.&lt;br/&gt;
1. Harvest everything into one big database and search that.&lt;br/&gt;
2. Search in real time and merge the results.&lt;/p&gt;

&lt;p&gt;There are advantages and disadvantages to each approach. #1 needs more up-front effort and more sysadmin, but yields faster and more consistent results. This is what Summon does. #2 is more lightweight, but slower and dependent on the capability of the sources.&lt;/p&gt;

&lt;p&gt;The Codex is a type-2 solution.&lt;/p&gt;

&lt;p&gt;We would perhaps like to do a type-1 solution, but the fundamental problem is that we can&apos;t in general harvest all the things we want. For example, the EBSCO KB is proprietary and not available for harvesting. So for now at least, this is a non-starter.&lt;/p&gt;</comment>
                                                            <comment id="180839" author="557058:b8e64633-1f7c-402d-9caf-9959a5ba5d0d" created="Tue, 21 Aug 2018 09:25:04 +0000"  >&lt;p&gt;&lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=712020%3A0a02d059-b8b9-48b3-8a58-727ec44d05d2&quot; class=&quot;user-hover&quot; rel=&quot;712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2&quot; data-account-id=&quot;712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2&quot; accountid=&quot;712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2&quot; rel=&quot;noreferrer&quot;&gt;Theodor Tolstoy (One-Group.se)&lt;/a&gt; &lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=5bffed52a1b46046f530c8f7&quot; class=&quot;user-hover&quot; rel=&quot;5bffed52a1b46046f530c8f7&quot; data-account-id=&quot;5bffed52a1b46046f530c8f7&quot; accountid=&quot;5bffed52a1b46046f530c8f7&quot; rel=&quot;noreferrer&quot;&gt;Mike Taylor&lt;/a&gt; guys, I&apos;d like to make sure we are clear about the scope of what can (and will) be done vs what is outside of Core Team control. I suggest addressing this particular issue in two stages:&lt;/p&gt;

&lt;p&gt;1. Stage 1: address sort and search issues in Inventory (and other modules that index data locally in FOLIO), relevant issues here are 
    &lt;span class=&quot;jira-issue-macro resolved&quot; data-jira-key=&quot;FOLIO-1246&quot; &gt;
                &lt;a href=&quot;https://folio-org.atlassian.net/browse/FOLIO-1246&quot; class=&quot;jira-issue-macro-key issue-link&quot;  title=&quot;Implement Postgres Full Text Search functionality&quot; &gt;
            &lt;img class=&quot;icon&quot; src=&quot;https://folio-org.atlassian.net/rest/api/2/universal_avatar/view/type/issuetype/avatar/10307?size=medium&quot; /&gt;
            FOLIO-1246
        &lt;/a&gt;
                                                    &lt;span class=&quot;aui-lozenge aui-lozenge-subtle aui-lozenge-success jira-macro-single-issue-export-pdf&quot;&gt;Closed&lt;/span&gt;
            &lt;/span&gt;
 (which is an umbrella for more powerful search functionality including ranking, stopwords etc) and 
    &lt;span class=&quot;jira-issue-macro&quot; data-jira-key=&quot;MODINVSTOR-148&quot; &gt;
                &lt;a href=&quot;https://folio-org.atlassian.net/browse/MODINVSTOR-148&quot; class=&quot;jira-issue-macro-key issue-link&quot;  title=&quot;sort according to tenant&amp;#39;s locale&quot; &gt;
            &lt;img class=&quot;icon&quot; src=&quot;https://folio-org.atlassian.net/rest/api/2/universal_avatar/view/type/issuetype/avatar/10318?size=medium&quot; /&gt;
            MODINVSTOR-148
        &lt;/a&gt;
                                                    &lt;span class=&quot;aui-lozenge aui-lozenge-subtle aui-lozenge-complete jira-macro-single-issue-export-pdf&quot;&gt;Open&lt;/span&gt;
            &lt;/span&gt;
 (which is about ensuring that tenant locale is used for driving the DB collation setting and will address locale-specific sorting issues)&lt;/p&gt;

&lt;p&gt;2. Stage 2: address sort and search issues in Codex Search app, here we are generally limited by the quality of results from the upstream sources, one of which we control directly (Inventory) while for the other (EBSCO KB) we can request certain tuning.&lt;/p&gt;</comment>
                                                            <comment id="180840" author="5bffed52a1b46046f530c8f7" created="Tue, 21 Aug 2018 14:09:21 +0000"  >&lt;p&gt;Strongly agree. These conversations got into a lot of unnecessary complexity by trying to solve the difficult case of the Codex before having solved the (relatively!) easy case of the local inventory.&lt;/p&gt;</comment>
                                                            <comment id="180841" author="557058:a957226f-df85-4fc8-97f4-8b27a26029ed" created="Tue, 21 Dec 2021 18:53:08 +0000"  >&lt;p&gt;This ticket has been closed because it is over 3 years old and has a very low priority.  &lt;/p&gt;</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10000">
                    <name>Blocks</name>
                                                                <inwardlinks description="is blocked by">
                                        <issuelink>
            <issuekey id="80701">FOLIO-1281</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                            <issuelinktype id="10003">
                    <name>Relates</name>
                                                                <inwardlinks description="relates to">
                                        <issuelink>
            <issuekey id="10074">UXPROD-745</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="74804">UISE-68</issuekey>
        </issuelink>
            <issuelink>
            <issuekey id="74807">UISE-69</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                            <attachment id="61785" name="Capture.PNG" size="104231" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Wed, 21 Feb 2018 12:53:51 +0000"/>
                            <attachment id="61786" name="Capture.PNG" size="31152" author="712020:0a02d059-b8b9-48b3-8a58-727ec44d05d2" created="Tue, 20 Feb 2018 17:59:50 +0000"/>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10000" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummarycf">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10057" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Development Team</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10171"><![CDATA[Prokopovych]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10019" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|i00wxq:y</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10020" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        <customfield id="customfield_10024" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>[CHART] Date of First Response</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Wed, 21 Feb 2018 08:58:28 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10025" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>[CHART] Time in Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>