<!-- 
RSS generated by JIRA (1001.0.0-SNAPSHOT#100246-sha1:7a5c50119eb0633d306e14180817ddef5e80c75d) at Fri Feb 09 00:13:41 UTC 2024

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary add field=key&field=summary to the URL of your request.
-->
<rss version="0.92" >
<channel>
    <title>FOLIO Jira</title>
    <link>https://folio-org.atlassian.net</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>1001.0.0-SNAPSHOT</version>
        <build-number>100246</build-number>
        <build-date>07-02-2024</build-date>
    </build-info>

<item>
            <title>[MODOAIPMH-10] OAI-PMH: IncompleteResponse &amp; resumableTokens</title>
                <link>https://folio-org.atlassian.net/browse/MODOAIPMH-10</link>
                <project id="10151" key="MODOAIPMH">mod-oai-pmh</project>
                    <description>&lt;p&gt;Add incompleteResponse/resumptionTokens to all applicable verbs.  See &lt;a href=&quot;https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl&quot; class=&quot;external-link&quot; rel=&quot;nofollow noreferrer&quot;&gt;https://www.openarchives.org/OAI/openarchivesprotocol.html#FlowControl&lt;/a&gt; and &lt;a href=&quot;http://www.openarchives.org/OAI/2.0/guidelines-repository.htm#resumptionToken&quot; class=&quot;external-link&quot; rel=&quot;nofollow noreferrer&quot;&gt;http://www.openarchives.org/OAI/2.0/guidelines-repository.htm#resumptionToken&lt;/a&gt; for details.&lt;/p&gt;

&lt;p&gt;Applicable Verbs:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;ListIdentifiers&lt;/li&gt;
	&lt;li&gt;ListRecords&lt;/li&gt;
	&lt;li&gt;ListSets&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Things to note:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;the offset/limit query arguments will come in handy here.  See the inventory-storage API docs for details.&lt;/li&gt;
	&lt;li&gt;resumptionToken is an exclusive argument&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;Acceptance Criteria:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Incomplete responses/resumption Tokens are implemented for all applicable verbs&lt;/li&gt;
	&lt;li&gt;expirationDate should be calculated using the date-time of the original response and a TTL value configurable via System Property, e.g. resumption_token_ttl_ms
	&lt;ul&gt;
		&lt;li&gt;A reasonable default should be selected in the event the System property is not specified... 10 minutes&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;expirationDate is provided in the ISO8601 format (YYYY-MM-DDThh:mm:ssZ)&lt;/li&gt;
	&lt;li&gt;The maximum number of records returned should be controlled via a System Property, e.g. max_records_per_response
	&lt;ul&gt;
		&lt;li&gt;A reasonable default should be selected in the event the System property is not specified.... 100?&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;CompleteListSize and cursor should be returned on all incomplete requests (these are optional in the spec)&lt;/li&gt;
	&lt;li&gt;Caching of result sets is not required at this time.&lt;/li&gt;
&lt;/ul&gt;
</description>
                <environment></environment>
        <key id="64593">MODOAIPMH-10</key>
            <summary>OAI-PMH: IncompleteResponse &amp; resumableTokens</summary>
                <type id="10005" iconUrl="https://folio-org.atlassian.net/rest/api/2/universal_avatar/view/type/issuetype/avatar/10309?size=medium">Story</type>
                            <parent id="10625">UXPROD-993</parent>
                                    <priority id="10002" iconUrl="https://dev.folio.org/assets/jira-priority/jira-p3.svg">P3</priority>
                        <status id="6" iconUrl="https://folio-org.atlassian.net/images/icons/statuses/closed.png" description="The issue is considered finished, the resolution is correct. Issues which are closed can be reopened.">Closed</status>
                    <statusCategory id="3" key="done" colorName="green"/>
                                    <resolution id="10003">Done</resolution>
                                                        <assignee accountid="712020:1b666af5-3f06-4de1-a8ec-5d6d52541b8e">Pavel Korolenok</assignee>
                                                                <reporter accountid="5af5ecdb772036612ff61cf1">Hkaplanian</reporter>
                                    <labels>
                            <label>epam-thunderjet</label>
                    </labels>
                <created>Fri, 28 Sep 2018 17:35:07 +0000</created>
                <updated>Wed, 1 Apr 2020 11:53:03 +0000</updated>
                            <resolved>Fri, 9 Nov 2018 13:42:06 +0000</resolved>
                                                    <fixVersion>1.0.0</fixVersion>
                                        <due></due>
                            <votes>0</votes>
                                    <watches>3</watches>
                                                                <comments>
                                                            <comment id="157175" author="712020:1b666af5-3f06-4de1-a8ec-5d6d52541b8e" created="Tue, 30 Oct 2018 11:31:22 +0000"  >&lt;p&gt;Hi &lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=5cf6c546b87c300f36eb7b9a&quot; class=&quot;user-hover&quot; rel=&quot;5cf6c546b87c300f36eb7b9a&quot; data-account-id=&quot;5cf6c546b87c300f36eb7b9a&quot; accountid=&quot;5cf6c546b87c300f36eb7b9a&quot; rel=&quot;noreferrer&quot;&gt;Craig McNally&lt;/a&gt;, &lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=5af5ecdb772036612ff61cf1&quot; class=&quot;user-hover&quot; rel=&quot;5af5ecdb772036612ff61cf1&quot; data-account-id=&quot;5af5ecdb772036612ff61cf1&quot; accountid=&quot;5af5ecdb772036612ff61cf1&quot; rel=&quot;noreferrer&quot;&gt;Hkaplanian&lt;/a&gt;,&lt;/p&gt;

&lt;p&gt;It seems there is an issue with &lt;em&gt;resumptionToken&lt;/em&gt; implementation based on inventory-instance-storage&apos;s offset/limit query arguments.&lt;/p&gt;

&lt;p&gt;According to the specification:&lt;/p&gt;
&lt;blockquote&gt;
&lt;p&gt;When there are changes in the repository. There may be changes to the complete list returned by the list request sequence. These changes occur when the records disseminated in the list move in or out of the datestamp range of the request because of changes, modifications, or deletions in the repository. In this case, strict idempotency of the incomplete-list requests using resumptionToken values is not required. Instead, the incomplete list returned in response to a re-issued request must include all records with unchanged datestamps within the range of the initial list request. The incomplete list returned in response to a re-issued request may contain records with datestamps that either moved into or out of the range of the initial request. In cases where there are substantial changes to the repository, it may be appropriate for a repository to return a badResumptionToken error, signaling that the harvester should restart the list request sequence.&lt;/p&gt;&lt;/blockquote&gt;

&lt;p&gt;inventory-instance-storage&apos;s offset/limit query arguments do not allow us to satisfy this requirement (specifically, that a re-issued request must include all records with unchanged datestamps within the range of the initial list request). I will explain it with an example:&lt;br/&gt;
The complete list contains 5 instances: A, B, C, D, E. And we use limit of 2 (return 2 instances at a time). Here is a flow:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Harvester issues ListRecords request&lt;/li&gt;
	&lt;li&gt;The repo responds with an incomplete list of 2 records and resumptionToken (internally offset of 0 is used)&lt;br/&gt;
A, B&lt;/li&gt;
	&lt;li&gt;Harvester issues subsequent request with resumptionToken&lt;/li&gt;
	&lt;li&gt;&lt;b&gt;In the meantime, record B is updated and moved to a new place in the result set (or even out of the result set if it goes beyond upper bound specified)&lt;/b&gt;&lt;/li&gt;
	&lt;li&gt;The repo calculates another set and responds with an incomplete list of 2 records and resumptionToken (internally offset of 2 is used this time)&lt;br/&gt;
D, E&lt;br/&gt;
(&lt;b&gt;record C is lost because it moved to the &apos;left&apos; and ended up outside the subset starting at offset 2. The result set now looks like: A, C, D, E&lt;/b&gt;)&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;In other words, offset argument does not provide required guarantees (no lost records) when initially requested result set changes, because inventory-instance-storage regenerates the result set when the changes occur.&lt;br/&gt;
I tested this scenario on the folio/testing-backend vagrant box and it works as described above:&lt;br/&gt;
There were the following instances after 2 queries:&lt;br/&gt;
Offset 0&lt;br/&gt;
7fbd5d84-62d1-44c6-9c45-6cb173998bbd&lt;br/&gt;
549fad9e-7f8e-4d8e-9a71-00d251817866&lt;br/&gt;
Offset 2&lt;br/&gt;
a89eccf0-57a6-495e-898d-32b9b2210f2f&lt;br/&gt;
a317b304-528c-424f-961c-39174933b454&lt;/p&gt;

&lt;p&gt;Then I changed 549fad9e... instance and repeated the queries:&lt;br/&gt;
Offset 0&lt;br/&gt;
7fbd5d84-62d1-44c6-9c45-6cb173998bbd&lt;br/&gt;
a89eccf0-57a6-495e-898d-32b9b2210f2f    (this item moved from the second subset and could be lost)&lt;br/&gt;
Offset 2&lt;br/&gt;
a317b304-528c-424f-961c-39174933b454&lt;br/&gt;
c1d3be12-ecec-4fab-9237-baf728575185&lt;/p&gt;


&lt;p&gt;For now, I don&apos;t see ways to overcome it using approach with offset/limit query arguments.&lt;br/&gt;
What do you think?&lt;/p&gt;</comment>
                                                            <comment id="157176" author="712020:1b666af5-3f06-4de1-a8ec-5d6d52541b8e" created="Tue, 30 Oct 2018 12:56:07 +0000"  >&lt;p&gt;There are 2 ways to overcome the issue described above:&lt;br/&gt;
&lt;b&gt;First approach&lt;/b&gt;&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Limit the upper bound to the current datetime if it&apos;s not provided or if it&apos;s later than the current datetime (this is even recommended according to the specification)&lt;br/&gt;
This should guarantee that number of records in the result set will not grow.&lt;/li&gt;
	&lt;li&gt;Add initial total number of records to the resumptionToken and compare it during each sequential call&lt;br/&gt;
If it decreases, it means some records have changed and moved out of the result set, and we may run into the problem above. To avoid this issue, we can generate a badResumptionToken error in this case, which is acceptable according to the specification
&lt;blockquote&gt;
&lt;p&gt;In cases where there are substantial changes to the repository, it may be appropriate for a repository to return a badResumptionToken error, signaling that the harvester should restart the list request sequence.&lt;/p&gt;&lt;/blockquote&gt;
&lt;p&gt;&lt;ins&gt;Cons:&lt;/ins&gt;&lt;/p&gt;&lt;/li&gt;
	&lt;li&gt;If the repository changes rapidly compared with the rate at which a harvester issues follow-on requests, it may result in a large number of failed request sequences.&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;&lt;b&gt;Second approach&lt;/b&gt;&lt;/p&gt;
&lt;ul class=&quot;alternate&quot; type=&quot;square&quot;&gt;
	&lt;li&gt;Limit the upper bound to the current datetime as in the first approach&lt;/li&gt;
	&lt;li&gt;Sort the result set by createdDate/updatedDate (is it possible via CQL query?)&lt;/li&gt;
	&lt;li&gt;Instead of using &apos;offset&apos; argument, shift the lower bound to the created/updatedDate of the last record in the incomplete result set&lt;br/&gt;
It would guarantee that we will pick up all the records even if the result set changes&lt;br/&gt;
&lt;ins&gt;Cons:&lt;/ins&gt;&lt;/li&gt;
	&lt;li&gt;This approach requires sorting of the result set by created/updatedDate which may affect overall request performance (in particular, if the records are not sorted in this way in the storage).&lt;/li&gt;
&lt;/ul&gt;


&lt;p&gt;In terms of implementation, both approaches require more or less the same effort.&lt;/p&gt;

&lt;p&gt;&lt;a href=&quot;https://folio-org.atlassian.net/secure/ViewProfile.jspa?accountId=5cf6c546b87c300f36eb7b9a&quot; class=&quot;user-hover&quot; rel=&quot;5cf6c546b87c300f36eb7b9a&quot; data-account-id=&quot;5cf6c546b87c300f36eb7b9a&quot; accountid=&quot;5cf6c546b87c300f36eb7b9a&quot; rel=&quot;noreferrer&quot;&gt;Craig McNally&lt;/a&gt;,&lt;br/&gt;
could you please review and share your opinion?&lt;/p&gt;</comment>
                                                            <comment id="157177" author="5cf6c546b87c300f36eb7b9a" created="Tue, 30 Oct 2018 17:43:20 +0000"  >&lt;p&gt;&amp;gt; Limit the upper bound to the current datetime if it&apos;s not provided or if it&apos;s later than current datetime&lt;br/&gt;
Yes, definitely&lt;/p&gt;

&lt;p&gt;I think option #1 is the preferred approach here.  We might consider a slight variation or extension to this approach.&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;include the length of the original result set in the resumption token&lt;/li&gt;
	&lt;li&gt;include the UUID of the next result in the resumption token
	&lt;ul&gt;
		&lt;li&gt;requires that internally we ask for desired length+1&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
	&lt;li&gt;on subsequent requests, compare the result set length with that provided in the resumption token.
	&lt;ul&gt;
		&lt;li&gt;if the same, continue as usual&lt;/li&gt;
		&lt;li&gt;if smaller, a record has left the result set.
		&lt;ul&gt;
			&lt;li&gt;compare the first record in the results with the UUID from the resumptionToken.&lt;/li&gt;
			&lt;li&gt;if the same, update the record set length in the resumption token and continue as usual (the record that left the result set has not been reached yet).&lt;/li&gt;
			&lt;li&gt;if different, respond with badResumptionToken (continuing will result in missing records)&lt;/li&gt;
		&lt;/ul&gt;
		&lt;/li&gt;
	&lt;/ul&gt;
	&lt;/li&gt;
&lt;/ul&gt;
</comment>
                                                            <comment id="157178" author="712020:1b666af5-3f06-4de1-a8ec-5d6d52541b8e" created="Tue, 6 Nov 2018 09:31:47 +0000"  >&lt;p&gt;The story is completed and ready for review.&lt;/p&gt;

&lt;p&gt;&lt;b&gt;Notes:&lt;/b&gt;&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;Flow Control (resumptionToken) is implemented for &lt;em&gt;ListIdentifiers&lt;/em&gt; and &lt;em&gt;ListRecords&lt;/em&gt; verbs. &lt;em&gt;ListSets&lt;/em&gt; verb supports only one set for now, so it does not require Flow Control.&lt;/li&gt;
	&lt;li&gt;Inventory-storage&apos;s  offset/limit query params are used as a basis for Flow Control support. The approach is described above.&lt;/li&gt;
	&lt;li&gt;&quot;Encoding State in the resumptionToken&quot; strategy is implemented to support resumptionToken&lt;/li&gt;
	&lt;li&gt;expirationDate is not used as resumptionToken values do not expire (this is a result of using &quot;encoding state&quot; approach). This is acceptable according to the OAI-PMH spec.&lt;/li&gt;
	&lt;li&gt;The maximum number of records returned is controlled via a System Property&lt;/li&gt;
	&lt;li&gt;CompleteListSize and Cursor are returned on all incomplete requests&lt;/li&gt;
	&lt;li&gt;The resumptionToken value is Base-64 encoded (in a URL-safe manner) so that it can be used as a GET param as is. This is done in order not to rely on the Harvester to encode the resumptionToken before adding it to the subsequent request (e.g. the MarcEdit Harvester does not encode it and adds it to the request as is)&lt;/li&gt;
&lt;/ul&gt;
</comment>
                    </comments>
                <issuelinks>
                            <issuelinktype id="10003">
                    <name>Relates</name>
                                            <outwardlinks description="relates to">
                                        <issuelink>
            <issuekey id="10253">UXPROD-350</issuekey>
        </issuelink>
                            </outwardlinks>
                                                                <inwardlinks description="relates to">
                                        <issuelink>
            <issuekey id="64614">MODOAIPMH-112</issuekey>
        </issuelink>
                            </inwardlinks>
                                    </issuelinktype>
                    </issuelinks>
                <attachments>
                    </attachments>
                <subtasks>
                    </subtasks>
                <customfields>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10000" key="com.atlassian.jira.plugins.jira-development-integration-plugin:devsummarycf">
                        <customfieldname>Development</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10057" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                        <customfieldname>Development Team</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue key="10185"><![CDATA[Thunderjet]]></customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10014" key="com.pyxis.greenhopper.jira:gh-epic-link">
                        <customfieldname>Epic Link</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue key="$xmlutils.escape($text)">OAI-PMH Support</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <customfield id="customfield_10019" key="com.pyxis.greenhopper.jira:gh-lexo-rank">
                        <customfieldname>Rank</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>0|hzyyrr:</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <customfield id="customfield_10020" key="com.pyxis.greenhopper.jira:gh-sprint">
                        <customfieldname>Sprint</customfieldname>
                        <customfieldvalues>
                                <customfieldvalue id="1629">oai-pmh - sprint 50</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                                            <customfield id="customfield_10044" key="com.atlassian.jira.plugin.system.customfieldtypes:float">
                        <customfieldname>Story Points</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>5.0</customfieldvalue>
                        </customfieldvalues>
                    </customfield>
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                <customfield id="customfield_10024" key="com.atlassian.jira.ext.charting:firstresponsedate">
                        <customfieldname>[CHART] Date of First Response</customfieldname>
                        <customfieldvalues>
                            <customfieldvalue>Tue, 30 Oct 2018 11:31:22 +0000</customfieldvalue>

                        </customfieldvalues>
                    </customfield>
                                                                <customfield id="customfield_10025" key="com.atlassian.jira.ext.charting:timeinstatus">
                        <customfieldname>[CHART] Time in Status</customfieldname>
                        <customfieldvalues>
                            
                        </customfieldvalues>
                    </customfield>
                                    </customfields>
    </item>
</channel>
</rss>