Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove duplicates #186

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@
<version>3.9.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.3.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import org.springframework.stereotype.Service
import java.io.BufferedReader
import java.net.HttpURLConnection
import java.net.URI
import java.net.URL

private const val TEN_MINUTES = 600000

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.controller

import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model.DuplicateIRI
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.rdf.jenaTypeFromAcceptHeader
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.service.DataServiceService
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.service.EndpointPermissions
Expand Down Expand Up @@ -54,4 +55,14 @@ open class DataServicesController(
ResponseEntity(HttpStatus.NO_CONTENT)
} else ResponseEntity(HttpStatus.FORBIDDEN)

/**
 * Handles `POST /duplicates`: asks the service to merge the given duplicate data services.
 *
 * @param jwt token of the calling principal, used for the admin-permission check
 * @param duplicates pairs of IRIs describing which data service to retain and which to remove
 * @return `200 OK` when the caller has admin permission and the service call returns,
 * `403 FORBIDDEN` when the caller lacks admin permission
 */
@PostMapping("/duplicates")
fun removeDuplicates(
    @AuthenticationPrincipal jwt: Jwt,
    @RequestBody duplicates: List<DuplicateIRI>
): ResponseEntity<Void> {
    // Guard clause: reject non-admin callers before touching the service.
    if (!endpointPermissions.hasAdminPermission(jwt)) {
        return ResponseEntity(HttpStatus.FORBIDDEN)
    }

    dataServiceService.removeDuplicates(duplicates)
    return ResponseEntity(HttpStatus.OK)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ import org.apache.jena.riot.Lang
import org.slf4j.LoggerFactory
import org.springframework.data.repository.findByIdOrNull
import org.springframework.stereotype.Service
import java.time.ZoneId
import java.time.ZonedDateTime
import java.time.format.DateTimeFormatter
import java.util.*

private val LOGGER = LoggerFactory.getLogger(DataServiceHarvester::class.java)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model

/**
 * Request item describing one pair of duplicate data services.
 *
 * @property iriToRetain IRI of the data service that stays active after the merge
 * @property iriToRemove IRI of the data service that is flagged as removed
 * @property keepRemovedFdkId when `true` (the default), the retained data service takes over
 * the fdkId of the removed one; when `false`, the retained data service keeps its own fdkId
 */
data class DuplicateIRI(
    val iriToRetain: String,
    val iriToRemove: String,
    val keepRemovedFdkId: Boolean = true,
)
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.service

import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.harvester.formatNowWithOsloTimeZone
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model.DuplicateIRI
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model.FdkIdAndUri
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model.HarvestReport
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.rabbit.RabbitMQPublisher
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.rdf.*
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.repository.DataServiceRepository
import org.apache.jena.rdf.model.ModelFactory
import org.apache.jena.riot.Lang
import org.springframework.data.repository.findByIdOrNull
import org.springframework.http.HttpStatus
import org.springframework.stereotype.Service
import org.springframework.web.server.ResponseStatusException
Expand Down Expand Up @@ -65,4 +67,45 @@ class DataServiceService(
}
}

/**
 * Merges each pair of duplicate data services and persists the result.
 *
 * For every entry, the record stored under `iriToRemove` is saved with `removed = true` and
 * the record for `iriToRetain` is saved with `removed = false`. If a record already exists
 * for `iriToRetain`, it takes the earliest `issued` and the latest `modified` of the two.
 * If none exists, a copy of the removed record is stored under the retained IRI.
 * All records are saved in a single `saveAll` call. If any fdkId was superseded, one harvest
 * report listing the removed resources is published afterwards.
 *
 * @param duplicates pairs of IRIs to merge
 * @throws ResponseStatusException with `400 BAD_REQUEST` when no record exists for
 * `iriToRemove`, or when the record whose fdkId would be reported as removed is already
 * flagged as removed
 */
fun removeDuplicates(duplicates: List<DuplicateIRI>) {
    val start = formatNowWithOsloTimeZone()
    val reportAsRemoved = mutableListOf<FdkIdAndUri>()

    val updatedRecords = duplicates.flatMap { duplicate ->
        val remove = dataServiceRepository.findByIdOrNull(duplicate.iriToRemove)
            ?: throw ResponseStatusException(
                HttpStatus.BAD_REQUEST,
                "No data service connected to IRI ${duplicate.iriToRemove}"
            )

        val existing = dataServiceRepository.findByIdOrNull(duplicate.iriToRetain)
        val retain = if (existing == null) {
            // Nothing stored for the retained IRI yet: reuse the removed record under the new IRI.
            remove.copy(uri = duplicate.iriToRetain)
        } else {
            // keep earliest issued
            val earliestIssued = if (existing.issued > remove.issued) remove.issued else existing.issued
            // keep latest modified
            val latestModified = if (existing.modified < remove.modified) remove.modified else existing.modified
            val merged = existing.copy(issued = earliestIssued, modified = latestModified)

            if (duplicate.keepRemovedFdkId) {
                // The retained record's own fdkId is the one that disappears.
                if (merged.removed) {
                    throw ResponseStatusException(
                        HttpStatus.BAD_REQUEST,
                        "Data service with IRI ${merged.uri} has already been removed"
                    )
                }
                reportAsRemoved.add(FdkIdAndUri(fdkId = merged.fdkId, uri = merged.uri))
                merged.copy(fdkId = remove.fdkId)
            } else {
                // The removed record's fdkId is the one that disappears.
                if (remove.removed) {
                    throw ResponseStatusException(
                        HttpStatus.BAD_REQUEST,
                        "Data service with IRI ${remove.uri} has already been removed"
                    )
                }
                reportAsRemoved.add(FdkIdAndUri(fdkId = remove.fdkId, uri = remove.uri))
                merged
            }
        }

        listOf(remove.copy(removed = true), retain.copy(removed = false))
    }
    dataServiceRepository.saveAll(updatedRecords)

    // No fdkId was superseded, so there is nothing to publish.
    if (reportAsRemoved.isEmpty()) return

    val report = HarvestReport(
        id = "duplicate-delete",
        url = "https://fellesdatakatalog.digdir.no/duplicates",
        harvestError = false,
        startTime = start,
        endTime = formatNowWithOsloTimeZone(),
        removedResources = reportAsRemoved
    )
    rabbitPublisher.send(listOf(report))
}

}
1 change: 1 addition & 0 deletions src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ logging:
level.org.apache.jena.riot: ERROR
server:
port: 8080
error.include-message: always
application:
dataserviceUri: ${FDK_DATASERVICE_HARVESTER_URI:https://dataservices.staging.fellesdatakatalog.digdir.no}/dataservices
catalogUri: ${FDK_DATASERVICE_HARVESTER_URI:https://dataservices.staging.fellesdatakatalog.digdir.no}/catalogs
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.contract

import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.model.DuplicateIRI
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.ApiTestContext
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.DATASERVICE_ID_0
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.DATA_SERVICE_DBO_0
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.DATA_SERVICE_DBO_1
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.TestResponseReader
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.apiGet
import no.digdir.informasjonsforvaltning.fdk_dataservice_harvester.utils.authorizedRequest
Expand All @@ -15,18 +19,21 @@ import org.junit.jupiter.api.Tag
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.TestInstance
import org.springframework.boot.test.context.SpringBootTest
import org.springframework.http.HttpMethod
import org.springframework.http.HttpStatus
import org.springframework.test.context.ContextConfiguration
import kotlin.test.assertTrue

@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@SpringBootTest(
properties = ["spring.profiles.active=contract-test"],
webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT)
webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT
)
@ContextConfiguration(initializers = [ApiTestContext.Initializer::class])
@Tag("contract")
class DataServicesContract : ApiTestContext() {
private val responseReader = TestResponseReader()
private val mapper = jacksonObjectMapper()

@Test
fun findSpecific() {
Expand Down Expand Up @@ -61,7 +68,12 @@ class DataServicesContract : ApiTestContext() {

@Test
fun unauthorizedForNoToken() {
val response = authorizedRequest(port, "/dataservices/$DATASERVICE_ID_0", null, "DELETE")
val response = authorizedRequest(
port,
"/dataservices/$DATASERVICE_ID_0",
null,
HttpMethod.DELETE
)
assertEquals(HttpStatus.UNAUTHORIZED.value(), response["status"])
}

Expand All @@ -71,15 +83,19 @@ class DataServicesContract : ApiTestContext() {
port,
"/dataservices/$DATASERVICE_ID_0",
JwtToken(Access.ORG_WRITE).toString(),
"DELETE"
HttpMethod.DELETE
)
assertEquals(HttpStatus.FORBIDDEN.value(), response["status"])
}

@Test
fun notFoundWhenIdNotInDB() {
val response =
authorizedRequest(port, "/dataservices/123", JwtToken(Access.ROOT).toString(), "DELETE")
val response = authorizedRequest(
port,
"/dataservices/123",
JwtToken(Access.ROOT).toString(),
HttpMethod.DELETE
)
assertEquals(HttpStatus.NOT_FOUND.value(), response["status"])
}

Expand All @@ -89,10 +105,67 @@ class DataServicesContract : ApiTestContext() {
port,
"/dataservices/$DATASERVICE_ID_0",
JwtToken(Access.ROOT).toString(),
"DELETE"
HttpMethod.DELETE
)
assertEquals(HttpStatus.NO_CONTENT.value(), response["status"])
}
}

/**
 * Contract tests for `POST /dataservices/duplicates`: authentication, authorization,
 * a missing `iriToRemove`, and the successful case.
 */
@Nested
internal inner class RemoveDuplicates {

    /**
     * Posts a single-element duplicates list that retains [DATA_SERVICE_DBO_1] and removes
     * [iriToRemove], using [token] as the bearer token (`null` sends the request without one).
     */
    private fun postDuplicate(token: String?, iriToRemove: String) =
        authorizedRequest(
            port,
            "/dataservices/duplicates",
            token,
            HttpMethod.POST,
            mapper.writeValueAsString(
                listOf(DuplicateIRI(iriToRemove = iriToRemove, iriToRetain = DATA_SERVICE_DBO_1.uri))
            )
        )

    @Test
    fun unauthorizedForNoToken() {
        val response = postDuplicate(token = null, iriToRemove = DATA_SERVICE_DBO_0.uri)
        assertEquals(HttpStatus.UNAUTHORIZED.value(), response["status"])
    }

    @Test
    fun forbiddenWithNonSysAdminRole() {
        val response = postDuplicate(
            token = JwtToken(Access.ORG_WRITE).toString(),
            iriToRemove = DATA_SERVICE_DBO_0.uri
        )
        assertEquals(HttpStatus.FORBIDDEN.value(), response["status"])
    }

    @Test
    fun badRequestWhenRemoveIRINotInDB() {
        val response = postDuplicate(
            token = JwtToken(Access.ROOT).toString(),
            iriToRemove = "https://123.no"
        )
        assertEquals(HttpStatus.BAD_REQUEST.value(), response["status"])
    }

    @Test
    fun okWithSysAdminRole() {
        val response = postDuplicate(
            token = JwtToken(Access.ROOT).toString(),
            iriToRemove = DATA_SERVICE_DBO_0.uri
        )
        assertEquals(HttpStatus.OK.value(), response["status"])
    }
}

}
Loading
Loading