diff --git a/.github/workflows/add_bugs_to_project.yml b/.github/workflows/add_bugs_to_project.yml index ee39d7ce5eb..a44d9b153d7 100644 --- a/.github/workflows/add_bugs_to_project.yml +++ b/.github/workflows/add_bugs_to_project.yml @@ -11,7 +11,7 @@ jobs: name: Add bug to project runs-on: ubuntu-latest steps: - - uses: actions/add-to-project@v1.0.2 + - uses: actions/add-to-project@v2.0.0 with: project-url: https://github.com/orgs/IQSS/projects/34 github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} diff --git a/.github/workflows/container_integration_tests.yml b/.github/workflows/container_integration_tests.yml new file mode 100644 index 00000000000..d31d5029d52 --- /dev/null +++ b/.github/workflows/container_integration_tests.yml @@ -0,0 +1,285 @@ +name: Container Integration Tests Workflow + +on: + workflow_dispatch: + push: + branches: + - develop + - master + paths-ignore: + - "doc/**" + - "**/*.md" + - ".github/ISSUE_TEMPLATE/**" + - ".github/*.md" + pull_request: + branches: + - develop + - master + paths-ignore: + - "doc/**" + - "**/*.md" + - ".github/ISSUE_TEMPLATE/**" + - ".github/*.md" + +concurrency: + group: "container-integration-tests-${{ github.ref }}" + cancel-in-progress: true + +jobs: + main-integration-tests-workflow: + runs-on: ubuntu-latest + timeout-minutes: 60 + + defaults: + run: + shell: bash + + permissions: + contents: read + checks: write + pull-requests: write + + steps: + + # --------------------------- + # CHECKOUT + # --------------------------- + - name: Checkout repository + uses: actions/checkout@v6 + + # --------------------------- + # VERIFY DOCKER + # --------------------------- + - name: Verify Docker + run: | + set -euo pipefail + docker version + + # --------------------------- + # SETUP JAVA + MAVEN + # --------------------------- + - name: Setup Java + uses: actions/setup-java@v5 + with: + distribution: "temurin" + java-version: "21" + cache: "maven" + + - name: Verify Maven + run: | + set -euo pipefail + mvn -version + + # 
--------------------------- + # BUILD IMAGES (Dataverse-native) + # --------------------------- + - name: Build Dataverse containers via Maven + run: | + set -euo pipefail + mvn -Pct -T 1C package + + # --------------------------- + # START CONTAINERS (BACKGROUND) + # --------------------------- + - name: Start Dataverse stack + run: | + set -euo pipefail + mvn -Pct docker:start \ + -Ddataverse.feature.index-harvested-metadata-source=true \ + -Ddataverse.oai.server.maxidentifiers=2 \ + -Ddataverse.oai.server.maxrecords=2 + + # --------------------------- + # WAIT FOR API READINESS + # --------------------------- + - name: Wait for Dataverse API readiness + run: | + set -euo pipefail + URL="http://localhost:8080/api/info/version" + MAX_ATTEMPTS=10 + SLEEP_TIME=15 + echo "Waiting for Dataverse readiness..." + for attempt in $(seq 1 $MAX_ATTEMPTS); do + echo "Attempt $attempt..." + RESPONSE=$(curl -s --max-time 15 "$URL" || true) + STATUS=$(echo "$RESPONSE" | jq -r '.status' 2>/dev/null || echo "NOT_READY") + if [ "$STATUS" = "OK" ]; then + echo "Dataverse endpoint is READY." + echo "Dataverse waiting for full readiness. Waiting 30 more seconds." + sleep 30 + echo "Response: $RESPONSE" + exit 0 + fi + echo "Not ready. Sleeping ${SLEEP_TIME}s..." + sleep $SLEEP_TIME + if [ $SLEEP_TIME -lt 60 ]; then + SLEEP_TIME=$((SLEEP_TIME * 2)) + if [ $SLEEP_TIME -gt 60 ]; then + SLEEP_TIME=60 + fi + fi + done + echo "Dataverse failed to become ready." + docker ps + CONTAINERS="$(docker ps -aq)" + if [ -n "$CONTAINERS" ]; then + for cid in $CONTAINERS; do + echo "===== Logs for container $cid =====" + docker logs "$cid" || true + done + else + echo "No running containers to show logs for." 
+ fi + exit 1 + + # --------------------------- + # MAP LOCALSTACK TO LOCALHOST + # --------------------------- + - name: Map localstack to localhost for Maven tests + run: echo "127.0.0.1 localstack" | sudo tee -a /etc/hosts + + # --------------------------- + # CONFIGURE DATAVERSE FOR TESTS + # --------------------------- + - name: Configure Dataverse API Settings + run: | + set -euo pipefail + + echo "Setting API Database Settings via internal container curl..." + + # We define the settings in an array + declare -A settings=( + [":BuiltinUsersKey"]="burrito" + [":ProvCollectionEnabled"]="true" + [":AllowApiTokenLookupViaApi"]="true" + [":AllowSignUp"]="true" + ) + # We run curl INSIDE the container so the source IP is 127.0.0.1 + for key in "${!settings[@]}"; do + echo "Setting $key..." + docker exec dev_dataverse curl --fail-with-body -sS -X PUT -d "${settings[$key]}" "http://localhost:8080/api/admin/settings/$key" + echo "" + done + + # --------------------------- + # PRE-TEST INJECTIONS + # --------------------------- + - name: Put SUSHI config file in place + run: | + set -euo pipefail + + SOURCE_FILE="${{ github.workspace }}/src/test/java/edu/harvard/iq/dataverse/makedatacount/sushi_sample_logs.json" + + echo "Injecting local file into container..." 
+ # This reads the local file and writes it inside the container using standard input + docker exec -i dev_dataverse sh -c "cat > /tmp/sushi_sample_logs.json" < "$SOURCE_FILE" + + # Verify the content is actually there and has size + docker exec dev_dataverse ls -l /tmp/sushi_sample_logs.json + docker exec dev_dataverse head -n 5 /tmp/sushi_sample_logs.json + + # --------------------------- + # RUN MAVEN INTEGRATION TESTS + # --------------------------- + - name: Run Maven Integration Tests + env: + DVAPIKEY: "burrito" + DV_APIKEY: "burrito" + DV_API_KEY: "burrito" + run: | + set -euo pipefail + TEST_SUITE=$(cat tests/integration-tests.txt) + + echo "Running suite: $TEST_SUITE" + + mvn test \ + -Dtest="$TEST_SUITE" \ + -Dmaven.test.failure.ignore=true \ + -Ddataverse.test.baseurl=http://localhost:8080 \ + -DcompilerArgument=-Xlint:unchecked + + # --------------------------- + # UPLOAD SUREFIRE/FAILSAFE REPORTS + # --------------------------- + - name: Upload Test Failure Reports + if: always() + uses: actions/upload-artifact@v7 + with: + name: maven-test-reports + path: | + target/surefire-reports/ + target/failsafe-reports/ + retention-days: 14 + + # --------------------------- + # PUBLISH TEST DASHBOARD IN GITHUB PR + # --------------------------- + - name: Publish Test Results Dashboard + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: | + target/failsafe-reports/TEST-*.xml + target/surefire-reports/TEST-*.xml + + # --------------------------- + # FAIL WORKFLOW IF TESTS FAILED + # --------------------------- + - name: Check for Test Failures + if: always() + run: | + echo "Checking Surefire/Failsafe reports for failures..." + if grep -q "/dev/null; then + echo "Tests failed! Failing the workflow." + exit 1 + fi + echo "All tests passed." 
+ + # --------------------------- + # COLLECT DOCKER LOGS (ALWAYS, WITH MAPPING) + # --------------------------- + - name: Collect Docker logs (mapped) + if: always() + run: | + mkdir -p docker-logs + echo "Gathering container metadata..." + docker ps -a --format '{{.Names}}|{{.Image}}|{{.Status}}' > docker-logs/container-summary.txt + while IFS='|' read -r name image status; do + # Create a readable label + label="$name" + case "$name" in + *dataverse*) + label="dataverse-app" + ;; + *postgres*) + label="postgres-db" + ;; + *solr*) + label="solr-index" + ;; + *localstack*) + label="localstack-s3" + ;; + esac + echo "Collecting logs for $name ($label)" + { + echo "===== CONTAINER: $name =====" + echo "Label: $label" + echo "Image: $image" + echo "Status: $status" + echo "" + echo "===== LOGS =====" + docker logs --timestamps "$name" 2>&1 || true + } > "docker-logs/${label}__${name}.log" + done < docker-logs/container-summary.txt + + # --------------------------- + # UPLOAD DOCKER LOGS (ALWAYS) + # --------------------------- + - name: Upload Docker logs + if: always() + uses: actions/upload-artifact@v7 + with: + name: docker-logs + path: docker-logs/ + retention-days: 14 \ No newline at end of file diff --git a/.github/workflows/container_maintenance.yml b/.github/workflows/container_maintenance.yml index fcd304f9b50..27e8cfd26d4 100644 --- a/.github/workflows/container_maintenance.yml +++ b/.github/workflows/container_maintenance.yml @@ -173,7 +173,7 @@ jobs: with: platforms: ${{ env.PLATFORMS }} - name: Setup Trivy binary for vulnerability scanning - uses: aquasecurity/setup-trivy@v0.2.6 + uses: aquasecurity/setup-trivy@v0.3.1 with: version: v0.69.3 diff --git a/.github/workflows/copy_labels.yml b/.github/workflows/copy_labels.yml index 83824b0125a..882d895323d 100644 --- a/.github/workflows/copy_labels.yml +++ b/.github/workflows/copy_labels.yml @@ -14,6 +14,6 @@ jobs: name: Copy labels from linked issues steps: - name: copy-labels - uses: 
michalvankodev/copy-issue-labels@v1.3.0 + uses: michalvankodev/copy-issue-labels@v2.0.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.readthedocs.yml b/.readthedocs.yml index cadaedc1448..792a9f05119 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,9 +5,9 @@ formats: - pdf build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: - python: "3.10" + python: "3.12" apt_packages: - graphviz diff --git a/conf/keycloak/builtin-users-spi/pom.xml b/conf/keycloak/builtin-users-spi/pom.xml index 4f096a15738..6cafde744ea 100644 --- a/conf/keycloak/builtin-users-spi/pom.xml +++ b/conf/keycloak/builtin-users-spi/pom.xml @@ -100,7 +100,7 @@ - 26.5.5 + 26.6.0 17 3.2.0 0.4 diff --git a/conf/mdc/counter_weekly.sh b/conf/mdc/counter_weekly.sh index 67cb5df2af2..79f8c534a33 100644 --- a/conf/mdc/counter_weekly.sh +++ b/conf/mdc/counter_weekly.sh @@ -31,7 +31,7 @@ case $HTTP_STATUS in # Successfully queued # Extract status from the nested data object STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status') - + # Extract message from the nested data object if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message') @@ -89,4 +89,5 @@ done } # Call the function on the root dataverse to start processing -processDV 1 \ No newline at end of file +processDV 1 +echo "Processing Dataverse Complete: $(date)" diff --git a/conf/solr/schema.xml b/conf/solr/schema.xml index 34f888acec4..4d65b378485 100644 --- a/conf/solr/schema.xml +++ b/conf/solr/schema.xml @@ -339,9 +339,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + @@ -596,9 +621,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/conf/solr/solr-driver.sh b/conf/solr/solr-driver.sh new file mode 100755 index 00000000000..7497e1e305f --- /dev/null +++ b/conf/solr/solr-driver.sh @@ -0,0 +1,1066 @@ +#!/bin/bash + +# [INFO]: Watch Dataverse Metadata Fields and update Solr 
Schema on changes + +set -euo pipefail + +#### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### +# This script has two modes: watching and one-shot. +# +# In watching mode, it will: +# 1. Watch for changes to the Dataverse Metadata Fields by polling the REST API +# 2. Download the field definitions and apply them using update-fields.sh +# 3. Make sure there are actually changes between the current and the new schema.xml +# 4. Create a backup copy of the live schema.xml before replacing it +# 5. Call the Solr RELOAD API to update the index +# 6. In case something goes wrong, it will restore the known working configuration +# +# In one-shot mode, it (usually) only executes steps 2 to 4. +# +# Upgrade Mode (oneshot only): +# - Use --upgrade (-U) flag to apply downloaded metadata fields to a template schema +# - By default uses template from $SOLR_TEMPLATE/conf/schema.xml +# - Template location can be overridden with --schema-source-path or UPGRADE_SOURCE_PATH +# +#### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### + +# Default configuration variables +DEFAULT_VERBOSE="false" +DEFAULT_DATAVERSE_URL="http://localhost:8080" +DEFAULT_SOLR_URL="http://localhost:8983" +DEFAULT_SOLR_CORE="collection1" +DEFAULT_SCHEMA_PATH="/var/solr/data/collection1/conf/schema.xml" +DEFAULT_UPDATE_FIELDS_SCRIPT="$(dirname "$0")/update-fields.sh" +DEFAULT_POLL_INTERVAL="60" +DEFAULT_WORK_DIR="/tmp/dataverse-schema-watcher" +DEFAULT_MODE="oneshot" +DEFAULT_STARTUP_CHECK="fail" +DEFAULT_LOCK_TIMEOUT="300" +DEFAULT_WAIT_RETRY_PERIOD="5" +DEFAULT_WAIT_MAX_RETRIES="60" +DEFAULT_UPGRADE_MODE="false" +# Note: this is specific to the configbaker container use case. Override with -P! 
+DEFAULT_UPGRADE_SOURCE_PATH="${SOLR_TEMPLATE:-/opt/solr/template}/conf/schema.xml" + +# Initialize from environment or defaults +VERBOSE="${VERBOSE:-${DEFAULT_VERBOSE}}" +DATAVERSE_URL="${DATAVERSE_URL:-${DEFAULT_DATAVERSE_URL}}" +SOLR_URL="${SOLR_URL:-${DEFAULT_SOLR_URL}}" +SOLR_CORE="${SOLR_CORE:-${DEFAULT_SOLR_CORE}}" +SCHEMA_TARGET_PATH="${SCHEMA_TARGET_PATH:-${DEFAULT_SCHEMA_PATH}}" +SCHEMA_SOURCE_PATH="${SCHEMA_SOURCE_PATH:-${DEFAULT_SCHEMA_PATH}}" +UPDATE_FIELDS_SCRIPT="${UPDATE_FIELDS_SCRIPT:-${DEFAULT_UPDATE_FIELDS_SCRIPT}}" +POLL_INTERVAL="${POLL_INTERVAL:-${DEFAULT_POLL_INTERVAL}}" +WORK_DIR="${WORK_DIR:-${DEFAULT_WORK_DIR}}" +MODE="${MODE:-${DEFAULT_MODE}}" +STARTUP_CHECK="${STARTUP_CHECK:-${DEFAULT_STARTUP_CHECK}}" +LOCK_TIMEOUT="${LOCK_TIMEOUT:-${DEFAULT_LOCK_TIMEOUT}}" +WAIT_RETRY_PERIOD="${WAIT_RETRY_PERIOD:-${DEFAULT_WAIT_RETRY_PERIOD}}" +WAIT_MAX_RETRIES="${WAIT_MAX_RETRIES:-${DEFAULT_WAIT_MAX_RETRIES}}" +UPGRADE_MODE="${UPGRADE_MODE:-${DEFAULT_UPGRADE_MODE}}" +UPGRADE_SOURCE_PATH="${UPGRADE_SOURCE_PATH:-${DEFAULT_UPGRADE_SOURCE_PATH}}" + +METADATA_ENDPOINT="" +LOCK_FD="" +SOLR_AUTH_HEADER="" +DATAVERSE_AUTH_HEADER="" +SCHEMA_SOURCE_PATH_SET_BY_USER="false" + +# Logging functions +log_info() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] $*" +} + +log_error() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] [ERROR] $*" >&2 +} + +log_warn() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] [WARN] $*" +} + +# Log info only when verbose mode is enabled +log_verbose() { + if [[ "${MODE}" == "oneshot" || "${VERBOSE}" == "true" ]]; then + echo "[$(date +'%Y-%m-%d %H:%M:%S')] [INFO] $*" + fi +} + +# Usage information +usage() { + cat << EOF +Usage: $0 [OPTIONS] + +Options: + -m, --mode MODE Mode: 'watch' (default) or 'oneshot' + -i, --interval SECONDS Polling interval in seconds (watch mode) + + -d, --dataverse-url URL Dataverse API base URL + -s, --solr-url URL Solr base URL + -c, --core NAME Solr core name + + -p, --schema-target-path PATH Path to target schema.xml 
(where to write) + -P, --schema-source-path PATH Path to source schema.xml (base for updates) + -t, --lock-timeout SECONDS Schema file lock timeout in seconds + -U, --upgrade Enable upgrade mode (oneshot only) + Apply metadata to template schema instead of current and reload the core. + + -k, --startup-check MODE Startup check mode: 'fail', 'warn', or 'wait' + (fail: exit on error, warn: continue with warning, wait: block until ready) + --wait-retry-period SECONDS Retry period in seconds for 'wait' startup mode + --wait-max-retries NUMBER Maximum number of retries for 'wait' startup mode + + -u, --update-script PATH Path to update-fields.sh script + -w, --work-dir PATH Working directory path + -v, --verbose Enable verbose logging (Note: oneshot mode is always verbose!) + -h, --help Show this help message + +Environment Variables (used as defaults if command-line options not provided): + DATAVERSE_URL Dataverse base URL (default: ${DEFAULT_DATAVERSE_URL}) + SOLR_URL Solr base URL (default: ${DEFAULT_SOLR_URL}) + SOLR_CORE Solr core name (default: ${DEFAULT_SOLR_CORE}) + SCHEMA_TARGET_PATH Path to target schema.xml (default: ${DEFAULT_SCHEMA_PATH}) + SCHEMA_SOURCE_PATH Path to source schema.xml (default: ${DEFAULT_SCHEMA_PATH}) + UPDATE_FIELDS_SCRIPT Path to update-fields.sh script (default: ${DEFAULT_UPDATE_FIELDS_SCRIPT}) + POLL_INTERVAL Polling interval in seconds (default: ${DEFAULT_POLL_INTERVAL}) + WORK_DIR Working directory (default: ${DEFAULT_WORK_DIR}) + MODE Execution mode: 'watch' or 'oneshot' (default: ${DEFAULT_MODE}) + UPGRADE_MODE Enable upgrade mode: 'true' or 'false' (default: ${DEFAULT_UPGRADE_MODE}) + STARTUP_CHECK Startup check mode: 'fail', 'warn', or 'wait' (default: ${DEFAULT_STARTUP_CHECK}) + LOCK_TIMEOUT File lock timeout in seconds (default: ${DEFAULT_LOCK_TIMEOUT}) + WAIT_RETRY_PERIOD Retry period (in seconds) for 'wait' startup check mode (default: ${DEFAULT_WAIT_RETRY_PERIOD}) + WAIT_MAX_RETRIES Max retries for 'wait' startup check mode 
(default: ${DEFAULT_WAIT_MAX_RETRIES}) + VERBOSE Enable verbose logging for watch mode: 'true' or 'false' (default: ${DEFAULT_VERBOSE}) + +Secret Configuration (only via environment variable or file): + SOLR_USERNAME Solr HTTP Basic Auth username (optional) + SOLR_PASSWORD Solr HTTP Basic Auth password (optional) + SOLR_USERNAME_FILE File containing Solr username (alternative to SOLR_USERNAME) + SOLR_PASSWORD_FILE File containing Solr password (alternative to SOLR_PASSWORD) + DATAVERSE_BEARER_TOKEN Bearer token for Dataverse API (optional) + DATAVERSE_BEARER_TOKEN_FILE File containing bearer token (alternative) + DATAVERSE_UNBLOCK_KEY Unblock key for Dataverse API (optional) + DATAVERSE_UNBLOCK_KEY_FILE File containing unblock key (alternative) + +Schema Path Behavior: + By default, source and target paths are the same (${DEFAULT_SCHEMA_PATH}). + In upgrade mode (-U), if source path is not explicitly set via -P: + - Source automatically defaults to template: ${DEFAULT_UPGRADE_SOURCE_PATH} + - Target remains as specified (or default) + Use -P to explicitly override source path in any mode. 
+ +Examples: + # Watch mode with defaults + $0 + + # One-shot mode with custom paths + $0 --mode oneshot --schema-target-path /opt/solr/schema.xml + + # Upgrade mode: apply metadata to template schema + $0 --mode oneshot --upgrade + + # Upgrade mode with custom template location + $0 --mode oneshot --upgrade --schema-source-path /custom/template/schema.xml + + # Upgrade mode with custom template via environment + SCHEMA_SOURCE_PATH=/custom/template.xml $0 --mode oneshot --upgrade + + # Watch mode that waits for services to be ready with custom retry settings + $0 --startup-check wait --wait-retry-period 10 --wait-max-retries 30 + + # Using environment variables + MODE=oneshot SOLR_CORE=mycore $0 + + # With Solr authentication from environment + SOLR_USERNAME=admin SOLR_PASSWORD=secret $0 + + # With secrets from files + SOLR_USERNAME_FILE=/run/secrets/solr_user SOLR_PASSWORD_FILE=/run/secrets/solr_pass $0 + + # With Dataverse bearer token + DATAVERSE_BEARER_TOKEN=\$(cat /run/secrets/dv_token) $0 +EOF + exit 0 +} + +# Check for required commands +check_cli_utils() { + local missing=() + + if ! command -v sha256sum >/dev/null 2>&1; then + missing+=("sha256sum") + fi + + if ! command -v curl >/dev/null 2>&1; then + missing+=("curl") + fi + + if ! command -v diff >/dev/null 2>&1; then + missing+=("diff") + fi + + if ! command -v flock >/dev/null 2>&1; then + missing+=("flock") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + log_error "Missing required commands: ${missing[*]}" + log_error "Please install the missing CLI utilities" + return 1 + fi + + log_info "All required CLI utilities are available" + return 0 +} + +# Check if update-fields.sh script exists and is executable +check_update_script() { + if [[ ! -f "${UPDATE_FIELDS_SCRIPT}" ]]; then + log_error "Update fields script not found: ${UPDATE_FIELDS_SCRIPT}" + return 1 + fi + + if [[ ! 
-x "${UPDATE_FIELDS_SCRIPT}" ]]; then + log_error "Update fields script is not executable: ${UPDATE_FIELDS_SCRIPT}" + log_error "Run: chmod +x ${UPDATE_FIELDS_SCRIPT}" + return 1 + fi + + log_info "Update fields script found and executable: ${UPDATE_FIELDS_SCRIPT}" + return 0 +} + +# Check read/write permissions +check_permissions() { + local schema_dir + + # Validate that source schema exists (if given by user) + if [[ "${SCHEMA_SOURCE_PATH}" != "${SCHEMA_TARGET_PATH}" ]]; then + if [[ ! -f "${SCHEMA_SOURCE_PATH}" ]]; then + log_error "Source Schema not found: ${SCHEMA_SOURCE_PATH}" + log_error "Please ensure the template schema exists or use -P to specify a different location" + return 1 + fi + if [[ ! -r "${SCHEMA_SOURCE_PATH}" ]]; then + log_error "Source Schema is not readable: ${SCHEMA_SOURCE_PATH}" + return 1 + fi + fi + + # Check schema directory is writable (for creating backups and updating schema) + schema_dir="$(dirname "${SCHEMA_TARGET_PATH}")" + if [[ ! -d "${schema_dir}" ]]; then + log_error "Target Schema directory does not exist: ${schema_dir}" + return 1 + fi + if [[ ! -w "${schema_dir}" ]]; then + log_error "Target Schema directory is not writable: ${schema_dir}" + return 1 + fi + log_info "Target Schema directory exists and is writable: ${schema_dir}" + + # If schema file exists, check if it's readable and writable + if [[ -f "${SCHEMA_TARGET_PATH}" ]]; then + if [[ ! -r "${SCHEMA_TARGET_PATH}" ]]; then + log_error "Target Schema file is not readable: ${SCHEMA_TARGET_PATH}" + return 1 + fi + if [[ ! 
-w "${SCHEMA_TARGET_PATH}" ]]; then
            log_error "Target Schema file is not writable: ${SCHEMA_TARGET_PATH}"
            return 1
        fi
        log_info "Target Schema file is readable and writable: ${SCHEMA_TARGET_PATH}"

    # We already checked for the source to exist, so we will copy it later on
    elif [[ "${SCHEMA_SOURCE_PATH}" != "${SCHEMA_TARGET_PATH}" ]]; then
        log_warn "Target Schema file does not exist yet: ${SCHEMA_TARGET_PATH}"
        log_info "Will be created on first update."

    else
        log_warn "Target Schema file does not exist: ${SCHEMA_TARGET_PATH}"
        return 1
    fi

    return 0
}

# Acquire exclusive lock on schema file operations.
# Opens ${WORK_DIR}/schema.lock on a descriptor chosen by bash (its number is
# stored in LOCK_FD) and waits up to LOCK_TIMEOUT seconds for an exclusive
# flock on it. Returns 0 with the lock held, 1 otherwise; on failure LOCK_FD
# is reset to the empty string.
acquire_schema_lock() {
    local lock_file="${WORK_DIR}/schema.lock"

    log_info "Acquiring lock on schema operations (timeout: ${LOCK_TIMEOUT}s)"

    # Open file descriptor for lock file
    exec {LOCK_FD}>"${lock_file}" || {
        log_error "Failed to open lock file: ${lock_file}"
        return 1
    }

    # Try to acquire exclusive lock with timeout
    if ! flock -x -w "${LOCK_TIMEOUT}" "${LOCK_FD}"; then
        log_error "Failed to acquire lock within ${LOCK_TIMEOUT} seconds"
        # The stderr redirection is attached to the group, not to `exec`:
        # a bare `exec ... 2>/dev/null` would silence stderr for the rest of
        # the script and swallow every later log_error message.
        { exec {LOCK_FD}>&-; } 2>/dev/null || true
        # Reset instead of `unset`: the script runs under `set -u`, and
        # release_schema_lock expands ${LOCK_FD} (e.g. from the signal trap).
        LOCK_FD=""
        return 1
    fi

    log_info "Lock acquired successfully"
    return 0
}

# Release schema lock.
# Closes the lock descriptor (which drops the flock) and clears LOCK_FD.
# Safe to call when no lock is held.
release_schema_lock() {
    if [[ -n "${LOCK_FD}" ]]; then
        log_info "Releasing schema lock"
        # Group the exec so that only the descriptor close is permanent and
        # stderr stays connected afterwards.
        { exec {LOCK_FD}>&-; } 2>/dev/null || true
        LOCK_FD=""
    fi
}

# Cleanup function: signal handler that releases the lock and exits with 0.
cleanup() {
    log_info "Shutting down..."
    release_schema_lock
    exit 0
}

# Set up signal handlers (all necessary functions have been setup beforehand)
trap cleanup SIGTERM SIGINT SIGQUIT

# Initialize working directory
init_work_dir() {
    if ! mkdir -p "${WORK_DIR}"; then
        log_error "Failed to create working directory: ${WORK_DIR}"
        return 1
    fi

    if [[ !
-w "${WORK_DIR}" ]]; then
        log_error "Working directory is not writable: ${WORK_DIR}"
        return 1
    fi

    log_info "Working directory ready: ${WORK_DIR}"
    return 0
}

# Check if an endpoint is reachable.
# Arguments:
#   $1  URL to probe
#   $2  human readable service name used in log messages
#   $3  optional, complete HTTP header line (e.g. for authentication)
# Returns 0 if curl succeeds within 5 seconds; with --fail an HTTP status
# of 400 or above counts as "not reachable". Returns 1 otherwise.
check_endpoint() {
    local url="$1"
    local name="$2"
    local auth_header="${3:-}"

    local curl_opts=(-sf --max-time 5)

    if [[ -n "${auth_header}" ]]; then
        curl_opts+=(-H "${auth_header}")
    fi

    if curl "${curl_opts[@]}" "${url}" >/dev/null 2>&1; then
        log_info "${name} is reachable: ${url}"
        return 0
    else
        log_error "${name} is not reachable: ${url}"
        return 1
    fi
}

# Check Solr status endpoint (ping handler of the configured core)
check_solr_status() {
    local status_url="${SOLR_URL}/solr/${SOLR_CORE}/admin/ping"
    check_endpoint "${status_url}" "Solr core (${SOLR_CORE})" "${SOLR_AUTH_HEADER}"
}

# Check Dataverse API status
check_dataverse_status() {
    local status_url="${DATAVERSE_URL}/api/admin/settings"
    check_endpoint "${status_url}" "Dataverse API" "${DATAVERSE_AUTH_HEADER}"
}

# Perform startup checks with configured behavior.
# STARTUP_CHECK selects the policy:
#   wait  probe both services up to WAIT_MAX_RETRIES times (at least once),
#         sleeping WAIT_RETRY_PERIOD seconds between attempts
#   warn  probe once, log failures, always return 0
#   fail  probe once, return 1 on the first failing service
# Returns 0 when the script may continue, 1 otherwise (including an
# unknown STARTUP_CHECK value).
perform_startup_checks() {
    local all_ok=true

    log_info "Performing startup checks (mode: ${STARTUP_CHECK})"

    case "${STARTUP_CHECK}" in
        wait)
            log_info "Waiting for services to be ready..."

            local attempt=1
            local solr_ok dataverse_ok status_msg
            while true; do
                # Capture the exit codes with `|| var=$?`: a bare failing
                # command followed by `$?` would terminate the script when
                # errexit (`set -e`) is active for this call.
                solr_ok=0
                dataverse_ok=0
                if [[ ${attempt} -eq 1 ]]; then
                    # Check once with output to show URLs (always check both)
                    check_solr_status || solr_ok=$?
                    check_dataverse_status || dataverse_ok=$?
                else
                    check_solr_status >/dev/null 2>&1 || solr_ok=$?
                    check_dataverse_status >/dev/null 2>&1 || dataverse_ok=$?
                fi

                if [[ ${solr_ok} -eq 0 && ${dataverse_ok} -eq 0 ]]; then
                    log_info "All services are ready"
                    return 0
                fi

                if [[ ${solr_ok} -eq 0 ]]; then
                    status_msg="Solr: ready"
                else
                    status_msg="Solr: not ready"
                fi
                if [[ ${dataverse_ok} -eq 0 ]]; then
                    status_msg="${status_msg}, Dataverse: ready"
                else
                    status_msg="${status_msg}, Dataverse: not ready"
                fi
                log_info "${status_msg} (attempt ${attempt}/${WAIT_MAX_RETRIES})"

                # Stop before sleeping when the budget is used up, so there is
                # no pointless delay ahead of the final error.
                if [[ ${attempt} -ge ${WAIT_MAX_RETRIES} ]]; then
                    break
                fi

                # Wait between attempts; the retry never follows the previous
                # probe back-to-back.
                sleep "${WAIT_RETRY_PERIOD}"
                attempt=$((attempt + 1))
            done

            log_error "Services did not become ready after ${WAIT_MAX_RETRIES} attempts"
            return 1
            ;;

        warn)
            if ! check_solr_status; then
                log_warn "Solr status check failed, but continuing due to startup-check=warn"
                all_ok=false
            fi

            if ! check_dataverse_status; then
                log_warn "Dataverse status check failed, but continuing due to startup-check=warn"
                all_ok=false
            fi

            if [[ "${all_ok}" == "false" ]]; then
                log_warn "Some startup checks failed, continuing anyway"
            fi
            return 0
            ;;

        fail)
            if ! check_solr_status; then
                return 1
            fi

            if ! check_dataverse_status; then
                return 1
            fi

            log_info "All startup checks passed"
            return 0
            ;;

        *)
            log_error "Invalid startup check mode: ${STARTUP_CHECK}"
            return 1
            ;;
    esac
}

# Fetch metadata fields from Dataverse API
fetch_metadata_fields() {
    local output_file="$1"
    local url="${METADATA_ENDPOINT}"

    log_verbose "Fetching metadata fields from ${METADATA_ENDPOINT}"

    local curl_opts=(-sf -o "${output_file}")

    # Add authentication header if configured
    if [[ -n "${DATAVERSE_AUTH_HEADER}" ]]; then
        curl_opts+=(-H "${DATAVERSE_AUTH_HEADER}")
    fi

    if ! curl "${curl_opts[@]}" "${url}"; then
        log_error "Failed to fetch metadata fields from Dataverse API"
        return 1
    fi

    # Verify we got XML content
    if ! grep -q "/dev/null && !
grep -q "/dev/null; then + log_error "Response does not appear to be valid XML" + return 1 + fi + + log_verbose "Metadata fields saved to ${output_file}" + return 0 +} + +# Calculate checksum of metadata +calculate_metadata_checksum() { + local file="$1" + sha256sum "${file}" | awk '{print $1}' +} + +# Apply field definitions using update-fields.sh +apply_field_definitions() { + local metadata_file="$1" + local target_schema="$2" + + log_info "Applying field definitions using ${UPDATE_FIELDS_SCRIPT}" + + # Use source schema as base for updates + # NOTE: By default, SCHEMA_SOURCE_PATH == SCHEMA_TARGET_PATH + # NOTE: target_schema != SCHEMA_TARGET_PATH, as we want to work on a copy! + if [[ -f "${SCHEMA_SOURCE_PATH}" ]]; then + log_info "Using base schema file from ${SCHEMA_SOURCE_PATH}" + cp "${SCHEMA_SOURCE_PATH}" "${target_schema}" + else + log_error "No base schema file ${SCHEMA_SOURCE_PATH} found" + return 1 + fi + + # Run the update script + if ! "${UPDATE_FIELDS_SCRIPT}" "${target_schema}" "${metadata_file}"; then + log_error "Failed to apply field definitions" + return 1 + fi + + log_info "Field definitions applied successfully" + return 0 +} + +# Check if schema has changes +schema_has_changes() { + local current_schema="$1" + local new_schema="$2" + + if [[ ! -f "${current_schema}" ]]; then + log_warn "Current schema not found, treating as changed" + return 0 + fi + + if diff -q "${current_schema}" "${new_schema}" > /dev/null 2>&1; then + log_info "No changes detected in schema" + return 1 + fi + + log_info "Schema changes detected" + return 0 +} + +generate_backup_filename() { + local schema_file="$1" + # shellcheck disable=2155 + local timestamp="$(date +'%Y%m%d_%H%M%S')" + local backup_file="${schema_file}.backup.${timestamp}" + echo "$backup_file" +} + +# Backup current schema +backup_schema() { + local schema_file="$1" + local backup_file="$2" + + if [[ ! 
-f "${schema_file}" ]]; then
        log_warn "No existing schema to backup"
        return 0
    fi

    log_info "Backing up schema ${schema_file} to ${backup_file}"
    if ! cp "${schema_file}" "${backup_file}"; then
        log_error "Failed to backup schema"
        return 1
    fi

    return 0
}

# Replace schema file (must be called with lock held).
# Arguments: $1 = freshly generated schema, $2 = live schema to overwrite.
# Returns 1 if the copy fails.
replace_schema() {
    local new_schema="$1"
    local target_schema="$2"

    log_info "Replacing schema file"
    if ! cp "${new_schema}" "${target_schema}"; then
        log_error "Failed to replace schema file"
        return 1
    fi

    log_info "Schema file replaced successfully"
    return 0
}

# Reload Solr core using v2 API.
# POSTs to ${SOLR_URL}/api/cores/${SOLR_CORE}/reload and stores the response
# body in ${WORK_DIR}/solr_reload_response.json.
# Returns 0 only for HTTP 200 with a zero (or absent) "status" field in the
# body; otherwise logs whatever Solr answered and returns 1.
reload_solr_core() {
    # Using Solr API v2 style here!
    local reload_url="${SOLR_URL}/api/cores/${SOLR_CORE}/reload"
    local response_file="${WORK_DIR}/solr_reload_response.json"
    local http_code
    local line

    log_info "Reloading Solr core: ${SOLR_CORE}"
    log_info "Using Solr v2 API: ${reload_url}"

    # Deliberately no --fail: with it curl drops the body of error responses,
    # which is exactly what the diagnostics below want to show. The status
    # code is taken from --write-out instead.
    local curl_opts=(-s -w "%{http_code}" -o "${response_file}" -X POST -H 'Content-type: application/json')

    # Add authentication if configured
    if [[ -n "${SOLR_AUTH_HEADER}" ]]; then
        curl_opts+=(-H "${SOLR_AUTH_HEADER}")
    fi

    # Drop the response of an earlier call so it cannot be reported as the
    # answer to this request when curl writes nothing.
    rm -f "${response_file}" || true

    # Replace (not append to) whatever --write-out printed when curl itself
    # fails, so the logged code is always a plain three digit value.
    if ! http_code=$(curl "${curl_opts[@]}" "${reload_url}" 2>/dev/null); then
        http_code="000"
    fi

    if [[ "${http_code}" != "200" ]]; then
        log_error "Failed to reload Solr core (HTTP ${http_code})"

        # Try to extract error details from response
        if [[ -f "${response_file}" && -s "${response_file}" ]]; then
            log_error "Solr response:"

            # Pretty-print JSON if jq is available and accepts the body,
            # otherwise (no jq, or body is not JSON) dump it raw
            local pretty
            if command -v jq >/dev/null 2>&1 && pretty=$(jq '.' "${response_file}" 2>/dev/null); then
                while IFS= read -r line; do
                    log_error "  ${line}"
                done <<< "${pretty}"
            else
                while IFS= read -r line; do
                    log_error "  ${line}"
                done < "${response_file}"
            fi

            # Try to extract specific error message
            if command -v grep >/dev/null 2>&1; then
                local error_msg
                error_msg=$(grep -o '"msg":"[^"]*"' "${response_file}" 2>/dev/null | sed 's/"msg":"\(.*\)"/\1/' || true)
                if [[ -n "${error_msg}" ]]; then
                    log_error "Error message: ${error_msg}"
                fi
            fi
        else
            log_error "No response received from Solr"
        fi

        return 1
    fi

    # Check response status: HTTP 200 may still carry a non-zero "status"
    if [[ -f "${response_file}" ]]; then
        local status
        status=$(grep -o '"status":[0-9]*' "${response_file}" 2>/dev/null | cut -d':' -f2 || echo "")

        if [[ -n "${status}" && "${status}" != "0" ]]; then
            log_error "Solr returned non-zero status: ${status}"
            log_error "Full response:"
            while IFS= read -r line; do
                log_error "  ${line}"
            done < "${response_file}"
            return 1
        fi
    fi

    log_info "Solr core reloaded successfully"
    return 0
}

# Restore schema from backup (must be called with lock held).
# Arguments: $1 = backup file, $2 = live schema to overwrite.
# Returns 1 if the backup is missing or cannot be copied back. A failing
# Solr reload after a successful copy is logged but still returns 0.
restore_schema() {
    local backup_file="$1"
    local target_schema="$2"

    log_warn "Restoring schema from backup: ${backup_file}"

    if [[ ! -f "${backup_file}" ]]; then
        log_error "Backup file not found: ${backup_file}"
        return 1
    fi

    if ! cp "${backup_file}" "${target_schema}"; then
        log_error "Failed to restore schema from backup"
        return 1
    fi

    log_info "Schema restored successfully"
    reload_solr_core || log_error "Failed to reload Solr after restoration"
    return 0
}

# Process schema update (steps 2-4, optionally 5)
process_schema_update() {
    local metadata_file="${1:-${WORK_DIR}/metadata_fields.xml}"
    local reload_solr="${2:-ignore}"
    local new_schema="${WORK_DIR}/schema.xml.new"
    local backup_file=""
    local update_success=false

    # Step 2b: Apply downloaded field definitions to a schema file
    if !
apply_field_definitions "${metadata_file}" "${new_schema}"; then + return 1 + fi + + # Step 3: Check for changes + if ! schema_has_changes "${SCHEMA_TARGET_PATH}" "${new_schema}"; then + log_info "No update needed" + return 0 + fi + + # Acquire lock for critical section (backup, replace, reload) + if ! acquire_schema_lock; then + log_error "Failed to acquire schema lock" + return 1 + fi + + # Critical section begins here + { + backup_file=$(generate_backup_filename "${SCHEMA_TARGET_PATH}") + # Step 4: Backup current schema + if ! backup_schema "${SCHEMA_TARGET_PATH}" "${backup_file}"; then + release_schema_lock + return 1 + fi + + # Replace schema + if ! replace_schema "${new_schema}" "${SCHEMA_TARGET_PATH}"; then + release_schema_lock + return 1 + fi + + # Step 5: Reload Solr (only in watch or upgrade mode) + if [[ "${reload_solr}" == "reload" ]]; then + if ! reload_solr_core; then + log_error "Solr reload failed, attempting to restore backup" + if [[ -n "${backup_file}" ]]; then + restore_schema "${backup_file}" "${SCHEMA_TARGET_PATH}" + fi + release_schema_lock + return 1 + fi + fi + + update_success=true + } + # Critical section ends here + + release_schema_lock + + if [[ "${update_success}" == "true" ]]; then + log_info "Schema update completed successfully" + return 0 + else + return 1 + fi +} + +# One-shot mode +run_oneshot() { + log_info "Running in oneshot mode" + + # In oneshot, default to not reload Solr. But if upgrading, we want to reload. + local reload_solr="ignore" + if [[ "${UPGRADE_MODE}" == "true" ]]; then + log_info "Will attempt to RELOAD Solr after upgrading the schema." + reload_solr="reload" + fi + + # Step 2a: Download field definitions + local metadata_file="${WORK_DIR}/metadata_fields.xml" + if ! 
fetch_metadata_fields "${metadata_file}"; then + log_error "Oneshot execution failed" + return 1 + fi + + # Steps 2b, 3, 4 and 5 + if process_schema_update "${metadata_file}" "$reload_solr"; then + log_info "Oneshot execution completed successfully" + return 0 + else + log_error "Oneshot execution failed" + return 1 + fi +} + +# Watch mode +run_watch() { + log_info "Running in watch mode with ${POLL_INTERVAL}s polling interval" + + local last_checksum="" + local needs_update="false" + local pending_metadata_file="" + local pending_checksum="" + + while true; do + # Only fetch metadata if we don't have a pending update + if [[ "${needs_update}" == "false" ]]; then + local metadata_file="${WORK_DIR}/metadata_fields_check.xml" + + if fetch_metadata_fields "${metadata_file}"; then + pending_checksum=$(calculate_metadata_checksum "${metadata_file}") + + if [[ -z "${last_checksum}" ]]; then + log_info "Initial metadata fetch, setting baseline" + needs_update="true" + pending_metadata_file="${metadata_file}" + elif [[ "${pending_checksum}" != "${last_checksum}" ]]; then + log_info "Metadata change detected, processing schema update" + needs_update="true" + pending_metadata_file="${metadata_file}" + else + log_verbose "No metadata changes detected" + fi + else + log_error "Failed to fetch metadata fields, will retry" + fi + else + log_info "Pending update not yet applied, retrying without re-fetching metadata" + fi + + # Process pending update if needed + if [[ "${needs_update}" == "true" && -n "${pending_metadata_file}" ]]; then + if process_schema_update "${pending_metadata_file}" "reload"; then + # Update successful - use the stored checksum + last_checksum="${pending_checksum}" + needs_update="false" + pending_metadata_file="" + pending_checksum="" + else + log_error "Schema update failed, will retry on next cycle" + # Keep needs_update="true", pending_metadata_file, and pending_checksum intact for retry + fi + fi + + # Sleep until next check is due + sleep 
"${POLL_INTERVAL}" + done +} + +# Main +main() { + # Parse arguments + while [[ $# -gt 0 ]]; do + case "$1" in + -m|--mode) + MODE="$2" + shift 2 + ;; + -d|--dataverse-url) + DATAVERSE_URL="$2" + shift 2 + ;; + -s|--solr-url) + SOLR_URL="$2" + shift 2 + ;; + -c|--core) + SOLR_CORE="$2" + shift 2 + ;; + -p|--schema-target-path) + SCHEMA_TARGET_PATH="$2" + shift 2 + ;; + -P|--schema-source-path) + SCHEMA_SOURCE_PATH="$2" + SCHEMA_SOURCE_PATH_SET_BY_USER="true" + shift 2 + ;; + -u|--update-script) + UPDATE_FIELDS_SCRIPT="$2" + shift 2 + ;; + -i|--interval) + POLL_INTERVAL="$2" + shift 2 + ;; + -w|--work-dir) + WORK_DIR="$2" + shift 2 + ;; + -U|--upgrade) + UPGRADE_MODE="true" + shift + ;; + -k|--startup-check) + STARTUP_CHECK="$2" + shift 2 + ;; + -t|--lock-timeout) + LOCK_TIMEOUT="$2" + shift 2 + ;; + --wait-retry-period) + WAIT_RETRY_PERIOD="$2" + shift 2 + ;; + --wait-max-retries) + WAIT_MAX_RETRIES="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE="true" + shift + ;; + -h|--help) + usage + ;; + *) + log_error "Unknown option: $1" + usage + ;; + esac + done + + # Validate startup check mode + case "${STARTUP_CHECK}" in + fail|warn|wait) + ;; + *) + log_error "Invalid startup check mode: ${STARTUP_CHECK}. Must be 'fail', 'warn', or 'wait'" + exit 1 + ;; + esac + + # Validate mode + case "${MODE}" in + watch|oneshot) + ;; + *) + log_error "Invalid mode: ${MODE}. 
Must be 'watch' or 'oneshot'" + exit 1 + ;; + esac + + # Set metadata endpoint based on Dataverse URL + METADATA_ENDPOINT="${DATAVERSE_URL}/api/admin/index/solr/schema" + + # Load secrets from files or environment variables + + # Dataverse authentication + # Priority 1: Bearer token (env var or file) + if [[ -n "${DATAVERSE_BEARER_TOKEN:-}" ]]; then + # Bearer token already set, use it + DATAVERSE_AUTH_HEADER="Authorization: Bearer ${DATAVERSE_BEARER_TOKEN}" + log_info "Dataverse authentication configured (Bearer Token)" + elif [[ -n "${DATAVERSE_BEARER_TOKEN_FILE:-}" ]]; then + # Bearer token file specified, try to read it + if [[ -f "${DATAVERSE_BEARER_TOKEN_FILE}" ]]; then + DATAVERSE_BEARER_TOKEN=$(cat "${DATAVERSE_BEARER_TOKEN_FILE}") + DATAVERSE_AUTH_HEADER="Authorization: Bearer ${DATAVERSE_BEARER_TOKEN}" + log_info "Dataverse authentication configured (Bearer Token from file)" + else + log_error "DATAVERSE_BEARER_TOKEN_FILE specified but file not found: ${DATAVERSE_BEARER_TOKEN_FILE}" + exit 1 + fi + # Priority 2: Unblock key (only if no bearer token) + elif [[ -n "${DATAVERSE_UNBLOCK_KEY:-}" ]]; then + # Unblock key already set, use it + DATAVERSE_AUTH_HEADER="X-Dataverse-unblock-key: ${DATAVERSE_UNBLOCK_KEY}" + log_info "Dataverse authentication configured (Unblock Key)" + elif [[ -n "${DATAVERSE_UNBLOCK_KEY_FILE:-}" ]]; then + # Unblock key file specified, try to read it + if [[ -f "${DATAVERSE_UNBLOCK_KEY_FILE}" ]]; then + DATAVERSE_UNBLOCK_KEY=$(cat "${DATAVERSE_UNBLOCK_KEY_FILE}") + DATAVERSE_AUTH_HEADER="X-Dataverse-unblock-key: ${DATAVERSE_UNBLOCK_KEY}" + log_info "Dataverse authentication configured (Unblock Key from file)" + else + log_error "DATAVERSE_UNBLOCK_KEY_FILE specified but file not found: ${DATAVERSE_UNBLOCK_KEY_FILE}" + exit 1 + fi + fi + + # Solr authentication + if [[ -n "${SOLR_USERNAME_FILE:-}" && -f "${SOLR_USERNAME_FILE}" ]]; then + SOLR_USERNAME=$(cat "${SOLR_USERNAME_FILE}") + fi + if [[ -n "${SOLR_PASSWORD_FILE:-}" && -f 
"${SOLR_PASSWORD_FILE}" ]]; then + SOLR_PASSWORD=$(cat "${SOLR_PASSWORD_FILE}") + fi + + if [[ -n "${SOLR_USERNAME:-}" && -n "${SOLR_PASSWORD:-}" ]]; then + SOLR_AUTH_HEADER="Authorization: Basic $(echo -n "${SOLR_USERNAME}:${SOLR_PASSWORD}" | base64 | tr -d '\n')" + log_info "Solr authentication configured (HTTP Basic)" + fi + + # Handle schema source and upgrade mode + + # If the schema source has not been explicitly set by the user (independent of any mode), + # but the target path has been, make sure to make them the same! + if [[ "${SCHEMA_SOURCE_PATH_SET_BY_USER}" == "false" && "${SCHEMA_TARGET_PATH}" != "${DEFAULT_SCHEMA_PATH}" ]]; then + SCHEMA_SOURCE_PATH="${SCHEMA_TARGET_PATH}" + fi + + # Validate upgrade mode restrictions + if [[ "${UPGRADE_MODE}" == "true" && "${MODE}" == "watch" ]]; then + log_error "Upgrade mode (-U|--upgrade) is only allowed in oneshot mode" + log_error "Please use: --mode oneshot --upgrade" + exit 1 + fi + + # Handle upgrade mode: override source path if not explicitly set by user + if [[ "${UPGRADE_MODE}" == "true" && "${SCHEMA_SOURCE_PATH_SET_BY_USER}" == "false" ]]; then + log_info "Upgrade mode enabled: using template schema as source" + SCHEMA_SOURCE_PATH="${DEFAULT_UPGRADE_SOURCE_PATH}" + fi + + # Log config info, then run preflight checks + + log_info "Starting Solr Driver for Dataverse Metadata Schemas" + log_info "Mode: ${MODE}" + if [[ "${UPGRADE_MODE}" == "true" ]]; then + log_info "Upgrade Mode: ENABLED" + log_info "Schema Source Path: ${SCHEMA_SOURCE_PATH}" + fi + log_info "Dataverse API: ${METADATA_ENDPOINT}" + log_info "Solr URL: ${SOLR_URL}" + log_info "Solr Core: ${SOLR_CORE}" + log_info "Schema Target Path: ${SCHEMA_TARGET_PATH}" + if [[ "${UPGRADE_MODE}" != "true" && "${SCHEMA_SOURCE_PATH}" != "${SCHEMA_TARGET_PATH}" ]]; then + log_info "Schema Source Path: ${SCHEMA_SOURCE_PATH}" + fi + log_info "Update Script: ${UPDATE_FIELDS_SCRIPT}" + log_info "Work Directory: ${WORK_DIR}" + log_info "Startup Check Mode: 
${STARTUP_CHECK}" + log_info "Lock Timeout: ${LOCK_TIMEOUT}s" + if [[ "${STARTUP_CHECK}" == "wait" ]]; then + log_info "Wait Retry Period: ${WAIT_RETRY_PERIOD}s" + log_info "Wait Max Retries: ${WAIT_MAX_RETRIES}" + fi + + # Pre-flight checks + log_info "Running pre-flight checks..." + + if ! check_cli_utils; then + exit 1 + fi + + if ! check_update_script; then + exit 1 + fi + + if ! check_permissions; then + exit 1 + fi + + if ! init_work_dir; then + exit 1 + fi + + if ! perform_startup_checks; then + exit 1 + fi + + log_info "All pre-flight checks passed" + + # Run appropriate mode + case "${MODE}" in + watch) + run_watch + exit $? + ;; + oneshot) + run_oneshot + exit $? + ;; + esac +} + +main "$@" diff --git a/doc/release-notes/6.11-release-notes.md b/doc/release-notes/6.11-release-notes.md new file mode 100644 index 00000000000..0c9fcccd161 --- /dev/null +++ b/doc/release-notes/6.11-release-notes.md @@ -0,0 +1,289 @@ +# Dataverse 6.11 + +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.11 rather than the [list of releases](https://github.com/IQSS/dataverse/releases), which will cut them off. + +This release brings new features, enhancements, and bug fixes to Dataverse. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project! + +## Release Highlights + +Highlights for Dataverse 6.11 include: + +- Locally FAIR data +- New expanded geospatial metadata block +- Submit for Review improvements +- Show historical file access requests +- Local reviews +- New and improved APIs +- Bug fixes + +### Locally FAIR Data + +This release includes experimental support for "Locally FAIR" data. When enabled, this feature allows publication of content that will be visible only to authorized users or groups within a Dataverse installation. 
Users without authorization will not see the Locally FAIR collections, datasets, or files in search results and cannot visit their pages or access them via the Dataverse API. + +For more information, see [Locally FAIR Data](https://guides.dataverse.org/en/6.11/user/dataverse-management.html#locally-fair) in the guides and #12319. + +### New Expanded Geospatial Metadata Block + +This release introduces a major enhancement to geospatial data support with the addition of an expanded Geospatial Metadata Block, designed to improve how researchers describe, share, and discover geospatial datasets. + +This new expanded metadata block aligns with the international ISO 19115 standard for describing geographic data. + +- Includes 22 new metadata fields to provide more detailed and standardized descriptions of geospatial data (e.g. vector, raster, mixed or multi-format geospatial collections) +- Incorporates and extends current metadata elements (Geographic Coverage and Geographic Bounding Box) +- Ensures backward compatibility while significantly improving metadata description capabilities + +See [the guides](https://guides.dataverse.org/en/6.11/user/appendix.html#supported-metadata), #10398, and #11507. + +### Submit for Review Improvements + +- A disclaimer message and custom text can be added to the "Submit for Review" dialog, similar to Publish disclaimer messages. See [:SubmitForReviewDatasetDisclaimerText](https://guides.dataverse.org/en/6.11/installation/config.html#submitforreviewdatasetdisclaimertext), [:DatasetSubmitForReviewPopupCustomText](https://guides.dataverse.org/en/6.11/installation/config.html#datasetsubmitforreviewpopupcustomtext), #12325 and #12373. +- When `requireFilesToPublishDataset` (added in #10994 in Dataverse 6.6) is set on a collection, a dataset must contain files for the Submit for Review button to appear. As before, publishing is also prevented if no files are present. See #12258 and #12266. 
+ +### Show Historical File Access Requests + +A new checkbox called "Show Historical Requests" has been added to the permissions page for a data file. Checking the box will show the number of times each user has been granted or denied access. See #8013 and #12012. + +### Local Reviews + +Datasets can have local reviews, listable via API. A local review is a review dataset ("review" for short) that points at the URL form of a persistent ID of a dataset (e.g. itemReviewedUrl:https://doi.org/10.5072/FK2/ABCDEF) that is in the same Dataverse installation. Local reviews of a dataset can be listed via API (and we plan to build a UI for it some day). + +A new metadata block called "Trusted Data Dimensions and Intensities" has been added for testing. This is described in the setup instructions for review datasets. + +If you set `dataverse.feature.croissant-with-local-reviews` to true, local reviews will appear in the `croissant` and `croissantSlim` metadata export formats for any dataset that has local reviews. This feature is experimental, which is why it is hidden behind a feature flag. + +See the guides for the new [list reviews](https://guides.dataverse.org/en/6.11/api/native-api.html#list-reviews) API endpoint, #12313, #12314, and #12425. + +## Features Added + +These are features that weren't already mentioned under "highlights" above. + +### Original Tabular File Format Shown (Rather Than .tab) + +In previous releases of Dataverse, the file, dataset, and collection pages showed the .tab version of tabular files (a plain text preservation-friendly copy). This has been changed to show the original format instead (e.g. Excel, Stata, etc.). The .tab version is still available from the download menu. See #7956 and #12145. + +### Croissant 1.1 (Summary Statistics) + +The Croissant metadata export format has been updated from version 1.0 to 1.1. + +Summary statistics (mean, min, max, etc.) are now included for tabular files that were successfully ingested. 
+ +You can download an example Croissant file from the [Supported Metadata Export Formats](https://guides.dataverse.org/en/6.11/user/dataset-management.html#supported-metadata-export-formats) section of the guides. + +Minor backward-incompatible changes were made, which are noted below. See #12014 and #12214. + +### Local Contexts Improvements + +For instances that have enabled the Local Contexts integration, Dataverse will now add rights information related to the Notices and Labels from a Local Contexts Project associated with a dataset to the metadata sent to DataCite (when using DataCite DOIs) and available in metadata exports (DataCite, OAI-ORE, and JSON). + +In addition, it is now possible to use non-string values in the retrieval-filtering context entries for external vocabulary scripts. This can be used to allow filtered JSON that is not valid JSON-LD to be included in the OAI_ORE JSON-LD metadata export in a way that JSON-LD parsers will accept (and not ignore/drop). The OAI_ORE export version has been updated to 1.0.3 with this change. See the [guides](https://guides.dataverse.org/en/6.11/installation/localcontexts.html#configuration) and #12224. + +### Improved Support for Handles as Persistent IDs + +The following issues were addressed in #12366: + +- When PID registration of persistent IDs for files is enabled, Dataverse will create the handle as soon as the file is created (similar to other persistent ID providers) (issue #12174); +- When a new handle is created for a dataset or file that is still a draft, it will be reserved and registered, but not visible publicly. The handle will become visible and the redirects will start working once it is published. This is also in line with how DOI providers work (issue #8881). + +## Bug Fixes + +- We fixed a defect that caused upload of files larger than 1 GB to fail silently for S3 storage, unless direct upload was used. See #12358. 
+- When using the Access API to download files from a dataset that has a guestbook, and when using the dataset's :persistentId instead of its database id, the returned signedUrl immediately failed with a 401 error. This bug was fixed so that the signedUrl correctly returns the files. Using the dataset's and datafile's database ids, instead of its :persistentId, was unaffected by this bug and continues to work. See #12245 and #12363. +- When the "modifyRegistrationMetadata" API was used, some datasets with UTF-8 characters were being unnecessarily updated in DataCite. This has been resolved by switching from the DataCite MDS API to its JSON API. See the upgrade instructions below, #12070, and #12270. +- Fixed a problem with S3 direct upload to datasets that use lower- or mixed-case PID authority/identifier in the database and to datasets using an alternative identifier for file storage. See #12268. +- The bug "Search API doesn't return image_url after newly created dataset is published" was fixed in the following manner. The dataset thumbnail will be created automatically when a dataset is published under the following conditions: the dataset has no existing thumbnail; the dataset has image files that can be converted to a thumbnail; and the Feature Flag "disable-dataset-thumbnail-autoselect" is not enabled. See #11588 and #12403. +- A bug in the BagPack generator, which caused the export to fail for datasets with multiple Contact Points, of which some had no name while others did, has been fixed. See #12246. +- Two guestbook API bugs were fixed. First, missing "gbrids" in the signed URL query parameter list will no longer include "&gbrids=" without a value. Second, when a user attempting to download files with a guestbook response has no API token but is authenticated by bearer token, a temporary api token will be generated with an expiration of 1 minute which is used for signing and decoding the signed URL. 
See #12267, #12277, and #12279. +- We fixed a timing issue where the user only had a few seconds instead of a minute to call the file download API after POSTing the guestbook response. See #12340 and #12341. + +## Other Changes + +- A [list of Dataverse features](https://guides.dataverse.org/en/6.11/admin/features.html) has been added to the guides, thanks to the [Documentation Working Group](https://www.gdcc.io/working-groups/documentation.html) over many meetings (recorded, if you're interested!). Historically, the list of features has been maintained at [dataverse.org/software-features](https://dataverse.org/software-features) but by moving [the list](https://guides.dataverse.org/en/6.11/admin/features.html) into the guides, we hope to keep it up-to-date with help from the community. To make improvements, please feel free to open issues, make pull requests, [discuss](https://dataverse.zulipchat.com/#narrow/channel/446770-docs/topic/list.20of.20features/with/604499830) in Zulip, and join a Documentation Working Group meeting! See #11998 and #12000. +- The [Big Data Support](https://guides.dataverse.org/en/6.11/installation/big-data-support.html) page has been moved from the Developer Guide to the Installation Guide. See #12151. +- The deaccession reason "Not a valid dataset." has been changed to "Not valid. This dataset does not comply with repository policies." See the Bundle.properties key file.deaccessionDialog.reason.selectItem.notValid and #11985. 
+ +## API Updates + +- New APIs have been added called [Update the Metadata and Instructions of a Template](https://guides.dataverse.org/en/6.11/api/native-api.html#update-the-metadata-and-instructions-of-a-template), [Update the License or Terms Of Use of a Template](https://guides.dataverse.org/en/6.11/api/native-api.html#update-the-license-or-terms-of-use-of-a-template), and [Update the Terms Of Access of a Template](https://guides.dataverse.org/en/6.11/api/native-api.html#update-the-terms-of-access-of-a-template). See #11912 and #12159. +- A new API endpoint for getting the default contributor role of a collection was added. See [the guides](https://guides.dataverse.org/en/6.11/api/native-api.html#get-default-role-assigned-to-user-creating-a-dataset-in-a-dataverse-collection) and #12112. +- A new API has been added to get citations in various formats such as EndNote, RIS, BibTeX, and CSL. Previously this functionality was only available via the UI. See [the guides](https://guides.dataverse.org/en/6.11/api/dataaccess.html#citation-get-citation-in-other-formats), #11733 and #12238. +- An API endpoint was added that allows the user to make edits to an existing Guestbook, including adding and removing custom guestbook questions: `curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/guestbooks/{ID}" -d "$JSON"`. Another API was added that allows the user to retrieve guestbook responses for a specific guestbook within a collection. Optional pagination parameters can be added to limit the number of results, as this can get very large: `curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/guestbooks/$ID/responses?limit=10&offset=0"`. See #12386 and #12395. +- The API endpoint `/api/guestbooks/{dataverseAlias}/list` can now include `"usageCount":#` and `"responseCount":#` in the response by adding the query param "includeStats=true". 
See [the guides](https://guides.dataverse.org/en/6.11/api/native-api.html#get-a-list-of-guestbooks-for-a-dataverse-collection), #12260, and #12269. +- A new query parameter (`ignoreSettingExcludeEmailFromExport`) has been added to the ["View a Dataverse Collection"](https://guides.dataverse.org/en/6.11/api/native-api.html#view-a-dataverse-collection) API (`/api/dataverses/{collectionId}`). This query parameter prevents the contact emails from being excluded when the setting `:ExcludeEmailFromExport` is set to true and the user has EditDataverse permissions. See #12194 and #12195. +- All API endpoints that return information about role assignments (such as `/api/dataverses/$ID/assignments`) now include additional fields in their JSON responses: `assigneeName`, `roleDescription`, `definitionPointName`, `definitionPointType`, and `definitionPointGlobalId` (if available). See #11920. +- The `/datafile/{id}/listRequests` API has been extended to show request history. See [the guides](https://guides.dataverse.org/en/6.11/api/dataaccess.html#list-file-access-requests), #8013 and #12012. +- "includeInherited" was added as a query parameter of `/api/guestbooks/{ID}/list?includeInherited=true` to return the collection's guestbooks as well as the guestbooks of the collections' hierarchical owners. Also the "EditDataverse" permission requirement to get the list was removed. See #12323 and #12326. +- Guestbook response parsing now allows a textarea value to be a string along with an array. See #12446 and #12447. +- The classes TestIngest (#12415) and Meta (#12417) were removed as they were no longer being used. + +## Security Updates + +This release contains important security updates. If you are not receiving security advisories, please sign up by following [the steps](https://guides.dataverse.org/en/latest/installation/config.html#ongoing-security-of-your-installation) in the guides. 
+ +As we [announced](https://groups.google.com/g/dataverse-community/c/wqSQYH_fSlA/m/6xcGh0CnAgAJ) on the mailing list, we are experimenting with sending security advisories via Mailchimp. The sign up steps are the same. + +We would like to thank Pablo Picurelli Ortiz, Josh Dow ([Klogix](https://www.klogixsecurity.com/)) and Steven Adair ([Volexity](https://www.volexity.com)) for notifying us about vulnerabilities that were fixed in this release. + +## Backward Incompatible Changes + +Generally speaking, see the [API Changelog](https://guides.dataverse.org/en/latest/api/changelog.html) for a list of backward-incompatible API changes. + +- The `GET /api/access/datafile/{id}/userPermissions` endpoint now requires authentication. +- The `GET /api/mydata/retrieve` endpoint, if the search returns no data, now includes the "data" block with 0 results. The message that was returned in "error_message" will be returned in "message" and the "success" will be `true`. All other errors will continue to reply with "success":false and the error message in "error_message". See #11447 and #12256. +- The Croissant metadata export format has been updated from version 1.0 to 1.1, which is reflected in the "conformsTo" property. The unused "wd" property has been dropped. +- All endpoints related to storage drivers have been moved out of the Admin API. See #12141 and #12182. + - The GET, PUT, and DELETE endpoints for `/api/admin/dataverse/{alias}/storageDriver` have been moved to `/api/dataverses/{alias}/storageDriver`. Write operations continue to be accessible only to superusers, while GET methods are public. + - The endpoint `/api/admin/dataverse/storageDrivers` has been made public, moved, and renamed to `/api/dataverses/{alias}/allowedStorageDrivers`. Regarding the name change, this endpoint will in the future only display the storage drivers that are allowed on the specified collection. For now, it will display the entire list of available drivers on the installation. 
+ +## End-Of-Life (EOL) Announcements + +### PostgreSQL 13 Reached EOL on 13 November 2025 + +We mentioned this in the Dataverse [6.6](https://github.com/IQSS/dataverse/releases/tag/v6.6), [6.8](https://github.com/IQSS/dataverse/releases/tag/v6.8), [6.9](https://github.com/IQSS/dataverse/releases/tag/v6.9), and [6.10](https://github.com/IQSS/dataverse/releases/tag/v6.10) release notes, but as a reminder, according to https://www.postgresql.org/support/versioning/ PostgreSQL 13 reached EOL on 13 November 2025. As stated in the [Installation Guide](https://guides.dataverse.org/en/6.11/installation/prerequisites.html#postgresql), we recommend running PostgreSQL 16 since it is the version we test with in our continuous integration ([since](https://github.com/gdcc/dataverse-ansible/commit/8ebbd84ad2cf3903b8f995f0d34578250f4223ff) February 2025). The [Dataverse 5.4 release notes](https://github.com/IQSS/dataverse/releases/tag/v5.4) explained the upgrade process from 9 to 13 (e.g. pg_dumpall, etc.) and the steps will be similar. If you have any problems, please feel free to reach out (see "getting help" in these release notes). + +## Notes for Dataverse Installation Administrators + +### CORS Filter Fix and Docs + +We fixed an inconsistency where the `CorsFilter` was not always being invoked when accessing `/api/...` endpoints, preventing these endpoints from being used from webapps even when CORS was properly configured. In addition, the [documentation](https://guides.dataverse.org/en/6.11/installation/config.html#cross-origin-resource-sharing-cors) related to how certain Dataverse features depend on proper CORS configuration has been extended and improved. See #12151 and #12161. 
+ +## New Settings + +### New JVM Options (MicroProfile Config Settings) + +- dataverse.feature.allow-locally-fair-data +- dataverse.feature.croissant-with-local-reviews + +### New Database Settings + +- :DatasetSubmitForReviewPopupCustomText +- :SubmitForReviewDatasetDisclaimerText + +## Updates for Documentation Writers + +The dependencies "sphinx-reredirects" and "sphinx-design" have been added. Please re-run the `pip install -r requirements.txt` setup [step](https://guides.dataverse.org/en/6.11/contributor/documentation.html#building-the-guides-with-sphinx) to update your environment. Otherwise you will see a "could not import extension" error. + +## Updates for Developers + +Developers making pull requests against the main "dataverse" project on GitHub can now see for themselves which API tests are failing, if any. Look for a GitHub Action called "Container Integration Tests Workflow". See #9916 and #12368. + +New "fast redeploy" scripts have been added for the backend. See [the guides](https://guides.dataverse.org/en/6.11/container/dev-usage.html#fast-redeploy-command-line), #10156, and [Zulip](https://dataverse.zulipchat.com/#narrow/channel/379673-dev/topic/fast.20redeploy.20with.20docker.20compose/near/553715282). Please note that this solution is being further improved in #12467. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.11 milestone](https://github.com/IQSS/dataverse/issues?q=milestone%3A6.11+is%3Aclosed) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please see [getting help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) in the Installation Guide. + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). 
Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations around the world](https://dataverse.org/installations) to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan accordingly, create backups of your database, etc. + +Note: These instructions assume that you are upgrading from the immediate previous version. That is to say, you've already upgraded through all the 6.x releases and are now running Dataverse 6.10.1. See [tags on GitHub](https://github.com/IQSS/dataverse/tags) for a list of versions. If you are running an earlier version, the only supported way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to this version. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. By default, Payara runs as the `dataverse` user. In the commands below, we use sudo to run the commands as a non-root user. + +Also, we assume that Payara is installed in `/usr/local/payara7`. If not, adjust as needed. + +1. List deployed applications. + + `/usr/local/payara7/bin/asadmin list-applications` + +1. Undeploy the previous version (should match "list-applications" above). + + `/usr/local/payara7/bin/asadmin undeploy dataverse-6.10.1` + +1. Deploy the Dataverse 6.11 war file. + + `wget https://github.com/IQSS/dataverse/releases/download/v6.11/dataverse-6.11.war` + + `sudo -u dataverse /usr/local/payara7/bin/asadmin deploy dataverse-6.11.war` + +1. 
Check that you get a version number from Dataverse. + + This is just a sanity check that Dataverse has been deployed properly. + + `curl http://localhost:8080/api/info/version` + +1. Update the geospatial metadata block. + + `wget https://raw.githubusercontent.com/IQSS/dataverse/refs/tags/v6.11/scripts/api/data/metadatablocks/geospatial.tsv` + + `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file geospatial.tsv` + +1. Update Solr schema. + + Due to changes in the Solr schema (the addition of geospatial fields), updating the Solr schema and reindexing is required. + + First, back up your existing `schema.xml` file. + + `cp /usr/local/solr/solr-9.8.0/server/solr/collection1/conf/schema.xml /usr/local/solr/solr-9.8.0/server/solr/collection1/conf/schema.xml.orig` + + (Note that Docker-based installations use this directory: `solr/data/data/collection1/conf/schema.xml`.) + + If you do not have any custom metadata blocks, you can simply download an updated `schema.xml` file and put it into place. + + `wget https://raw.githubusercontent.com/IQSS/dataverse/v6.11/conf/solr/schema.xml` + + `cp schema.xml /usr/local/solr/solr-9.8.0/server/solr/collection1/conf` + + Reload the Solr core. + + `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + + If you do have custom metadata blocks, run the `update-fields.sh` script that we supply. The example below shows the default path for a non-Docker installation, but adjust the path as necessary. + + `wget https://raw.githubusercontent.com/IQSS/dataverse/v6.11/conf/solr/update-fields.sh` + + `chmod +x update-fields.sh` + + `curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.8.0/server/solr/collection1/conf/schema.xml` + + Reload the Solr core + + `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +1. Reindex Solr. 
+ + Solr needs to be reindexed because of the change to deaccessionDialog.reason.selectItem.notValid in Bundle.properties (#11985). + + `curl http://localhost:8080/api/admin/index` + +1. Re-export metadata export formats. + + We re-export because the Croissant format was updated. + + `curl http://localhost:8080/api/admin/metadata/reExportAll` + +1. For installations with internationalization or text customizations: + + Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs). + + If you have text customizations you can get the latest English files from <https://github.com/IQSS/dataverse/tree/v6.11/src/main/java/propertyFiles>. + + Note that deaccessionDialog.reason.selectItem.notValid was updated. See #11985. + +1. For installations using DataCite, ensure the DataCite REST API URL (now required) is configured: + + Incremental improvements have been made to the process of registering dataset metadata with DataCite. If your instance is using DataCite, please make sure you have a valid DataCite REST API URL configured, since it is now required. + + The JVM options in question are `dataverse.pid.*.datacite.rest-api-url` if the recommended, new-style PID configuration is used, or `doi.dataciterestapiurlstring` if the legacy settings are in place. In the latter case however, this is a good occasion to switch to the new configuration setup. + + For installations using registered DataCite authorities in production the URL should be: + + `-Ddataverse.pid.<provider-id>.datacite.rest-api-url=https://api.datacite.org` + + Or, for test and development instances: + + `-Ddataverse.pid.<provider-id>.datacite.rest-api-url=https://api.test.datacite.org` + + See [the guides](https://guides.dataverse.org/en/6.11/installation/config.html#datacite-specific-settings), #12070 and #12270. 
diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 9c74ed75f6d..7ab276e7cd2 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -4,13 +4,17 @@ Sphinx==7.4.0 sphinx-icon==0.1.2 # Markdown support -myst-parser==2.0.0 +myst-parser==4.0.0 # tabs sphinx-tabs==3.4.5 +sphinx-design==0.7.0 + # jQuery sphinxcontrib-jquery Sphinx-Substitution-Extensions==2025.1.2 -semver>=3,<4 \ No newline at end of file +semver>=3,<4 + +sphinx-reredirects==1.1.0 \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/api/list-reviews.json b/doc/sphinx-guides/source/_static/api/list-reviews.json new file mode 100644 index 00000000000..c69b51aa793 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/list-reviews.json @@ -0,0 +1,52 @@ +{ + "status": "OK", + "data": { + "reviews": [ + { + "title": "Review of Pediatric Asthma", + "authors": [ + "Wazowski, Mike" + ], + "persistentId": "doi:10.5072/FK2/1WD6BX", + "persistentIdUrl": "https://doi.org/10.5072/FK2/1WD6BX", + "id": 13, + "citation": "Wazowski, Mike, 2026, \"Review of Pediatric Asthma\", https://doi.org/10.5072/FK2/1WD6BX, Root, DRAFT VERSION", + "citationHtml": "Wazowski, Mike, 2026, \"Review of Pediatric Asthma\", https://doi.org/10.5072/FK2/1WD6BX, Root, DRAFT VERSION", + "datePublished": "", + "description": "This is a review of a dataset.", + "rubricMetadataBlocks": [ + { + "name": "rubric_trusteddatadimensionsintensities", + "displayName": "Trusted Data Dimensions and Intensities", + "fields": [ + { + "typeName": "licensingAndLegalClarity", + "value": "High" + }, + { + "typeName": "authorAndProvenance", + "value": "Medium" + }, + { + "typeName": "biasEquityAndRepresentativeness", + "value": "Low" + }, + { + "typeName": "integrityAndUsability", + "value": "High" + }, + { + "typeName": "fitnessForScopeAndContextualRelevance", + "value": "Medium" + }, + { + "typeName": "transparencyOfMethodsAndDocumentation", + "value": "Low" + } + ] 
+ } + ] + } + ] + } +} diff --git a/doc/sphinx-guides/source/_static/api/template-update-access.json b/doc/sphinx-guides/source/_static/api/template-update-access.json new file mode 100644 index 00000000000..e2d86de3283 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/template-update-access.json @@ -0,0 +1,13 @@ +{ + "customTermsOfAccess": { + "fileAccessRequest": false, + "termsOfAccess": "Here are the terms...", + "dataAccessPlace": "dataAccessPlace", + "originalArchive": "originalArchive", + "availabilityStatus": "availabilityStatus", + "contactForAccess": "contactForAccess", + "sizeOfCollection": "sizeOfCollection", + "studyCompletion": "studyCompletion", + "confidentialityDeclaration": "confidentialityDeclaration" + } +} diff --git a/doc/sphinx-guides/source/_static/api/template-update-license.json b/doc/sphinx-guides/source/_static/api/template-update-license.json new file mode 100644 index 00000000000..455f4c9d9bd --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/template-update-license.json @@ -0,0 +1 @@ +{ "name": "CC BY 4.0" } diff --git a/doc/sphinx-guides/source/_static/api/template-update-metadata.json b/doc/sphinx-guides/source/_static/api/template-update-metadata.json new file mode 100644 index 00000000000..46e201a05f9 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/template-update-metadata.json @@ -0,0 +1,30 @@ +{ + "name": "Dataverse template - edited", + "fields": [ + { + "typeName": "author", + "value": [ + { + "authorName": { + "typeName": "authorName", + "value": "Brady, Tom" + }, + "authorAffiliation": { + "typeName": "authorIdentifierScheme", + "value": "ORCID" + } + } + ] + } + ], + "instructions": [ + { + "instructionField": "author", + "instructionText": "The author data, edited" + }, + { + "instructionField": "subtitle", + "instructionText": "Instructions for subtitle" + } + ] +} diff --git a/doc/sphinx-guides/source/_static/api/template-update-terms.json 
b/doc/sphinx-guides/source/_static/api/template-update-terms.json new file mode 100644 index 00000000000..5cf166a83c4 --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/template-update-terms.json @@ -0,0 +1,12 @@ +{ + "customTerms": { + "termsOfUse": "testTermsOfUse", + "confidentialityDeclaration": "testConfidentialityDeclaration", + "specialPermissions": "testSpecialPermissions", + "restrictions": "testRestrictions", + "citationRequirements": "testCitationRequirements", + "depositorRequirements": "testDepositorRequirements", + "conditions": "testConditions", + "disclaimer": "testDisclaimer" + } +} diff --git a/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java new file mode 100644 index 00000000000..847093433ec --- /dev/null +++ b/doc/sphinx-guides/source/_static/developers/testing/SamplePerformanceIT.java @@ -0,0 +1,64 @@ +package edu.harvard.iq.dataverse.somepackage; + +import edu.harvard.iq.dataverse.util.testing.performance.JpaEntityManagerService; +import edu.harvard.iq.dataverse.util.testing.performance.JpaPerformanceTest; +import net.ttddyy.dsproxy.QueryCount; +import net.ttddyy.dsproxy.QueryCountHolder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import jakarta.persistence.EntityManager; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +// Single annotation for automatic setup of +// 1) basic tags for JUnit groups, +// 2) shared PostgreSQL server via Testcontainers, and +// 3) creation and injection of JPA entity manager service. 
+@JpaPerformanceTest +class SamplePerformanceIT { + + static JpaEntityManagerService jpa; + + @BeforeAll + static void setUp() { + // A manual start is necessary to allow you to selectively enable service features as necessary + jpa.start(); + + // inTransactionVoid: Use this when you only need to execute database operations + // (e.g., persisting test fixtures) without returning a value. + jpa.inTransactionVoid(em -> { + // EntityManager em is provided here. + // em.persist(myEntity); + }); + } + + @Test + void shouldMeasureOperationPerformance() { + // Clear any previous query statistics + QueryCountHolder.clear(); + Instant start = Instant.now(); + + // inTransaction: Use this when your operation returns a result that needs + // to be asserted or measured. + Object result = jpa.inTransaction(em -> { + // Execute your performance-critical operation using the EntityManager. + // return result; + return null; // Placeholder + }); + + Instant end = Instant.now(); + assertNotNull(result); + + // Retrieve and log ORM statistics + QueryCount count = QueryCountHolder.getGrandTotal(); + System.out.println("Elapsed ms: " + start.until(end, ChronoUnit.MILLIS)); + System.out.println("Total queries: " + count.getTotal()); + System.out.println("Select queries: " + count.getSelect()); + System.out.println("Insert queries: " + count.getInsert()); + System.out.println("Update queries: " + count.getUpdate()); + System.out.println("Delete queries: " + count.getDelete()); + } +} \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/installation/cors/cors.json b/doc/sphinx-guides/source/_static/installation/cors/cors.json new file mode 100644 index 00000000000..941eee1109f --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/cors/cors.json @@ -0,0 +1,10 @@ +{ + "CORSRules": [ + { + "AllowedOrigins": ["*"], + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "GET"], + "ExposeHeaders": ["ETag", "Accept-Ranges", "Content-Encoding", "Content-Range"] + 
} + ] +} \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/installation/cors/cors.xml b/doc/sphinx-guides/source/_static/installation/cors/cors.xml new file mode 100644 index 00000000000..98e461c3ffb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/cors/cors.xml @@ -0,0 +1,13 @@ + + + + * + * + PUT + GET + ETag + Accept-Ranges + Content-Encoding + Content-Range + + \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/util/counter_weekly.sh b/doc/sphinx-guides/source/_static/util/counter_weekly.sh index 7cc9e31d86e..c487bfb35aa 100644 --- a/doc/sphinx-guides/source/_static/util/counter_weekly.sh +++ b/doc/sphinx-guides/source/_static/util/counter_weekly.sh @@ -6,7 +6,6 @@ # A recursive method to process each Dataverse processDV () { -echo "Running counter_weekly.sh on $(date)" echo "Processing Dataverse ID#: $1" #Call the Dataverse API to get the contents of the Dataverse (without credentials, this will only list published datasets and dataverses @@ -18,23 +17,67 @@ for subds in $(echo "${DVCONTENTS}" | jq -r '.data[] | select(.type == "dataset" #The authority/identifier are preceded by a protocol/host, i.e. 
https://doi.org/ DOI=`expr "$subds" : '.*:\/\/\doi\.org\/\(.*\)'` -# Call the Dataverse API for this dataset and get the response -RESULT=$(curl -s -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI" ) -# Parse the status and number of citations found from the response -STATUS=$(echo "$RESULT" | jq -j '.status' ) -CITATIONS=$(echo "$RESULT" | jq -j '.data.citationCount') - -# The status for a call that worked -OK='OK' - -# Check the status and report -if [ "$STATUS" = "$OK" ]; then - echo "Updated: $CITATIONS citations for doi:$DOI" -else - echo "Failed to update citations for doi:$DOI" - echo "Run curl -s -X POST 'http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI ' to retry/see the error message" -fi -#processDV $subds +# Call the Dataverse API for this dataset and capture both the response and HTTP status code +HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/api/admin/makeDataCount/:persistentId/updateCitationsForDataset?persistentId=doi:$DOI") + +# Extract the HTTP status code from the last line +HTTP_STATUS=$(echo "$HTTP_RESPONSE" | tail -n1) +# Extract the response body (everything except the last line) +RESPONSE_BODY=$(echo "$HTTP_RESPONSE" | sed '$d') + +# Check the HTTP status code and report accordingly +case $HTTP_STATUS in + 200) + # Successfully queued + # Extract status from the nested data object + STATUS=$(echo "$RESPONSE_BODY" | jq -r '.data.status') + + # Extract message from the nested data object + if echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1 && [ "$(echo "$RESPONSE_BODY" | jq -r '.data.message')" != "null" ]; then + MESSAGE=$(echo "$RESPONSE_BODY" | jq -r '.data.message') + echo "[SUCCESS] doi:$DOI - $STATUS: $MESSAGE" + else + # If message is missing or null, just show the status + echo "[SUCCESS] doi:$DOI - $STATUS: Citation update queued" + fi + ;; + 400) + # Bad request + 
if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 400] doi:$DOI - Bad request: $ERROR" + else + echo "[ERROR 400] doi:$DOI - Bad request" + fi + ;; + 404) + # Not found + if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 404] doi:$DOI - Not found: $ERROR" + else + echo "[ERROR 404] doi:$DOI - Not found" + fi + ;; + 503) + # Service unavailable (queue full) + if echo "$RESPONSE_BODY" | jq -e '.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.message') + echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR" + elif echo "$RESPONSE_BODY" | jq -e '.data.message' > /dev/null 2>&1; then + ERROR=$(echo "$RESPONSE_BODY" | jq -r '.data.message') + echo "[ERROR 503] doi:$DOI - Service unavailable: $ERROR" + else + echo "[ERROR 503] doi:$DOI - Service unavailable: Queue is full" + fi + ;; + *) + # Other error + echo "[ERROR $HTTP_STATUS] doi:$DOI - Unexpected error" + echo "Response: $RESPONSE_BODY" + ;; +esac + done # Now iterate over any child Dataverses and recursively process them @@ -47,3 +90,4 @@ done # Call the function on the root dataverse to start processing processDV 1 +echo "Processing Dataverse Complete: $(date)" diff --git a/doc/sphinx-guides/source/admin/big-data-administration.rst b/doc/sphinx-guides/source/admin/big-data-administration.rst index c1d2a02c4a2..fba4cf8b773 100644 --- a/doc/sphinx-guides/source/admin/big-data-administration.rst +++ b/doc/sphinx-guides/source/admin/big-data-administration.rst @@ -77,7 +77,7 @@ Benefits: S3 offers several advantages over file storage: Challenges: -- One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with DVWebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. 
+- One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with DVWebloader (:ref:`folder-upload`) is to allow :ref:`cross site (CORS) requests on your S3 store `. - Cost: S3 offers a pricing model that allows you to pay for the storage and transfer of data based on current usage (versus long term demand) but commercial providers charge more per TB than the equivalent cost of a local disk (though commercial S3 storage is cheaper than commercial file storage). There can also be egress and other charges. Overall, S3 storage is generally more expensive than local file storage but cheaper than cloud file storage. diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 9696c758b04..c5820422953 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -50,19 +50,19 @@ Recursively assigns the users and groups having a role(s),that are in the set co Configure a Dataverse Collection to Store All New Files in a Specific File Store ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: +To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Requires permission to edit the Dataverse collection (for example, the ``EditDataverse`` permission). 
:: - curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/dataverses/$dataverse-alias/storageDriver (Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.) A store assigned directly to a collection can be seen using:: - curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/dataverses/$dataverse-alias/storageDriver This may be null. To get the effective storageDriver for a collection, which may be inherited from a parent collection or be the installation default, you can use:: - curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver?getEffective=true + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/dataverses/$dataverse-alias/storageDriver?getEffective=true This will never be null. @@ -70,11 +70,11 @@ This will never be null. To delete a store assigned directly to a collection (so that the colllection's effective store is inherted from it's parent or is the global default), use:: - curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/dataverses/$dataverse-alias/storageDriver -The available drivers can be listed with:: +The available drivers within a collection can be listed with:: - curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/storageDrivers + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/dataverses/$dataverse-alias/allowedStorageDrivers (Individual datasets can be configured to use specific file stores as well. See the "Datasets" section below.) 
@@ -135,10 +135,15 @@ The Review metadata block gives you a few basic fields common to all reviews suc You probably will want to create your own metadata blocks specific to the resources you are reviewing, your own "rubric". See :doc:`metadatacustomization` for details on creating and enabling custom metadata blocks. -Instead of creating a new custom metadata block from scratch (if you simply want to evaluate the feature, for example), you can use the metadata blocks at https://github.com/IQSS/dataverse.harvard.edu +Instead of creating a new custom metadata block from scratch (if you simply want to evaluate the feature, for example), you can load the "Trusted Data Dimensions and Intensities" metadata block in a test environment. (A test environment is advised because metadata blocks cannot be deleted once they are loaded (https://github.com/IQSS/dataverse/issues/9628).) These are the files to download: + +- :download:`rubric_trusteddatadimensionsintensities.tsv <../../../../scripts/api/data/metadatablocks/rubric_trusteddatadimensionsintensities.tsv>` +- :download:`rubric_trusteddatadimensionsintensities.properties <../../../../src/main/java/propertyFiles/rubric_trusteddatadimensionsintensities.properties>` (optional) After loading the block, don't forget to update the Solr schema! +As in the example above, the name of the metadata block (the "metadataBlock name" in the tsv itself) must start with ``rubric_`` for the block to be included in the output of the :ref:`api-list-reviews` API endpoint. 
+ + Create a Review Dataset Type ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/admin/discoverability.rst b/doc/sphinx-guides/source/admin/discoverability.rst index 3db42101e27..dd0c776726a 100644 --- a/doc/sphinx-guides/source/admin/discoverability.rst +++ b/doc/sphinx-guides/source/admin/discoverability.rst @@ -47,6 +47,8 @@ We include Croissant in the ```` because it's `recommended ``, which was the behavior in older versions of Dataverse, see :ref:`dataverse.legacy.schemaorg-in-html-head`. + +See also the :ref:`dataverse.feature.croissant-with-local-reviews` feature flag. + .. _discovery-sign-posting: Signposting diff --git a/doc/sphinx-guides/source/admin/features.md b/doc/sphinx-guides/source/admin/features.md new file mode 100644 index 00000000000..27314601505 --- /dev/null +++ b/doc/sphinx-guides/source/admin/features.md @@ -0,0 +1,517 @@ +# Features + +````{grid} 1 2 2 3 +:gutter: 1 +```{grid-item-card} +:columns: 12 + +This page highlights features for administrators and power-users of a Dataverse installation. + +*See {doc}`/quickstart/what-is-dataverse` to learn about its {ref}`core-capabilities` for researchers if you're new to Dataverse.* + +- [](feature-ai) +- [](feature-access-and-download) +- [](feature-administration) +- [](feature-authentication) +- [](feature-customization) +- [](feature-fair-data-publication) +- [](feature-file-management) +- [](feature-geospatial-data-support) +- [](feature-integrations) +- [](feature-interoperability) +- [](feature-reusability) +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 +(feature-ai)= +## {material-regular}`terminal` Artificial Intelligence +``` + +```{grid-item-card} AI Tools +A number of AI tools integrate with Dataverse. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Model Context Protocol +Model Context Protocol (MCP) is a standard for AI Agents to communicate with tools and services. 
++++ +{ref}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-access-and-download)= +## {material-regular}`download` Access and Download +``` + +```{grid-item-card} Faceted Search +Facets are data driven and customizable per collection. ++++ +{doc}`More information.` +``` + +```{grid-item-card} File Previews +A preview is available for text, tabular, image, audio, video, and geospatial files. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Preview URL +Create a URL for reviewers to view an unpublished (and optionally anonymized) dataset. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Guestbook +Optionally collect data about who is downloading the files from your datasets. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Download in Open Tabular Formats +Proprietary tabular formats are converted into TSV and RData for download. ++++ +{doc}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-administration)= +## {material-regular}`manage_accounts` Administration +``` + +```{grid-item-card} User Management +Dashboard for common user-related tasks. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Quotas +For number of files, amount of storage, etc. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Usage Statistics and Metrics +Download counters, support for Make Data Count. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Configurable Notifications +In-app and email notifications for access requests, requests for review, etc. can be muted. 
++++ +{ref}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-authentication)= +## {material-regular}`local_police` Authentication +``` + +```{grid-item-card} Login via Shibboleth +Single Sign On (SSO) using your institution's credentials. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Login via ORCID, Google, GitHub, or Microsoft +Log in using popular OAuth2 providers. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Login via OpenID Connect (OIDC) +Log in using your institution's identity provider or a third party. ++++ +{doc}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-customization)= +## {material-regular}`palette` Customization +``` + +```{grid-item-card} Branding +Your installation can be branded with a custom homepage, header, footer, CSS, etc. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Internationalization +The Dataverse software has been translated into multiple languages. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Customization of Collections +Each personal or organizational collection can be customized and branded. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Widgets +Embed listings of data in external websites. ++++ +{ref}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-fair-data-publication)= +## {material-regular}`search`{material-regular}`touch_app`{material-regular}`settings`{material-regular}`recycling` FAIR Data Publication +``` + +```{grid-item-card} Support for FAIR Data Principles +Findable, Accessible, Interoperable, Reusable. 
++++ +[More information.](https://web.archive.org/web/20191206043258/https://scholar.harvard.edu/mercecrosas/presentations/fair-guiding-principles-implementation-dataverse) +``` + +```{grid-item-card} Versioning +History of changes to datasets and files are preserved. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Prepublication Review Support +Datasets start as drafts and can be submitted for review before publication where curators can mark datasets with curation status labels. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Labels for Traditional Knowledge +Integrate with the Local Contexts platform, enabling the use of Traditional Knowledge and Biocultural Labels, and Notices. ++++ +{doc}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-file-management)= +## {material-regular}`rule_folder` File Management +``` + +```{grid-item-card} File Hierarchy +Users are able to control dataset file hierarchy and directory structure. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Restricted Files +Control who can download files and choose whether or not to enable a "Request Access" button. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Embargo +Make files inaccessible until an embargo end date. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Retention Periods +Make files inaccessible once the retention period set has passed. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Metadata Extraction from Files +Populate dataset metadata fields from tabular, NetCDF, HDF5, and FITS files. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Configurable Storage +Choose between filesystem or object storage, configurable per collection and per dataset. 
++++ +{doc}`More information.` +``` + +```{grid-item-card} Direct Upload and Download for S3 +After a permission check, files can pass freely and directly between a client computer and S3. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Fixity Checks for Files +MD5, SHA-1, SHA-256, SHA-512, UNF. ++++ +{ref}`More information.<:FileFixityChecksumAlgorithm>` +``` + +```{grid-item-card} Auxiliary Files for Data Files +Each data file can have any number of auxiliary files for documentation or other purposes (experimental). ++++ +{doc}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-geospatial-data-support)= +## {material-regular}`map` Geospatial Data Support +``` + +```{grid-item-card} Geospatial Metadata Fields +There is a dedicated geospatial metadata block. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Geospatial File Preview +GeoJSON, GeoTIFF, and Shapefiles can be previewed as a map. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Geospatial Search API +Pass `geo_point` and `geo_radius` to find datasets based on their bounding box. ++++ +{doc}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-integrations)= +## {material-regular}`api` Integrations +``` + +```{grid-item-card} DataCite +DOIs are reserved, and when datasets are published, their metadata is published to DataCite. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Handle +Handles are a Persistent ID (PID) that are an alternative to DOIs. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Globus +Upload from and download to Dataverse using Globus endpoints. ++++ +{ref}`More information.` +``` + +```{grid-item-card} RSpace +Exchange data and metadata with RSpace (e.g. IGSN ID). 
For example, a Data Management Plan (DMP) can be uploaded to +RSpace and updated with the DOI of a Dataverse dataset. ++++ +{ref}`More information.` +``` + +```{grid-item-card} GitHub +A GitHub Action is available to upload files from GitHub to a dataset. ++++ +{doc}`More information.` +``` + +```{grid-item-card} iRODS +Pull data from an iRODS instance to a Dataverse dataset. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Dropbox +Upload files stored on Dropbox. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Jupyter Notebooks +Datasets can be opened in Binder to run code in Jupyter notebooks, RStudio, and other computation environments. +They can also be previewed in Dataverse itself. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Galaxy +Import files directly from Dataverse into Galaxy as well as publish datasets containing artifacts + (Histories, datasets, etc.) from Galaxy to Dataverse. ++++ +{ref}`More information.` +``` + +```{grid-item-card} External Tools +Enable additional features not built in to the Dataverse software. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Additional Integrations +Dataverse integrates with a wide variety of third party systems, some of which are highlighted above. ++++ +For a full list, see {doc}`Integrations`. +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-interoperability)= +## {material-regular}`compare_arrows` Interoperability +``` + +```{grid-item-card} APIs +Search API, Data Deposit API, Data Access API, Metrics API, Migration API, etc. and client libraries in various languages. ++++ +{doc}`More information.` +``` + +```{grid-item-card} OAI-PMH Metadata Harvesting +Serve and harvest metadata to and from other systems (e.g. DataCite, other Dataverse installations, etc.) using standardized metadata formats. 
++++ +{doc}`More information.` +``` + +```{grid-item-card} Schema.org JSON-LD +Used by Google Dataset Search and other services for discoverability. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Croissant +Export metadata as linked data following the Croissant ontology. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Signposting +Enable easier machine access to datasets by adding linkset in a Dataverse header. ++++ +{ref}`More information.` +``` + +```{grid-item-card} External Vocabulary +Let users pick from external vocabularies (provided via API/SKOSMOS) when filling in metadata. ++++ +{ref}`More information.` +``` + +```{grid-item-card} BagIt Export +For preservation, bags can be sent to the local filesystem, Duracloud, and Google Cloud. ++++ +{ref}`More information.` +``` + +```{grid-item-card} RO-Crate +Export dataset metadata as an ro-crate.json. ++++ +{ref}`More information.` +``` +```` + +````{grid} 1 2 2 3 +:gutter: 1 + +```{grid-item-card} +:text-align: center +:columns: 12 +:class-body: sd-fs-2 + +(feature-reusability)= +## {material-regular}`open_in_new` Reusability +``` + +```{grid-item-card} Multiple License Support +Users can select from multiple standard and provided custom licenses. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Custom Terms of Use +Users can write custom terms of use in place of a predefined license. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Data Citation Formats +EndNote XML, RIS, BibTeX, or 1000+ CSL formats at the dataset or file level. ++++ +{doc}`More information.` +``` + +```{grid-item-card} Provenance +At the file level, upload standard W3C provenance files or enter free text instead. ++++ +{ref}`More information.` +``` + +```{grid-item-card} Post-Publication Workflows +Allow publication of a dataset to trigger external processes and integrations. 
+++ +{doc}`More information.` +``` +```` diff --git a/doc/sphinx-guides/source/admin/harvestserver.rst b/doc/sphinx-guides/source/admin/harvestserver.rst index 773e048aa76..3b51d3da08a 100644 --- a/doc/sphinx-guides/source/admin/harvestserver.rst +++ b/doc/sphinx-guides/source/admin/harvestserver.rst @@ -23,6 +23,8 @@ of such instances. The email portion of :ref:`systemEmail` will be visible via OAI-PMH (from the "Identify" verb). +Please note that Dataverse can also harvest metadata from other OAI-PMH servers, like :ref:`DataCite `. See :doc:`harvestclients` for details. + How does it work? ----------------- diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index 4d2d5c22fc2..0f7a87bdded 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -13,6 +13,7 @@ This guide documents the functionality only available to superusers (such as "da .. toctree:: :maxdepth: 2 + features dashboard external-tools discoverability diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index bb981c75ace..65afdce1e56 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -38,6 +38,8 @@ Researcher can configure OSF itself to deposit to your Dataverse installation by In addition to the method mentioned above, the :ref:`integrations-dashboard` also enables a pull of data from OSF to a dataset. +.. _rspace: + RSpace ++++++ @@ -45,6 +47,8 @@ RSpace is an affordable and secure enterprise grade electronic lab notebook (ELN For instructions on depositing data from RSpace to your Dataverse installation, your researchers can visit https://www.researchspace.com/help-and-support-resources/dataverse-integration/ +As shown in a `video `_, a Data Management Plan (DMP) can be added into RSpace and the research records and associated data can then be sent to Dataverse. 
Dataverse generates a Persistent Identifier (PID, often a DOI) for the dataset, and RSpace automatically puts the PID link under "Research Outputs" in the DPM. + Open Journal Systems (OJS) and OPS ++++++++++++++++++++++++++++++++++ @@ -86,6 +90,8 @@ GitLab is an open source Git repository and platform that provides free open and The :ref:`integrations-dashboard` enables a pull of data from GitLab to a dataset in Dataverse. +.. _irods: + iRODS +++++ @@ -152,6 +158,13 @@ Open OnDemand `Open OnDemand `_ is a web frontend to High Performance Computing (HPC) resources. Through a system called `OnDemand Loop `_, developed at IQSS, researchers can create datasets in Dataverse and upload files to them from their Open OnDemand installation. They can also :ref:`download ` files from Dataverse. +.. _galaxy-integration: + +Galaxy +++++++ + +Import files directly from Dataverse into `Galaxy `_ as well as publish datasets containing artifacts (Histories, datasets, etc.) from Galaxy to Dataverse. For details, see https://github.com/galaxyproject/galaxy/pull/19367 + Embedding Data on Websites -------------------------- diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index e3662d65780..84f790b7730 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -549,8 +549,9 @@ You will need to reload your Solr schema via an HTTP-API call, targeting your So ``curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"`` -You can easily roll your own little script to automate the process (which might involve fetching the schema bits -from some place else than your Dataverse installation). +You can easily roll your own little script to automate the process +(which might involve fetching the schema bits from some place else than your Dataverse installation). 
+For your convenience, an example script :download:`solr-driver.sh <../../../../conf/solr/solr-driver.sh>` has been added. Please note that reconfigurations of your Solr index might require a re-index. Usually release notes indicate a necessary re-index, but for your custom metadata you will need to keep track on your own. diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index fd2dd68f4c4..fb346e96821 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,6 +7,19 @@ This API changelog is experimental and we would love feedback on its usefulness. :local: :depth: 1 +v6.11 +----- + +- The GET /api/mydata/retrieve, if the search returns no data, now includes the "data" block with 0 results. The message that was returned in "error_message" will be returned in "message" and the "success" will be `true`. All other errors will continue to reply with "success":false and the error message in "error_message". +- The endpoints GET, PUT AND DELETE for `/api/admin/dataverse/{alias}/storageDriver` have been moved to `/api/dataverses/{alias}/storageDriver`. +- The endpoint `/api/admin/dataverse/storageDrivers` has been moved and renamed to `/api/dataverses/{alias}/allowedStorageDrivers`. Regarding the change of the name, this endpoint will in the future only display the storageDrivers that are allowed on the specified collection, as of now, it will display the entire list of available Drivers on the installation. +- The following API will now return ``403`` if the ``requireFilesToPublishDataset`` flag is set and the dataset version contains 0 files. + + - **/api/datasets/{Id}/submitForReview** + +- The ``GET /api/access/datafile/{id}/userPermissions`` endpoint now requires authentication. +- The Croissant :ref:`metadata export format ` has been updated from version 1.0 to 1.1, which is reflected in the ``conformsTo`` property. The unused ``wd`` property has been dropped. 
+ v6.10 ----- - The following GET APIs will now return ``400`` if a required Guestbook Response is not supplied. A Guestbook Response can be passed to these APIs in the JSON body using a POST call. See the notes under :ref:`basic-file-access` and :ref:`download-by-dataset-by-version` for details. @@ -29,7 +42,6 @@ v6.10 - **/api/access/datafile/{id}/requestAccess** - v6.9 ---- diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index 80d9a4d9ed2..505556cdea6 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -105,7 +105,7 @@ Basic access URI: Example :: - POST http://$SERVER/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2/J8SJZB&signed=true -d '{"guestbookResponse": {"name": "My Name", "email": "myemail@example.com", "institution": "Harvard","position": "Staff", "answers": [{"id": 123,"value": "Good"},{"id": 124,"value": ["Multi","Line"]},{"id": 125,"value": "Yellow"}]}}' + curl -X POST -H 'Content-Type:application/json' -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/access/datafile/:persistentId?persistentId=doi:10.5072/FK2/J8SJZB" -d '{"guestbookResponse": {"name": "My Name", "email": "myemail@example.com", "institution": "Harvard","position": "Staff", "answers": [{"id": 123,"value": "Good"},{"id": 124,"value": ["Multi","Line"]},{"id": 125,"value": "Yellow"}]}}' Parameters: ~~~~~~~~~~~ @@ -254,6 +254,26 @@ Value Description ID Exports file with specific file metadata ``ID``. ============== =========== + +.. _datafile-citation-formatted-access: + +Citation - Get Citation In Other Formats +---------------------------------------- + +Dataverse can generate datafile citations in "EndNote", "RIS", "BibTeX", and "CSL" formats. +This API call sends the raw format with the appropriate content-type (EndNote is XML, RIS and BibTeX are plain text, and CSL is JSON). ("Internal" is also a valid value, returning the content as HTML). 
+This API call requires a format in the API call which can be any of the values listed above. + +Usage example: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export DATAFILE_ID=99 + export FORMAT=EndNote + + curl "$SERVER_URL/api/access/datafile/$DATAFILE_ID/citation/$FORMAT" + .. _data-variable-metadata-access: Data Variable Metadata Access @@ -440,7 +460,22 @@ This method returns a list of Authenticated Users who have requested access to t A curl example using an ``id``:: - curl -H "X-Dataverse-key:$API_TOKEN" -X GET http://$SERVER/api/access/datafile/{id}/listRequests + curl -H "X-Dataverse-key:$API_TOKEN" -X GET $SERVER/api/access/datafile/{id}/listRequests + +Query parameters have been added to retrieve the historical list of "created", "granted", and "rejected" requests: + +* `includeHistory` When `true` this will force the return of all requests and not just the "created" ones. +* `start` For pagination, use this to request a specific page. +* `per_page` For pagination, use this to limit the number of items in each paged list. + +.. note:: Pagination is only available when `includeHistory` is `true` + +If requesting a page beyond the last page this API will return a 404 "There are no access requests for this file:..." +If requesting a page before page 1 or requesting the number of items to be 0 or less this API will ignore these parameters and return the entire list. 
+ +A curl example using an ``id``:: + + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER/api/access/datafile/{id}/listRequests?includeHistory=true&start=1&per_page=20" User Has Requested Access to a File: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -451,7 +486,7 @@ This method returns true or false depending on whether or not the calling user h A curl example using an ``id``:: - curl -H "X-Dataverse-key:$API_TOKEN" -X GET "http://$SERVER/api/access/datafile/{id}/userFileAccessRequested" + curl -H "X-Dataverse-key:$API_TOKEN" -X GET "$SERVER/api/access/datafile/{id}/userFileAccessRequested" Get User Permissions on a File: @@ -459,7 +494,7 @@ Get User Permissions on a File: ``/api/access/datafile/{id}/userPermissions`` -This method returns the permissions that the calling user has on a particular file. +This method returns the permissions that the calling user has on a particular file. In particular, the user permissions that this method checks, returned as booleans, are the following: @@ -470,3 +505,5 @@ In particular, the user permissions that this method checks, returned as boolean A curl example using an ``id``:: curl -H "X-Dataverse-key:$API_TOKEN" -X GET "http://$SERVER/api/access/datafile/{id}/userPermissions" + +Authentication is required for this API. diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 7a708accd3a..c583c9516cc 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -12,9 +12,13 @@ Introduction External tools are additional applications the user can access or open from your Dataverse installation to preview, explore, and manipulate data files and datasets. The term "external" is used to indicate that the tool is not part of the main Dataverse Software. .. note:: - Browser-based tools must have CORS explicitly enabled via :ref:`dataverse.cors.origin `. 
List every origin that will host your tool (or use ``*`` when a wildcard is acceptable). If an origin is not listed, the browser will block that tool's API requests even if the tool page itself loads. + Browser-based tools require CORS explicitly enabled in Dataverse. See :ref:`dataverse.cors` for details. -Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. For example, if you've deployed your tool to fabulousfiletool.com your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org + List every origin that will host your tool (or use ``*`` when a wildcard is acceptable and no authentication is required). + If an origin is not listed, the browser will block that tool's API requests even if the tool page itself loads. + +Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. +For example, if you've deployed your tool to *fabulousfiletool.com* your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: *https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org* In short, you will be creating a manifest in JSON format that describes not only how to construct URLs for your tool, but also what types of files your tool operates on, where it should appear in the Dataverse installation web interfaces, etc. 
diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 8eb11798dd7..f100fec0a04 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -207,11 +207,11 @@ Please note that some APIs are only documented in other guides that are more sui - Installation Guide - :doc:`/installation/config` + - :doc:`/installation/big-data-support` - Developer Guide - :doc:`/developers/aux-file-support` - - :doc:`/developers/big-data-support` - :doc:`/developers/dataset-migration-api` - :doc:`/developers/dataset-semantic-metadata-api` - :doc:`/developers/s3-direct-upload-api` diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 808dbeec815..eaff1c77057 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -185,6 +185,13 @@ Usage example: curl "https://demo.dataverse.org/api/dataverses/root?returnChildCount=true" +If a user with EditDataverse permissions wants to ignore the setting ``ExcludeEmailFromExport`` in order to see the contact emails, they must include the ``ignoreSettingExcludeEmailFromExport`` query parameter. + +Usage example: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root?ignoreSettingExcludeEmailFromExport=true" To view an unpublished Dataverse collection: @@ -525,7 +532,7 @@ The fully expanded example above (without environment variables) looks like this Assign Default Role to User Creating a Dataset in a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Assign a default role to a user creating a dataset in a Dataverse collection ``id`` where ``roleAlias`` is the database alias of the role to be assigned: +Assign a default role to a user creating a dataset in a Dataverse collection ``id`` where ``roleAlias`` is the database alias of the role to be assigned (requires ``ManageDataversePermissions``): .. code-block:: bash @@ -544,6 +551,27 @@ The fully expanded example above (without environment variables) looks like this Note: You may use "none" as the ``ROLE_ALIAS``. This will prevent a user who creates a dataset from having any role on that dataset. It is not recommended for Dataverse collections with human contributors. +.. _get-default-contributor-role-on-a-dataverse-api: + +Get Default Role Assigned to User Creating a Dataset in a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Get the default role that is assigned to a user creating a dataset in a Dataverse collection ``id`` (requires ``ManageDataversePermissions``): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/defaultContributorRole" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/defaultContributorRole" + .. _assign-role-on-a-dataverse-api: Assign a New Role on a Dataverse Collection @@ -908,6 +936,123 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/dataverses/$ID/userPermissions" +.. _locally-fair-list-role-assignees: + +List Locally FAIR Role Assignees for a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Lists the Locally FAIR role assignee identifiers configured for a Dataverse collection identified by ``id``. +For more about the concept, see :ref:`locally-fair` in the User Guide. + +This API is superuser-only. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/locallyFairRoleAssignees" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/locallyFairRoleAssignees" + +The response is a JSON array of role assignee identifiers. For example: + +.. code-block:: json + + [ + "@TestUser", + "&maildomain/harvard.edu" + ] + +.. _locally-fair-set-role-assignee: + +Set Locally FAIR Role Assignees for a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Replaces the full set of locally FAIR role assignee identifiers for a Dataverse collection identified by ``id``. + +This API is superuser-only. + +..
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + export JSON='["@TestUser","&maildomain/harvard.edu"]' + + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -H "Content-Type: application/json" "$SERVER_URL/api/dataverses/$ID/locallyFairRoleAssignees" -d "$JSON" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT -H "Content-Type: application/json" "https://demo.dataverse.org/api/dataverses/root/locallyFairRoleAssignees" -d '["@TestUser","&maildomain/harvard.edu"]' + +Pass an empty array to clear all locally FAIR role assignees from the collection: + +.. code-block:: bash + + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT -H "Content-Type: application/json" "$SERVER_URL/api/dataverses/$ID/locallyFairRoleAssignees" -d '[]' + +All supplied identifiers must be valid existing role assignee identifiers. Invalid identifiers will result in ``400 Bad Request``. + +.. _locally-fair-add-role-assignee: + +Add a Locally FAIR Role Assignee to a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Adds a single locally FAIR role assignee identifier to a Dataverse collection identified by ``id``. + +This API is superuser-only. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + export ROLE_ASSIGNEE='&shib/1' + + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/locallyFairRoleAssignees/$ROLE_ASSIGNEE" + +The fully expanded example above (without environment variables) looks like this: + +..
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/root/locallyFairRoleAssignees/&shib/1" + +The response includes the updated set of locally FAIR role assignee identifiers. + +.. _locally-fair-delete-role-assignee: + +Delete a Locally FAIR Role Assignee from a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Removes a single locally FAIR role assignee identifier from a Dataverse collection identified by ``id``. + +This API is superuser-only. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + export ROLE_ASSIGNEE=:authenticated-users + + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/dataverses/$ID/locallyFairRoleAssignees/$ROLE_ASSIGNEE" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/dataverses/root/locallyFairRoleAssignees/:authenticated-users" + +The response includes the updated set of locally FAIR role assignee identifiers. Removing an identifier that is blank or not currently assigned will result in ``400 Bad Request``. + .. _create-dataset-command: Create a Dataset in a Dataverse Collection @@ -1202,7 +1347,6 @@ Get a list of Guestbooks for a Dataverse Collection For more about guestbooks, see :ref:`dataset-guestbooks` in the User Guide. Get a list of Guestbooks for a Dataverse Collection -You must have "EditDataverse" permission on the Dataverse collection. .. 
code-block:: bash @@ -1218,6 +1362,15 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/root/list" +To include the inherited Guestbooks of the Dataverse Collection add the query parameter `includeInherited=true` + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/root/list?includeInherited=true" + +.. note:: By adding the query param "includeStats=true" `usageCount` and `responseCount` values can be added to the response. + + Get a Guestbook for a Dataverse Collection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1239,6 +1392,28 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/1234" +Update a Guestbook for a Dataverse Collection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For more about guestbooks, see :ref:`dataset-guestbooks` in the User Guide. + +Update a Guestbook that can be selected for a Dataset. +You must have "EditDataverse" permission on the Dataverse collection. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=1234 + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/guestbooks/$ID" -d "$JSON" + +The fully expanded example above (without environment variables) looks like this: + +..
code-block:: bash + + curl -X PUT -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/1234" -d "$JSON" + Enable or Disable a Guestbook for a Dataverse Collection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1262,6 +1437,32 @@ The fully expanded example above (without environment variables) looks like this curl -X PUT -d 'true' -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/root/1234" +Retrieve Guestbook Responses for a Guestbook +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For more about guestbooks, see :ref:`dataset-guestbooks` in the User Guide. + +In order to retrieve the Guestbook Responses for a Guestbook within a Dataverse collection, you must know the ID of the Guestbook. This API also supports pagination by passing a page limit and an optional offset (starting point). The resulting JSON will include 'Next' and 'Prev' URLs for navigation as well as the total number of responses. +The resulting JSON will be more detailed than that of the :ref:`download-guestbook-api` CSV response file by including Guestbook metadata as well as Guestbook Response metadata. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=1 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/guestbooks/$ID/responses" + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/guestbooks/$ID/responses?limit=10&offset=0" + +The fully expanded example above (without environment variables) looks like this: + +..
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/1/responses" + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/guestbooks/1/responses?limit=10&offset=0" + .. _collection-attributes-api: Change Collection Attributes @@ -1278,7 +1479,7 @@ The following attributes are supported: * ``description`` Description * ``affiliation`` Affiliation * ``filePIDsEnabled`` ("true" or "false") Restricted to use by superusers and only when the :ref:`:AllowEnablingFilePIDsPerCollection <:AllowEnablingFilePIDsPerCollection>` setting is true. Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). -* ``requireFilesToPublishDataset`` ("true" or "false") Restricted to use by superusers. Defines if Dataset needs files in order to be published. If not set the determination will be made through inheritance by checking the owners of this collection. Publishing by a superusers will not be blocked. +* ``requireFilesToPublishDataset`` ("true" or "false") Restricted to use by superusers. Defines if Dataset version needs files in order to be published or submitted for review. If not set the determination will be made through inheritance by checking the owners of this collection. Publishing by a superuser will not be blocked. * ``allowedDatasetTypes`` Restricted to use by superusers. By default "dataset" is implied. Pass a comma-separated list of dataset types (e.g. "dataset,software"). You cannot unset this attribute so if you want to delete a dataset type, set ``allowedDatasetTypes`` to a dataset type you won't be deleting. See also :ref:`dataset-types`. See also :ref:`update-dataverse-api`.
@@ -1734,6 +1935,69 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/1/templates" --upload-file dataverse-template.json +Update the Metadata and Instructions of a Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates the metadata and instructions of a template with a given ``id``. + +To update the template, you must send a JSON file. Your JSON file might look like :download:`template-update-metadata.json <../_static/api/template-update-metadata.json>` which you would send to the Dataverse installation like this: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/metadata" --upload-file template-update-metadata.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/1/metadata" --upload-file template-update-metadata.json + +Update the License or Terms Of Use of a Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates the license or custom terms of use of a template with a given ``id``. + +To update the template, you must send a JSON file containing either the name of an active license or custom terms of use. Your JSON file might look like :download:`template-update-license.json <../_static/api/template-update-license.json>` or :download:`template-update-terms.json <../_static/api/template-update-terms.json>` which you would send to the Dataverse installation like this: + +..
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/licenseTerms" --upload-file template-update-license.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/1/licenseTerms" --upload-file template-update-license.json + +Update the Terms Of Access of a Template +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates the terms of access of a template with a given ``id``. + +To update the template, you must send a JSON file containing the terms of access. Your JSON file might look like :download:`template-update-access.json <../_static/api/template-update-access.json>` which you would send to the Dataverse installation like this: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=1 + + curl -H "X-Dataverse-key: $API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$ID/access" --upload-file template-update-access.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/1/access" --upload-file template-update-access.json + Set a Default Template for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2624,7 +2888,7 @@ For these edits your JSON file need only include those dataset fields which you This endpoint also allows removing fields, as long as they are not required by the dataset. To remove a field, send an empty value (``""``) for individual fields. For multiple fields, send an empty array (``[]``).
A sample JSON file for removing fields may be downloaded here: :download:`dataset-edit-metadata-delete-fields-sample.json <../_static/api/dataset-edit-metadata-delete-fields-sample.json>` -If another user updates the dataset version metadata before you send the update request, metadata inconsistencies may occur. To prevent this, you can use the optional ``sourceLastUpdateTime`` query parameter. This parameter must include the ``lastUpdateTime`` corresponding to the dataset version being updated. The date must be in the format ``yyyy-MM-dd'T'HH:mm:ss'Z'``. +If another user updates the dataset version metadata before you send the update request, metadata inconsistencies may occur. To prevent this, you can use the optional ``sourceLastUpdateTime`` query parameter. The intended API workflow is for the client to send along the ``lastUpdateTime`` obtained from the last ``GET`` call on the version that is being modified. Dataverse APIs will always report these time stamps in UTC, ISO 8601-formatted (``yyyy-MM-dd'T'HH:mm:ss'Z'``; for example: ``2026-04-22T14:30:00Z``), regardless of the actual time zone used by the server. This is the only format this API will accept. If this parameter is provided, the update will proceed only if the ``lastUpdateTime`` remains unchanged (meaning no one has updated the dataset metadata since you retrieved it). Otherwise, the request will fail with an error. @@ -2674,6 +2938,9 @@ Update Dataset Terms of Access Updates the terms of access for the restricted files of a dataset by applying it to the draft version, or by creating a draft if none exists. +If another user updates an already existing draft version before you send the update request, metadata inconsistencies may occur. To prevent this, you can use the optional ``sourceLastUpdateTime`` query parameter. The intended API workflow is for the client to send along the ``lastUpdateTime`` obtained from the last ``GET`` call on the version that is being modified. 
Dataverse APIs will always report these time stamps in UTC, ISO 8601-formatted (``yyyy-MM-dd'T'HH:mm:ss'Z'``; for example: ``2026-04-22T14:30:00Z``), regardless of the actual time zone used by the server. This is the only format this API will accept. + +If this parameter is provided, the update will proceed only if the ``lastUpdateTime`` remains unchanged (meaning no one has updated the dataset metadata since you retrieved it). Otherwise, the request will fail with an error. To define custom terms of access, provide a JSON body with the following properties. All fields within ``customTermsOfAccess`` are optional, except if there are restricted files in your dataset then ``fileAccessRequest`` must be set to true or ``termsOfAccess`` must be provided: @@ -3252,6 +3519,8 @@ In order to update the number of files allowed for a Dataset, without causing a To delete the existing limit: +.. code-block:: bash + curl -H "X-Dataverse-key:$API_TOKEN" -X DELETE "$SERVER_URL/api/datasets/$ID/files/uploadlimit" The fully expanded example above (without environment variables) looks like this: @@ -3262,6 +3531,8 @@ The fully expanded example above (without environment variables) looks like this To delete the existing limit: +.. code-block:: bash + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X DELETE "https://demo.dataverse.org/api/datasets/24/files/uploadlimit" Report the data (file) size of a Dataset @@ -4095,6 +4366,8 @@ Usage example: The type under CSL can vary based on the dataset type, with "dataset", "software", and "review" as supported values. See also :ref:`dataset-types`. +.. note:: You can also get the Datafile Citation by using the Access Datafile API. See: :ref:`datafile-citation-formatted-access`. 
+ Get Citation by Preview URL Token ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -4731,6 +5004,31 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/datasets/3/license" -H "Content-type:application/json" --upload-file license.json +.. _api-list-reviews: + +List Reviews +~~~~~~~~~~~~ + +Datasets can have reviews. Specifically, if a :ref:`review dataset ` points at (using the ``itemReviewedUrl`` field) the URL form of a persistent ID of a dataset (e.g. https://doi.org/10.5072/FK2/ABCDEF) that is in the same Dataverse installation as the review dataset, the review dataset will be included in the list of reviews for the dataset. It is considered a local review. If additional "rubric" metadata blocks are enabled (see :ref:`review-datasets-setup`) the "metadataBlock name" must start with ``rubric_`` for the fields to be included in the output of this API endpoint. + +An API token is optional if the review dataset has been published. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/ABCDEF + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/reviews?persistentId=$PERSISTENT_IDENTIFIER" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/datasets/:persistentId/reviews?persistentId=doi:10.5072/FK2/ABCDEF" + +:download:`list-reviews.json <../_static/api/list-reviews.json>` contains sample output of how the API response might look. 
+ Files ----- @@ -5522,7 +5820,8 @@ Updating File Metadata Updates the file metadata for an existing file where ``ID`` is the database id of the file to update or ``PERSISTENT_ID`` is the persistent id (DOI or Handle) of the file. Requires a ``jsonString`` expressing the new metadata. No metadata from the previous version of this file will be persisted, so if you want to update a specific field first get the json with the above command and alter the fields you want. -An optional parameter, sourceLastUpdateTime=datetime (in format: ``yyyy-MM-dd'T'HH:mm:ss'Z'``), can be used to verify that the file metadata being edited has not been changed since you last retrieved it, thereby avoiding potential lost metadata updates. The value for sourceLastUpdateTime can be taken from ``lastUpdateTime`` in the response to get $SERVER_URL/api/files/$ID API call. +An optional parameter, ``sourceLastUpdateTime``, can be used to verify that the file metadata being edited has not been changed since you last retrieved it, thereby avoiding potential inconsistencies. In the intended API workflow this will be the time stamp in ``lastUpdateTime`` from the last ``GET /api/files/`` API call. Dataverse APIs will always report these time stamps in UTC, ISO 8601-formatted (``yyyy-MM-dd'T'HH:mm:ss'Z'``; for example: ``2026-04-22T14:30:00Z``), regardless of the actual time zone used by the server. This is the only format this API will accept. + A curl example using an ``ID`` @@ -6515,6 +6814,28 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/info/settings/:DatasetPublishPopupCustomText" +.. _show-custom-popup-for-submitting-for-review-datasets: + +Show Custom Popup Text for Submitting Datasets For Review +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For now, only the value for the :ref:`:DatasetSubmitForReviewPopupCustomText` setting from the Configuration section of the Installation Guide is exposed: + +.. 
note:: See :ref:`show-disclaimer-for-submit-for-review-datasets` if you want the user to acknowledge before submitting for review. +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/info/settings/:DatasetSubmitForReviewPopupCustomText" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/info/settings/:DatasetSubmitForReviewPopupCustomText" + .. _show-disclaimer-for-publishing-datasets: Show Disclaimer for Publishing Datasets @@ -6537,6 +6858,27 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/info/settings/:PublishDatasetDisclaimerText" +.. _show-disclaimer-for-submit-for-review-datasets: + +Show Disclaimer for Submitting For Review Datasets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The setting :ref:`:SubmitForReviewDatasetDisclaimerText`, when set, will prevent a draft dataset from being submitted for review through the UI without the user acknowledging the disclaimer. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/info/settings/:SubmitForReviewDatasetDisclaimerText" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/info/settings/:SubmitForReviewDatasetDisclaimerText" + .. 
_api-get-app-tou: Get Application Terms of Use (General Terms of Use) @@ -8752,7 +9094,7 @@ A curl example using allowing access to a dataset's metadata curl -H "X-Dataverse-key:$API_KEY" -H 'Content-Type:application/json' -d "$JSON" "$SERVER_URL/api/admin/requestSignedUrl" -Please see :ref:`dataverse.api.signature-secret` for the configuration option to add a shared secret, enabling extra +Please see :ref:`dataverse.api.signing-secret` for the configuration option to add a shared secret, enabling extra security. .. _send-feedback-admin: @@ -8852,51 +9194,58 @@ Note that this API is probably only useful for testing. MyData ------ -The MyData API is used to get a list of just the datasets, dataverses or datafiles an authenticated user can edit. +The MyData API is used to get a list of just the datasets, collections (dataverses), or datafiles an authenticated user has a role on. -The API excludes dataverses linked to an harvesting client. This results in `a known issue `_ where regular datasets in harvesting dataverses are missing from the results. +The API excludes collections linked to a harvesting client. This results in `a known issue `_ where regular datasets in harvesting collections are missing from the results. -A curl example listing objects +Here is a curl example. .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export ROLE_IDS=6 - export DVOBJECT_TYPES=Dataset - export PUBLISHED_STATES=Unpublished + export ROLE_ID1=6 + export ROLE_ID2=8 + export DVTYPE1=Dataset + export DVTYPE2=Dataverse + export PUBLISHED_STATE1=Unpublished + export PUBLISHED_STATE2=Published export PER_PAGE=10 - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/mydata/retrieve?role_ids=$ROLE_IDS&dvobject_types=$DVOBJECT_TYPES&published_states=$PUBLISHED_STATES&per_page=$PER_PAGE" - -Parameters: - -``role_id`` Roles are customizable.
Standard roles include: - -- ``1`` = Admin -- ``2`` = File Downloader -- ``3`` = Dataverse + Dataset Creator -- ``4`` = Dataverse Creator -- ``5`` = Dataset Creator -- ``6`` = Contributor -- ``7`` = Curator -- ``8`` = Member - -``dvobject_types`` Type of object, several possible values among: ``DataFile`` , ``Dataset`` & ``Dataverse`` . + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/mydata/retrieve?role_ids=$ROLE_ID1&role_ids=$ROLE_ID2&dvobject_types=$DVTYPE1&dvobject_types=$DVTYPE2&published_states=$PUBLISHED_STATE1&published_states=$PUBLISHED_STATE2&per_page=$PER_PAGE" -``published_states`` State of the object, several possible values among:``Published`` , ``Unpublished`` , ``Draft`` , ``Deaccessioned`` & ``In+Review`` . - -``per_page`` Number of results returned per page. - -``metadata_fields`` Includes the requested fields for each dataset in the response. Multiple "metadata_fields" parameters can be used to include several fields. See :doc:`search` for further information on this parameter. +The fully expanded example above (without environment variables) looks like this: -``show_collections`` Whether or not to include a list of parent and linked collections for each dataset search result. +.. code-block:: bash -``sort`` The sort field. Supported values include "name", "date" and "relevance". + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/mydata/retrieve?role_ids=6&role_ids=8&dvobject_types=Dataset&dvobject_types=Dataverse&published_states=Unpublished&published_states=Published&per_page=10" -``order`` The order in which to sort. Can either be "asc" or "desc". +**Parameters:** -``fq`` A filter query to filter the list returned. Multiple "fq" parameters can be used. +- ``role_ids``: Roles are customizable. Multiple "role_ids" parameters can be used to include several roles.
Standard roles include: + - ``1`` = Admin + - ``2`` = File Downloader + - ``3`` = Dataverse + Dataset Creator + - ``4`` = Dataverse Creator + - ``5`` = Dataset Creator + - ``6`` = Contributor + - ``7`` = Curator + - ``8`` = Member +- ``dvobject_types``: Type of object. Multiple "dvobject_types" parameters can be used to include several types. Possible values: + - ``Dataverse`` + - ``Dataset`` + - ``DataFile`` +- ``published_states``: State of the object. Multiple "published_states" parameters can be used to include several states. Possible values: + - ``Published`` + - ``Unpublished`` + - ``Draft`` + - ``Deaccessioned`` + - ``In+Review`` (the ``+`` represents a space) +- ``mydata_search_term``: A string used to search for specific data within the user's MyData collection. +- ``selected_page``: The page number of results to return (used for pagination). +- ``per_page``: Number of results returned per page. +- ``order``: The order in which to sort. Can either be "asc" or "desc". +- ``fq``: A filter query (Solr syntax) to narrow the list returned. Multiple "fq" parameters can be used. MyData Collection List ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 6ecaeebaf54..471d0fe03f8 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -47,13 +47,18 @@ 'myst_parser', 'sphinx_tabs.tabs', 'sphinx_substitution_extensions', + 'sphinx_reredirects', + 'sphinx_design' ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} # The encoding of source files. #source_encoding = 'utf-8-sig' @@ -70,7 +75,7 @@ # built documents. # # The short X.Y version. -version = '6.10.1' +version = '6.11' # The full version, including alpha/beta/rc tags. release = version @@ -78,6 +83,12 @@ # for a list of supported languages. 
language = 'en' +# Redirects for pages that have been moved +# See https://documatt.com/sphinx-reredirects/usage for detailed information +redirects = { + 'developers/big-data-support': '../installation/big-data-support.html', +} + # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' diff --git a/doc/sphinx-guides/source/container/configbaker-image.rst b/doc/sphinx-guides/source/container/configbaker-image.rst index 4cc64fb150e..587c20f1014 100644 --- a/doc/sphinx-guides/source/container/configbaker-image.rst +++ b/doc/sphinx-guides/source/container/configbaker-image.rst @@ -106,6 +106,9 @@ Scripts * - ``update-fields.sh`` - Update a Solr ``schema.xml`` with a given list of metadata fields. See ``update-fields.sh -h`` for usage details and example use cases at :ref:`update-solr-schema` and :ref:`update-solr-schema-dev`. + * - ``solr-driver.sh`` + - Automate updates to a ``schema.xml`` in a Solr Core. Either run in watch mode or as a oneshot script. + See ``solr-driver.sh -h`` for usage details. Best used as a sidecar or a one-off job. Solr Template ^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 035d0985635..7b95b6c5823 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -403,6 +403,80 @@ The steps below describe options to enable the later in different IDEs. **IMPORTANT**: This tool uses a Bash shell script and is thus limited to Mac and Linux OS. +.. _dev-fast-redeploy: + +Fast Redeploy (Command-Line) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For developers who prefer command-line workflows over IDE integration, Dataverse provides scripts for fast iterative development without full container rebuilds. + +**Initial Setup** + +Run once per development session: + +.. 
code-block:: bash + + ./scripts/dev/dev-start-frd.sh + +This command: + +- Builds the full Dataverse WAR with ``mvn package`` +- Extracts it into ``target/dataverse/`` as an exploded WAR +- Configures JPA settings for development (``ddl-generation=none``) +- Starts the dev stack with ``SKIP_DEPLOY=1`` +- Manually deploys the application via ``asadmin`` + +**Iterative Development** + +After making code changes, run: + +.. code-block:: bash + + ./scripts/dev/dev-frd.sh + +This script: + +- Compiles Java sources incrementally (``mvn compile``, ~5-10s) +- Syncs updated classes and webapp resources into the mounted exploded WAR +- Forces Payara to redeploy the application without restarting containers +- Key features: + - Skips full Maven rebuilds (only compiles changed Java files) + - Avoids container restarts (uses hot-redeployment) + - Completes in ~12 seconds vs. ~54s for traditional full rebuild workflow (4.5x faster) + - Preserves database state between deployments + +**Typical Workflow** + +.. code-block:: bash + + # Start dev environment once + ./scripts/dev/dev-start-frd.sh + + # Edit Java or XHTML files... + + # Fast redeploy + ./scripts/dev/dev-frd.sh + + # Repeat as needed + + # When finished, stop containers + ./scripts/dev/dev-down-frd.sh + +**Memory Configuration** + +The fast-redeploy workflow includes a ``docker-compose.override.yml`` that raises the memory limit to 8GB, +because the default 2GB limit (set for GitHub Actions CI) is insufficient for local Dataverse development. +The override file is automatically used by the scripts. + +**Limitations** + +- Does not update dependencies (run full ``mvn package`` + restart if ``pom.xml`` changes) +- Static resources (CSS, JS) may require browser cache clear +- For database schema changes, use ``dev-rebuild.sh`` instead +- Performance timings may vary depending on your hardware configuration + +**Note**: This workflow complements IDE-based redeployment. Use whichever fits your development style.
+ Exploring the Database ---------------------- diff --git a/doc/sphinx-guides/source/developers/coding-style.md b/doc/sphinx-guides/source/developers/coding-style.md new file mode 100644 index 00000000000..7be3f2a46b5 --- /dev/null +++ b/doc/sphinx-guides/source/developers/coding-style.md @@ -0,0 +1,138 @@ +# Coding Style + +Like all development teams, the [Dataverse Project developers at IQSS](https://dataverse.org/about) have their habits and styles when it comes to writing code. Let's attempt to get on the same page. :) + +```{contents} +:local: +``` + +## Java + +### Formatting Code + +How to format Java code is being discussed on [Zulip](https://dataverse.zulipchat.com/#narrow/channel/379673-dev/topic/code.20formatting.20.28Spotless.2C.20Checkstyle.2C.20etc.2E.29/near/432974039) and the [dev mailing list](https://groups.google.com/g/dataverse-dev/c/y2Jpk3szTf8/m/NhTJvXblAgAJ). + +#### Tabs vs. Spaces + +Don't use tabs. Use 4 spaces. + +#### Imports + +Wildcard imports are neither encouraged nor discouraged. + +#### Braces Placement + +Place curly braces according to the style below, which is an example you can see from Netbeans. + +```java +public class ClassA { + + private String letters[] = new String[]{"A", "B"}; + + public int meth(String text, int number) { + BinaryOperator plus = (a, b) -> { + return a + b; + }; + if (text != null) { + try { + meth("Some text", text.length()); + } catch (Throwable t) { + } finally { + } + } else if (number >= 0) { + text = number == 0 ? "empty" : "nonempty"; + } + do { + number = number + 1; + } while (number < 2); + for (int i = 1; i < 100; i++) { + number = number + i; + } + while (number > 0) { + number--; + } + } +} +``` + +#### Format Code with Spotless + +In some of our libraries we've had success formatting code with [Spotless](https://github.com/diffplug/spotless). See <https://github.com/gdcc/xoai/issues/35> for an early discussion. + +We've added Spotless to the main repo but have limited it to certain files.
If you'd like to use Spotless on files you're editing, update the config in `pom.xml` to include them. + +To run Spotless on your code: + +`mvn spotless:apply` + +#### Format Code You Changed with Netbeans + +For a long time IQSS standardized on Netbeans. For files not included in the Spotless config mentioned above, it is much appreciated when you format your code (but only the code you touched) using the out-of-the-box Netbeans configuration. If you have created an entirely new Java class, you can just click Source -> Format. If you are adjusting code in an existing class, highlight the code you changed and then click Source -> Format. Keeping the "diff" in your pull requests small makes them easier to code review. + +#### Checking Your Formatting With Checkstyle + +The easiest way to adopt the Dataverse Project coding style is to use Netbeans as your IDE, avoid changing the default Netbeans formatting settings, and only reformat code you've changed, as described above. + +If you do not use Netbeans, you are encouraged to check the formatting of your code using Checkstyle. + +To check the entire project: + +`mvn checkstyle:checkstyle` + +To check a single file: + +`mvn checkstyle:checkstyle -Dcheckstyle.includes=**\/SystemConfig*.java` + +### Logging + +We have adopted a pattern where the top of every class file has a line like this: + +```java +private static final Logger logger = Logger.getLogger(DatasetUtil.class.getCanonicalName()); +``` + +Use this `logger` field with varying levels such as `fine` or `info` like this: + +```java +logger.fine("will get thumbnail from dataset logo"); +``` + +Generally speaking you should use `fine` for everything that you don't want to show up by default in the app server's log file. If you use a higher level such as `info` for common operations, you will probably hear complaints that your code is too "chatty" in the logs.
These logging levels can be controlled at runtime both on your development machine and in production as explained in the {doc}`debugging` section. + +When adding logging, do not simply add `System.out.println()` lines because the logging level cannot be controlled. + +### Avoid Hard-Coding Strings (Use Constants) + +Special strings should be defined as public constants. For example, `DatasetFieldConstant.java` contains a field for "title" and it's used in many places in the code (try "Find Usages" in Netbeans). This is better than writing the string "title" in all those places. + +### Avoid Hard-Coding User-Facing Messaging in English + +There is an ongoing effort to translate the Dataverse Software into various languages. Look for "lang" or "languages" in the {doc}`/installation/config` section of the Installation Guide for details if you'd like to help or play around with this feature. + +The translation effort is hampered if you hard code user-facing messages in English in the Java code. Put English strings in `Bundle.properties` and use `BundleUtil` to pull them out. This is especially important for messages that appear in the UI. We are aware that the API has many, many hard coded English strings in it. If you touch a method in the API and notice English strings, you are strongly encouraged to use that opportunity to move the English to `Bundle.properties`. + +### Type Safety + +If you just downloaded Netbeans and are using the out-of-the-box settings, you should be in pretty good shape. Unfortunately, the default configuration of Netbeans doesn't warn you about type-safety problems you may be inadvertently introducing into the code. To see these warnings, click Netbeans -> Preferences -> Editor -> Hints and check the following: + +- "Raw Types" under "Standard Javac Warnings" + +If you know of a way to easily share Netbeans configuration across a team, please get in touch. + +## Bash + +Generally, Google's Shell Style Guide at <https://google.github.io/styleguide/shell.xml> seems to have good advice.
+ +### Formatting Code + +#### Tabs vs. Spaces + +Don't use tabs. Use 2 spaces. + +`shfmt` from <https://github.com/mvdan/sh> seems like a decent way to enforce indentation of two spaces (i.e. `shfmt -i 2 -w path/to/script.sh`) but be aware that it makes other changes. + +## Bike Shedding + +What color should the [bike shed](https://en.wiktionary.org/wiki/bikeshedding) be? :) + +Come debate with us about coding style in this Google doc that has public comments enabled: <https://docs.google.com/document/d/1KTd3FpM1BI3HlBofaZjMmBiQEJtFf11jiiGpQeJzy7A/edit?usp=sharing> diff --git a/doc/sphinx-guides/source/developers/coding-style.rst b/doc/sphinx-guides/source/developers/coding-style.rst deleted file mode 100755 index f3935aae30f..00000000000 --- a/doc/sphinx-guides/source/developers/coding-style.rst +++ /dev/null @@ -1,151 +0,0 @@ -============ -Coding Style -============ - -Like all development teams, the `Dataverse Project developers at IQSS `_ have their habits and styles when it comes to writing code. Let's attempt to get on the same page. :) - -.. contents:: |toctitle| - :local: - -Java ----- - -Formatting Code -~~~~~~~~~~~~~~~ - -How to format Java code is being discussed on `Zulip `_ and the `dev mailing list `_. - -Tabs vs. Spaces -^^^^^^^^^^^^^^^ - -Don't use tabs. Use 4 spaces. - -Imports -^^^^^^^ - -Wildcard imports are neither encouraged nor discouraged. - -Braces Placement -^^^^^^^^^^^^^^^^ - -Place curly braces according to the style below, which is an example you can see from Netbeans. - -.. code-block:: java - - public class ClassA { - - private String letters[] = new String[]{"A", "B"}; - - public int meth(String text, int number) { - BinaryOperator plus = (a, b) -> { - return a + b; - }; - if (text != null) { - try { - meth("Some text", text.length()); - } catch (Throwable t) { - } finally { - } - } else if (number >= 0) { - text = number == 0 ?
"empty" : "nonempty"; - } - do { - number = number + 1; - } while (number < 2); - for (int i = 1; i < 100; i++) { - number = number + i; - } - while (number > 0) { - number--; - } - } - } - -Format Code with Spotless -^^^^^^^^^^^^^^^^^^^^^^^^^ - -In some of our libraries we've had success formatting code with `Spotless `_. See https://github.com/gdcc/xoai/issues/35 for an early discussion. - -We've added Spotless to the main repo but have limited it to certain files. If you'd like to use Spotless on files you're editing, update the config in pom.xml to include them. - -To run Spotless on your code: - -``mvn spotless:apply`` - -Format Code You Changed with Netbeans -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -For a long time IQSS standardized on Netbeans. For files not included in the Spotless config mentioned above, it is much appreciated when you format your code (but only the code you touched!) using the out-of-the-box Netbeans configuration. If you have created an entirely new Java class, you can just click Source -> Format. If you are adjusting code in an existing class, highlight the code you changed and then click Source -> Format. Keeping the "diff" in your pull requests small makes them easier to code review. - -Checking Your Formatting With Checkstyle -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The easiest way to adopt the Dataverse Project coding style is to use Netbeans as your IDE, avoid change the default Netbeans formatting settings, and only reformat code you've changed, as described above. - -If you do not use Netbeans, you are encouraged to check the formatting of your code using Checkstyle. 
- -To check the entire project: - -``mvn checkstyle:checkstyle`` - -To check a single file: - -``mvn checkstyle:checkstyle -Dcheckstyle.includes=**\/SystemConfig*.java`` - -Logging -~~~~~~~ - -We have adopted a pattern where the top of every class file has a line like this:: - - private static final Logger logger = Logger.getLogger(DatasetUtil.class.getCanonicalName()); - -Use this ``logger`` field with varying levels such as ``fine`` or ``info`` like this:: - - logger.fine("will get thumbnail from dataset logo"); - -Generally speaking you should use ``fine`` for everything that you don't want to show up by default in the app server's log file. If you use a higher level such as ``info`` for common operations, you will probably hear complaints that your code is too "chatty" in the logs. These logging levels can be controlled at runtime both on your development machine and in production as explained in the :doc:`debugging` section. - -When adding logging, do not simply add ``System.out.println()`` lines because the logging level cannot be controlled. - -Avoid Hard-Coding Strings (Use Constants) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Special strings should be defined as public constants. For example, ``DatasetFieldConstant.java`` contains a field for "title" and it's used in many places in the code (try "Find Usages" in Netbeans). This is better than writing the string "title" in all those places. - -Avoid Hard-Coding User-Facing Messaging in English -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There is an ongoing effort to translate the Dataverse Software into various languages. Look for "lang" or "languages" in the :doc:`/installation/config` section of the Installation Guide for details if you'd like to help or play around with this feature. - -The translation effort is hampered if you hard code user-facing messages in English in the Java code. Put English strings in ``Bundle.properties`` and use ``BundleUtil`` to pull them out. 
This is especially important for messages that appear in the UI. We are aware that the API has many, many hard coded English strings in it. If you touch a method in the API and notice English strings, you are strongly encouraged to use that opportunity to move the English to ``Bundle.properties``. - -Type Safety -~~~~~~~~~~~ - -If you just downloaded Netbeans and are using the out-of-the-box settings, you should be in pretty good shape. Unfortunately, the default configuration of Netbeans doesn't warn you about type-safety problems you may be inadvertently introducing into the code. To see these warnings, click Netbeans -> Preferences -> Editor -> Hints and check the following: - -- "Raw Types" under "Standard Javac Warnings" - -If you know of a way to easily share Netbeans configuration across a team, please get in touch. - -Bash ----- - -Generally, Google's Shell Style Guide at https://google.github.io/styleguide/shell.xml seems to have good advice. - -Formatting Code -~~~~~~~~~~~~~~~ - -Tabs vs. Spaces -^^^^^^^^^^^^^^^ - -Don't use tabs. Use 2 spaces. - -shfmt from https://github.com/mvdan/sh seems like a decent way to enforce indentation of two spaces (i.e. ``shfmt -i 2 -w path/to/script.sh``) but be aware that it makes other changes. - -Bike Shedding -------------- - -What color should the `bike shed `_ be? 
:) - -Come debate with us about coding style in this Google doc that has public comments enabled: https://docs.google.com/document/d/1KTd3FpM1BI3HlBofaZjMmBiQEJtFf11jiiGpQeJzy7A/edit?usp=sharing diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 28b1fbaae82..f2bee4d6009 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -37,7 +37,6 @@ Developer Guide remote-users geospatial selinux - big-data-support aux-file-support s3-direct-upload-api globus-api @@ -47,4 +46,6 @@ Developer Guide fontcustom classic-dev-env search-services + testing/fixtures.md + testing/performance.md diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index fbbc2e5d3ae..2e5b3f46b5f 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -93,7 +93,7 @@ Developers express the need for an addition to release notes by creating a "rele The task at or near release time is to collect these snippets into a single file. - Find the issue in GitHub that tracks the work of creating release notes for the upcoming release. -- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. Snippets may not include any issue number or pull request number in the text so be sure to copy the number from the filename of the snippet into the final release note. +- Create a branch, add a .md file for the release (ex. 6.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. 
Snippets may not include any issue number or pull request number in the text so be sure to copy the number from the filename of the snippet into the final release note. - Delete (``git rm``) the release note snippets as the content is added to the main release notes file. - Include instructions describing the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. These instructions are required for the next steps (deploying to various environments) so try to prioritize them over finding just the right words in release highlights (which you can do later). - Make a pull request. Here's an example: https://github.com/IQSS/dataverse/pull/11613 @@ -101,22 +101,19 @@ The task at or near release time is to collect these snippets into a single file For a hotfix, don't worry about release notes yet. -Deploy Release Candidate to Internal ------------------------------------- +Build Release Candidate +----------------------- |dedicated| -First, build the release candidate. For a regular release, you will use the "develop" branch, as shown below. For a hotfix, you will use whatever branch name is used for the hotfix. - -Go to https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ and make the following adjustments to the config: +Go to https://github.com/IQSS/dataverse/actions/workflows/generate_war_file.yml and click "run workflow". For a regular release, make sure the branch is "develop". For a hotfix, you will use whatever branch name is used for the hotfix. Leave the custom label blank and click "run workflow". This will create an action that should result in a zip file. Inside that zip is another zip that contains the war file.
-- Repository URL: ``https://github.com/IQSS/dataverse.git`` -- Branch Specifier (blank for 'any'): ``*/develop`` -- Execute shell: Update version in filenames to ``dataverse-5.10.war`` (for example) +Deploy Release Candidate to Internal +------------------------------------ -Click "Save" then "Build Now". The release candidate war file will be available at https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ws/target/ +|dedicated| -ssh into the dataverse-internal server and download the release candidate war file from the URL above. +ssh into the dataverse-internal server and download the release candidate war file you built above. Go to /doc/release-notes, open the release-notes.md file for the release we're working on, and perform all the steps under "Upgrade Instructions". Note that for regular releases, we haven't bumped the version yet so you won't be able to follow the steps exactly. (For hotfix releases, the version will be bumped already.) @@ -197,7 +194,7 @@ Create a release branch named after the issue that tracks bumping the version wi Make the following changes in the release branch. -Increment the version number to the milestone (e.g. 5.10.1) in the following two files: +Increment the version number to the milestone (e.g. 6.10.1) in the following two files: - modules/dataverse-parent/pom.xml -> ```` -> ```` (e.g. `pom.xml commit `_) - doc/sphinx-guides/source/conf.py @@ -243,7 +240,7 @@ If the GitHub UI tells you there would be merge conflicts, something has gone ho Add Milestone to Pull Requests and Issues ----------------------------------------- -Often someone is making sure that the proper milestone (e.g. 5.10.1) is being applied to pull requests and issues, but sometimes this falls between the cracks. +Often someone is making sure that the proper milestone (e.g. 6.10.1) is being applied to pull requests and issues, but sometimes this falls between the cracks. 
Check for merged pull requests that have no milestone by going to https://github.com/IQSS/dataverse/pulls and entering `is:pr is:merged no:milestone `_ as a query. If you find any, first check if those pull requests are against open pull requests. If so, do nothing. Otherwise, add the milestone to the pull request and any issues it closes. This includes the "merge develop into master" pull request above. @@ -267,86 +264,60 @@ Go to https://jenkins.dataverse.org/job/guides.dataverse.org/ and make the follo - Repository URL: ``https://github.com/IQSS/dataverse.git`` - Branch Specifier (blank for 'any'): ``*/master`` -- ``VERSION`` (under "Build Steps"): bump to the next release. Don't prepend a "v". Use ``5.10.1`` (for example) +- ``VERSION`` (under "Build Steps"): bump to the next release. Don't prepend a "v". Use ``6.10.1`` (for example) Click "Save" then "Build Now". -Make sure the guides directory appears in the expected location such as https://guides.dataverse.org/en/5.10.1/ +Make sure the guides directory appears in the expected location such as https://guides.dataverse.org/en/6.10.1/ As described below, we'll soon point the "latest" symlink to that new directory. -Create a Draft Release on GitHub --------------------------------- - -Go to https://github.com/IQSS/dataverse/releases/new to start creating a draft release. - -- Under "Choose a tag" you will be creating a new tag. Have it start with a "v" such as ``v5.10.1``. Click "Create new tag on publish". -- Under "Target", choose "master". This commit will appear in ``/api/info/version`` from a running installation. -- Under "Release title" use the same name as the tag such as ``v5.10.1``. -- In the description, copy and paste the content from the release notes .md file created in the "Write Release Notes" steps above. -- Click "Save draft" because we do not want to publish the release yet. - -At this point you can send around the draft release for any final feedback. 
Links to the guides for this release should be working now, since you build them above. - -Make corrections to the draft, if necessary. It will be out of sync with the .md file, but that's ok (`#7988 `_ is tracking this). - .. _run-build-create-war: Run a Build to Create the War File ---------------------------------- -ssh into the dataverse-internal server and undeploy the current war file. - -Go to https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ and make the following adjustments to the config: - -- Repository URL: ``https://github.com/IQSS/dataverse.git`` -- Branch Specifier (blank for 'any'): ``*/master`` -- Execute shell: Update version in filenames to ``dataverse-5.10.1.war`` (for example) +Go to https://github.com/IQSS/dataverse/actions/workflows/generate_war_file.yml and click "run workflow". For a regular release, change the branch to "master". For a hotfix release, use whatever branch name is used for the hotfix. Leave the custom label blank and click "run workflow". This will create an action that should result in a zip file. Inside that zip is another zip that contains the war file. -Click "Save" then "Build Now". +The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"6.10.1","build":"master-300d5b5"}``). -This will build the war file, and then automatically deploy it on dataverse-internal. Verify that the application has deployed successfully. +Build Installer (dvinstall.zip) +------------------------------- -The build number will appear in ``/api/info/version`` (along with the commit mentioned above) from a running installation (e.g. ``{"version":"5.10.1","build":"907-b844672``). +In a git checkout of the source, switch to the master branch and pull the latest. -Note that the build number comes from the following script in an early Jenkins build step...
+Copy the war file from the previous step (shown in ``/tmp`` in the example below ) to the ``target`` directory in the root of the repo (create the ``target`` directory, if necessary): .. code-block:: bash - COMMIT_SHA1=`echo $GIT_COMMIT | cut -c-7` - echo "build.number=${BUILD_NUMBER}-${COMMIT_SHA1}" > $WORKSPACE/src/main/java/BuildNumber.properties + mkdir target + cp /tmp/dataverse-6.10.1.war target -... but we can explore alternative methods of specifying the build number, as described in :ref:`auto-custom-build-number`. - -Build Installer (dvinstall.zip) -------------------------------- +Then, create the installer: -ssh into the dataverse-internal server and do the following: +.. code-block:: bash -- In a git checkout of the dataverse source switch to the master branch and pull the latest. -- Copy the war file from the previous step to the ``target`` directory in the root of the repo (create it, if necessary): -- ``mkdir target`` -- ``cp /tmp/dataverse-5.10.1.war target`` -- ``cd scripts/installer`` -- ``make clean`` -- ``make`` + cd scripts/installer + make clean + make A zip file called ``dvinstall.zip`` should be produced. -Alternatively, you can build the installer on your own dev. instance. But make sure you use the war file produced in the step above, not a war file build from master on your own system! That's because we want the released application war file to contain the build number described above. Download the war file directly from Jenkins, or from dataverse-internal. +Create a Draft Release on GitHub +-------------------------------- -Make Artifacts Available for Download -------------------------------------- +Go to https://github.com/IQSS/dataverse/releases/new to start creating a draft release. -Upload the following artifacts to the draft release you created: +- Under "Select tag" you will be creating a new tag. Have it start with a "v" such as ``v6.10.1``. Click "Create new tag". Don't worry, the tag won't be created until you publish. 
+- Under "Target", choose "master". This commit will appear in ``/api/info/version`` from a running installation. +- Under "Release title" use the same name as the tag such as ``v6.10.1``. +- In the description, copy and paste the content from the release notes .md file created in the "Write Release Notes" steps above. +- Under "attach binaries", upload the war file and installer you created above. +- Click "Save draft" because we do not want to publish the release yet. -- the war file (e.g. ``dataverse-5.10.1.war``, from above) -- the installer (``dvinstall.zip``, from above) -- other files as needed: +At this point you can send around the draft release for any final feedback. Links to the guides for this release should be working now, since you built them above. - - updated Solr schema - - metadata block tsv files - - config files +Make corrections to the draft, if necessary. It will be out of sync with the .md file, but that's ok (`#7988 `_ is tracking this). Publish the Release ------------------- @@ -356,14 +327,14 @@ Click the "Publish release" button. Update Guides Link ------------------ -"latest" at https://guides.dataverse.org/en/latest/ is a symlink to the directory with the latest release. That directory (e.g. ``5.10.1``) was put into place by the Jenkins "guides" job described above. +"latest" at https://guides.dataverse.org/en/latest/ is a symlink to the directory with the latest release. That directory (e.g. ``6.10.1``) was put into place by the Jenkins "guides" job described above. ssh into the guides server and update the symlink to point to the latest release, as in the example below. .. code-block:: bash cd /var/www/html/en - ln -s 5.10.1 latest + ln -s 6.10.1 latest This step could be done before publishing the release if you'd like to double check that links in the release notes work.
@@ -374,7 +345,7 @@ You can find our milestones at https://github.com/IQSS/dataverse/milestones Now that we've published the release, close the milestone and create a new one for the **next** release, the release **after** the one we're working on, that is. -Note that for milestones we use just the number without the "v" (e.g. "5.10.1"). +Note that for milestones we use just the number without the "v" (e.g. "6.10.1"). On the project board at https://github.com/orgs/IQSS/projects/34 edit the tab (view) that shows the milestone to show the next milestone. diff --git a/doc/sphinx-guides/source/developers/testing/fixtures.md b/doc/sphinx-guides/source/developers/testing/fixtures.md new file mode 100644 index 00000000000..06f8edfc25a --- /dev/null +++ b/doc/sphinx-guides/source/developers/testing/fixtures.md @@ -0,0 +1,383 @@ +# Fixtures For Tests + +Most Dataverse test fixtures are based on JSON files stored in the test resources of the codebase. + +In addition, (as of Dataverse 6.11) you can use a generator utility to create dataset-centered fixtures programmatically. +This is most useful for local integration and performance tests but may be of use for unit tests as well. + +```{contents} Contents: +:local: +:depth: 3 +``` + +(fixture-generator)= +## Dataset Fixture Generator + +The dataset fixture generator is a test utility for creating connected dataset entity graphs with configurable size and shape. +It is located in the core testing utilities at `edu.harvard.iq.dataverse.util.testing.fixtures` and `edu.harvard.iq.dataverse.util.testing.recipes`. + +The fixture generator is useful when tests need one or more datasets with many files, tabular files, variables, and optional variable metadata, while still keeping the test setup readable. +It is primarily intended for integration and performance tests where hand-building entities would be too verbose, brittle, or too uniform to uncover ORM and serialization issues. + +The generator creates an in-memory entity graph. 
+Persisting that graph to a database is optional and requires the usual JPA persistence rules to be respected (see below). + + +### Architecture + +The fixture generator is built around three main concepts: a builder, recipes for it, and field populators. +This separation keeps entity graph shape, relationship wiring, and scalar field population independent of each other. + +#### Fixture Builder + +The builder creates the connected *entity graph* by consuming recipes. It is responsible for: + +- Creating the entities +- Wiring relationships +- Keeping both sides of relationships in sync where needed +- Returning a `DatasetFixture` with convenient references to generated objects + +#### Recipes + +Recipes *describe* the *shape* of the fixture's entity graph and should not manually wire entity relationships: + +- How many files should exist? +- Which files are tabular? +- How many variables should a tabular file contain? +- Should variable metadata be created? + +**Available Recipes:** + +Recipes are composable using a fluent API and work together. + +```text +DatasetRecipe + -> DatasetTypeRecipe + -> VersionRecipe + -> FileRecipe + -> VariableSetRecipe + -> VariableMetadataRecipe +``` + +`DatasetRecipe` +Top-level recipe for creating a dataset fixture. It combines a `DatasetTypeRecipe` and a `VersionRecipe`. + +`DatasetTypeRecipe` +Provides the dataset type assigned to the generated dataset. +Can create a dataset type from scalar values or wrap an existing instance. + +Note: the recipe provides the type object but does not persist it. +Tests that persist generated fixtures must ensure the dataset type is managed before the dataset is flushed. + +`VersionRecipe` +Describes the current dataset version. At the moment, this mainly means providing one or more file recipes. + +`FileRecipe` +Describes file populations. A file recipe may create regular files or tabular files. + +`VariableSetRecipe` +Describes how many variables to create for tabular files. 
It supports uniform and skewed variable populations. + +`VariableMetadataRecipe` +Decides whether a `VariableMetadata` row should be created for a generated `(FileMetadata, DataVariable)` pair. +At most one metadata row is generated for each such pair. + + +#### Fixture Populator + +The populator fills scalar and non-relationship *fields*, which are not primarily about graph shape. + +It sets values such as: + +- Identifiers +- Timestamps +- File labels +- Content types +- Checksums +- Variable names +- Required fields +- Null-sensitive collections + +The default *minimal* populator is conservative. +It creates enough data for serialization and persistence tests, but it does not try to simulate fully realistic production metadata. + + + +### Full Example + +The following example creates a small but non-uniform dataset fixture. It's suitable +- for a smoke test of a serializer, +- for an integration test with assertions on the result, +- for a performance test with benchmarking speed of different implementations, and other scenarios. 
+ +```java +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.regular(20), + FileRecipe.tabular(30, + VariableSetRecipe + .byPredicate(VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() < 5)) + .when(ctx -> ctx.fileIndex() % 10 == 0, 1_000) + .otherwise(25) + )) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .populator(FixturePopulator.minimal()) + .build(); + +JsonArrayBuilder files = Json.createArrayBuilder(); + +for (FileMetadata fileMetadata : fixture.fileMetadatas()) { + files.add(JsonPrinter.json(fileMetadata.getDataFile(), fileMetadata, true)); +} + +var json = files.build(); +``` + +This creates: +- 20 regular files +- 30 tabular files + - some tabular files with 1,000 variables + - other tabular files with 25 variables + - variable metadata only for the first few variables in each tabular file + +This helps exercise code paths that traverse files, file metadata, data tables, data variables, and variable metadata. +All of this happens without the need to pre-produce an enormous fixture as a JSON file. +Its deterministic nature allows running the test anywhere without depending on seeded randomness, offering reliable and reproducible results. + + + +### Basic Usage + +#### Small Dataset + +This example creates: + +- one dataset +- one current version +- 10 tabular files +- 10 variables per tabular file +- 1 regular file + +```java +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.tabular(10, VariableSetRecipe.uniform(10)), + FileRecipe.regular(1) + ) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .populator(FixturePopulator.minimal()) + .build(); + +Dataset dataset = fixture.dataset(); +DatasetVersion version = fixture.currentVersion(); +``` + +#### Skewed Variable Populations + +Skewed data is useful for performance testing because real datasets are rarely uniform.
+Some files may have only a few variables, while others may be very large. + +This example creates 500 tabular files: + +- one dataset +- one current version +- 500 tabular files + - every 100th file receives 100,000 variables + - every 10th file receives 10,000 variables + - all others receive 250 variables + +```java +var variables = VariableSetRecipe.byPredicate() + .when(ctx -> ctx.fileIndex() % 100 == 0, 100_000) + .when(ctx -> ctx.fileIndex() % 10 == 0, 10_000) + .otherwise(250); + +var recipe = DatasetRecipe.of( + DatasetTypeRecipe.dataset(), + VersionRecipe.of( + FileRecipe.tabular(500, variables) + ) +); + +DatasetFixture fixture = DatasetFixtureBuilder.builder() + .recipe(recipe) + .build(); +``` + +#### Adding Variable Metadata + +Variable Metadata is optional and controlled by `VariableMetadataRecipe`. +The metadata recipe is evaluated for each generated `(FileMetadata, DataVariable)` pair. +This matters because `VariableMetadata` is versioned indirectly through `FileMetadata`. + +*No variable metadata (default):* + +```java +VariableSetRecipe.uniform(1_000) +- or - +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.noop()) +``` + +*Metadata for every variable:* + +```java +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.always()) +``` + +*Metadata for selected variables:* + +```java +VariableSetRecipe.uniform(1_000, VariableMetadataRecipe.byPredicate(ctx -> ctx.variableIndex() % 10 == 0)) +``` + + + +### Persistence Usage + +The generator creates an in-memory entity graph. Persisting that graph is optional and follows normal JPA rules. + +When persisting a generated fixture to a database, remember that not all relationships cascade from `Dataset` to every object. +In particular, `DataFile` instances usually need to be persisted explicitly before persisting the dataset graph. +The `DatasetType` must also be managed, either by persisting the generated type or by looking up an existing one in the same persistence context. 
+ +A typical persistence sequence is: + +```java +jpa.inTransactionVoid(em -> { + em.persist(fixture.datasetType()); + for (DataFile dataFile : fixture.dataFiles()) { + em.persist(dataFile); + } + em.persist(fixture.dataset()); +}); +``` + +The exact order may evolve as the fixture generator grows, may depend on the exact usage scenario, and +is influenced by the evolution of the entity classes themselves, but the important point is: +**Shared/reference entities and non-cascaded entities must be managed (persisted) before the dataset graph is flushed**. + + + +### Discussion and Limitations + +#### Benefits + +1. **Readable scenarios:** tests describe intent at a high level. + For example: `FileRecipe.tabular(500, VariableSetRecipe.uniform(1_000))` is easier to understand than manually creating thousands of entities. +2. **Composable graph shape:** different recipes can be combined to describe mixed datasets. +3. **Deterministic output:** the build context carries fixture-wide values such as sequence and timestamp, making generated data easier to debug and compare. +4. **Reduced boilerplate:** relationship wiring and null-sensitive defaults are centralized. +5. **Better performance testing:** skewed fixtures can expose ORM issues that uniform data may hide, such as N+1 query expansion over large variable collections. +6. **Serialization safety:** the minimal populator initializes fields and collections that serializers commonly traverse. + +#### Tradeoffs + +1. **More concepts to learn:** developers need to understand builders, recipes, populators, and resulting fixture objects vs. a static factory. +2. **Not a full production object factory:** the minimal populator creates safe test data, not necessarily realistic production data. +3. **Persistence still requires care:** some entities must be persisted explicitly because the production model does not cascade every relationship. +4. 
**Hardcoded defaults:** the minimal populator uses deterministic placeholder values, tests that need realistic metadata should provide a custom populator. + +#### Limitations + +1. **Minimalistic:** The current fixture generator is intentionally minimal. +2. **Single dataset version only:** the fixture currently models one current dataset version and does not generate multiple versions. +3. **No version evolution recipes:** there is no support yet for deriving later versions from earlier versions, modeling change over time. +4. **Limited dataset metadata:** dataset fields and metadata blocks are not generated in detail. +5. **Simple dataset type handling:** a `DatasetType` can be generated or supplied, but persistence of shared types is still the responsibility of the test. +6. **No persistence manager:** the fixture system builds graphs, but it does not yet provide a dedicated persister that knows the correct persistence order. +7. **One table per tabular file:** tabular files currently get one `DataTable`. The domain model can allow more, but the fixture generator does not expose that yet. +8. **One variable group per tabular file:** each non-empty tabular file currently gets one `VarGroup` containing all variables, there is no `VarGroupRecipe` yet. +9. **Limited variable metadata content:** variable metadata can be present or absent, but the minimal populator only fills basic scalar values. +10. **No category or statistics recipes:** the fixture generator does not yet provide recipes for variable categories, summary statistics, invalid ranges, or category metadata. 
+ +#### Unsupported Usage Scenarios + +The following scenarios are not yet directly expressible: + +- multiple dataset versions sharing the same `DataFile` objects +- metadata-only changes between versions +- version-specific `VariableMetadata` changes across versions +- files added or removed between versions +- multiple `DataTable` objects per file +- different variable group distributions per file +- weighted random or seeded random file populations +- Zipf-like or heavy-tail distributions as first-class recipes +- realistic dataset field metadata +- fixture graphs that mimic a fully published dataset lifecycle + + + +### Extending The Fixture Generator + +When extending the fixture generator, first decide which responsibility your change belongs to. + +#### Add Recipes For Graph Shaping + +Use a new recipe when the test needs to describe what shape should be created. + +Examples: + +- number of var groups +- number of data tables per file +- whether categories should exist +- how many variables receive summary statistics +- how versions evolve over time + +Recipe changes usually belong in the `edu.harvard.iq.dataverse.util.testing.recipes` package. + +#### Add Populator Behavior For Scalar Values + +Use a new or custom populator when entities should be filled differently, but the graph shape is the same. +Extend the populator interface if new types of scalar data are required. + +Examples: + +- more realistic file names +- different content types +- richer variable labels +- custom checksums +- realistic variable metadata text + +Populator changes usually belong in the `edu.harvard.iq.dataverse.util.testing.fixtures` package. + +#### Change Builder For Wiring + +Change the builder when new relationships must be created or maintained. 
+ +Examples: + +- adding support for `VariableCategory` +- wiring category metadata +- creating multiple data tables per file +- linking version-evolved file metadata back to shared data files + +Builder changes should be kept small and split into helper methods where possible. + +#### Recommended Extension Path + +A practical roadmap for further evolution is: + +1. Add a `VarGroupRecipe` to control group count and membership. +2. Add category and summary statistic recipes for variable-level enrichment. +3. Add a fixture persister that knows the correct persistence order. +4. Add version evolution recipes for multi-version datasets. +5. Add richer dataset metadata generation. +6. Add (seeded!) random distribution recipes if a deterministic skew is not enough. +7. Add fuzz testing by generating fixtures with targeted chaos. + +#### Guidelines For Contributions + +1. Keep recipes declarative: recipes should describe shape, not manually wire entity relationships. +2. Keep populators focused: populators should fill fields, not decide how many entities exist. +3. Keep builders responsible for wiring: relationship consistency belongs in the builder. +4. Prefer deterministic generation: deterministic data makes performance tests easier to reproduce and debug. +5. Avoid hiding persistence requirements: if an entity must be persisted before another, document it clearly or add a dedicated persister. +6. Start minimal: add the smallest recipe or populator extension needed for the scenario. Avoid making the DSL generic before there is a concrete test need.
diff --git a/doc/sphinx-guides/source/developers/testing/performance.md b/doc/sphinx-guides/source/developers/testing/performance.md new file mode 100644 index 00000000000..e981cee5d80 --- /dev/null +++ b/doc/sphinx-guides/source/developers/testing/performance.md @@ -0,0 +1,89 @@ +# Performance Testing + +## Introduction +Performance tests measure how your application behaves under load, focusing on execution time, resource consumption, and database efficiency. +Unlike *unit tests*, which verify isolated logic, or *integration* or *API tests*, which validate component interactions and full request lifecycles, performance tests quantify *how fast* operations complete and *how many* database queries they trigger. + +## Running Performance Tests +Performance tests are excluded from the default test run to save CI/CD time and local resources. +To execute them, use the Maven `verify` lifecycle phase and override the `it.groups` property: + +```shell +mvn verify -Dit.groups=performance +``` + +```{note} +The `it.groups` property accepts a comma-separated list. +You can combine groups (e.g., `-Dit.groups=integration,performance`) as necessary. +However, it is highly recommended to run them in isolation due to their computational intensity and sensitivity to system load. +``` + +## Testing database-bound code +Performance tests for code relying on retrieving entities from a database are essential for catching regressions in ORM efficiency. +They can identify N+1 query problems or ensure that heavy data processing pipelines (e.g., exporting large datasets) remain responsive as the codebase evolves. + +### Prerequisites +Any tests around database-bound code rely on [Testcontainers](https://www.testcontainers.org/) to spin up ephemeral database instances. +Avoiding in-memory databases for such tests allows for more realistic testing as seen in actual deployments. +Consequently, you must have **Docker** installed and running, allowing Testcontainers to start a PostgreSQL server.
+ +- If you use a local Docker daemon, ensure it has sufficient memory allocated (typically 1GB+ is recommended for running Postgres containers alongside your tests). +- If your Docker daemon runs remotely, ensure the `DOCKER_HOST` environment variable is correctly configured in your shell so Testcontainers can locate it. + +The automated testing setup will look up a system property `postgresql.server.version` to determine which container image tag to use. +The property is injected from `pom.xml` by Maven Failsafe, and a reasonable fallback value is used if it is missing. +To test with a different version of PostgreSQL, you may set the Maven property `postgresql.server.version` for a run. + +### Example +Performance test classes must follow specific conventions to be discovered and executed correctly: + +1. **Package Location:** + Place your test class in `src/test/java`, mirroring the package structure of the code you want to test (e.g., `edu.harvard.iq.dataverse.export`). + This placement grants the test class access to package-private members in `src/main/java`, which is often necessary when testing internal services directly without going through the full API layer. +2. **Naming Convention:** + Name the class `*IT.java` so that the Maven Failsafe plugin automatically picks it up during the `integration-test` phase. +3. **Setup Annotation:** + Annotate the class with `@JpaPerformanceTest` to have everything set up automatically for you. + A `JpaEntityManagerService` will be injected into a static class field for you, allowing interaction with a JPA Entity Manager. + +Below is a minimal, generic example [`SamplePerformanceIT`](/_static/developers/testing/SamplePerformanceIT.java) demonstrating the structure and how to run a transaction with or without a return value.
+ +```{literalinclude} /_static/developers/testing/SamplePerformanceIT.java +:name: sample-performance-test +:language: java +:start-at: // +``` + +### Understanding JpaEntityManagerService +The `JpaEntityManagerService` class abstracts away the boilerplate required to set up a JPA environment for testing. +Here is what it does under the hood: + +1. **Automatic PostgreSQL Server Setup:** + The involved JUnit Test Extension makes sure to create a single server instance to speed up test setups. + Nonetheless, any test class will run within its own database on the server, guaranteeing test database isolation. + +2. **Automatic Schema Generation:** + When you call `.start()` on a `JpaEntityManagerService` instance, it initializes an EclipseLink `EntityManagerFactory` configured to automatically generate the database schema (`schema-generation.database.action=create`). + This guarantees that every test run begins with a pristine database structure derived directly from your current JPA entity mappings. + You do not need to run Flyway migrations or seed the database beforehand. + +3. **Transaction Management:** + The service handles the lifecycle of JPA transactions automatically. + You simply pass a lambda to `inTransaction()` or `inTransactionVoid()`. + The service will: + 1. Create an `EntityManager` and begin a transaction. + 2. Execute your lambda. + 3. Commit the transaction on success, or roll it back if a `RuntimeException` is thrown. + 4. Close the `EntityManager` in a `finally` block to prevent resource leaks. + +4. **Query Statistics via Wrapped DataSource:** + To make it easy to profile ORM behavior, `JpaEntityManagerService` wraps the underlying PostgreSQL `DataSource` using a proxy that intercepts all SQL statements. + + By default, the proxy tracks query counts, which you can retrieve via `QueryCountHolder.getGrandTotal()`. + This provides immediate, programmatic insight into database efficiency without needing to parse verbose SQL logs. 
+ It is particularly useful for: + - Verifying that a batch operation executes in a single query rather than a loop. + - Catching N+1 query problems by asserting on the number of `SELECT` statements. + + *Advanced Usage:* The default service only tracks query counts. + If you need detailed SQL logging (including bound parameters) or custom execution metrics, you can extend `JpaEntityManagerService` and register additional `StatementListener` implementations on the `ProxyDataSourceBuilder` during initialization. \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/installation/big-data-support.rst similarity index 84% rename from doc/sphinx-guides/source/developers/big-data-support.rst rename to doc/sphinx-guides/source/installation/big-data-support.rst index 7077fdfcd19..45b94f71a9f 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/installation/big-data-support.rst @@ -1,7 +1,7 @@ Big Data Support ================ -Big data support includes some experimental options. Eventually more of this content will move to the Installation Guide. +Big data support includes some experimental options. .. contents:: |toctitle| :local: @@ -49,47 +49,75 @@ The following features are disabled when S3 direct upload is enabled. - Creation of NcML auxiliary files (See :ref:`netcdf-and-hdf5`.) - Extraction of a geospatial bounding box from NetCDF and HDF5 files (see :ref:`netcdf-and-hdf5`) unless :ref:`dataverse.netcdf.geo-extract-s3-direct-upload` is set to true. + .. _cors-s3-bucket: Allow CORS for S3 Buckets ~~~~~~~~~~~~~~~~~~~~~~~~~ -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with dvwebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. 
-The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. +**IMPORTANT:** This additional step of allowing cross-site requests to your S3 buckets is required to enable direct uploads via a Dataverse installation, direct download to work with :ref:`file previewers `, or direct upload to work with :ref:`DVWebloader `. + +To successfully enable direct uploads (e.g. :ref:`folder-upload`) or direct downloads (e.g. consumed by previewers), you must both: + +- Enable CORS in Dataverse (see :ref:`dataverse.cors`). +- Configure a matching/compatible CORS policy on each S3 bucket (and any CDN/proxy in front of it) that will be used. -Dataverse itself will only emit the necessary ``Access-Control-*`` headers to browsers when CORS has been explicitly enabled via the JVM/MicroProfile setting :ref:`dataverse.cors.origin `. You must both: +**NOTE:** Make sure the bucket's CORS configuration ``AllowedOrigins`` is at least as permissive as the origins you configure in :ref:`dataverse.cors.origin`. +If the bucket allows the wildcard ``*`` but the Dataverse application only allows a subset, the browser will still enforce the more restrictive application response! -* Configure an appropriate ``dataverse.cors.origin`` value (single origin, comma-separated list, or ``*``) on the Dataverse application server; and -* Configure a matching/compatible CORS policy on each S3 bucket (and any CDN/proxy in front of it) that will be used for direct upload or for redirect (download-redirect) operations consumed by previewers.
+Detailed information for the most common S3 admin tools around CORS: -If you specify multiple origins in ``dataverse.cors.origin`` Dataverse will echo back the requesting origin (when it matches) and will include ``Vary: Origin`` so that shared caches do not serve one origin's response to another. If you configure ``*`` Dataverse will respond with ``Access-Control-Allow-Origin: *`` (note that browsers will not allow credentialed requests with a wildcard). +- `AWS `_ +- `Minio mc `_ +- `s3cmd `_ -Make sure the bucket CORS configuration ``AllowedOrigins`` is at least as permissive as the origins you configure in ``dataverse.cors.origin``. If the bucket allows ``*`` but the Dataverse application only allows a subset, the browser will still enforce the more restrictive application response. +Get Current CORS Policy on Bucket ++++++++++++++++++++++++++++++++++ If you'd like to check the CORS configuration on your bucket before making changes: -``aws s3api get-bucket-cors --bucket `` +.. tabs:: + .. group-tab:: AWS CLI + :code:`aws s3api get-bucket-cors --bucket ` + + .. group-tab:: Minio Client (mc) + :code:`mc cors get /` + +Set CORS Policy on Bucket ++++++++++++++++++++++++++ + +The examples below show how to enable CORS rules (to support upload and download) on a bucket. + +**Note:** You may want to limit the ``AllowedOrigins`` and/or ``AllowedHeaders`` further. +`GDCC/dataverse-previewers `_ has some additional information about doing this. + +Both the JSON and XML formats are explained in detail in `AWS Docs `_. + +.. tabs:: + .. group-tab:: AWS CLI + Create a file :download:`cors.json ` as follows: + + .. literalinclude:: /_static/installation/cors/cors.json + :name: aws-cors + :language: json + + Proceed with making the changes: + + :code:`aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json` -To proceed with making changes: + Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above.
-``aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json`` + .. group-tab:: Minio Client (mc) + Create a file :download:`cors.xml ` as follows: -with the contents of the file cors.json as follows: + .. literalinclude:: /_static/installation/cors/cors.xml + :name: xml-cors + :language: xml -.. code-block:: json + Proceed with making the changes: - { - "CORSRules": [ - { - "AllowedOrigins": ["*"], - "AllowedHeaders": ["*"], - "AllowedMethods": ["PUT", "GET"], - "ExposeHeaders": ["ETag", "Accept-Ranges", "Content-Encoding", "Content-Range"] - } - ] - } + :code:`mc cors set / ./cors.xml` -Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. .. _s3-tags-and-direct-upload: diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e5ed52acb83..0670855791a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -217,6 +217,36 @@ Dataverse installations are explicity set to "Lax" out of the box by the install To inspect cookie attributes like SameSite, you can use ``curl -s -I http://localhost:8080 | grep JSESSIONID``, for example, looking for the "Set-Cookie" header. + +.. _dataverse.cors: + +Cross-Origin Resource Sharing (CORS) +++++++++++++++++++++++++++++++++++++ + +For any Dataverse installation using or planning to use advanced features like :doc:`big data support ` or :ref:`file previewers `, CORS must be configured. 
+ +To understand what CORS is all about and how it works, the following are recommended reads: + +- https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/CORS +- https://corsfix.com/cors-headers +- https://www.caduh.com/blog/understanding-cors +- https://medium.com/@roelljr/demystifying-cors-its-just-http-headers-i-promise-4a02caf460fa + +To learn how to configure the Dataverse application to send CORS headers to browsers, these JVM options are relevant: + +- :ref:`dataverse.cors.origin` +- :ref:`dataverse.cors.methods` +- :ref:`dataverse.cors.headers.allow` +- :ref:`dataverse.cors.headers.expose` + +Dataverse will only emit the necessary ``Access-Control-*`` headers to browsers when CORS has been explicitly enabled via the JVM option :ref:`dataverse.cors.origin `. + +For any resources to be integrated with Dataverse, find documentation on how to set up CORS rules on their end at: + +- :ref:`Big Data: CORS for S3 buckets ` +- `GDCC/dataverse-previewers `_ + + .. _ongoing-security: Ongoing Security of Your Installation @@ -1514,7 +1544,7 @@ In addition to having the type "remote" and requiring a label, Trusted Remote St These and other available options are described in the table below. Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity -and/or managing access to a secure enclave. See :doc:`/admin/big-data-administration` (specifically :ref:`remote-stores`) and :doc:`/developers/big-data-support` for additional information on how to use a trusted remote store. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. +and/or managing access to a secure enclave. See :doc:`/admin/big-data-administration` (specifically :ref:`remote-stores`) and :doc:`/installation/big-data-support` for additional information on how to use a trusted remote store. 
For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. Note that in the current implementation, activities where Dataverse needs access to data bytes, e.g. to create thumbnails or validate hash values at publication will fail if a remote store does not allow Dataverse access. Implementers of such trusted remote stores should consider using Dataverse's settings to disable ingest, validation of files at publication, etc. as needed. @@ -1548,7 +1578,7 @@ Globus Storage ++++++++++++++ Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). -See :doc:`/developers/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. +See :doc:`/installation/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. @@ -2682,7 +2712,7 @@ to avoid filled up disks, aid in performance, etc. This directory is used for a to final storage location and/or ingest. 3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. -4. 
The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally +4. The experimental DCM feature for :doc:`/installation/big-data-support` is able to trigger imports for externally uploaded files in a directory tree at ``//`` under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, but this is controlled by other, store-specific settings. @@ -3308,9 +3338,9 @@ Can also be set via *MicroProfile Config API* sources, e.g. the environment vari **Note:** This setting was previously called `dataverse.personOrOrg.orgPhraseArray` and expected a JsonArray of strings. Please update both the name and value format if using the old setting. -.. _dataverse.api.signature-secret: +.. _dataverse.api.signing-secret: -dataverse.api.signature-secret +dataverse.api.signing-secret ++++++++++++++++++++++++++++++ Context: Dataverse has the ability to create "Signed URLs" for its API calls. Using signed URLs is more secure than @@ -3318,13 +3348,13 @@ providing API tokens, which are long-lived and give the holder all of the permis are time limited and only allow the action of the API call in the URL. See :ref:`api-exttools-auth` and :ref:`api-native-signed-url` for more details. -The key used to sign a URL is created from the API token of the creating user plus a signature-secret provided by an administrator. -**Using a signature-secret is highly recommended.** This setting defaults to an empty string. Using a non-empty -signature-secret makes it impossible for someone who knows an API token from forging signed URLs and provides extra security by +The key used to sign a URL is created from the API token of the creating user plus a signing-secret provided by an administrator. +**Using a signing-secret is highly recommended.** This setting defaults to an empty string. 
Using a non-empty +signing-secret makes it impossible for someone who knows an API token to forge signed URLs and provides extra security by making the overall signing key longer. **WARNING**: -*Since the signature-secret is sensitive, you should treat it like a password.* +*Since the signing-secret is sensitive, you should treat it like a password.* *See* :ref:`secure-password-storage` *to learn about ways to safeguard it.* Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_API_SIGNING_SECRET`` (although you shouldn't use environment variables for passwords). @@ -3786,21 +3816,38 @@ dataverse.search.default-service Experimental. See :doc:`/developers/search-services`. -.. _dataverse.cors: - -CORS Settings -+++++++++++++ - -The following settings control Cross-Origin Resource Sharing (CORS) for your Dataverse installation. - .. _dataverse.cors.origin: dataverse.cors.origin +++++++++++++++++++++ -Allowed origins for CORS requests. If this setting is not defined, CORS headers are not added. Set to ``*`` to allow all origins (note that browsers will not allow credentialed requests with ``*``) or provide a comma-separated list of explicit origins. +Allowed origins for CORS requests. See also :ref:`dataverse.cors`. + +Default: *not configured* + +.. warning:: | If this setting is not explicitly configured, no CORS headers at all are added to responses. + | The default policy (see all CORS related settings) is still being enforced! -Multiple origins can be specified as a comma-separated list (whitespace is ignored): +.. list-table:: + :align: left + :widths: 10 10 80 + :header-rows: 1 + :stub-columns: 1 + + * - Type + - Value/Example + - Description + * - Wildcard + - ``*`` + - - Allow access from all origins. + - Response header echoes ``Access-Control-Allow-Origin: *`` + - Browsers will not allow credentialed requests with this setting. 
+ * - List of Origins + - ``https://example.org, https://example.com`` + - - Comma separated, white space ignored. + - Single matching request ``Origin`` header echoed as response header ``Access-Control-Allow-Origin``. + - ``Vary: Origin`` header added to support correct proxy/CDN caching. + - Use ``${dataverse.siteurl}`` to dynamically add the installation's URL to the list. Example: @@ -3808,18 +3855,14 @@ Example: Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_CORS_ORIGIN``. -Behavior: - -* When a list of origins is configured, Dataverse echoes the single matching request ``Origin`` value in ``Access-Control-Allow-Origin`` and adds ``Vary: Origin`` to support correct proxy/CDN caching. -* When ``*`` is configured, ``Access-Control-Allow-Origin: *`` is sent and ``Vary`` is not modified. - .. _dataverse.cors.methods: dataverse.cors.methods ++++++++++++++++++++++ -Allowed HTTP methods for CORS requests. The default when this setting is missing is "GET,POST,OPTIONS,PUT,DELETE". -Multiple methods can be specified as a comma-separated list. +Allowed HTTP methods for CORS requests as a comma separated list. Whitespace is ignored. + +Default: ``GET,POST,OPTIONS,PUT,DELETE`` Example: @@ -3832,8 +3875,9 @@ Can also be set via any `supported MicroProfile Config API source`_, e.g. the en dataverse.cors.headers.allow ++++++++++++++++++++++++++++ -Allowed headers for CORS requests. The default when this setting is missing is "Accept,Content-Type,X-Dataverse-key,Range". -Multiple headers can be specified as a comma-separated list. +Allowed headers for CORS requests as a comma separated list. Whitespace is ignored. + +Default: ``Accept, Content-Type, X-Dataverse-key, Range`` Example: @@ -3846,8 +3890,9 @@ Can also be set via any `supported MicroProfile Config API source`_, e.g. the en dataverse.cors.headers.expose +++++++++++++++++++++++++++++ -Headers to expose in CORS responses. 
The default when this setting is missing is "Accept-Ranges,Content-Range,Content-Encoding". -Multiple headers can be specified as a comma-separated list. +Headers to expose in CORS responses as a comma separated list. Whitespace is ignored. + +Default: ``Accept-Ranges, Content-Range, Content-Encoding`` Example: @@ -4082,6 +4127,22 @@ dataverse.feature.require-embargo-reason Require an embargo reason when a user creates an embargo on one or more files. See :ref:`embargoes`. +.. _dataverse.feature.croissant-with-local-reviews: + +dataverse.feature.croissant-with-local-reviews +++++++++++++++++++++++++++++++++++++++++++++++ + +Have the croissant and croissantSlim metadata export formats include an extra "reviews" array if local reviews exist. See :ref:`croissant-head`, :ref:`review-datasets-user`, :ref:`creating-a-review-dataset`, and :ref:`api-list-reviews`. + +.. _dataverse.feature.allow-locally-fair-data: + +dataverse.feature.allow-locally-fair-data ++++++++++++++++++++++++++++++++++++++++++ + +Allows support for Locally FAIR collections and datasets. +When enabled, selected content can remain visible only to authorized users or groups within a Dataverse installation. +See :ref:`locally-fair` for more information. + .. _:ApplicationServerSettings: Application Server Settings @@ -4696,7 +4757,7 @@ Whatever JSON you send will overwrite existing values. If you have any exiting ` ``curl http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit | jq -r '.data.message'`` -The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. If you omit the ``default`` key or set it to ``"-1"``, no limits are applied to formats not specified in the JSON. If you set it to ``"0"``, ingest will be disabled (but you can override this per-format). +The ``default`` key is optional and can be used to give limits to formats that are not specified in the JSON. 
If you omit the ``default`` key or set it to ``"-1"``, no limits are applied to formats not specified in the JSON. If you set it to ``"0"``, ingest will be disabled (but you can override this per-format). If you wish to disable ingest for specific formats it may be preferable to instead set a value of ``"1"`` which will cause Dataverse to print a string such as ``rdata:disabled`` for the user's benefit. Add a format name (``csv``, ``dta``, etc., as listed above) to change the limit for that particular format. @@ -4707,10 +4768,10 @@ Examples: ``curl -X PUT -d '{"Rdata":"1000000"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` 2. If you want your Dataverse installation to not attempt to ingest XLSX files at all and apply a global limit of 512 MiB, use this setting: - ``curl -X PUT -d '{"default":"536870912", "XSLX":"0"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` + ``curl -X PUT -d '{"default":"536870912", "XLSX":"1"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` 3. If you want your Dataverse installation to not attempt to ingest files at all except for CSV files that are 256 MiB or smaller, use this setting: - ``curl -X PUT -d '{"default":"0", "CSV":"268435456"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` + ``curl -X PUT -d '{"default":"1", "CSV":"268435456"}' http://localhost:8080/api/admin/settings/:TabularIngestSizeLimit`` .. _:HarvestingClientCallRateLimit: @@ -4842,6 +4903,23 @@ Set whether a user will see the custom text when publishing all versions of a da ``curl -X PUT -d true http://localhost:8080/api/admin/settings/:DatasetPublishPopupCustomTextOnAllVersions`` +.. _:DatasetSubmitForReviewPopupCustomText: + +:DatasetSubmitForReviewPopupCustomText +++++++++++++++++++++++++++++++++++++++ + +Set custom text a user will view when submitting a dataset for review. Note that this text is exposed via the "Info" endpoint of the :doc:`/api/native-api`. 
+ +``curl -X PUT -d "Deposit License Requirements" http://localhost:8080/api/admin/settings/:DatasetSubmitForReviewPopupCustomText`` + +If you have a long text string, you can upload it as a file as in the example below. + +``curl -X PUT --upload-file /tmp/long.txt http://localhost:8080/api/admin/settings/:DatasetSubmitForReviewPopupCustomText`` + +There is a related setting called :ref:`:SubmitForReviewDatasetDisclaimerText` that also makes text appear on the popup when submitting for review, but it requires a checkbox to be clicked. + +See also :ref:`show-custom-popup-for-submitting-for-review-datasets` in the API Guide. + :SearchHighlightFragmentSize ++++++++++++++++++++++++++++ @@ -5369,6 +5447,17 @@ There is a similar setting called :ref:`:DatasetPublishPopupCustomText` that als See also :ref:`show-disclaimer-for-publishing-datasets` in the API Guide. +.. _:SubmitForReviewDatasetDisclaimerText: + +:SubmitForReviewDatasetDisclaimerText ++++++++++++++++++++++++++++++++++++++ + +The text displayed to the user that must be acknowledged prior to submitting a Dataset for review. When not set the acknowledgment is not required nor displayed. + +``curl -sS -X PUT -d 'I agree to the following:
1. My submission has been fully anonymized (required for all human subject'\''s datasets).
2. My submission does not violate any known copyright laws.
3. I understand that I am liable for any and all violations of the Harvard Repository Terms of Use.' http://localhost:8080/api/admin/settings/:SubmitForReviewDatasetDisclaimerText`` + +See also :ref:`show-disclaimer-for-submit-for-review-datasets` in the API Guide. + .. _:BagItHandlerEnabled: :BagItHandlerEnabled diff --git a/doc/sphinx-guides/source/installation/index.rst b/doc/sphinx-guides/source/installation/index.rst index a0a88700d3d..bdfb4cc8037 100755 --- a/doc/sphinx-guides/source/installation/index.rst +++ b/doc/sphinx-guides/source/installation/index.rst @@ -16,6 +16,7 @@ Installation Guide prerequisites installation-main config + big-data-support upgrading shibboleth oauth2 diff --git a/doc/sphinx-guides/source/installation/localcontexts.rst b/doc/sphinx-guides/source/installation/localcontexts.rst index 2bafc2524d9..7f46ca068a0 100644 --- a/doc/sphinx-guides/source/installation/localcontexts.rst +++ b/doc/sphinx-guides/source/installation/localcontexts.rst @@ -27,6 +27,7 @@ There are several steps to LocalContexts integration. The metadatablock contains one field allowing Dataverse to store the URL of an associated Local Contexts Hub project. Be sure to update the Solr schema after installing the metadatablock (see :ref:`update-solr-schema`). The external vocabulary script interacts with the Local Contexts Hub (via the Dataverse server) to display the Labels and Notices associated with the project and provide a link to it. The script also supports adding/removing such a link from the dataset's metadata. Note that only a project that references the dataset's PID in its `Optional Project Information` field can be linked to a dataset. + Note that the Local Contexts script configuration JSON must be edited to include your Dataverse server's URL and the Local Contexts API key you use in Dataverse. 
(The latter is optional but it must be included for Dataverse to add information about Notices and Labels to exported metadata and the metadata sent to DataCite for DOIs.) - Lastly, to show Local Contexts information in the summary section of the dataset page, as shown in the image above, you should add `LCProjectUrl` to list of custom summary fields via use of the :ref:`:CustomDatasetSummaryFields` setting. - Optionally, one can also set the dataverse.feature.add-local-contexts-permission-check FeatureFlag to true. This assures that only users editing datasets can use the LocalContexts search functionality (e.g. via API). This is not recommended unless problematic use is seen. diff --git a/doc/sphinx-guides/source/quickstart/what-is-dataverse.md b/doc/sphinx-guides/source/quickstart/what-is-dataverse.md index 6f86473bada..ceb3da0a6ad 100644 --- a/doc/sphinx-guides/source/quickstart/what-is-dataverse.md +++ b/doc/sphinx-guides/source/quickstart/what-is-dataverse.md @@ -10,6 +10,7 @@ A Dataverse repository can host one or more Dataverse collections, which organiz - Data files - Documentation or code +(core-capabilities)= ## Core Capabilities ### 📤 Upload, manage, publish and download data files. @@ -37,4 +38,4 @@ A Dataverse repository can host one or more Dataverse collections, which organiz - Compare versions with the detailed version change overview on dataset-level. ### ✨More features -The Dataverse project is continuously evolving. For an overview of capabilities, visit the [features list](https://dataverse.org/software-features). +The Dataverse project is continuously evolving. For an overview of capabilities, see {doc}`/admin/features` in the Admin Guide. 
diff --git a/doc/sphinx-guides/source/user/appendix.rst b/doc/sphinx-guides/source/user/appendix.rst index e2c78e1e99c..57e0d5ed799 100755 --- a/doc/sphinx-guides/source/user/appendix.rst +++ b/doc/sphinx-guides/source/user/appendix.rst @@ -25,9 +25,7 @@ Supported Metadata Detailed below are what metadata schemas we support for Citation and Domain Specific Metadata in the Dataverse Project: - Citation Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.5 `__, and Dublin Core's `DCMI Metadata Terms `__ . Language field uses `ISO 639-1 `__ controlled vocabulary. -- Geospatial Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.5 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - - - Please note that a new and improved Geospatial metadata block is being proposed at ``__. We encourage you to try the block and give feedback in that pull request. +- Geospatial Metadata (`see .tsv `__): compliant with `ISO 19115 `_, `DDI Lite `_, `DDI 2.5 Codebook `__, `DataCite 4.5 `__, and Dublin Core. Country / Nation field uses `ISO 3166-1 `_ controlled vocabulary. - Social Science & Humanities Metadata (`see .tsv `__): compliant with `DDI Lite `_, `DDI 2.5 Codebook `__, and Dublin Core. 
- Astronomy and Astrophysics Metadata (`see .tsv `__): These metadata elements can be mapped/exported to the International Virtual Observatory Alliance’s (IVOA) `VOResource Schema format `__ and is based on diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 8836961bae1..17eff033882 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -28,7 +28,7 @@ Supported Metadata Export Formats Once a dataset has been published, its metadata can be exported in a variety of other metadata standards and formats, which help make datasets more :doc:`discoverable ` and usable in other systems, such as other data repositories. On each dataset page's metadata tab, the following exports are available: -- Croissant +- Croissant (example: :download:`max-croissant.json <../../../../src/test/resources/croissant/max/expected/max-croissant.json>`) - Dublin Core - DDI (Data Documentation Initiative Codebook 2.5) - DDI HTML Codebook (A more human-readable, HTML version of the DDI Codebook 2.5 metadata export) @@ -40,7 +40,7 @@ Once a dataset has been published, its metadata can be exported in a variety of Additional formats can be enabled. See :ref:`inventory-of-external-exporters` in the Installation Guide. For example: -- RO-Crate +- RO-Crate: See also https://www.researchobject.org/ro-crate/dataverse Each of these metadata exports contains the metadata of the most recently published version of the dataset. @@ -439,6 +439,7 @@ Go to the dataset you would like to edit, where you will see the listing of file - Restrict the selected files - Unrestrict the selected files (only if the selected files are restricted) - Add tags to the selected files +- Embargo the selected files You will not have to leave the dataset page to complete these action, except for editing file metadata, which will bring you to the Edit Files page. 
There you will have to click the "Save Changes" button to apply your edits and return to the dataset page. @@ -763,6 +764,8 @@ Once a dataset with embargoed files has been published, no further action is nee As the primary use case of embargoes is to make the existence of data known now, with a promise (to a journal, project team, etc.) that the data itself will become available at a given future date, users cannot change an embargo once a dataset version is published. Dataverse instance administrators do have the ability to correct mistakes and make changes if/when circumstances warrant. +.. _retention-periods: + Retention Periods ================= @@ -935,6 +938,8 @@ Review Datasets can only be created via API. You have the following options: When creating a review dataset you will likely need to fill in required fields like ``itemReviewedUrl`` as well as fields from one or more "rubric" metadata blocks, as described above under :ref:`review-datasets-overview`. +If you point ``itemReviewedUrl`` at the URL form of a dataset (e.g. https://doi.org/10.5072/FK2/ABCDEF) that is in the same Dataverse installation as the review dataset, the review dataset is considered a local review and can be listed using the :ref:`api-list-reviews` API endpoint. These reviews appear in the Croissant metadata export if you enable the :ref:`dataverse.feature.croissant-with-local-reviews` feature flag. + .. _dataset-types-datacite: Dataset Types and DataCite diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index 4e94bfad256..4799d2cdb30 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -241,6 +241,122 @@ Once your Dataverse collection is ready to go public, go to your Dataverse colle hand side of the page. 
A pop-up will appear to confirm that you are ready to actually Publish, since once a Dataverse collection is made public, it can no longer be unpublished. +.. _locally-fair: + +Locally FAIR +============ + +Locally FAIR describes content that is managed according to FAIR principles +(Findable, Accessible, Interoperable, and Reusable) within a defined local or +organizational community rather than for the public internet as a whole. + +Dataverse now has optional, experimental support for managing Locally FAIR collections. + +In a typical public Dataverse installation, published dataset metadata is visible +to everyone, even if the dataset's files themselves may be embargoed or restricted. Locally FAIR support +extends this model by allowing some collections, and the published datasets within them, to remain +visible only to designated users or groups. This makes it possible for a single +Dataverse installation to support both: + +- public, globally discoverable content; and +- organizational content whose existence and metadata are only visible to + authorized users. + +The rationale for making some content Locally FAIR can vary. +Locally FAIR content can include: + +- sensitive research collections; +- institution-only datasets; +- datasets that should not be accessible to bots that may not adhere to the dataset license and terms; and +- projects under contractual or policy restrictions. + +Dataverse's Locally FAIR mechanism is appropriate for repositories that will house at least some data +whose metadata should only be visible to organizational members. The decision to make data Locally FAIR +is managed at the collection level and repositories can have both FAIR and Locally FAIR content. + +What Locally FAIR Means +----------------------- + +Locally FAIR content is intended to be FAIR within a particular community. + +That means: + +- **Findable** Data is easy to locate for both humans and machines, when authorized. 
Locally FAIR datasets (and files if configured) have persistent identifiers, but do not use DOIs which are publicly searchable. + +- **Accessible** Data is retrievable through standardized protocols. Authorized users can use Dataverse's standard user interface and API calls to access Locally FAIR content in the same way they do with any published data. + +- **Interoperable** Data should be compatible with other datasets and systems. Locally FAIR datasets in Dataverse use the same standard metadata blocks as for public content and files undergo the same ingest process, use the same previewers and tools, etc. + +- **Reusable** Data should be well-described and licensed in a way that allows others to use it for future research. The licenses and terms on Locally FAIR content make it clear how and when the data can be re-used. + +Why Repositories Use It +----------------------- + +Without Locally FAIR support, repositories may need separate Dataverse +installations to separate public and organization-only content. + +How It Differs from Restricted Files +------------------------------------ + +:ref:`Restricting ` or :ref:`embargoing ` files limits access to the file contents, but in a standard public +repository the dataset's published metadata, including the list of files, would still be visible. +If a dataset allows requests for file access, anyone can request access, even if the dataset's +license or terms limit access to specific groups. + +Locally FAIR goes further. Locally FAIR collections and datasets do not appear in content listings or +search results for unauthorized users, nor can the collection/dataset/file page be viewed. API access +is also blocked for unauthorized access. + +Who Can See Locally FAIR Content +-------------------------------- + +Visibility is determined by superusers and is managed at the collection level. 
+Access can be granted to any groups or users defined in Dataverse - the same groups/users +available when assigning roles on collections, datasets, and files. + +How Can You Tell When Content is Locally FAIR? +---------------------------------------------- + +The Dataverse UI adds a "Locally FAIR" tag to all collections, datasets, and files whose visibility +is limited by the Locally FAIR mechanism. + +Why is Locally FAIR Support "Experimental" +------------------------------------------ + +The word "experimental" is used when functionality is new, may evolve significantly in future releases, +and generally may require more effort to configure and manage and/or more effort to support than more +mature functionality. + +With the current Locally FAIR implementation, managers need to be aware that they are responsible for +choosing collection settings compatible with Locally FAIR content, i.e. not using DOIs (whose metadata +is publicly accessible) or publicly visible stores, etc. Users and managers should also be aware that +some functionality that might expose Locally FAIR content, e.g. linking, may not be prohibited programmatically +but should still be avoided. Similarly, users should be aware that functionality such as metrics and quotas +may expose the existence of Locally FAIR content. If your Dataverse instance supports Locally FAIR data, +you are encouraged to be an active participant in reporting any issues and suggesting further improvements. + +Things to Keep in Mind +---------------------- + +If your repository supports Locally FAIR content: + +- published does not always mean public; +- search and browse results may vary depending on who is logged in; +- colleagues outside your authorized group may not be able to see the same + datasets you can see; +- you should not share Locally FAIR content with others who don't have access themselves; and +- this functionality is experimental. 
+ +Enabling Locally FAIR Support +----------------------------- + +See :ref:`dataverse.feature.allow-locally-fair-data` in the Installation Guide. + +Managing Locally FAIR Via API +----------------------------- + +See :ref:`locally-fair-list-role-assignees` and related API endpoints in the API Guide. + .. |image1| image:: ./img/Dataverse-Diagram.png .. |image2| image:: ./img/dvperms1.png :class: img-responsive diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 5d75b8e1993..eece2e00acb 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -8,6 +8,7 @@ This list provides a way to refer to the documentation for previous and future v - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` - |version| +- `6.10.1 `__ - `6.10 `__ - `6.9 `__ - `6.8 `__ diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 88b902dfc7f..b24bf0ed6f6 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,6 +60,10 @@ services: -Ddataverse.pid.fake.label=FakeDOIProvider -Ddataverse.pid.fake.authority=10.5072 -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.cors.origin=* \ + -Ddataverse.cors.methods=GET,POST,PUT,DELETE,OPTIONS \ + -Ddataverse.cors.headers.allow=range,content-type,x-dataverse-key,accept \ + -Ddataverse.cors.headers.expose=content-encoding,content-range,accept-ranges \ #-Ddataverse.files.guestbook-at-request=true #-Ddataverse.lang.directory=/dv/lang ports: @@ -80,7 +84,7 @@ services: tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 - mem_limit: 2147483648 # 2 GiB + mem_limit: 2684354560 # 2.5 GiB mem_reservation: 1024m privileged: false @@ -134,6 +138,26 @@ services: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/solr-template + # This is optional. Uncomment to try and experiment with schema sidecar. 
+ # dev_solr_schema_sidecar: + # container_name: "dev_solr_schema_sidecar" + # hostname: "solr-schema" + # image: gdcc/configbaker:unstable + # depends_on: + # - dev_dv_initializer + # - dev_solr + # restart: on-failure + # networks: + # - dataverse + # # Note: no quotes here - they will become part of the arguments passed to the script! + # command: >- + # solr-driver.sh --mode watch --startup-check wait + # --dataverse-url http://dataverse:8080 + # --solr-url http://solr:8983 + # --core collection1 + # volumes: + # - ./docker-dev-volumes/solr/data:/var/solr + dev_solr: container_name: "dev_solr" hostname: "solr" @@ -283,4 +307,4 @@ services: networks: dataverse: - driver: bridge + driver: bridge \ No newline at end of file diff --git a/docker-compose.override.yml b/docker-compose.override.yml new file mode 100644 index 00000000000..68ac8e778aa --- /dev/null +++ b/docker-compose.override.yml @@ -0,0 +1,12 @@ +version: "2.4" + +# Local development overrides for docker-compose-dev.yml +# This file is used by the fast-redeploy scripts (dev-start-frd.sh, dev-down-frd.sh) +# and can be used manually with: docker compose -f docker-compose-dev.yml -f docker-compose.override.yml up + +services: + dev_dataverse: + # Increase memory limits for local development + # (upstream has 2GB limit for GitHub Actions CI, which is too restrictive for local dev) + mem_limit: 8g + mem_reservation: 4g diff --git a/modules/container-configbaker/Dockerfile b/modules/container-configbaker/Dockerfile index 9fc876a283b..22334811fb5 100644 --- a/modules/container-configbaker/Dockerfile +++ b/modules/container-configbaker/Dockerfile @@ -54,7 +54,7 @@ RUN true && \ rm -rf "~/.local" && \ # Cleanup apt cache - rm -rf "/var/lib/apt/lists/*" + rm -rf "/var/lib/apt/lists/*" "/var/cache/apt/archives/*" # New step (and shell) as this is a different manager and we need pipx around as command RUN true && \ @@ -75,7 +75,7 @@ RUN true && \ pipx install --global ${PYTHON_PKGS} # Get in the scripts -COPY 
maven/scripts maven/solr/update-fields.sh ${SCRIPT_DIR}/ +COPY maven/scripts maven/solr/*.sh ${SCRIPT_DIR}/ # Copy the data from scripts/api that provide the common base setup you'd get from the installer. # ".dockerignore" will take care of taking only the bare necessities COPY maven/setup ${SCRIPT_DIR}/bootstrap/base/ diff --git a/modules/container-configbaker/scripts/apply-db-settings.sh b/modules/container-configbaker/scripts/apply-db-settings.sh index deb897d138c..7ca04fcac8e 100755 --- a/modules/container-configbaker/scripts/apply-db-settings.sh +++ b/modules/container-configbaker/scripts/apply-db-settings.sh @@ -84,7 +84,7 @@ fi # API INTERACTION # Define an auth header argument (enabling usage of different ways) -AUTH_HEADER_ARG="" +AUTH_HEADER_ARG=() # Check for Dataverse Unblock API Key present (option with file/env var) # This is only required if the host is not localhost (then there may be no key necessary) @@ -102,7 +102,7 @@ if ! [[ "${DATAVERSE_URL}" == *"://localhost"* ]] || [ -n "${ADMIN_API_UNBLOCK_K if [ -z "${API_KEY_FILE_CONTENT}" ]; then error "API key file ${ADMIN_API_UNBLOCK_KEY} appears empty." fi - ADMIN_API_UNBLOCK_KEY="$API_KEY_CONTENT" + ADMIN_API_UNBLOCK_KEY="$API_KEY_FILE_CONTENT" fi # Very basic error check (as there is no clear format or formal spec for the key) if [ ${#ADMIN_API_UNBLOCK_KEY} -lt 5 ]; then @@ -110,7 +110,7 @@ if ! [[ "${DATAVERSE_URL}" == *"://localhost"* ]] || [ -n "${ADMIN_API_UNBLOCK_K fi # Build the header argument for Admin API Authentication via unblock key - AUTH_HEADER_ARG="X-Dataverse-unblock-key: ${ADMIN_API_UNBLOCK_KEY}" + AUTH_HEADER_ARG=(-H "X-Dataverse-unblock-key: ${ADMIN_API_UNBLOCK_KEY}") fi # Check or wait for Dataverse API being responsive @@ -121,7 +121,7 @@ wait4x http "${DATAVERSE_URL}/api/info/version" -i 8s -t "$TIMEOUT" --expect-sta CURRENT_SETTINGS=$(mktemp) echo "Retrieving settings from running instance." # TODO: Do we need to support pre v6.7 style unblock key query parameter? 
-curl -sSL --fail-with-body -o "${CURRENT_SETTINGS}" -H "${AUTH_HEADER_ARG}" "${DATAVERSE_URL}/api/admin/settings" \ +curl -sSL --fail-with-body -o "${CURRENT_SETTINGS}" "${AUTH_HEADER_ARG[@]}" "${DATAVERSE_URL}/api/admin/settings" \ || error "Failed. Response message: $( cat "${CURRENT_SETTINGS}")" \ && echo "Success!" # TODO: while it's nice to have the current settings written out, it may contain sensitive information (so don't). @@ -130,6 +130,6 @@ curl -sSL --fail-with-body -o "${CURRENT_SETTINGS}" -H "${AUTH_HEADER_ARG}" "${D # We need to make the settings update atomic. echo "Replacing settings." RESPONSE=$(mktemp) -curl -sSL --fail-with-body -o "${RESPONSE}" -X PUT -H "${AUTH_HEADER_ARG}" --json @"${CONV_CONF_FILE}" "${DATAVERSE_URL}/api/admin/settings" \ +curl -sSL --fail-with-body -o "${RESPONSE}" -X PUT "${AUTH_HEADER_ARG[@]}" --json @"${CONV_CONF_FILE}" "${DATAVERSE_URL}/api/admin/settings" \ || error "Failed. Response message: $( jq ".message" < "${RESPONSE}" )" \ && ( echo -e "Success!\nOperations executed: "; jq '.data' < "$RESPONSE" ) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 18635728f9b..b3ae18c771d 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -132,7 +132,7 @@ - 6.10.1 + 6.11 21 UTF-8 @@ -150,7 +150,7 @@ 7.2026.2 - 42.7.7 + 42.7.11 9.8.0 16 2.33.0 diff --git a/pom.xml b/pom.xml index 0ee32227abc..dd4d92c4f0d 100644 --- a/pom.xml +++ b/pom.xml @@ -357,7 +357,7 @@ org.omnifaces omnifaces - 4.0-M13 + 4.7.5 @@ -655,7 +655,7 @@ org.apache.opennlp opennlp-tools - 1.9.1 + 2.5.9 org.xmlunit @@ -764,6 +764,18 @@ 3.0.0 test + + net.ttddyy + datasource-proxy + 1.11.0 + test + + + org.apache.commons + commons-dbcp2 + 2.14.0 + test + org.testcontainers testcontainers diff --git a/scripts/api/data/guestbook-test-response.json b/scripts/api/data/guestbook-test-response.json index df08b52ff6a..18fd79f9f0f 100644 --- a/scripts/api/data/guestbook-test-response.json +++ 
b/scripts/api/data/guestbook-test-response.json @@ -11,6 +11,10 @@ { "id": @QID3, "value": "Yellow" + }, + { + "id": @QID4, + "value": "Text area with a string instead of an array" } ] } diff --git a/scripts/api/data/guestbook-test.json b/scripts/api/data/guestbook-test.json index 710192b510a..b83a130c888 100644 --- a/scripts/api/data/guestbook-test.json +++ b/scripts/api/data/guestbook-test.json @@ -44,6 +44,13 @@ "displayOrder": 3 } ] + }, + { + "question": "Address", + "required": false, + "displayOrder": 3, + "type": "textarea", + "hidden": false } ] } diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index 11408317410..3f4b4db35a8 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -1,17 +1,44 @@ #metadataBlock name dataverseAlias displayName geospatial Geospatial Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id geographicCoverage Geographic Coverage Information on the geographic coverage of the data. Includes the total geographic scope of the data. none 0 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - country Country / Nation The country or nation that the Dataset is about. text 1 #VALUE, TRUE TRUE FALSE TRUE FALSE FALSE geographicCoverage geospatial - state State / Province The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. text 2 #VALUE, TRUE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial - city City The name of the city that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. 
text 3 #VALUE, TRUE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial - otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial + country Country / Nation The country or nation that the Dataset is about. text 1 #VALUE, TRUE TRUE FALSE TRUE FALSE FALSE geographicCoverage geospatial + state State / Province The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. text 2 #VALUE, TRUE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial + city City The name of the city that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. text 3 #VALUE, TRUE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial + otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial - geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial + geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. 
It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= West Bounding Longitude Value <= 180.0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= East Bounding Longitude Value <= 180.0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial northLatitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= North Bounding Latitude Value <= 90.0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial southLatitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= South Bounding Latitude Value <= 90.0. 
text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + geographicReferenceSystemInfo Reference System Information The spatial and temporal reference systems used in the Dataset none 11 FALSE FALSE TRUE FALSE FALSE FALSE geospatial + geographicReferenceSystemCode Reference System Code An alphanumeric identifier for the reference system for the Dataset text 12 #VALUE FALSE FALSE FALSE TRUE FALSE FALSE geographicReferenceSystemInfo geospatial + geographicReferenceSystemCodeSpace Reference System Code Space The namespace of the system in which the reference system code is valid EPSG, WGS, etc. text 13 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE geographicReferenceSystemInfo geospatial + geographicSpatialResolution Spatial Resolution The density of spatial data in the resource, or the range of resolutions in which a Dataset may be used. NOTE: This element should be repeated when describing upper and lower range none 14 FALSE FALSE TRUE FALSE FALSE FALSE geospatial + geographicSpatialResolutionValue Value The resource's spatial resolution, expressed as a value or as a brief textual description text 15 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE geographicSpatialResolution geospatial + geographicSpatialResolutionType Type Format in which the spatial resolution value is expressed distance, vertical, angularDistance, levelOfDetail text 16 (#VALUE) FALSE TRUE FALSE TRUE FALSE FALSE geographicSpatialResolution geospatial + geographicSpatialResolutionUnitOfMeasure Spatial Resolution Unit of Measure If the spatial resolution value is expressed as a distance, the unit of measure that corresponds to that value cm', 'm', 'km', etc. 
text 17 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE geographicSpatialResolution geospatial + geographicResourceType Type of Geospatial Data Resource The type of geospatial data resource that is described in this Dataset record text 18 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE geospatial + geographicReferenceDate Resource Reference Date A date which is used to help identify the resource none 19 TRUE FALSE TRUE FALSE FALSE FALSE geospatial + geographicReferenceDateValue Date A date which is used to help identify the resource yyyy-mm-dd date 20 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE geographicReferenceDate geospatial + geographicReferenceDateType Type The type of date represented in the Date field revision, expiry, lastUpdate, lastRevision, nextUpdate, unavailable, inForce, adopted, deprecated, superseded, publication text 21 (#VALUE) TRUE TRUE FALSE FALSE FALSE FALSE geographicReferenceDate geospatial + geographicDataLineageStatement Data Lineage Statement A statement about the Dataset's lineage textbox 22 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE geospatial + geographicProcessStep Data Lineage Process Step Description A description of the processes performed on the data as part of the Dataset's processing history textbox 23 #VALUE FALSE FALSE TRUE FALSE FALSE FALSE geospatial + geographicSpatialRepresentationType Spatial Representation Type The type of spatial representation of the data in the Dataset text 24 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE geospatial + geometricObjectCount Vector Object Count Vector data only: Total number of geometric objects in the Dataset int 25 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE geospatial + geometricObjectType Vector Object Type Vector data only: The type of geometric objects (or shapes) that make up the structure of the vector Dataset curve, composite, complex, point, solid, or surface text 26 (#VALUE) FALSE TRUE FALSE TRUE FALSE FALSE geospatial + geographicNumberOfDimensions Raster Dimensions Raster/grid data only: The 
number of independent spatio-temporal axes, or dimensions, represented in the Dataset int 27 TRUE FALSE FALSE FALSE FALSE FALSE geospatial + geographicAxisDimensionProperties Raster Dimension (Axis) Properties Raster/grid data only: Information about spatial-temporal axis properties (dimensions). NOTE: A raster Dataset will usually have 2+ dimensions. Repeat this field to describe all dimensions represented in the Dataset none 28 FALSE FALSE TRUE FALSE FALSE FALSE geospatial + geographicDimensionNameType Raster Dimension (Axis) Name Raster/grid data only: Name/type for one of the spatial-temporal axis represented in the Dataset row, column, vertical, track, crossTrack, line, sample, or time text 29 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE geographicAxisDimensionProperties geospatial + geographicDimensionSize Raster Dimension (Axis) Size Number of elements along the axis int 30 FALSE FALSE FALSE FALSE FALSE FALSE geographicAxisDimensionProperties geospatial + geographicResolution Raster Dimension (Axis) Resolution A decimal value representing a dimension's degree of detail, i.e. the size of the area that corresponds to a single data element along a Dataset's axis. enter a decimal value float 31 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE geographicAxisDimensionProperties geospatial + geographicResolutionUnitOfMeasure Raster Dimension (Axis) Resolution Unit of Measure Resolution unit of measure for the dimension resolution value cm', 'm', 'km', etc. 
text 32 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE geographicAxisDimensionProperties geospatial + geographicCellGeometry Raster Cell Geometry Raster/grid data only: Identification of raster data in this Dataset text 33 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE geospatial + geographicDistribution Distribution Link Distribution links are URLs that can be used to access the Dataset none 34 TRUE FALSE TRUE FALSE FALSE FALSE geospatial + geographicDistributionLink URL The distribution link is a URL that can be used to access the datase text 35 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE geographicDistribution geospatial + geographicDistributionLinkLabel Label A descriptive label for the distribution link text 36 #VALUE TRUE FALSE FALSE FALSE FALSE FALSE geographicDistribution geospatial + geographicProtocol Protocol The service or transfer protocol associated with the distribution link URL HTTPS, FTP, HTTP, REST, etc. text 37 (#VALUE) TRUE FALSE FALSE FALSE FALSE FALSE geographicDistribution geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1 @@ -262,3 +289,45 @@ country Zambia 246 country Zimbabwe 247 country Åland Islands 248 + geometricObjectType curve curve 0 + geometricObjectType composite composite 1 + geometricObjectType complex complex 2 + geometricObjectType point point 3 + geometricObjectType solid solid 4 + geometricObjectType surface surface 5 + geographicCellGeometry point point 0 + geographicCellGeometry area area 1 + geographicCellGeometry voxel voxel 2 + geographicDimensionNameType row row 0 + geographicDimensionNameType column column 1 + geographicDimensionNameType vertical vertical 2 + geographicDimensionNameType track track 3 + geographicDimensionNameType crossTrack crossTrack 4 + geographicDimensionNameType line line 5 + geographicDimensionNameType sample sample 6 + geographicDimensionNameType time time 7 + geographicSpatialRepresentationType stereoModel stereoModel 0 + 
geographicSpatialRepresentationType video video 1 + geographicSpatialRepresentationType tin tin 2 + geographicSpatialRepresentationType textTable textTable 3 + geographicSpatialRepresentationType grid grid 4 + geographicSpatialRepresentationType vector vector 5 + geographicResourceType dataset dataset 0 + geographicResourceType service service 1 + geographicResourceType series series 2 + geographicReferenceDateType revision revision 0 + geographicReferenceDateType expiry expiry 1 + geographicReferenceDateType lastUpdate lastUpdate 2 + geographicReferenceDateType lastRevision lastRevision 3 + geographicReferenceDateType nextUpdate nextUpdate 4 + geographicReferenceDateType unavailable unavailable 5 + geographicReferenceDateType inForce inForce 6 + geographicReferenceDateType adopted adopted 7 + geographicReferenceDateType deprecated deprecated 8 + geographicReferenceDateType superseded superseded 9 + geographicReferenceDateType publication publication 10 + geographicSpatialResolutionType equivalentScale equivalentScale 0 + geographicSpatialResolutionType distance distance 1 + geographicSpatialResolutionType vertical vertical 2 + geographicSpatialResolutionType angularDistance angularDistance 3 + geographicSpatialResolutionType levelOfDetail levelOfDetail 4 diff --git a/scripts/api/data/metadatablocks/rubric_trusteddatadimensionsintensities.tsv b/scripts/api/data/metadatablocks/rubric_trusteddatadimensionsintensities.tsv new file mode 100644 index 00000000000..80b31c6ffcc --- /dev/null +++ b/scripts/api/data/metadatablocks/rubric_trusteddatadimensionsintensities.tsv @@ -0,0 +1,28 @@ +#metadataBlock name dataverseAlias displayName blockURI + rubric_trusteddatadimensionsintensities Trusted Data Dimensions and Intensities +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI + authorAndProvenance Author and 
Provenance The level of trust in the data creators and in other provenance information text 1 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities + integrityAndUsability Integrity and Usability The level of trust in the accuracy, completeness, and ease of use of the data text 2 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities + fitnessForScopeAndContextualRelevance Fitness for Scope and Contextual Relevance The level of trust in the suitability of the data for specific contexts, questions, or policy applications text 3 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities + licensingAndLegalClarity Licensing and Legal Clarity The level of trust in the explicitness of the data’s usage rights and their compliance with relevant laws and regulations text 4 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities + transparencyOfMethodsAndDocumentation Transparency of Methods and Documentation The level of trust in the clarity of the descriptions of data collection and processing methods text 5 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities + biasEquityAndRepresentativeness Bias, Equity, and Representativeness The level of trust in the inclusivity and fairness of the coverage of the data text 6 TRUE TRUE FALSE TRUE FALSE FALSE rubric_trusteddatadimensionsintensities +#controlledVocabulary DatasetField Value identifier displayOrder + authorAndProvenance Low 0 + authorAndProvenance Medium 1 + authorAndProvenance High 2 + integrityAndUsability Low 0 + integrityAndUsability Medium 1 + integrityAndUsability High 2 + fitnessForScopeAndContextualRelevance Low 0 + fitnessForScopeAndContextualRelevance Medium 1 + fitnessForScopeAndContextualRelevance High 2 + licensingAndLegalClarity Low 0 + licensingAndLegalClarity Medium 1 + licensingAndLegalClarity High 2 + transparencyOfMethodsAndDocumentation Low 0 + transparencyOfMethodsAndDocumentation Medium 1 + 
transparencyOfMethodsAndDocumentation High 2 + biasEquityAndRepresentativeness Low 0 + biasEquityAndRepresentativeness Medium 1 + biasEquityAndRepresentativeness High 2 diff --git a/scripts/dev/dev-down-frd.sh b/scripts/dev/dev-down-frd.sh new file mode 100755 index 00000000000..13eba9420f5 --- /dev/null +++ b/scripts/dev/dev-down-frd.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Ensure we're in project root +cd "$(dirname "${BASH_SOURCE[0]}")/../.." + +echo "Stopping and removing dev containers..." + +# Use override file if it exists (for local customizations like memory limits) +if [ -f docker-compose.override.yml ]; then + docker compose -f docker-compose-dev.yml -f docker-compose.override.yml down +else + docker compose -f docker-compose-dev.yml down +fi + +echo "" +echo "✓ Dev environment stopped" +echo " To restart: ./scripts/dev/dev-start-frd.sh" +echo " To clean volumes: sudo rm -rf docker-dev-volumes/" diff --git a/scripts/dev/dev-frd.sh b/scripts/dev/dev-frd.sh new file mode 100755 index 00000000000..e7eacab661b --- /dev/null +++ b/scripts/dev/dev-frd.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +set -euo pipefail + +cd "$(dirname "${BASH_SOURCE[0]}")/../.." + +# Verify dev environment is running +if ! docker ps --filter "name=dev_dataverse" --filter "status=running" -q | grep -q .; then + echo "Error: dev_dataverse container not running." >&2 + echo "Run './scripts/dev/dev-start-frd.sh' first to set up the environment." >&2 + exit 1 +fi + +echo "Compiling Dataverse sources..." +mvn -T 1C -DskipTests -DskipUnitTests -DskipIntegrationTests compile >/dev/null + +if [ ! -d "target/classes" ]; then + echo "ERROR: target/classes missing after compile." >&2 + exit 1 +fi + +echo "Syncing compiled classes..." +# --delete removes files in dest not present in source +rsync -a --delete --exclude 'META-INF/persistence.xml' \ + target/classes/ target/dataverse/WEB-INF/classes/ + +if [ -d "src/main/webapp" ]; then + echo "Syncing webapp resources..." 
+ rsync -a --delete \ + --exclude 'WEB-INF/classes' \ + --exclude 'WEB-INF/lib' \ + src/main/webapp/ target/dataverse/ +fi + +echo "Redeploying to Payara..." +docker exec dev_dataverse /bin/bash -lc ' + printf "AS_ADMIN_PASSWORD=%s\n" admin > /tmp/pwdfile; + asadmin --user admin --passwordfile /tmp/pwdfile \ + deploy --force --upload=false /opt/payara/deployments/dataverse 2>&1 \ + | grep -v "PER01001\|PER01003\|Command deploy completed with warnings"; + rm /tmp/pwdfile' + +echo "" +echo "✓ Fast redeploy complete (~12s)" +echo " Test your changes at http://localhost:8080" diff --git a/scripts/dev/dev-start-frd.sh b/scripts/dev/dev-start-frd.sh new file mode 100755 index 00000000000..d113f677bad --- /dev/null +++ b/scripts/dev/dev-start-frd.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Ensure we're in project root +cd "$(dirname "${BASH_SOURCE[0]}")/../.." + +echo "Building Dataverse WAR for fast redeploy..." +mvn -T 1C -DskipTests -DskipUnitTests -DskipIntegrationTests clean package + +echo "Extracting WAR into target/dataverse/..." +mkdir -p target/dataverse +unzip -oq target/dataverse-*.war -d target/dataverse/ + +# Check if database is already initialized (before creating directories) +# If postgres has initialized, the data dir will have restrictive permissions (0700) +# On first run, the directory either doesn't exist or has default permissions +DB_INITIALIZED=false +if [ -d "docker-dev-volumes/postgresql/data" ]; then + # Try to list the directory - if permission denied, it means postgres owns it (initialized) + if ! ls docker-dev-volumes/postgresql/data >/dev/null 2>&1; then + DB_INITIALIZED=true + fi +fi + +echo "Ensuring docker volume mount points exist..." 
+mkdir -p docker-dev-volumes/app/data +mkdir -p docker-dev-volumes/app/secrets +mkdir -p docker-dev-volumes/postgresql/data +mkdir -p docker-dev-volumes/solr/data +mkdir -p docker-dev-volumes/solr/conf +mkdir -p docker-dev-volumes/minio_storage + +# Only disable DDL generation if database is already initialized +# (on first run, we need create-tables to bootstrap the schema) +if [ "$DB_INITIALIZED" = true ]; then + echo "Detected existing database - disabling DDL generation to preserve schema..." + sed -i.bak 's/\(eclipselink.ddl-generation" value="\)create-tables/\1none/' \ + target/dataverse/WEB-INF/classes/META-INF/persistence.xml + rm -f target/dataverse/WEB-INF/classes/META-INF/persistence.xml.bak +else + echo "First-time setup detected - keeping DDL generation enabled for schema creation..." +fi + +echo "Starting dev stack (SKIP_DEPLOY=1)..." +export SKIP_DEPLOY=1 +# Use override file if it exists (for local customizations like memory limits) +if [ -f docker-compose.override.yml ]; then + docker compose -f docker-compose-dev.yml -f docker-compose.override.yml up -d +else + docker compose -f docker-compose-dev.yml up -d +fi + +echo "Waiting for Payara to be ready..." +until curl -sf http://localhost:8080/ >/dev/null 2>&1; do + sleep 2 +done + +echo "Deploying exploded WAR..." +docker exec dev_dataverse /bin/bash -lc ' + printf "AS_ADMIN_PASSWORD=%s\n" admin > /tmp/pwdfile; + asadmin --user admin --passwordfile /tmp/pwdfile \ + deploy --upload=false /opt/payara/deployments/dataverse 2>&1 \ + | grep -v "PER01001\|PER01003\|Command deploy completed with warnings"; + rm /tmp/pwdfile' + +echo "" +echo "✓ Fast redeploy environment ready!" 
+echo " Next: Make code changes, then run './scripts/dev/dev-frd.sh' to redeploy (~12s)" diff --git a/scripts/dev/release-dates/AGENTS.md b/scripts/dev/release-dates/AGENTS.md new file mode 100644 index 00000000000..029c081e7b2 --- /dev/null +++ b/scripts/dev/release-dates/AGENTS.md @@ -0,0 +1,16 @@ +- Write the script in pure Python with no dependencies. +- Work backward from the provided Release Date, which we prefer to be on a Wednesday. +- Curation Team Review starts on the Monday prior. After the date, put "(allocate 5 full days)". +- Code Freeze starts on the Thursday prior. +- Core PR Last Call starts on the Thursday prior. +- Community PR Last Call starts on the Thursday prior. +- Start starts three months before the Release Date, on a Thursday. It's ok to put "??" for the sprint number. + +Here's some example output: + +- Start: Sprint ??, 2026-03-26 +- Community PR Last Call: 2026-05-21 +- Core PR Last Call: 2026-05-28 +- Code Freeze: 2026-06-04 +- Curation Team Review: 2026-06-08 (allocate 5 full days) +- Release Date: 2026-06-17 \ No newline at end of file diff --git a/scripts/dev/release-dates/generate_release_dates.py b/scripts/dev/release-dates/generate_release_dates.py new file mode 100755 index 00000000000..67de60af02c --- /dev/null +++ b/scripts/dev/release-dates/generate_release_dates.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +"""Generate release milestone dates from a release date.""" + +from __future__ import annotations + +import sys +from datetime import date, timedelta + + +USAGE = "Usage: generate_release_dates.py YYYY-MM-DD" + + +def parse_release_date(value: str) -> date: + try: + return date.fromisoformat(value) + except ValueError as exc: + raise SystemExit(f"Invalid date '{value}'. 
Expected YYYY-MM-DD.") from exc + + +def previous_thursday(reference: date) -> date: + days_since_thursday = (reference.weekday() - 3) % 7 + return reference - timedelta(days=days_since_thursday or 7) + + +def build_schedule(release_date: date) -> list[tuple[str, str]]: + code_freeze = previous_thursday(release_date - timedelta(days=7)) + core_pr_last_call = code_freeze - timedelta(days=7) + community_pr_last_call = core_pr_last_call - timedelta(days=7) + curation_team_review = code_freeze + timedelta(days=4) + start = community_pr_last_call - timedelta(days=56) + + return [ + ("Start", f"Sprint ??, {start.isoformat()}"), + ("Community PR Last Call", community_pr_last_call.isoformat()), + ("Core PR Last Call", core_pr_last_call.isoformat()), + ("Code Freeze", code_freeze.isoformat()), + ( + "Curation Team Review", + f"{curation_team_review.isoformat()} (allocate 5 full days)", + ), + ("Release Date", release_date.isoformat()), + ] + + +def main(argv: list[str]) -> int: + if len(argv) != 2 or argv[1] in {"-h", "--help"}: + print(USAGE, file=sys.stderr if len(argv) != 2 else sys.stdout) + return 1 if len(argv) != 2 else 0 + + release_date = parse_release_date(argv[1]) + + if release_date.weekday() != 2: + print( + "Warning: Release Date is usually expected to be a Wednesday.", + file=sys.stderr, + ) + + for label, value in build_schedule(release_date): + print(f"- {label}: {value}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/scripts/installer/installUtils.py b/scripts/installer/installUtils.py index ff5e6eb708d..ddbdd2c7167 100644 --- a/scripts/installer/installUtils.py +++ b/scripts/installer/installUtils.py @@ -57,7 +57,7 @@ def test_appserver_directory(directory): #print("version: major: "+str(major_version)+", minor: "+str(minor_version)) - if major_version != 6 or minor_version < 2023: + if major_version != 7 or minor_version < 2026: return False return True diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 57734911470..0a1de041698 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -5,55 +5,40 @@ */ package edu.harvard.iq.dataverse; +import de.undercouch.citeproc.csl.CSLItemDataBuilder; +import de.undercouch.citeproc.csl.CSLName; +import de.undercouch.citeproc.csl.CSLNameBuilder; +import de.undercouch.citeproc.csl.CSLType; +import de.undercouch.citeproc.helper.json.JsonBuilder; +import de.undercouch.citeproc.helper.json.StringJsonBuilderFactory; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataset.DatasetType; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.DateUtil; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.ejb.EJBException; +import jakarta.json.JsonObject; +import jakarta.ws.rs.core.MediaType; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; -import java.io.BufferedWriter; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import java.io.*; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import 
java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJBException; -import jakarta.json.JsonObject; -import jakarta.ws.rs.core.MediaType; - -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamWriter; - -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.DateUtil; -import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonUtil; - -import org.apache.commons.text.StringEscapeUtils; - -import de.undercouch.citeproc.csl.CSLItemDataBuilder; -import de.undercouch.citeproc.csl.CSLName; -import de.undercouch.citeproc.csl.CSLNameBuilder; -import de.undercouch.citeproc.csl.CSLType; -import de.undercouch.citeproc.helper.json.JsonBuilder; -import de.undercouch.citeproc.helper.json.StringJsonBuilderFactory; - -import org.apache.commons.lang3.StringUtils; - import static edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider.DOI_PROTOCOL; import static edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider.HDL_PROTOCOL; import static edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider.PERMA_PROTOCOL; @@ -99,6 +84,15 @@ public enum Format { BibTeX, CSL } + + public static Format getFormat(String name) { + for (Format format : Format.values()) { + if (format.name().equalsIgnoreCase(name)) { + return format; + } + } + return null; + } public DataCitation(DatasetVersion dsv) { this(dsv, false); @@ -141,7 +135,7 @@ public DataCitation(FileMetadata fm, boolean direct) { getCommonValuesFrom(dsv); // file Title for direct File citation - fileTitle = fm.getLabel(); + fileTitle = fm.getLabelForOriginal(); DataFile df = fm.getDataFile(); // File description @@ -880,7 +874,7 @@ private void getAuthorsAndProducersFrom(DatasetVersion dsv) { cslAuthors.add(new 
CSLNameBuilder().given(givenName).family(familyName).isInstitution(false).build()); } else { cslAuthors.add( - new CSLNameBuilder().literal(formatString(authorJson.getString("fullName"), true)).isInstitution(false).build()); + new CSLNameBuilder().literal(formatString(authorJson.getString("fullName"), true)).isInstitution(true).build()); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 8a08cd15029..6e92f71547a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -80,7 +80,7 @@ public class DataFile extends DvObject implements Comparable { private static final Logger logger = Logger.getLogger(DatasetPage.class.getCanonicalName()); private static final long serialVersionUID = 1L; - public static final String TARGET_URL = "/file.xhtml?persistentId="; + public static final String TARGET_URL = "/citation?persistentId="; public static final char INGEST_STATUS_NONE = 65; public static final char INGEST_STATUS_SCHEDULED = 66; public static final char INGEST_STATUS_INPROGRESS = 67; @@ -247,11 +247,31 @@ public String toUri() { inverseJoinColumns = @JoinColumn(name = "authenticated_user_id")) private List fileAccessRequesters; - - public List getFileAccessRequests(){ - return fileAccessRequests; + public List getFileAccessRequests() { + return getFileAccessRequests(0, 0); } - + + /** + * Get Requests with pagination option + * @param numResultsPerPageRequested + * @param paginationStart starts at 1 + * @return + */ + public List getFileAccessRequests(int numResultsPerPageRequested, int paginationStart) { + if (numResultsPerPageRequested < 1 || paginationStart < 1) { + return fileAccessRequests; + } else { + int startIndex = (paginationStart - 1) * numResultsPerPageRequested; + int endIndex = startIndex + numResultsPerPageRequested; + if (startIndex >= fileAccessRequests.size()) { + return List.of(); + } else if 
(endIndex > fileAccessRequests.size()) { + endIndex = fileAccessRequests.size(); + } + return fileAccessRequests.subList(startIndex, endIndex); + } + } + public List getFileAccessRequests(FileAccessRequest.RequestState state){ return fileAccessRequests.stream().filter(far -> far.getState() == state).collect(Collectors.toList()); } @@ -527,6 +547,11 @@ public Long getOriginalFileSize() { return null; } + public String getFriendlyOriginalFileSize() { + Long size = (getOriginalFileSize()==null) ? filesize : getOriginalFileSize(); + return getFriendlySize(size); + } + public String getOriginalFileName() { if (isTabularData()) { DataTable dataTable = getDataTable(); @@ -539,7 +564,7 @@ public String getOriginalFileName() { } - private String getDerivedOriginalFileName() { + public String getDerivedOriginalFileName() { FileMetadata fm = getFileMetadata(); String filename = fm.getLabel(); String originalExtension = FileUtil.generateOriginalExtension(getOriginalFileFormat()); @@ -689,8 +714,12 @@ public void setFilesize(long filesize) { * @return */ public String getFriendlySize() { - if (filesize != null) { - return FileSizeChecker.bytesToHumanReadable(filesize); + return getFriendlySize(filesize); + } + + private String getFriendlySize(Long size) { + if (size != null) { + return FileSizeChecker.bytesToHumanReadable(size); } else { return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); } @@ -849,6 +878,15 @@ public void addFileAccessRequest(FileAccessRequest request) { this.fileAccessRequests.add(request); } + public List getAccessRequestsForAssignee(RoleAssignee roleAssignee) { + if (this.fileAccessRequests == null) { + return null; + } + + return this.fileAccessRequests.stream() + .filter(fileAccessRequest -> fileAccessRequest.getRequester().equals(roleAssignee)).toList(); + } + public FileAccessRequest getAccessRequestForAssignee(RoleAssignee roleAssignee) { if (this.fileAccessRequests == null) { return null; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 4649636aee6..39dccdcd4ea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -662,6 +662,10 @@ public String getIdentifierForFileStorage(){ return retVal; } + public String getGlobalIdForFileStorageAsString() { + return getProtocolForFileStorage() + ":" + getAuthorityForFileStorage() + "/" + getIdentifierForFileStorage(); + } + public String getNextMajorVersionString() { // Never need to get the next major version for harvested studies. if (isHarvested()) { @@ -759,6 +763,10 @@ public void setThumbnailFile(DataFile thumbnailFile) { this.thumbnailFile = thumbnailFile; } + public String getThumbnailUrl() { + return thumbnailFile != null ? SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + this.getId() + "/logo" : null; + } + public boolean isUseGenericThumbnail() { return useGenericThumbnail; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java index a48e110fb45..b09d495714b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java @@ -177,12 +177,13 @@ public Map getDisplayValueMap() { if (StringUtils.isBlank(format)) { format = "#VALUE"; } + String sanitizedValue = childDatasetField.getDatasetFieldType().isSanitizeHtml() ? 
MarkupChecker.sanitizeBasicHTML(childDatasetField.getValue()) : childDatasetField.getValue(); if (!childDatasetField.getDatasetFieldType().isSanitizeHtml() && childDatasetField.getDatasetFieldType().isEscapeOutputText()){ sanitizedValue = MarkupChecker.stripAllTags(sanitizedValue); } //if a series of child values is comma delimited we want to strip off the final entry's comma - if (format.equals("#VALUE, ")) fixTrailingComma = true; + if (format.trim().equals("#VALUE,")) fixTrailingComma = true; // replace the special values in the format (note: we replace #VALUE last since we don't // want any issues if the value itself has #NAME in it) @@ -247,9 +248,17 @@ private Map removeLastComma(Map mapI keyVal = entry.getKey(); oldValue = entry.getValue(); } - + + String newValue = oldValue; + if (keyVal != null && oldValue != null && oldValue.length() >= 2) { - String newValue = oldValue.substring(0, oldValue.length() - 2); + //To take into account both versions of the tsv for display value + if (oldValue.endsWith(", ")) { + newValue = oldValue.substring(0, oldValue.length() - 2); + } else if (oldValue.endsWith(",")) { + newValue = oldValue.substring(0, oldValue.length() - 1); + } + mapIn.replace(keyVal, oldValue, newValue); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java index 6d3fda2812d..96378e87795 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java @@ -59,10 +59,26 @@ public boolean isValid(DatasetField value, ConstraintValidatorContext context) { } // if value is not primitive or not empty - if (!dsfType.isPrimitive() || !StringUtils.isBlank(value.getValue())) { + // For controlled vocabulary fields, check that actual CV values are selected, + // not just that datasetFieldValues contains something (which might be an invalid N/A placeholder) + // See 
https://github.com/IQSS/dataverse/issues/11900 + if (!dsfType.isPrimitive()) { return true; } - + + if (dsfType.isControlledVocabulary()) { + // For CV fields, check if there are actual controlled vocabulary values selected + if (value.getControlledVocabularyValues() != null && !value.getControlledVocabularyValues().isEmpty()) { + return true; + } + // If no CV values, fall through to required field check below + } else { + // For non-CV primitive fields, check if value is not blank + if (!StringUtils.isBlank(value.getValue())) { + return true; + } + } + if (value.isRequired()) { String errorMessage = null; DatasetFieldCompoundValue parent = value.getParentDatasetFieldCompoundValue(); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3b1c8c4f3c4..e79de5c1d8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -32,6 +32,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetReviewsCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; @@ -104,6 +105,7 @@ import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.json.Json; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.persistence.OptimisticLockException; @@ -111,7 +113,6 @@ import org.primefaces.event.FileUploadEvent; import org.primefaces.model.file.UploadedFile; -import jakarta.validation.ConstraintViolation; import java.util.Arrays; import java.util.HashSet; import 
jakarta.faces.model.SelectItem; @@ -134,6 +135,7 @@ import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.export.SchemaDotOrgExporter; +import edu.harvard.iq.dataverse.export.croissant.CroissantExportUtil; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; @@ -2139,9 +2141,15 @@ private String init(boolean initFull) { return permissionsWrapper.notFound(); } - // Check permisisons - if (!(workingVersion.isReleased() || workingVersion.isDeaccessioned()) && !this.canViewUnpublishedDataset()) { - return permissionsWrapper.notAuthorized(); + // Check permissions + boolean releasedAndCanView = workingVersion.isReleased() && (!dataset.isLocallyFAIR() || permissionsWrapper + .hasLocallyFAIRAccess(dvRequestService.getDataverseRequest(), dataset)); + if (!(releasedAndCanView || workingVersion.isDeaccessioned()) && !this.canViewUnpublishedDataset()) { + if (dataset.isLocallyFAIR()) { + return permissionsWrapper.notFound(); + } else { + return permissionsWrapper.notAuthorized(); + } } if (retrieveDatasetVersionResponse != null && !retrieveDatasetVersionResponse.wasRequestedVersionRetrieved()) { @@ -4039,8 +4047,8 @@ public String save() { dataset.setOwner(ownerId != null ? 
dataverseService.find(ownerId) : null); } // Validate - Set constraintViolations = workingVersion.validate(); - if (!constraintViolations.isEmpty()) { + workingVersion.validate(); // add validation messages to dataset fields + if (!workingVersion.isValid()) { FacesContext.getCurrentInstance().validationFailed(); return ""; } @@ -4870,11 +4878,16 @@ public Boolean isDatasetPublishPopupCustomTextOnAllVersions(){ return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DatasetPublishPopupCustomTextOnAllVersions, false); } - public String getVariableMetadataURL(Long fileid) { - String myHostURL = getDataverseSiteUrl(); - String metaURL = myHostURL + "/api/meta/datafile/" + fileid; + public boolean isDisplaySubmitForReviewPopupCustomText() { + return !getDatasetSubmitForReviewCustomText().isEmpty(); + } - return metaURL; + public String getDatasetSubmitForReviewCustomText(){ + String datasetSubmitForReviewCustomText = settingsWrapper.getValueForKey(SettingsServiceBean.Key.DatasetSubmitForReviewPopupCustomText); + if (datasetSubmitForReviewCustomText != null && !datasetSubmitForReviewCustomText.isEmpty()) { + return datasetSubmitForReviewCustomText; + } + return ""; } public String getTabularDataFileURL(Long fileid) { @@ -6106,6 +6119,20 @@ public String getCroissant() { final String CROISSANT_SCHEMA_NAME = "croissantSlim"; ExportService instance = ExportService.getInstance(); String croissant = instance.getLatestPublishedAsString(dataset, CROISSANT_SCHEMA_NAME); + if (FeatureFlags.CROISSANT_WITH_LOCAL_REVIEWS.enabled()) { + // Rewrite the export on the fly and insert local reviews until we have a solution for https://github.com/gdcc/dataverse-spi/issues/5 + JsonObjectBuilder reviewsJsonObj = null; + try { + reviewsJsonObj = commandEngine.submit(new GetDatasetReviewsCommand(dvRequestService.getDataverseRequest(), dataset)); + JsonObjectBuilder reviews = CroissantExportUtil.getReviews(reviewsJsonObj); + JsonObject croissantJson = JsonUtil.getJsonObject(croissant); + 
String updatedContent = Json.createObjectBuilder(croissantJson) + .add("reviews", reviews.build().getJsonArray("reviews")).build().toString(); + return updatedContent; + } catch (CommandException e) { + logger.fine("Couldn't get reviews"); + } + } if (croissant != null && !croissant.isEmpty()) { logger.fine("Returning cached CROISSANT."); return croissant; @@ -6373,6 +6400,7 @@ public void setFileMetadataForAction(FileMetadata fileMetadataForAction) { private String termsOfAccess; private boolean fileAccessRequest; private boolean publishDisclaimerAcknowledged; + private boolean submitForReviewDisclaimerAcknowledged; public String getTermsOfAccess() { return termsOfAccess; @@ -6398,6 +6426,14 @@ public void setPublishDisclaimerAcknowledged(boolean publishDisclaimerAcknowledg this.publishDisclaimerAcknowledged = publishDisclaimerAcknowledged; } + public boolean isSubmitForReviewDisclaimerAcknowledged() { + return submitForReviewDisclaimerAcknowledged || !settingsWrapper.isHasSubmitForReviewDatasetDisclaimerText(); + } + + public void setSubmitForReviewDisclaimerAcknowledged(boolean submitForReviewDisclaimerAcknowledged) { + this.submitForReviewDisclaimerAcknowledged = submitForReviewDisclaimerAcknowledged; + } + // wrapper method to see if the file has been deleted (or replaced) in the current version public boolean isFileDeleted (DataFile dataFile) { if (dataFile.getDeleted() == null) { @@ -7005,4 +7041,4 @@ public void setRequestedCSL(String requestedCSL) { public void validateEmbargoReason(FacesContext context, UIComponent component, Object value) { FileUtil.validateEmbargoReason(context, component, value, removeEmbargo); } -} \ No newline at end of file +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 0988439d800..dc09d8948a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -835,6 +835,18 @@ public String getReminderString(Dataset dataset, boolean canPublishDataset, bool String reminderString; + if (dataset.getOwner().getEffectiveRequiresFilesToPublishDataset()) { + List files = dataset.getLatestVersion().getFileMetadatas(); + if (files.size() < 1) { + if (canPublishDataset) { + reminderString = BundleUtil.getStringFromBundle("dataset.mayNotPublish.FilesRequired"); + } else { + reminderString = BundleUtil.getStringFromBundle("dataset.mayNotSubmitForReview.FilesRequired"); + } + return reminderString; + } + } + if (canPublishDataset) { reminderString = BundleUtil.getStringFromBundle("dataset.message.publish.warning"); } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 2b4844e4d16..c45ddcd62cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -203,6 +203,9 @@ public enum VersionState { //The Json version of the archivalCopyLocation string @Transient private JsonObject archivalCopyLocationJson; + + @Transient + private Boolean hasFiles = null; public Long getId() { return this.id; @@ -2192,5 +2195,12 @@ public void setVersionNote(String note) { this.versionNote = note; } + + public Boolean hasFiles() { + return hasFiles; + } + public void setHasFiles(Boolean hasFiles) { + this.hasFiles = hasFiles; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index a5dd724104f..95443c35e05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -35,7 +35,6 @@ import jakarta.json.JsonObjectBuilder; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; 
-import jakarta.persistence.OptimisticLockException; import jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; @@ -892,7 +891,7 @@ public Long getThumbnailByVersionId(Long versionId) { if (thumbnailFileId != null) { logger.fine("obtained file id: " + thumbnailFileId); - DataFile thumbnailFile = datafileService.find(thumbnailFileId); + DataFile thumbnailFile = getDataFileById(thumbnailFileId); if (thumbnailFile != null) { if (datafileService.isThumbnailAvailable(thumbnailFile)) { assignDatasetThumbnailByNativeQuery(versionId, thumbnailFileId); @@ -925,7 +924,7 @@ public Long getThumbnailByVersionId(Long versionId) { } if (thumbnailFileId != null) { - DataFile thumbnailFile = datafileService.find(thumbnailFileId); + DataFile thumbnailFile = getDataFileById(thumbnailFileId); if (thumbnailFile != null) { if (datafileService.isThumbnailAvailable(thumbnailFile)) { assignDatasetThumbnailByNativeQuery(versionId, thumbnailFileId); @@ -937,7 +936,11 @@ public Long getThumbnailByVersionId(Long versionId) { } return null; } - + + public DataFile getDataFileById(Long id) { + return datafileService.find(id); + } + private void assignDatasetThumbnailByNativeQuery(Long versionId, Long dataFileId) { try { em.createNativeQuery("UPDATE dataset SET thumbnailfile_id=" + dataFileId + " WHERE id in (SELECT dataset_id FROM datasetversion WHERE id=" + versionId + ")").executeUpdate(); @@ -1337,6 +1340,15 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer return em.createQuery(cq).getSingleResult(); } + public boolean hasFiles(Long datasetVersionId) { + Query query = em.createNativeQuery("SELECT id FROM fileMetadata WHERE datasetversion_id="+datasetVersionId+" LIMIT 1"); + try { + query.getSingleResult(); + return true; + } catch (NoResultException e) { + return false; + } + } /** * Update the archival copy location for a specific version of a dataset. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index a719e32aa78..31919398530 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -16,7 +16,9 @@ import java.util.Objects; import java.util.Set; import jakarta.persistence.CascadeType; +import jakarta.persistence.CollectionTable; import jakarta.persistence.Column; +import jakarta.persistence.ElementCollection; import jakarta.persistence.Entity; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; @@ -105,7 +107,40 @@ public enum DataverseType { @NotNull(message = "{dataverse.category}") @Column( nullable = false ) private DataverseType dataverseType; - + + + @ElementCollection + @CollectionTable(name = "dataverse_locallyfairassignees", + joinColumns = @JoinColumn(name = "dataverse_id")) + @Column(name = "assigneeidentifier") + private Set locallyFAIRRoleAssigneeIdentifiers = new HashSet<>(); + + @Override + public Set getLocallyFAIRRoleAssigneeIdentifiers() { + return locallyFAIRRoleAssigneeIdentifiers; + } + + public void setLocallyFAIRRoleAssigneeIdentifiers(Set roleAssigneeIdentifiers) { + this.locallyFAIRRoleAssigneeIdentifiers = roleAssigneeIdentifiers; + } + + public void addLocallyFAIRRoleAssignee(String assigneeIdentifier) { + if (locallyFAIRRoleAssigneeIdentifiers == null) { + locallyFAIRRoleAssigneeIdentifiers = new HashSet<>(); + } + locallyFAIRRoleAssigneeIdentifiers.add(assigneeIdentifier); + } + + public void removeLocallyFAIRRoleAssignee(String assigneeIdentifier) { + if (locallyFAIRRoleAssigneeIdentifiers != null) { + locallyFAIRRoleAssigneeIdentifiers.remove(assigneeIdentifier); + } + } + + public boolean LocallyFAIR(String assigneeIdentifier) { + return locallyFAIRRoleAssigneeIdentifiers != null && locallyFAIRRoleAssigneeIdentifiers.contains(assigneeIdentifier); + } + /** * When {@code true}, users are not granted permissions 
the got for parent * dataverses. @@ -907,7 +942,7 @@ public boolean isAncestorOf( DvObject other ) { } return false; } - + public String getLocalURL() { return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias(); } @@ -924,4 +959,10 @@ public void addInputLevelsMetadataBlocksIfNotPresent(List block.getId().equals(metadataBlock.getId())); } + + @Override + public boolean isLocallyFAIR() { + return !locallyFAIRRoleAssigneeIdentifiers.isEmpty(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index d9cafbf421a..a8fca620fbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.UserNotification.Type; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -43,13 +44,17 @@ import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + import jakarta.faces.component.UIComponent; import jakarta.faces.component.UIInput; import org.primefaces.model.DualListModel; @@ -122,6 +127,8 @@ public enum LinkMode { PidProviderFactoryBean pidProviderFactoryBean; @EJB CacheFactoryBean cacheFactory; + @EJB + RoleAssigneeServiceBean roleAssigneeService; private Dataverse dataverse = new Dataverse(); @@ -141,6 +148,7 @@ public enum LinkMode { private List linkingDVSelectItems; private Dataverse 
linkingDataverse; private List selectedSubjects; + private List locallyFAIRRoleAssigneesList; public List getSelectedSubjects() { return selectedSubjects; @@ -340,13 +348,17 @@ public String init() { } } - // check if dv exists and user has permission - if (dataverse == null) { - return permissionsWrapper.notFound(); - } - if (!dataverse.isReleased() && !permissionService.on(dataverse).has(Permission.ViewUnpublishedDataverse)) { - // the permission lookup above should probably be moved into the permissionsWrapper -- L.A. 5.7 - return permissionsWrapper.notAuthorized(); + // Check permissions for unreleased dataverse and Locally FAIR permissions for released dataverses + boolean releasedAndCanView = dataverse.isReleased() && (!dataverse.isLocallyFAIR() || permissionsWrapper + .hasLocallyFAIRAccess(dvRequestService.getDataverseRequest(), dataverse)); + + if (!releasedAndCanView && !permissionService.on(dataverse).has(Permission.ViewUnpublishedDataverse)) { + // Return notFound for FAIR-restricted content, notAuthorized otherwise + if (dataverse.isLocallyFAIR()) { + return permissionsWrapper.notFound(); + } else { + return permissionsWrapper.notAuthorized(); + } } ownerId = dataverse.getOwner() != null ? dataverse.getOwner().getId() : null; @@ -1346,6 +1358,20 @@ public void updateDisplayOnCreate(Long mdbId, Long dsftId, boolean currentValue) } } } + /** + * Returns role assignees matching the search query, while excluding any assignees + * that are already associated with this dataverse through locally FAIR role assignment. 
+ * + * @param query search text used to filter possible role assignees + * @return matching role assignees that can still be added to the dataverse + */ + public List completeRoleAssignee( String query ) { + List existingAssignees = dataverse.getLocallyFAIRRoleAssigneeIdentifiers().stream() + .map(id -> roleAssigneeService.getRoleAssignee(id)) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + return roleAssigneeService.filterRoleAssignees(query, dataverse, existingAssignees); + } private void saveInputLevels(List listDFTIL, DatasetFieldType dsft, Dataverse dataverse) { // If the field already has an input level, update it @@ -1368,4 +1394,22 @@ private void saveInputLevels(List listDFTIL, Datas )); } } + + /* Get/set methods to keep the local locallyFARIRoleAssigneesList in sync with the Dataverse's locallyFAIRRoleAssigneeIdentifiers set. + */ + public List getLocallyFAIRRoleAssigneesList() { + if (locallyFAIRRoleAssigneesList == null) { + locallyFAIRRoleAssigneesList = dataverse.getLocallyFAIRRoleAssigneeIdentifiers().stream() + .map(roleAssigneeService::getRoleAssignee) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + return locallyFAIRRoleAssigneesList; + } + + public void setLocallyFAIRRoleAssigneesList(List assignees) { + locallyFAIRRoleAssigneesList = (assignees == null) ? 
Collections.emptyList() : assignees; + dataverse.setLocallyFAIRRoleAssigneeIdentifiers( + locallyFAIRRoleAssigneesList.stream().map(RoleAssignee::getIdentifier).collect(Collectors.toSet())); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 68ff739a77f..83f21ebab20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -140,6 +140,10 @@ public String visit(DataFile df) { @Column(insertable = false, updatable = false) private String dtype; + public String getDtype() { + return dtype; + } + @OneToMany(mappedBy="dvobject",fetch = FetchType.LAZY,cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST, CascadeType.REFRESH}) private List dataverseFeaturedItems; @@ -515,5 +519,23 @@ public void setStorageQuota(StorageQuota storageQuota) { @OneToMany(mappedBy = "definitionPoint",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true) List roleAssignments; - + + /** Whether this object is locally FAIR which is determined by whether it is in a locallyFAIR collection. + * @return {@code true} if this object is locally FAIR and not publicly visible, {@code false} otherwise. 
+ */ + public boolean isLocallyFAIR() { + if( getOwner() != null ) { + return getOwner().isLocallyFAIR(); + } else { + return false; + } + } + + public Set getLocallyFAIRRoleAssigneeIdentifiers() { + if(getOwner() != null) { + return getOwner().getLocallyFAIRRoleAssigneeIdentifiers(); + } else { + return Collections.emptySet(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index b70b03dea31..46d17d05363 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1706,7 +1706,7 @@ public void requestDirectUploadUrls() { String storageIdentifier = null; try { storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation()); - urls = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalId().asString(), storageIdentifier, fileSize); + urls = s3io.generateTemporaryS3UploadUrls(dataset.getGlobalIdForFileStorageAsString(), storageIdentifier, fileSize); } catch (IOException io) { logger.warning(io.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java index 43463e0cb91..c918c539c40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileAccessRequest.java @@ -4,6 +4,7 @@ import java.util.Date; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.util.BundleUtil; import jakarta.persistence.Column; import jakarta.persistence.Entity; import jakarta.persistence.EnumType; @@ -143,6 +144,20 @@ public String getStateLabel() { } return null; } + + // For use by UI to allow for internationalization + public String getStateLabelNationalized() { + if(isStateCreated()) { + return BundleUtil.getStringFromBundle("permission.fileAccess.created"); + } + 
if(isStateGranted()) { + return BundleUtil.getStringFromBundle("permission.fileAccess.granted"); + } + if(isStateRejected()) { + return BundleUtil.getStringFromBundle("permission.fileAccess.rejected"); + } + return null; + } public void setStateCreated() { this.requestState = RequestState.CREATED; @@ -197,4 +212,4 @@ public boolean equals(Object object) { } -} \ No newline at end of file +} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileLabelValidator.java b/src/main/java/edu/harvard/iq/dataverse/FileLabelValidator.java new file mode 100644 index 00000000000..e54a9e5fc21 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/FileLabelValidator.java @@ -0,0 +1,34 @@ +package edu.harvard.iq.dataverse; + +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class FileLabelValidator implements ConstraintValidator { + + @Override + public void initialize(ValidateDataFileLabel constraintAnnotation) { + + } + + @Override + public boolean isValid(String value, ConstraintValidatorContext context) { + return isFileLabelValid(value, context); + + } + + public static boolean isFileLabelValid(String value, ConstraintValidatorContext context) { + + if (value == null || value.isEmpty()) { + return true; + } + String validCharacters = "^[^:<>;#/\"\\*\\|\\?\\\\]*$"; + Pattern p = Pattern.compile(validCharacters); + Matcher m = p.matcher(value); + return m.matches(); + + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index c19bf3659a4..3bcb31970a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -39,6 +39,7 @@ import jakarta.persistence.NamedNativeQuery; import jakarta.persistence.OneToMany; import jakarta.persistence.OrderBy; +import jakarta.persistence.PostLoad; import 
jakarta.persistence.SqlResultSetMapping; import jakarta.persistence.Table; import jakarta.persistence.Transient; @@ -153,6 +154,18 @@ public class FileMetadata implements Serializable { @OneToMany (mappedBy="fileMetadata", cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}) private Collection variableMetadatas; + // A transient field is needed for JSF UI - validation errors on label do not get routed directly to the input for labelNoExtension, causing rollback. + // With a separate transient field kept in sync with label, the validation can be done on the labelNoExtension field, which avoids the issue and allows proper validation. + @Transient + @ValidateDataFileLabel(message = "{filename.illegalCharacters}") + String labelNoExtension; + + // Initialize the labelNoExtension from label after loading the entity + @PostLoad + public void postLoad() { + getLabelNoExtension(); + } + /** * Creates a copy of {@code this}, with identical business logic fields, making the bi-drectional connections to the specified version. * @@ -173,13 +186,46 @@ public FileMetadata createCopyInVersion(DatasetVersion dsv) { } public String getLabel() { + getLabelNoExtension(); return label; } public void setLabel(String label) { this.label = label; + getLabelNoExtension(); + } + + + public String getLabelNoExtension() { + int last = label.lastIndexOf("."); + labelNoExtension = (last == -1) ? label : label.substring(0, last); + return labelNoExtension; + } + + public String getOriginalExtension() { + String origFilename = getLabelForOriginal(); + int last = origFilename.lastIndexOf("."); + return (last == -1) ? 
"" : origFilename.substring(last); } + public void setLabelNoExtension(String name) { + labelNoExtension = name; + int last = this.label.lastIndexOf("."); + if (last == -1) { + this.label = name; + } else { + this.label = name + this.label.substring(last); + } + } + + public String getLabelForOriginal() { + if(dataFile.isTabularData()) { + return dataFile.getDerivedOriginalFileName(); + } else { + return label; + } + } + public FileMetadata() { variableMetadatas = new ArrayList(); varGroups = new ArrayList(); diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index b08598b2fb8..09dc360e7be 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -221,15 +221,21 @@ public String init() { } } - // If this DatasetVersion is unpublished and permission is doesn't have permissions: - // > Go to the Login page - // // Check permissions - Boolean authorized = (fileMetadata.getDatasetVersion().isReleased()) - || (!fileMetadata.getDatasetVersion().isReleased() && this.canViewUnpublishedDataset()); - - if (!authorized) { - return permissionsWrapper.notAuthorized(); + DatasetVersion datasetVersion = fileMetadata.getDatasetVersion(); + Dataset dataset = datasetVersion.getDataset(); + + // Check Locally FAIR permissions for released datasets + boolean releasedAndCanView = datasetVersion.isReleased() && (!file.isLocallyFAIR() || + permissionsWrapper.hasLocallyFAIRAccess(dvRequestService.getDataverseRequest(), file)); + + if (!releasedAndCanView && !canViewUnpublishedDataset()) { + // Return notFound for FAIR-restricted content, notAuthorized otherwise + if (file.isLocallyFAIR()) { + return permissionsWrapper.notFound(); + } else { + return permissionsWrapper.notAuthorized(); + } } //termsOfAccess = fileMetadata.getDatasetVersion().getTermsOfUseAndAccess().getTermsOfAccess(); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 754fe51714a..8ddda6a2a7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -111,10 +111,19 @@ public List findAllIds(Long dataverseId) { } public List findAllByGuestbookId(Long guestbookId) { + return findAllByGuestbookId(guestbookId, null, null); + } + public List findAllByGuestbookId(Long guestbookId, Integer offset, Integer limit) { + if (guestbookId != null) { + TypedQuery query = em.createQuery("select o from GuestbookResponse as o where o.guestbook.id = " + guestbookId + " order by o.responseTime desc", GuestbookResponse.class); + if (offset != null) { + query.setFirstResult(offset); + } + if (limit != null) { + query.setMaxResults(limit); + } - if (guestbookId == null) { - } else { - return em.createQuery("select o from GuestbookResponse as o where o.guestbook.id = " + guestbookId + " order by o.responseTime desc", GuestbookResponse.class).getResultList(); + return query.getResultList(); } return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java index fc7f361b8b6..4199846efd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookServiceBean.java @@ -33,6 +33,17 @@ public List findGuestbooksForGivenDataverse(Dataverse dataverse) { return List.of(); } } + // Get all guestbooks for this collection and its parent collections + public List findEffectiveGuestbooksForGivenDataverse(Dataverse dataverse) { + List guestbooks = findGuestbooksForGivenDataverse(dataverse); + if (dataverse != null) { + List parentDataverses = dataverse.getOwners(); + for (Dataverse dv : parentDataverses) { + 
guestbooks.addAll(findGuestbooksForGivenDataverse(dv)); + } + } + return guestbooks; + } public Long findCountUsages(Long guestbookId, Long dataverseId) { String queryString = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index c44529d9299..ee687305584 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -116,6 +116,15 @@ public boolean isShowDeleted() { public void setShowDeleted(boolean showDeleted) { this.showDeleted = showDeleted; } + private boolean showHistory = false; + + public boolean isShowHistory() { + return showHistory; + } + + public void setShowHistory(boolean showHistory) { + this.showHistory = showHistory; + } public Dataset getDataset() { return dataset; @@ -143,6 +152,13 @@ public void showDeletedCheckboxChange() { } } + private boolean backingShowHistory = false; + public void showHistoryCheckboxChange() { + if (backingShowHistory != showHistory) { + initMaps(); + backingShowHistory = showHistory; + } + } public String init() { if (dataset.getId() != null) { @@ -199,7 +215,7 @@ private void initMaps() { fileMap.put(file, raList); // populate the file access requests map - for (FileAccessRequest fileAccessRequest : file.getFileAccessRequests(FileAccessRequest.RequestState.CREATED)) { + for (FileAccessRequest fileAccessRequest : !showHistory ? 
file.getFileAccessRequests(FileAccessRequest.RequestState.CREATED) : file.getFileAccessRequests()) { List fileAccessRequestList = fileAccessRequestMap.get(fileAccessRequest.getRequester()); if (fileAccessRequestList == null) { fileAccessRequestList = new ArrayList<>(); @@ -250,6 +266,21 @@ public String formatAccessRequestTimestamp(List fileAccessReq return Util.getDateTimeFormat().format(date); } + public String getAccessRequestStates(List fileAccessRequests) { + String result = ""; + if (fileAccessRequests != null) { + Map items = fileAccessRequests.stream() + .sorted(Comparator.comparing(FileAccessRequest::getState)) + .collect(Collectors.groupingBy( + FileAccessRequest::getStateLabelNationalized, + Collectors.counting())); + + result = items.entrySet().stream().map(entry -> entry.getKey() + ":" + entry.getValue()) + .collect(Collectors.joining(", ", "[ ", " ]")); + } + return result; + } + private void addFileToRoleAssignee(RoleAssignment assignment, boolean fileDeleted) { RoleAssignee ra = roleAssigneeService.getRoleAssignee(assignment.getAssigneeIdentifier()); List assignments = roleAssigneeMap.get(ra); diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 402a1b06e3c..bd91363d2bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -1064,5 +1064,39 @@ public List getEffectiveRoleAssignments(AuthenticatedUser user, return Stream.concat(directAssignments, groupAssignments) .collect(Collectors.toList()); } + + /** + * Determines if a user can view a dataset version based on its release status + * and the supplied Locally FAIR role assignees. 
+ * + * @param req The request containing the user and Ip info (for IPgroups) + * @param dvObject the dvObject that may have locallyFairAssignees + * @return true if the user has locally FAIR access + */ + public boolean hasLocallyFAIRAccess(DataverseRequest req, DvObject dvObject) { + Set locallyFairAssignees = dvObject.getLocallyFAIRRoleAssigneeIdentifiers(); + // If no locally FAIR restrictions, it's publicly viewable + if (locallyFairAssignees.isEmpty()) { + return false; + } + + // Check if user is in the locally FAIR assignee list + Set userAndGroups = new HashSet<>(groupService.groupsFor(req)); + User user = req.getUser(); + if (user.isAuthenticated()) { + if(user.isSuperuser()) { + return true; + } + userAndGroups.add(user); + } + + for (RoleAssignee ra : userAndGroups) { + if (locallyFairAssignees.contains(ra.getIdentifier())) { + return true; + } + } + + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/PermissionsWrapper.java index 2c6f8ff2fb1..0af3816035a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionsWrapper.java @@ -7,12 +7,15 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.*; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.faces.view.ViewScoped; @@ -31,6 +34,9 @@ public class PermissionsWrapper implements java.io.Serializable { @EJB PermissionServiceBean permissionService; + 
@EJB + DatasetVersionServiceBean datasetVersionService; + @Inject DataverseSession session; @@ -55,6 +61,7 @@ public class PermissionsWrapper implements java.io.Serializable { private final Map fileDownloadPermissionMap = new HashMap<>(); // { DvObject.id : Boolean } private final Map datasetPermissionMap = new HashMap<>(); // { Permission human_name : Boolean } + Boolean hasLocallyFAIRAccess; /** * Check if the current Dataset can Issue Commands * @@ -253,9 +260,45 @@ public boolean canIssueDeleteDatasetCommand(DvObject dvo){ // PUBLISH DATASET public boolean canIssuePublishDatasetCommand(DvObject dvo){ - return canIssueCommand(dvo, PublishDatasetCommand.class); + User u = session.getUser(); + if (dvo == null || u == null || u instanceof GuestUser || !(dvo instanceof Dataset)) { + return false; // guests can not publish + } + if (u.isSuperuser()) { + return true; + } + // Return false if dataset has 0 files and user want to 'publish' or 'submit for review' and 'publish dataset requires files' flag is set + Dataset ds = (Dataset)dvo; + Dataverse dv = ds.getOwner(); + if (dv != null && !datasetVersionHasFiles(ds.getLatestVersion()) && dv.getEffectiveRequiresFilesToPublishDataset()) { + return false; + } + return canIssueCommand(ds, PublishDatasetCommand.class); } - + + // SUBMIT DATASET FOR REVIEW + public boolean canIssueSubmitDatasetForReviewCommand(DvObject dvo) { + User u = session.getUser(); + if (dvo == null || u == null || u instanceof GuestUser || !(dvo instanceof Dataset)) { + return false; // guests can not submit for review + } + // Return false if dataset has 0 files and user want to 'publish' or 'submit for review' and 'publish dataset requires files' flag is set + Dataset ds = (Dataset)dvo; + Dataverse dv = ds.getOwner(); + if (dv != null && !datasetVersionHasFiles(ds.getLatestVersion()) && dv.getEffectiveRequiresFilesToPublishDataset()) { + return false; + } + return canIssueCommand(ds, SubmitDatasetForReviewCommand.class); + } + + // cache the 
hasFiles in the ds version for performance reasons + private boolean datasetVersionHasFiles(DatasetVersion dsv) { + if (dsv.hasFiles() == null) { + dsv.setHasFiles(datasetVersionService.hasFiles(dsv.getId())); + } + return dsv.hasFiles(); + } + // For the dataverse_header fragment (and therefore, most of the pages), // we need to know if authenticated users can add dataverses and datasets to the // root collection. For the "Add Data" menu further in the search include fragment @@ -297,4 +340,12 @@ public String notAuthorized(){ public String notFound() { return navigationWrapper.notFound(); } + + // The locallyFAIRraIds should not change within a given view (they are set in the parent Dataverse of whatever object the view is for) + public boolean hasLocallyFAIRAccess(DataverseRequest req, DvObject dvo) { + if(hasLocallyFAIRAccess == null ) { + hasLocallyFAIRAccess = permissionService.hasLocallyFAIRAccess(req, dvo); + } + return hasLocallyFAIRAccess; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 23db066dc14..1b927162afa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -127,7 +127,9 @@ public class SettingsWrapper implements java.io.Serializable { private String metricsUrl = null; private String publishDatasetDisclaimerText = null; - + + private String submitForReviewDatasetDisclaimerText = null; + private Boolean dataFilePIDSequentialDependent = null; private Boolean customLicenseAllowed = null; @@ -885,4 +887,15 @@ public String getPublishDatasetDisclaimerText() { public Boolean isHasPublishDatasetDisclaimerText() { return !StringUtil.isEmpty(getPublishDatasetDisclaimerText()); } + + public String getSubmitForReviewDatasetDisclaimerText() { + if (submitForReviewDatasetDisclaimerText == null) { + submitForReviewDatasetDisclaimerText = 
getValueForKey(Key.SubmitForReviewDatasetDisclaimerText); + } + return submitForReviewDatasetDisclaimerText; + } + + public Boolean isHasSubmitForReviewDatasetDisclaimerText() { + return !StringUtil.isEmpty(getSubmitForReviewDatasetDisclaimerText()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 46736da73d4..eca3f470938 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -208,11 +208,11 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean // If no other logo we attempt to auto-select via the optimized, native // query-based method // from the DatasetVersionService: - if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) { + if (!hasDatasetLogo && (!autoselect || datasetVersionService.getThumbnailByVersionId(versionId) == null)) { return null; } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; + String url = dataset.getThumbnailUrl(); logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; diff --git a/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileLabel.java b/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileLabel.java new file mode 100644 index 00000000000..d7317031f3b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/ValidateDataFileLabel.java @@ -0,0 +1,25 @@ +package edu.harvard.iq.dataverse; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import java.lang.annotation.Documented; +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +@Target({FIELD}) +@Retention(RUNTIME) 
+@Constraint(validatedBy = {FileLabelValidator.class}) +@Documented +public @interface ValidateDataFileLabel { + + String message() default "Failed Validation for Validate Data File Label"; + + Class[] groups() default {}; + + Class[] payload() default {}; + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 1c8984f47d3..a23e2a92c07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -77,7 +77,7 @@ public abstract class AbstractApiBean { private static final Logger logger = Logger.getLogger(AbstractApiBean.class.getName()); private static final String DATAVERSE_KEY_HEADER_NAME = "X-Dataverse-key"; - private static final String PERSISTENT_ID_KEY=":persistentId"; + protected static final String PERSISTENT_ID_KEY=":persistentId"; private static final String ALIAS_KEY=":alias"; public static final String STATUS_WF_IN_PROGRESS = "WORKFLOW_IN_PROGRESS"; public static final String DATAVERSE_WORKFLOW_INVOCATION_HEADER_NAME = "X-Dataverse-invocationID"; @@ -243,6 +243,9 @@ String getWrappedMessageWhenJson() { @EJB GuestbookResponseServiceBean gbRespSvc; + @EJB + TemplateServiceBean templateSvc; + @Inject FailedPIDResolutionLoggingServiceBean fprLogService; @@ -370,8 +373,32 @@ protected Dataverse findDataverseOrDie( String dvIdtf ) throws WrappedResponse { } return dv; } + /** Find a dataverse but filter according to the visibility from the locallyFAIRRoleAssignments + * + * @param dvIdtf - the dataverse identifier + * @param req - the DataverseRequest + * @return the dataverse if found and visible, otherwise throws WrappedResponse + * @throws WrappedResponse if dataverse is not found (in findDataverseOrDie()) or not visible + */ + protected Dataverse findDataverseUserCanSeeOrDie(String dvIdtf, DataverseRequest req) throws WrappedResponse { + Dataverse dataverse = 
findDataverseOrDie(dvIdtf); + if (dataverse.isLocallyFAIR() && !permissionSvc.hasLocallyFAIRAccess(req, dataverse)) { + throw new WrappedResponse(error( Response.Status.NOT_FOUND, "Can't find dataverse with identifier='" + dvIdtf + "'")); + } + return dataverse; + } + + protected Template findTemplateOrDie(Long templateId) throws WrappedResponse { + + Template template = templateSvc.find(templateId); + if (template == null) { + throw new WrappedResponse( + error(Response.Status.NOT_FOUND, "Can't find template with identifier='" + templateId + "'")); + } + return template; + } - protected Template findTemplateOrDie(Long templateId, Dataverse dataverse) throws WrappedResponse { + protected Template findTemplateInDataverseOrParentsOrDie(Long templateId, Dataverse dataverse) throws WrappedResponse { List