Integrate transcription in PeerTube

2024-07-02 10:35:56 -05:00 · 2024-06-13 09:23:12 +02:00 · 2024-06-13 09:23:12 +02:00 · 1bfb791e05
commit 1bfb791e05
parent ef14cf4a5c
172 changed files with 2674 additions and 945 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -71,6 +71,20 @@ jobs:
            ${{ runner.OS }}-fixtures-
            ${{ runner.OS }}-

+      # Persist ~/.cache/pip between workflow runs; one cache entry per runner OS and test suite.
+      # NOTE(review): the trailing "v1" in the key presumably exists to invalidate the cache by bumping it - confirm.
+      - name: Cache PeerTube pip directory
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pip
+          key: ${{ runner.OS }}-${{ matrix.test_suite }}-pip-v1
+
+      # Persist ~/.cache/huggingface between workflow runs; one cache entry per runner OS and test suite.
+      # NOTE(review): presumably avoids re-downloading transcription models on every run - confirm.
+      - name: Cache Hugging Face models
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/huggingface
+          key: ${{ runner.OS }}-${{ matrix.test_suite }}-hugging-face-v1
+
      - name: Set env test variable (schedule)
        if: github.event_name != 'schedule'
        run: |
--- a/.gitignore
+++ b/.gitignore
@ -12,8 +12,11 @@ yarn-error.log
 /test4/
 /test5/
 /test6/
+
+# Big fixtures generated/downloaded on-demand
 /packages/tests/fixtures/video_high_bitrate_1080p.mp4
 /packages/tests/fixtures/video_59fps.mp4
+/packages/tests/fixtures/transcription/models-v1/

 # Production
 /storage
--- a/apps/peertube-runner/src/server/process/process.ts
+++ b/apps/peertube-runner/src/server/process/process.ts
@ -1,6 +1,7 @@
 import {
  RunnerJobLiveRTMPHLSTranscodingPayload,
  RunnerJobStudioTranscodingPayload,
+  RunnerJobTranscriptionPayload,
  RunnerJobVODAudioMergeTranscodingPayload,
  RunnerJobVODHLSTranscodingPayload,
  RunnerJobVODWebVideoTranscodingPayload
@ -9,25 +10,41 @@ import { logger } from '../../shared/index.js'
 import { processAudioMergeTranscoding, processHLSTranscoding, ProcessOptions, processWebVideoTranscoding } from './shared/index.js'
 import { ProcessLiveRTMPHLSTranscoding } from './shared/process-live.js'
 import { processStudioTranscoding } from './shared/process-studio.js'
+import { processVideoTranscription } from './shared/process-transcription.js'

 export async function processJob (options: ProcessOptions) {
  const { server, job } = options

  logger.info(`[${server.url}] Processing job of type ${job.type}: ${job.uuid}`, { payload: job.payload })

-  if (job.type === 'vod-audio-merge-transcoding') {
-    await processAudioMergeTranscoding(options as ProcessOptions<RunnerJobVODAudioMergeTranscodingPayload>)
-  } else if (job.type === 'vod-web-video-transcoding') {
-    await processWebVideoTranscoding(options as ProcessOptions<RunnerJobVODWebVideoTranscodingPayload>)
-  } else if (job.type === 'vod-hls-transcoding') {
-    await processHLSTranscoding(options as ProcessOptions<RunnerJobVODHLSTranscodingPayload>)
-  } else if (job.type === 'live-rtmp-hls-transcoding') {
-    await new ProcessLiveRTMPHLSTranscoding(options as ProcessOptions<RunnerJobLiveRTMPHLSTranscodingPayload>).process()
-  } else if (job.type === 'video-studio-transcoding') {
-    await processStudioTranscoding(options as ProcessOptions<RunnerJobStudioTranscodingPayload>)
-  } else {
-    logger.error(`Unknown job ${job.type} to process`)
-    return
+  switch (job.type) {
+    case 'vod-audio-merge-transcoding':
+      await processAudioMergeTranscoding(options as ProcessOptions<RunnerJobVODAudioMergeTranscodingPayload>)
+      break
+
+    case 'vod-web-video-transcoding':
+      await processWebVideoTranscoding(options as ProcessOptions<RunnerJobVODWebVideoTranscodingPayload>)
+      break
+
+    case 'vod-hls-transcoding':
+      await processHLSTranscoding(options as ProcessOptions<RunnerJobVODHLSTranscodingPayload>)
+      break
+
+    case 'live-rtmp-hls-transcoding':
+      await new ProcessLiveRTMPHLSTranscoding(options as ProcessOptions<RunnerJobLiveRTMPHLSTranscodingPayload>).process()
+      break
+
+    case 'video-studio-transcoding':
+      await processStudioTranscoding(options as ProcessOptions<RunnerJobStudioTranscodingPayload>)
+      break
+
+    case 'video-transcription':
+      await processVideoTranscription(options as ProcessOptions<RunnerJobTranscriptionPayload>)
+      break
+
+    default:
+      logger.error(`Unknown job ${job.type} to process`)
+      return
  }

  logger.info(`[${server.url}] Finished processing job of type ${job.type}: ${job.uuid}`)
--- a/apps/peertube-runner/src/server/process/shared/common.ts
+++ b/apps/peertube-runner/src/server/process/shared/common.ts
@ -5,7 +5,7 @@ import { RunnerJob, RunnerJobPayload } from '@peertube/peertube-models'
 import { buildUUID } from '@peertube/peertube-node-utils'
 import { PeerTubeServer } from '@peertube/peertube-server-commands'
 import { ConfigManager, downloadFile, logger } from '../../../shared/index.js'
-import { getTranscodingLogger } from './transcoding-logger.js'
+import { getWinstonLogger } from './winston-logger.js'

 export type JobWithToken <T extends RunnerJobPayload = RunnerJobPayload> = RunnerJob<T> & { jobToken: string }

@ -101,6 +101,6 @@ function getCommonFFmpegOptions () {
      available: getDefaultAvailableEncoders(),
      encodersToTry: getDefaultEncodersToTry()
    },
-    logger: getTranscodingLogger()
+    logger: getWinstonLogger()
  }
 }
--- a/apps/peertube-runner/src/server/process/shared/index.ts
+++ b/apps/peertube-runner/src/server/process/shared/index.ts
@ -1,3 +1,3 @@
 export * from './common.js'
 export * from './process-vod.js'
-export * from './transcoding-logger.js'
+export * from './winston-logger.js'
--- a/apps/peertube-runner/src/server/process/shared/process-transcription.ts
+++ b/apps/peertube-runner/src/server/process/shared/process-transcription.ts
@ -0,0 +1,79 @@
+import { hasAudioStream } from '@peertube/peertube-ffmpeg'
+import { RunnerJobTranscriptionPayload, TranscriptionSuccess } from '@peertube/peertube-models'
+import { buildSUUID } from '@peertube/peertube-node-utils'
+import { TranscriptionModel, WhisperBuiltinModel, transcriberFactory } from '@peertube/peertube-transcription'
+import { remove } from 'fs-extra/esm'
+import { join } from 'path'
+import { ConfigManager } from '../../../shared/config-manager.js'
+import { logger } from '../../../shared/index.js'
+import { ProcessOptions, downloadInputFile, scheduleTranscodingProgress } from './common.js'
+import { getWinstonLogger } from './winston-logger.js'
+
+/**
+ * Process a `video-transcription` runner job: download the input file, run the configured
+ * transcription engine on it and report the resulting VTT file and detected language to the server.
+ *
+ * If the downloaded file has no audio stream, the job is reported to the server as errored instead.
+ * The progress timer, the downloaded input file and the transcript output directory are released
+ * whether the job succeeds or fails.
+ *
+ * @param options - server, job and runner token of the job to process
+ */
+export async function processVideoTranscription (options: ProcessOptions<RunnerJobTranscriptionPayload>) {
+  const { server, job, runnerToken } = options
+
+  const config = ConfigManager.Instance.getConfig().transcription
+
+  const payload = job.payload
+
+  let inputPath: string
+
+  // Computed before the progress timer is scheduled so a failure here cannot leave the timer running
+  const outputPath = join(ConfigManager.Instance.getTranscriptionDirectory(), buildSUUID())
+
+  // The progress getter always yields undefined for transcription jobs
+  const updateProgressInterval = scheduleTranscodingProgress({
+    job,
+    server,
+    runnerToken,
+    progressGetter: () => undefined
+  })
+
+  try {
+    // Built inside the try block so a factory failure still goes through the cleanup below
+    const transcriber = transcriberFactory.createFromEngineName({
+      engineName: config.engine,
+      enginePath: config.enginePath,
+      logger: getWinstonLogger()
+    })
+
+    logger.info(`Downloading input file ${payload.input.videoFileUrl} for transcription job ${job.jobToken}`)
+
+    inputPath = await downloadInputFile({ url: payload.input.videoFileUrl, runnerToken, job })
+
+    logger.info(`Downloaded input file ${payload.input.videoFileUrl} for job ${job.jobToken}. Running transcription.`)
+
+    if (await hasAudioStream(inputPath) !== true) {
+      await server.runnerJobs.error({
+        jobToken: job.jobToken,
+        jobUUID: job.uuid,
+        runnerToken,
+        message: 'This input file does not contain audio'
+      })
+
+      return
+    }
+
+    // A custom model path takes precedence over the builtin model name
+    const transcriptFile = await transcriber.transcribe({
+      mediaFilePath: inputPath,
+      model: config.modelPath
+        ? await TranscriptionModel.fromPath(config.modelPath)
+        : new WhisperBuiltinModel(config.model),
+      format: 'vtt',
+      transcriptDirectory: outputPath
+    })
+
+    const successBody: TranscriptionSuccess = {
+      inputLanguage: transcriptFile.language,
+      vttFile: transcriptFile.path
+    }
+
+    await server.runnerJobs.success({
+      jobToken: job.jobToken,
+      jobUUID: job.uuid,
+      runnerToken,
+      payload: successBody
+    })
+  } finally {
+    // Stop the timer first: a rejected file removal below must not leave it running
+    if (updateProgressInterval) clearInterval(updateProgressInterval)
+
+    // inputPath stays unset when the download itself failed
+    if (inputPath) await remove(inputPath)
+    await remove(outputPath)
+  }
+}
--- a/apps/peertube-runner/src/server/process/shared/transcoding-logger.ts
+++ b/apps/peertube-runner/src/server/process/shared/transcoding-logger.ts
@ -1,19 +0,0 @@
-import { LogFn } from 'pino'
-import { logger } from '../../../shared/index.js'
-
-export function getTranscodingLogger () {
-  return {
-    info: buildWinstonLogger(logger.info.bind(logger)),
-    debug: buildWinstonLogger(logger.debug.bind(logger)),
-    warn: buildWinstonLogger(logger.warn.bind(logger)),
-    error: buildWinstonLogger(logger.error.bind(logger))
-  }
-}
-
-function buildWinstonLogger (log: LogFn) {
-  return (arg1: string, arg2?: object) => {
-    if (arg2) return log(arg2, arg1)
-
-    return log(arg1)
-  }
-}
--- a/apps/peertube-runner/src/server/process/shared/winston-logger.ts
+++ b/apps/peertube-runner/src/server/process/shared/winston-logger.ts
@ -0,0 +1,19 @@
+import { LogFn } from 'pino'
+import { logger } from '../../../shared/index.js'
+
+/**
+ * Build an object exposing `info`, `debug`, `warn` and `error` functions that take
+ * `(message, meta?)` arguments and forward them to the matching method of the runner logger.
+ */
+export function getWinstonLogger () {
+  const forLevel = (level: 'info' | 'debug' | 'warn' | 'error') => buildLogLevelFn(logger[level].bind(logger))
+
+  return {
+    info: forLevel('info'),
+    debug: forLevel('debug'),
+    warn: forLevel('warn'),
+    error: forLevel('error')
+  }
+}
+
+// Wrap a pino log function so it can be called as (message, meta?):
+// when a meta object is given it is passed first, followed by the message
+function buildLogLevelFn (log: LogFn) {
+  return (arg1: string, arg2?: object) => arg2 ? log(arg2, arg1) : log(arg1)
+}
--- a/apps/peertube-runner/src/server/shared/supported-job.ts
+++ b/apps/peertube-runner/src/server/shared/supported-job.ts
@ -1,15 +1,16 @@
 import {
  RunnerJobLiveRTMPHLSTranscodingPayload,
  RunnerJobPayload,
-  RunnerJobType,
  RunnerJobStudioTranscodingPayload,
+  RunnerJobTranscriptionPayload,
+  RunnerJobType,
  RunnerJobVODAudioMergeTranscodingPayload,
  RunnerJobVODHLSTranscodingPayload,
  RunnerJobVODWebVideoTranscodingPayload,
  VideoStudioTaskPayload
 } from '@peertube/peertube-models'

-const supportedMatrix = {
+const supportedMatrix: { [ id in RunnerJobType ]: (payload: RunnerJobPayload) => boolean } = {
  'vod-web-video-transcoding': (_payload: RunnerJobVODWebVideoTranscodingPayload) => {
    return true
  },
@ -29,6 +30,9 @@ const supportedMatrix = {
    if (!Array.isArray(tasks)) return false

    return tasks.every(t => t && supported.has(t.name))
+  },
+  'video-transcription': (_payload: RunnerJobTranscriptionPayload) => {
+    return true
  }
 }

--- a/apps/peertube-runner/src/shared/config-manager.ts
+++ b/apps/peertube-runner/src/shared/config-manager.ts
@ -1,4 +1,5 @@
 import { parse, stringify } from '@iarna/toml'
+import { TranscriptionEngineName, WhisperBuiltinModelName } from '@peertube/peertube-transcription'
 import envPaths from 'env-paths'
 import { ensureDir, pathExists, remove } from 'fs-extra/esm'
 import { readFile, writeFile } from 'fs/promises'
@ -24,6 +25,13 @@ type Config = {
    runnerName: string
    runnerDescription?: string
  }[]
+
+  transcription: {
+    engine: TranscriptionEngineName
+    enginePath: string | null
+    model: WhisperBuiltinModelName
+    modelPath: string | null
+  }
 }

 export class ConfigManager {
@ -37,6 +45,12 @@ export class ConfigManager {
      threads: 2,
      nice: 20
    },
+    transcription: {
+      engine: 'whisper-ctranslate2',
+      enginePath: null,
+      model: 'small',
+      modelPath: null
+    },
    registeredInstances: []
  }

@ -98,6 +112,10 @@ export class ConfigManager {
    return join(paths.cache, this.id, 'transcoding')
  }

+  // Path of the 'transcription' directory located under the cache path of this runner id
+  getTranscriptionDirectory () {
+    const runnerCacheDirectory = join(paths.cache, this.id)
+
+    return join(runnerCacheDirectory, 'transcription')
+  }
+
  getSocketDirectory () {
    return join(paths.data, this.id)
  }
--- a/client/src/app/+admin/config/edit-custom-config/edit-basic-configuration.component.html
+++ b/client/src/app/+admin/config/edit-custom-config/edit-basic-configuration.component.html
@ -318,7 +318,7 @@
            >
            <ng-container ngProjectAs="description">
              <span i18n [hidden]="isImportVideosHttpEnabled()">
-                  ⛔ You need to allow  import with HTTP URL to be able to activate this feature.
+                ⛔ You need to allow  import with HTTP URL to be able to activate this feature.
              </span>
            </ng-container>
            </my-peertube-checkbox>
@ -359,7 +359,6 @@
      </ng-container>

      <ng-container formGroupName="storyboards">
-
        <div class="form-group">
          <my-peertube-checkbox
            inputName="storyboardsEnabled" formControlName="enabled"
@ -370,7 +369,35 @@
            </ng-container>
          </my-peertube-checkbox>
        </div>
+      </ng-container>

+      <ng-container formGroupName="videoTranscription">
+        <div class="form-group">
+          <my-peertube-checkbox
+            inputName="videoTranscriptionEnabled" formControlName="enabled"
+            i18n-labelText labelText="Enable video transcription"
+          >
+            <ng-container ngProjectAs="description">
+              <span i18n>Automatically create a subtitle file of uploaded/imported VOD videos</span>
+            </ng-container>
+
+            <ng-container ngProjectAs="extra">
+              <div class="form-group" formGroupName="remoteRunners" [ngClass]="getTranscriptionRunnerDisabledClass()">
+                <my-peertube-checkbox
+                  inputName="videoTranscriptionRemoteRunnersEnabled" formControlName="enabled"
+                  i18n-labelText labelText="Enable remote runners for transcription"
+                >
+                  <ng-container ngProjectAs="description">
+                    <span i18n>
+                      Use <a routerLink="/admin/system/runners/runners-list">remote runners</a> to process transcription tasks.
+                      Remote runners has to register on your instance first.
+                    </span>
+                  </ng-container>
+                </my-peertube-checkbox>
+              </div>
+            </ng-container>
+          </my-peertube-checkbox>
+        </div>
      </ng-container>
    </div>
  </div>
--- a/client/src/app/+admin/config/edit-custom-config/edit-basic-configuration.component.ts
+++ b/client/src/app/+admin/config/edit-custom-config/edit-basic-configuration.component.ts
@ -137,6 +137,18 @@ export class EditBasicConfigurationComponent implements OnInit, OnChanges {
    return { 'disabled-checkbox-extra': !this.isSearchIndexEnabled() }
  }

+  // ---------------------------------------------------------------------------
+
+  // True only when the "videoTranscription.enabled" form control is strictly `true`
+  isTranscriptionEnabled () {
+    const { enabled } = this.form.value['videoTranscription']
+
+    return enabled === true
+  }
+
+  // ngClass map: flags the remote runners checkbox as disabled while transcription is not enabled
+  getTranscriptionRunnerDisabledClass () {
+    const transcriptionDisabled = !this.isTranscriptionEnabled()
+
+    return { 'disabled-checkbox-extra': transcriptionDisabled }
+  }
+
+  // ---------------------------------------------------------------------------
+
  isAutoFollowIndexEnabled () {
    return this.form.value['followings']['instance']['autoFollowIndex']['enabled'] === true
  }
--- a/client/src/app/+admin/config/edit-custom-config/edit-custom-config.component.ts
+++ b/client/src/app/+admin/config/edit-custom-config/edit-custom-config.component.ts
@ -267,6 +267,12 @@ export class EditCustomConfigComponent extends FormReactive implements OnInit {
          enabled: null
        }
      },
+      videoTranscription: {
+        enabled: null,
+        remoteRunners: {
+          enabled: null
+        }
+      },
      videoFile: {
        update: {
          enabled: null
--- a/client/src/app/+admin/overview/videos/video-list.component.ts
+++ b/client/src/app/+admin/overview/videos/video-list.component.ts
@ -1,7 +1,7 @@
 import { DatePipe, NgClass, NgFor, NgIf } from '@angular/common'
 import { Component, OnInit, ViewChild } from '@angular/core'
 import { ActivatedRoute, Router, RouterLink } from '@angular/router'
-import { AuthService, ConfirmService, Notifier, RestPagination, RestTable } from '@app/core'
+import { AuthService, ConfirmService, Notifier, RestPagination, RestTable, ServerService } from '@app/core'
 import { formatICU, getAbsoluteAPIUrl } from '@app/helpers'
 import { VideoDetails } from '@app/shared/shared-main/video/video-details.model'
 import { VideoFileTokenService } from '@app/shared/shared-main/video/video-file-token.service'
@ -30,6 +30,7 @@ import {
  VideoActionsDropdownComponent
 } from '../../../shared/shared-video-miniature/video-actions-dropdown.component'
 import { VideoAdminService } from './video-admin.service'
+import { VideoCaptionService } from '@app/shared/shared-main/video-caption/video-caption.service'

@Component({
  selector: 'my-video-list',
@ -84,7 +85,8 @@ export class VideoListComponent extends RestTable <Video> implements OnInit {
    removeFiles: true,
    transcoding: true,
    studio: true,
-    stats: true
+    stats: true,
+    generateTranscription: true
  }

  loading = true
@ -100,6 +102,8 @@ export class VideoListComponent extends RestTable <Video> implements OnInit {
    private videoService: VideoService,
    private videoAdminService: VideoAdminService,
    private videoBlockService: VideoBlockService,
+    private videoCaptionService: VideoCaptionService,
+    private server: ServerService,
    private videoFileTokenService: VideoFileTokenService
  ) {
    super()
@ -109,6 +113,10 @@ export class VideoListComponent extends RestTable <Video> implements OnInit {
    return this.auth.getUser()
  }

+  // HTML config as returned by the injected ServerService
+  get serverConfig () {
+    const htmlConfig = this.server.getHTMLConfig()
+
+    return htmlConfig
+  }
+
  ngOnInit () {
    this.initialize()

@ -160,6 +168,14 @@ export class VideoListComponent extends RestTable <Video> implements OnInit {
          isDisplayed: videos => videos.every(v => v.canRemoveFiles(this.authUser)),
          iconName: 'delete'
        }
+      ],
+      [
+        {
+          label: $localize`Generate caption`,
+          handler: videos => this.generateCaption(videos),
+          isDisplayed: videos => videos.every(v => v.canGenerateTranscription(this.authUser, this.serverConfig.videoTranscription.enabled)),
+          iconName: 'video-lang'
+        }
      ]
    ]
  }
@ -399,4 +415,15 @@ export class VideoListComponent extends RestTable <Video> implements OnInit {
        error: err => this.notifier.error(err.message)
      })
  }
+
+  // Request caption generation for the given videos, then notify the user of the outcome
+  private generateCaption (videos: Video[]) {
+    const videoIds = videos.map(video => video.id)
+
+    this.videoCaptionService.generateCaption(videoIds).subscribe({
+      next: () => {
+        this.notifier.success($localize`Transcription jobs created.`)
+      },
+
+      error: err => {
+        this.notifier.error(err.message)
+      }
+    })
+  }
 }
--- a/client/src/app/+admin/system/jobs/jobs.component.ts
+++ b/client/src/app/+admin/system/jobs/jobs.component.ts
@ -69,6 +69,7 @@ export class JobsComponent extends RestTable implements OnInit {
    'video-redundancy',
    'video-studio-edition',
    'video-transcoding',
+    'video-transcription',
    'videos-views-stats'
  ]

--- a/client/src/app/+my-account/my-account-settings/my-account-notification-preferences/my-account-notification-preferences.component.ts
+++ b/client/src/app/+my-account/my-account-settings/my-account-notification-preferences/my-account-notification-preferences.component.ts
@ -49,7 +49,8 @@ export class MyAccountNotificationPreferencesComponent implements OnInit {
      abuseStateChange: $localize`One of your abuse reports has been accepted or rejected by moderators`,
      newPeerTubeVersion: $localize`A new PeerTube version is available`,
      newPluginVersion: $localize`One of your plugin/theme has a new available version`,
-      myVideoStudioEditionFinished: $localize`Video studio edition has finished`
+      myVideoStudioEditionFinished: $localize`Video studio edition has finished`,
+      myVideoTranscriptionGenerated: $localize`The transcription of your video has been generated`
    }
    this.notificationSettingGroups = [
      {
@ -68,7 +69,8 @@ export class MyAccountNotificationPreferencesComponent implements OnInit {
          'blacklistOnMyVideo',
          'myVideoPublished',
          'myVideoImportFinished',
-          'myVideoStudioEditionFinished'
+          'myVideoStudioEditionFinished',
+          'myVideoTranscriptionGenerated'
        ]
      },

--- a/client/src/app/+videos/+video-edit/shared/video-edit.component.html
+++ b/client/src/app/+videos/+video-edit/shared/video-edit.component.html
@ -173,12 +173,8 @@

      <ng-template ngbNavContent>
        <div class="captions">
-
-          <div class="captions-header">
-            <button (click)="openAddCaptionModal()" class="peertube-create-button">
-              <my-global-icon iconName="add" aria-hidden="true"></my-global-icon>
-              <ng-container i18n>Add another caption</ng-container>
-            </button>
+          <div class="alert pt-alert-primary" *ngIf="displayTranscriptionInfo && isTranscriptionEnabled()" i18n>
+            A subtitle will be automatically generated from your video.
          </div>

          <div class="form-group" *ngFor="let videoCaption of videoCaptions">
@ -226,6 +222,13 @@
            No captions for now.
          </div>

+          <div class="mt-3 mb-3">
+            <button (click)="openAddCaptionModal()" class="peertube-create-button">
+              <my-global-icon iconName="add" aria-hidden="true"></my-global-icon>
+              <ng-container i18n>Add a caption</ng-container>
+            </button>
+          </div>
+
        </div>
      </ng-template>
    </ng-container>
--- a/client/src/app/+videos/+video-edit/shared/video-edit.component.scss
+++ b/client/src/app/+videos/+video-edit/shared/video-edit.component.scss
@ -24,11 +24,6 @@ my-peertube-checkbox {
  }
 }

-.captions-header {
-  text-align: end;
-  margin-bottom: 1rem;
-}
-
 .caption-entry {
  display: flex;
  height: 40px;
--- a/client/src/app/+videos/+video-edit/shared/video-edit.component.ts
+++ b/client/src/app/+videos/+video-edit/shared/video-edit.component.ts
@ -1,5 +1,16 @@
 import { DatePipe, NgClass, NgFor, NgIf, NgTemplateOutlet } from '@angular/common'
-import { ChangeDetectorRef, Component, EventEmitter, Input, NgZone, OnDestroy, OnInit, Output, ViewChild } from '@angular/core'
+import {
+  ChangeDetectorRef,
+  Component,
+  EventEmitter,
+  Input,
+  NgZone,
+  OnDestroy,
+  OnInit,
+  Output,
+  ViewChild,
+  booleanAttribute
+} from '@angular/core'
 import { AbstractControl, FormArray, FormGroup, FormsModule, ReactiveFormsModule, Validators } from '@angular/forms'
 import { HooksService, PluginService, ServerService } from '@app/core'
 import { removeElementFromArray } from '@app/helpers'
@ -63,10 +74,10 @@ import { HelpComponent } from '../../../shared/shared-main/misc/help.component'
 import { EmbedComponent } from '../../../shared/shared-main/video/embed.component'
 import { LiveDocumentationLinkComponent } from '../../../shared/shared-video-live/live-documentation-link.component'
 import { I18nPrimengCalendarService } from './i18n-primeng-calendar.service'
+import { ThumbnailManagerComponent } from './thumbnail-manager/thumbnail-manager.component'
 import { VideoCaptionAddModalComponent } from './video-caption-add-modal.component'
 import { VideoCaptionEditModalContentComponent } from './video-caption-edit-modal-content/video-caption-edit-modal-content.component'
 import { VideoEditType } from './video-edit.type'
-import { ThumbnailManagerComponent } from './thumbnail-manager/thumbnail-manager.component'

 type VideoLanguages = VideoConstant<string> & { group?: string }
 type PluginField = {
@ -122,15 +133,17 @@ export class VideoEditComponent implements OnInit, OnDestroy {
  @Input() publishedVideo: VideoDetails

  @Input() userVideoChannels: SelectChannelItem[] = []
-  @Input() forbidScheduledPublication = true
+
+  @Input({ transform: booleanAttribute }) forbidScheduledPublication = true
+  @Input({ transform: booleanAttribute }) displayTranscriptionInfo = true

  @Input() videoCaptions: VideoCaptionWithPathEdit[] = []
  @Input() videoSource: VideoSource

  @Input() videoChapters: VideoChapter[] = []

-  @Input() hideWaitTranscoding = false
-  @Input() updateVideoFileEnabled = false
+  @Input({ transform: booleanAttribute }) hideWaitTranscoding = false
+  @Input({ transform: booleanAttribute }) updateVideoFileEnabled = false

  @Input() type: VideoEditType
  @Input() liveVideo: LiveVideo
@ -405,6 +418,10 @@ export class VideoEditComponent implements OnInit, OnDestroy {
    return !!this.form.value['originallyPublishedAt']
  }

+  // Whether the server config reports video transcription as enabled
+  isTranscriptionEnabled () {
+    const { videoTranscription } = this.serverConfig
+
+    return videoTranscription.enabled
+  }
+
  // ---------------------------------------------------------------------------

  resetField (name: string) {
--- a/client/src/app/+videos/+video-edit/video-add-components/video-go-live.component.html
+++ b/client/src/app/+videos/+video-edit/video-add-components/video-go-live.component.html
@ -53,8 +53,8 @@
 <form [hidden]="!isInUpdateForm" novalidate [formGroup]="form">
  <my-video-edit
    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions"
-    [forbidScheduledPublication]="true" [hideWaitTranscoding]="true"
    [validationMessages]="validationMessages" [userVideoChannels]="userVideoChannels" [liveVideo]="liveVideo"
+    forbidScheduledPublication="true" hideWaitTranscoding="true" displayTranscriptionInfo="false"
    type="go-live"
  ></my-video-edit>

--- a/client/src/app/+videos/+video-edit/video-add-components/video-import-torrent.component.html
+++ b/client/src/app/+videos/+video-edit/video-add-components/video-import-torrent.component.html
@ -59,8 +59,9 @@
 <!-- Hidden because we want to load the component -->
 <form [hidden]="!hasImportedVideo" novalidate [formGroup]="form">
  <my-video-edit
-    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions" [forbidScheduledPublication]="true"
+    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions"
    [validationMessages]="validationMessages" [userVideoChannels]="userVideoChannels"
+    forbidScheduledPublication="true"
    type="import-torrent"
  ></my-video-edit>

--- a/client/src/app/+videos/+video-edit/video-add-components/video-import-url.component.html
+++ b/client/src/app/+videos/+video-edit/video-add-components/video-import-url.component.html
@ -57,8 +57,9 @@
 <form [hidden]="!hasImportedVideo" novalidate [formGroup]="form">
  <my-video-edit
    #videoEdit
-    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions" [forbidScheduledPublication]="true"
+    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions"
    [validationMessages]="validationMessages" [userVideoChannels]="userVideoChannels"
+    forbidScheduledPublication="true"
    type="import-url"
  ></my-video-edit>

--- a/client/src/app/+videos/+video-edit/video-add-components/video-upload.component.html
+++ b/client/src/app/+videos/+video-edit/video-add-components/video-upload.component.html
@ -67,7 +67,7 @@
  <my-video-edit
    [form]="form" [formErrors]="formErrors" [videoCaptions]="videoCaptions"
    [validationMessages]="validationMessages" [userVideoChannels]="userVideoChannels"
-    [forbidScheduledPublication]="false"
+    forbidScheduledPublication="false"
    type="upload"
  ></my-video-edit>

--- a/client/src/app/+videos/+video-edit/video-update.component.html
+++ b/client/src/app/+videos/+video-edit/video-update.component.html
@ -20,6 +20,7 @@
      type="update" (pluginFieldsAdded)="hydratePluginFieldsFromVideo()"
      [liveVideo]="liveVideo" [publishedVideo]="videoDetails"
      [videoSource]="videoSource" [updateVideoFileEnabled]="isUpdateVideoFileEnabled()"
+      displayTranscriptionInfo="false"

      (formBuilt)="onFormBuilt()"
    >
--- a/client/src/app/shared/shared-instance/instance-features-table.component.html
+++ b/client/src/app/shared/shared-instance/instance-features-table.component.html
@ -34,6 +34,13 @@
      </td>
    </tr>

+    <tr>
+      <th i18n class="sub-label" scope="row">Automatic transcription</th>
+      <td>
+        <my-feature-boolean [value]="serverConfig.videoTranscription.enabled"></my-feature-boolean>
+      </td>
+    </tr>
+
    <tr>
      <th i18n class="sub-label" scope="row">Video uploads</th>
      <td>
--- a/client/src/app/shared/shared-main/buttons/action-dropdown.component.scss
+++ b/client/src/app/shared/shared-main/buttons/action-dropdown.component.scss
@ -63,6 +63,10 @@

    &.with-icon {
      @include dropdown-with-icon-item;
+
+      .icon-video-lang {
+        top: 0;
+      }
    }

    a,
--- a/client/src/app/shared/shared-main/users/user-notification.model.ts
+++ b/client/src/app/shared/shared-main/users/user-notification.model.ts
@ -11,6 +11,7 @@ import {
  UserNotificationType,
  UserNotificationType_Type,
  UserRight,
+  VideoConstant,
  VideoInfo
 } from '@peertube/peertube-models'
 import { logger } from '@root-helpers/logger'
@ -90,6 +91,12 @@ export class UserNotification implements UserNotificationServer {
    username: string
  }

+  videoCaption?: {
+    id: number
+    language: VideoConstant<string>
+    video: VideoInfo
+  }
+
  createdAt: string
  updatedAt: string

@ -149,6 +156,8 @@ export class UserNotification implements UserNotificationServer {
      this.peertube = hash.peertube
      this.registration = hash.registration

+      this.videoCaption = hash.videoCaption
+
      this.createdAt = hash.createdAt
      this.updatedAt = hash.updatedAt

@ -250,6 +259,10 @@ export class UserNotification implements UserNotificationServer {
          this.pluginQueryParams.pluginType = this.plugin.type + ''
          break

+        case UserNotificationType.MY_VIDEO_TRANSCRIPTION_GENERATED:
+          this.videoUrl = this.buildVideoUrl(this.videoCaption.video)
+          break
+
        case UserNotificationType.MY_VIDEO_STUDIO_EDITION_FINISHED:
          this.videoUrl = this.buildVideoUrl(this.video)
          break
--- a/client/src/app/shared/shared-main/video-caption/video-caption.service.ts
+++ b/client/src/app/shared/shared-main/video-caption/video-caption.service.ts
@ -1,15 +1,15 @@
-import { Observable, of } from 'rxjs'
-import { catchError, map, switchMap } from 'rxjs/operators'
 import { HttpClient } from '@angular/common/http'
 import { Injectable } from '@angular/core'
 import { RestExtractor, ServerService } from '@app/core'
 import { objectToFormData } from '@app/helpers'
 import { peertubeTranslate, sortBy } from '@peertube/peertube-core-utils'
 import { ResultList, VideoCaption } from '@peertube/peertube-models'
+import { Observable, from, of } from 'rxjs'
+import { catchError, concatMap, map, switchMap, toArray } from 'rxjs/operators'
 import { environment } from '../../../../environments/environment'
-import { VideoCaptionEdit } from './video-caption-edit.model'
 import { VideoPasswordService } from '../video/video-password.service'
 import { VideoService } from '../video/video.service'
+import { VideoCaptionEdit } from './video-caption-edit.model'

@Injectable()
 export class VideoCaptionService {
@ -74,4 +74,13 @@ export class VideoCaptionService {
  getCaptionContent ({ captionPath }: Pick<VideoCaption, 'captionPath'>) {
    return this.authHttp.get(environment.originServerUrl + captionPath, { responseType: 'text' })
  }
+
+  // Send one "captions/generate" POST request per video id, one after the other,
+  // and emit the collected responses as a single array once all of them completed
+  generateCaption (videoIds: (number | string)[]) {
+    const requestGeneration = (videoId: number | string) => {
+      return this.authHttp.post(`${VideoService.BASE_VIDEO_URL}/${videoId}/captions/generate`, {})
+    }
+
+    return from(videoIds).pipe(
+      concatMap(requestGeneration),
+      toArray(),
+      catchError(err => this.restExtractor.handleError(err))
+    )
+  }
 }
--- a/client/src/app/shared/shared-main/video/video.model.ts
+++ b/client/src/app/shared/shared-main/video/video.model.ts
@ -244,6 +244,10 @@ export class Video implements VideoServerModel {
      this.isUpdatableBy(user)
  }

+  // Whether caption generation can be offered to this user for this video:
+  // transcription must be enabled, the video must be local and the user must hold the UPDATE_ANY_VIDEO right
+  // `user` is checked before being dereferenced so a missing user yields false instead of throwing
+  canGenerateTranscription (user: AuthUser, transcriptionEnabled: boolean) {
+    return transcriptionEnabled && this.isLocal && !!user && user.hasRight(UserRight.UPDATE_ANY_VIDEO)
+  }
+
  // ---------------------------------------------------------------------------

  isOwner (user: AuthUser) {
--- a/client/src/app/shared/shared-video-miniature/video-actions-dropdown.component.ts
+++ b/client/src/app/shared/shared-video-miniature/video-actions-dropdown.component.ts
@ -11,6 +11,7 @@ import {
  DropdownButtonSize,
  DropdownDirection
 } from '../shared-main/buttons/action-dropdown.component'
+import { VideoCaptionService } from '../shared-main/video-caption/video-caption.service'
 import { RedundancyService } from '../shared-main/video/redundancy.service'
 import { VideoDetails } from '../shared-main/video/video-details.model'
 import { Video } from '../shared-main/video/video.model'
@ -37,6 +38,7 @@ export type VideoActionsDisplayType = {
  transcoding?: boolean
  studio?: boolean
  stats?: boolean
+  generateTranscription?: boolean
 }

@Component({
@ -115,6 +117,7 @@ export class VideoActionsDropdownComponent implements OnChanges {
    private videoBlocklistService: VideoBlockService,
    private screenService: ScreenService,
    private videoService: VideoService,
+    private videoCaptionService: VideoCaptionService,
    private redundancyService: RedundancyService,
    private serverService: ServerService
  ) { }
@ -206,6 +209,10 @@ export class VideoActionsDropdownComponent implements OnChanges {
    return this.video.isLiveInfoAvailableBy(this.user)
  }

+  // Delegate to the video model, feeding it the current user and the instance transcription flag
+  canGenerateTranscription () {
+    const { enabled } = this.serverService.getHTMLConfig().videoTranscription
+
+    return this.video.canGenerateTranscription(this.user, enabled)
+  }
+
  isVideoDownloadableByAnonymous () {
    return (
      this.video &&
@ -338,7 +345,7 @@ export class VideoActionsDropdownComponent implements OnChanges {
    this.videoService.runTranscoding({ videos: [ video ], type, askForForceTranscodingIfNeeded: true })
      .subscribe({
        next: () => {
-          this.notifier.success($localize`Transcoding jobs created for "${video.name}".`)
+          this.notifier.success($localize`Transcoding job created for "${video.name}".`)
          this.transcodingCreated.emit()
        },

@ -346,6 +353,17 @@ export class VideoActionsDropdownComponent implements OnChanges {
      })
  }

+  // Request caption generation for this single video through VideoCaptionService,
+  // then show a success notification or the error message
+  generateCaption (video: Video) {
+    this.videoCaptionService.generateCaption([ video.id ])
+      .subscribe({
+        next: () => {
+          this.notifier.success($localize`Transcription job created for "${video.name}".`)
+        },
+
+        error: err => this.notifier.error(err.message)
+      })
+  }
+
  onVideoBlocked () {
    this.videoBlocked.emit()
  }
@ -466,6 +484,14 @@ export class VideoActionsDropdownComponent implements OnChanges {
          iconName: 'delete'
        }
      ],
+      [
+        {
+          label: $localize`Generate caption`,
+          handler: ({ video }) => this.generateCaption(video),
+          isDisplayed: () => this.displayOptions.generateTranscription && this.canGenerateTranscription(),
+          iconName: 'video-lang'
+        }
+      ],
      [ // actions regarding the account/its server
        {
          label: $localize`Mute account`,
--- a/client/src/app/shared/standalone-notifications/user-notifications.component.html
+++ b/client/src/app/shared/standalone-notifications/user-notifications.component.html
@ -239,6 +239,14 @@
        }
      </ng-container>

+      <ng-container *ngSwitchCase="22"> <!-- UserNotificationType.MY_VIDEO_TRANSCRIPTION_GENERATED -->
+        <my-global-icon iconName="video-lang" aria-hidden="true"></my-global-icon>
+
+        <div class="message" i18n>
+          <em>{{ notification.videoCaption.language.label }}</em> transcription of <a (click)="markAsRead(notification)" [routerLink]="notification.videoUrl">your video {{ notification.videoCaption.video.name }}</a> has been generated
+        </div>
+      </ng-container>
+
      <ng-container *ngSwitchDefault>
        <my-global-icon iconName="alert" aria-hidden="true"></my-global-icon>

--- a/config/default.yaml
+++ b/config/default.yaml
@ -716,6 +716,34 @@ video_studio:
  remote_runners:
    enabled: false

+video_transcription:
+  # Enable automatic transcription of videos
+  enabled: false
+
+  # Choose engine for local transcription
+  # Supported: 'openai-whisper' or 'whisper-ctranslate2'
+  engine: 'whisper-ctranslate2'
+
+  # You can set a custom engine path for local transcription
+  # If not provided, PeerTube will try to automatically install it in the PeerTube bin directory
+  engine_path: null
+
+  # Choose engine model for local transcription
+  # Available for 'openai-whisper' and 'whisper-ctranslate2': 'tiny', 'base', 'small', 'medium' or 'large-v3'
+  model: 'small'
+
+  # Or specify the model path:
+  #  * PyTorch model file path for 'openai-whisper'
+  #  * CTranslate2 Whisper model directory path for 'whisper-ctranslate2'
+  # If not provided, PeerTube will automatically download the model
+  model_path: null
+
+  # Enable remote runners to transcribe videos
+  # If enabled, your instance won't transcribe the videos itself
+  # At least 1 remote runner must be configured to transcribe your videos
+  remote_runners:
+    enabled: false
+
 video_file:
  update:
    # Add ability for users to replace the video file of an existing video
--- a/config/production.yaml.example
+++ b/config/production.yaml.example
@ -726,6 +726,34 @@ video_studio:
  remote_runners:
    enabled: false

+video_transcription:
+  # Enable automatic transcription of videos
+  enabled: false
+
+  # Choose engine for local transcription
+  # Supported: 'openai-whisper' or 'whisper-ctranslate2'
+  engine: 'whisper-ctranslate2'
+
+  # You can set a custom engine path for local transcription
+  # If not provided, PeerTube will try to automatically install it in the PeerTube bin directory
+  engine_path: null
+
+  # Choose engine model for local transcription
+  # Available for 'openai-whisper' and 'whisper-ctranslate2': 'tiny', 'base', 'small', 'medium' or 'large-v3'
+  model: 'small'
+
+  # Or specify the model path:
+  #  * PyTorch model file path for 'openai-whisper'
+  #  * CTranslate2 Whisper model directory path for 'whisper-ctranslate2'
+  # If not provided, PeerTube will automatically download the model
+  model_path: null
+
+  # Enable remote runners to transcribe videos
+  # If enabled, your instance won't transcribe the videos itself
+  # At least 1 remote runner must be configured to transcribe your videos
+  remote_runners:
+    enabled: false
+
 video_file:
  update:
    # Add ability for users to replace the video file of an existing video
--- a/config/test.yaml
+++ b/config/test.yaml
@ -166,3 +166,6 @@ open_telemetry:
 search:
  search_index:
    url: 'https://search.joinpeertube.org/'
+
+video_transcription:
+  model: 'tiny'
--- a/packages/ffmpeg/src/ffmpeg-command-wrapper.ts
+++ b/packages/ffmpeg/src/ffmpeg-command-wrapper.ts
@ -1,14 +1,13 @@
 import { pick, promisify0 } from '@peertube/peertube-core-utils'
-import { AvailableEncoders, EncoderOptionsBuilder, EncoderOptionsBuilderParams, EncoderProfile } from '@peertube/peertube-models'
+import {
+  AvailableEncoders,
+  EncoderOptionsBuilder,
+  EncoderOptionsBuilderParams,
+  EncoderProfile,
+  SimpleLogger
+} from '@peertube/peertube-models'
 import ffmpeg, { FfmpegCommand } from 'fluent-ffmpeg'

-type FFmpegLogger = {
-  info: (msg: string, obj?: object) => void
-  debug: (msg: string, obj?: object) => void
-  warn: (msg: string, obj?: object) => void
-  error: (msg: string, obj?: object) => void
-}
-
 export interface FFmpegCommandWrapperOptions {
  availableEncoders?: AvailableEncoders
  profile?: string
@ -17,7 +16,7 @@ export interface FFmpegCommandWrapperOptions {
  tmpDirectory: string
  threads: number

-  logger: FFmpegLogger
+  logger: SimpleLogger
  lTags?: { tags: string[] }

  updateJobProgress?: (progress?: number) => void
@ -35,7 +34,7 @@ export class FFmpegCommandWrapper {
  private readonly tmpDirectory: string
  private readonly threads: number

-  private readonly logger: FFmpegLogger
+  private readonly logger: SimpleLogger
  private readonly lTags: { tags: string[] }

  private readonly updateJobProgress: (progress?: number) => void
--- a/packages/jiwer/README.md
+++ b/packages/jiwer/README.md
@ -1,37 +0,0 @@
-JiWER
-=====
-__JiWER__ CLI NodeJs wrapper.
-
-> *JiWER is a python tool for computing the word-error-rate of ASR systems.*
-> https://jitsi.github.io/jiwer/cli/
-
-__JiWER__ serves as a reference implementation to calculate errors rates between 2 text files:
- WER (Word Error Rate)
- CER (Character Error Rate)
-
-Build
-----
-
-```sh
-npm run build
-```
-
-Usage
-----
-```typescript
-const jiwerCLI = new JiwerClI('./reference.txt', './hypothesis.txt')
-
-// WER as a percentage, ex: 0.03 -> 3%
-console.log(await jiwerCLI.wer())
-
-// CER as a percentage: 0.01 -> 1%
-console.log(await jiwerCLI.cer())
-
-// Detailed comparison report
-console.log(await jiwerCLI.alignment())
-```
-
-Resources
---------
- https://jitsi.github.io/jiwer/
- https://github.com/rapidfuzz/RapidFuzz
--- a/packages/jiwer/requirements.txt
+++ b/packages/jiwer/requirements.txt
@ -1 +0,0 @@
-jiwer==3.0.4
--- a/packages/jiwer/src/index.ts
+++ b/packages/jiwer/src/index.ts
@ -1 +0,0 @@
-export * from './jiwer-cli.js'
--- a/packages/models/src/common/index.ts
+++ b/packages/models/src/common/index.ts
@ -1,2 +1,3 @@
 export * from './file-storage.enum.js'
 export * from './result-list.model.js'
+export * from './simple-logger.model.js'
--- a/packages/models/src/common/simple-logger.model.ts
+++ b/packages/models/src/common/simple-logger.model.ts
@ -0,0 +1,6 @@
+// Minimal logger contract: one method per level (info, debug, warn, error),
+// each taking a message and an optional object
+export type SimpleLogger = {
+  info: (msg: string, obj?: object) => void
+  debug: (msg: string, obj?: object) => void
+  warn: (msg: string, obj?: object) => void
+  error: (msg: string, obj?: object) => void
+}
--- a/packages/models/src/runners/runner-job-payload.model.ts
+++ b/packages/models/src/runners/runner-job-payload.model.ts
@ -8,7 +8,8 @@ export type RunnerJobVODPayload =
 export type RunnerJobPayload =
  RunnerJobVODPayload |
  RunnerJobLiveRTMPHLSTranscodingPayload |
-  RunnerJobStudioTranscodingPayload
+  RunnerJobStudioTranscodingPayload |
+  RunnerJobTranscriptionPayload

 // ---------------------------------------------------------------------------

@ -54,6 +55,12 @@ export interface RunnerJobStudioTranscodingPayload {
  tasks: VideoStudioTaskPayload[]
 }

+// Transcription member of the RunnerJobPayload union
+export interface RunnerJobTranscriptionPayload {
+  input: {
+    // NOTE(review): presumably the URL the runner fetches the media to transcribe from - confirm against the job handler
+    videoFileUrl: string
+  }
+}
+
 // ---------------------------------------------------------------------------

 export function isAudioMergeTranscodingPayload (payload: RunnerJobPayload): payload is RunnerJobVODAudioMergeTranscodingPayload {
--- a/packages/models/src/runners/runner-job-private-payload.model.ts
+++ b/packages/models/src/runners/runner-job-private-payload.model.ts
@ -8,7 +8,8 @@ export type RunnerJobVODPrivatePayload =
 export type RunnerJobPrivatePayload =
  RunnerJobVODPrivatePayload |
  RunnerJobLiveRTMPHLSTranscodingPrivatePayload |
-  RunnerJobVideoStudioTranscodingPrivatePayload
+  RunnerJobVideoStudioTranscodingPrivatePayload |
+  RunnerJobTranscriptionPrivatePayload

 // ---------------------------------------------------------------------------

@ -45,3 +46,9 @@ export interface RunnerJobVideoStudioTranscodingPrivatePayload {
  videoUUID: string
  originalTasks: VideoStudioTaskPayload[]
 }
+
+// ---------------------------------------------------------------------------
+
+export interface RunnerJobTranscriptionPrivatePayload {
+  videoUUID: string
+}
--- a/packages/models/src/runners/runner-job-success-body.model.ts
+++ b/packages/models/src/runners/runner-job-success-body.model.ts
@ -12,7 +12,8 @@ export type RunnerJobSuccessPayload =
  VODHLSTranscodingSuccess |
  VODAudioMergeTranscodingSuccess |
  LiveRTMPHLSTranscodingSuccess |
-  VideoStudioTranscodingSuccess
+  VideoStudioTranscodingSuccess |
+  TranscriptionSuccess

 export interface VODWebVideoTranscodingSuccess {
  videoFile: Blob | string
@ -35,6 +36,12 @@ export interface VideoStudioTranscodingSuccess {
  videoFile: Blob | string
 }

+// Transcription member of the RunnerJobSuccessPayload union
+export interface TranscriptionSuccess {
+  // NOTE(review): presumably the language identified for the transcribed media - confirm the expected format
+  inputLanguage: string
+
+  // Generated caption file, given either as a Blob or as a string
+  vttFile: Blob | string
+}
+
 export function isWebVideoOrAudioMergeTranscodingPayloadSuccess (
  payload: RunnerJobSuccessPayload
 ): payload is VODHLSTranscodingSuccess | VODAudioMergeTranscodingSuccess {
@ -44,3 +51,7 @@ export function isWebVideoOrAudioMergeTranscodingPayloadSuccess (
 export function isHLSTranscodingPayloadSuccess (payload: RunnerJobSuccessPayload): payload is VODHLSTranscodingSuccess {
  return !!(payload as VODHLSTranscodingSuccess)?.resolutionPlaylistFile
 }
+
+// Type guard: the payload is considered a transcription result when it carries a truthy `vttFile`
+export function isTranscriptionPayloadSuccess (payload: RunnerJobSuccessPayload): payload is TranscriptionSuccess {
+  return !!(payload as TranscriptionSuccess)?.vttFile
+}
--- a/packages/models/src/runners/runner-job-type.type.ts
+++ b/packages/models/src/runners/runner-job-type.type.ts
@ -3,4 +3,5 @@ export type RunnerJobType =
  'vod-hls-transcoding' |
  'vod-audio-merge-transcoding' |
  'live-rtmp-hls-transcoding' |
-  'video-studio-transcoding'
+  'video-studio-transcoding' |
+  'video-transcription'
--- a/packages/models/src/server/custom-config.model.ts
+++ b/packages/models/src/server/custom-config.model.ts
@ -179,6 +179,14 @@ export interface CustomConfig {
    }
  }

+  videoTranscription: {
+    enabled: boolean
+
+    remoteRunners: {
+      enabled: boolean
+    }
+  }
+
  videoFile: {
    update: {
      enabled: boolean
--- a/packages/models/src/server/job.model.ts
+++ b/packages/models/src/server/job.model.ts
@ -33,6 +33,7 @@ export type JobType =
  | 'generate-video-storyboard'
  | 'create-user-export'
  | 'import-user-archive'
+  | 'video-transcription'

 export interface Job {
  id: number | string
@ -101,11 +102,16 @@ export interface VideoImportYoutubeDLPayload {
  type: VideoImportYoutubeDLPayloadType
  videoImportId: number

+  generateTranscription: boolean
+
  fileExt?: string
 }

 export interface VideoImportTorrentPayload {
  type: VideoImportTorrentPayloadType
+
+  generateTranscription: boolean
+
  videoImportId: number
 }

@ -316,3 +322,9 @@ export interface CreateUserExportPayload {
 export interface ImportUserArchivePayload {
  userImportId: number
 }
+
+// ---------------------------------------------------------------------------
+
+export interface VideoTranscriptionPayload {
+  videoUUID: string
+}
--- a/packages/models/src/server/server-config.model.ts
+++ b/packages/models/src/server/server-config.model.ts
@ -346,6 +346,10 @@ export interface ServerConfig {
  storyboards: {
    enabled: boolean
  }
+
+  videoTranscription: {
+    enabled: boolean
+  }
 }

 export type HTMLServerConfig = Omit<ServerConfig, 'signup'>
--- a/packages/models/src/server/server-error-code.enum.ts
+++ b/packages/models/src/server/server-error-code.enum.ts
@ -3,58 +3,59 @@ export const ServerErrorCode = {
   * The simplest form of payload too large: when the file size is over the
   * global file size limit
   */
-  MAX_FILE_SIZE_REACHED:'max_file_size_reached',
+  MAX_FILE_SIZE_REACHED: 'max_file_size_reached',

  /**
   * The payload is too large for the user quota set
   */
-  QUOTA_REACHED:'quota_reached',
+  QUOTA_REACHED: 'quota_reached',

  /**
   * Error yielded upon trying to access a video that is not federated, nor can
-   * be. This may be due to: remote videos on instances that are not followed by
+   * be. This may be due to: remote videos on instances that are not followed by
   * yours, and with your instance disallowing unknown instances being accessed.
   */
-  DOES_NOT_RESPECT_FOLLOW_CONSTRAINTS:'does_not_respect_follow_constraints',
+  DOES_NOT_RESPECT_FOLLOW_CONSTRAINTS: 'does_not_respect_follow_constraints',

-  LIVE_NOT_ENABLED:'live_not_enabled',
-  LIVE_NOT_ALLOWING_REPLAY:'live_not_allowing_replay',
-  LIVE_CONFLICTING_PERMANENT_AND_SAVE_REPLAY:'live_conflicting_permanent_and_save_replay',
+  LIVE_NOT_ENABLED: 'live_not_enabled',
+  LIVE_NOT_ALLOWING_REPLAY: 'live_not_allowing_replay',
+  LIVE_CONFLICTING_PERMANENT_AND_SAVE_REPLAY: 'live_conflicting_permanent_and_save_replay',
  /**
-   * Pretty self-explanatory: the set maximum number of simultaneous lives was
+   * Pretty self-explanatory: the set maximum number of simultaneous lives was
   * reached, and this error is typically there to inform the user trying to
   * broadcast one.
   */
-  MAX_INSTANCE_LIVES_LIMIT_REACHED:'max_instance_lives_limit_reached',
+  MAX_INSTANCE_LIVES_LIMIT_REACHED: 'max_instance_lives_limit_reached',
  /**
-   * Pretty self-explanatory: the set maximum number of simultaneous lives FOR
+   * Pretty self-explanatory: the set maximum number of simultaneous lives FOR
   * THIS USER was reached, and this error is typically there to inform the user
   * trying to broadcast one.
   */
-  MAX_USER_LIVES_LIMIT_REACHED:'max_user_lives_limit_reached',
+  MAX_USER_LIVES_LIMIT_REACHED: 'max_user_lives_limit_reached',

  /**
   * A torrent should have at most one correct video file. Any more and we will
   * not be able to choose automatically.
   */
-  INCORRECT_FILES_IN_TORRENT:'incorrect_files_in_torrent',
+  INCORRECT_FILES_IN_TORRENT: 'incorrect_files_in_torrent',

-  COMMENT_NOT_ASSOCIATED_TO_VIDEO:'comment_not_associated_to_video',
+  COMMENT_NOT_ASSOCIATED_TO_VIDEO: 'comment_not_associated_to_video',

-  MISSING_TWO_FACTOR:'missing_two_factor',
-  INVALID_TWO_FACTOR:'invalid_two_factor',
+  MISSING_TWO_FACTOR: 'missing_two_factor',
+  INVALID_TWO_FACTOR: 'invalid_two_factor',

-  ACCOUNT_WAITING_FOR_APPROVAL:'account_waiting_for_approval',
-  ACCOUNT_APPROVAL_REJECTED:'account_approval_rejected',
+  ACCOUNT_WAITING_FOR_APPROVAL: 'account_waiting_for_approval',
+  ACCOUNT_APPROVAL_REJECTED: 'account_approval_rejected',

-  RUNNER_JOB_NOT_IN_PROCESSING_STATE:'runner_job_not_in_processing_state',
-  RUNNER_JOB_NOT_IN_PENDING_STATE:'runner_job_not_in_pending_state',
-  UNKNOWN_RUNNER_TOKEN:'unknown_runner_token',
+  RUNNER_JOB_NOT_IN_PROCESSING_STATE: 'runner_job_not_in_processing_state',
+  RUNNER_JOB_NOT_IN_PENDING_STATE: 'runner_job_not_in_pending_state',
+  UNKNOWN_RUNNER_TOKEN: 'unknown_runner_token',

-  VIDEO_REQUIRES_PASSWORD:'video_requires_password',
-  INCORRECT_VIDEO_PASSWORD:'incorrect_video_password',
+  VIDEO_REQUIRES_PASSWORD: 'video_requires_password',
+  INCORRECT_VIDEO_PASSWORD: 'incorrect_video_password',

-  VIDEO_ALREADY_BEING_TRANSCODED:'video_already_being_transcoded',
+  VIDEO_ALREADY_BEING_TRANSCODED: 'video_already_being_transcoded',
+  VIDEO_ALREADY_BEING_TRANSCRIBED: 'video_already_being_transcribed',

  MAX_USER_VIDEO_QUOTA_EXCEEDED_FOR_USER_EXPORT: 'max_user_video_quota_exceeded_for_user_export'
 } as const
--- a/packages/models/src/users/user-notification-setting.model.ts
+++ b/packages/models/src/users/user-notification-setting.model.ts
@ -31,4 +31,6 @@ export interface UserNotificationSetting {
  newPluginVersion: UserNotificationSettingValueType

  myVideoStudioEditionFinished: UserNotificationSettingValueType
+
+  myVideoTranscriptionGenerated: UserNotificationSettingValueType
 }
--- a/packages/models/src/users/user-notification.model.ts
+++ b/packages/models/src/users/user-notification.model.ts
@ -1,6 +1,7 @@
 import { FollowState } from '../actors/index.js'
 import { AbuseStateType } from '../moderation/index.js'
 import { PluginType_Type } from '../plugins/index.js'
+import { VideoConstant } from '../videos/video-constant.model.js'

 export const UserNotificationType = {
  NEW_VIDEO_FROM_SUBSCRIPTION: 1,
@ -36,7 +37,9 @@ export const UserNotificationType = {

  NEW_USER_REGISTRATION_REQUEST: 20,

-  NEW_LIVE_FROM_SUBSCRIPTION: 21
+  NEW_LIVE_FROM_SUBSCRIPTION: 21,
+
+  MY_VIDEO_TRANSCRIPTION_GENERATED: 22
 } as const

 export type UserNotificationType_Type = typeof UserNotificationType[keyof typeof UserNotificationType]
@ -138,6 +141,12 @@ export interface UserNotification {
    username: string
  }

+  videoCaption?: {
+    id: number
+    language: VideoConstant<string>
+    video: VideoInfo
+  }
+
  createdAt: string
  updatedAt: string
 }
--- a/packages/models/src/videos/caption/index.ts
+++ b/packages/models/src/videos/caption/index.ts
@ -1,2 +1,3 @@
-export * from './video-caption.model.js'
+export * from './video-caption-generate.model.js'
 export * from './video-caption-update.model.js'
+export * from './video-caption.model.js'
--- a/packages/models/src/videos/caption/video-caption-generate.model.ts
+++ b/packages/models/src/videos/caption/video-caption-generate.model.ts
@ -0,0 +1,3 @@
+export interface VideoCaptionGenerate {
+  forceTranscription?: boolean // Default false
+}
--- a/packages/models/src/videos/import/video-import-create.model.ts
+++ b/packages/models/src/videos/import/video-import-create.model.ts
@ -5,5 +5,8 @@ export interface VideoImportCreate extends VideoUpdate {
  magnetUri?: string
  torrentfile?: Blob

+  // Default is true if the feature is enabled by the instance admin
+  generateTranscription?: boolean
+
  channelId: number // Required
 }
--- a/packages/models/src/videos/video-create.model.ts
+++ b/packages/models/src/videos/video-create.model.ts
@ -27,4 +27,7 @@ export interface VideoCreate {

  thumbnailfile?: Blob | string
  previewfile?: Blob | string
+
+  // Default is true if the feature is enabled by the instance admin
+  generateTranscription?: boolean
 }
--- a/packages/server-commands/src/runners/runner-jobs-command.ts
+++ b/packages/server-commands/src/runners/runner-jobs-command.ts
@ -5,9 +5,6 @@ import {
  AcceptRunnerJobResult,
  ErrorRunnerJobBody,
  HttpStatusCode,
-  isHLSTranscodingPayloadSuccess,
-  isLiveRTMPHLSTranscodingUpdatePayload,
-  isWebVideoOrAudioMergeTranscodingPayloadSuccess,
  ListRunnerJobsQuery,
  RequestRunnerJobBody,
  RequestRunnerJobResult,
@ -22,8 +19,13 @@ import {
  RunnerJobType,
  RunnerJobUpdateBody,
  RunnerJobVODPayload,
+  TranscriptionSuccess,
  VODHLSTranscodingSuccess,
-  VODWebVideoTranscodingSuccess
+  VODWebVideoTranscodingSuccess,
+  isHLSTranscodingPayloadSuccess,
+  isLiveRTMPHLSTranscodingUpdatePayload,
+  isTranscriptionPayloadSuccess,
+  isWebVideoOrAudioMergeTranscodingPayloadSuccess
 } from '@peertube/peertube-models'
 import { unwrapBody } from '../requests/index.js'
 import { waitJobs } from '../server/jobs.js'
@ -196,6 +198,12 @@ export class RunnerJobsCommand extends AbstractCommand {
      payloadWithoutFiles = omit(payloadWithoutFiles as VODHLSTranscodingSuccess, [ 'resolutionPlaylistFile' ])
    }

+    if (isTranscriptionPayloadSuccess(payload) && payload.vttFile) {
+      attaches[`payload[vttFile]`] = payload.vttFile
+
+      payloadWithoutFiles = omit(payloadWithoutFiles as TranscriptionSuccess, [ 'vttFile' ])
+    }
+
    return this.postUploadRequest({
      ...options,

--- a/packages/server-commands/src/server/config-command.ts
+++ b/packages/server-commands/src/server/config-command.ts
@ -355,6 +355,29 @@ export class ConfigCommand extends AbstractCommand {

  // ---------------------------------------------------------------------------

+  // Enable video transcription; `remote` (default false) sets whether remote runners are enabled too
+  enableTranscription ({ remote = false }: { remote?: boolean } = {}) {
+    return this.setTranscriptionEnabled(true, remote)
+  }
+
+  // Disable video transcription and its remote runners
+  disableTranscription () {
+    return this.setTranscriptionEnabled(false, false)
+  }
+
+  // Update the existing config with the given transcription flags (feature and remote runners)
+  private setTranscriptionEnabled (enabled: boolean, remoteEnabled: boolean) {
+    const videoTranscription = {
+      enabled,
+      remoteRunners: { enabled: remoteEnabled }
+    }
+
+    return this.updateExistingConfig({ newConfig: { videoTranscription } })
+  }
+
+  // ---------------------------------------------------------------------------
+
  getConfig (options: OverrideCommandOptions = {}) {
    const path = '/api/v1/config'

--- a/packages/server-commands/src/shared/abstract-command.ts
+++ b/packages/server-commands/src/shared/abstract-command.ts
@ -1,7 +1,10 @@
 /* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/no-floating-promises */

-import { isAbsolute } from 'path'
+import { HttpStatusCode, HttpStatusCodeType } from '@peertube/peertube-models'
 import { buildAbsoluteFixturePath, getFileSize } from '@peertube/peertube-node-utils'
+import { expect } from 'chai'
+import got, { Response as GotResponse } from 'got'
+import { isAbsolute } from 'path'
 import {
  makeDeleteRequest,
  makeGetRequest,
@ -11,12 +14,9 @@ import {
  unwrapBody,
  unwrapText
 } from '../requests/requests.js'
-import { expect } from 'chai'
-import got, { Response as GotResponse } from 'got'
-import { HttpStatusCode, HttpStatusCodeType } from '@peertube/peertube-models'

-import type { PeerTubeServer } from '../server/server.js'
 import { createReadStream } from 'fs'
+import type { PeerTubeServer } from '../server/server.js'

 export interface OverrideCommandOptions {
  token?: string
--- a/packages/server-commands/src/videos/captions-command.ts
+++ b/packages/server-commands/src/videos/captions-command.ts
@ -1,3 +1,4 @@
+import { pick } from '@peertube/peertube-core-utils'
 import { HttpStatusCode, ResultList, VideoCaption } from '@peertube/peertube-models'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
 import { AbstractCommand, OverrideCommandOptions } from '../shared/index.js'
@ -32,6 +33,23 @@ export class CaptionsCommand extends AbstractCommand {
    })
  }

+  // POST /api/v1/videos/:videoId/captions/generate to request caption generation for a video
+  // Only `forceTranscription` is sent as body field; the default expected status is 204
+  runGenerate (options: OverrideCommandOptions & {
+    videoId: string | number
+    forceTranscription?: boolean
+  }) {
+    const { videoId } = options
+    const path = '/api/v1/videos/' + videoId + '/captions/generate'
+
+    return this.postBodyRequest({
+      ...options,
+
+      path,
+      fields: pick(options, [ 'forceTranscription' ]),
+      implicitToken: true,
+      defaultExpectedStatus: HttpStatusCode.NO_CONTENT_204
+    })
+  }
+
  list (options: OverrideCommandOptions & {
    videoId: string | number
    videoPassword?: string
--- a/packages/server-commands/src/videos/video-imports-command.ts
+++ b/packages/server-commands/src/videos/video-imports-command.ts
@ -4,12 +4,19 @@ import { AbstractCommand, OverrideCommandOptions } from '../shared/index.js'

 export class VideoImportsCommand extends AbstractCommand {

-  importVideo (options: OverrideCommandOptions & {
-    attributes: (VideoImportCreate | { torrentfile?: string, previewfile?: string, thumbnailfile?: string })
+  async importVideo (options: OverrideCommandOptions & {
+    attributes: (Partial<VideoImportCreate> | { torrentfile?: string, previewfile?: string, thumbnailfile?: string })
  }) {
    const { attributes } = options
    const path = '/api/v1/videos/imports'

+    let defaultChannelId = 1
+
+    try {
+      const { videoChannels } = await this.server.users.getMyInfo({ token: options.token })
+      defaultChannelId = videoChannels[0].id
+    } catch (e) { /* empty */ }
+
    let attaches: any = {}
    if (attributes.torrentfile) attaches = { torrentfile: attributes.torrentfile }
    if (attributes.thumbnailfile) attaches = { thumbnailfile: attributes.thumbnailfile }
@ -20,7 +27,11 @@ export class VideoImportsCommand extends AbstractCommand {

      path,
      attaches,
-      fields: options.attributes,
+      fields: {
+        channelId: defaultChannelId,
+
+        ...options.attributes
+      },
      implicitToken: true,
      defaultExpectedStatus: HttpStatusCode.OK_200
    }))
--- a/packages/tests/fixtures/transcription/videos/communiquer-lors-dune-classe-transplantee.mp4
+++ b/packages/tests/fixtures/transcription/videos/communiquer-lors-dune-classe-transplantee.mp4
--- a/packages/tests/fixtures/transcription/videos/derive_sectaire.mp4
+++ b/packages/tests/fixtures/transcription/videos/derive_sectaire.mp4
--- a/packages/tests/package.json
+++ b/packages/tests/package.json
@ -6,7 +6,8 @@
  "devDependencies": {},
  "scripts": {
    "build": "tsc",
-    "watch": "tsc -w"
+    "watch": "tsc -w",
+    "install-dependencies:transcription": "pip install -r ./requirements.txt -r ../transcription-devtools/requirements.txt"
  },
  "dependencies": {}
 }
--- a/packages/tests/requirements.txt
+++ b/packages/tests/requirements.txt
@ -0,0 +1,2 @@
+whisper-ctranslate2
+openai-whisper
--- a/packages/tests/src/api/check-params/index.ts
+++ b/packages/tests/src/api/check-params/index.ts
@ -42,6 +42,7 @@ import './video-source.js'
 import './video-storyboards.js'
 import './video-studio.js'
 import './video-token.js'
+import './video-transcription.js'
 import './videos-common-filters.js'
 import './videos-history.js'
 import './videos-overviews.js'
--- a/packages/tests/src/api/check-params/user-notifications.ts
+++ b/packages/tests/src/api/check-params/user-notifications.ts
@ -171,6 +171,7 @@ describe('Test user notifications API validators', function () {
      abuseStateChange: UserNotificationSettingValue.WEB,
      newPeerTubeVersion: UserNotificationSettingValue.WEB,
      myVideoStudioEditionFinished: UserNotificationSettingValue.WEB,
+      myVideoTranscriptionGenerated: UserNotificationSettingValue.WEB,
      newPluginVersion: UserNotificationSettingValue.WEB
    }

--- a/packages/tests/src/api/check-params/video-transcription.ts
+++ b/packages/tests/src/api/check-params/video-transcription.ts
@ -0,0 +1,106 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import { HttpStatusCode, UserRole } from '@peertube/peertube-models'
+import {
+  PeerTubeServer,
+  cleanupTests,
+  createMultipleServers,
+  doubleFollow,
+  setAccessTokensToServers,
+  waitJobs
+} from '@peertube/peertube-server-commands'
+
+describe('Test video transcription API validator', function () {
+  let servers: PeerTubeServer[]
+
+  let userToken: string
+  let anotherUserToken: string
+
+  let remoteId: string
+  let validId: string
+
+  // ---------------------------------------------------------------
+
+  before(async function () {
+    this.timeout(240000)
+
+    servers = await createMultipleServers(2)
+    await setAccessTokensToServers(servers)
+
+    await doubleFollow(servers[0], servers[1])
+
+    userToken = await servers[0].users.generateUserAndToken('user', UserRole.USER)
+    anotherUserToken = await servers[0].users.generateUserAndToken('user2', UserRole.USER)
+
+    {
+      const { uuid } = await servers[1].videos.quickUpload({ name: 'remote video' })
+      remoteId = uuid
+    }
+
+    {
+      const { uuid } = await servers[0].videos.quickUpload({ name: 'both 1', token: userToken })
+      validId = uuid
+    }
+
+    await waitJobs(servers)
+
+    await servers[0].config.enableTranscription()
+  })
+
+  it('Should not run transcription of an unknown video', async function () {
+    await servers[0].captions.runGenerate({ videoId: 404, expectedStatus: HttpStatusCode.NOT_FOUND_404 })
+  })
+
+  it('Should not run transcription of a remote video', async function () {
+    await servers[0].captions.runGenerate({ videoId: remoteId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })
+  })
+
+  // anotherUserToken belongs to "user2", who did not upload the video (it was uploaded with userToken)
+  it('Should not run transcription by a non owner/moderator user', async function () {
+    await servers[0].captions.runGenerate({ videoId: validId, token: anotherUserToken, expectedStatus: HttpStatusCode.FORBIDDEN_403 })
+  })
+
+  it('Should not run transcription if a caption file already exists', async function () {
+    await servers[0].captions.add({
+      language: 'en',
+      videoId: validId,
+      fixture: 'subtitle-good1.vtt'
+    })
+
+    await servers[0].captions.runGenerate({ videoId: validId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })
+
+    await servers[0].captions.delete({ language: 'en', videoId: validId })
+  })
+
+  it('Should not run transcription if the instance disabled it', async function () {
+    await servers[0].config.disableTranscription()
+
+    await servers[0].captions.runGenerate({ videoId: validId, expectedStatus: HttpStatusCode.BAD_REQUEST_400 })
+
+    await servers[0].config.enableTranscription()
+  })
+
+  it('Should succeed to run transcription', async function () {
+    await servers[0].captions.runGenerate({ videoId: validId, token: userToken })
+  })
+
+  it('Should fail to run transcription twice', async function () {
+    await servers[0].captions.runGenerate({ videoId: validId, token: userToken, expectedStatus: HttpStatusCode.CONFLICT_409 })
+  })
+
+  it('Should fail to run transcription twice with a non-admin user with the forceTranscription boolean', async function () {
+    await servers[0].captions.runGenerate({
+      videoId: validId,
+      token: userToken,
+      forceTranscription: true,
+      expectedStatus: HttpStatusCode.FORBIDDEN_403
+    })
+  })
+
+  it('Should succeed to run transcription twice with the forceTranscription boolean', async function () {
+    await servers[0].captions.runGenerate({ videoId: validId, forceTranscription: true })
+  })
+
+  after(async function () {
+    await cleanupTests(servers)
+  })
+})
--- a/packages/tests/src/api/notifications/caption-notifications.ts
+++ b/packages/tests/src/api/notifications/caption-notifications.ts
@ -0,0 +1,81 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import { UserNotification } from '@peertube/peertube-models'
+import { PeerTubeServer, cleanupTests, waitJobs } from '@peertube/peertube-server-commands'
+import { MockSmtpServer } from '@tests/shared/mock-servers/mock-email.js'
+import {
+  CheckerBaseParams,
+  checkMyVideoTranscriptionGenerated,
+  prepareNotificationsTest
+} from '@tests/shared/notifications.js'
+import { join } from 'path'
+
+describe('Test caption notifications', function () {
+  let servers: PeerTubeServer[] = []
+
+  // State handed back by prepareNotificationsTest() and later passed on to the checker
+  let emails: object[] = []
+  let userNotifications: UserNotification[] = []
+  let userAccessToken: string
+
+  before(async function () {
+    this.timeout(120000)
+
+    const prepared = await prepareNotificationsTest(1)
+
+    servers = prepared.servers
+    userAccessToken = prepared.userAccessToken
+    emails = prepared.emails
+    userNotifications = prepared.userNotifications
+  })
+
+  describe('Transcription of my video generated is published', function () {
+    // Caption language the checker is asked to look for in the notification
+    const language = { id: 'en', label: 'English' }
+    let baseParams: CheckerBaseParams
+
+    before(() => {
+      // Built in a hook because the outer `before` has to fill these variables first
+      baseParams = { server: servers[0], emails, socketNotifications: userNotifications, token: userAccessToken }
+    })
+
+    // Uploads the transcription fixture with the user token (no language set), waits for the jobs,
+    // then resolves with the video fetched back from the server
+    async function uploadAndWait () {
+      const fixture = join('transcription', 'videos', 'the_last_man_on_earth.mp4')
+      const attributes = { name: 'video', fixture, language: undefined }
+
+      const uploaded = await servers[0].videos.upload({ token: userAccessToken, attributes })
+      await waitJobs(servers)
+
+      return servers[0].videos.get({ id: uploaded.uuid })
+    }
+
+    it('Should not send a notification if transcription is not enabled', async function () {
+      this.timeout(50000)
+
+      const video = await uploadAndWait()
+      const expected = { videoName: video.name, shortUUID: video.shortUUID, language }
+
+      await checkMyVideoTranscriptionGenerated({ ...baseParams, ...expected, checkType: 'absence' })
+    })
+
+    it('Should send a notification transcription is enabled', async function () {
+      this.timeout(240000)
+
+      await servers[0].config.enableTranscription()
+
+      const video = await uploadAndWait()
+      const expected = { videoName: video.name, shortUUID: video.shortUUID, language }
+
+      await checkMyVideoTranscriptionGenerated({ ...baseParams, ...expected, checkType: 'presence' })
+    })
+  })
+
+  after(async function () {
+    // Stop the mock SMTP server before tearing the PeerTube servers down
+    MockSmtpServer.Instance.kill()
+
+    await cleanupTests(servers)
+  })
+})
--- a/packages/tests/src/api/notifications/index.ts
+++ b/packages/tests/src/api/notifications/index.ts
@ -1,4 +1,5 @@
 import './admin-notifications.js'
+import './caption-notifications.js'
 import './comments-notifications.js'
 import './moderation-notifications.js'
 import './notifications-api.js'
--- a/packages/tests/src/api/runners/index.ts
+++ b/packages/tests/src/api/runners/index.ts
@ -2,4 +2,5 @@ export * from './runner-common.js'
 export * from './runner-live-transcoding.js'
 export * from './runner-socket.js'
 export * from './runner-studio-transcoding.js'
+export * from './runner-transcription.js'
 export * from './runner-vod-transcoding.js'
--- a/packages/tests/src/api/runners/runner-transcription.ts
+++ b/packages/tests/src/api/runners/runner-transcription.ts
@ -0,0 +1,109 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import {
+  RunnerJobTranscriptionPayload,
+  TranscriptionSuccess
+} from '@peertube/peertube-models'
+import {
+  PeerTubeServer,
+  cleanupTests,
+  createMultipleServers,
+  doubleFollow,
+  setAccessTokensToServers,
+  setDefaultVideoChannel,
+  waitJobs
+} from '@peertube/peertube-server-commands'
+import { checkPersistentTmpIsEmpty } from '@tests/shared/directories.js'
+import { expect } from 'chai'
+
+describe('Test runner transcription', function () {
+  let servers: PeerTubeServer[] = []
+  let runnerToken: string
+
+  before(async function () {
+    this.timeout(120_000)
+
+    servers = await createMultipleServers(2)
+
+    await setAccessTokensToServers(servers)
+    await setDefaultVideoChannel(servers)
+
+    await doubleFollow(servers[0], servers[1])
+
+    await servers[0].config.enableTranscription({ remote: true }) // enabled with the `remote` option on servers[0] only
+    runnerToken = await servers[0].runners.autoRegisterRunner()
+  })
+
+  async function upload () { // uploads a video with no language, then accepts the only available runner job
+    const { uuid } = await servers[0].videos.upload({ attributes: { name: 'video', language: undefined } })
+
+    const { availableJobs } = await servers[0].runnerJobs.request({ runnerToken })
+    expect(availableJobs).to.have.lengthOf(1)
+
+    const jobUUID = availableJobs[0].uuid
+
+    const { job } = await servers[0].runnerJobs.accept<RunnerJobTranscriptionPayload>({ runnerToken, jobUUID })
+    return { uuid, job }
+  }
+
+  it('Should execute a remote transcription job', async function () {
+    this.timeout(240_000)
+
+    const { uuid, job } = await upload()
+
+    expect(job.type).to.equal('video-transcription') // a bare expect(a === b) would assert nothing
+    expect(job.payload.input.videoFileUrl).to.exist
+
+    // Check video input file
+    {
+      await servers[0].runnerJobs.getJobFile({ url: job.payload.input.videoFileUrl, jobToken: job.jobToken, runnerToken })
+    }
+
+    const payload: TranscriptionSuccess = {
+      inputLanguage: 'ar',
+      vttFile: 'subtitle-good1.vtt'
+    }
+
+    await servers[0].runnerJobs.success({ runnerToken, jobUUID: job.uuid, jobToken: job.jobToken, payload })
+
+    await waitJobs(servers)
+
+    for (const server of servers) {
+      const video = await server.videos.get({ id: uuid })
+      expect(video.language.id).to.equal('ar')
+
+      const captions = await server.captions.list({ videoId: uuid })
+      expect(captions.data).to.have.lengthOf(1) // the caption sent by the runner must be listed on each server
+    }
+
+    await checkPersistentTmpIsEmpty(servers[0])
+  })
+
+  it('Should not assign caption/language with an unknown inputLanguage', async function () {
+    this.timeout(240_000)
+
+    const { uuid, job } = await upload()
+
+    const payload: TranscriptionSuccess = {
+      inputLanguage: 'toto', // language code the test title describes as unknown
+      vttFile: 'subtitle-good1.vtt'
+    }
+
+    await servers[0].runnerJobs.success({ runnerToken, jobUUID: job.uuid, jobToken: job.jobToken, payload })
+
+    await waitJobs(servers)
+
+    for (const server of servers) {
+      const video = await server.videos.get({ id: uuid })
+      expect(video.language.id).to.be.null
+
+      const { total, data } = await server.captions.list({ videoId: uuid })
+      expect(total).to.equal(0)
+      expect(data).to.have.lengthOf(0)
+    }
+  })
+
+  after(async function () {
+    await cleanupTests(servers)
+  })
+})
--- a/packages/tests/src/api/server/config.ts
+++ b/packages/tests/src/api/server/config.ts
@ -108,6 +108,9 @@ function checkInitialConfig (server: PeerTubeServer, data: CustomConfig) {
  expect(data.videoStudio.enabled).to.be.false
  expect(data.videoStudio.remoteRunners.enabled).to.be.false

+  expect(data.videoTranscription.enabled).to.be.false
+  expect(data.videoTranscription.remoteRunners.enabled).to.be.false
+
  expect(data.videoFile.update.enabled).to.be.false

  expect(data.import.videos.concurrency).to.equal(2)
@ -292,6 +295,12 @@ function buildNewCustomConfig (server: PeerTubeServer): CustomConfig {
        enabled: true
      }
    },
+    videoTranscription: {
+      enabled: true,
+      remoteRunners: {
+        enabled: true
+      }
+    },
    videoFile: {
      update: {
        enabled: true
--- a/packages/tests/src/api/videos/index.ts
+++ b/packages/tests/src/api/videos/index.ts
@ -1,24 +1,25 @@
+import './channel-import-videos.js'
 import './multiple-servers.js'
 import './resumable-upload.js'
 import './single-server.js'
 import './video-captions.js'
 import './video-change-ownership.js'
+import './video-channel-syncs.js'
 import './video-channels.js'
 import './video-chapters.js'
-import './channel-import-videos.js'
-import './video-channel-syncs.js'
 import './video-comments.js'
 import './video-description.js'
 import './video-files.js'
 import './video-imports.js'
 import './video-nsfw.js'
-import './video-playlists.js'
 import './video-playlist-thumbnails.js'
-import './video-source.js'
+import './video-playlists.js'
 import './video-privacy.js'
 import './video-schedule-update.js'
+import './video-source.js'
+import './video-static-file-privacy.js'
+import './video-storyboard.js'
+import './video-transcription.js'
 import './videos-common-filters.js'
 import './videos-history.js'
 import './videos-overview.js'
-import './video-static-file-privacy.js'
-import './video-storyboard.js'
--- a/packages/tests/src/api/videos/video-transcription.ts
+++ b/packages/tests/src/api/videos/video-transcription.ts
@ -0,0 +1,145 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import { VideoPrivacy } from '@peertube/peertube-models'
+import {
+  PeerTubeServer,
+  cleanupTests,
+  createMultipleServers,
+  doubleFollow,
+  sendRTMPStream,
+  setAccessTokensToServers,
+  setDefaultVideoChannel,
+  stopFfmpeg,
+  waitJobs
+} from '@peertube/peertube-server-commands'
+import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
+import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
+
+describe('Test video transcription', function () {
+  let servers: PeerTubeServer[]
+
+  before(async function () {
+    this.timeout(60000)
+
+    servers = await createMultipleServers(2)
+
+    await setAccessTokensToServers(servers)
+    await setDefaultVideoChannel(servers)
+    await doubleFollow(servers[0], servers[1])
+
+    await waitJobs(servers)
+    await waitJobs(servers)
+  })
+
+  // ---------------------------------------------------------------------------
+
+  it('Should generate a transcription on request', async function () {
+    this.timeout(360000)
+
+    await servers[0].config.disableTranscription() // upload first with transcription off: no language expected yet
+
+    const uuid = await uploadForTranscription(servers[0])
+    await waitJobs(servers)
+    await checkLanguage(servers, uuid, null)
+
+    await servers[0].config.enableTranscription() // left enabled for the tests that follow
+
+    await servers[0].captions.runGenerate({ videoId: uuid })
+    await waitJobs(servers)
+    await checkLanguage(servers, uuid, 'en')
+
+    await checkCaption(servers, uuid)
+  })
+
+  it('Should run transcription on upload by default', async function () {
+    this.timeout(360000)
+
+    const uuid = await uploadForTranscription(servers[0])
+
+    await waitJobs(servers)
+    await checkCaption(servers, uuid)
+    await checkLanguage(servers, uuid, 'en')
+  })
+
+  it('Should run transcription on import by default', async function () {
+    this.timeout(360000)
+
+    const { video } = await servers[0].videoImports.importVideo({
+      attributes: {
+        privacy: VideoPrivacy.PUBLIC,
+        targetUrl: FIXTURE_URLS.transcriptionVideo,
+        language: undefined
+      }
+    })
+
+    await waitJobs(servers)
+    await checkCaption(servers, video.uuid)
+    await checkLanguage(servers, video.uuid, 'en')
+  })
+
+  it('Should run transcription when live ended', async function () {
+    this.timeout(360000)
+
+    await servers[0].config.enableMinimumTranscoding()
+    await servers[0].config.enableLive({ allowReplay: true, transcoding: true, resolutions: 'min' })
+
+    const { live, video } = await servers[0].live.quickCreate({
+      saveReplay: true,
+      permanentLive: false,
+      privacy: VideoPrivacy.PUBLIC
+    })
+
+    const ffmpegCommand = sendRTMPStream({ rtmpBaseUrl: live.rtmpUrl, streamKey: live.streamKey })
+    await servers[0].live.waitUntilPublished({ videoId: video.id })
+
+    await stopFfmpeg(ffmpegCommand)
+
+    await servers[0].live.waitUntilReplacedByReplay({ videoId: video.id })
+    await waitJobs(servers)
+    await checkCaption(servers, video.uuid, 'WEBVTT\n\n00:') // shorter expected prefix than checkCaption's default
+    await checkLanguage(servers, video.uuid, 'en')
+
+    await servers[0].config.enableLive({ allowReplay: false })
+    await servers[0].config.disableTranscoding()
+  })
+
+  it('Should not run transcription if disabled by user', async function () {
+    this.timeout(120000)
+
+    {
+      const uuid = await uploadForTranscription(servers[0], { generateTranscription: false })
+
+      await waitJobs(servers)
+      await checkNoCaption(servers, uuid)
+      await checkLanguage(servers, uuid, null)
+    }
+
+    {
+      const { video } = await servers[0].videoImports.importVideo({
+        attributes: {
+          privacy: VideoPrivacy.PUBLIC,
+          targetUrl: FIXTURE_URLS.transcriptionVideo,
+          generateTranscription: false
+        }
+      })
+
+      await waitJobs(servers)
+      await checkNoCaption(servers, video.uuid)
+      await checkLanguage(servers, video.uuid, null)
+    }
+  })
+
+  it('Should not run a transcription if the video does not contain audio', async function () {
+    this.timeout(120000)
+
+    const uuid = await uploadForTranscription(servers[0], { fixture: 'video_short_no_audio.mp4' }) // transcription not opted out
+
+    await waitJobs(servers)
+    await checkNoCaption(servers, uuid)
+    await checkLanguage(servers, uuid, null)
+  })
+
+  after(async function () {
+    await cleanupTests(servers)
+  })
+})
--- a/packages/tests/src/jiwer/jiwer-cli.spec.ts
+++ b/packages/tests/src/jiwer/jiwer-cli.spec.ts
@ -3,7 +3,7 @@ import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
 import { join } from 'path'
 import { mkdir, rm, writeFile } from 'node:fs/promises'
 import { expect } from 'chai'
-import { JiwerClI } from '@peertube/peertube-jiwer'
+import { JiwerClI } from '@peertube/peertube-transcription-devtools'

 describe('Jiwer CLI', function () {
  const transcriptDirectory = buildAbsoluteFixturePath('transcription/transcript-evaluator')
--- a/packages/tests/src/peertube-runner/index.ts
+++ b/packages/tests/src/peertube-runner/index.ts
@ -2,4 +2,5 @@ export * from './client-cli.js'
 export * from './live-transcoding.js'
 export * from './replace-file.js'
 export * from './studio-transcoding.js'
+export * from './video-transcription.js'
 export * from './vod-transcoding.js'
--- a/packages/tests/src/peertube-runner/live-transcoding.ts
+++ b/packages/tests/src/peertube-runner/live-transcoding.ts
@ -194,7 +194,7 @@ describe('Test Live transcoding in peertube-runner program', function () {
  describe('Check cleanup', function () {

    it('Should have an empty cache directory', async function () {
-      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner)
+      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner, 'transcoding')
    })
  })

--- a/packages/tests/src/peertube-runner/studio-transcoding.ts
+++ b/packages/tests/src/peertube-runner/studio-transcoding.ts
@ -111,7 +111,7 @@ describe('Test studio transcoding in peertube-runner program', function () {
  describe('Check cleanup', function () {

    it('Should have an empty cache directory', async function () {
-      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner)
+      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner, 'transcoding')
    })
  })

--- a/packages/tests/src/peertube-runner/video-transcription.ts
+++ b/packages/tests/src/peertube-runner/video-transcription.ts
@ -0,0 +1,89 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import { wait } from '@peertube/peertube-core-utils'
+import { RunnerJobState } from '@peertube/peertube-models'
+import {
+  PeerTubeServer,
+  cleanupTests,
+  createMultipleServers,
+  doubleFollow,
+  setAccessTokensToServers,
+  setDefaultVideoChannel,
+  waitJobs
+} from '@peertube/peertube-server-commands'
+import { checkPeerTubeRunnerCacheIsEmpty } from '@tests/shared/directories.js'
+import { PeerTubeRunnerProcess } from '@tests/shared/peertube-runner-process.js'
+import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
+
+describe('Test transcription in peertube-runner program', function () {
+  let servers: PeerTubeServer[] = []
+  let peertubeRunner: PeerTubeRunnerProcess
+
+  before(async function () {
+    this.timeout(120_000)
+
+    servers = await createMultipleServers(2)
+    const [ origin, follower ] = servers
+
+    await setAccessTokensToServers(servers)
+    await setDefaultVideoChannel(servers)
+    await doubleFollow(origin, follower)
+
+    await origin.config.enableTranscription({ remote: true }) // enabled with the `remote` option
+    const registrationToken = await origin.runnerRegistrationTokens.getFirstRegistrationToken()
+
+    // Start the runner program and register it on the first server
+    peertubeRunner = new PeerTubeRunnerProcess(origin)
+    await peertubeRunner.runServer()
+    await peertubeRunner.registerPeerTubeInstance({ registrationToken, runnerName: 'runner' })
+  })
+
+  describe('Running transcription', function () {
+
+    it('Should run transcription on classic file', async function () {
+      this.timeout(360000)
+
+      const videoUUID = await uploadForTranscription(servers[0])
+      await waitJobs(servers, { runnerJobs: true })
+
+      await checkCaption(servers, videoUUID)
+      await checkLanguage(servers, videoUUID, 'en')
+    })
+
+    it('Should not run transcription on video without audio stream', async function () {
+      this.timeout(360000)
+
+      const videoUUID = await uploadForTranscription(servers[0], { fixture: 'video_short_no_audio.mp4' })
+
+      await waitJobs(servers)
+
+      // Poll every 500ms until a transcription runner job is listed as errored
+      let erroredJobFound = false
+      do {
+        await wait(500)
+
+        const errored = await servers[0].runnerJobs.list({ stateOneOf: [ RunnerJobState.ERRORED ] })
+        erroredJobFound = errored.data.some(job => job.type === 'video-transcription')
+      } while (!erroredJobFound)
+
+      await checkNoCaption(servers, videoUUID)
+      await checkLanguage(servers, videoUUID, null)
+    })
+  })
+
+  describe('Check cleanup', function () {
+
+    it('Should have an empty cache directory', async function () {
+      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner, 'transcription')
+    })
+  })
+
+  after(async function () {
+    if (peertubeRunner) { // still unset when `before` failed before creating the runner
+      await peertubeRunner.unregisterPeerTubeInstance({ runnerName: 'runner' })
+      peertubeRunner.kill()
+    }
+
+    await cleanupTests(servers)
+  })
+})
--- a/packages/tests/src/peertube-runner/vod-transcoding.ts
+++ b/packages/tests/src/peertube-runner/vod-transcoding.ts
@ -373,7 +373,7 @@ describe('Test VOD transcoding in peertube-runner program', function () {
  describe('Check cleanup', function () {

    it('Should have an empty cache directory', async function () {
-      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner)
+      await checkPeerTubeRunnerCacheIsEmpty(peertubeRunner, 'transcoding')
    })
  })

--- a/packages/tests/src/shared/directories.ts
+++ b/packages/tests/src/shared/directories.ts
@ -32,13 +32,13 @@ export async function checkDirectoryIsEmpty (server: PeerTubeServer, directory:
  expect(filtered).to.have.lengthOf(0)
 }

-export async function checkPeerTubeRunnerCacheIsEmpty (runner: PeerTubeRunnerProcess) {
-  const directoryPath = join(homedir(), '.cache', 'peertube-runner-nodejs', runner.getId(), 'transcoding')
+export async function checkPeerTubeRunnerCacheIsEmpty (runner: PeerTubeRunnerProcess, subDir: 'transcoding' | 'transcription') {
+  const directoryPath = join(homedir(), '.cache', 'peertube-runner-nodejs', runner.getId(), subDir)

  const directoryExists = await pathExists(directoryPath)
  expect(directoryExists).to.be.true

  const files = await readdir(directoryPath)

-  expect(files, 'Directory content: ' + files.join(', ')).to.have.lengthOf(0)
+  expect(files, `Sub-directory ${subDir} content: ${files.join(', ')}`).to.have.lengthOf(0)
 }
--- a/packages/tests/src/shared/fixture-urls.ts
+++ b/packages/tests/src/shared/fixture-urls.ts
@ -26,6 +26,7 @@ export const FIXTURE_URLS = {
  badVideo: 'https://download.cpy.re/peertube/bad_video.mp4',
  goodVideo: 'https://download.cpy.re/peertube/good_video.mp4',
  goodVideo720: 'https://download.cpy.re/peertube/good_video_720.mp4',
+  transcriptionVideo: 'https://download.cpy.re/peertube/the_last_man_on_earth.mp4',

  chatersVideo: 'https://download.cpy.re/peertube/video_chapters.mp4',

--- a/packages/tests/src/shared/notifications.ts
+++ b/packages/tests/src/shared/notifications.ts
@ -54,6 +54,7 @@ function getAllNotificationsSettings (): UserNotificationSetting {
    autoInstanceFollowing: UserNotificationSettingValue.WEB | UserNotificationSettingValue.EMAIL,
    newPeerTubeVersion: UserNotificationSettingValue.WEB | UserNotificationSettingValue.EMAIL,
    myVideoStudioEditionFinished: UserNotificationSettingValue.WEB | UserNotificationSettingValue.EMAIL,
+    myVideoTranscriptionGenerated: UserNotificationSettingValue.WEB | UserNotificationSettingValue.EMAIL,
    newPluginVersion: UserNotificationSettingValue.WEB | UserNotificationSettingValue.EMAIL
  }
 }
@ -758,6 +759,40 @@ async function checkNewPluginVersion (options: CheckerBaseParams & {
  await checkNotification({ ...options, notificationChecker, emailNotificationFinder })
 }

+async function checkMyVideoTranscriptionGenerated (options: CheckerBaseParams & {
+  videoName: string
+  shortUUID: string
+  language: {
+    id: string
+    label: string
+  }
+  checkType: CheckerType
+}) {
+  const { videoName, shortUUID, language } = options
+  const notificationType = UserNotificationType.MY_VIDEO_TRANSCRIPTION_GENERATED
+
+  function notificationChecker (notification: UserNotification, checkType: CheckerType) {
+    if (checkType === 'presence') {
+      expect(notification).to.not.be.undefined
+      expect(notification.type).to.equal(notificationType)
+
+      expect(notification.videoCaption).to.exist
+      expect(notification.videoCaption.language.id).to.equal(language.id)
+      expect(notification.videoCaption.language.label).to.equal(language.label)
+      checkVideo(notification.videoCaption.video, videoName, shortUUID)
+    } else {
+      expect(notification.videoCaption).to.satisfy(c => c === undefined || c.video.shortUUID !== shortUUID) // same `video` key as above
+    }
+  }
+
+  function emailNotificationFinder (email: object) { // matches emails mentioning both the video short UUID and the language label
+    const text: string = email['text']
+    return text.includes(shortUUID) && text.includes('Transcription in ' + language.label)
+  }
+
+  await checkNotification({ ...options, notificationChecker, emailNotificationFinder })
+}
+
 async function prepareNotificationsTest (serversCount = 3, overrideConfigArg: any = {}) {
  const userNotifications: UserNotification[] = []
  const adminNotifications: UserNotification[] = []
@ -863,7 +898,8 @@ export {
  checkNewPeerTubeVersion,
  checkNewPluginVersion,
  checkVideoStudioEditionIsFinished,
-  checkRegistrationRequest
+  checkRegistrationRequest,
+  checkMyVideoTranscriptionGenerated
 }

 // ---------------------------------------------------------------------------
--- a/packages/tests/src/shared/transcription.ts
+++ b/packages/tests/src/shared/transcription.ts
@ -0,0 +1,81 @@
+/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
+
+import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
+import { PeerTubeServer, VideoEdit } from '@peertube/peertube-server-commands'
+import { downloadFile, unzip } from '@peertube/peertube-transcription-devtools'
+import { expect } from 'chai'
+import { ensureDir, pathExists } from 'fs-extra/esm'
+import { join } from 'path'
+import { testCaptionFile } from './captions.js'
+import { FIXTURE_URLS } from './fixture-urls.js'
+
+type CustomModelName = 'tiny.pt' | 'faster-whisper-tiny'
+
+export async function downloadCustomModelsIfNeeded (modelName: CustomModelName) {
+  if (await pathExists(getCustomModelPath(modelName))) return // requested model is already on disk
+  const modelsDirectory = getCustomModelDirectory()
+  await ensureDir(modelsDirectory)
+  await unzip(await downloadFile(FIXTURE_URLS.transcriptionModels, modelsDirectory))
+}
+
+export function getCustomModelDirectory () { // fixture path of the `transcription/models-v1` directory
+  return buildAbsoluteFixturePath(join('transcription', 'models-v1'))
+}
+
+export function getCustomModelPath (modelName: CustomModelName) { // `<custom model directory>/models/<modelName>`
+  return join(getCustomModelDirectory(), 'models', modelName)
+}
+
+// ---------------------------------------------------------------------------
+
+// Checks that each server lists exactly one English caption for the video and that the
+// caption file passes testCaptionFile() with `captionContains`
+export async function checkCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
+  for (const peer of servers) {
+    const { total, data } = await peer.captions.list({ videoId: uuid })
+    expect(total).to.equal(1)
+    expect(data).to.have.lengthOf(1)
+
+    const [ { language, captionPath } ] = data
+    expect(language.id).to.equal('en')
+    expect(language.label).to.equal('English')
+
+    await testCaptionFile(peer.url, captionPath, captionContains)
+  }
+}
+
+export async function checkNoCaption (servers: PeerTubeServer[], uuid: string) { // no server may list a caption
+  for (const peer of servers) {
+    const { total, data } = await peer.captions.list({ videoId: uuid })
+    expect(total).to.equal(0)
+    expect(data).to.have.lengthOf(0)
+  }
+}
+
+// ---------------------------------------------------------------------------
+
+export async function checkLanguage (servers: PeerTubeServer[], uuid: string, expected: string | null) {
+  // A falsy `expected` means each server must report a null language id for the video
+  for (const peer of servers) {
+    const { language } = await peer.videos.get({ id: uuid })
+    if (!expected) {
+      expect(language.id).to.be.null
+    } else {
+      expect(language.id).to.equal(expected)
+    }
+  }
+}
+
+// Uploads a video on `server` and resolves with its UUID. The defaults (name, transcription
+// fixture, no language) can be overridden through `body`.
+export async function uploadForTranscription (server: PeerTubeServer, body: Partial<VideoEdit> = {}) {
+  const defaults = {
+    name: 'video',
+    fixture: join('transcription', 'videos', 'the_last_man_on_earth.mp4'),
+    language: undefined
+  }
+
+  const uploaded = await server.videos.upload({ attributes: { ...defaults, ...body } })
+
+  return uploaded.uuid
+}
--- a/packages/tests/src/transcription/levenshtein-distance.spec.ts
+++ b/packages/tests/src/transcription/levenshtein-distance.spec.ts
@ -1,5 +1,5 @@
+import { levenshteinDistance } from '@peertube/peertube-transcription-devtools'
 import { expect } from 'chai'
-import { levenshteinDistance } from '@peertube/peertube-transcription'

 describe('Levenshtein distance', function () {
  it(`equals 1 when there is only one character difference`, function () {
--- a/packages/tests/src/transcription/transcriber-factory.spec.ts
+++ b/packages/tests/src/transcription/transcriber-factory.spec.ts
@ -1,17 +1,16 @@
-import { transcriberFactory } from '@peertube/peertube-transcription'
+import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription'
+import { createLogger } from 'winston'

 describe('Transcriber factory', function () {
-  const transcribers = [
-    'openai-whisper',
-    'whisper-ctranslate2',
-    'whisper-timestamped'
-  ]
+  const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]

  describe('Should be able to create a transcriber for each available transcription engine', function () {
-    transcribers.forEach(function (transcriberName) {
+
+    for (const transcriberName of transcribers) {
      it(`Should be able to create a(n) ${transcriberName} transcriber`, function () {
-        transcriberFactory.createFromEngineName(transcriberName)
+        transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createLogger() })
      })
-    })
+    }
+
  })
 })
--- a/packages/tests/src/transcription/transcript/transcript-file-evaluator.spec.ts
+++ b/packages/tests/src/transcription/transcript/transcript-file-evaluator.spec.ts
@ -1,17 +1,18 @@
 /* eslint-disable @typescript-eslint/no-unused-expressions, no-new, max-len */
-import { TranscriptFile, TranscriptFileEvaluator } from '@peertube/peertube-transcription'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
-import { join } from 'node:path'
-import { mkdir, rm } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
+import { TranscriptFile } from '@peertube/peertube-transcription'
+import { TranscriptFileEvaluator } from '@peertube/peertube-transcription-devtools'
 import { expect } from 'chai'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'

 describe('Transcript File Evaluator', function () {
  const transcriptDirectory = join(tmpdir(), 'peertube-transcription', 'transcript-file-evaluator')
  const referenceTranscriptFilePath = buildAbsoluteFixturePath('transcription/videos/communiquer-lors-dune-classe-transplantee.txt')

  before(async function () {
-    await mkdir(transcriptDirectory, { recursive: true })
+    await ensureDir(transcriptDirectory)
  })

  it(`may not compare files in another format than txt`, async function () {
@ -62,6 +63,6 @@ Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux co
  })

  after(async function () {
-    await rm(transcriptDirectory, { recursive: true, force: true })
+    await remove(transcriptDirectory)
  })
 })
--- a/packages/tests/src/transcription/transcript/transcript-file.spec.ts
+++ b/packages/tests/src/transcription/transcript/transcript-file.spec.ts
@ -1,15 +1,16 @@
 /* eslint-disable @typescript-eslint/no-unused-expressions */
-import { expect } from 'chai'
-import { join } from 'node:path'
-import { mkdir, rm } from 'node:fs/promises'
-import { TranscriptFile } from '@peertube/peertube-transcription'
-import { tmpdir } from 'node:os'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
+import { TranscriptFile } from '@peertube/peertube-transcription'
+import { expect } from 'chai'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'

 describe('Transcript File', function () {
  const transcriptFileDirectory = join(tmpdir(), 'peertube-transcription', 'transcript-file')
+
  before(async function () {
-    await mkdir(transcriptFileDirectory, { recursive: true })
+    await ensureDir(transcriptFileDirectory)
  })

  it(`may creates a new transcript file from scratch`, async function () {
@ -33,12 +34,11 @@ describe('Transcript File', function () {
  })

  it(`fails when loading a file which is obviously not a transcript`, function () {
-
    expect(() => TranscriptFile.fromPath(buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4'), 'en'))
      .to.throw(`Couldn't guess transcript format from extension "mp4". Valid formats are: txt, vtt, srt.`)
  })

  after(async function () {
-    await rm(transcriptFileDirectory, { recursive: true, force: true })
+    await remove(transcriptFileDirectory)
  })
 })
--- a/packages/tests/src/transcription/transcription-run.spec.ts
+++ b/packages/tests/src/transcription/transcription-run.spec.ts
@ -1 +0,0 @@
-describe('Transcription run', function () {})
--- a/packages/tests/src/transcription/utils.spec.ts
+++ b/packages/tests/src/transcription/utils.spec.ts
@ -1,14 +1,16 @@
-import { cp, lstat, mkdir, rm } from 'node:fs/promises'
-import { join } from 'node:path'
-import { tmpdir } from 'node:os'
-import { expect } from 'chai'
-import { downloadFile, unzip } from '@peertube/peertube-transcription'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
+import { downloadFile, unzip } from '@peertube/peertube-transcription-devtools'
+import { expect } from 'chai'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { cp, lstat } from 'node:fs/promises'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'

 describe('downloadFile', function () {
  const testDirectory = join(tmpdir(), 'peertube-transcription', 'utils')
+
  before(async function () {
-    await mkdir(testDirectory, { recursive: true })
+    await ensureDir(testDirectory)
  })

  it(`Downloads a file and write it to the disk `, async function () {
@ -18,7 +20,7 @@ describe('downloadFile', function () {
  })

  after(async function () {
-    await rm(testDirectory, { recursive: true, force: true })
+    await remove(testDirectory)
  })
 })

@ -26,8 +28,9 @@ describe('unzip', function () {
  const zipFixtureFileName = 'hello_world.zip'
  const zipFixtureFilePath = buildAbsoluteFixturePath(`transcription/${zipFixtureFileName}`)
  const testDirectory = join(tmpdir(), 'peertube-transcription', 'utils')
+
  before(async function () {
-    await mkdir(testDirectory, { recursive: true })
+    await ensureDir(testDirectory)
  })

  it(`Extract zip archive to directory`, async function () {
@ -39,6 +42,6 @@ describe('unzip', function () {
  })

  after(async function () {
-    await rm(testDirectory, { recursive: true, force: true })
+    await remove(testDirectory)
  })
 })
--- a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
@ -1,57 +1,59 @@
 /* eslint-disable @typescript-eslint/no-unused-expressions, max-len */
-import { expect, config } from 'chai'
-import { createLogger } from 'winston'
-import { join } from 'node:path'
-import { mkdir, rm } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
 import {
-  downloadFile,
-  levenshteinDistance,
  OpenaiTranscriber,
  TranscriptFile,
-  TranscriptFileEvaluator,
  TranscriptionModel,
-  unzip,
  WhisperBuiltinModel
 } from '@peertube/peertube-transcription'
-import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
+import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
+import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
+import { config, expect } from 'chai'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { createLogger } from 'winston'

 config.truncateThreshold = 0

 describe('Open AI Whisper transcriber', function () {
  const tmpDirectory = join(tmpdir(), 'peertube-transcription')
  const transcriptDirectory = join(tmpDirectory, 'transcriber', 'openai')
-  const modelsDirectory = join(tmpDirectory, 'models')
+
  const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4')
  const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4')
+
  const referenceTranscriptFile = new TranscriptFile({
    path: buildAbsoluteFixturePath('transcription/videos/derive_sectaire.txt'),
    language: 'fr',
    format: 'txt'
  })
-  const transcriber = new OpenaiTranscriber(
-    {
+
+  const transcriber = new OpenaiTranscriber({
+    engine: {
      name: 'openai-whisper',
-      requirements: [],
      type: 'binary',
-      binary: 'whisper',
+      command: 'whisper',
      supportedModelFormats: [ 'PyTorch' ],
-      languageDetection: true
+      languageDetection: true,
+      version: ''
    },
-    createLogger(),
-    transcriptDirectory
-  )
+    logger: createLogger()
+  })
+  const model = new TranscriptionModel('tiny')

  before(async function () {
-    this.timeout(1 * 1000 * 60)
-    await mkdir(transcriptDirectory, { recursive: true })
-    await unzip(await downloadFile(FIXTURE_URLS.transcriptionModels, tmpDirectory))
+    this.timeout(120000)
+
+    await ensureDir(transcriptDirectory)
+
+    await downloadCustomModelsIfNeeded('tiny.pt')
  })

-  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
+  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format', async function () {
    this.timeout(3 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
+
+    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'vtt', model, transcriptDirectory })

    expect(transcript.format).to.equals('vtt')
    expect(transcript.language).to.equals('en')
@ -59,7 +61,7 @@ describe('Open AI Whisper transcriber', function () {
  })

  it('May produce a transcript file in the `srt` format', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
+    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt', model, transcriptDirectory })

    expect(transcript.format).to.equals('srt')
    expect(transcript.language).to.equals('en')
@ -67,7 +69,7 @@ describe('Open AI Whisper transcriber', function () {
  })

  it('May produce a transcript file in the `txt` format', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
+    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt', model, transcriptDirectory })

    expect(transcript.format).to.equals('txt')
    expect(transcript.language).to.equals('en')
@ -80,16 +82,20 @@ describe('Open AI Whisper transcriber', function () {

  it('May transcribe a media file using a local PyTorch model', async function () {
    this.timeout(2 * 1000 * 60)
+
    await transcriber.transcribe({
      mediaFilePath: shortVideoPath,
-      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'tiny.pt')),
-      language: 'en'
+      model: await TranscriptionModel.fromPath(getCustomModelPath('tiny.pt')),
+      language: 'en',
+      format: 'vtt',
+      transcriptDirectory
    })
  })

  it('May transcribe a media file in french', async function () {
    this.timeout(3 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt' })
+
+    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt', model, transcriptDirectory })

    expect(transcript.format).to.equals('txt')
    expect(transcript.language).to.equals('fr')
@ -98,18 +104,21 @@ describe('Open AI Whisper transcriber', function () {

  it('Guesses the video language if not provided', async function () {
    this.timeout(3 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath })
+
+    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, model, format: 'vtt', transcriptDirectory })

    expect(transcript.language).to.equals('fr')
  })

-  it('May transcribe a media file in french with small model', async function () {
+  it('May transcribe a media file in french with small model (can be long)', async function () {
    this.timeout(6 * 1000 * 60)
+
    const transcript = await transcriber.transcribe({
      mediaFilePath: frVideoPath,
      language: 'fr',
      format: 'txt',
-      model: new WhisperBuiltinModel('small')
+      model: new WhisperBuiltinModel('small'),
+      transcriptDirectory
    })

    expect(transcript.language).to.equals('fr')
@ -120,6 +129,6 @@ describe('Open AI Whisper transcriber', function () {
  })

  after(async function () {
-    await rm(transcriptDirectory, { recursive: true, force: true })
+    await remove(transcriptDirectory)
  })
 })
--- a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
@ -1,133 +0,0 @@
-/* eslint-disable @typescript-eslint/no-unused-expressions, max-len */
-import { expect, config } from 'chai'
-import { createLogger } from 'winston'
-import { join } from 'node:path'
-import { mkdir, rm } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
-import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
-import {
-  OpenaiTranscriber,
-  WhisperTimestampedTranscriber,
-  TranscriptFileEvaluator,
-  TranscriptionModel,
-  WhisperTranscribeArgs,
-  levenshteinDistance, downloadFile, unzip
-} from '@peertube/peertube-transcription'
-import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
-
-config.truncateThreshold = 0
-
-describe('Linto timestamped Whisper transcriber', function () {
-  const tmpDirectory = join(tmpdir(), 'peertube-transcription')
-  const transcriptDirectory = join(tmpDirectory, 'transcriber', 'timestamped')
-  const modelsDirectory = join(tmpDirectory, 'models')
-  const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4')
-  const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4')
-  const transcriber = new WhisperTimestampedTranscriber(
-    {
-      name: 'whisper-timestamped',
-      requirements: [],
-      type: 'binary',
-      binary: 'whisper_timestamped',
-      supportedModelFormats: [ 'PyTorch' ],
-      languageDetection: true
-    },
-    createLogger(),
-    transcriptDirectory
-  )
-
-  before(async function () {
-    this.timeout(1 * 1000 * 60)
-    await mkdir(transcriptDirectory, { recursive: true })
-    await unzip(await downloadFile(FIXTURE_URLS.transcriptionModels, tmpDirectory))
-  })
-
-  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
-    this.timeout(1 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
-
-    expect(transcript.format).to.equals('vtt')
-    expect(transcript.language).to.equals('en')
-    expect(await transcript.read()).not.to.be.empty
-  })
-
-  it('May produce a transcript file in the `srt` format with a ms precision', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
-
-    expect(transcript.format).to.equals('srt')
-    expect(transcript.language).to.equals('en')
-    expect(await transcript.read()).not.to.be.empty
-  })
-
-  it('May produce a transcript file in `txt` format', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
-
-    expect(transcript.format).to.equals('txt')
-    expect(transcript.language).to.equals('en')
-    expect(await transcript.read()).not.to.be.empty
-    expect(levenshteinDistance(
-      (await transcript.read()).toString(),
-      'December 1965, is that all it has been since I inherited the world only three years, seems like a hundred million.'
-    )).to.be.below(10)
-  })
-
-  it('May transcribe a media file using a local PyTorch model file', async function () {
-    this.timeout(2 * 1000 * 60)
-    await transcriber.transcribe({
-      mediaFilePath: shortVideoPath,
-      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'tiny.pt')),
-      language: 'en'
-    })
-  })
-
-  it('May transcribe a media file in french', async function () {
-    this.timeout(2 * 1000 * 60)
-    const transcript = await transcriber.transcribe({
-      mediaFilePath: frVideoPath,
-      language: 'fr',
-      format: 'txt'
-    })
-
-    expect(transcript.format).to.equals('txt')
-    expect(transcript.language).to.equals('fr')
-    expect(await transcript.read()).not.to.be.empty
-  })
-
-  it('Guesses the video language if not provided', async function () {
-    this.timeout(2 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath })
-    expect(transcript.language).to.equals('fr')
-  })
-
-  it('Should produce a text transcript similar to openai-whisper implementation', async function () {
-    this.timeout(11 * 1000 * 60)
-    const transcribeArgs: WhisperTranscribeArgs = {
-      mediaFilePath: frVideoPath,
-      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'tiny.pt')),
-      language: 'fr',
-      format: 'txt'
-    }
-    const transcript = await transcriber.transcribe(transcribeArgs)
-
-    const openaiTranscriber = new OpenaiTranscriber(
-      {
-        name: 'openai-whisper',
-        requirements: [],
-        type: 'binary',
-        binary: 'whisper',
-        supportedModelFormats: [ 'PyTorch' ]
-      },
-      createLogger(),
-      join(transcriptDirectory, 'openai-whisper')
-    )
-    const openaiTranscript = await openaiTranscriber.transcribe(transcribeArgs)
-
-    const transcriptFileEvaluator = new TranscriptFileEvaluator(openaiTranscript, transcript)
-    expect(await transcriptFileEvaluator.wer()).to.be.below(25 / 100)
-    expect(await transcriptFileEvaluator.cer()).to.be.below(15 / 100)
-  })
-
-  after(async function () {
-    await rm(transcriptDirectory, { recursive: true, force: true })
-  })
-})
--- a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts
@ -1,50 +1,58 @@
 /* eslint-disable @typescript-eslint/no-unused-expressions, max-len */
-import { expect, config } from 'chai'
-import { createLogger } from 'winston'
-import { join } from 'node:path'
-import { mkdir, rm } from 'node:fs/promises'
-import { tmpdir } from 'node:os'
 import { buildAbsoluteFixturePath } from '@peertube/peertube-node-utils'
 import {
-  Ctranslate2Transcriber, downloadFile,
-  levenshteinDistance,
+  Ctranslate2Transcriber,
  OpenaiTranscriber,
  TranscriptFile,
-  TranscriptFileEvaluator,
-  TranscriptionModel, unzip,
-  WhisperTranscribeArgs
+  TranscriptionModel
 } from '@peertube/peertube-transcription'
-import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
+import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
+import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
+import { config, expect } from 'chai'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { createLogger } from 'winston'

 config.truncateThreshold = 0

 describe('Whisper CTranslate2 transcriber', function () {
  const tmpDirectory = join(tmpdir(), 'peertube-transcription')
  const transcriptDirectory = join(tmpDirectory, 'transcriber', 'ctranslate2')
-  const modelsDirectory = join(tmpDirectory, 'models')
+
  const shortVideoPath = buildAbsoluteFixturePath('transcription/videos/the_last_man_on_earth.mp4')
  const frVideoPath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4')
-  const transcriber = new Ctranslate2Transcriber(
-    {
-      name: 'anyNameShouldBeFineReally',
-      requirements: [],
-      type: 'binary',
-      binary: 'whisper-ctranslate2',
-      supportedModelFormats: [],
-      languageDetection: true
-    },
-    createLogger(),
-    transcriptDirectory
-  )

-  before(async function () {
-    this.timeout(1 * 1000 * 60)
-    await mkdir(transcriptDirectory, { recursive: true })
-    await unzip(await downloadFile(FIXTURE_URLS.transcriptionModels, tmpDirectory))
+  const transcriber = new Ctranslate2Transcriber({
+    engine: {
+      name: 'whisper-ctranslate2',
+      type: 'binary',
+      command: 'whisper-ctranslate2',
+      supportedModelFormats: [ 'CTranslate2' ],
+      languageDetection: true,
+      version: '0.4.4'
+    },
+    logger: createLogger()
  })

-  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en' })
+  const model = new TranscriptionModel('tiny')
+
+  before(async function () {
+    this.timeout(120000)
+
+    await ensureDir(transcriptDirectory)
+
+    await downloadCustomModelsIfNeeded('faster-whisper-tiny')
+  })
+
+  it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format', async function () {
+    const transcript = await transcriber.transcribe({
+      mediaFilePath: shortVideoPath,
+      language: 'en',
+      format: 'vtt',
+      model,
+      transcriptDirectory
+    })

    expect(transcript.format).to.equals('vtt')
    expect(transcript.language).to.equals('en')
@ -52,7 +60,13 @@ describe('Whisper CTranslate2 transcriber', function () {
  })

  it('May produce a transcript file in the `srt` format', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'srt' })
+    const transcript = await transcriber.transcribe({
+      mediaFilePath: shortVideoPath,
+      language: 'en',
+      format: 'srt',
+      model,
+      transcriptDirectory
+    })

    expect(transcript.format).to.equals('srt')
    expect(transcript.language).to.equals('en')
@ -60,7 +74,13 @@ describe('Whisper CTranslate2 transcriber', function () {
  })

  it('May produce a transcript file in the `txt` format', async function () {
-    const transcript = await transcriber.transcribe({ mediaFilePath: shortVideoPath, language: 'en', format: 'txt' })
+    const transcript = await transcriber.transcribe({
+      mediaFilePath: shortVideoPath,
+      language: 'en',
+      format: 'txt',
+      model,
+      transcriptDirectory
+    })
    expect(await transcript.equals(new TranscriptFile({
      path: join(transcriptDirectory, 'the_last_man_on_earth.txt'),
      format: 'txt',
@ -78,10 +98,12 @@ describe('Whisper CTranslate2 transcriber', function () {

  it('May transcribe a media file using a local CTranslate2 model', async function () {
    this.timeout(2 * 1000 * 60)
+
    const transcript = await transcriber.transcribe({
      mediaFilePath: shortVideoPath,
-      model: await TranscriptionModel.fromPath(join(modelsDirectory, 'faster-whisper-tiny')),
+      model: await TranscriptionModel.fromPath(getCustomModelPath('faster-whisper-tiny')),
      language: 'en',
+      transcriptDirectory,
      format: 'txt'
    })

@ -92,7 +114,14 @@ describe('Whisper CTranslate2 transcriber', function () {

  it('May transcribe a media file in french', async function () {
    this.timeout(5 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, language: 'fr', format: 'txt' })
+
+    const transcript = await transcriber.transcribe({
+      mediaFilePath: frVideoPath,
+      language: 'fr',
+      format: 'txt',
+      model,
+      transcriptDirectory
+    })

    expect(transcript.format).to.equals('txt')
    expect(transcript.language).to.equals('fr')
@ -101,30 +130,39 @@ describe('Whisper CTranslate2 transcriber', function () {

  it('Guesses the video language if not provided', async function () {
    this.timeout(2 * 1000 * 60)
-    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath })
+
+    const transcript = await transcriber.transcribe({ mediaFilePath: frVideoPath, format: 'vtt', model, transcriptDirectory })
    expect(transcript.language).to.equals('fr')
  })

  it('Should produce a text transcript similar to openai-whisper implementation', async function () {
    this.timeout(10 * 1000 * 60)
-    const transcribeArgs: WhisperTranscribeArgs = {
+
+    const transcribeArgs = {
      mediaFilePath: frVideoPath,
      language: 'fr',
-      format: 'txt'
+      format: 'txt' as 'txt',
+      transcriptDirectory,
+      model
    }
+
    const transcript = await transcriber.transcribe(transcribeArgs)
-    const openaiTranscriber = new OpenaiTranscriber(
-      {
+
+    const openaiTranscriber = new OpenaiTranscriber({
+      engine: {
        name: 'openai-whisper',
-        requirements: [],
        type: 'binary',
-        binary: 'whisper',
-        supportedModelFormats: [ 'PyTorch' ]
+        command: 'whisper',
+        supportedModelFormats: [ 'PyTorch' ],
+        version: '0.4.4'
      },
-      createLogger(),
-      join(transcriptDirectory, 'openai-whisper')
-    )
-    const openaiTranscript = await openaiTranscriber.transcribe(transcribeArgs)
+      logger: createLogger()
+    })
+    const openaiTranscript = await openaiTranscriber.transcribe({
+      ...transcribeArgs,
+
+      transcriptDirectory: join(transcriptDirectory, 'openai-whisper')
+    })

    const transcriptFileEvaluator = new TranscriptFileEvaluator(openaiTranscript, transcript)
    expect(await transcriptFileEvaluator.wer()).to.be.below(20 / 100)
@ -132,6 +170,6 @@ describe('Whisper CTranslate2 transcriber', function () {
  })

  after(async function () {
-    await rm(transcriptDirectory, { recursive: true, force: true })
+    await remove(transcriptDirectory)
  })
 })
--- a/packages/tests/tsconfig.json
+++ b/packages/tests/tsconfig.json
@ -8,13 +8,13 @@
      "@tests/*": [ "./src/*" ],
      "@server/*": [ "../../server/core/*" ],
      "@peertube/peertube-transcription": [ "../transcription" ],
-      "@peertube/peertube-jiwer": [ "../jiwer" ],
+      "@peertube/peertube-transcription-devtools": [ "../transcription-devtools" ],
    }
  },
  "references": [
    { "path": "../core-utils" },
    { "path": "../ffmpeg" },
-    { "path": "../jiwer" },
+    { "path": "../transcription-devtools" },
    { "path": "../models" },
    { "path": "../node-utils" },
    { "path": "../typescript-utils" },
--- a/packages/transcription-devtools/README.md
+++ b/packages/transcription-devtools/README.md
@ -0,0 +1,63 @@
+# Transcription DevTools
+
+Includes:
+  * __JiWER__ CLI NodeJS wrapper
+  * Benchmark tool to test multiple transcription engines
+  * TypeScript classes to evaluate word-error-rate of files generated by the transcription
+
+## Build
+
+```sh
+npm run build
+```
+
+## Benchmark
+
+A benchmark of the available __transcribers__ can be run with:
+```sh
+npm run benchmark
+```
+```
+┌────────────────────────┬───────────────────────┬───────────────────────┬──────────┬────────┬───────────────────────┐
+│        (index)         │          WER          │          CER          │ duration │ model  │        engine         │
+├────────────────────────┼───────────────────────┼───────────────────────┼──────────┼────────┼───────────────────────┤
+│ 5yZGBYqojXe7nuhq1TuHvz │ '28.39506172839506%'  │  '9.62457337883959%'  │  '41s'   │ 'tiny' │   'openai-whisper'    │
+│ x6qREJ2AkTU4e5YmvfivQN │ '29.75206611570248%'  │ '10.46195652173913%'  │  '15s'   │ 'tiny' │ 'whisper-ctranslate2' │
+└────────────────────────┴───────────────────────┴───────────────────────┴──────────┴────────┴───────────────────────┘
+```
+
+The benchmark may be run with several built-in model sizes:
+
+```sh
+MODELS=tiny,small,large npm run benchmark
+```
+
+## Jiwer
+
+> *JiWER is a python tool for computing the word-error-rate of ASR systems.*
+> https://jitsi.github.io/jiwer/cli/
+
+__JiWER__ serves as a reference implementation to calculate error rates between two text files:
+- WER (Word Error Rate)
+- CER (Character Error Rate)
+
+
+### Usage
+
+```typescript
+const jiwerCLI = new JiwerClI('./reference.txt', './hypothesis.txt')
+
+// WER as a ratio, e.g. 0.03 -> 3%
+console.log(await jiwerCLI.wer())
+
+// CER as a ratio, e.g. 0.01 -> 1%
+console.log(await jiwerCLI.cer())
+
+// Detailed comparison report
+console.log(await jiwerCLI.alignment())
+```
+
+## Resources
+
+- https://jitsi.github.io/jiwer/
+- https://github.com/rapidfuzz/RapidFuzz
--- a/packages/transcription-devtools/package.json
+++ b/packages/transcription-devtools/package.json
@ -1,5 +1,5 @@
 {
-  "name": "@peertube/peertube-jiwer",
+  "name": "@peertube/peertube-transcription-devtools",
  "private": true,
  "version": "0.0.0",
  "main": "dist/index.js",
@ -12,9 +12,9 @@
  "type": "module",
  "devDependencies": {},
  "scripts": {
-    "preinstall": "pip install -r requirements.txt",
    "build": "tsc",
-    "watch": "tsc -w"
+    "watch": "tsc -w",
+    "benchmark": "tsx --conditions=peertube:tsx --tsconfig ./tsconfig.json ./src/benchmark.ts"
  },
  "dependencies": {}
 }
--- a/packages/transcription-devtools/requirements.txt
+++ b/packages/transcription-devtools/requirements.txt
@ -0,0 +1 @@
+jiwer
--- a/packages/transcription-devtools/src/benchmark.ts
+++ b/packages/transcription-devtools/src/benchmark.ts
@ -1,17 +1,18 @@
-import { createLogger, transports, format } from 'winston'
-import { join } from 'node:path'
-import { performance, PerformanceObserver } from 'node:perf_hooks'
-import { tmpdir } from 'node:os'
-import { rm, mkdir } from 'node:fs/promises'
-import { buildAbsoluteFixturePath, buildSUUID, SUUID } from '@peertube/peertube-node-utils'
-import {
-  transcriberFactory,
-  TranscriptFile,
-  TranscriptFileEvaluator,
-  TranscriptionEngine,
-  TranscriptionModel
-} from '@peertube/peertube-transcription'
 import { millisecondsToTime } from '@peertube/peertube-core-utils'
+import { SUUID, buildAbsoluteFixturePath, buildSUUID } from '@peertube/peertube-node-utils'
+import {
+  TranscriptFile,
+  TranscriptionEngine,
+  TranscriptionEngineName,
+  TranscriptionModel,
+  transcriberFactory
+} from '@peertube/peertube-transcription'
+import { ensureDir, remove } from 'fs-extra/esm'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { PerformanceObserver, performance } from 'node:perf_hooks'
+import { createLogger, format, transports } from 'winston'
+import { TranscriptFileEvaluator } from './transcript-file-evaluator.js'

 interface BenchmarkResult {
  uuid: SUUID
@ -59,16 +60,15 @@ const formatBenchmarkResult = ({ WER, CER, duration, engine, model }: Partial<Be
 void (async () => {
  const logger = createLogger()
  logger.add(new transports.Console({ format: format.printf(log => log.message) }))
-  const transcribers = [
-    'openai-whisper',
-    'whisper-ctranslate2',
-    'whisper-timestamped'
-  ]
+
+  const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]
  const models = process.env.MODELS
    ? process.env.MODELS.trim().split(',').map(modelName => modelName.trim()).filter(modelName => modelName)
    : [ 'tiny' ]

  const transcriptDirectory = join(tmpdir(), 'peertube-transcription', 'benchmark')
+  const pipDirectory = join(tmpdir(), 'peertube-transcription', 'pip')
+
  const mediaFilePath = buildAbsoluteFixturePath('transcription/videos/derive_sectaire.mp4')
  const referenceTranscriptFile = new TranscriptFile({
    path: buildAbsoluteFixturePath('transcription/videos/derive_sectaire.txt'),
@ -79,7 +79,7 @@ void (async () => {
  let benchmarkResults: Record<string, BenchmarkResult> = {}

  // before
-  await mkdir(transcriptDirectory, { recursive: true })
+  await ensureDir(transcriptDirectory)
  const performanceObserver = new PerformanceObserver((items) => {
    items
      .getEntries()
@ -97,11 +97,13 @@ void (async () => {
  for (const transcriberName of transcribers) {
    logger.info(`Create "${transcriberName}" transcriber for the benchmark...`)

-    const transcriber = transcriberFactory.createFromEngineName(
-      transcriberName,
-      createLogger(),
-      transcriptDirectory
-    )
+    const transcriber = transcriberFactory.createFromEngineName({
+      engineName: transcriberName,
+      logger: createLogger(),
+      binDirectory: join(pipDirectory, 'bin')
+    })
+
+    await transcriber.install(pipDirectory)

    for (const modelName of models) {
      logger.info(`Run benchmark with "${modelName}" model:`)
@ -110,6 +112,7 @@ void (async () => {
      const transcriptFile = await transcriber.transcribe({
        mediaFilePath,
        model,
+        transcriptDirectory,
        language: 'fr',
        format: 'txt',
        runId: uuid
@ -134,6 +137,6 @@ void (async () => {
  Object.values(benchmarkResultsGroupedByModel).forEach(benchmark => console.table(benchmark))

  // after
-  await rm(transcriptDirectory, { recursive: true, force: true })
+  await remove(transcriptDirectory)
  performance.clearMarks()
 })()
--- a/packages/transcription-devtools/src/index.ts
+++ b/packages/transcription-devtools/src/index.ts
@ -0,0 +1,5 @@
+export * from './jiwer-cli.js'
+export * from './levenshtein.js'
+export * from './transcript-file-evaluator-interface.js'
+export * from './transcript-file-evaluator.js'
+export * from './utils.js'
--- a/packages/transcription-devtools/src/jiwer-cli.ts
+++ b/packages/transcription-devtools/src/jiwer-cli.ts
--- a/packages/transcription-devtools/src/levenshtein.ts
+++ b/packages/transcription-devtools/src/levenshtein.ts
--- a/Show More
+++ b/Show More
				`@ -1 +0,0 @@`
				`describe('Transcription run', function () {})`