// Utilisateur:LeDeuxiemeTexte/GoogleOCRFrench.js

/*jshint boss:true*/ /*global $, mw*/

/*
 * Original script from https://nap.wikisource.org/wiki/Utente:Alex_brollo/GoogleOCR.js
 * To use it: https://nap.wikisource.org/w/index.php?title=User:Alex_brollo/GoogleOCR.js&action=raw&ctype=text/javascript
 * Added some code below to post-process texts in French.
 */

/**

* This script adds a toolbar button that replaces the editbox text with OCR text
* derived by sending the .prp-page-image image through Google's Vision API.
*
* For more information, see https://wikisource.org/wiki/Wikisource:Google_OCR
*/

( function ( mw, $ ) { var lang = mw.config.get( 'wgContentLanguage' ); // Questo if ridefinisce lang in "it" per le tre wikisource italiane minori if (["nap","vec","pms"].indexOf(lang)!==-1) {

   	lang="it";

} var toolUrl = "//tools.wmflabs.org/ws-google-ocr/api.php"; var loadingGifUrl = '//upload.wikimedia.org/wikipedia/commons/4/42/Loading.gif'; var sysMessages = [ 'google-ocr-button-label', 'google-ocr-request-in-progress', 'google-ocr-no-text', 'google-ocr-image-not-found' ];

/**
 * Entry point, executed on every page load.
 *
 * Once the user options are available, checks that the current page is in
 * the Page namespace and that the user has one of the two edit toolbars
 * enabled. If so, loads the API module plus the matching toolbar module,
 * makes sure the interface messages are available, and then hands over to
 * customizeToolbar.
 */
function run() {
	mw.loader.using( 'user.options', function onOptionsReady() {
		var inPageNamespace = mw.config.get( 'wgCanonicalNamespace' ) === 'Page';
		var wantsClassicToolbar = mw.user.options.get( 'showtoolbar' ) === 1;
		var wantsWikiEditor = mw.user.options.get( 'usebetatoolbar' ) === 1;
		var modules;

		// Nothing to do outside the Page namespace or without any toolbar.
		if ( !inPageNamespace || !( wantsClassicToolbar || wantsWikiEditor ) ) {
			return;
		}

		// WikiEditor takes precedence when both toolbars are enabled.
		modules = [ 'mediawiki.api' ];
		modules.push( wantsWikiEditor ? 'ext.wikiEditor' : 'mediawiki.toolbar' );

		mw.loader.using( modules, function onModulesReady() {
			var api = new mw.Api();
			api.loadMessagesIfMissing( sysMessages ).then( function onMessagesReady() {
				customizeToolbar( wantsWikiEditor );
			} );
		} );
	} );
}

/**
 * Add the OCR button to the toolbar. This is called in run, and doesn't
 * need to check anything about whether we need to add the button.
 *
 * @param {boolean} useBeta Whether the WikiEditor toolbar should be used.
 */
function customizeToolbar( useBeta ) {
	// Add old-style toolbar button (only if the classic toolbar library is present).
	if ( !useBeta && mw.toolbar ) {
		mw.toolbar.addButton( {
			imageFile: 'https://upload.wikimedia.org/wikipedia/commons/c/ca/GoogleOcr_toolbar_button.png',
			speedTip: mw.msg( 'google-ocr-button-label' ),
			imageId: 'GoogleOcrButton'
		} );
		// The classic toolbar only inserts text, so the click handler is bound by hand.
		$( "img#GoogleOcrButton" ).on( 'click', doOcr ).css( "width", "50px" );
	}

	// Add new-style WikiEditor toolbar button.
	if ( useBeta ) {
		$( document ).ready( function () {
			var ocrButtonDetails = {
				type: 'button',
				icon: 'https://upload.wikimedia.org/wikipedia/commons/b/bd/GoogleOcr_WikiEditor_button.png',
				labelMsg: 'google-ocr-button-label',
				action: {
					type: 'callback',
					execute: doOcr
				}
			};
			var ocrButton = {
				section: 'main', // alternative: 'proofreadpage-tools'
				group: 'insert', // alternative: 'other'
				tools: {
					'GoogleOcr': ocrButtonDetails
				}
			};
			$( "#wpTextbox1" ).wikiEditor( 'addToToolbar', ocrButton );
			$( "a[rel='GoogleOcr']" ).css( "width", "42px" );
		} );
	}

	// Pre-load the loading gif so that it shows immediately on the first OCR request.
	$( '<img />' ).attr( 'src', loadingGifUrl ).appendTo( 'body' ).hide();
}

/**
 * This function is run when the OCR button is clicked. It sends the page
 * image's URL to the OCR tool and hands the response (success or failure)
 * to processOcrResult. A loading message is shown while the request is in
 * flight and removed once it has completed.
 *
 * If the page has no usable .prp-page-image image, a notification is shown
 * and no request is sent.
 */
function doOcr() {
	var $image = $( '.prp-page-image img' ),
		imageSrc = $image.attr( 'src' ),
		imageUrl;

	if ( $image.length === 0 || !imageSrc ) {
		mw.notify( mw.msg( 'google-ocr-image-not-found' ) );
		// Without an image there is nothing to send.
		return;
	}

	showLoadingMsg( 'google-ocr-request-in-progress' );

	// Send the HTTPS URL because this will be accessed by PHP in the tool.
	// Only protocol-relative sources need the scheme added.
	imageUrl = imageSrc.indexOf( '//' ) === 0 ? 'https:' + imageSrc : imageSrc;

	// Passing the parameters as an object lets jQuery URL-encode them, so
	// characters such as "&", "?" or spaces in the image URL stay intact.
	$.getJSON( toolUrl, { image: imageUrl, lang: lang } )
		.done( processOcrResult )
		.fail( processOcrResult ) // Same handler, for simplicity.
		.always( function () {
			// An empty label removes the loading message.
			showLoadingMsg( '' );
		} );
}

/**
 * Clean up raw OCR output of a French text.
 *
 * @param {string} text The OCR text.
 * @return {string} The cleaned-up text.
 */
function postprocessFrenchText( text ) {
	// Applied once each, in this order, so at most four leading lines are dropped.
	var headerPatterns = [ /^[0-9]+\n/, /^[A-Z.\-, ]+\n/, /^[0-9]+\n/, /^[A-Z.\-, ]+\n/ ],
		result = text,
		i;

	// Replace - in the beginning of a line by — (for dialogues), on every line.
	// The lookahead keeps the following character available for the next match.
	result = result.replace( /\n-(?=[^ ])/g, '\n— ' ).replace( /\n- /g, '\n— ' );

	// Glue together parts of words cut in the end of a line: the continuation
	// (up to the next whitespace) moves up, and the line break follows it.
	// \S+ keeps the continuation on a single line so paragraph breaks survive.
	result = result.replace( /-[ ]*\n(\S+)[ ]*\n?/g, '$1\n' );

	// Remove the first lines if they are made only of digits or uppercase
	// characters and punctuations (probably page headers).
	for ( i = 0; i < headerPatterns.length; i++ ) {
		result = result.replace( headerPatterns[ i ], '' );
	}

	return result;
}

/**
 * The API result (either the OCR'd text, or an error message) is processed by
 * this function. On success the edit box (#wpTextbox1) is overwritten with the
 * post-processed text; otherwise a notification is shown.
 *
 * @param {Object} response The parsed JSON response on success (with a `text`
 *  property), or the jqXHR object on failure (with `responseJSON.error`).
 */
function processOcrResult( response ) {
	if ( response.responseJSON !== undefined && response.responseJSON.error ) {
		mw.notify( mw.msg( 'error' ) + ' ' + response.responseJSON.error.code + ' ' + response.responseJSON.error.message );
		return;
	}
	if ( response.text === undefined || response.text.length === 0 ) {
		mw.notify( mw.msg( 'google-ocr-no-text' ) );
		return;
	}

	$( '#wpTextbox1' ).val( postprocessFrenchText( response.text ) );
}

/** * Show (or hide) a loading message. Pass false to remove the message altogether. * * @param {string} msgLabel The label of the system message to show. */ function showLoadingMsg( msgLabel ) { var msg, msgBox, loadingGif loadingId = 'GoogleOcrLoading';

// Always remove any existing message. $( '#' + loadingId ).remove();

// Add the new message if required. if ( msgLabel.length !== 0 ) { msg = mw.message( msgLabel ).plain();

msgBox = $( "

" ) .attr( "id", loadingId ) .css( "background-color", "#efefef" ).css( "border", "1px solid #ccc" ) .text( msg ); loadingGif = $( "<img>" ) .attr( "src", loadingGifUrl ) .attr( "alt", "Animated loading indicator" ) .css( "display", "inline-block" ).css( "margin", "0.3em" ); msgBox.prepend( loadingGif ); $( '#wpTextbox1' ).before( msgBox ); } } run(); }( mediaWiki, jQuery ) );