# Auto-éditer un wikilivre/Auto-référencer/sclipwb.sh

#! /bin/bash
# Fichier sclipwb.sh
VERSION=210125
#P Nom du fichier de commandes : sclipwb (source : sclipwb.sh)
#P Syntaxe : "sclipwb <nom du livre>"
#P Exemple : "./sclipwb LivreTest" à la console.
#P Date de création :  9 mai 2020
#P Modifié le : 15 mai 2020 par GC
#P Modifié le : 20 mai 2020 par GC lignes 15 à 27 'Retour' de ajouter_sclip.
#P Modifié le : 30 mai 2020 par GC pour l'installation automatique.
#P Version de la documentation sur WikiLivres le : 20 mai 2020
#P

#O Print the version banner when the script is run under the name 'sclipwb'.
#P   '${0##*/}' drops any leading path, so './sclipwb' and '/usr/bin/sclipwb'
#P   both match. A test written without spaces around '=' is a single
#P   non-empty word and therefore always true, hence the explicit comparison.
if [[ "${0##*/}" == "sclipwb" ]]; then echo "sclipwb : Version $VERSION"; fi

#O Load ajouter_sclip.inc into the current shell and keep its exit status.
#P   Open question: ship it in bin as ajouter_sclip.inc, or in src as
#P   ajouter_sclip.inc.sh ?
source ajouter_sclip.inc
Retour=$?
#T echo "Code de retour de la commande ajouter_sclip : " $Retour
#O If the status is non-zero and $Personal is "true":
#O   report the failing module on stderr and quit with status 1.
#P   $Personal is not assigned in this file (presumably set by the environment
#P   or by the sourced file - to be confirmed). It is quoted with an empty
#P   default so that an unset value simply makes the test false instead of
#P   raising a '[' syntax error.
    if [[ "$Retour" -gt 0 && "${Personal:-}" == "true" ]]
    then
      echo "Erreur au module shell 'ajouter_sclip.inc'" >&2
      exit 1
#O end if
    fi

#O Loop over the project file $Projet/$1.pj. Every line other than the book
#O   name ($1) names an article directory $Projet/<article> whose images are
#O   documented and appended to the page $PageScliP.
    while read -r pjline
    do
#O   skip blank lines and the line that holds the book name.
#P   Both operands are quoted so that the test stays valid for empty or
#P   multi-word lines.
      if [[ -z "$pjline" || "$pjline" == "$1" ]]
      then
        continue
      fi

#O   enter the article directory: html.doublons, html.list, tmp and the
#O     per-image files below are all written relative to it.
#P   When the directory cannot be entered the article is skipped, so that
#P     nothing is written into whatever directory happens to be current.
      if ! cd "$Projet/$pjline"
      then
        echo "sclipwb : répertoire inaccessible : $Projet/$pjline" >&2
        continue
      fi

#P   common prefix of the per-article files ($pjline.str, .files, ...).
      artbase="$Projet/$pjline/$pjline"

#O   build the image documentation files from the stream $pjline.str:
#O     .files         : numbered lines mentioning Fichier:, file: or image:,
#O                      filtered by the sed script $RepCom/$Conversions
#O     .picts         : numbered lines mentioning fichier:, .jpg, .png or
#O                      .gif, filtered by the same sed script
#O     .illustrations : text after the last '>' of the .files lines that
#O                      contain 'title' (with '</div>' removed)
#O     .images        : for each '='-separated piece of .files containing
#O                      https://fr.wikibooks.org, the text between the first
#O                      two '"' (or '!') delimiters
      grep -n -i -e Fichier: -e file: -e image: "$artbase.str" \
        | sed -f "$RepCom/$Conversions" > "$artbase.files"
      grep -n -i -e fichier: -e .jpg -e .png -e .gif "$artbase.str" \
        | sed -f "$RepCom/$Conversions" > "$artbase.picts"
      grep title "$artbase.files" \
        | sed 's/<\/div>//g' \
        | awk -F'>' '{print $NF}' > "$artbase.illustrations"
      awk -F'=' '{for (i=1;i<=NF;i++) print $i "\n"}' "$artbase.files" \
        | grep https://fr.wikibooks.org \
        | tr '"' '!' \
        | cut -d '!' -f2 > "$artbase.images"

#O   download the pages listed in $pjline.images into the article directory.
#P   NOTE(review): '-r -linf' has no depth limit and no '-np'; it may follow
#P     far more links than the listed pages - confirm this is intended.
      wget -P "$Projet/$pjline" -r -linf -k -p -E -i "$artbase.images"
#O   copy the downloaded .html pages next to the article files.
      cp "$Projet/$pjline"/fr.wikibooks.org/wiki/*.html "$Projet/$pjline/."

#O   list the page names in the order of $pjline.images:
#O     html.doublons : last '/'-separated field of every line, plus '.html'
#O     html.list     : odd-numbered lines of html.doublons (presumably each
#O                     link appears twice in .images - to be confirmed)
#P   Both files are rebuilt exactly once per article, even when .images is
#P     empty, so a list left over from an earlier run is never reused.
      awk -F'/' '{ print $NF ".html" }' "$artbase.images" > html.doublons
      awk 'NR % 2 == 1' html.doublons > html.list

#T cat html.list

#O   write the article name to $PageScliP and show it on the console.
      echo "'''Article : $pjline'''<br \>" >> "$PageScliP"
      echo "'''Article : $pjline'''"

#P## Annex, 'wikibooks' version ##############################

#O   for every page name found in html.list:
      while read -r htmlline
      do
#O     show the line just read,
        echo ""
        echo ""
        echo "---- ligne lue = $htmlline ---"
        echo ""

#O     extract the character strings of the page with the external tool mkd,
#O       then break them at every ',' into $htmlline.str.
        mkd -pw '**' "$htmlline" "$htmlline.tmp"
        tr ',' '\n' < "$htmlline.tmp" > "$htmlline.str"

#O     illustration title: 4th '"'-separated field of the wgTitle lines,
        {
          printf '%s' "'''Illustration : '''"
          grep wgTitle "$htmlline.str" | cut -d '"' -f4
        } > "$htmlline.title"
        cat "$htmlline.title" >> "$PageScliP"
        cat "$htmlline.title"

#O     source: index.php URL on $Site built from the page name.
#P     Only the trailing '.html' is stripped, with a parameter expansion.
#P       sed patterns such as '.html' / '.str' treat the dot as "any
#P       character" and would eat text inside the name ('bstr' in 'Abstract').
        {
          printf '%s' ", ''source : ''https://$Site/w/index.php?title="
          printf '%s\n' "${htmlline%.html}"
        } > "$htmlline.source"
        cat "$htmlline.source" >> "$PageScliP"
        cat "$htmlline.source"

#O     license: text after the last '>' of the licensetpl_short pieces, once
#O       the surrounding td/span markup has been removed.
        {
          printf '%s' ", ''licence : ''"
          grep licensetpl_short "$htmlline.str" \
            | sed -e 's/<td>//g' \
                  -e 's/<span class//g' \
                  -e 's/<\/span>//g' \
                  -e 's/style="display:none;"//g' \
            | tr '=' '\n' \
            | grep licensetpl_short \
            | awk -F'>' '{print $NF}'
        } > "$htmlline.license"
        cat "$htmlline.license" >> "$PageScliP"
        cat "$htmlline.license"

#O     author(s): from the first Author/Auteur line and the line after it,
#O       keep the '/'-separated pieces naming a user page, cut at the first '"'.
#P     The French namespace is spelled 'utilisateur'; a misspelled pattern
#P       would never match those links.
        grep -i -n -m1 -A 1 -e Author -e Auteur "$htmlline.str" \
          | tr '/' '\n' \
          | grep -i -e user -e utilisateur -e auteur \
          | cut -d '"' -f1 \
          | grep -i -e user -e utilisateur -e auteur > tmp
        read -r Tmp < tmp
        echo "Tmp = $Tmp"
#O     when no author was found, record '-' instead.
        if [[ -z "$Tmp" ]]
        then
          echo "-" > tmp
        fi
        {
          printf '%s' ", ''auteur(s) : ''"
          cat tmp
        } > "$htmlline.auteur"
        cat "$htmlline.auteur" >> "$PageScliP"
        cat "$htmlline.auteur"

#O     close the entry with an empty line in $PageScliP.
        echo "" >> "$PageScliP"

#O   end of the loop over html.list
      done < html.list

#O end of the loop over the .pj file
    done < "$Projet/$1.pj"

#O Finish the page $PageScliP: close the div and add the print page-break
#O   template, then leave with status 0.
#P   The redirection target is quoted so that a path containing spaces is
#P   not rejected as an ambiguous redirect; both lines share one append.
    {
      echo "</div>"
      echo "{{Nouvelle page imprimée}}"
    } >> "$PageScliP"

exit 0
# End of sclipwb.sh