stat_apache

#!/bin/bash

#Pour analyser les logs Apache.
#Fonctionne avec les conventions de nommage de mon hébergeur
#mais devrait pouvoir être adapté facilement.
#Le lancer avec --help pour les options.
#Certaines variables valant ***** doivent être ajustées avant.


# --- Account settings: replace the ***** placeholders before first use ---
DOMAIN='*****'
LOGIN='*****'

# Local directory the logs are downloaded into, and the remote location
# (host/path) the download URLs are built from.
LOGS_DIR=~/tmp/logs
LOCATION="logs.ovh.net/${DOMAIN}/"

# Suffix of the compressed log files and the program used to unpack them.
ZIP_EXTENSION='bz2'
ZIP_PROGRAM='bunzip2'

#######################################
# Print the command-line help.
# Arguments: none
# Outputs:   help text on stdout; the program name shown is the last path
#            component of $0
#######################################
usage() {
  local prog
  # Strip the directory with parameter expansion instead of an unquoted
  # `basename $0`: with a space in the path, basename would take the second
  # word as a suffix to remove and print the wrong name.
  prog=${0##*/}

  echo "Usage :"
  echo " $prog"
  echo " $prog --nogz"
  echo " $prog jour mois année"
  echo " $prog fichier"
  echo
  echo "Sans paramètre, télécharge le fichier du jour zippé"
  echo "Avec l'option --nogz télécharge le fichier du jour non zippé"
  echo "Une date peut aussi être indiquée avec l'année sur 2 chiffres"
  echo "Un nom de fichier local non zippé peut être indiqué"
}

# Explicit help request: print the usage and stop successfully.
if [[ "$1" == "--help" ]]; then
  usage
  exit 0
fi

# Only 0, 1 or 3 arguments are supported. Anything else is an invocation
# error: show the usage on stderr and exit non-zero so that callers can
# tell it apart from a successful run.
if (( $# != 0 && $# != 1 && $# != 3 )); then
  usage >&2
  exit 1
fi

# Select the log file to analyse.
#   - no argument, "--nogz", or "day month year": download the log into
#     $LOGS_DIR and flag it for removal (remove_it=1);
#   - otherwise: use the single argument as a local readable file
#     (remove_it=0).
# Sets: filename, remove_it (and day, month, syear, year when downloading).
if [[ $# -eq 0 || $# -eq 3 || "$1" == "--nogz" ]]; then
  # Stop if the directory is unusable: carrying on would download into,
  # and later delete from, whatever the current directory happens to be.
  cd -- "$LOGS_DIR" || exit 1

  # -r keeps backslashes typed in the password literal.
  read -r -s -p "Mot de passe : " password
  # -s hides the typed newline too; end the prompt line explicitly.
  echo >&2

  if (( $# == 3 )); then
    day=$1
    month=$2
    # The year may be given on 2 or 4 digits; derive both forms.
    case "$3" in
      [0-9][0-9][0-9][0-9])
        year=$3
        syear=${3:2}
        ;;
      *)
        syear=$3
        year=20$3
        ;;
    esac
  else
    # One date call so the fields cannot straddle midnight.
    read -r day month syear year < <(date "+%d %m %y %Y")
  fi

  filename="$DOMAIN-$day-$month-$syear.log"
  # NOTE(review): the password is embedded in the URL, so characters such
  # as '@', ':' or '/' would need percent-encoding, and the URL is passed
  # on wget's command line.
  base_url="http://$LOGIN:$password@$LOCATION/logs-$month-$year"

  if [[ "$1" != "--nogz" ]]; then
    archive="$filename.$year$month$day.$ZIP_EXTENSION"
    wget "$base_url/$archive" || exit 1

    # ZIP_PROGRAM is left unquoted on purpose so it may carry options.
    $ZIP_PROGRAM -f -- "$archive" || exit 1
    mv -- "$filename.$year$month$day" "$filename" || exit 1
  else
    wget "$base_url/$filename" || exit 1
  fi

  remove_it=1
else
  # Quoted test: an empty or space-containing argument is handled properly.
  if [[ -r "$1" ]]; then
    filename=$1
    remove_it=0
  else
    echo "Fichier $1 introuvable ou non lisible" >&2
    exit 1
  fi
fi

#######################################
# Print the number of distinct first-field values (presumably client
# addresses) found on the day encoded in the log file name and on the day
# before it.
# Globals:   filename (read) - log file whose name ends in DD-MM-YY.log
# Arguments: none
# Outputs:   two "Visites le ..." lines on stdout; diagnostics on stderr
# Returns:   0 on success, 1 when no valid date can be derived from the name
# Requires:  GNU date (-d option)
#######################################
visit_count() {
  local name day num_month year iso
  local current_day before_day local_month before_day_display
  local visit_before visit_current
  local date_re='([0-9]{1,2})-([0-9]{1,2})-([0-9]{2})\.log$'

  # Parse the base name only, and only its trailing DD-MM-YY.log part, so
  # that neither a directory prefix nor the domain name affects the date.
  name=${filename##*/}
  if [[ ! "$name" =~ $date_re ]]; then
    echo "visit_count: no DD-MM-YY date in file name: $filename" >&2
    return 1
  fi
  day=${BASH_REMATCH[1]}
  num_month=${BASH_REMATCH[2]}
  year="20${BASH_REMATCH[3]}"
  iso="$year-$num_month-$day"

  # Search patterns use the dd/Mon/yyyy form with a zero-padded day and
  # English month abbreviation. LC_ALL=C forces English even when LC_ALL or
  # LC_TIME is set (a LANG assignment alone would be overridden by them).
  # The padded day keeps "01/..." from also matching "11/", "21/", "31/".
  current_day=$(LC_ALL=C date -d "$iso" "+%d/%b/%Y") || return 1
  # Let date compute the previous day: a manual "day - 1" yields day 0 on
  # the first of a month.
  before_day=$(LC_ALL=C date -d "$iso -1 day" "+%d/%b/%Y") || return 1

  # Display strings stay in the user's locale.
  local_month=$(date -d "$iso" "+%B") || return 1
  before_day_display=$(date -d "$iso -1 day" "+%d %B %Y") || return 1

  # Count distinct first fields among the lines mentioning each date.
  visit_before=$(grep -F -- "$before_day" "$filename" \
    | awk '!seen[$1]++ { n++ } END { print n + 0 }')
  visit_current=$(grep -F -- "$current_day" "$filename" \
    | awk '!seen[$1]++ { n++ } END { print n + 0 }')

  echo "Visites le $before_day_display : $visit_before"
  # 10# forces base 10 so that "08" and "09" are not read as octal.
  echo "Visites le $((10#$day)) $local_month $year : $visit_current"
}

#######################################
# List, with occurrence counts in ascending order, the values of the
# fourth double-quote-delimited field (presumably the referer) of the log
# lines that contain no "-" quoted field, do not mention google
# (case-insensitive) and whose extracted value does not contain $DOMAIN.
# Globals:   filename (read), DOMAIN (read)
# Arguments: none
# Outputs:   a header line followed by "count value" lines on stdout
#######################################
other_referer() {
  echo "Autres origines que Google :"
  # "$filename" is quoted so paths with spaces work; -F makes the domain a
  # literal string, so its dots no longer match arbitrary characters.
  grep -v -- '"-"' "$filename" \
    | grep -iv -- google \
    | awk -F'"' '{ print $4 }' \
    | grep -vF -- "$DOMAIN" \
    | sort \
    | uniq -c \
    | sort -n
}

#######################################
# For every log line mentioning google (any case), print the result page
# number and the search keywords taken from the line.
#   keywords: value of the last "q=" parameter preceded by '&', '?' or '_',
#             up to the next '&' or '"'; the whole line when there is none
#   page:     (start / 10) + 1, where start is the value of the last
#             "start=" parameter, or 0 when there is none
# Lines mentioning googlebot are dropped and adjacent duplicates collapsed.
# Globals:   filename (read)
# Arguments: none
# Outputs:   a header, an empty line, then "page<TAB>keywords" lines
#######################################
google_keywords() {
  printf 'Page\tMots-cles Google :\n\n'

  # Written for POSIX awk: the earlier gensub() calls required gawk and
  # passed an uninitialised variable as their third argument, which made
  # gawk emit a warning for every matching line.
  awk '
    # Value following the last match of the regex key in line, cut at the
    # next & or double quote. Returns line unchanged when key is absent.
    function last_value(line, key,    rest, value, found) {
      rest = line
      found = 0
      while (match(rest, key)) {
        rest = substr(rest, RSTART + RLENGTH)
        value = rest
        found = 1
      }
      if (!found) {
        return line
      }
      sub(/[&"].*/, "", value)
      return value
    }

    /[Gg][Oo][Oo][Gg][Ll][Ee]/ {
      keywords = last_value($0, "[&?_]q=")
      start = last_value($0, "start=")
      # No start parameter: this is the first result page.
      if (start == $0) {
        start = 0
      }
      page = (start / 10) + 1
      printf("%d\t%s\n", page, keywords)
    }
  ' "$filename" | grep -vi googlebot | uniq
}

# Print the three report sections, each framed by separator lines.
separator="-----------------------------"
echo "$separator"
visit_count
echo "$separator"
other_referer
echo "$separator"
google_keywords
echo "$separator"

# Delete the log only when remove_it is 1. Quoting keeps an unset flag from
# being a syntax error and stops the file name from being word-split or
# glob-expanded into other paths; "--" guards against a leading dash.
if [[ "$remove_it" == 1 ]]; then
  rm -- "$filename"
fi