SAS – Guide Complet
Reference complete pour manipuler, analyser et automatiser vos donnees avec SAS.
📚 Fondamentaux
Structure de base
| Element | Role | Exemple |
DATA | Creer/modifier une table | data resultat; set source; run; |
PROC | Analyser/afficher des donnees | proc print data=table; run; |
LIBNAME | Definir une bibliotheque | libname mylib "/chemin"; |
WORK | Bibliotheque temporaire (defaut) | data test; = data work.test; |
Bibliotheques
libname mylib "/chemin/vers/dossier";
data work.copie;
set mylib.ma_table;
run;
🔧 DATA Step
Operations courantes
| Action | Syntaxe |
| Nouvelle variable | nouvelle_var = calcul; |
| Garder colonnes | set source (keep=col1 col2); |
| Exclure colonnes | set source (drop=col1 col2); |
| Renommer | set source (rename=(ancien=nouveau)); |
| Definir longueur texte | length nom $50; |
| Supprimer ligne | delete; |
| Garder ligne | output; |
Conditions IF/THEN/ELSE
data resultat;
set source;
length categorie $10;
if age < 18 then categorie = 'Mineur';
else if age <= 65 then categorie = 'Adulte';
else categorie = 'Senior';
run;
SELECT/WHEN (switch case)
data resultat;
set source;
length libelle $20;
select(code);
when('A') libelle = 'Actif';
when('I') libelle = 'Inactif';
when('S', 'P') libelle = 'Suspendu';
otherwise libelle = 'Inconnu';
end;
run;
Boucles DO
do i = 1 to 10;
end;
do annee = 2020 to 2025 by 1;
end;
do while(condition);
end;
do until(condition);
end;
🎯 Filtres et Tri
WHERE vs IF
| WHERE | IF |
| Filtre a la lecture (performant) | Filtre apres lecture |
| Ne peut pas utiliser variables calculees | Peut utiliser variables calculees |
where age >= 18; | if total > 1000; |
Operateurs WHERE
| Operateur | Description | Exemple |
= ou EQ | Egal | where ville = 'Paris'; |
^= ou NE | Different | where statut ^= 'X'; |
> < >= <= | Comparaison | where age >= 18; |
IN | Dans une liste | where ville in ('Paris', 'Lyon'); |
BETWEEN | Entre deux valeurs | where age between 18 and 65; |
LIKE | Pattern matching | where nom like 'DUP%'; |
CONTAINS | Contient | where email contains '@gmail'; |
IS NULL | Valeur manquante | where date is null; |
IS NOT NULL | Non manquant | where date is not null; |
PROC SORT
proc sort data=ma_table;
by nom prenom;
run;
proc sort data=ma_table;
by descending date;
run;
proc sort data=ma_table nodupkey;
by id_client;
run;
proc sort data=ma_table nodup;
by _all_;
run;
🔗 Jointures
Types de jointures
| Type | MERGE | SQL |
| INNER JOIN | if a and b; | inner join |
| LEFT JOIN | if a; | left join |
| RIGHT JOIN | if b; | right join |
| FULL JOIN | (pas de condition) | full join |
MERGE (DATA Step)
proc sort data=clients; by id; run;
proc sort data=commandes; by id; run;
data fusion;
merge clients(in=a) commandes(in=b);
by id;
if a;
run;
Empiler des tables
data tout;
set table1 table2 table3;
run;
proc append base=existante data=nouvelles force;
run;
💾 PROC SQL
Syntaxe de base
proc sql;
create table resultat as
select
departement,
count(*) as nb,
avg(salaire) as sal_moy format=8.2,
sum(salaire) as total
from employes
where statut = 'A'
group by departement
having count(*) > 5
order by sal_moy desc;
quit;
Fonctions SQL utiles
| Fonction | Description |
count(*) | Nombre de lignes |
count(distinct col) | Valeurs distinctes |
sum(col) | Somme |
avg(col) | Moyenne |
min(col) / max(col) | Min / Max |
coalesce(a, b, c) | Premiere valeur non null |
case when ... then ... else ... end | Condition |
calculated col | Utiliser colonne calculee |
Jointures SQL
proc sql;
create table fusion as
select a.*, b.montant
from clients as a
left join commandes as b
on a.id = b.id_client;
quit;
📝 Fonctions Texte
| Fonction | Description | Exemple |
upcase(x) | Majuscules | 'pierre' → 'PIERRE' |
lowcase(x) | Minuscules | 'PIERRE' → 'pierre' |
propcase(x) | Premiere lettre maj | 'pierre' → 'Pierre' |
strip(x) | Supprime espaces debut/fin | ' abc ' → 'abc' |
compress(x) | Supprime tous les espaces | 'a b c' → 'abc' |
compress(x, '', 'kd') | Garde que chiffres | 'ab12cd' → '12' |
compress(x, '', 'ka') | Garde que lettres | 'ab12cd' → 'abcd' |
substr(x, pos, len) | Extraire sous-chaine | substr('HELLO', 1, 2) → 'HE' |
length(x) | Longueur | length('abc') → 3 |
index(x, 'mot') | Position du mot | index('hello', 'l') → 3 |
find(x, 'mot', 'i') | Position (insensible casse) | Option 'i' = ignore case |
tranwrd(x, 'old', 'new') | Remplacer | Remplace toutes occurrences |
catx(sep, a, b, c) | Concatener avec separateur | catx('-', 'a', 'b') → 'a-b' |
cats(a, b) | Concatener sans espaces | Strip puis concat |
scan(x, n, 'delim') | Nieme mot | scan('a-b-c', 2, '-') → 'b' |
countw(x, 'delim') | Compter les mots | countw('a b c') → 3 |
reverse(x) | Inverser | 'abc' → 'cba' |
🔢 Fonctions Numeriques
| Fonction | Description | Exemple |
round(x, 0.01) | Arrondir | round(3.456, 0.01) → 3.46 |
ceil(x) | Arrondi superieur | ceil(3.1) → 4 |
floor(x) | Arrondi inferieur | floor(3.9) → 3 |
int(x) | Partie entiere | int(3.7) → 3 |
abs(x) | Valeur absolue | abs(-5) → 5 |
mod(x, y) | Modulo (reste) | mod(10, 3) → 1 |
min(a, b, c) | Minimum | Ignore les missings |
max(a, b, c) | Maximum | Ignore les missings |
sum(a, b, c) | Somme | Ignore les missings |
mean(a, b, c) | Moyenne | Ignore les missings |
n(a, b, c) | Nb valeurs non missing | n(1, ., 3) → 2 |
nmiss(a, b, c) | Nb valeurs missing | nmiss(1, ., 3) → 1 |
missing(x) | Test si missing | Retourne 1 si missing |
coalesce(a, b, c) | 1ere valeur non missing | Utile pour valeur defaut |
log(x) | Logarithme naturel | |
exp(x) | Exponentielle | |
sqrt(x) | Racine carree | |
ranuni(seed) | Nombre aleatoire [0,1] | |
📅 Fonctions Date
Dates courantes
| Fonction | Retourne |
today() | Date du jour (SAS date) |
date() | Idem today() |
datetime() | Date et heure actuelles |
time() | Heure actuelle |
Extraire des composants
| Fonction | Description |
year(date) | Annee (ex: 2025) |
month(date) | Mois (1-12) |
day(date) | Jour du mois (1-31) |
weekday(date) | Jour semaine (1=dim, 7=sam) |
qtr(date) | Trimestre (1-4) |
week(date) | Numero semaine |
hour(datetime) | Heure |
minute(datetime) | Minutes |
second(datetime) | Secondes |
Creer et calculer des dates
ma_date = mdy(12, 25, 2025);
ma_date = yymmdd(2025, 12, 25);
nb_jours = date_fin - date_debut;
nb_mois = intck('month', date_debut, date_fin);
nb_ans = intck('year', date_naissance, today());
date_future = intnx('month', today(), 3);
date_future = intnx('year', today(), 1, 'same');
debut_mois = intnx('month', today(), 0, 'beginning');
fin_mois = intnx('month', today(), 0, 'end');
Intervalles INTCK/INTNX
| Intervalle | Description |
'day' | Jours |
'week' | Semaines |
'month' | Mois |
'qtr' | Trimestres |
'year' | Annees |
'weekday' | Jours ouvres |
Conversions PUT / INPUT
| Conversion | Fonction | Exemple |
| Num → Texte | put() | put(age, 3.) → "25" |
| Date → Texte | put() | put(date, date9.) → "25DEC2025" |
| Texte → Num | input() | input("25", 3.) → 25 |
| Texte → Date | input() | input("2025-12-25", yymmdd10.) |
Formats courants
| Format | Resultat | Exemple |
8. | Nombre entier | 12345678 |
8.2 | 2 decimales | 12345.67 |
comma12.2 | Separateur milliers | 1,234,567.89 |
dollar12.2 | Dollar | $1,234.56 |
eurox12.2 | Euro (format EU) | 1.234,56 |
percent8.1 | Pourcentage | 75.5% |
date9. | Date SAS | 25DEC2025 |
ddmmyy10. | Date EU | 25/12/2025 |
yymmdd10. | Date ISO | 2025-12-25 |
datetime20. | Date + heure | 25DEC2025:14:30:00 |
$char50. | Texte 50 car | Preserve espaces |
Formats personnalises
proc format;
value $statut_fmt
'A' = 'Actif'
'I' = 'Inactif'
other = 'Inconnu'
;
value age_grp
low-17 = 'Mineur'
18-64 = 'Adulte'
65-high = 'Senior'
;
run;
format statut $statut_fmt. age age_grp.;
📊 Procedures Courantes
PROC PRINT
proc print data=table (obs=10) noobs;
var nom prenom age;
where age > 30;
run;
PROC FREQ
proc freq data=table;
tables categorie / missing;
run;
proc freq data=table;
tables cat1 * cat2 / nocol norow nopercent;
run;
proc freq data=table;
tables categorie / out=freq_result;
run;
PROC MEANS
proc means data=table n mean std min max sum;
var salaire age;
class departement;
output out=stats
mean=sal_moy age_moy
sum=sal_total;
run;
PROC SUMMARY
proc summary data=table nway;
class region annee;
var ventes;
output out=resume(drop=_type_ _freq_)
sum=total
mean=moyenne;
run;
PROC TRANSPOSE
proc transpose data=source out=pivot;
by id;
id variable_pivot;
var valeur;
run;
PROC CONTENTS
proc contents data=table;
run;
proc contents data=table out=meta noprint;
run;
📥 Import / Export
Importer
proc import datafile="/chemin/fichier.csv"
out=ma_table
dbms=csv replace;
getnames=yes;
guessingrows=max;
delimiter=';';
run;
proc import datafile="/chemin/fichier.xlsx"
out=ma_table
dbms=xlsx replace;
sheet="Feuille1";
getnames=yes;
run;
Exporter
proc export data=ma_table
outfile="/chemin/export.csv"
dbms=csv replace;
delimiter=';';
run;
proc export data=ma_table
outfile="/chemin/export.xlsx"
dbms=xlsx replace;
sheet="Donnees";
run;
⚙️ Macros SAS
Variables Macro
%let annee = 2025;
%let chemin = /dossier/data;
data donnees_&annee;
set "&chemin/source.sas7bdat";
run;
%put Annee: &annee;
Macros avec parametres
%macro stats(table=, var=);
proc means data=&table;
var &var;
run;
%mend;
%stats(table=employes, var=salaire)
Boucles et conditions
%macro traiter;
%do i = 2020 %to 2025;
data data_&i;
set source;
where year(date) = &i;
run;
%end;
%mend;
%if &type = detail %then %do;
proc print; run;
%end;
Fonctions macro utiles
| Fonction | Description |
%sysfunc(today(), date9.) | Date du jour formatee |
%sysfunc(exist(table)) | Table existe? (1/0) |
%scan(&liste, 2) | 2eme element liste |
%eval(1+1) | Calcul entier |
%sysevalf(1.5+2.5) | Calcul decimal |
%length(&var) | Longueur variable |
%upcase(&var) | Majuscules |
%substr(&var, 1, 3) | Sous-chaine |
💡 Tips et Bonnes Pratiques
Debug
options mprint mlogic symbolgen;
%put NOTE: Traitement OK;
%put WARNING: Attention;
%put ERROR: Echec;
Nettoyer
proc datasets library=work kill nolist;
quit;
proc delete data=table_temp;
run;
Dictionnaires systeme
proc sql;
select memname, nobs
from dictionary.tables
where libname = 'WORK';
quit;
proc sql;
select name, type, length
from dictionary.columns
where libname = 'WORK' and memname = 'MA_TABLE';
quit;
Raccourcis utiles
| Astuce | Description |
_all_ | Toutes les variables |
_numeric_ | Variables numeriques |
_character_ | Variables texte |
_n_ | Numero de ligne courante |
_error_ | Flag erreur (0/1) |
first.var | 1ere ligne du groupe BY |
last.var | Derniere ligne du groupe BY |
col1--col10 | Range de colonnes |
col: | Colonnes commencant par "col" |