Wednesday, November 4, 2015

BASE SAS Code


%let prjPath = E:\ChenyxFiles\2014.Study.TrainingCertificate\SAS.Victoria\SAS.Exam\SAS.Base.Code\sasexport ;
%let examPath = E:\ChenyxFiles\2014.Study.TrainingCertificate\SAS.Victoria\SAS.Exam\SAS.Base.Code;

data base_1;
input relation $ first_name $ birthdate : mmddyy8.;
datalines;
son Frank 01/31/89
daughter June           12-25-87
brother Samuel 01/17/51
;
run;


data base_1_1;
input relation $ first_name $ birthdate : mmddyy8.;
datalines;
son Frank 01/31/89
daughter June           12-25-87
brother Samuel 01/17/51
;
run;

data base_1_2;
input price1 comma8.2 price2 :comma8.2 dt1 mmddyy10.
 dt2 :date7. dt3 :date9. amt :10. weight :12.3 +0 prdName $;
datalines;
1,510.22 1,523.34 01-02-1999 01Jun99 01-Jun-99 2345554488 45333332.345 Table
;
run;

data employeestats_2;
infile "&exampath\infile_2.txt" dsd pad dlm=',' missover truncover;
/*MISSOVER: if missed input/input missed/not enough input, then assign to missing
prevents a SAS program from going to a new input line if,
when using list input, it does not find the values in the current line
for all the INPUT statement variables. When an INPUT statement reaches
the end of the current record, variables without any values assigned are
set to missing.*/
/*TRUNCOVER: enables you to read variable-length records when some records
are shorter than the INPUT statement expects. TRUNCOVER tells SAS to salvage
whatever it can from short records without going to the next record.
The two options mentioned above have similar function.*/

/*infile "&examPath\infile_2.txt" pad;*/
/*%put '&examPath\infile_2.txt';*/
input name $ age weight;
/*data:*/
/*Jose,47,210*/
/*Sue,,108*/
run;

proc print data=employeestats_2;
run;
data base_1_3;
   input name $ 1-19 id_num salary  :dollar11.2  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072.12 BR1
Chen, Len          5889  $33,771 BR2
Davis, Brad        3878  $31,509.12 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $37,010 BR3      
;
run;


data base_1_3;
   input name $ 1-19 id_num salary  :dollar11.  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072.12 BR1
Chen, Len          5889  $33,771 BR2
Davis, Brad        3878  $31,509.12 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $37,010 BR3      
;
run;

proc print data=base_1_3;
run;

data base_1_4;
   input name $ 1-19 id_num salary  dollar9.0  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072.55 BR1
Chen, Len          5889  $33,771    BR2
Davis, Brad        3878  $31,509 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $37,010 BR3      
;
run;

  proc print data=base_1_4;
run;

data base_1_5;
   input name $ 1-19 id_num salary  dollar8.0  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072.33 BR1
Chen, Len          5889  $33,771 BR2
Davis, Brad        3878  $31,509 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $17,1010 BR3      
;
run;

  proc print data=base_1_5;
run;
data base_1_6;
   input name $ 1-19 id_num salary  :dollar8.0  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072 BR1
Chen, Len          5889  $33,771 BR2
Davis, Brad        3878  $31,509 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $1,217,010 BR3      
;
run;

  proc print data=base_1_6;
run ;
data base_1_7;
   input name $ 1-19 id_num salary  dollar8.0  site $ ;
   datalines;
Capalleti, Jimmy   2355  $34,072 BR1
Chen, Len          5889  $33,771 BR2
Davis, Brad        3878  $31,509 BR3  
Martinez, Maria    3985  $78,980 US1    
Orfali, Philip     0740  $80,648 US2  
Smith, Robert      5162  $64,561 INCORRECT CODE
Sorrell, Joseph    4421  $62,403 US1    
Zook, Carla        7385  $1,217,010 BR3      
;
run;

data base_1_11;
  input name $ salary;
  format salary dollar14.2;
datalines;
Chris 331245145.20
Jane 4000000
Jerry          60000
Joe 26000
Zoe 60000
;
run;

/* *******************************
01/23/1963  mmddyy10.
1/23/1963  mmddyy10.
01/23/63  mmddyy8.
1/23/63  mmddyy8.
January 23,1963  worddate20.
jan 23, 1963  worddate12.
23jan1963  date9.
23jan63  date7.
23-jan-1963  date11.
01-23-63  mmddyy8.
19630123  yymmdd8.
Ref: How to Read, Write, and Manipulate SAS Dates.pdf
******************************* */
/*the colon-format ':' modifier in Reading Free-Format Data*/
data base_1_12;
   input dt1 date7. dt7 :mmddyy8. dt2 :yymmdd8. dt3 :date9. dt4 :mmddyy10. dt5 :date11. dt6 :anydtdte15.
dt8 :mmddyy8. dt9 :ddmmyy8. dt10 :yymmdd8. ;
;
   datalines;
23Jan63 01/15/15 19931123 23Jan1963 11-01-1980  23-jan-1978 23-jan-1978 01231998   23041997  19981231
;
run;

/*not acceptable format: dt11 yyddmm8. dt12 :mmyydd8.  dt13 :ddyymm8.*/
data base_1_13;
   input  dt11 yyddmm8. dt12 :mmyydd8.  dt13 :ddyymm8.;
   datalines;
   19872301 01198712 23198702
;
run;
proc sql;
create table base_1_14 as
select dt11 as dt11 format=date9. ,
dt12 as dt12 format=date9. ,
dt13 as dt13 format=date9.
from base_1_13 ;
quit;

sas_date = input(var1, mmddyy10.);

data base_1_15;
date = date();
my_birthday = '23jan63'd;
datedif2 = intck('month',my_birthday,date); * intck(慽nterval?  from, to);
datedif3 = sum(date,-my_birthday); * sum(to,-from);
datedif1 = datdif(my_birthday,date,'act/act'); * datdif(from,to,抋ct/act?
or '30/360';
put 'Log shows: ' datedif1 datedif2 datedif3;
run;


data base_1_15;
date = date();
dt1 = '23jan63'd;
dt2 = '23Jan1963'd;
dt3 = '23-jan-1978'd;
put 'Log shows: ' dt1 dt2 dt3;
run;

data base_1_16;
my_birthday = '23jan1963'd;
years = intck('year',my_birthday,date());
quarters = intck('qtr',my_birthday,date());
months = intck('month',my_birthday,date());
weeks = intck('week',my_birthday,date());
days = intck('day',my_birthday,date());
put 'Log shows: ' 'y=' years  'q='  quarters  'm='
months  'w='  weeks  'd='  days;
run;

data base_1_17;
my_birthday = '23jan1963'd; * SAS date 1118;
date1 = put(my_birthday,mmddyy8.);
date2 = put(my_birthday,worddate15.);
date3 = put(my_birthday,monyy7.);
date4 = put(my_birthday,julian5.);
put date1 date2 date3 date4;
run;


data base_3_1;
infile cards dlm=',';
input agent1 $ agent2 $ agent3 $;
cards;
jones,,brownjones, spencer,brown
;
run;

/*Any number of consecutive*/
/*commas are considered to be a single delimiter as a result of the DLM= option*/
data base_3_2;
infile cards dlm=',';
input agent1 $ agent2 $ agent3 $;
cards;
jones,,,,,,,brownjones,,,,spencer,brown
;
run;

data base_3_3;
infile cards dlm=',';
length agent2 $ 16;
input agent1 $ agent2 $ agent3 $;
cards;
jones,,,,,,,brown_jones,,,,spencer,brown
;
run;


data base_3_3;
infile cards dlm=',' dsd;
length agent2 $ 16;
input agent1 $ agent2 $ agent3 $;
cards;
jones,,,,,,,brown_jones,,,,spencer,brown
;
run;


/*DSD (Delimited Separated Data) -
1. strip off any quotes that surround values in the text file.
2. deals with missing values, DSD treat consecutive delimiters separately;
therefore, a value that is missing between consecutive delimiters will be read
as a missing value when DSD is specified.
3. The third function assumes the delimiter is a comma.
If DSD is specified and the delimiter is a comma, the DLM= option is not necessary.
If another delimiter is used, the DLM= option must be used as well. */

data base_3_4;
   infile cards dsd ;/*strip off quotes*/;
   input a b c d;
cards;
"54","75","253","44"
"87","3","55","465"
"905","","66","354"
;
run;

data base_3_5;
   infile cards dsd ;/*strip off quotes*/;
   input a b c d;
cards;
"54",,,,,"253","44"
"87","3","55","465"
"905",,"66","354"
;
run;

data base_3_6;
infile cards dsd ;/*strip off quotes*/;
   input a b c $ d;
cards;
"54",,,,,"253","44"
"87","3","55","465"
"905",,"66","354"
;
run;

data base_3_7;
infile cards dsd dlm='|' /*strip off quotes*/;
   input a b c $ d;
cards;
"54"|||||"253"|"44"
"87"|"3"|"55"|"465"
"905"||"66"|"354"
;
run;

/*missover: if miss then input as missing*/
data base_3_8;
   infile datalines missover;
   input temp1-temp5;
   datalines;
97.9 98.1 98.3
98.6 99.2 99.1 98.5 97.5
96.2 97.3 98.3 97.6 96.5
;
run;


data base_3_9;
   infile datalines missover;
   input temp1-temp5;
   datalines;
97.9 98.1 98.3
. 99.2 99.1 98.5 97.5
   97.3 98.3 97.6 96.5
;
run;

/*truncover = missover*/
data base_3_10;
infile datalines truncover ;/*missover*/;
/* Forces the INPUT statement to stop reading when it gets to*/
/* the end of a short line. */
input temp1-temp5;
   datalines;
97.9 98.1 98.3
. 99.2 99.1 98.5 97.5
   97.3 98.3 97.6 96.5
;run;


data base_3_11;
infile datalines missover;
input temp1-temp5 @;
input tp1-tp3;
   datalines;
97.9 98.1 98.3 . 99.2 99.1 98.5 97.5
. 97.3 98.3 97.6 96.5
;run;

data base_3_11;
infile datalines missover;
input temp1-temp5;
input tp1-tp3;
   datalines;
97.9 98.1 98.3 . 99.2 99.1 98.5 97.5
. 97.3 98.3 97.6 96.5
;run;

data base_3_12;
infile datalines FLOWOVER ;/*missover*/;
/*FlowOver: Causes the INPUT statement to jump to the next record if it*/
/*doesn抰 find values for all variables. Then start from first column of next record*/
input temp1-temp5;
   datalines;
97.9 98.1 98.3 99.2
99.1 98.5
97.5 97.3 98.3 97.6 96.5
;run;


data base_3_13;
infile datalines StopOVER ;/*missover*/;
/*FlowOver: Causes the INPUT statement to jump to the next record if it*/
/*doesn抰 find values for all variables. Then start from first column of next record*/
input temp1-temp5;
   datalines;
97.9 98.1 98.3 99.2 99.1 98.5
97.5 97.3 98.3 97.6 96.5
;run;


data base_3_14;
infile datalines StopOVER ;/*missover*/;
/*Stops the DATA step when it reads a short line*/
input temp1-temp5;
   datalines;
97.9 98.1 98.3 99.2 99.1
98.5 97.5
97.3 98.3 97.6 96.5
;run;

data base_3_15;
infile cards PAD  ;
/*the PAD option adds blanks to short lines out
to the LRECL(logical record length), similar with FLOWOVER option*/
input temp1-temp5;
   cards;
97.9 98.1 98.3 99.2 99.1
98.5 97.5
97.3 98.3 97.6 96.5
;run;

data base_3_15;
infile cards flowover  ;
/*the PAD option adds blanks to short lines out
to the LRECL(logical record length), similar with FLOWOVER option*/
input temp1-temp5;
   cards;
97.9 98.1 98.3 99.2 99.1
98.5 97.5
97.3 98.3 97.6 96.5
;run;

data base_3_15;
infile cards missover  ;
input temp1-temp5;
   cards;
97.9 98.1 98.3 99.2 99.1
98.5 97.5
97.3 98.3 97.6 96.5
;run;



data base_3_15;
infile cards ; ********** by default === flowover;
input temp1-temp5;
   cards;
97.9 98.1 98.3 99.2
99.1
98.5 97.5
97.3 98.3 97.6 96.5
;run;
proc print data=base_3_15;
run;
/*Obs temp1 temp2 temp3 temp4 temp5 */
/*1 97.9 98.1 98.3 99.2 99.1 */
/*2 98.5 97.5 97.3 98.3 97.6 */






data base_3_15;
infile "&prjPath\a.txt" LRECL=150;
input temp1-temp5;
run;


data base_3_16;
infile "&prjPath\a.txt" LRECL=350 pad;
input temp1-temp4 temp5 $50.;
run;

proc contents data=base_3_16;
run;


/*read data @ +n, $*/
data base_4_1;
input @1 style $8.
+1 sqfeet 4.
+1 bedrooms 1.
@20 baths 3.
street $16.
@40 price dollar8.;
datalines;
TWOSTORY 1040 2      1SANDERS ROAD    $55,850
CONDO    2150 4    2.5JEANS AVENUE    $127,150
;
run;


data base_4_2;
input @1 style $8
+1 sqfeet 4.
+1 bedrooms 1.
@20 baths 3.
street $16
@40 price dollar8.;
datalines;
TWOSTORY 1040 2      1SANDERS ROAD    $55,850
CONDO    2150 4    2.5JEANS AVENUE    $127,150
;
run;

* =============== LIBNAME & Libref() ====================;
libname sasShort "&prjPath\";
data base_5_1;
if (%sysfunc(%libref(sasShort))) then
put %sysfunc(sysmsg());
else
    put "nothing work out";
run;

/*base_9_1: read and preview large dataset */
/*When you have a large data files from *.txt or *xlsx, how do you see the data? If you use excel or notepad to open it, it will cause you couple of minutes or make your PC down.*/
/*In SAS, there is one way to do it very fast - PROC FSLIST.   For example,*/
/*SAS will open a new FSLIST view (like log/output window) in several seconds. From this view, you can browse the data in the first hundreds of rows.*/
/*It is very convenient to use PROC FSLIST to browse the data first and read data into SAS.*/

PROC FSLIST FILE="&prjPath\medium.txt";
RUN;

PROC FSLIST FILE="&prjPath\large.txt";
RUN;

/*base_9_2: filename */
/*FILENAME fileref <device-type> <'external-file'> <options>;*/
/*filename myfile disk|pipe|temp|dummy|printer|clipboard|url|email|ftp|dde "filepath"*/
/*ref: The Power of  he FILENAME Statement*/
/*http://www.lexjansen.com/wuss/2012/63.pdf*/

*defining the email attributes while defining the filename;
filename emailout email
TO = "duke.fcbk@gmail.com"
subject = "Test email"
from = "duke.fcbk@gmail.com" importance = "high" attach =
"c:/temp/test.xls";
data _null_;
file emailout;
put "This is a email subject";
run;



data base_10;
input rel $ id;
datalines;
son 1
son 2
son 3
daughter  4
daughter   5
daughter   6
daughter   7
brother   8
brother   9
brother   10
brother   11
;
run;

proc contents data=base_10;
run;
proc sort data=base_10 out=base_10;
by rel;
run;
data base_10_1;
set base_10;
by rel;
retain temp;
if first.rel then
do ;
temp = rel;
output;
end;
if last.rel then
do ;
temp = rel;
output;
end;
run;

data base_10_temp;
input group x;
cards;
1 23
1 34
1 .
1 45
2 78
2 92
2 45
2 89
2 34
2 76
3 31
4 23
4 12
;
run;

data new;
set base_10_temp;
by group;
first=first.group;
last=last.group;
run;

data base_12_sorting;
   input company $ 1-23 town $ 24-36 debt accnt_num;
   datalines;
Apex Catering          Apex           37.95      9923
Bob's Beds             Morrisville   119.95      4998
Boyd & Sons Accounting Garner        312.49      4762
Deluxe Hardware        Garner        467.12      8941
Elway Piano and Organ  Garner         65.79      5217
Ice Cream Delight      Holly Springs 299.98      2310
Pauline's Antiques     Morrisville   302.05      9112
Paul's Pizza           Apex           83.00      1019
Peter's Auto Parts     Apex           65.79      7288
Strickland Industries  Morrisville   657.22      1675
Tina's Pet Shop        Apex           37.95      5108
Tim's Burger Stand     Holly Springs 119.95      6335
Watson Tabor Travel    Apex           37.95      3131
World Wide Electronics Garner        119.95      1122
;
run;
proc sort data=base_12_sorting out=base_12_1;
   by town descending debt;
run;

/*the length of var is set by first time compilation process*/
/*The DATA step first goes through a compilation phase, then
an execution phase. The length of a variable is set during the
compilation phase and is based on the first time the variable is encountered*/
data base_15;
code='DAL523'; /* set length then define the length of var=code */
code='SANFRAN6021654564';
code='HOUS731';
length code $ 20;
run;

data base_15;
length code $ 20;
code='DAL523'; /* set length then define the length of var=code */
code='SANFRAN6021654564';
code='HOUS731';
run;
proc contents data=base_15;
run;
data base_15_1;
code='DAL5235646545646465'; /* set length then define the length of var=code */
code='SAN';
code='HOUS731';
length code $ 20;
run;

data base_15_2;
length code $ 2; /* set length then define the length of var=code */
code='DAL5235646545646465';
code='SAN';
code='HOUS731';
run;

data base_17; /* cut value by first PDV initilize */
input jobcode $ salary name $;
cards;
FLAT1 70000 Bob
FLAT2 60000 Joe
FLAT3 30000 Ann
;
run;
data base_17_1;
set base_17;
if salary>60000 then description='Over 60';
else description='Under 60';
run;


data base_18_1;
input receipts;
total+receipts; /* total is a memory var, not a input var, not in input buffer */
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/

/*total 0;sum total;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10
23
20
;
run;

data base_18_2;
input receipts;
total+1;
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/
datalines;
10
23
20
;
run;


data base_18_3;
input receipts;
total=0;
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next.*/
datalines;
10
23
20
;
run;

data base_18_4;
input receipts id;
/*total=total+receipts;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10 1
23 1
.  1
20 1
;
run;

proc sort data=base_18_4 out=base_18_4aa;
by id;
run;
data base_18_4a;
set base_18_4aa;
by id;
if first.id then total=0;
total+receipts;
/*if last.id then output;*/
run;


data base_18_5;
input receipts;
receipts + total; /* total is a memory var, not a input var, not in input buffer */
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/

/*total 0;sum total;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10
23
20
;
run;


data base_18_6;
input receipts;
 total1 + receipts + total2; /* total is a memory var, not a input var, not in input buffer */
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/

/*total 0;sum total;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10
23
20
;
run;

data base_18_7;
input receipts;
total2=0;
 total1 + receipts + total2; /* total is a memory var, not a input var, not in input buffer */
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/

/*total 0;sum total;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10
23
20
;
run;

data base_18_8;
input receipts;
total1=0; total2=0;
 total1 + receipts + total2; /* total is a memory var, not a input var, not in input buffer */
/*initializes total to 0, ignores missing values of receipt, retains the value
of total from one iteration to the next, and adds the value of receipts to total.*/

/*total 0;sum total;*/
/*total=total+receipts;*/
/*total=sum(total,receipts);*/
datalines;
10
23
20
;
run;



data base_18_9;
input receipts;
a = 2; b=3;
a+b+receipts;
datalines;
10
23
20
;
run;


data base_18_10;
input receipts;
a = 2; b=3;
receipts+a+b;
/*initializes total to 0, ignores missing values of receipt, retains the value */
/*of total from one iteration to the next, and adds the value of receipts to total*/
/*datalines;*/
10
23
20
;
run;


data base_18_11;
input receipts;
a = 2; b=3;
b+ receipts+a;
datalines;
10
23
20
;
run;


data base_18_12;
input receipts;
a = 2;
b+ receipts+a;
datalines;
10
23
20
;
run;

/**/
/*1.1.2 Data Step Flow*/
/*data sales; */
/*infile rawin;*/
/*input name $1-10 division $12 years 15-16 sales 19-25;*/
/*run; */
/*proc print data=sales; */
/*run; */
/*Note: The use of RUN after each step is highly recommended*/
/*A. The Compilation Phase*/
/*When you submit a DATA step for execution, SAS checks the syntax of the SAS statements and compiles them, that is, automatically translates the statements into machine code. In this phase, SAS identifies the type and length of each new variable, and determines whether a type conversion is necessary for each subsequent reference to a variable. During the compile phase, SAS creates the following three items: */
/*input buffer is a logical area in memory into which SAS reads each record of raw data when SAS executes an INPUT statement. Note that this buffer is created only when the DATA step reads raw data. (When the DATA step reads a SAS data set, SAS reads the data directly into the program data vector.)*/
/*program data vector (PDV) is a logical area in memory where SAS builds a data set, one observation at a time. When a program executes, SAS reads data values from the input buffer or creates them by executing SAS language statements. The data values are assigned to the appropriate variables in the program data vector. From here, SAS writes the values to a SAS data set as a single observation. */
/*Along with data set variables and computed variables, the PDV contains two automatic variables, _N_ and _ERROR_. The _N_ variable counts the number of times the DATA step begins to iterate. The _ERROR_ variable signals the occurrence of an error caused by the data during execution. The value of _ERROR_ is either 0 (indicating no errors exist), or 1 (indicating that one or more errors have occurred). SAS does not write these variables to the output data set. */
/*descriptor information is information that SAS creates and maintains about each SAS data set, including data set attributes and variable attributes. It contains, for example, the name of the data set and its member type, the date and time that the data set was created, and the number, names and data types (character or numeric) of the variables. */
/*B. The Execution Phase*/
/*By default, a simple DATA step iterates once for each observation that is being created. The flow of action in the Execution Phase of a simple DATA step is described as follows: */
/*1. The DATA step begins with a DATA statement. Each time the DATA statement executes, a new iteration of the DATA step begins, and the _N_ automatic variable is incremented by 1. */
/*2. SAS sets the newly created program variables to missing in the program data vector (PDV). */
/*3. SAS reads a data record from a raw data file into the input buffer, or it reads an observation from a SAS data set directly into the program data vector. You can use an INPUT, MERGE, SET, MODIFY, or UPDATE statement to read a record. */
/*4. SAS executes any subsequent programming statements for the current record. */
/*5. At the end of the statements, an output, return, and reset occur automatically. SAS writes an observation to the SAS data set, the system automatically returns to the top of the DATA step, and the values of variables created by INPUT and assignment statements are reset to missing in the program data vector. Note that variables that you read with a SET, MERGE, MODIFY, or UPDATE statement are not reset to missing here. */
/*6. SAS counts another iteration, reads the next record or observation, and executes the subsequent programming statements for the current observation. */
/*7. The DATA step terminates when SAS encounters the end-of-file in a SAS data set or a raw data file. */
/**/

/*scan function: break one var to many */
/*this= scan('this is a book', 1, ' ');*/
data base_17_auth;
   input author $ 1-20 ;
datalines;
David F. Drak
David G. Hartwell
Paul  S. Lovecraft  
Horace V. Wadpole  
Stuart D. Schiff
;

data base_17_auth2;
   length first_name middle_name last_name $ 20;
   set base_17_auth;
   first_name=scan(author, 1, ' ');
   middle_name=scan(author, 2, ' ');
   last_name=scan(author, 3, ' ');
run;

data base_17_auth3;
   length first_name middle_name last_name $ 20;
   set base_17_auth;
   first_name=scan(author, 1, '');
   middle_name=scan(author, 2, '');
   last_name=scan(author, 3, '');
run;

data base_17_aa;
   input address1 $ 1-31  address2 $32-64;
   cards;
450 Shepard Ave. TO ON M3C7C1  450, Shepard Ave., TO, ON, M3C7C1
;
run;
data base_17_ab;
   set base_17_aa;
   num1       =scan(address1, 1);
   st_nam1    =scan(address1, 2);
   city_nam1  =scan(address1, 4);
   state_nam1 =scan(address1, 5);
   p_code1    =scan(address1, 6);
run;

proc print data= base_17_ab;
run;
data base_17_ac;
   set base_17_aa;
   num2      =scan(address2, 1, ',');
   st_nam2   =scan(address2, 2, ',');
   city_nam2 =scan(address2, 3, ',');
   state_nam2=scan(address2, 4, ',');
   p_code2   =scan(address2, 5, ',');
run;

data base_17_1;
input ab $ 1-20;
datalines;
12 34 56
;
run;

data base_17_2;
set base_17_1;
a = scan(ab, 1, ' ');
b = scan(ab, 2, ' ');
c = scan(ab, 2, ' ');
run;


data base_17_3;
input ab $ 5-20;
datalines;
12 34 56
;
run;

data base_17_4;
input ab $ 5;
datalines;
12 34 56
;
run;


data base_19_money;
input year quantity;
total=total+quantity; /* == missing */
datalines;
1901 2
1905 1
1910 6
1925 1
1941 1
;
run;


data base_19_2;
input year quantity;
total+quantity;
datalines;
1901 2
1905 1
1910 6
1925 1
1941 1
;
run;


data base_19_3;
input year quantity;
total=a+quantity; /* == missing */
datalines;
1901 2
1905 1
1910 6
1925 1
1941 1
;
run;

data base_19_5;
input year quantity;
total+a;
a+quantity; /* == missing */
datalines;
1901 2
1905 1
1910 6
1925 1
1941 1
;
run;


data base_20_test;
input amt x y z @@;
a = int(x);
a = abs(x);
a = sqrt(x);
b = min(x, y, z);
c = max(x, y, z);
d = sum(x, y, z);
e = mean(x, y, z);
f = round(x, 1);
g = log(x);
previous_a = lag(x);/*LAG: the value of the argument in the previous observation */
difference = dif(x);
/*DIF: the difference between the values of the argument
in the current and previous observations */
n_records = n(x); /*N: the number of non-missing values of the argument */
n_missing = nmiss(x);

/*NMISS: the number of missing values of the argument a = nmiss(x);*/

datalines;
1901 . 1905 1 1910 . 1925 1
;
run;


data base_21_AreaCodes;
Phonenumber=3125551212;
Code='('!!substr(Phonenumber,1,3)!!')';
Code='('||substr(Phonenumber,1,3)||')';
code1= substr(Phonenumber,1,3);
code2= '!!substr(Phonenumber,1,3)';
code3= '!!substr(Phonenumber,1,3)!!';
code4 = ('!!substr(Phonenumber,1,3)!!');
code5 = '('!!substr(Phonenumber,1,3)!!')';
put code1= '-' code2= code3= code4= code5= code=;
run;


data base_21_AreaCodes;
Phonenumber=312;
code = substr(phoneNumber, 1,3);
put code=;
run;

proc contents data=base_21_AreaCodes;
run;

/*format best12. best6. best32.*/
data base_21_1;
x=1257000;
put x best6.;
y=123456789012;
put y best12.;
z=123456789012345789456;
put z best12.;
a=123456789012345789456;
put a ;
c=123456789012345789456;
put c best20.;
d=123456789012345789456;
put d best21.;
e=123456789012345789456;
put e best22.;
f=123456789012345789456;
put f best25.;
run;

/*Operators
eq ne gt lt ge le
^= ~=  >= <=
*/
data base_21_2;
input lastname $;
*************** in *************;
a = 3;
if a in (3, 4, 5) then
b= 4;
put 'b=' b;
***************  compare character **********;
if 'C. Jones' < 'Charles Jones' then
put 'less';
else
put 'more';
/******   compare only the begining letters :, add colon ":" before character
upcase(), lowcase()
*/
if lowcase(lastname) = :'st' then
put lastname;
else
put 'lastname without St init';
************** Logic operators ***************************;
/*&-----AND*/
/*!|--- OR*/
/*^~----Not*/
*****************The Concatenation Operator '||' ************************;
str1 = 'book'; str2='apple';
str3= str1 || ', ' || str2;
put 'str3=str1 || str2= '  str3;
cards;
Students
bk
Story
Mike
St.John
;
run;

data base_21_3;
a = 12; b= 2;
y=+(a*b); z=-(a*b);
put y z;
u = (a><b) ; ******** min;
v = (a<>b) ; ****** max;
put u v;

month='sep   ';
year=99;

date=trim(month) || left(put(year,8.));
date2= trim(month)||year;
put date= date2=;

************** PUT(source, format.) *********;
* identifies the constant, variable, or expression
whose value you want to reformat. The source argument
can be character or numeric. ;

;
run;

/*put function*/
data base_21_4;
a = 9988888;
b = put(a, hex4.);
c = put(a, 2.);
put a= ', ' b= ', ' c= ;
******************* put *********;
numdate=122591;
chardate=put(numdate,z6.);
sasdate=input(chardate,mmddyy6.);
;
run;

************* remove dollar sign, using comma ******** ;
data base_21_5;
input price comma10.2
 @1 price1 comma3.2
 @1 price2 comma4.2
 @1 price3 comma5.2
 @1 price4 comma5.
 @1 price5 comma10.2
 @1 price6 comma7.
;
datalines;
12,500.02
;
run;

/*remove comma , input function*/
proc format;
invalue $saleFormat
'1111'='pass'
                 '2222'='good'
                 '3   '='best'
;
run;
data base_21_6;
**** formatted-val-char =PUTC(char-val,format);
**** formatted-val-char =PUTN(num-val,format);
******  num-value=INPUTN(char-string,num-informat);
******** char-value=INPUTC(char-string,char-informat);

 saleStr = '2,115,353';
  ************* input: str to number *******************;
      saleNum1=input(saleStr,comma9.); * --- inputStr, putNum;
 saleNum2=input(saleStr,comma5.);
 saleNum3=input(saleStr,comma11.);
 put saleNum1 saleNum2 saleNum3;

 ********* put: number to str ;
 saleStr1=put(saleNum1,z6.);
 saleStr2=put(saleNum1,z8.);
 saleStr3=put(saleNum1,z9.);
 saleStr4=put(saleNum1,z10.);
 saleStr5=put(saleNum1,z3.);
 put saleStr1 saleStr2 saleStr3 saleStr4 saleStr5;
/*  2.12E6 02115353 002115353 0002115353 2E6*/


 ************* inputn: str to number *******************;
 ******  num-value=INPUTN(char-string,num-informat);
 ******** char-value=INPUTC(char-string,char-informat);
/*  The INPUTN function enables you to specify a numeric informat at run time.
 Using the INPUT function is faster because you specify the informat at
 compile time.*/
 saleNm1 = inputn(saleStr4, 9.);   ** saleStr4 =  0002115353;
 saleNm2 = inputn(saleStr4, 5.);
 saleNm3 = inputn(saleStr4, 11.);
 put saleNm1 saleNm2 saleNm3 ;  * 211535 21 2115353 ;

 ************* inputc str to number, the format is special *******************;
 salesStr = '1111' ;
 saleGrade = inputc(salesStr, '$saleFormat');
 put saleGrade ;
 salesStr = '2222' ;
 saleGrade = inputc(salesStr, '$saleFormat');
   put saleGrade ;
   run;

/* loop */
data base_23a;
do until(count GT 5);
      put count=;
      count+1;
end;
run;
/********************** output  **********************************/
/*count=0*/
/*count=1*/
/*count=2*/
/*count=3*/
/*count=4*/
/*count=5*/


data base_23a;
do until(count GT 5);
      put count=;
      count=count+1;
end;
run;
/*output: Undetermined, infinite loop, missing: count=.*/

data base_23_1;
count=0;
do until(count>5);
      put count=;
      count=count+1;
end;
count=0;
do while(count<5);
      put count=;
      count=count+1;
   end;
do count=1 to 10;
      Capital+5000;
      capital+(capital*.10);
 put capital=;
end;

do count=2,3,5,7,11,13,17;
   put count=;
end;

do i=1 to 101 by 2;
put i=;
end;
do count=288 to 1 by -2;
put count=;
end;


prod = 1;
do count=1 to 10;
prod = prod *count;
put prod=;
end;

n=10;
do i=n to 1 by -1;
put i=;
end;
do count=2 to 18 by 2;
put count=;
end;
run;

data base_23_2b;
do count=2 to 18 by 2;
put "XXXXXXXXX"  count=;
end;
run;

data base_23_2;
/*Example 1: Defining Arrays {} [] () */
array rain {5} janr febr marr aprr mayr;
array rain1{3} $ _TEMPORARY_ ('small', 'heavy', 'light'); * without create data in the output dataset ;
put _TEMPORARY_;
********************* ;
array days{7} d1-d7 (1,2,3,4,5,6,7); ** init value should put in "()";
do i=1 to 7;
    put 'days[i]=' days[i] = ;
end;
array book{3} $b1-b3 ('b1', 'b2', 'b3');
i=1;
do while(i<4);
   put book[i]=;
   i+1;
end;
array book1[3] b1 b2 b3 ('c1', 'c2', 'c3');
i=1;
do while(i<4);
   put book1[i]=;
   i+1;
end;
array book2(3) b1-b3 ;
array book3(*) b1-b3 ;
array sizes[*] petite small medium large extra_large (2, 4, 6, 8, 10);
***************** dimension ;
dimA = dim(days); dimBook1 =dim(book1) ; dimBook2 =dim(book2);
 dimBook3 =dim(book3);
put 'dimA=' a 'dimBook1=' dimBook1  'dimBook2=' dimBook2 'dimBook3=' dimBook3;
i= 1;
do while(i<6);
put 'size=' sizes[i] = ;
i+1;
end;
array cities[4] $10 ('New York' 'Los Angeles' 'Dallas' 'Chicago');

/*Example 2: Assigning Initial Numeric Values */

array test{4} t1 t2 t3 t4 (90 80 70 70);
/*Example 3: Defining Initial Character Values */
/*array test2{*} $3 ('a','b','c');*/
/*Example 4: Defining More Advanced Arrays */
array x{5,3} score1-score15;
run;

******************************
* _Temporary_ array     *
******************************/;
data base_23_3;
input x1-x4;
cards;
6 9 99 99
8 11 69 99
9 9 99 99
;
data base_23_4;
   set base_23_3 ;
      array  x  x1-x4;
      array  mx(4) _temporary_ (9 9 99 99) ;
      do i=1 to 4;
         if x[i]= mx{i}  then x[i]=.;
      end;
run;

data base_23_5;
   input term balance;
cards;
1   5000
2   3000
3   10000
4   8000
;
proc format ;
   value term 1='Three Month'
              2='Six Month'
 3='One Year'
              4='Two Year';
run;
data base_23_6;
   set base_23_5;
   array rate [4] _temporary_  (3.25 3.5 4.15 4.5) ;
   do i=1 to 4;
   if term =1 then balance=balance +balance*rate[i]/1000;
   else if term =2 then balance=balance +balance*rate[i]/1000;
   else if term =3 then balance=balance +balance*rate[i]/1000;
   else if term =4 then balance=balance +balance*rate[i]/1000;
   end;
run;


data base_25;
input style $ price comma7.;
datalines;
condo 256,789
condo 156,789
condo 356,789
condo 756,789
condo 256,789
single 786,456
single 986,456
single 586,456
single 386,456
single 186,456
semi  456,345
semi  656,345
semi  356,345
semi  256,345
. 234,456
condo .
semi .
;
run;

proc freq data=base_25;
tables style*price /nocum;
by style;
format price dollar10.;
label style="Style of homes"
price="Asking price";
run;

proc freq data=base_25;
tables style;
tables price;
run;
proc freq data=base_25;
tables style*price;
format price dollar10.;
label style="Style of homes"
price="Asking price";
run;

proc freq data=base_25;
tables style price;
format price dollar10.;
label style="Style of homes" price="Asking price";
run;

proc print data=base_25 ;
var price;
label style="Style of homes"
price="Asking price";
run;

proc means data=base_25 n mean;
class style;
var price;
format price dollar10.;
label style="Style of homes"
price="Asking price";
run;

proc report data=base_25 nowd headline;
column style n price;
define style / group "Style of homes";
define price / mean format=dollar8.
"Asking price";
run;


data base_33;
input animal1 $ animal2 $ mlgrams1 mlgrams2;
put animal1  animal2  mlgrams1 mlgrams2;
cards;
hummingbird ostrich 54000.39 90800000.87
;
run;

data base_33_1;
input animal1 $ animal2 $ mlgrams1 mlgrams2;  * max 12 digit, with decimal;
put animal1  animal2  mlgrams1 mlgrams2;
cards;
hummingbird ostrich 123456789.2345 123456789112  
;
run;
/* output: hummingb ostrich 123456789.23 123456789112*/

data base_33_2;
input animal1 $16.  mlgrams1 best32.;
put animal1   mlgrams1;
cards;
hummingbirdddddx 123456789.1125  
;
run;
/*output : hummingbirdddddx 123456789.11*/

data base_33_3;
input animal1 $16.  mlgrams1 best32.;
format mlgrams1 best32.;
put animal1   mlgrams1;
cards;
hummingbirdddddx 1234567891125454654654.356454521  
;
run;

data base_33_4;
input animal1 $16.  mlgrams1 18.;
put animal1   mlgrams1;
cards;
hummingbirdddddx 123456789112125  
;
run;

/*merge: before merge should sort*/
data base_34_a;
input fname $ age;
datalines;
Bruce 30
Dan 40
;
run;

data base_34_b;
input fname $ salary;
datalines;
Bruce 25000
Bruce 35000
Dan 25000
;
run;

data base_34_1;
merge base_34_a base_34_b; **  blind merge, by record order;
run;

/*first sort data, then merge: if sort by descending then merge by descending */
proc sort data=base_34_b out=base_34_b1;
by descending fname ;
run;

proc sort data=base_34_a out=base_34_a1;
by descending fname ;
run;


data base_34_5;
merge base_34_b1 base_34_a1;
by descending fname; ******** when merge, add "by descending fname" *********;;;
run;

/*ascending merge*/
proc sort data=base_34_b out=base_34_b2;
by  fname ;
run;

proc sort data=base_34_a out=base_34_a2;
by fname ;
run;


data base_34_5;
merge base_34_b2 base_34_a2;
by fname; ******** when merge, add "by descending fname" *********;;;
run;

data base_34_6;
set base_34_5; * the new dataset did not sort ;
by fname;
if first.fname then total=0;
total+salary;
if last.fname then output;
run;


data base_37;
length jobcode $ 5;
input jobcode;
datalines;
12345
45678
;
run;
data base_37_1;
length jobcode $ 12;
set base_37;
run;
proc contents data=base_37_1;
run;

data base_37_2;
set base_37;
length jobcode $ 12;
run;
proc contents data=base_37_2;
run;

data base_37_3;
length course $ 8. school $ 4.;
input  course school score best15.  year;
cards;
English NT   123456789191112 1998
English NSS  123456789191112 1995
;
run;


data base_37_4;
length course $ 8. school $ 4.;
input  course school score  year;
cards;
English NT   123456789191112 1998
English NSS  123456789191112 1995
;
run;

data base_37_5;
length course $ 8. school $ 4.;
input  course school score 13-28 year;
put course= school= score= best24. year=;
cards;
English NT  123456789191112 1998
English NSS 123456789191112 1995
;
run;



data base_37_6;
length course $ 8. school $ 4.;
input  course school score 13-28 year;
put course= school= score= best12. year=;
cards;
English NT  123456789191112 1998
English NSS 123456789191112 1995
;
run;



data base_37_7;
length course $ 8. school $ 4.;
input  course school score 13-28 year;
put course= school= score=  year=;
cards;
English NT  123456789191112 1998
English NSS 1.23456789191112 1995
;
run;



data base_37_8;
length course $ 8. school $ 4.;
input  course school score 13-35 year;
put course= school= score= best24.  year=;
cards;
English NT  123456789191112        1998
English NSS 1.23456789191118888882 1995
;
run;



DATA base_39;
  INPUT id female race ses schtype $ prog
        read write math science socst;
DATALINES;
 147 1 1 3 pub 1 47  62  53  53  61
 108 0 1 2 pub 2 34  33  41  36  36
  18 0 3 2 pub 3 50  33  49  44  36
 153 0 1 2 pub 3 39  31  40  39  51
  50 0 2 2 pub 2 50  59  42  53  61
  51 1 2 1 pub 2 42  36  42  31  39
 102 0 1 1 pub 1 52  41  51  53  56
  57 1 1 2 pub 1 71  65  72  66  56
 160 1 1 2 pub 1 55  65  55  50  61
 136 0 1 2 pub 1 65  59  70  63  51
  88 1 1 1 pub 1 68  60  64  69  66
 177 0 1 2 pri 1 55  59  62  58  51
  95 0 1 1 pub 1 73  60  71  61  71
 144 0 1 1 pub 2 60  65  58  61  66
 139 1 1 2 pub 1 68  59  61  55  71
 135 1 1 3 pub 1 63  60  65  54  66
 191 1 1 1 pri 1 47  52  43  48  61
 171 0 1 2 pub 1 60  54  60  55  66
  22 0 3 2 pub 3 42  39  39  56  46
  47 1 2 3 pub 1 47  46  49  33  41
  56 0 1 2 pub 3 55  45  46  58  51
 128 0 1 1 pub 1 39  33  38  47  41
  36 1 2 3 pub 2 44  49  44  35  51
  53 0 2 2 pub 3 34  37  46  39  31
  26 1 4 1 pub 1 60  59  62  61  51
;
RUN;

/*Preventing the output from sending to SAS output window: */
ODS listing close;
/*Opening ODS statement: */
ODS html/pdf/rtf file="";
/*Closing ODS statement: */
ODS html/pdf/rtf close;
/*Letting the output back to SAS output window: */
ODS listing;



%let projroot = E:\ChenyxFiles\2014.Study.TrainingCertificate\SAS.Victoria\SAS.Exam\SAS.Base.Code ;
%let projfolder = sasexport;
%let prjPath = &projroot\&projfolder;

filename myhtml "&prjpath\procreg.html";
ods html body=myhtml;
proc reg data=base_39;
  model write= female math;
run;
quit;
ods html close;

************** PDF output ;
ODS PDF file = "&prjpath\base_36.PDF";
 /* SAS procedures go here */
proc reg data=base_39;
  model write= female math;
run;
quit;
ODS PDF close;


ODS html file = "&prjpath\base_36.html";
 /* SAS procedures go here */
proc reg data=base_39;
  model write= female math;
run;
quit;
ODS PDF close;

/*use ODS output excel file*/
ODS HTML FILE="&prjPath\base_39.xls";
PROC PRINT DATA=SASHELP.CLASS;
RUN;
ODS HTML CLOSE;


libname xlsFile "&prjPath\base_39_2.xls";
data xlsFile.base_39_sheet0;
 set base_39;
run;

proc sql;
create table xlsFile.base_39_sheet1 as
 select * from base_39;
libname xlsFile clear;


ODS CSV file="&prjPath\base_39.csv";;
proc print data=SASHELP.CLASS;
run;
ODS CSV close;


ODS CSVALL file="&prjPath\base_39b.csv";;
proc print data=SASHELP.CLASS;
run;
ODS CSVALL close;

/*for libname, date is troublesome, make sure have some change*/
/*data datetest; */
/* format date_formatted date9.; */
/* date_formatted="13MAY2004"d; */
/* date_unformatted="04JUL2004"D; */
/*run; */
/*libname myxls "c:\demodate.xls" ; */
/* */
/*data myxls.tab2 ; */
/* set datetest; */

LIBNAME xFile1 EXCEL PATH="&prjpath\base_39_4.xlsx";
data xFile1.base_39_sheet;
 set base_39;
run;
LIBNAME xFile2 "&prjpath\base_39_5.xlsx";
data xFile2.base_39_sheet;
 set base_39;
run;

LIBNAME asFile "&prjpath\base_39_4.accdb";
data asFile.base_39_sheet;
 set base_39;
run;


data base_41_1;
input
@1 date_of_birth mmddyy10.
@12 first_name $5.
@18 age 3.;
datalines;
01/05/1989 Frank 11
12/25/1987 June  13
01/05/1991 Sally 9
run;
proc print data=base_41_1 noobs;
run;


data base_41_1;
input
@1 date_of_birth mmddyy10.
@12 first_name $5.
@18 age 3.;
datalines;
01/05/1989 Frank 11
12/25/1987 June  13
01/05/1991 Sally 9
run;
proc print data=base_41_1 noobs;
run;


data base_41_2;
input
@1 date_of_birth mmddyy10.
@12 first_name $5.
@18 age 10.;
datalines;
01/05/1989 Frank 11
12/25/1987 June  13
01/05/1991 Sally 9
run;
proc print data=base_41_2 noobs;
format date_of_birth mmddyy10.;
run;


data base_41_a;
input productId $ price date :mmddyy10.;
datalines;
book 25000 12/23/2014
car 35000  12/23/2014
glass 25000  12/23/2014
toller 3448  12/23/2014
roller 5677 12/23/2014
;

data base_41_b;
input productId $ country $;
datalines;
book china
car U.S
glass Canada
computer US
firewall US
;

proc sort data=base_41_a out=base_41_a;
by productId;
run;


proc sort data=base_41_b out=base_41_b;
by productId;
run;

data base_41;
merge
base_41_a(in=ina)
base_41_b(in=inb);
by productId;
if ina=0 or inb=0;
run;
proc print data=base_41;
run;


data base_41_2;
merge
base_41_a(in=ina)
base_41_b(in=inb);
by productId;
if ina=0 and inb=0;
run;
proc print data=base_41_2;
run;


data base_41_3;
merge
base_41_a(in=ina)
base_41_b(in=inb);
by productId;
if ina=1 or inb=0 then output;
else a = productID;
run;
proc print data=base_41_3;
run;



data base_41_4 base_41_4a base_41_4b ;
merge
base_41_a(in=ina)
base_41_b(in=inb);
by productId;
if ina=1 and inb=1 then output base_41_4;
if productId eq book then output base_41_4a;
if productId eq car then output base_41_4b;
output;
run;
proc print data=base_41_4;
run;
proc print data=base_41_4a;
run;


data base_43_a  base_43_b(drop=country );
set base_41_a(keep=productID price date);
if productID='book' then output base_43_a;
else if productID='car' then output base_43_b;
run;



proc print data=base_43_a;
run;


proc print data=base_43_b;
run;

/*input and output multiple dataset*/
data base_43_c base_43_d;
set base_41_a(keep=productID price date) base_41_b;
if productID='book' then output base_43_c;
else output base_43_d(drop=date country);
run;

/*use keep= and rename=() together*/
data base_43_c base_43_d;
set base_41_a(keep=productID price date rename=(price=prc)) base_41_b;
if productID='book' then
output base_43_c;
else
output base_43_d;
run;


proc print data=base_43_c;
run;

proc print data=base_43_d;
run;

/*keep/drop during data output, and set statement*/
data base_43_c base_43_d(drop=date country);
set base_41_a(keep=productID price date) base_41_b;
if productID='book' then output base_43_c;
else output base_43_d;
run;


proc print data=base_43_c;
run;

proc print data=base_43_d;
run;

/*base_45*/
proc contents data=sasuser._all_ nods;
run;

proc contents data=sasuser._all_;
run;
proc contents data=sasuser.CT;
run;
proc print data=sasuser.CT;
run;

proc contents data=sasuser.CT nods;
run;

proc print data=sasuser.CT;
run;

data work.test;
length city $20;
city='Paris';
city2=trim(city);
len = length(city2);
put len=;
run;

proc contents data=test;
run;


data base_49_a;
input productId $ price date mmddyy10.;
datalines;
book 25000 12/89/2014
car 35000 12/23/2014
glass 25000 12/23/2014
toller 3448 12/23/2014
roller 5677 12/23/2014
;

proc print data=base_49_a;
format date mmddyy10.;
run;


************************************************************ ;
/*Base 70. */
************************************************************ ;

data b70_2;
length Name $ 4 Month $ 3 Status $ 7;
infile "&prjpath\infile_3.TXT" DLM=',' ;
input Name $ Month $ @;
if Month='FEB' then input Week1 Week2 Week3 Week4 Status $;
else if Month='MAR' then input Week1 Week2 Week3 Week4 Week5 Status $;
format Week1-Week5 dollar6.;

/*John,FEB,13,25,14,27,Final*/
/*John,MAR,26,17,29,11,23,Current*/
/*Tina,FEB,15,18,12,13,Final*/
/*Tina,MAR,29,14,19,27,20,Current*/
run;
proc print data=b70_2;
run;


data b70_2_1;
length Name $ 4 Month $ 3 Status $ 7;
infile "&prjpath\infile_3.TXT" DLM=',' ;
input Name $ Month $ @@;
if upcase(Month)='FEB' then input Week1 Week2 Week3 Week4 Status $;
else if upcase(Month)='MAR' then input Week1 Week2 Week3 Week4 Week5 Status $;
format Week1-Week5 dollar3.;
/*John,FEB,13,25,14,27,Final*/
/*John,MAR,26,17,29,11,23,Current*/
/*Tina,FEB,15,18,12,13,Final*/
/*Tina,MAR,29,14,19,27,20,Current*/
run;
proc print data=b70_2_1;
run;


data b70_2_2;
length Name $ 4 Month $ 3 Status $ 7;
infile "&prjpath\infile_3.TXT" DLM=',' ;
input Name $ Month $ @;
if upcase(Month)='FEB' then input Week1 Week2 Week3 Week4 Status $;
else if upcase(Month)='MAR' then input Week1 Week2 Week3 Week4 Week5 Status $;
format Week1-Week5 dollar4.;
/*John,FEB,13,25,14,27,Final*/
/*John,MAR,26,17,29,11,23,Current*/
/*Tina,FEB,15,18,12,13,Final*/
/*Tina,MAR,29,14,19,27,20,Current*/
run;
proc print data=b70_2_2;
run;


data b70_2_2;
length Name $ 4 Month $ 3 Status $ 7;
infile "&prjpath\infile_3.TXT" DLM=',' ;
input Name $ Month $ ;
if upcase(Month)='FEB' then input Week1 Week2 Week3 Week4 Status $;
else if upcase(Month)='MAR' then input Week1 Week2 Week3 Week4 Week5 Status $;
format Week1-Week5 dollar4.;
/*John,FEB,13,25,14,27,Final*/
/*John,MAR,26,17,29,11,23,Current*/
/*Tina,FEB,15,18,12,13,Final*/
/*Tina,MAR,29,14,19,27,20,Current*/
run;
proc print data=b70_2_2;
run;


/****************** b70_3 **********************/
%put "&prjpath\regions.xls";
%put '&prjpath\regions.xls'; ************* Error ;

libname MYXLS "&prjpath\regions.xlsx";
proc print data= MYXLS."north$"n;
run;

libname MYXLS2 "&prjpath\regions.xlsx";
proc print data= MYXLS2."north$"n;
run;

data b70_3_1;
set myxls."north$"n;
run;

data b70_3_2;
set myxls.'north$'n;
run;

proc export data=b70_3_2
   outfile="&prjpath\Prices.xls"
   DBMS=Excel2000 replace;
run;

proc export data=b70_3_2
        outfile="&prjpath\Prices.txt"
        dbms=dlm replace;
        delimiter=',';
run;

/*Exporting a Microsoft access table*/;
proc export data=b70_3_2
     outtable="price"
     dbms=access
     replace;
     database="&prjpath\Prices.mdb";
run;

data b70_4;
Day="01" ;
Yr=1969 ;
X=mdy(Day,01,Yr) ;
Y=mdy("12","12","1987") ;
Format Y date9. X date9.;
put X= Y=;
run;

data b70_4_1;
Day="02" ;
Yr=1960 ;
X=mdy(Day,01,Yr) ;
format X date9.;
put X=;
run;

data b70_4_3;
mon="05" ;
Yr=1960 ;
X=mdy(mon,01,Yr) ;
put X=;
run;

data b70_4_4;
mon="05" ;
Yr=1960 ;
X=mdy(mon,01,Yr) ;
put X=;
run;


data b70_4_5;
mon="05" ;
Yr="1960" ;
X=mdy(mon,01,Yr) ;
put X=;
run;

/*control the obs: first obs, obs=number_of_obs*/
data b70_5;
infile "&prjpath\70.txt" firstobs=1 obs=2;
input id $ name $  dept $ project $;                                        
/*datalines;                                                                   */
/*000 Miguel A12 Document                                                         */
/*111 Fred B45 Survey                                                             */
/*222 Diana B45 Document                                                          */
/*888 Monique A12 Document                                                        */
/*999 Vien D03 Survey                */
run;
proc print data=b70_5;
run;


data b70_6;
input id $ name $  dept $ project $ price;                                        
datalines;                                                                  
000 Miguel A12 Document 2
111 Fred B45 Survey 3
222 Diana B45 Document 5
888 Monique A12 Document 9
999 Vien D03 Survey 11
1999 Vien D03 Survey 12
909 Vien D03 Survey 15
899 Vien D03 Survey 17
run;
proc print data=b70_6(firstobs=3);
run;


/*The number of records = (obs - firstobs + 1);*/

proc print data=b70_6(firstobs=2); * start from 2, list all record ;
run;

proc print data=b70_6(obs=2); * start from 1, total 2 record;
run;

proc print data=b70_6 (firstobs=2 obs=4); * which means start from 2, total is 2,3,4;
run;

proc print data=b70_6 (firstobs=7 obs=8);
run;



data b70_6_1;
set b70_6 (firstobs=5 obs=5);   * only 5;
run;

proc print data=b70_6_1;
run;

/*keep a b c d;*/
data b70_6_5;
set b70_6(keep=id name dept project);
comb = dept || project;
keep id comb;
run;

data b70_6_6;
set b70_6(keep=id name dept project);
keep id;
run;
proc print data=b70_6_6;
run;

data b70_6_7;
set b70_6(drop=project);
drop id;
run;
proc print data=b70_6_7;
run;

data b70_6_8(keep=id name);
set b70_6(drop=project);
run;
proc print data=b70_6_8;
run;

data b70_6_9(keep=id name);
set b70_6(drop=project);
drop id;
run;
proc print data=b70_6_9;
run;

data b70_8;
input id $ name $  dept $ project $ price;                                        
datalines;                                                                  
000 Miguel A12 Document 2
000 Miguel A12 Document 4
000 Miguel A12 Document 5
111 Fred B45 Survey 3
111 Fred B45 Survey 453
111 Fred B45 Survey 5
222 Diana B45 Document 15
222 Diana B45 Document 59
222 Diana B45 Document 45
888 Monique A12 Document 79
888 Monique A12 Document 89
888 Monique A12 Document 19
999 Vien D03 Survey 11
1999 Vien D03 Survey 12
909 Vien D03 Survey 15
899 Vien D03 Survey 17
run;

proc sort data=b70_8;
by name;
run;

data b70_8_1;
set b70_8;
by name;   * this is a group var, first sort var according to grouping var;
if FIRST.name then payroll=0; * using the first grouping var;
payroll + price;
if LAST.name then output; * using the last grouping var;
run;
proc print data=b70_8_1;
run;

ods tagsets.excelxp file='test.xml'
options(doc='help');


data b70_10;
Cost='$20,000';
Discount=.10*Cost; /* cost convert to numeric and become missing*/
run;

data b70_10_1;
Cost='890876jk';
Discount=.10*Cost; /* cost convert to numeric and become missing*/
x =  Discount || "xxxxxxxxx";  * character || . = character.;
y = Discount * 8;  * numeric * . = . missing;
put x= y=;
run;

data b70_10_3;
Cost='$20,000';
Discount=.10*Cost; /* cost convert to numeric and become missing*/
Cost1='$120,000';
Discount=.10*Cost; /* cost convert to numeric and become missing*/
Cost2='$220,000';
Discount3=.10*Cost; /* cost convert to numeric and become missing*/
run;

/*b70_11*/
proc format;
value agegrp
low-12 ='Pre-Teen'
13-high = 'Teen';
run;

proc means data=SASHELP.CLASS;
var Height;
class Sex Age;
format Age agegrp.;
run;

proc means data=SASHELP.CLASS min max mean maxdec=11;  * maxdec=the decimal position;
var Height;
class Sex Age;
format Age agegrp.;
run;


proc means data=SASHELP.CLASS mean min max maxdec=4;  * maxdec=the decimal position;
var Height;
class Sex Age;
format Age agegrp.;
run;


data b70_14 / debug;
input id $ name $  dept $ project $ price;                                        
datalines;                                                                  
000 Miguel A12 Document 2
111 Fred B45 Survey 3
222 Diana B45 Document 5
888 Monique A12 Document 9
999 Vien D03 Survey 11
1999 Vien D03 Survey 12
909 Vien D03 Survey 15
899 Vien D03 Survey 17
run;

/* using debugger: examine var, examine _all_,
break lineNo. --> go (to line number)
*/
data b70_14_2 (drop=type); * /debug;
   input @1 type $ @;
   if type='H' then do;
      input @3 Tour $20.;
      return;
      end;
   else if type='P' then do;
      input @3 Name $10. Age 2. +1 Sex $1.;
      output;
      end;
   datalines;
H Tour 101
P Mary E    21 F
P George S  45 M
P Susan K    3 F
H Tour 102
P Adelle S  79 M
P Walter P  55 M
P Fran I    63 F
;

data b70_14_2 ; * /debug;
   input @1 type $ @;
   if type='H' then do;
      input @3 Tour $20.;
 output;
      return;  /* did not output but return to input syntax */
      end;
   else if type='P' then do;
      input @3 Name $10. Age 2. +1 Sex $1.;
      output;
      end;
   datalines;
H Tour 101
P Mary E    21 F
P George S  45 M
P Susan K    3 F
H Tour 102
P Adelle S  79 M
P Walter P  55 M
P Fran I    63 F
;

data b70_14_2 ; * /debug;
   input @1 type $ @;
   if type='H' then do;
      input @3 Tour $20.;
 output;
 end;
   else if type='P' then do;
      input @3 Name $10. Age 2. +1 Sex $1.;
      output;
      end;
   datalines;
H Tour 101
P Mary E    21 F
P George S  45 M
P Susan K    3 F
H Tour 102
P Adelle S  79 M
P Walter P  55 M
P Fran I    63 F
;


proc print data=b70_14_2;
   title 'Tour List';
run;

data b70_14_3 (drop=type) ; */debug;
retain Tour;
input @1 type $ @;
   if type='H' then do;
      input @3 Tour $20.;
      return;
      end;
   else if type='P' then do;
      input @3 Name $10. Age 2. +1 Sex $1.;
      output;
      end;
   datalines;
H Tour 101
P Mary E    21 F
P George S  45 M
P Susan K    3 F
H Tour 102
P Adelle S  79 M
P Walter P  55 M
P Fran I    63 F
;

proc print data=b70_14_3;
   title 'Tour List';
run;


data b70_14_4 (drop=type) ; */debug;
*retain Tour;
input @1 type $ @;
   if type='H' then do;
      input @3 Tour $20.;
 output; * do not output as the data require, it is flexible in data step;
      end;
   else if type='P' then do;
      input @3 Name $10. Age 2. +1 Sex $1.;
      output;
      end;
   datalines;
H Tour 101
P Mary E    21 F
P George S  45 M
P Susan K    3 F
H Tour 102
P Adelle S  79 M
P Walter P  55 M
P Fran I    63 F
;

proc print data=b70_14_4;
   title 'Tour List';
run;

/*the foreigner give the test, very small details, need carefull*/
data b70_17;
infile datalines;
input City $20.;
if City='Tulsa' then
State='OK';
Region='Central';
if City='Los Angeles' then
State='CA';
Region='Western';
datalines;
Tulsa
Los Angeles
Bangor
;
run;

proc print data=b70_17;
   title 'City List';
run;


/* _N_ and _ERROR_.
The _N_ variable counts the number of times the DATA step begins to iterate.
The _ERROR_ variable signals the occurrence of an error caused by the data
during execution. The value of _ERROR_ is either 0 (indicating no errors exist),
or 1 (indicating that one or more errors have occurred). */

data b70_19/debug;
input num char $;
datalines;
1 23
3 23
1 77
run;

proc print data=b70_19 ;
where num='1';
run;

data b70_20;
format localFee 9.5 countryFee 7.5;   /* which means total 9 digit, inlcude 5 decimal
which means total 7 digit, 5 decimal, round at the decimal */
input localFee countryFee;
datalines;
1234.56789 12.34567
1234.56789 12.34567
123456789 1234567
123456789 1234567
;
run;

proc print data=b70_20;
run;
/*output result*/
/* Obs localFee   countryFee */
/* 1   1234.5679  12.3457    */

data b70_20_1;
format LocalFee CountryFee percent7.2;
set b70_20;
LocalFee=LocalFee/100;
CountryFee=CountryFee/100;
run;

proc print data=b70_20_1;
run;
/********************output result*/
/*Obs LocalFee CountryFee */
/*1   1235%    12.3% */




data b70_20_2;
format LocalFee CountryFee percent4.2;
set b70_20;
LocalFee=LocalFee/100;
CountryFee=CountryFee/100;
run;

proc print data=b70_20_2;
run;
/********************output result*/
/*Obs LocalFee CountryFee */
/*1 *%   *%  */

/********************* PERCENTw.d         Format */
/*w --- The width of the output field must account for the percent sign (% ) */
/*  default 6, range 4-32*/
/*d--- the number of digits to the right of the decimal point , range 0-31*/

data b70_20_3;
format LocalFee CountryFee percent5.2;
set b70_20;
LocalFee=LocalFee/100;
CountryFee=CountryFee/100;
run;

proc print data=b70_20_3;
run;
/********************output result*/
/*Obs LocalFee CountryFee */
/*1   **%      12%   */



data b70_20_4;
format LocalFee CountryFee percent6.2;
set b70_20;
LocalFee=LocalFee/100;
CountryFee=CountryFee/100;
run;

proc print data=b70_20_4;
run;
/********************output result*/
/*Obs LocalFee CountryFee */
/*1   1E3%     12%        */
/*1.2 --------->  120%*/
/* -0.05 ----->  (      5%)*/
/*http://support.sas.com/documentation/cdl/en/lrdict/64316/HTML/default/viewer.htm#a000205182.htm*/


data b70_20_5;
input prodNo salary  :dollar11.2  ;
datalines;
2355  $34,072.12
;
run;
proc print data=b70_20_5;
run;


data b70_20_6;
input prodNo 9. salary  :dollar11.  ;   * cause error ;
datalines;
235500 $34,072.12
;
run;

data b70_20_6;
input prodNo 9. salary  :dollar11.  ;   * correct ;
datalines;
235500           $34,072.12
;
run;

data b70_20_6;
input prodNo 9. salary  dollar11.  ;   * error,  without ":" , free format ;
datalines;
235500           $34,072.12
;
run;

data b70_20_6;
input prodNo 9. salary  :dollar11.  ;   * error,  without ":" , free format ;
datalines;
             235500           $34,072.12
;
run;


data b70_20_6;
input prodNo :9. salary  :dollar11.  ;   * correct, use ":" , free format ;
datalines;
             235500           $34,072.125988
;
run;
proc print data=b70_20_6;
run;
/*****  output       */
/* Obs prodNo salary */
/* 1   235500 34072.13 */



data b70_20_6;
input prodNo :9. salary  :dollar15.4  ;   * correct, use ":" , free format ;
datalines;
             235500           $34,072.125988
;
run;
proc print data=b70_20_6;
format salary dollar15.4;
run;
/*****  output       */
/*Obs prodNo salary */
/*1   235500 $34,072.1260 */


data b70_21;
input ProdId $ Price ProductType $ Sales Returns;
datalines;
K12S 95.50 OUTDOOR 15 2
B132S 2.99 CLOTHING 300 10
R18KY2 51.99 EQUIPMENT 25 5
3KL8BY 6.39 OUTDOOR 125 15
DY65DW 5.60 OUTDOOR 45 5
DGTY23 34.55 EQUIPMENT 67 2
run;
proc print data=b70_21;
run;

data WORK.OUTDOOR WORK.CLOTH WORK.EQUIP;
set b70_21;
if Sales GT 30;
if ProductType EQ 'OUTDOOR' then output WORK.OUTDOOR;
else if ProductType EQ 'CLOTHING' then output WORK.CLOTH;
else if ProductType EQ 'EQUIPMENT' then output WORK.EQUIP;
run;

/*the if sales GT 30 without else, equals if .....*/
data WORK.OUTDOOR WORK.CLOTH WORK.EQUIP Work.Others;
set b70_21;
if Sales GT 30 then
do;
if ProductType EQ 'OUTDOOR' then output WORK.OUTDOOR;
else if ProductType EQ 'CLOTHING' then output WORK.CLOTH;
else if ProductType EQ 'EQUIPMENT' then output WORK.EQUIP;
end;
else
do;
output work.others;
end;
run;


data WORK.OUTDOOR WORK.CLOTH WORK.EQUIP Work.Others;
set b70_21;
if Sales GT 30;  * equals to if() then, all the rest is then clause;
do;
if ProductType EQ 'OUTDOOR' then output WORK.OUTDOOR;
else if ProductType EQ 'CLOTHING' then output WORK.CLOTH;
else if ProductType EQ 'EQUIPMENT' then output WORK.EQUIP;
end;
output work.others;
run;

proc print data=WORK.OUTDOOR;
run;

proc print data=WORK.Others;
run;

proc contents data=WORK._all_; run;

data b70_24;
infile datalines dlm='*' dsd;
input name $ date :mmddyy10. visits ;
datalines;
Susan*12/29/1970*10
Michael**6
;
run;
proc print data=b70_24;
run;

data b70_24 ;
infile datalines dlm='*';
input name $ date :mmddyy10. visits ;
datalines;
Susan*12/29/1970*10
Michael**6
;
run;

proc print data=b70_24;
run;

data b70_24;
infile datalines dsd dlm='*'; * if delimeter is ',' then neglect, otherwise add dlm="!@#$";
input name $ date :mmddyy10. visits ;
datalines;
Susan*12/29/1970*10
Michael**6
;
run;


data b70_24;
infile datalines dsd dlm='*'; * if delimeter is ',' then neglect, otherwise add dlm="!@#$";
input name $ date :mmddyy10. visits;
datalines;
Susan*12/29/1970*10
Michael**6
;
run;


data b70_24;
infile datalines dsd dlm='*'; * if delimeter is ',' then neglect, otherwise add dlm="!@#$";
input name $ date :mmddyy10. visits :2.;
datalines;
Susan*12/29/1970*10
Michael**6
;
run;


/*DSD (Delimited Separated Data) - It has three functions when reading
delimited files. The first function is to strip off any quotes that surround
values in the text file. The second function deals with missing values.
When SAS encounters consecutive delimiters in a file, the default action is to
treat the delimiters as one unit. If a file has consecutive delimiters,
it抯 usually because there are missing values between them. DSD tells SAS to
treat consecutive delimiters separately; therefore, a value that is missing
between consecutive delimiters will be read as a missing value when DSD is
specified. The third function assumes the delimiter is a comma.
If DSD is specified and the delimiter is a comma, the DLM= option is not necessary.
If another delimiter is used, the DLM= option must be used as well*/

proc print data=b70_24;
run;

proc print data=SASHELP.CLASS(firstobs=5 obs=15);
where Sex='M';
run;

proc print data=SASHELP.CLASS(firstobs=5 obs=15);
where Sex='F';
run;

proc print data=SASHELP.CLASS(firstobs=5 obs=150);
run;



data b70_26;
input ProdId $ Price ProductType $ Sales Returns;
datalines;
K12S 95.50 OUTDOOR 15 2
B132S 2.99 CLOTHING 300 10
B132S 2.99 EQUIPMENT 25 5
3KL8BY 6.39 OUTDOOR 125 15
DY65DW 5.60 OUTDOOR 45 5
DGTY23 34.55 EQUIPMENT 67 2
run;

proc sort data=b70_26 out=b70_26_1;
by prodId descending price;
run;

proc sort data=b70_26 out=b70_26_1;
by descending price prodId;
run;


proc sort data=b70_26 out=b70_26_1;
by descending price prodId Sales;
run;
proc print data=b70_26_1;
run;

data b70_28_a;
input Location $ Pop2000;
cards;
Alaska 626931
Delaware 783595
Vermont 608826
Wyoming 493782
;
run;
data b70_28_b;
input State $ Pop2008;
cards;
Alaska 686293
Delaware 873092
Wyoming 532668
;
run;

/*rename two variables;*/
data b70_28_c;
set b70_28_a(rename=(Location=State Pop2000=PopNumber));
run;

proc print data=b70_28_c;
run;
data b70_28_c;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b;
Difference=Pop2008-Pop2000;
run;

data b70_28_c;
merge b70_28_a(rename=(Location=State))  /* rename=(oldLoacation=newState)*/
b70_28_b;
by State;
run;

proc print data=b70_28_c;
run;

proc print data=b70_28_a(rename=(Location=State));
run;

proc print data=b70_28_a(keep=Location);
run;

data b70_28_d /debug;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b;
Difference=Pop2008-Pop2000;
run;

data b70_28_d;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a or _b;   ********************* or *******;
Difference=Pop2008-Pop2000;
run;


data b70_28_e;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b then
do;
Difference=Pop2008-Pop2000;
output;
end;
else
output;
run;

proc print data=b70_28_e;
run;


data b70_28_e;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b then
do;
Difference=Pop2008-Pop2000;
end;
else
output;
run;



data b70_28_f;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b then
do;
Difference=Pop2008-Pop2000;
end;
else
;
run;
proc print data=b70_28_f;
run;


data b70_28_g;
merge b70_28_a(in=_a rename=(Location=State))
b70_28_b(in=_b);
by State;
if _a and _b then
do;
Difference=Pop2008-Pop2000;
end;
else
output;
run;
proc print data=b70_28_g;
run;


data b70_29;
input @1 Company $ @;
if Company='ABC' then input Year;
else input City $ Year;
input NumEmployees;
datalines;
ABC 2013
45
IBM XIAN 2013
34
ABC 2014
67
ITL BEIJING 2015
455
;
run;

/* use @ to change a new line, use input again to start a new line */
data b70_29_1;
input @1 Company $ @;
if Company='ABC' then input Year @;
else input City $ Year @;
input NumEmployees @;
input Price;
input Amount;
datalines;
ABC 2013 45 33
123
IBM XIAN 2013 34 34
3434
ABC 2014 67 56
454
ITL BEIJING 2015 455 455
34556
;
run;

proc print data=b70_29_1;
run;


data b70_30;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm=',' dsd;
input Fname $ Sport1 $ Sport2 $ Sport3 $; * default is flowover;
datalines;
Kim,Basketball,Golf,Tennis
Bill,Football
Tracy,Soccer,Track
run;
proc print data=b70_30;
run;


data b70_30_1;
infile cards dlm=',' dsd;
input Fname $ Sport1 $ Sport2 $ Sport3 $;
cards;
Kim,Basketball,Golf,Tennis
Bill,Football
Tracy,Soccer,Track
run;
proc print data=b70_30_1;
run;

data b70_30_2;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm=',';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim,Basketball,Golf,Tennis
Bill,Football
Tracy,Soccer,Track
;
run;
proc print data=b70_30_2;
run;


data b70_30_3;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm=',';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim,Basketball,Golf,Tennis
Bill,Football, ,
Tracy,Soccer,Track,
;
run;
proc print data=b70_30_3;
run;


data b70_30_4;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm='-';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim-Basketball-Golf-Tennis
Bill-Football- -
Tracy-Soccer-Track-
;
run;

proc print data=b70_30_4;
run;
data b70_30_6/debug;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm=',';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim,Basketball,Golf,Tennis
Bill,Football
Tracy,Soccer, Track
Tennis
Mike
Tablet
;
run;
proc print data=b70_30_6;
run;

data b70_30_5;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm='-';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim-Basketball-Golf-Tennis
Bill-Football- -
Tracy-Soccer-Track-
;
run;
proc print data=b70_30_5;
run;

data b70_30_7;
length Fname Sport1-Sport3 $ 10;
infile datalines dlm=',';
input Fname $ Sport1 $ Sport2 $ Sport3 $;
datalines;
Kim,Basketball,Golf,Tennis
Bill,Football,Soccer, Track
Tracy
; /* the last line in-complete will cause error, and skipped */
run;
/*Fname=Tracy Sport1=  Sport2=  Sport3=  _ERROR_=1 _N_=3*/
/*NOTE: SAS went to a new line when INPUT statement reached past the end of a line*/
/*Obs    Fname      Sport1      Sport2    Sport3*/
/*1     Kim      Basketball    Golf      Tennis*/
/*2     Bill     Football      Soccer    Track*/


proc print data=b70_30_7;
run;

/*Programming compile and excute*/
/*submit->compile(input buffer,pdv->init vars, prepare descriptors[_N_, _Error_])*/
/*--> excute(set missing->input buffer->output)*/
/*var is assigned a missing value when the data step begins execution.*/
/*var assigned a value of missing when the data step begins execution.*/
/*var is assigned a value 0 at compile time.*/


data b70_37;
infile datalines dlm=',';
label Fname='First name' Sport='Favoriate Sports';
input Fname $ Sport $;
datalines;
Kim,Basketball
Bill,Golf
Gate,Tennis
Lee,Football
Zhao,Soccer
;
run;


data b70_37b;
infile datalines dlm=',';
input Fname $ Sport $;
label Fname='First name' Sport='Favoriate Sports';
datalines;
Kim,Basketball
Zhao,Soccer
;
run;

proc print data=b70_37b label; /* show labels instead of var name */
run;
proc print data=b70_37b label;  * this works ;
label Fname="Stu First Name" Sport="stu sports"; * show label plus change label to new;
run;

proc print data=b70_37b label;  * this works ;
label Fname="Stu First Name";
run;
data b70_38;
input X Y $ Z;
datalines;
1 A 27
1 A 33
1 B 45
2 A 52
2 B 69
3 B 70
4 A 82
4 C 91
;
run;
data b70_38b;
set b70_38;
by X Y;
if First.Y;
run;
proc print data=b70_38b noobs;
run;

filename emailout email
TO = "yubin2967@gmail.com"
subject = "Test email by Duke - Sent by SAS 9.3"
from = "duke.fcbk@gmail.com" importance = "high" ;

data _null_;
file emailout;
put "filename emailout email";
put "Test email by Duke - Sent by SAS 9.3 - ";
put "This email was sent by SAS code as below:";
put "TO = 'yubin2967@gmail.com'";
put "subject = 'Test email by Duke - Sent by SAS 9.3'";
put "from = 'duke.fcbk@gmail.com' importance = 'high'; ";
put "run;";


data b70_39;
array Favorites{3} $ 8 ('Shakespeare','Hemingway','McCaffrey');
run;
proc print data=b70_39;
run;
data b70_39_1;
array Favorites{3} $ 8. ('Shakespeare','Hemingway','McCaffrey');
array Favor{3} $ 18. ('Shakespeare','Hemingway','McCaffrey');
array price{3} (55.55, 66.34, 55.7888);
array sales{3} (55, 66.34, 55.7888);
array p{1] p1 (555.1);
array q{1] q1 (555.21);
array r{1] r1 (555.221);
run;
proc print data=b70_39_1;
run;

data b70_41;
infile datalines;
input EmpLName $ EmpFName $ Dept $ Phone  $ Extension;
FullName=CATX(' ',EmpFName,EmpLName);
datalines;
Stevens James SALES 304-923-3721 14
;
run;
proc print data=b70_41;
run;
**************************** Result ******************;
* 1     Stevens    James    SALES    304-923-        14       James Steven ;
data b70_41b;
infile datalines;
input EmpLName $ EmpFName $ Dept $ Phone $ 12. Extension; /* length of var=12.=
exactly same, the default character length is 8. */
FullName=CATX(' ',EmpFName,EmpLName);
datalines;
Stevens James SALES 304-923-3721 14
;
run;
proc print data=b70_41b;
run;

data b70_41c;
infile datalines;
input EmpLName $ EmpFName $ Dept $ Phone $ 12. Extension; /* length of var=12.=
exactly same, the default character length is 8. */
FullName=CATX(' ',EmpFName,EmpLName);
/*CAT - concatenates character strings without removing leading or trailing blanks */
/*CATS - concatenates character strings and removes leading and trailing blanks*/
/*CATT - concatenates character strings and removes trailing blanks*/
/*CATX - concatenates character strings, removes leading and trailing blanks, and inserts separators*/
name = catx(" ",' Joe ',' Brown ');  /* "Joe Brown" *//*removes leading and trailing blanks, and inserts separators*/
name1 = cat(' Joe ',' Brown ');  /* "  Joe   Brown  " *//*without removing leading or trailing blanks */
name2 = cats(' Joe ',' Brown ');  /* "JoeBrown" *//* remove leading or trailing blanks*/
name3 = catt(' Joe ',' Brown ');  /* "Joe  Brown" */ /* remove trailing blanks*/
put name= name1= name2= name3=;
/*name=Joe Brown name1=Joe  Brown name2=JoeBrown name3=Joe Brown*/
datalines;
Stevens James SALES 304-923-3721 14
;
run;
proc print data=b70_41c;
run;


data b70_42;
Text='Australia, US, Denmark';
Pos=find(Text,'US','i',1);
Pos2=find(Text,'US','I',1);
put pos=; put pos2=;
source = "Single cpu, Multi-CPU.";
location1 = find(source, "cpu" , 1);  * find(source, kword, startPos, upcase/lowCase) ;
location2 = find(source, "cpu", -999, "I");  * "I"= Case Indepandant ;
put location1= / location2=;
run;

options nodate pageno=1 linesize=80 pagesize=60;  
data b70_42a;
   input region $ mtg : mmddyy8.;
   sendmail=mtg-45;
   datalines;
N  11-24-99
S  12-28-99
E  12-03-99
W  10-04-99
;
proc print data=b70_42a;
   format mtg sendmail date9.;
   title 'When To Send Announcements';
run;


data b70_42c;
   length dob 8;
   input @1  id 3.
         @5  doa mmddyy8.
  @14 dob mmddyy8.;
   informat dob ddmmyy8.  doa ddmmyy8.;
   age  = (doa-dob)/365.25;
   age2 =int((doa-dob)/365.25);
datalines;
001 06/21/97 05/13/66
002 05/04/98 11/28/96
003 10/15/99 09/25/45
;
run;
proc print data=b70_42c;
format dob MMDDYY10.;
format doa Date9.;
run;

proc print data=b70_42c;
format dob Date8.;
format doa Date7.;
/*13MAY66     1    21JUN97    31.1075     31*/

run;

proc print data=b70_42c;
format dob Date6.;
format doa Weekdate10.;
/*13MAY     1    21JUN    31.1075     3*/

run;

* MMDDYYw. , DATEw, WEEKDATEw., WORDDATEw.,  MONYYw.  et al.;

data b70_42c;
input d1 mmddyy8. d2 :mmddyy8.  d3 :mmddyy8.  d4 :mmddyy8.  d5 :mmddyy8.  d6 :mmddyy8. ;
datalines;
06/21/97 05/13/66 05/13/66 05/13/66 05/13/66 05/13/66
;
run;

proc print data=b70_42c;
format d1 mmddyy10. d2 date7. d3 worddate20. d4 worddate12. d5 date9. d6 date11.;
/* 06/21/1997
13MAY66        
May 13, 1966
May 13, 1966
13MAY1966
13-MAY-1966*/
run;
proc print data=b70_42c;
format d1 mmddyy10. d2 date7. d3 worddate20. d4 worddate12. d5 date9. d6 date11.;
/* 06/21/1997
13MAY66        
May 13, 1966
May 13, 1966
13MAY1966
13-MAY-1966*/
run;

* . worddate20. date9. date7. date11. yymmdd8.;

data b70_42d;
   input dob mmddyy8.;
   informat dob ddmmyy8.;
   age_year= intck('year', dob, today());  * the intervals;
   age_month= intck('month', dob, today());
   age_day  = intck ('day', dob, today());

   dob_yr=year(dob);
   dob_qtr= qtr(dob);
   dob_month=month(dob);
   dob_day= day(dob);

   put age_year= age_month= age_day=;
   datalines;
06/17/97
;



data b70_42d1;
   input dob ; * mmddyy8.;
   informat dob mmddyy8.;  ************ assign informat ;
   age_year= intck('year', dob, today());  * the intervals;
   age_month= intck('month', dob, today());
   age_day  = intck ('day', dob, today());

   dob_yr=year(dob);
   dob_qtr= qtr(dob);
   dob_month=month(dob);
   dob_day= day(dob);

   put age_year= age_month= age_day=;
datalines;
06/17/97
;
run;

data b70_42d2;
   input dob  mmddyy8.;
   * informat dob mmddyy8.;  ************ without informat informat ;
   age_year= intck('year', dob, today());  * the intervals;
   age_month= intck('month', dob, today());
   age_day  = intck ('day', dob, today());

   dob_yr=year(dob);
   dob_qtr= qtr(dob);
   dob_month=month(dob);
   dob_day= day(dob);

   put age_year= age_month= age_day=;
   put dob_yr= dob_qtr= dob_month= dob_day=;
datalines;
06/17/97
;
run;


data b70_42d3;
   input month day year;
   date=mdy (month, day, year); ******** convert numeric to SAS date ;
   drop month day year;
   format date mmddyy8.;
   put date=;
   Label X="date" Y="Time" Z="Second";
    X = '04JUL97'D ;
Y = '09:00'T ;
Z = '04JUL97:12:00'DT ;
/* X = ?4JUL97扗 will set the new variable X equal to the number of days between January 1, 1960 and July 4, 1997. */
/*  Y = ?9:00扵 sets the new variable Y to the number of seconds between midnight and 9 am.*/
/*  Z = ?4JUL97:12:00扗T sets the value of the variable Z to the number of seconds from January 1, 1960 to noon on July 4, 1997.*/

put X= Y= Z= ;
   datalines;
11 25 98
03 20 04
;

data b70_42d3;
    X = '04JUL97'D ;
Y = '09:00'T ;
Z = '04JUL97:12:00'DT ;
;


data b70_43;
input order_id  @7 customer $ 7-18  @20 shipped date9.;
datalines;
9341  Josh Martin  02FEB2009
9874  Rachel Lords 14MAR2009
10233 Takashi Sato 07JUL2009
;
run;


data b70_43;
input order_id  @7 customer $ 7-18  shipped :date9.;
orderDate = "02Feb2008";
odate = input(orderDate,date9.);
ship_note=catx(' ','Order',order_id,'shipped on',put(shipped,date9.),'to',customer);
ship_note2=catx(' ','Order',order_id,'order date',put(odate,date9.));
datalines;
9341  Josh Martin  02FEB2009
9874  Rachel Lords 14MAR2009
10233 Takashi Sato 07JUL2009
;
run;

proc print data=b70_43;
run;


/******** compress function***********/
data b70_43b;
phoneStr1 = "(908)235-4490";
phoneStr2 = "(201) 555-77 99";
phoneStr2 = "(201] 555-77 99";
addressStr = "Ron    Cody          89   Lazy   Brook  Road   ";
address = compbl(addressStr);  /* remove multi blanks and replace with single blanks */
phone1a = compress(phoneStr1);
phone1b = compress(phoneStr1,'(-) ');
phone2a = compress(phoneStr2);
phone2b = compress(phoneStr2,'(-) ');
phone3a = compress(phoneStr2);
phone3b = compress(phoneStr2,'(-] ');
originalStr = 'sow so sew';
newStr = tranwrd(originalStr,'so', 'sew'); /* replace str with new str */
/*result:         seww sew sew*/
put phone1a= phone1b= phone2a= phone2b= phone3a= phone3b=;
put address=;
put newStr=;
run;

proc print data=b70_43b;
run;

proc print data=b70_43b noobs;
run;

/*b70_43*/
data b70_43;
input x @@;
datalines;
2 3 3 4 4 5 5 5 66 6 6 6 6 6 6
;
run;

data ONE TWO SASUSER.TWO;
set b70_43;
run;

ods csvall file="&prjpath\test.cvs";
proc print data=b70_43;
run;
ods csvall close;

ods csvall file="&prjpath\test.csv";
proc print data=b70_43;
run;
ods csvall close;


data b70_46;
input Revenue2008 :5.5 Revenue2009 :2.1  Revenue2010 :2.1 ;
input a1-a3;  /* multiple input lines */
_mean = mean(of Rev:);   /* across one record , used in data step*/
_max = max(of Rev:);
_sum = sum(of Rev:);
_sum2 = sum(of a1-a3);
datalines;
1.2 1.6 2.0
1.44 3.4 5.6
;
run;
proc print data=b70_46;
run;
/*Obs Revenue2008 Revenue2009 Revenue2010 a1 a2 a3 _mean _max _sum _sum2 */
/*1 1.2 1.6 2 1.44 3.4 5.6 1.6 2 4.8 10.44 */

proc sql;
create table b70_46a as
select mean(Revenue2008) as mean1
  (Revenue2008 + Revenue2009 + Revenue2010)/3 as mean2
from b70_46;
select * from b70_46a;
quit;

proc sql;
create table b70_46b as
select (Revenue2008 + Revenue2009 + Revenue2010)/3 as mean2
from b70_46;
select * from b70_46b;
quit;

data b70_47;
drop City;
infile datalines;
input
Name $ 1-14 /
Address $ 1-14 /
City $ 1-12 ;
if City='New York ' then input @1 State $2.;
else input;
datalines;
Joe Conley
123 Main St.
Janesville
WI
Jane Ngyuen
555 Alpha Ave.
New York
NY
Jennifer Jason
666 Mt. Diablo
Eureka
CA
;
run;

data b70_47;
* drop City;
infile datalines;
input
Name $ 1-14 /
Address $ 1-14 /
City $ 1-12 ;
if City='New York ' then input @1 State $2.;
else input;
datalines;
Joe Conley
123 Main St.
Janesville
WI
Jane Ngyuen
555 Alpha Ave.
New York
NY
Jennifer Jason
666 Mt. Diablo
Eureka
CA
;
run;

proc print data=b70_47;
run;

data b70_47b;
infile datalines;
input
Name $ 1-14 /
Address $ 1-14 /
City $ 1-12 ;
datalines;
Joe Conley
123 Main St.
Janesville
Mike
37 Lansbury
Thornhill
;
run;
proc print data=b70_47b;
run;


data b70_47c;
infile datalines;
input
Name $ 1-14 /
Address $ 1-14 /
City $ 1-12  ;
if City = 'Thronhill' then input City2 :$ 1-12;
else input City3 $ 1-12;
datalines;
Joe Conley
123 Main St.
Janesville
Mike
37 Lansbury
Thornhill
Vaughan
Joe Conley
123 Main St.
Janesville
Mike
37 Lansbury
Thornhill
Vaughan
Joe Conley
123 Main St.
Janesville
Mike
37 Lansbury
Thornhill
Vaughan
;
run;
proc print data=b70_47c;
run;



data b70_47d;
infile datalines;
input Name $ City :$  ;
if City = 'York' then input Price :3.;
else input Distance :3. ;
datalines;
Joe York
230
John Toronto
300
;
run;
proc print data=b70_47d;
run;


data b70_47e;
infile datalines;
input Name $ 1-3 /
City $ 1-12 /
price 1-6;
datalines;
Joe
York
230
John
Toronto
300
;
run;
proc print data=b70_47e;
run;

data b70_49;
input Year Product $ Sales;
datalines;
1997 Bike 30000
1997 Bike 25000
;
run;

data b70_49a(keep=MonthSal:);
set b70_49(keep=Year Product Sales);
array MonthSales{12};
do i=1 to 12;
MonthSales{i}=Sales;
end;
drop i;
drop Year Product Sales;
run;
proc print data=b70_49a;
run;

data b70_50a;
input Id Char1 $;
datalines;
111 F
158 B
329 X
644 D
;
run;
data b70_50b;
input Id Char2 $;
datalines;
111 M
538 H
644 G
;
run;

data b70_50c;
set b70_50a b70_50b;
by Id;   /* by means sorted by, not merged by */
run;
/*result*/
/*Obs Id Char1 Char2 */
/*1 111 A   */
/*2 111   E */
/*3 158 B   */
/*4 329 C   */
/*5 538   F */
/*6 644 D   */
/*7 644   G */

data b70_50c;
set b70_50a b70_50b;
by char1;  /* by means sorted by, not merged by */
run;

proc sort data=b70_50a;
by Id;
run;
proc sort data=b70_50b;
by Id;
run;
data b70_50c;
merge b70_50a b70_50b;
by Id;  /* by means sorted by, not merged by */
run;
proc print data=b70_50c;
run;

/*b70_51*/
proc contents data=_all_;
run;

proc contents data=_all_ nods;  /* only the dataset names */
run;



data b70_53/debug;
input wagerate;
retain Total;
if first.wagerate then total=0;
Total=sum(Total, Wagerate);
datalines;
23
34
45
;
run;
/*Obs wagerate Total */
/*1 23 23 */
/*2 34 57 */
/*3 45 102 */


data b70_53;
input amt1 amt2;
retain total ;
if first.amt1 then
do;
total=0;
put "get first";
end;
else
do;
put "never get first";
end;
Total=sum(Total, amt1, amt2);
datalines;
23 56
34 78
45 99
;
run;


data b70_53;
input amt1 amt2 amt3 amt4;
retain total ;
Total=sum(Total, amt1, amt2, amt3, amt4);  /* could not use "amt:" */
datalines;
23 56 23 56
34 78 23 56
45 99 23 56
;
run;

data b70_53/debug;
input amt1 amt2 amt3 amt4;
retain total 0; * if set init=0, if not set, then total=missing by default ;
Total=sum(Total, amt1, amt2, amt3, amt4);  /* could not use "amt:" */
datalines;
23 56 23 56
34 78 23 56
45 99 23 56
;
run;

data b70_53/debug;
input amt1 amt2 amt3 amt4;
retain total;
Total=sum(Total, amt1, amt2, amt3, amt4);  /* could not use "amt:" */
datalines;
23 56 23 56
34 78 23 56
45 99 23 56
;
run;

data b70_53;
input amt1;
Total=sum(Total, amt1);  /* could not use "amt:" */
datalines;
23
.
;
run;

data b70_53;
input amt1;
Total+amt1;  /* could not use "amt:" */
datalines;
23
.
;
run;

proc print data=b70_53;
run;


data b70_54;
set SASHELP.CLASS(obs=5);
retain City 'Beverly Hills'; /* how to initialize retain variables */
retain Address 'Lans 37';  /* Retain: only initilized once */
retain Price 500000000;
State='California Losges'; /* length=8 only in input statement, not in dataset */
run;
proc print data=b70_54;
run;
/*Obs Name Sex Age Height Weight City State */
/*1 Alfred M 14 69.0 112.5 Beverly Hills California Losges */
/*2 Alice F 13 56.5 84.0 Beverly Hills California Losges */
/*3 Barbara F 13 65.3 98.0 Beverly Hills California Losges */
/*4 Carol F 14 62.8 102.5 Beverly Hills California Losges */
/*5 Henry M 14 63.5 102.5 Beverly Hills California Losges */

proc print data=SASHELP.CLASS(obs=2);
run;

data WORK.DATE_INFO;
X="01Jan1960"D ;
format x date9.;
put x=;
run;

proc contents data=WORK.DATE_INFO;
run;
/*# Variable Type Len Format */
/*1 X        Num  8   DATE9. */

data b70_57;
bDate = "09JAN2010"d;
wkDay = weekday(bDate);
X = MONTH(bDate); put "month=" x;
X = DAY(bDate); put  "day=" x; /* day of month */
X = YEAR(bDate); put  "year=" x;
X = QTR(bDate); put "QTR=" x;
X = WEEKDAY(bDate); put "weekDay=" x;
DATE = DATEPART(bDate);   put "DATE=" DATE;
TIME = TIMEPART(bDate);   put "TIME=" TIME;
* X = dayofweek(bDate); * put "dayofweek=" x;  /* error */
put wkDay=;
run;

/*MMDDYY8. 07/04/97 */
/*DDMMYY8. 04/07/97 */
/*WORDDATE18. July 4, 1997 */
/*WEEKDATE29. Friday, July 4, 1997 */
/*MONYY5. JUL95 */

data b70_59;
infile datalines truncover;
length Type $ 5 Color $ 11;
input  Type $ Color $;
datalines;
daisy yellow
;
run;


data b70_59;
infile datalines truncover;
length Type $ 5 Color $ 11;
input  Type $ Color $;
datalines;
daisy yellow
daisyyellow
;
run;

proc print data=b70_59;
run;


data b70_59a;
infile datalines missover;
* when not enough vars, then assign a blank for char, . for numeric;
/*i) numeric : missing (.) 3-8 bytes*/
/*ii) character:  a missing value is treated as a blank, (' ')*/
length Type $ 5 Color $ 11 price 5;
input  Type $ Color $ price;
datalines;
daisy yellow 30
daisyyellow
;
run;

proc print data=b70_59a;
run;

data b70_66;
do Year=1 to 5;
do Month=1 to 12;
X + 1;
end;
end;
run;
proc print data=b70_66;
run;


data b70_67;
x = .; * missing < numeric ;
if X < 10 then X=1;
else if X >= 10 AND X LT 20 then X=2;
else X=3;
run;
proc print data=b70_67;
run;

data b70_68;
input EmpId;
datalines;
5677
;
run;
data b70_68a;
set b70_68;
length EmpId $6;
CharEmpid=put(EmpId, z6.);
/*ERROR: Character length cannot be used with numeric variable EmpId.*/
run;


data b70_69;
input Emp_ID Job_Title $ 8-29 Department $ 31-45 MgrID :6.;
datalines;
120101 Director Sales         Management     120261
120102 Sales Manager Sales    Management     120101
120103 Sales Manager II Sales Management     120101
120104 Administration Manager Administration 120101
120105 Secretary I            Administration 120101
;
run;

proc print data=b70_69;
run;

data b70_69b;
set b70_69;
where Job_title like "%Manager%";
run;
proc print data=b70_69b;
run;

data good_56a;
input jobcode $5.;
datalines;
12345
;
run;

data good_56;
set good_56a;
length jobcode $ 12;
run;

/*Because LENGTH statement is after SET statement, and JOBCODE is
already existed in department file(predefined), the LENGTH statement
has a conflict with previous attribution of variable.*/

data good_56;
length jobcode $ 12;
set good_56a;
run;

proc contents data=good_56;
run;

data good_81;
infile datalines;
input name $ age height;
if age LE 10;
datalines;
John McCloskey 35 71
June Rosesette 10 43
Tineke Jones 9 37
;
run;
proc print data=good_81;
run;

data good_88;
input name $ age ;
datalines;
Alfred 14
Alice 13
Barbara 13
Carol 14
;
run;
data good_88b;
set good_88;
put name $15. @5 age 2.;
run;


data good_107;
infile datalines dsd;
input style $ @;
if style = 'CONDO' or style = 'RANCH' then
input sqfeet bedrooms baths street $ price : dollar10.;
/*no else === continue next record, leave the un-defined as missing*/
datalines;
RANCH,1250,2,1,Sheppard Avenue,"$64,000"
SPLIT,1190,1,1,Rand Street,"$65,850"
CONDO,1400,2,1.5,Market Street,"80,050"
TWOSTORY,1810,4,3,Garris Street,"$107,250"
RANCH,1500,3,3,Kemble Avenue,"$86,650"
SPLIT,1615,4,3,West Drive,"94,450"
SPLIT,1305,3,1.5,Graham Avenue,"$73,650"
;
run;

/*1 RANCH 1250 2 1.0 Sheppard 64000 */
/*2 SPLIT . . .    . */
/*3 CONDO 1400 2 1.5 Market S 80050 */
/**/

/* INPUT statement reads the first value as style, single trailing sign @
stops INPUT jump to next record to read data. IF is used to match.
If style=扖ONDO?or style=扲ANCH?then INPUT read sqfeet bedrooms
baths street and price after STYLE. Notice here colon(:) is used to read
character variable longer than 8 bytes. A very interesting point here is
IF-THEN statement. After IF-THEN, there is no else. So generally SAS does
nothing and keep reading next record. Therefore SPLIT is read into PDV.
But no ELSE statement to tell what is going on next, so the rest of observation
for SPLIT are missing values.*/



data good_107;
infile datalines dsd;
input style $ @;
if style = 'CONDO' or style = 'RANCH' then
input sqfeet bedrooms baths street $ price : dollar10.;
else
delete;
datalines;
RANCH,1250,2,1,Sheppard Avenue,"$64,000"
SPLIT,1190,1,1,Rand Street,"$65,850"
CONDO,1400,2,1.5,Market Street,"80,050"
TWOSTORY,1810,4,3,Garris Street,"$107,250"
RANCH,1500,3,3,Kemble Avenue,"$86,650"
SPLIT,1615,4,3,West Drive,"94,450"
SPLIT,1305,3,1.5,Graham Avenue,"$73,650"
;
run;

proc print data=good_107;
run;


data b123_8;
input type $ 1-5 +1 color $;
datalines;
daisyyellow
;
run;
data b123_8;
input type $ 1-5 color $;
datalines;
daisyyellow
;
run;


data b123_8;
input type $ 1-5 color $;
datalines;
daisyyellowbluetuilpe
;
run;

data b123_8;
input type $ 1-5 color $ 30.;
datalines;
daisyyellowbluetuilpe
;
run;

data b123_8;
input type $ 1-5 color $ 50. @6 color2 $ 13.;
datalines;
daisyyellowbluetuilpe
;
run;

proc print data=b123_8;
run;


data b123_9;
infile datalines dlm=',';
input relation $ firstname $;
datalines;
son,John
;
run;

proc print data=b123_9;
run;

option dlm=","; /* don't work , wrong*/
data b123_9a;
infile datalines ;
input relation $ firstname $;
datalines;
son,John
;
run;
proc print data=b123_9a;
run;

data b123_9b;
infile datalines option dlm=',';
input relation $ firstname $;
datalines;
son,John
;
run;
proc print data=b123_9b;
run;


data b123_12;
infile datalines dlm=','; *  == flowover ;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;
/*result*/
/*Obs item1 item2 item3 */
/*1 chair table chair */
/*NOTE: SAS went to a new line when INPUT statement reached past the end of a line.*/
/*NOTE: The data set WORK.B123_12 has 1 observations and 3 variables.*/

data b123_12;
infile datalines dsd;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;

data b123_12;
infile datalines dsd  flowover;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;


data b123_12;
infile datalines dsd missover; * DSD function= remove "", treate  ,  seperate  ,,;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;

data b123_12;
infile datalines dlm=',' missover; * do not going to next record;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;
/*1 chair table   */
/*2 chair couch table */


data b123_12;
infile datalines dlm=',' flowover; * if record in-complete, must going to next record;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
run;
/*1 chair table chair */



data b123_12;
infile datalines missover; * do not going to next record;
input item1 $ item2 $ item3 $;
datalines;
chair,,table
chair,couch,table
;
run;
/* NOTE: The data set WORK.B123_12 has 2 observations and 3 variables.*/
/*Obs item1 item2 item3 */
/*1 chair,,t     */ /* read the first record, default seperate by blank */
/*2 chair,co     */

proc print data=b123_12;
run;


proc sort sort data=b123_12;
by item1;
run;


/* b123_20: descending/ascending merge*/
data b123_27;
var1 = mdy(1,15,1960);
var2 = mdy(1,1,1960);
put var1=   var2=;
run;

data b123_29;
newdate = mdy(4,15,2000);
format newdate date10.;
newdate2 = mdy(4,16,2000);
ddate=weekday(newdate);
ddate2=weekday(newdate2); * Sunday=1, Monday=2, Saturday=7;
put ddate2= ddate= newdate=;
run;

data b123_31;
input id date date9.;
datalines;
123 08Jan2012
123 08Jan2012
123 08Jan2012
;
run;


data _null_;
file "&examPath\outFile.txt" dlm=',';
set b123_31;
put id date : mmddyy10.;
run;

data _null_;
set b123_31;
file "&examPath\outFile.txt" dlm=',';
put id date : mmddyy10.;
run;


data b123_31a;
   input idno name $ startwght date : date7.;
   file "&examPath\b123_31a.txt" ;
/*   file print;*/
   put idno name startwght date:date9. ;
   datalines;
032 David 180 25nov99
049 Amelia 145 25nov99
219 Alan 210 12nov99
;
run;

data b123_32b;
   input idno name $ 5-14 startwght date : date7.;
   file "&examPath\b123_31b.txt" ;
   put name $15. @5 idno 2.;
   datalines;
032 David      180 25nov99
049 Ameliaascx 145 25nov99
019 Alan       210 12nov99
;
run;

data b123_33;
infile datalines;
input name $15. age 2.;
file "&examPath\b123_33.txt";  
put name $15. @5 age 2.;
datalines;
Janice 10
Henri 11
Michael 11
Susan 12
;
run;    

data b123_35;
infile datalines;
input name $ age height;
if age LE 10;
datalines;
John McCloskey 35 71
June Rosesette 10 43
TinekeJones 9 37
;
run;    



data b123_55;   /* the length decide by the first var */
destination ='CPH';
select(destination);
when('LHR') city='Londongen';
when('CPH') city="Copenhgenxx";
otherwise;
end;
put city=;
city2='bookdogs';
city3="this is a book, that's a desk";
city4='Lpswich'||';'||'England';
put city2= city3= city4=;
run;

proc contents data=b123_73;
run;


data b123_73;   * substr() function alwasy have a length equal to original str len;
str1 = 'book here is it walmart, this is T & T and center point mall ';
str2 = substr(str1,1,8);
str3 = substr(str1,1,12);
str4 = substr(str1,1,15);
str5 = substr(str1,1,35);
put str5= str2= str3= str4=;
run;
proc contents data=b123_73;
run;
proc datasets lib=sasuser;
contents data=class varnum;
quit;

proc datasets;
quit;
run;

libname db "&prjPath\";
libname newdb "&examPath\";
data db.new;
input a;
datalines;
21
11
;
run;
proc datasets lib=db;
quit;
run;

proc contents data=db._all_ nods;
run;
/*# Name Member Type File Size Last Modified */
/*1 NEW DATA 5120 18-Jun-14 03:45:27 PM */

proc contents data=db._all_;
run;

options noxwait;
x "copy &prjPath\new.sas7*  &prjPath\new2.sas7bdat";

libname db "&prjPath\";
libname newdb "&examPath\";
proc datasets nolist;
 copy in=db out=newdb;
quit;
run;

proc datasets nolist;
 copy in=newdb out=db move;
quit;
run;

/*rename dataset*/
proc datasets lib=db nolist;
 change new = newae;
quit;
run;

/*delete dataset*/
proc datasets lib=db nolist;
 delete newae;
quit;
run;


data b123_74;
title='A tale of two cities, Charles J.Dickens';
word1=scan(title,1,',');
word2=scan(title,5,' 000');  * scan function only care the first seperator;
word3=scan(title,5,' xx');  the length of word3 is 200, scan return default is 200
word4=scan(title,5,' b');
word=scan(title,3,' ,');
put word1= word2= word3= word4 word=;
run;


proc contents data= b123_74;
run;

data b123_78;
price='20000';
tPrice=price*2; * 0.1*price--------- 1+price------ price*2 ;
cost='20000';
total=0.10*cost;
num1=56;
num2=87;
numStr = 'abc' || num1 ;   * result='abc          56';
numStr2= ''||num1;
put total= tPrice= numStr= numStr2=;
run ;

proc contents data=b123_78;
run;

data b123_79;
Prd_num=5641;
Item='1001';
ItemPrd=Item||'/'||Prd_num;
ItemPrd2=Item*Prd_num;
put ItemPrd= ItemPrd2=;
run;


data b123_80;
date=input('13Mar2000',date9.);
dstr = put(date, date10.);
put date= dstr=;
run;
proc contents data=b123_80;
run;


data b123_80;
dept='Printing  ';
number=750;
depart0=dept||number;
depart=trim(dept)||number;
depart1=trim(dept)||put(number,3.);
put depart0= depart= depart1=;
run;

data b123_83;
strD1='1999/10/25';
strD2='25/10/1999';
strD3='10/25/1999';
strD4='1999/25/10';
d1=input(strD1, yymmdd10.);
d2=input(strD2, ddmmyy10.);
d3=input(strD3, mmddyy10.);
* d4=input(strD4, yyddmm10.); *no this format;
put d1= d2= d3= ;
run;


/*b123_91*/
data grocery;
   input Sector $ Manager $ Department $ Sales @@;
   datalines;
se 1 np1 50    se 1 p1 100   se 1 np2 120   se 1 p2 80
se 2 np1 40    se 2 p1 300   se 2 np2 220   se 2 p2 70
nw 3 np1 60    nw 3 p1 600   nw 3 np2 420   nw 3 p2 30
nw 4 np1 45    nw 4 p1 250   nw 4 np2 230   nw 4 p2 73
nw 9 np1 45    nw 9 p1 205   nw 9 np2 420   nw 9 p2 76
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
se 1 np1 50    se 1 p1 100   se 1 np2 120   se 1 p2 80
se 2 np1 40    se 2 p1 300   se 2 np2 220   se 2 p2 70
nw 3 np1 60    nw 3 p1 600   nw 3 np2 420   nw 3 p2 30
nw 4 np1 45    nw 4 p1 250   nw 4 np2 230   nw 4 p2 73
nw 9 np1 45    nw 9 p1 205   nw 9 np2 420   nw 9 p2 76
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
se 1 np1 50    se 1 p1 100   se 1 np2 120   se 1 p2 80
se 2 np1 40    se 2 p1 300   se 2 np2 220   se 2 p2 70
nw 3 np1 60    nw 3 p1 600   nw 3 np2 420   nw 3 p2 30
nw 4 np1 45    nw 4 p1 250   nw 4 np2 230   nw 4 p2 73
nw 9 np1 45    nw 9 p1 205   nw 9 np2 420   nw 9 p2 76
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
sw 5 np1 53    sw 5 p1 130   sw 5 np2 120   sw 5 p2 50
sw 6 np1 40    sw 6 p1 350   sw 6 np2 225   sw 6 p2 80
ne 7 np1 90    ne 7 p1 190   ne 7 np2 420   ne 7 p2 86
ne 8 np1 200   ne 8 p1 300   ne 8 np2 420   ne 8 p2 125
;
proc format;
   value $sctrfmt 'se' = 'Southeast'
                  'ne' = 'Northeast'
                  'nw' = 'Northwest'
                  'sw' = 'Southwest';

   value $mgrfmt '1' = 'Smith'   '2' = 'Jones'
                 '3' = 'Reveiz'  '4' = 'Brown'
                 '5' = 'Taylor'  '6' = 'Adams'
                 '7' = 'Alomar'  '8' = 'Andrews'
                 '9' = 'Pelfrey';

   value $deptfmt 'np1' = 'Paper'
                  'np2' = 'Canned'
                  'p1'  = 'Meat/Dairy'
                  'p2'  = 'Produce';
run;
proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/display  format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /display format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/display  format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;


proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/order  format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;


proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/group format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /order format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /mean format= dollar11.2;
run;
proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /order format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /mean format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /across format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /mean format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /across format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /display format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /display format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /mean format= dollar11.2;
run;

footnote1 'note1';
footnote2 "note2";
footnote3 "note3";
footnote4 "note4";

proc print data=grocery;
footnote2 "print note2";
var sector sales;
run;

proc print data=grocery;
var sector sales;
footnote4 'print note2';
run;

proc print data=grocery;
var sector sales;
footnote 'print note2';
run;


PROC PRINT DATA=grocery NOOBS label double;            
   TITLE 'Summary by Vendor';
   FOOTNOTE 'Results of Data-trim4';
   VAR sector sales;
   SUM sales;
   RUN;

proc report data=grocery headline headskip nowd ;
   column sector  sales;
   define sector /group format =$sctrfmt.;   * A row appears for every observation for variables;
   define sales /mean format= dollar11.2;
run;
/*Sector           Sales*/
/*-----------------------*/
/*Northeast      $228.88*/
/*Northwest      $204.50*/
/*Southeast      $122.50*/
/*Southwest      $131.00*/




proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /group format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/display format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /group format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/order format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /group format =$sctrfmt.;   * A row appears for every observation for variables;
   define manager/order format =$mgrfmt.;  * A row appears for every observation ;
   define department /display format =$deptfmt.;
   define sales /analysis format= dollar11.2;
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order format =$sctrfmt.;
   define manager/group format =$mgrfmt.;
   define department /group format =$deptfmt.;
   define sales /analysis format= dollar11.2;
   title ' All info of Sales for all Sectors';
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order format=$sctrfmt. width=6;
   define manager/group  format =$mgrfmt. width=7;
   define department /group format =$deptfmt. width=10;
   define sales /analysis format= dollar11.2  width=8;
   title ' All info of Sales for all Sectors';
   break after sector/ page summarize skip;
run;

proc report data=grocery headline headskip nowd ;
   column sector manager department sales;
   define sector /order
 format=$sctrfmt. width=6 spacing= 4;
   define manager/group  format =$mgrfmt. width=20 spacing=20;
   define department /group format =$deptfmt. width=8 spacing=8;
   define sales /analysis format= dollar11.2  width=8  spacing=8;
   title ' All info of Sales for all Sectors';
   break after sector/ page summarize skip;
run;


proc report data=grocery headline headskip nowd ;
   column sector department sales profit;
   define sector /group format=$sctrfmt. width=10 spacing= 10;
   define department /group format =$deptfmt. width=10 spacing=6;
   define sales /analysis format= dollar11.2  width=8  spacing=6;
   define profit /computed format =dollar11.2 width=6  spacing=6;
   compute profit;
      if department = 'np1' or department ='np2'
    then profit=0.4*sales.sum;
else profit = 0.25*sales.sum;
   endcomp;
   rbreak after/ dol dul summarize ;
   compute after;
      sector='TOTAL:';
   endcomp;
   title ' Report for Sectors';
run;

/*DOL: double overline each value; */
/*DUL: double underline each value*/
/*SUMMARIZE: include a summary line as one of the break lines*/
/*AFTER : places the break lines at the end of the report.*/
/*BEFORE: places the break lines at the beginning of the report*/
/*Ex.  Replace ?after? with 慴efore?followed by rbreak statement to run;*/
/*Creating and Processing an Output Data Set*/
proc report data=grocery nowd
            out=temp( where=(sales gt 600) );
   column manager sales;
   define manager / group noprint;
   define sales / analysis sum noprint;
run;

proc report data=grocery nowd
            out=temp( where=(sales gt 600) );
   column manager sales;
   define manager / group ;
   define sales / analysis sum ;
run;


proc print data=grocery;
/*format sector=sctrfmt. manager=mgrfmt. department=deptfmt.;*/
format sector sctrfmt. manager mgrfmt. department deptfmt.;
id sector ;
var manager department sales;
run;


options pageno=5;
footnote 'report';
title "myreport";
proc print data=grocery;
var sector sales;
run;


proc means data=grocery;
var sector sales;
run;
proc report data=grocery ;
   column manager sales;
   define manager / display ;
   define sales / display ;
   title "report title";
   footnote "foot note";
run;

/***********   b123_104 *********/
proc format;
value sales
   1-100 = 'Low'
   101-500 = 'high'
;
run;

proc report data=grocery nowd;
   column manager sales;
   define manager / display ;
   define sales / display format=sales.;
   title "report title";
   footnote "foot note";
run;


proc report data=grocery;
   column manager sales;
   define manager / display ;
   define sales / display format=sales.;
   title "report title";
   footnote "foot note";
run;

data b123_109;
infile datalines;
input employee $ 1-4;
if employee = 'Sue' then input age 7-8;
else input idnum 10-11;
datalines;
Ruth 39 11
Jose 32 22
Sue 30 33
John  40 44
;
run;
proc print data=b123_109;
run;

data b123_112;
infile datalines;
input @1 height 2. @4 weight 2 @7 price 4-5;
array wt{10};
retain wt:;
do i=1 to 10;
wt{i}= height;
end;

put weight= price=;
datalines;
78 95 345
;
run;



data b123_116;
array wt{10};
retain wt:; * instead of retain wt{10};
do i=1 to 10;
wt{i}= i;
end;

run;

data b123_123;
a = 100;
b = 0;
c = a/b;
put c=;

run;

data b123_122;
infile datalines;
input @1 salary 6.;
if _error_ then desc='problems   ';
else desc='no problems';
put desc=;
datalines;
$1,234
345
;
run;
/* result */
/*depart0="Printing           750"*/
/*depart="Printing         750"*/
/*depart1="Printing750"*/

/*b123_47: array can not be used in Proc, array can only be used in data step */
/*Jani . 10      */
/*Henr .11       */
/*Mich .l 11     */
/*Susa .12  */
/*b123_1 ************SAS Base 123 */
/*
No. 4, 7, 9, 13, 14, 28, 30, 32(English Spelling) , 47

55: the length decide by the first var
70:
73: length(str) = length(substr(str))
74: scan(words, which, 'keywd') ,  scan return str, default str len is 200
112:  input @1 height 2. @4 weight 2 @7 price 4-4;
116: retain wt:; --- V.S. --------- retain wt{10}  || array{10} v.s array:
*/

options yearcutoff=1950;
data adt118;
date = '01Jan1965'd;
call symput('dt', left(date));  * &dt=1827;
if &dt > 1000 then
   option yearcutoff=2000;
else
;
run;
%put &dt;

No comments:

Post a Comment