ICS 54 Klefstad C++: Introduction Program Layout . A C++ program is a collection of modules. . A module is a collection of related definitions. . Definitions include classes, variables/constants, and subprograms. . Each module typically has two parts: a .h file and a .cc file. . A .h or `header' file typically defines the interface for other modules. . A .cc file typically defines the implementation of the module. . Header files are typically included from .cc files using the preprocessor directive `include' . eg #include #include "my_class.h" . The program starts by executing a subroutine called `main'. . Programs are entered with any text editor. . To compile a module: % g++ -c file2.cc # compiles the module file2.cc into file2.o . To like compiled modules together: % g++ file1.o file2.o -o foo # links compiled modules into foo . To link in library functions: % g++ file1.o file2.o -lm -o foo # links in math library too . Programs can be debugged with a debugger like gdb, dbx, or CenterLine. % g++ -c -g file2.cc # -g flag includes information for debugger % g++ -g file1.o file2.o -o foo . Program components include classes, subprograms, variables, statements, and expressions Macros . the preprocessor allows definition of parameterized macros . useful for defining efficient expression abstractions . eg #define is_upper(c) ((c) >= 'A' && (c) <= 'Z') . also useful for defining symbolic constants . eg #define MAX_BUFFER_SIZE 80 Functions . general form () . eg char *string_copy(char *s) { char *t = new char[strlen(s)+1]; return strcpy(t, s); } Procedures . general form void ( ) . eg void print_if_equal(int i, int j) { if (i == j) cout << "i = " << i << " j = " << j << endl; else print_if_equal(i, j-1); } Subprogram prototypes . general form ( ); . useful for forward or external declarations for header files . eg char *string_copy(char *s); void print_if_equal(int i, int j); Variables and Constants . general form [= ] ; . eg int i = 0; char *s = "hello"; struct {int x; char c} pair = {1,'C'}; . qualifiers . register - suggests the variable should be placed in a register . eg register char *s; . const - protects the value of the object so it is read-only . eg const int buffer_size = 80; . const variables are not interchangable with literal constants . use #define to name a literal constant Character strings . A character string is an array of characters terminated by a null character. . All array objects are pointers to the first character of the array. . eg char *p; /* a string not bound to storage */ char buf[40]; /* a string of 40 characters */ char *s = new char[40]; /* a string of 40 characters */ Block statement . general form { [ ] [ ] } . eg { char c = 'A'; cout << '['; while (c<='Z') cout << c++; cout << ']'; } If-then-else statement . general form if () [else ] . executes stmt1 if cond evaluates to non-zero, stmt2 otherwise . eg if (a < b) cout << "a is less than b"; else cout << "b is less than or equal to b"; Switch statement . general form switch ( ) { } . eg switch ( factorial(i) ) { case 1: case 3: do_odd_case(i); break; /* will fall into next case otherwise */ case 2: case 4: do_even_case(i); break; default: do_others(i); break; } While loop . general form while ( ) . repeats execution of stmt as long as condition evaluates to non-zero . eg char c = 'A'; while (c <= 'Z') cout << c++; For loop . general form for ( ; ; ) . equivalent to (but less error prone than) while ( ) { } . eg char c; for (c='A'; c<='Z'; c++) cout << c; Do-while loop . general form do while ( ); . eg do { cin >> c; cout << c; } while (c != 0); Return statement . general form return [ ] ; . eg int factorial(int n) { return n<=0 ? 1 : n * factorial(n-1); } Subprogram call . general form ( ) . eg . x = function_of_one_argument_that_returns_a_value(n); . procedure_call_with_no_arguments(); . pointers to functions int factorial(int i) { return n<=0 ? 1 : n * factorial(n-1); } main() { int (*fn)(int a) = factorial; int i = fn(3); /* calls factorial */ int j = (*fn)(4); /* old C style call */ } Parameter passing . parameters are passed by value (the default) or by reference . eg void do_this(int x) { x += 3; /* does not modify y */ } void do_that(int& x) { x += 3; /* does modify y */ } void main() { int y = 0; do_this(y); do_that(y); } . arrays are passed by reference (or their base address is passed by value). const int BUFSIZE = 80; void do_this(char *s) { *s = 0; /* null terminates buf */ } void main() { char buf[BUFSIZE]; do_this(buf); } Built-in types . Types . char - a byte or character . int - a word, or integer . float - a floating point real (single precision) usually two words . double - double precision floating point . void - no type, useful for declaring type of procedures . Qualifiers . short - applied to int, usually means two bytes . long - applied to int, usually means four bytes . unsigned - applied to int or char, means all values are positive . static - means either: . a top-level name is local to the file . a variable local to a subroutine has static storage . extern - means the declaration is global and probably made elsewhere Data type constructors . Enumerations . defines a type whose values are given in a list . eg enum {mon, tue, wed, thu, fri} . the size is usually int, but may be a char, or short int . represented by the values: mon=>0, tue=>1, ..., fri=>4 . Structures and Classes . defines a composite type that contains subfields of various types . eg struct {int i; float r; char c;} . members may be data fields or subprograms . the size is the sum of the sizes of the components . Unions . defines a type that contains any one of various subfields . eg union {int i; float r; char c;} . the size is the max size of any one component . Pointers . defines a pointer to another (base) type . eg int *int_pointer; . the size is usually the same as int . Arrays . defines a vector of N elements . eg char buf[N]; . size is N * size_of(element_type) . indices range from 0 to N-1 . Subprogram Pointers . (*function_pointer_type)(); . eg int f(int x, y) { int (*function_pointer_type)(int a, int b) = f; function_pointer_type(x, y); } . size is size of a pointer Naming new types . typedef may be used to name a new type typedef ; . eg typedef int ** int_pointer; . structs and classes define new type names . it's good style to name all types Type conversion . a type conversion is done with a `cast` . to convert from old to new . (new)old or new(old) . eg i = (int)3.0; i = int(1.0); . C++ is liberal with conversion, so be careful! Literal constants . integers 0, 1, -1 - decimal 10000000L - long int 0777 - octal 0XAFFE812 - hex . pointers ((void *)0) - the null pointer . float 1.0 1.0E-5 . characters 'Z' -- upper case z 'c' -- lower case c '\012' -- character whose ascii code is the octal value 12 '\x10' -- character whose ascii code is the hexadecimal value 10 '\n' -- a newline (linefeed - ascii 10 decimal) '\'' -- a single quote character . character strings "hello" -- the string `hello` "hello" "world" -- adjacent strings are concatenated at compile-time "\n\t\b\r\\" -- newline, tab, backspace, return, backslash "\a\f\v\r" -- alert(bell), formfeed, verticaltab, return "She said, \"What?\"" -- the string: `She said, "What?"` Operators . a = b . assign b's value into a, return the value of b . a + b . usual arithmetic plus (also -, *, /, % (modulo)) . a++ . increment a, returns previous value of a . ++a . increment a, returns incremented value of a . a += b . same as a = a + b (also -=, *=, /=, |=) . returns new value of a . *p . indirect through pointer p . &a . address of variable a . a == b . equality testing (also <, >, <=, >=, != (not equal)) . a && b . short circuited boolean `and' . a || b . short circuited boolean `or' . !a . unary boolean `not' . a[i] . ith element of array a . r.f . field f of record r . p->f . field f of record pointed to by p . same as (*p).f . conditional expression . ? : . eg return n == 0 ? 1 : n * fact(n - 1); . &, |, ^, ~, >>, << . bitwise logical operators Arguments to main . argv . an array of strings holding command-line argument words . argv[0] is the name of the program . argc . the number of strings in argv . argv[1] through argv[argc-1] are the arguments . argv[argc] is null . example: print program name followed by all arguments and newline void main( int argc, char *argv[] ) { for (int i=0; i= 0 && i < sz; } protected: int &elem (int i) { return buf[i]; } public: Vector (int size) : sz (size) { if (sz < 0) throw range_error(); else buf = new int[sz]; } ~Vector () { delete buf; } int size () { return sz; } int &operator[] (int i) { if (in_range (i)) return buf[i]; else throw range_error(); } }; Class Vector Example (cont'd) . // File test.cc #include #include "vector.h" extern atoi(char *); int main (int argc, char *argv[]) { int size = atoi(argv[1]); Vector user_vec(size); int c_vec[size]; // Error, no dynamic range in C! c_vec[0] = user_vec[0] = 0; for (int i = 1; i < user_vec.size (); i++) { // Array used on both sides of assignment. user_vec[i] = user_vec[i - 1] + 1; c_vec[i] = c_vec[i - 1] + 1; } // compile-time errors: can't access private data size = user_vec.sz - 1; user_vec.buf[size] = 100; // run-time error: index out of range user_vec[user_vec.size ()] == 1000; // index out of range not detected in C c_vec[100] = 1000; // Vector destructor called on user_vec when scope ends } Overloading . all function names and nearly all operators including: . the assignment operator = . the function call operator () . the array subscript operator [] . type conversion operator, eg (char *) . return type is not considered . each function must have a unique signature . unique argument types: double sqrt(double x); Complex &sqrt(Complex &x); . Unique number of arguments: void move(int to); void move(int to, int by); . overloading may hinder readability . eg using += and -= for stack push and pop . overloading can simplify naming, eg, class String { ... friend String operator+(String&, char *); friend String operator+(String&,String&); friend String operator+(char *, String&); }; String str_vec[101]; String comma (", "); str_vec[13] = "larry"; String foo = str_vec[13] + comma + "curly"; String bar = foo + comma + "and moe"; void baz (void) { cout << bar << "\n"; // prints "larry, curly, and moe". } Comments . two styles of comments: . traditional C comments /* This is a C++ comment. */ . new `continue to end-of-line comments: // This is also a C++ comment. . the two styles nest . you can comment out code containing other comments Inline Functions . inline function expansion combines the efficiency of macros with the security and abstraction of a function call . inline functions often reduce both execution time and code size . they discourage reliance upon macro statements /* classic C macro, no typechecking at macro expansion time */ #define SQUARE(X) ((X) * (X)) // C++ overloaded inline functions inline int sqr (int x) { return x * x; } inline float sqr (float x) { return x * x; } . class member functions defined in their declaration are automatically inlined class Trace { char *nm; public: Trace (char *s) { printf ("entering %s\n", nm = s); } ~Trace () { printf ("leaving %s\n", nm);} }; Dynamic Memory Management . dynamic memory management is built-in to C++ with new and delete . this improves run-time efficiency and source code clarity /* traditional C-style */ int *a = (int *)malloc (10 * sizeof *a); free((char *) a); // C++ syntax int *a = new int[10]; delete a; . you can redefine new and delete globally or per class . class specific new and delete operators are useful for managing homogeneous container classes efficiently, e.g. linked lists or binary trees. Default Parameters . subprogram parameters may have default values . specified in the subprogram declaration . if trailing arguments are omitted they take on defaults void assign_grade(char *nm, char *grd = A); // additional declarations and definitions... assign_grade("Bjarne Stroustrup", "C++"); // Bjarne needs to work harder on his tasks. assign_grade("Jean Ichbiah"); // Jean gets an A for Ada!. Reference Variables . references include parameters, return values, and other objects . a reference variable creates an alias for an object . reference variables can often be used instead of pointers . may allow better compiler optimizations . parameters are `call-by-reference` (e.g., `Pascal's var parameters): double sqr(double &x) { return x *= x; } // ... double foo = 10.0; sqr(foo); cout << foo; // prints 100.0 . reference variables may lead to subtle aliasing problems: cout << (sqr(foo) * foo); // output result is not defined! Const Type Qualifier . data objects and member functions may be qualified with const . makes them `read-only' . eg constant data: const char *foo = "on a clear day"; char *const bar = "you can C forever!"; const char *const zippy = "yow!"; foo = "To C or not to C?" // OK foo[7] = `C'; // error, read-only location // error, can't assign to const pointer bar bar = "avoid cliches like the plague."; bar[1] = `D'; // OK const int index = 4 - 3; // index == 1 // read-only an array of constant ints const int array[index + 3] = {2, 4, 8, 16}; Stream I/O . stream class does type-secure I/O of built-in and user-defined types . replaces stdin, stdout, and stderr with cout, cin, and cerr . overloads the << and >> operators: /* Old C-style I/O, no protection from mistakes, gets a segmentation fault on most systems! */ #include printf("name = %s, id = %d\n", 76543, 100); // C++ does not get a segmentation fault, the // "correct" function is called. #include cout << "name = " << 76532 << ", id = " << 100 << "\n"; Type Cast Syntax . in addition to Classic C style casts, C++ allows function call style // function prototype from math.h library extern double log10(double param); if ((int)log10 ((double)7734) != 0) ...; /* C style type cast notation */ if (int(log10(double(7734))) != 0) ...; // C++ function-style cast notation . this syntax is also used to specify explicit type conversion in the C++ class mechanism, e.g.: class Complex public: Complex(double, double); // ... }; Complex c = Complex(10.0, 3.1416); Declaration Statements . C++ allows variable declarations to occur anywhere statements occur within a block . localizes temporary and index variables . encourages proper initialization int main(void) { int k = call_something(); for (int i = 0; i < k; i++) for (int j = 0; j < k; j++) cout << i * j; } . Note: this may encourage obscure code . scoping rules are not always intuitive or desirable Abbreviated Type Names . C++ class, union, enum names are type names . they need not be named with typedef struct Tree_Node { int item; struct Tree_Node *left_child; struct Tree_Node *right_child; }; struct Tree_Node { int item; Tree_Node *left_child; Tree_Node *right_child; } Type Safe Linkage . allows the linker to detect when a function is declared and/or used inconsistently, e.g.: // File abs.c long abs (long arg) { return arg < 0 ? -arg : arg; } // File application.c #include int abs (int arg); int main (void) { printf ("%d\n", abs (-1)); } . this error would not have been caught until the application was ported to a machine where ints and longs were not the same size User-Defined Conversions . user-defined conversions allow for more natural looking code Complex a = Complex(1); Complex b = 1; // implicit 1 -> Complex (1) a = b + Complex(2); a = b + 2 // implicit 2 -> Complex (2) String s = a; // implicit a.operator String () . Conversions come in two flavors: . `Constructor Conversions`: . create a new object from objects of existing types . `Conversion Operators`: . convert an existing object into an object of another type User-Defined Conversions (cont'd) . eg class Complex { public: Complex (int); // convert int to Complex operator String (); // convert Complex to String }; . Note: abuse may lead to a big mess Static Initialization . In C, all initialization of static objects must use constant expressions, e.g.: int i = 10 + 20; // file scope int foo (void) { static int j = 100 * 2 + 1; // local scope } . in C++, static initializers may be arbitrary expressions, e.g.: extern int foo (void); // file scope: int a = 100; int i = 10 + foo (); int j = i + *new int (1); int foo (void) { static int k = foo (); return 100 + a; } . this can become rather cryptic . order of initialization is not well defined between modules Data Abstraction Support in C++ . Automatic Initialization and Cleanup . Assignment and Initialization . Parameterized Types and Generics . Exception Handling . Iterators Class Vector Example . File vector.h class Vector { private: int *buf; int sz; int in_range(int i); protected: int &elem(int i); public: Vector(int size); Vector(Vector &v); ~Vector(); int size(); int &operator [](int i); Vector &operator =(Vector &v); }; Automatic Initialization and Cleanup . constructors initialize and allocate sub-structures Vector :: Vector(int size) : sz(size), buf(new int[size]) { while (--size >= 0) buf[size] = 0; } . destructors should deallocate data allocated by the constructor Vector :: ~Vector() { delete buf; } Assignment and Initialization . assignment is different from initialization . initialization may allocate sub-storage . assignment usually copies value from right to left . parameter passing and function value return is the same as initialization from an existing object . control over assignment and initialization can make ADT's more secure since you have control over copies . the `assignment operator` is for assignment statements Vector &Vector :: operator =(Vector &v) { for (int i = 0; i < sz; i++) buf[i] = v.buf[i]; return *this; // Allows v1 = v2 = v3; } . the `copy constructor` is for initialization from another object . includes parameter passing by copy and function return value Vector :: Vector(const Vector &v) { sz = v.sz; buf = new int[sz]; for (int i = 0; i < sz; i++) buf[i] = v.buf[i]; } . Example #include "vector.h" extern Vector bar(Vector v); void foo() { Vector v1(100); // calls regular constructor Vector v2 = v1; // construct v2 from v1 with copy constructor v1 = v2; // Vector assign v2 to v1 v2 = bar(v1); // pass and return Vectors via copy constructor } . assignment and initialization can be prohibited by making them private . similar to Ada's limited private type class Vector { private: Vector &operator =(const Vector &v); Vector(const Vector &v); // ... public: } Parameterized Types and Generics . generic program units allow easy re-use of type-specific abstractions . eg various stacks, queues and bags . example definition template class Vector { T *buf; int sz; public: Vector(int size): sz(size), buf(new T[size]) { /* null body */ } T &operator [](int i) { return buf[i]; } }; template void swap(T &a, T &b) { T temp = a; a = b; b = temp; } . example uses Vector v1(20); Vector v2(30); typedef Vector COMPLEX_VECTOR; COMPLEX_VECTOR v3(40); v1[1] = 20; v2[3] = "hello"; v3[10] = Complex(1.0, 1.1); swap(v1[1],v1[2]); swap(v2[3],v2[2]); swap(v3[10],v3[2]); Exception Handling . exception handling allows reliable handling of run-time errors struct size_error { int value; size_error(int i): value(i) {} }; struct range_error { int value; range_error(int i): value(i) {} }; class Vector { int *buf; int sz; public: Vector(int size): sz(size) { if (size < 1) throw size_error(size); buf = new int[size]; } int &operator [](int i) { if (!in_range(i)) throw range_error(i); return buf[i]; } }; . exception example (cont.) int do_vector_stuff() { try { int n = -1; Vector v(n); n = 10; Vector v(n); v[n] = v[n/2]; } catch (size_error &e) { cerr << "size error for vector: size = " << e.value << '\n'; return 0; } catch (range_error &e) { cerr << "range error for vector: index = " << e.value << '\n'; return -1; } } Iterators . `iterators` allow users to loop through elements of some collection . use of an iterator should be representation independent #include "vector.h" class Vector_Iterator { Vector &v; int i; public: Vector_Iterator(const Vector &r): i(0), v(r) {} int operator ()() { return i++ < v.size(); } int value() { return v[i]; } }; main() { Vector v(100); Vector_Iterator next(v); while (next()) cout << "value = " << next.value() << '\n'; } . iterators may be made `friends` for improved efficiency Static Members . a static data member is shared by all instances of a class . eg my_example.h struct my_example { static int i; int next() { return i++; } static int initial_value() { return 0; } }; . eg my_example.cc int my_example :: i = my_example :: initial_value(); . a static member function need not have an object . a non-static member must have an object eg my_example x; int i = x.next(); Example: combining everything . generic holders struct holder_underflow { holder_underflow() {} }; struct holder_overflow { int size; holder_overflow(int i): size(i) {} }; template class holder { public: virtual void enter(T c) = 0; // adds c into the holder virtual T remove(void) = 0; // removes and returns the next element virtual T next(void) = 0; // returns the next element virtual int is_empty(void) = 0; // returns 1 iff holder is empty virtual int is_full(void) = 0; // returns 1 iff holder is full virtual ~holder() = 0; }; . generic holders (cont.) template class stack : public holder { public: void push(T c) { if (is_full()) throw holder_overflow(); enter(c); } T pop(void) { if (is_empty()) throw holder_underflow(); return remove(); } T top(void) { if (is_empty()) throw holder_underflow(); return next(); } }; . generic holders (cont.) struct size_error { int size; size_error(int i): size(i) {} }; template class array_stack : public stack { T *buf; int index; int size; public: array_stack(int n) : size(n), index(0) { if (n < 0) throw size_error(n); buf = new T[size]; } void enter(T c) { buf[index++] = c; } T remove(void) { return buf[--index]; } T next(void) { return buf[index-1]; } int is_empty(void) { return index == 0; } int is_full(void) { return index >= size; } virtual ~array_stack(void) {delete buf;} }; . generic holder use #include "holders.h" typedef array_stack int_stack; typedef array_stack char_stack; main() { const int M = 100; const int N = 100; int_stack istk(M); char_stack cstk(N); for (int i = 0; i