Software Engineering for Internet Applications

An overview of software engineering techniques for developing web applications. Based on

1 Introduction

Philip Greenspun

2 Basics

  1. Atomicity: all or nothing
  2. Consistency: only legal operations allowed.
  3. Isolation: no peeking until the other is done.
  4. Durability: once done, you are done forever.

2.1 Steps

  1. Develop data model.
  2. Develop legal transactions on model: inserts and updates.
  3. Design page flow.
  4. Implement pages

3 Planning

4 Handling Users

create table users (
        user_id                 integer primary key,
        first_names             varchar(50),
        last_name               varchar(50) not null,
        email                   varchar(100) not null unique,
        -- we encrypt passwords using operating system crypt function
        password                varchar(30) not null,
        registration_date       timestamp(0)
);

4.1 Handling Users

create table users (
        user_id                 integer primary key,
        first_names             varchar(50),
        last_name               varchar(50) not null,
        email                   varchar(100) not null unique,
        password                varchar(30) not null,
        -- user's personal homepage elsewhere on the Internet
        url                     varchar(200),
        registration_date       timestamp(0),
        -- an optional photo; if Oracle Intermedia Image is installed
        -- use the image datatype instead of BLOB
        portrait                blob
);

4.2 Handling Users

create table users (
        user_id                 integer primary key,
        first_names             varchar(50),
        last_name               varchar(50) not null,
        email                   varchar(100) not null unique,
        password                varchar(30) not null,
        -- user's personal homepage elsewhere on the Internet
        url                     varchar(200),
        registration_date       timestamp(0)
        -- an optional photo; if Oracle Intermedia Image is installed
        -- use the image datatype instead of BLOB
        portrait                blob,
        -- with a 4 GB maximum, we're all set for Life of Johnson
        biography               clob,
        birthdate               date,
        -- current politically correct column name would be "gender"
        -- but data models often outlive linguistic fashion so
        -- we stick with more established usage
        sex                     char(1) check (sex in ('m','f')),
        country_code            char(2) references country_codes(iso),
        postal_code             varchar(80),
        home_phone              varchar(100),
        work_phone              varchar(100),
        mobile_phone            varchar(100),
        pager                   varchar(100),
        fax                     varchar(100),
        aim_screen_name         varchar(50),
        icq_number              varchar(50)
);

4.3 Multiple Tables

create table users (
        user_id                 integer primary key,
        first_names             varchar(50),
        last_name               varchar(50) not null,
        email                   varchar(100) not null unique,
        password                varchar(30) not null,
        registration_date       timestamp(0)
);

create table users_extra_info (
        user_info_id            integer primary key,
        user_id                 not null references users,
        field_name              varchar(100) not null,
        field_type              varchar(100) not null,
        -- one of the three columns below will be non-NULL
        varchar_value   varchar(4000),
        blob_value      blob,
        date_value      timestamp(0),
        check ( not (varchar_value is null and 
                     blob_value is null and
                     date_value is null))
        -- in a real system, you'd probably have additional columns
        -- to store when each row was inserted and by whom

);

-- make it fast to get all extra fields for a particular user
create index users_extra_info_by_user on users_extra_info(user_id);

4.4 User Groups

create table users (
        user_id                 integer primary key,
            ...
        -- a space-separated list of group IDs
        group_memberships       varchar(4000),
        ...
);

create table user_group_map (
        user_id         not null references users;
        user_group_id   not null references user_groups;
        unique(user_id, user_group_id)
);

4.5 Query Groups

select user_groups.group_name
        from users, user_groups, user_group_map
        where users.first_names = 'Norman' and users.last_name = 'Horowitz'
                and users.user_id = user_group_map.user_id
                and user_groups.user_group_id = user_group_map.user_group_id;

5 Content Management

create table articles_raw (
        article_id              integer primary key,
        -- who contributed this and when
        creation_user           not null references users,
        creation_date           not null date,
        -- what language is this in?
        -- visit http://www.w3.org/International/O-charset-lang
        -- to see the allowable 2-character codes (en is English, ja is Japanese)
        language                char(2) references language_codes,
        -- could be text/html or text/plain or some sort of XML document
        mime_type               varchar(100) not null,
        -- will hold the title in most cases
        one_line_summary        varchar(200) not null,
        -- the entire article; 4 GB limit
        body                    clob
        --articles go thru an editorial process
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
    );

create view articles_approved 
        as
        select * 
        from articles_raw
        where editorial_status = 'approved';

5.1 Comments on Articles

create table comments_on_articles_raw (
        comment_id              integer primary key,
        -- on what article is this a comment?
        refers_to               not null references articles,
        creation_user           not null references users,
        creation_date           not null date,
        language                char(2) references language_codes,
        mime_type               varchar(100) not null,
        one_line_summary        varchar(200) not null,
        body                    clob,
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
);

create view comments_on_articles_approved 
        as
        select * 
        from comments_on_articles_raw
        where editorial_status = 'approved';
create table content_raw (
        content_id              integer primary key,
        -- if not NULL, this row represents a comment
        refers_to               references content_raw,
        -- who contributed this and when
        creation_user           not null references users,
        creation_date           not null date,
        -- what language is this in?
        -- visit http://www.w3.org/International/O-charset-lang
        -- to see the allowable 2-character codes (en is English, ja is Japanese)
        language                char(2) references language_codes,
        -- could be text/html or text/plain or some sort of XML document
        mime_type               varchar(100) not null,
        one_line_summary        varchar(200) not null,
        -- the entire article; 4 GB limit
        body                    clob,
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
);

-- if we want to be able to write some scripts without having to think
-- about the fact that different content types are merged

create view articles_approved 
        as
        select * 
        from content_raw
        where refers_to is null
        and editorial_status = 'approved';

create view comments_on_articles_approved 
        as
        select * 
        from content_raw
        where refers_to is not null
        and editorial_status = 'approved';

-- let's build a single full-text index on both articles and comments
-- using Oracle Intermedia Text (formerly known as "Context")

create index content_ctx on content_raw (body) indextype is ctxsys.context;

5.2 Adding News

create table content_raw (
        content_id              integer primary key,
        -- 'news' or 'article' or 'comment' or...
        content_type            varchar(100) not null,
        refers_to               references content,
        creation_user           not null references users,
        creation_date           not null date,
        release_time            date,
        expiration_time         date,
        language                char(2) references language_codes,
        mime_type               varchar(100) not null,
        one_line_summary        varchar(200) not null,
        body                    clob,
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
);

create view news_current_and_approved
        as
        select *
        from content_raw 
        where content_type = 'news'
        and (release_time is null or sysdate >= release_time)
        and (expiration_time is null or sysdate  <= expiration_time)
        and editorial_status = 'approved';

5.3 The Workflow Problem

5.3.1 Version Control for Content

create table content_raw (
        -- the combination of these two is the key
        content_id              integer primary key,
        version_number          integer,

        content_type            varchar(100) not null,
        refers_to               references content_raw,
        creation_user           not null references users,
        creation_date           not null date,
        release_time            date,
        expiration_time         date,
        -- some of our content is geographically specific 
        zip_code                varchar(5),
        -- a lot of our readers will appreciate Spanish versions
        language                char(2) references language_codes,
        mime_type               varchar(100) not null,
        one_line_summary        varchar(200) not null,
        -- let's use BLOB in case this is a Microsoft Word doc or JPEG
        -- a BLOB can also hold HTML or plain text
        body                    blob,
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
        primary key (content_id, version_number)
);
-- note the use of MAX on VARCHAR column; this works just fine

select content_id, max(zip_code)
        from content_raw
        where content_id = 5657
        group by content_id

5.3.2 Version Control for Content

-- stuff about an item that doesn't change from version to version
create table content_raw (
        content_id              integer primary key,
        content_type            varchar(100) not null,
        refers_to               references content_raw,
        creation_user           not null references users,
        creation_date           not null date,
        release_time            date,
        expiration_time         date,
        mime_type               varchar(100) not null,
        zip_code                varchar(5)
);

-- stuff about a version of an item
create table content_versions (
        version_id              integer primary key,
        content_id              not null references content_raw,
        version_date            date not null,
        language                char(2) references language_codes,
        one_line_summary        varchar(200) not null,
        body                    blob,
        editorial_status        varchar(30) 
              check (editorial_status in ('submitted','rejected','approved','expired')),
        -- audit the person who made the last change to editorial status
        editor_id               references users,
        editorial_status_date   date
);

select * 
        from content_versions
        where content_id = 5657
        and editorial_status = 'approved'
        and version_id = (select max(version_id)
                  from content_versions
                  where content_id = 5657
                  and editorial_status = 'approved')

5.3.3 Version Control for Content

create table content_versions (
        version_id              integer primary key,
        content_id              not null references content_raw,
        version_date            date not null,
        ...
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired')),

        current_version_p       char(1) check(current_version_p in ('t','f')),
        ...
);

create view live_versions 
        as
        select * 
        from content_versions
        where current_version_p = 't';

5.3.4 Version Control for Programs

  1. Three HTTP servers: deployment, testing, development.
  2. Two or three RDBMS tablespaces: deployment and testing/development.
  3. One source control repository.

5.4 The Content

  1. A navigation directory (really?)
  2. News and events on front page.
  3. Single search box.
  4. Direct links to most used services.

6 Software Modularity

  1. RDMS table definitions.
  2. Stored procedures that run in the database
  3. Procedures that run inside your Web or application server program that are shared by more than one page
  4. Scripts that generate individual pages
  5. (Possibly) templates that work in conjunction with page scripts
  6. Documentation explaining the objectives of the module

6.1 Roles

7 Scalability

7.1 Load Balacing

7.2 Failover

7.3 Spam Proofing

8.1 SQL Search

select * 
        from content
        where body like '%' || :user_query || '%'

--if the user is searching for running, it becomes
select * 
        from content
        where body like '%running%'


--to match upper and lower you do

select * 
        from content
        where upper(body) like upper('%running%')

--for multiple words we need to do 'and'

select * 
        from content
        where upper(body) like upper('%running%')
        and upper(body) like upper('%shoes%')

8.2 Full-Text Indexing

8.3 Full Text Oracle

create table content (
        content_id              integer primary key,
        refers_to               references content_raw,
        -- who contributed this and when
        creation_user           not null references users,
        creation_date           not null date,
        modified_date           not null date,
        mime_type               varchar(100) not null,
        one_line_summary        varchar(200) not null,
        body                    clob,
        editorial_status        varchar(30) 
          check (editorial_status in ('submitted','rejected','approved','expired'))
);

-- create an Oracle Text index (the product used to be called
-- Oracle Context, hence the CTX prefixes on many procedures)

create index content_text 
        on content(body) 
        indextype is ctxsys.context;

-- let's look at opinions on running shoes from 
-- users who registered in the last 30 days, sorting
-- results in order of decreasing relevance

select 
  score(1), 
  content.content_id, 
  content.one_line_summary, 
  users.first_names,
  users.last_name
        from content, users
        where contains(body, 'running shoes', 1) > 0
        and users.registration_date > current_timestamp - interval '30' day
        and content.creation_us
        er = users.user_id
        order by score(1) desc;

--The score(1) function refers to the contains() call

URLs

  1. Software Engineering for Internet Applications, http://philip.greenspun.com/seia/
  2. SQL for Web Nerds, http://philip.greenspun.com/sql/
  3. Philip and Alex's Guide to Web Publishing, http://philip.greenspun.com/panda/
  4. MIT's 6.171, http://ocw.mit.edu/OcwWeb/Electrical-Engineering-and-Computer-Science/6-171Fall2003/CourseHome/index.htm
  5. wikipedia:ArsDigita, http://www.wikipedia.org/wiki/ArsDigita
  6. photo.net, http://photo.net
  7. their story, http://photo.net/from-the-editor/200701
  8. Open Architecture Community System, http://openacs.org/
  9. wikipedia:Social_network, http://www.wikipedia.org/wiki/Social_network
  10. wikipedia:Google_platform, http://www.wikipedia.org/wiki/Google_platform
  11. load, http://www.barracudanetworks.com/ns/products/balancer_overview.php?gclid=CM-yo9TU94oCFQQRYwod1G1aqg
  12. balancing, http://www.zeus.com/products/zxtmlb/
  13. router, http://www.cisco.com/warp/public/105/46.html
  14. free, http://www.backhand.org/mod_backhand/
  15. akamai, http://www.akamai.com/
  16. wikipedia:Captcha, http://www.wikipedia.org/wiki/Captcha

This talk available at http://jmvidal.cse.sc.edu/talks/seia/
Copyright © 2009 José M. Vidal . All rights reserved.

16 March 2007, 06:08PM