/*
 * Mirror of https://github.com/9fans/plan9port.git
 * (synced 2025-01-24 11:41:58 +00:00; 631 lines, 15 KiB, C)
 */
#ifndef _HTML_H_
|
|
#define _HTML_H_ 1
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
AUTOLIB(html)
|
|
/*
|
|
#pragma lib "libhtml.a"
|
|
#pragma src "/sys/src/libhtml"
|
|
*/
|
|
|
|
// UTILS
|
|
extern uchar* fromStr(Rune* buf, int n, int chset);
|
|
extern Rune* toStr(uchar* buf, int n, int chset);
|
|
|
|
// Common LEX and BUILD enums
|
|
|
|
// Media types
// Enumerators are numbered consecutively from 0; NMEDIATYPES is their count.
enum
{
	ApplMsword,
	ApplOctets,
	ApplPdf,
	ApplPostscript,
	ApplRtf,
	ApplFramemaker,
	ApplMsexcel,
	ApplMspowerpoint,
	UnknownType,
	Audio32kadpcm,
	AudioBasic,
	ImageCgm,
	ImageG3fax,
	ImageGif,
	ImageIef,
	ImageJpeg,
	ImagePng,
	ImageTiff,
	ImageXBit,
	ImageXBit2,
	ImageXBitmulti,
	ImageXXBitmap,
	ModelVrml,
	MultiDigest,
	MultiMixed,
	TextCss,
	TextEnriched,
	TextHtml,
	TextJavascript,
	TextPlain,
	TextRichtext,
	TextSgml,
	TextTabSeparatedValues,
	TextXml,
	VideoMpeg,
	VideoQuicktime,
	NMEDIATYPES
};
|
|
|
|
// HTTP methods (used for Form.method)
enum
{
	HGet,
	HPost
};
|
|
|
|
// Charsets
// Numbered consecutively from 0; NCHARSETS is their count.
enum
{
	UnknownCharset,
	US_Ascii,
	ISO_8859_1,
	UTF_8,
	Unicode,
	NCHARSETS
};
|
|
|
|
// Frame Target IDs
enum {
	FTtop,
	FTself,
	FTparent,
	FTblank
};
|
|
|
|
// Forward declarations, so the structures below can refer to one another
// by their short names.

// LEX
typedef struct Token Token;
typedef struct Attr Attr;

// BUILD

typedef struct Item Item;
typedef struct Itext Itext;
typedef struct Irule Irule;
typedef struct Iimage Iimage;
typedef struct Iformfield Iformfield;
typedef struct Itable Itable;
typedef struct Ifloat Ifloat;
typedef struct Ispacer Ispacer;
typedef struct Genattr Genattr;
typedef struct SEvent SEvent;
typedef struct Formfield Formfield;
typedef struct Option Option;
typedef struct Form Form;
typedef struct Table Table;
typedef struct Tablecol Tablecol;
typedef struct Tablerow Tablerow;
typedef struct Tablecell Tablecell;
typedef struct Align Align;
typedef struct Dimen Dimen;
typedef struct Anchor Anchor;
typedef struct DestAnchor DestAnchor;
typedef struct Map Map;
typedef struct Area Area;
typedef struct Background Background;
typedef struct Kidinfo Kidinfo;
typedef struct Docinfo Docinfo;
typedef struct Stack Stack;
typedef struct Pstate Pstate;
typedef struct ItemSource ItemSource;
typedef struct Lay Lay;		// defined in Layout module
|
|
|
|
// Alignment types
// Horizontal and vertical kinds share one numbering, starting at ALnone = 0.
enum {
	ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
	ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
};
|
|
|
|
struct Align
|
|
{
|
|
uchar halign; // one of ALnone, ALleft, etc.
|
|
uchar valign; // one of ALnone, ALtop, etc.
|
|
};
|
|
|
|
// A Dimen holds a dimension specification, especially for those
// cases when a number can be followed by a % or a * to indicate
// percentage of total or relative weight.
// Dnone means no dimension was specified

// To fit in a word, use top bits to identify kind, rest for value:
// (kindspec & Dkindmask) is the kind, (kindspec & Dspecmask) the value.
enum {
	Dnone =		0,
	Dpixels =		(1<<29),
	Dpercent =	(2<<29),
	Drelative =	(3<<29),
	Dkindmask =	(3<<29),
	Dspecmask =	(~Dkindmask)
};
|
|
|
|
// A dimension packed into a single int: kind bits OR'ed with the value.
struct Dimen
{
	int	kindspec;		// kind | spec
};
|
|
|
|
// Background is either an image or a color.
|
|
// If both are set, the image has precedence.
|
|
struct Background
|
|
{
|
|
Rune* image; // url
|
|
int color;
|
|
};
|
|
|
|
|
|
// There are about a half dozen Item variants.
|
|
// The all look like this at the start (using Plan 9 C's
|
|
// anonymous structure member mechanism),
|
|
// and then the tag field dictates what extra fields there are.
|
|
struct Item
|
|
{
|
|
Item* next; // successor in list of items
|
|
int width; // width in pixels (0 for floating items)
|
|
int height; // height in pixels
|
|
int ascent; // ascent (from top to baseline) in pixels
|
|
int anchorid; // if nonzero, which anchor we're in
|
|
int state; // flags and values (see below)
|
|
Genattr* genattr; // generic attributes and events
|
|
int tag; // variant discriminator: Itexttag, etc.
|
|
};
|
|
|
|
// Item variant tags (values of Item.tag), one per I* structure
enum {
	Itexttag,
	Iruletag,
	Iimagetag,
	Iformfieldtag,
	Itabletag,
	Ifloattag,
	Ispacertag
};
|
|
|
|
struct Itext
|
|
{
|
|
Item item; // (with tag ==Itexttag)
|
|
Rune* s; // the characters
|
|
int fnt; // style*NumSize+size (see font stuff, below)
|
|
int fg; // Pixel (color) for text
|
|
uchar voff; // Voffbias+vertical offset from baseline, in pixels (+ve == down)
|
|
uchar ul; // ULnone, ULunder, or ULmid
|
|
};
|
|
|
|
struct Irule
|
|
{
|
|
Item item; // (with tag ==Iruletag)
|
|
uchar align; // alignment spec
|
|
uchar noshade; // if true, don't shade
|
|
int size; // size attr (rule height)
|
|
Dimen wspec; // width spec
|
|
};
|
|
|
|
|
|
struct Iimage
|
|
{
|
|
Item item; // (with tag ==Iimagetag)
|
|
Rune* imsrc; // image src url
|
|
int imwidth; // spec width (actual, if no spec)
|
|
int imheight; // spec height (actual, if no spec)
|
|
Rune* altrep; // alternate representation, in absence of image
|
|
Map* map; // if non-nil, client side map
|
|
int ctlid; // if animated
|
|
uchar align; // vertical alignment
|
|
uchar hspace; // in pixels; buffer space on each side
|
|
uchar vspace; // in pixels; buffer space on top and bottom
|
|
uchar border; // in pixels: border width to draw around image
|
|
Iimage* nextimage; // next in list of document's images
|
|
};
|
|
|
|
|
|
struct Iformfield
|
|
{
|
|
Item item; // (with tag ==Iformfieldtag)
|
|
Formfield* formfield;
|
|
};
|
|
|
|
|
|
struct Itable
|
|
{
|
|
Item item; // (with tag ==Itabletag)
|
|
Table* table;
|
|
};
|
|
|
|
|
|
struct Ifloat
|
|
{
|
|
Item _item; // (with tag ==Ifloattag)
|
|
Item* item; // table or image item that floats
|
|
int x; // x coord of top (from right, if ALright)
|
|
int y; // y coord of top
|
|
uchar side; // margin it floats to: ALleft or ALright
|
|
uchar infloats; // true if this has been added to a lay.floats
|
|
Ifloat* nextfloat; // in list of floats
|
|
};
|
|
|
|
|
|
struct Ispacer
|
|
{
|
|
Item item; // (with tag ==Ispacertag)
|
|
int spkind; // ISPnull, etc.
|
|
};
|
|
|
|
// Item state flags and value fields
// High bits are single-bit flags; the low 16 bits hold two 8-bit values
// (indent, selected by IFindentmask/IFindentshift, and hang, by IFhangmask).
// IFbrk is a macro, not an enumerator: 0x80000000 does not fit in an int,
// and enumeration constants must be representable as int.
enum {
//	IFbrk =			0x80000000,	// forced break before this item
#define	IFbrk		0x80000000	/* too big for sun */
	IFbrksp =			0x40000000,	// add 1 line space to break (IFbrk set too)
	IFnobrk =			0x20000000,	// break not allowed before this item
	IFcleft =			0x10000000,	// clear left floats (IFbrk set too)
	IFcright =		0x08000000,	// clear right floats (IFbrk set too)
	IFwrap =			0x04000000,	// in a wrapping (non-pre) line
	IFhang =			0x02000000,	// in a hanging (into left indent) item
	IFrjust =			0x01000000,	// right justify current line
	IFcjust =			0x00800000,	// center justify current line
	IFsmap =			0x00400000,	// image is server-side map
	IFindentshift =	8,
	IFindentmask =	(255<<IFindentshift),	// current indent, in tab stops
	IFhangmask =		255			// current hang into left indent, in 1/10th tabstops
};
|
|
|
|
// Bias added to Itext's voff field
// (presumably so that offsets in either direction fit in the uchar field)
enum { Voffbias = 128 };
|
|
|
|
// Spacer kinds (values of Ispacer.spkind)
enum {
	ISPnull,		// 0 height and width
	ISPvline,		// height and ascent of current font
	ISPhspace,	// width of space in current font
	ISPgeneral	// other purposes (e.g., between markers and list)
};
|
|
|
|
// Generic attributes and events (not many elements will have any of these set)
|
|
struct Genattr
|
|
{
|
|
Rune* id;
|
|
Rune* class;
|
|
Rune* style;
|
|
Rune* title;
|
|
SEvent* events;
|
|
};
|
|
|
|
struct SEvent
|
|
{
|
|
SEvent* next; // in list of events
|
|
int type; // SEonblur, etc.
|
|
Rune* script;
|
|
};
|
|
|
|
// Script event types (values of SEvent.type).
// Numbered consecutively from 0; Numscriptev is their count.
enum {
	SEonblur, SEonchange, SEonclick, SEondblclick,
	SEonfocus, SEonkeypress, SEonkeyup, SEonload,
	SEonmousedown, SEonmousemove, SEonmouseout,
	SEonmouseover, SEonmouseup, SEonreset, SEonselect,
	SEonsubmit, SEonunload,
	Numscriptev
};
|
|
|
|
// Form field types (values of Formfield.ftype)
enum {
	Ftext,
	Fpassword,
	Fcheckbox,
	Fradio,
	Fsubmit,
	Fhidden,
	Fimage,
	Freset,
	Ffile,
	Fbutton,
	Fselect,
	Ftextarea
};
|
|
|
|
// Information about a field in a form
|
|
struct Formfield
|
|
{
|
|
Formfield* next; // in list of fields for a form
|
|
int ftype; // Ftext, Fpassword, etc.
|
|
int fieldid; // serial no. of field within its form
|
|
Form* form; // containing form
|
|
Rune* name; // name attr
|
|
Rune* value; // value attr
|
|
int size; // size attr
|
|
int maxlength; // maxlength attr
|
|
int rows; // rows attr
|
|
int cols; // cols attr
|
|
uchar flags; // FFchecked, etc.
|
|
Option* options; // for Fselect fields
|
|
Item* image; // image item, for Fimage fields
|
|
int ctlid; // identifies control for this field in layout
|
|
SEvent* events; // same as genattr->events of containing item
|
|
};
|
|
|
|
// Bits for Formfield.flags
enum {
	FFchecked =	(1<<7),
	FFmultiple =	(1<<6)
};
|
|
|
|
// Option holds info about an option in a "select" form field
|
|
struct Option
|
|
{
|
|
Option* next; // next in list of options for a field
|
|
int selected; // true if selected initially
|
|
Rune* value; // value attr
|
|
Rune* display; // display string
|
|
};
|
|
|
|
// Form holds info about a form
|
|
struct Form
|
|
{
|
|
Form* next; // in list of forms for document
|
|
int formid; // serial no. of form within its doc
|
|
Rune* name; // name or id attr (netscape uses name, HTML 4.0 uses id)
|
|
Rune* action; // action attr
|
|
int target; // target attr as targetid
|
|
int method; // HGet or HPost
|
|
int nfields; // number of fields
|
|
Formfield* fields; // field's forms, in input order
|
|
};
|
|
|
|
// Flags used in various table structures
// (the flags fields of Tablerow and Tablecell name these)
enum {
	TFparsing =	(1<<7),
	TFnowrap =	(1<<6),
	TFisth =		(1<<5)
};
|
|
|
|
|
|
// Information about a table
|
|
struct Table
|
|
{
|
|
Table* next; // next in list of document's tables
|
|
int tableid; // serial no. of table within its doc
|
|
Tablerow* rows; // array of row specs (list during parsing)
|
|
int nrow; // total number of rows
|
|
Tablecol* cols; // array of column specs
|
|
int ncol; // total number of columns
|
|
Tablecell* cells; // list of unique cells
|
|
int ncell; // total number of cells
|
|
Tablecell*** grid; // 2-D array of cells
|
|
Align align; // alignment spec for whole table
|
|
Dimen width; // width spec for whole table
|
|
int border; // border attr
|
|
int cellspacing; // cellspacing attr
|
|
int cellpadding; // cellpadding attr
|
|
Background background; // table background
|
|
Item* caption; // linked list of Items, giving caption
|
|
uchar caption_place; // ALtop or ALbottom
|
|
Lay* caption_lay; // layout of caption
|
|
int totw; // total width
|
|
int toth; // total height
|
|
int caph; // caption height
|
|
int availw; // used for previous 3 sizes
|
|
Token* tabletok; // token that started the table
|
|
uchar flags; // Lchanged, perhaps
|
|
};
|
|
|
|
|
|
struct Tablecol
|
|
{
|
|
int width;
|
|
Align align;
|
|
Point pos;
|
|
};
|
|
|
|
|
|
struct Tablerow
|
|
{
|
|
Tablerow* next; // Next in list of rows, during parsing
|
|
Tablecell* cells; // Cells in row, linked through nextinrow
|
|
int height;
|
|
int ascent;
|
|
Align align;
|
|
Background background;
|
|
Point pos;
|
|
uchar flags; // 0 or TFparsing
|
|
};
|
|
|
|
|
|
// A Tablecell is one cell of a table.
|
|
// It may span multiple rows and multiple columns.
|
|
// Cells are linked on two lists: the list for all the cells of
|
|
// a document (the next pointers), and the list of all the
|
|
// cells that start in a given row (the nextinrow pointers)
|
|
struct Tablecell
|
|
{
|
|
Tablecell* next; // next in list of table's cells
|
|
Tablecell* nextinrow; // next in list of row's cells
|
|
int cellid; // serial no. of cell within table
|
|
Item* content; // contents before layout
|
|
Lay* lay; // layout of cell
|
|
int rowspan; // number of rows spanned by this cell
|
|
int colspan; // number of cols spanned by this cell
|
|
Align align; // alignment spec
|
|
uchar flags; // TFparsing, TFnowrap, TFisth
|
|
Dimen wspec; // suggested width
|
|
int hspec; // suggested height
|
|
Background background; // cell background
|
|
int minw; // minimum possible width
|
|
int maxw; // maximum width
|
|
int ascent; // cell's ascent
|
|
int row; // row of upper left corner
|
|
int col; // col of upper left corner
|
|
Point pos; // nw corner of cell contents, in cell
|
|
};
|
|
|
|
// Anchor is for info about hyperlinks that go somewhere
|
|
struct Anchor
|
|
{
|
|
Anchor* next; // next in list of document's anchors
|
|
int index; // serial no. of anchor within its doc
|
|
Rune* name; // name attr
|
|
Rune* href; // href attr
|
|
int target; // target attr as targetid
|
|
};
|
|
|
|
|
|
// DestAnchor is for info about hyperlinks that are destinations
|
|
struct DestAnchor
|
|
{
|
|
DestAnchor* next; // next in list of document's destanchors
|
|
int index; // serial no. of anchor within its doc
|
|
Rune* name; // name attr
|
|
Item* item; // the destination
|
|
};
|
|
|
|
|
|
// Maps (client side)
|
|
struct Map
|
|
{
|
|
Map* next; // next in list of document's maps
|
|
Rune* name; // map name
|
|
Area* areas; // list of map areas
|
|
};
|
|
|
|
|
|
struct Area
|
|
{
|
|
Area* next; // next in list of a map's areas
|
|
int shape; // SHrect, etc.
|
|
Rune* href; // associated hypertext link
|
|
int target; // associated target frame
|
|
Dimen* coords; // array of coords for shape
|
|
int ncoords; // size of coords array
|
|
};
|
|
|
|
// Area shapes (values of Area.shape)
enum {
	SHrect, SHcircle, SHpoly
};
|
|
|
|
// Fonts are represented by integers: style*NumSize + size
|
|
|
|
// Font styles
// Numbered consecutively from 0; NumStyle is their count.
enum {
	FntR,			// roman
	FntI,			// italic
	FntB,			// bold
	FntT,			// typewriter
	NumStyle
};
|
|
|
|
// Font sizes
// Numbered consecutively from 0; NumSize is their count.
enum {
	Tiny,
	Small,
	Normal,
	Large,
	Verylarge,
	NumSize
};
|
|
|
|
enum {
|
|
NumFnt = (NumStyle*NumSize),
|
|
DefFnt = (FntR*NumSize+Normal)
|
|
};
|
|
|
|
// Lines are needed through some text items, for underlining or strikethrough
// (values of Itext.ul)
enum {
	ULnone, ULunder, ULmid
};
|
|
|
|
// Kidinfo flags (bits for Kidinfo.flags)
enum {
	FRnoresize =	(1<<0),
	FRnoscroll =	(1<<1),
	FRhscroll = 	(1<<2),
	FRvscroll =	(1<<3),
	FRhscrollauto = (1<<4),
	FRvscrollauto =	(1<<5)
};
|
|
|
|
// Information about child frame or frameset
|
|
struct Kidinfo
|
|
{
|
|
Kidinfo* next; // in list of kidinfos for a frameset
|
|
int isframeset;
|
|
|
|
// fields for "frame"
|
|
Rune* src; // only nil if a "dummy" frame or this is frameset
|
|
Rune* name; // always non-empty if this isn't frameset
|
|
int marginw;
|
|
int marginh;
|
|
int framebd;
|
|
int flags;
|
|
|
|
// fields for "frameset"
|
|
Dimen* rows; // array of row dimensions
|
|
int nrows; // length of rows
|
|
Dimen* cols; // array of col dimensions
|
|
int ncols; // length of cols
|
|
Kidinfo* kidinfos;
|
|
Kidinfo* nextframeset; // parsing stack
|
|
};
|
|
|
|
|
|
// Document info (global information about HTML page)
|
|
struct Docinfo
|
|
{
|
|
// stuff from HTTP headers, doc head, and body tag
|
|
Rune* src; // original source of doc
|
|
Rune* base; // base URL of doc
|
|
Rune* doctitle; // from <title> element
|
|
Background background; // background specification
|
|
Iimage* backgrounditem; // Image Item for doc background image, or nil
|
|
int text; // doc foreground (text) color
|
|
int link; // unvisited hyperlink color
|
|
int vlink; // visited hyperlink color
|
|
int alink; // highlighting hyperlink color
|
|
int target; // target frame default
|
|
int chset; // ISO_8859, etc.
|
|
int mediatype; // TextHtml, etc.
|
|
int scripttype; // TextJavascript, etc.
|
|
int hasscripts; // true if scripts used
|
|
Rune* refresh; // content of <http-equiv=Refresh ...>
|
|
Kidinfo* kidinfo; // if a frameset
|
|
int frameid; // id of document frame
|
|
|
|
// info needed to respond to user actions
|
|
Anchor* anchors; // list of href anchors
|
|
DestAnchor* dests; // list of destination anchors
|
|
Form* forms; // list of forms
|
|
Table* tables; // list of tables
|
|
Map* maps; // list of maps
|
|
Iimage* images; // list of image items (through nextimage links)
|
|
};
|
|
|
|
extern int dimenkind(Dimen d);
|
|
extern int dimenspec(Dimen d);
|
|
extern void freedocinfo(Docinfo* d);
|
|
extern void freeitems(Item* ithead);
|
|
extern Item* parsehtml(uchar* data, int datalen, Rune* src, int mtype, int chset, Docinfo** pdi);
|
|
extern void printitems(Item* items, char* msg);
|
|
extern int targetid(Rune* s);
|
|
extern Rune* targetname(int targid);
|
|
extern int validitems(Item* i);
|
|
|
|
/* #pragma varargck type "I" Item* */
|
|
|
|
// Control print output
extern int			warn;
extern int			dbglex;
extern int			dbgbuild;
|
|
|
|
// To be provided by caller
|
|
// emalloc and erealloc should not return if can't get memory.
|
|
// emalloc should zero its memory.
|
|
extern void* emalloc(ulong);
|
|
extern void* erealloc(void* p, ulong size);
|
|
// Close the extern "C" block opened at the top of the header.
// The macro name must be spelled __cplusplus; the previous spelling was
// never defined, which left the brace unclosed for C++ compilers.
#ifdef __cplusplus
}
#endif
#endif	/* _HTML_H_ */
|