1: /*
  2:  * Copyright (c) 2007 Can Erkin Acar <canacar@gmail.com>
  3:  *
  4:  * Permission to use, copy, modify, and distribute this software for any
  5:  * purpose with or without fee is hereby granted, provided that the above
  6:  * copyright notice and this permission notice appear in all copies.
  7:  *
  8:  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
  9:  * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
 10:  * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
 11:  * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
 12:  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 13:  * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 14:  * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 15:  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 16:  */

 18: #include <sys/types.h>
 19: #include <sys/queue.h>
 20: #include <sys/tree.h>
 21: #include <err.h>
 22: #include <stdio.h>
 23: #include <stdlib.h>
 24: #include <string.h>

/* Form-feed byte; process_dump() opens a new page when a line starts with it */
#define FORM_FEED 0x0c
/* Number of slots in the genel_toplam[] running-total array */
#define MAX_COLUMNS 1000

/* cell flags */
#define CF_HEADER_ROW        0x0001        /* set by parse_cell_contents() when its row argument is 0 */
#define CF_HEADER_COL        0x0002        /* set by parse_cell_contents() when its col argument is 0 */
#define CF_PARSED        0x0004        /* parse_cell_contents() has processed the cell */
#define CF_NUMERIC        0x0008        /* cell value is a number stored in c_dval */
#define CF_EMPTY        0x0010        /* cell contained no words */

/*
 * Marker strings compared against the leftmost word of every line in
 * parse_page_columns().  hdr_txt and hdr2_txt both identify the header
 * line and are spelled as raw bytes.
 * NOTE(review): 0xD6, 0xC7 and 0xDD are presumably ISO-8859-9 letters
 * (O-diaeresis, C-cedilla, dotted capital I) -- confirm the input encoding.
 */
const char hdr_txt[] = { 0x4B, 0xD6, 0x59, 0 };        /* KOY */
const char hdr2_txt[] = { 0x53, 0x45, 0xC7, 0xDD,  0x4D, 0 }; /* SECIM */
const char *toplam_txt = "TOPLAM";        /* first word of the per-page total line */
const char *gtoplam_txt = "GENEL";        /* first word of the grand total line */


struct line;
struct column;

/*
 * One word read from the input together with its bounding box.
 * A word can be linked into a line tree, a column tree and a cell list
 * at the same time; each container uses its own entry field.
 */
struct word {
        char *word;                        /* heap copy of the text (strdup) */
        double xmin, xmax, ymin, ymax;        /* bounding box; also the tree sort key */
        RB_ENTRY(word) l_entry;                /* linkage in a line's "whead" tree */
        RB_ENTRY(word) c_entry;                /* linkage in a column's "cqhead" tree */
        TAILQ_ENTRY(word) cell_entry;         /* linkage in a cell's word list */
        struct line *line;                /* owning line, NULL until assigned */
        struct column *column;                /* owning column, NULL until assigned */
};

/* shorthand accessors for the members of the union inside struct cell */
#define c_words c.words
#define c_sval  c.sval
#define c_dval  c.dval
/*
 * One table cell.  The union members share storage: the word list is
 * valid until parse_cell_contents() runs, after which the cell holds
 * either dval (CF_NUMERIC) or sval (NULL when CF_EMPTY).
 */
struct cell {
        u_int32_t flags;                /* CF_* bits */
        union {
                TAILQ_HEAD(clhead, word) words;        /* words collected for the cell */
                char *sval;                /* joined text, heap allocated */
                double dval;                /* numeric value */
        } c;
};

/* A text line: the set of words whose vertical extents overlap. */
struct line {
        int seq;                        /* row index given by create_cells(); -1 until then */
        int num_words;                        /* number of words added to the line */
        double xmin, xmax, ymin, ymax;        /* bounding box of all words added so far */
        RB_HEAD(whead, word) words;        /* words, ordered by word_cmp() */
        TAILQ_ENTRY(line) entry;        /* linkage in the page's line list */
};

/* A table column: the set of words whose horizontal extents overlap. */
struct column {
        int seq;                        /* column index given by create_cells(); -1 until then */
        int num_words;                        /* number of words added to the column */
        double xmin, xmax, ymin, ymax;        /* bounding box of all words added so far */
        RB_HEAD(cqhead, word) words;        /* words, ordered by word_cmp() */
        TAILQ_ENTRY(column) entry;        /* linkage in the page's column list */
};

/* One input page with its lines, columns and the resulting cell grid. */
struct page {
        int number;                        /* 1-based page number, taken from ++num_pages */
        int num_lines;                        /* number of entries in "lines" */
        int num_rows;                        /* table rows: lines from hdr through the last total line */
        int num_columns;                /* number of entries in "columns" */
        int num_cells;                        /* num_rows * num_columns */
        int errors;                        /* non-zero when the page failed to parse */
        double xmin, xmax, ymin, ymax;        /* bounding box of all words on the page */
        struct line *hdr, *toplam, *gtoplam;        /* header, total and grand total lines */
        struct cell *cells;                /* row-major grid: cells[num_columns * row + col] */
        TAILQ_HEAD(lhead, line) lines;        /* lines of the page */
        TAILQ_HEAD(chead, column) columns;        /* columns of the table */
        TAILQ_ENTRY(page) entry;        /* linkage in the global page list */
};

/* number of pages created so far; new_page() pre-increments it for page numbers */
int num_pages = 0;
/* per-column running totals across pages, checked against the GENEL TOPLAM row */
double genel_toplam[MAX_COLUMNS];

/* all pages in creation order */
TAILQ_HEAD(phead, page) pages = TAILQ_HEAD_INITIALIZER(pages);
/* prototypes for the two red-black trees of words; both order by word_cmp() */
RB_PROTOTYPE(whead, word, l_entry, word_cmp);
RB_PROTOTYPE(cqhead, word, c_entry, word_cmp);

/*
 * Ordering function shared by both word trees (whead and cqhead).
 *
 * Compares the bounding boxes field by field in the order xmin, ymin,
 * xmax, ymax and returns -1, 0 or 1.  Two words with identical boxes
 * compare equal regardless of their text.
 *
 * Declared static: a plain "inline" definition provides no external
 * definition under C99/C11 rules, so a call the compiler decides not to
 * inline would otherwise fail to link.
 */
static inline int
word_cmp(struct word *w1, struct word *w2)
{
        if (w1->xmin < w2->xmin)
                return -1;
        if (w1->xmin > w2->xmin)
                return 1;
        if (w1->ymin < w2->ymin)
                return -1;
        if (w1->ymin > w2->ymin)
                return 1;
        if (w1->xmax < w2->xmax)
                return -1;
        if (w1->xmax > w2->xmax)
                return 1;
        if (w1->ymax < w2->ymax)
                return -1;
        if (w1->ymax > w2->ymax)
                return 1;
        return 0;
}

/* emit the tree functions for the line tree (whead) and the column tree (cqhead) */
RB_GENERATE(whead, word, l_entry, word_cmp);
RB_GENERATE(cqhead, word, c_entry, word_cmp);


/*
 * Allocate an empty page, number it and append it to the global page
 * list.
 *
 * The page is zero-filled first so that the fields not set explicitly
 * here (cells, num_rows, bounding box) never hold indeterminate values.
 * Exits via err() if the allocation fails.
 */
struct page *
new_page(void)
{
        struct page *p;

        p = calloc(1, sizeof(*p));
        if (p == NULL)
                err(1, "calloc");

        TAILQ_INIT(&p->lines);
        TAILQ_INIT(&p->columns);
        p->num_lines = 0;
        p->num_rows = 0;
        p->num_columns = 0;
        p->num_cells = 0;
        p->errors = 0;
        p->number = ++num_pages;
        p->hdr = p->toplam = p->gtoplam = NULL;
        p->cells = NULL;

        TAILQ_INSERT_TAIL(&pages, p, entry);

        return p;
}

/*
 * Allocate a line that contains the single word w.
 *
 * The bounding box of the line is copied from w, the sequence number
 * starts at -1 and w->line is pointed at the new line.  Linking the line
 * into a page is left to the caller.  Exits via err() if the allocation
 * fails.
 */
struct line *
new_line(struct word *w)
{
        struct line *l;

        l = malloc(sizeof(*l));
        if (l == NULL)
                err(1, "malloc");

        RB_INIT(&l->words);
        RB_INSERT(whead, &l->words, w);
        l->xmin = w->xmin;
        l->xmax = w->xmax;
        l->ymin = w->ymin;
        l->ymax = w->ymax;
        l->num_words = 1;
        l->seq = -1;
        w->line = l;
        return l;
}

/*
 * Put word w into line l: insert it into the line's tree, stretch the
 * line's bounding box so that it covers the word, count it and record
 * the line as the word's owner.
 */
void
add_word_to_line(struct word *w, struct line *l)
{
        RB_INSERT(whead, &l->words, w);
        w->line = l;
        l->num_words += 1;

        /* horizontal extent */
        if (w->xmin < l->xmin)
                l->xmin = w->xmin;
        if (w->xmax > l->xmax)
                l->xmax = w->xmax;

        /* vertical extent */
        if (w->ymin < l->ymin)
                l->ymin = w->ymin;
        if (w->ymax > l->ymax)
                l->ymax = w->ymax;
}


/*
 * Allocate a column that contains the single word w.
 *
 * The bounding box of the column is copied from w, the sequence number
 * starts at -1 and w->column is pointed at the new column.  Linking the
 * column into a page is left to the caller.  Exits via err() if the
 * allocation fails.
 */
struct column *
new_column(struct word *w)
{
        struct column *c;

        c = malloc(sizeof(*c));
        if (c == NULL)
                err(1, "malloc");

        RB_INIT(&c->words);
        RB_INSERT(cqhead, &c->words, w);
        c->xmin = w->xmin;
        c->xmax = w->xmax;
        c->ymin = w->ymin;
        c->ymax = w->ymax;
        c->num_words = 1;
        c->seq = -1;
        w->column = c;

        return c;
}

/*
 * Put word w into column c: insert it into the column's tree, stretch
 * the column's bounding box so that it covers the word, count it and
 * record the column as the word's owner.
 */
void
add_word_to_column(struct word *w, struct column *c)
{
        RB_INSERT(cqhead, &c->words, w);
        w->column = c;
        c->num_words += 1;

        /* horizontal extent */
        if (w->xmin < c->xmin)
                c->xmin = w->xmin;
        if (w->xmax > c->xmax)
                c->xmax = w->xmax;

        /* vertical extent */
        if (w->ymin < c->ymin)
                c->ymin = w->ymin;
        if (w->ymax > c->ymax)
                c->ymax = w->ymax;
}

/*
 * Place word w on page p.
 *
 * The page bounding box is grown to cover the word.  The line list is
 * then walked in order: lines that end before the word starts are
 * skipped, the first line that vertically overlaps the word receives
 * it, and every further overlapping line is folded into that first one
 * and freed.  If no line overlaps, a new line is created in front of the
 * first line that starts after the word, or at the tail of the list.
 */
void
add_word_to_page(struct word *w, struct page *p)
{
        struct line *cur, *next;
        struct line *target = NULL;
        int first_word = (p->num_lines == 0);

        /* the very first word seeds the page box, later ones only extend it */
        if (first_word || p->xmin > w->xmin)
                p->xmin = w->xmin;
        if (first_word || p->xmax < w->xmax)
                p->xmax = w->xmax;
        if (first_word || p->ymin > w->ymin)
                p->ymin = w->ymin;
        if (first_word || p->ymax < w->ymax)
                p->ymax = w->ymax;

        next = TAILQ_FIRST(&p->lines);
        while ((cur = next) != TAILQ_END(&p->lines)) {
                /* fetch the successor now, cur may be freed below */
                next = TAILQ_NEXT(cur, entry);

                /* line ends before the word begins */
                if (cur->ymax < w->ymin)
                        continue;

                /* line begins after the word ends: nothing further can overlap */
                if (cur->ymin > w->ymax) {
                        if (target == NULL) {
                                target = new_line(w);
                                TAILQ_INSERT_BEFORE(cur, target, entry);
                                p->num_lines++;
                        }
                        break;
                }

                if (target != NULL) {
                        /* a second overlapping line: move its words over and drop it */
                        struct word *moved;

                        TAILQ_REMOVE(&p->lines, cur, entry);
                        while (! RB_EMPTY(&cur->words)) {
                                moved = RB_ROOT(&cur->words);
                                RB_REMOVE(whead, &cur->words, moved);
                                add_word_to_line(moved, target);
                        }
                        p->num_lines--;
                        free(cur);
                } else {
                        /* first overlapping line takes the word */
                        add_word_to_line(w, cur);
                        target = cur;
                }
        }

        /* no overlap and no later line found: the word opens a line at the end */
        if (target == NULL) {
                target = new_line(w);
                TAILQ_INSERT_TAIL(&p->lines, target, entry);
                p->num_lines++;
        }
}


/*
 * Assign word w to a column of page p, creating or merging columns as
 * needed.
 *
 * w   - word to place; for non-header words of fewer than four
 *       characters its xmin is moved left first (see below)
 * p   - page whose column list is updated
 * hdr - non-zero when w belongs to the header line
 *
 * Columns are walked in order: columns that end left of the word are
 * skipped, the first column that horizontally overlaps the word receives
 * it, and every further overlapping column is folded into that first one
 * and freed.  If nothing overlaps, a new column is created in front of
 * the first column that starts right of the word, or at the tail.
 */
void
add_word_to_page_column(struct word *w, struct page *p, int hdr)
{
        struct column *c, *cn;
        struct column *c0 = NULL;

        /* XXX another hack to prevent header title to merge columns */
        if (hdr) {
                /*
                 * Drop header words that start right of the first 30% of
                 * the page width and are more than three times wider than
                 * tall; they are not assigned to any column.
                 */
                if (w->xmin > (0.7 * p->xmin + 0.3 * p->xmax) &&
                    (w->xmax - w->xmin) > 3 * (w->ymax - w->ymin))
                        return;
        } else {
                /*
                 * increase width of (right aligned strings)
                 *
                 * An empty word is left alone: dividing by a zero length
                 * would push xmin to minus infinity and make the word
                 * overlap every column.
                 *
                 * NOTE(review): xmin is the primary key of word_cmp() and
                 * the caller iterates the line tree that still holds w
                 * while this runs, so the tree order may no longer match
                 * the keys afterwards.
                 */
                size_t len = strlen(w->word);
                if (len > 0 && len < 4)
                        w->xmin -=  4 * (w->xmax - w->xmin) / len;
        }

        for (c = TAILQ_FIRST(&p->columns);
             c != TAILQ_END(&p->columns); c = cn) {
                /* fetch the successor now, c may be freed below */
                cn = TAILQ_NEXT(c, entry);

                /* column ends left of the word */
                if (c->xmax < w->xmin)
                        continue;

                /* column starts right of the word: nothing further overlaps */
                if (c->xmin > w->xmax) {
                        if (c0 == NULL) {
                                c0 = new_column(w);
                                TAILQ_INSERT_BEFORE(c, c0, entry);
                                p->num_columns++;
                        }
                        break;
                }

                if (c0 == NULL) {
                        /* first overlapping column takes the word */
                        add_word_to_column(w, c);
                        c0 = c;
                } else {
                        /* merge a further overlapping column into the first */
                        TAILQ_REMOVE(&p->columns, c, entry);
                        while (! RB_EMPTY(&c->words)) {
                                struct word *wt = RB_ROOT(&c->words);
                                RB_REMOVE(cqhead, &c->words, wt);
                                add_word_to_column(wt, c0);
                        }
                        p->num_columns--;
                        free(c);
                }
        }

        /* no overlap and no later column found: append a new column */
        if (c0 == NULL) {
                c0 = new_column(w);
                TAILQ_INSERT_TAIL(&p->columns, c0, entry);
                p->num_columns++;
        }
}

/*
 * Read a word dump from f and build the page and line structures.
 *
 * Every input line carries five space separated fields: xmin, ymin,
 * xmax, ymax and the word text.  A line that starts with a form feed
 * opens a new page before its word is read.  The running grand totals
 * in genel_toplam[] are reset before reading.
 *
 * Returns 0 on success and 1 when a line is too long, has fewer than
 * five fields, or describes a word whose bounding box is empty or
 * inverted.  Exits via err() if memory cannot be allocated.
 */
int
process_dump(FILE *f)
{
        char buf[1024];
        struct page *p = new_page();
        struct word *w;
        char *str[5], *inp, *s;
        double xmin, ymin, xmax, ymax;
        int i, line = 0;

        /* reset genel_toplam */
        memset(genel_toplam, 0, sizeof(genel_toplam));

        while (fgets(buf, sizeof(buf), f) != NULL) {
                line++;
                if ((s = strchr(buf, '\n')) == NULL) {
                        /* a lone form feed without newline is skipped */
                        if (*buf == FORM_FEED && buf[1] == '\0')
                                continue;
                        fprintf(stderr,
                            "input line %d too long.\n",line);
                        return 1;
                }
                *s = '\0';
                inp = buf;
                if (*inp == FORM_FEED) {
                        inp++;
                        p = new_page();
                }

                for (i = 0; i < 5; i++) {
                        str[i] = strsep(&inp, " ");
                        if (str[i] == NULL) {
                                fprintf(stderr,
                                    "invalid input line %d\n", line);
                                return 1;
                        }
                }

                xmin = atof(str[0]);
                ymin = atof(str[1]);
                xmax = atof(str[2]);
                ymax = atof(str[3]);

                /*
                 * Validate before allocating so that a rejected record
                 * leaks nothing, and report the failure to the caller.
                 */
                if (xmin >= xmax || ymin >= ymax) {
                        fprintf(stderr,
                            "Invalid word on line %d!\n", line);
                        fprintf(stderr, "xmin: %g, xmax: %g, "
                            "ymin: %g, ymax: %g, word: %s\n",
                            xmin, xmax, ymin, ymax, str[4]);
                        return 1;
                }

                w = malloc(sizeof(*w));
                if (w == NULL)
                        err(1, "malloc");
                w->xmin = xmin;
                w->ymin = ymin;
                w->xmax = xmax;
                w->ymax = ymax;
                w->word = strdup(str[4]);
                if (w->word == NULL)
                        err(1, "strdup");
                w->line = NULL;
                w->column = NULL;

                add_word_to_page(w, p);
        }

        return 0;
}

/*
 * Locate the table on page p and build its columns.
 *
 * The leftmost word of every line is compared with the header, TOPLAM
 * and GENEL markers to find the header, total and (optional) grand total
 * lines.  The first two words of the header line are then merged into
 * one word, and every word from the header line through the last total
 * line is assigned to a column.
 *
 * On success p->hdr, p->toplam and p->gtoplam are set and 0 is returned.
 * Returns 1, after printing a message, when the markers are missing,
 * duplicated or out of order, when the grand total line does not have
 * exactly one word more than the total line, or when the header line
 * has fewer than two words.  Exits via err() on allocation failure.
 */
int
parse_page_columns(struct page *p)
{
        struct line *l, *lh, *lt, *lgt;
        struct word *w, *w0;
        int inside;
        char buf[1024];
        char *merged;

        lh = lt = lgt = NULL;
        TAILQ_FOREACH(l, &p->lines, entry) {
                w = RB_MIN(whead, &l->words);
                if (w == NULL)
                        continue;
                if (strcmp(w->word, hdr_txt) == 0 ||
                    strcmp(w->word, hdr2_txt) == 0) {
                        if (lh != NULL) {
                                fprintf(stderr,
                                    "Duplicate HEADER on page %d\n",
                                    p->number);
                                return 1;
                        }
                        lh = l;
                } else if (strcmp(w->word, toplam_txt) == 0) {
                        if (lh == NULL) {
                                fprintf(stderr,
                                    "TOPLAM before HEADER on page %d\n",
                                    p->number);
                                return 1;
                        }
                        if (lt != NULL) {
                                fprintf(stderr,
                                    "Duplicate TOPLAM on page %d\n",
                                    p->number);
                                return 1;
                        }
                        lt = l;
                } else if (strcmp(w->word, gtoplam_txt) == 0) {
                        if (lh == NULL || lt == NULL) {
                                fprintf(stderr, "GENEL TOPLAM before "
                                    "HEADER or TOPLAM on page %d\n",
                                    p->number);
                                return 1;
                        }
                        if (lgt != NULL) {
                                fprintf(stderr, "Duplicate GENEL TOPLAM "
                                    "on page %d\n", p->number);
                                return 1;
                        }
                        lgt = l;

                        /* "GENEL TOPLAM" is two words where "TOPLAM" is one */
                        if (lgt->num_words != lt->num_words + 1) {
                                fprintf(stderr, "GENEL TOPLAM word "
                                    "count mismatch on page %d!\n",
                                    p->number);
                                return 1;
                        }
                }
        }

        if (lh == NULL) {
                fprintf(stderr, "No HEADER on page %d!\n", p->number);
                return 1;
        }
        if (lt == NULL) {
                fprintf(stderr, "No TOPLAM on page %d!\n", p->number);
                return 1;
        }

        /* XXX merge first two words of HEADER "koy ve" */
        w0 = RB_MIN(whead, &lh->words);
        RB_REMOVE(whead, &lh->words, w0);
        lh->num_words--;

        w = RB_MIN(whead, &lh->words);
        if (w == NULL) {
                /* nothing to merge with: put the word back and give up */
                add_word_to_page(w0, p);
                fprintf(stderr, "HEADER has a single word on page %d\n",
                    p->number);
                return 1;
        }
        RB_REMOVE(whead, &lh->words, w);
        lh->num_words--;

        /* build the merged text before the originals are released */
        snprintf(buf, sizeof(buf), "%s %s", w0->word, w->word);
        merged = strdup(buf);
        if (merged == NULL)
                err(1, "strdup");

        /* w0 is out of the tree here, so its sort key may be changed */
        if (w0->xmin > w->xmin)
                w0->xmin = w->xmin;
        if (w0->xmax < w->xmax)
                w0->xmax = w->xmax;
        if (w0->ymin > w->ymin)
                w0->ymin = w->ymin;
        if (w0->ymax < w->ymax)
                w0->ymax = w->ymax;

        free(w->word);
        free(w);

        free(w0->word);
        w0->word = merged;
        add_word_to_page(w0, p);

        /*
         * Re-adding the word can fold the old header line into another
         * line and free it; the line that now owns the merged word is
         * the header line.
         */
        lh = w0->line;

        p->hdr = lh;
        p->toplam = lt;
        p->gtoplam = lgt;

        /* assign every word from the header line to the last total line */
        inside = 0;
        TAILQ_FOREACH(l, &p->lines, entry) {
                if (l == lh)
                        inside = 1;
                if (!inside)
                        continue;

                RB_FOREACH(w, whead, &l->words)
                        add_word_to_page_column(w, p, l == lh);

                if ((lgt && l == lgt) || (lgt == NULL && l == lt))
                        break;
        }
        return 0;
}

/*
 * Insert word w0 into the word list of cell cl, keeping the list sorted.
 *
 * With rot set, words that are wider than tall are ignored and the rest
 * are ordered by increasing xmin, ties by decreasing ymin.  Without rot
 * the order is increasing ymin, ties by increasing xmin.  The word goes
 * in front of the first list entry it does not sort after, or at the
 * tail when it sorts after all of them.
 */
void
add_word_to_cell(struct word *w0, struct cell *cl, int rot)
{
        struct word *it;
        int sorts_after;

        /* XXX hack skip "not rotated" text */
        if (rot && (w0->xmax - w0->xmin) > (w0->ymax - w0->ymin))
                return;

        TAILQ_FOREACH(it, &cl->c_words, cell_entry) {
                if (rot)
                        sorts_after = w0->xmin > it->xmin ||
                            (w0->xmin == it->xmin && w0->ymin < it->ymin);
                else
                        sorts_after = w0->ymin > it->ymin ||
                            (w0->ymin == it->ymin && w0->xmin > it->xmin);

                if (!sorts_after) {
                        TAILQ_INSERT_BEFORE(it, w0, cell_entry);
                        return;
                }
        }

        TAILQ_INSERT_TAIL(&cl->c_words, w0, cell_entry);
}

/*
 * Turn the word list of cell cl into its final value.
 *
 * cl  - cell to parse; a cell already marked CF_PARSED is left alone
 * row - row index of the cell; 0 marks it CF_HEADER_ROW
 * col - column index of the cell; 0 marks it CF_HEADER_COL
 *
 * A cell without words becomes CF_EMPTY with a NULL string.  A cell with
 * exactly one word made only of digits, '.' and ',' becomes CF_NUMERIC;
 * the separators are dropped before conversion.  Anything else becomes a
 * heap allocated string of the words joined by single spaces.
 *
 * The value shares storage with the word list, so the list must not be
 * used once this function has returned.  Exits via err()/errx() on
 * allocation or formatting failure.
 */
void
parse_cell_contents(struct cell *cl, int row, int col)
{
        char buf[64];

        struct word *w;
        size_t len = 0, room;
        int cnt = 0, n;
        char *c, *d;

        if (cl->flags & CF_PARSED)
                return;

        cl->flags |= CF_PARSED;

        if (row == 0)
                cl->flags |= CF_HEADER_ROW;
        if (col == 0)
                cl->flags |= CF_HEADER_COL;

        TAILQ_FOREACH(w, &cl->c_words, cell_entry) {
                cnt++;
                len += strlen(w->word);
        }

        if (cnt == 0) {
                cl->c_sval = NULL;
                cl->flags |= CF_EMPTY;
                return;
        }
        if (cnt == 1 && len > 0 && len < sizeof(buf)) {
                /* check if numeric */
                w = TAILQ_FIRST(&cl->c_words);
                for (c = w->word, d = buf; *c; c++) {
                        /* XXX all decimal, separator changes though! */
                        if (*c == '.' || *c == ',')
                                continue;
                        if (*c < '0' || *c > '9')
                                break;
                        *d++ = *c;
                }
                /* reached the terminator: every character was accepted */
                if (*c == '\0') {
                        *d = '\0';
                        cl->flags |= CF_NUMERIC;
                        cl->c_dval = atof(buf);
                        return;
                }
        }

        /* it is a string: text plus (cnt - 1) spaces plus the terminator */
        room = len + cnt;
        d = c = malloc(room);
        if (d == NULL)
                err(1, "malloc");
        TAILQ_FOREACH(w, &cl->c_words, cell_entry) {
                n = snprintf(c, room, "%s%s",
                    c == d ? "" : " ", w->word);
                if (n < 0 || (size_t)n >= room)
                        errx(1, "Error constructing cell contents!");
                room -= n;
                c += n;
        }
        /* overwrites the list head, which is no longer needed */
        cl->c_sval = d;
}

/*
 * Return the printable form of cell c.
 *
 * Numeric cells are formatted into a static buffer, so the result is
 * only valid until the next call.  Up to 15 significant digits are
 * printed: the default "%g" precision of 6 would switch to exponent
 * notation and drop digits for values of one million and above.
 * Empty cells yield "", string cells their stored text, and cells that
 * have not been parsed a placeholder.
 */
const char *
cell_to_string(struct cell *c)
{
        static char buf[32];
        int n;

        if ((c->flags & CF_PARSED) == 0)
                return ("# NOT PARSED #");

        if (c->flags & CF_NUMERIC) {
                n = snprintf(buf, sizeof(buf), "%.15g", c->c_dval);
                if (n < 0 || (size_t)n >= sizeof(buf))
                        return ("# TOO LONG #");
                return (buf);
        }
        if (c->flags & CF_EMPTY)
                return ("");

        return (c->c_sval);
}

/*
 * Create cell contents from intersection of lines and columns
 * at the end parse cells, to combine strings and identify numeric
 * values.
 *
 * For every page: build the columns, number the table rows (header line
 * through the last total line) and the columns, allocate a zeroed
 * row-major cell grid, drop every column word into the cell addressed by
 * its line and column sequence numbers, and finally parse each cell.
 * Pages that fail are counted in p->errors and skipped.  Exits via
 * err()/errx() on allocation failure or an out-of-range word index.
 */
void
create_cells(void)
{
        struct page *p;
        struct line *l, *le;
        struct column *c;
        struct word *w;
        struct cell *cell;
        int i, j, num;

        TAILQ_FOREACH(p, &pages, entry) {
                /* create columns */
                if (parse_page_columns(p)) {
                        p->errors++;
                        continue;
                }

                /* fix line and column sequence numbers */
                p->num_rows = 0;
                le = p->gtoplam ? p->gtoplam : p->toplam;
                le = TAILQ_NEXT(le, entry);
                /* le is the line after the table, NULL at the list end */
                for (l = p->hdr; l != NULL && l != le;
                     l = TAILQ_NEXT(l, entry))
                        l->seq = p->num_rows++;

                num = 0;
                TAILQ_FOREACH(c, &p->columns, entry)
                        c->seq = num++;

                if (p->num_rows == 0 || p->num_columns == 0) {
                        fprintf(stderr,
                            "Invalid table size on page %d!\n",
                            p->number);
                        p->errors++;
                        continue;
                }

                p->num_cells = p->num_rows * p->num_columns;
                p->cells = calloc(p->num_cells, sizeof(*p->cells));
                if (p->cells == NULL)
                        err(1, "calloc");

                for (i = 0, cell = p->cells;
                     i < p->num_cells; i++, cell++)
                        TAILQ_INIT(&cell->c_words);

                TAILQ_FOREACH(c, &p->columns, entry) {
                        RB_FOREACH(w, cqhead, &c->words) {
                                int row = w->line->seq;
                                int col = w->column->seq;
                                if (row < 0 || col < 0 ||
                                    row >= p->num_rows ||
                                    col >= p->num_columns)
                                        errx(1, "Invalid word index");
                                cell = p->cells +
                                        (p->num_columns * row + col);
                                /* header cells other than the first hold rotated text */
                                add_word_to_cell(w, cell,
                                                 row == 0 && col != 0);
                        }
                }

                /*
                 * parse cell contents; j is the row and i the column,
                 * passed in the (row, col) order the callee expects
                 */
                for (j = 0, cell = p->cells; j < p->num_rows; j++)
                        for (i = 0; i < p->num_columns; i++, cell++)
                                parse_cell_contents(cell, j, i);
        }
}


/*
 * Check cell types, validate TOPLAM row
 * Update global counters and validate GENEL TOPLAM if available
 *
 * Every cell must have been parsed (otherwise the program exits).
 * Header cells must not be numeric and data cells must be numeric or
 * empty.  For each data column the values of the ordinary rows are
 * summed and compared with the TOPLAM row; the running totals in
 * genel_toplam[] are compared with the GENEL TOPLAM row when the page
 * has one.  Column 1 counts rows instead of summing values.
 *
 * Returns 0 when the page is consistent and 1, after printing a message,
 * when a check fails or the column count does not fit genel_toplam[].
 * Exits via err() on allocation failure.
 */
int
validate_page_cells(struct page *p)
{
        int i, j, ti, gti;
        int rc = 1;
        struct cell * cl;
        double *sum, val;

        /* genel_toplam[] is indexed by column below */
        if (p->num_columns <= 0 || p->num_columns > MAX_COLUMNS) {
                fprintf(stderr, "Invalid column count on page %d!\n",
                    p->number);
                return 1;
        }

        sum = calloc(p->num_columns, sizeof(*sum));
        if (sum == NULL)
                err(1, "calloc");

        ti = p->toplam->seq;
        gti = p->gtoplam ? p->gtoplam->seq : -1;

        fprintf(stderr, "Checking page %d, rows: %d, columns: %d\n",
                p->number, p->num_rows, p->num_columns);
        /* only non 'header' cells are numeric */
        for (j = 0, cl = p->cells; j < p->num_rows; j++) {
                for (i = 0; i < p->num_columns; i++, cl++) {
                        if ((cl->flags & CF_PARSED) == 0)
                                errx(1, "Cell not parsed!");
                        if ((cl->flags & CF_NUMERIC) &&
                            (cl->flags &
                             (CF_HEADER_ROW | CF_HEADER_COL))) {
                                fprintf(stderr,
                                    "Numeric cell in headers!\n");
                                goto err;
                        }
                        if ((cl->flags & (CF_NUMERIC | CF_EMPTY)) == 0 &&
                            (cl->flags &
                             (CF_HEADER_ROW | CF_HEADER_COL)) == 0) {
                                fprintf(stderr, "String cell "
                                    "(%d,%d = %s) in data area!\n",
                                    j, i, cell_to_string(cl));
                                goto err;
                        }

                        /* column 0: koy/mahalle,
                         * column 1: sandik NO
                         * total column 1: # of rows on page */

                        /* skip headers */
                        if (i == 0 || j == 0)
                                continue;

                        if (j == ti) {
                                /* check TOPLAM */
                                val = (cl->flags & CF_EMPTY) ?
                                        0 : cl->c_dval;
                                if (sum[i] != val) {
                                        fprintf(stderr, "TOPLAM mismatch"
                                            ", column %d (%g != %g)!\n",
                                            i, val, sum[i]);
                                        goto err;
                                }
                        } else if (j == gti) {
                                /* check GENEL TOPLAM */
                                val = (cl->flags & CF_EMPTY) ?
                                        0 : cl->c_dval;
                                if (genel_toplam[i] != val) {
                                        fprintf(stderr, "GENEL TOPLAM "
                                            "mismatch, column %d "
                                            "(%g != %g)!\n",
                                            i, val, genel_toplam[i]);
                                        goto err;
                                }
                        } else {
                                /* update TOPLAM & GENEL TOPLAM */
                                if (i == 1)
                                        val = 1;
                                else if (cl->flags & CF_EMPTY)
                                        val = 0;
                                else
                                        val = cl->c_dval;
                                sum[i] += val;
                                genel_toplam[i] += val;
                        }
                }
        }

        /* all checks passed */
        rc = 0;
 err:
        /* single exit: the sums are released on success and failure alike */
        free(sum);
        return rc;
}


/*
 * Write all valid pages to f as '|' separated rows.
 *
 * f     - stream that receives the table rows
 * fname - value printed in the first field of every data row
 *
 * Pages with parse errors or failing validate_page_cells() are reported
 * on stderr and skipped.  The header row is written with the fixed
 * fields "DOSYA|SAYFA" and only for page number 1; on later pages it is
 * skipped.  All other rows start with fname and the page number.
 */
void
output_pages(FILE *f, const char *fname)
{
        struct page *p;
        struct cell *cl;
        int i, j, hdr;

        TAILQ_FOREACH(p, &pages, entry) {
                if (p->errors || validate_page_cells(p)) {
                        fprintf(stderr, "Error in page %d\n", p->number);
                        continue;
                }

                hdr = p->hdr->seq;

                for (j = 0, cl = p->cells; j < p->num_rows; j++) {
                        if (p->number > 1 && j == hdr) {
                                /* step over the cells of the skipped header row */
                                cl += p->num_columns;
                                continue;
                        }
                        if (j == hdr)
                                fprintf(f, "DOSYA|SAYFA");
                        else
                                fprintf(f, "%s|%d", fname, p->number);
                        for (i = 0; i < p->num_columns; i++, cl++)
                                fprintf(f, "|%s", cell_to_string(cl));
                        fprintf(f, "\n");
                }
        }
}

/*
 * Entry point: parsedump <filename>
 *
 * Reads the word dump named on the command line, builds the table cells
 * and writes the result to stdout; progress messages go to stderr.
 * When reading the input fails part-way, whatever was read is still
 * processed, but the exit status is 1.  Returns 0 otherwise.
 */
int
main(int argc, char *argv[])
{
        FILE *input;
        int rc;

        if (argc != 2) {
                fprintf(stderr, "Usage: parsedump <filename>\n");
                return 1;
        }

        input = fopen(argv[1], "r");
        if (input == NULL)
                err(1, "Failed to open %s", argv[1]);        /* err() adds the errno text */

        fprintf(stderr, "Reading from %s\n", argv[1]);
        rc = process_dump(input);

        fclose(input);

        if (rc != 0)
                fprintf(stderr, "Errors while reading %s, "
                    "output may be incomplete\n", argv[1]);

        fprintf(stderr, "Creating cells ...\n");
        create_cells();

        fprintf(stderr, "Output ...\n");
        output_pages(stdout, argv[1]);

        return rc != 0 ? 1 : 0;
}