Skip to content

Commit

Permalink
Add configuration for production
Browse files Browse the repository at this point in the history
  • Loading branch information
solace committed Feb 10, 2011
1 parent 75388cc commit c8a8c53
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 35 deletions.
10 changes: 0 additions & 10 deletions config.yaml

This file was deleted.

14 changes: 14 additions & 0 deletions default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
xmlrpc:
interface: 127.0.0.1
# 5566 is so cool! just kidding
port: 5566
lexicon:
# how many grams (the n in n-gram) to use in the lexicon database
ngram: 4
# redis arguments go here (passed as keyword arguments to redis.Redis)
redis:
host: localhost
port: 6379
# password: xxxxx
...
4 changes: 2 additions & 2 deletions loso/crawlers/hinet_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ def main():
with codecs.open('hinet_news.txt', 'wt', encoding='utf8') as file:
for url in rss_urls:
for text in crawelCategory(url):
print text
print 'Write %d bytes' % len(text)
print >> file, text
print 'Done.'

if __name__ == '__main__':
main()
main()
7 changes: 3 additions & 4 deletions loso/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import re
import logging

from redis import Redis

from loso import util

# default delimiters for splitSentence
Expand Down Expand Up @@ -373,14 +371,15 @@ class LexiconDatabase(object):

def __init__(
self,
redis,
ngram=4,
prefix='loso:',
logger=None
):
self.logger = logger
if self.logger is None:
self.logger = logging.getLogger('lexicon.database')
self.redis = Redis()
self.redis = redis
self.ngram = ngram
self.prefix = prefix

Expand Down Expand Up @@ -521,4 +520,4 @@ def feed(self, category, text):
result = cat.increaseGramVariety(n, variety)
self.logger.debug('Increase %d-gram variety to %d', n, result)
self.logger.info('Fed %d terms', total)
return total
return total
46 changes: 31 additions & 15 deletions loso/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@

import service

def _loadConfig(default_path='default.yaml'):
    """Load the YAML configuration used by the command-line scripts.

    The file to read is taken from the ``LOSO_CONFIG_FILE`` environment
    variable when it is set; otherwise ``default_path`` is used.

    :param default_path: configuration path used when ``LOSO_CONFIG_FILE``
        is not present in the environment
    :return: the object ``yaml.load`` builds from the file content
    :raises: any error from ``open`` or ``yaml.load`` propagates to the
        caller unchanged
    """
    import os
    logger = logging.getLogger(__name__)
    # the environment variable, when present, overrides the default path
    path = os.environ.get('LOSO_CONFIG_FILE', default_path)
    # read inside a context manager so the file handle is always closed,
    # even when parsing fails
    with open(path, 'rt') as cfg_file:
        # NOTE(review): yaml.load can construct arbitrary Python objects;
        # if this file may ever come from an untrusted source, switch to
        # yaml.safe_load -- confirm before changing
        cfg = yaml.load(cfg_file)
    # logged only after a successful load, as before
    logger.info('Load configuration %s', path)
    return cfg

class InteractCommand(Command):
description = 'provide interact interface for testing splitting terms'
user_options = [
Expand All @@ -22,7 +33,8 @@ def finalize_options(self):

def run(self):
logging.basicConfig(level=logging.DEBUG)
seg_service = service.SegumentService()
cfg = _loadConfig()
seg_service = service.SegumentService(cfg)
while True:
text = raw_input('Text:').decode(sys.stdin.encoding)
terms = seg_service.splitTerms(text, self.category)
Expand Down Expand Up @@ -52,7 +64,8 @@ def finalize_options(self):

def run(self):
logging.basicConfig(level=logging.DEBUG)
seg_service = service.SegumentService()
cfg = _loadConfig()
seg_service = service.SegumentService(cfg)
seg_service.feed(self.category, self.text)

class ResetCommand(Command):
Expand All @@ -67,33 +80,34 @@ def finalize_options(self):

def run(self):
logging.basicConfig(level=logging.DEBUG)
seg_service = service.SegumentService()
cfg = _loadConfig()
seg_service = service.SegumentService(cfg)
seg_service.db.clean()
print 'Done.'

class ServeCommand(Command):
description = 'run segmentation server'
user_options = [
('config=', 'c', 'path to configuration'),
]
user_options = []

def initialize_options(self):
self.config_file = 'config.yaml'
pass

def finalize_options(self):
self.config = yaml.load(open(self.config_file, 'rt'))
pass

def run(self):
from SimpleXMLRPCServer import SimpleXMLRPCServer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('segment.main')

seg_service = service.SegumentService()
cfg = _loadConfig()

seg_service = service.SegumentService(cfg)

xmlrpc_config = self.config['xmlrpc']
interface = xmlrpc_config.get('interface', '0.0.0.0')
port = xmlrpc_config.get('port', 5566)
xcfg= cfg['xmlrpc']
interface = xcfg.get('interface', '0.0.0.0')
port = xcfg.get('port', 5566)
logger.info('Start segmentation service at %s:%d', interface, port)

server = SimpleXMLRPCServer((interface, port), allow_none=True)
Expand Down Expand Up @@ -124,7 +138,8 @@ def finalize_options(self):

def run(self):
logging.basicConfig(level=logging.DEBUG)
seg_service = service.SegumentService()
cfg = _loadConfig()
seg_service = service.SegumentService(cfg)
c = seg_service.db.getCategory(self.category)
if not c:
print 'Category %s not exist' % self.category
Expand All @@ -148,7 +163,8 @@ def finalize_options(self):

def run(self):
logging.basicConfig(level=logging.DEBUG)
seg_service = service.SegumentService()
cfg = _loadConfig()
seg_service = service.SegumentService(cfg)
c_list = self.category
if not c_list:
c_list = seg_service.db.getCategoryList()
Expand All @@ -165,4 +181,4 @@ def run(self):
for n in xrange(1, stats['gram']+1):
print '%d-gram sum:' % n, stats['%sgram_sum' % n]
print '%d-gram variety:' % n, stats['%sgram_variety' % n]
print
print
21 changes: 17 additions & 4 deletions loso/service.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
# -*- coding: utf8 -*-
import logging

import redis

from loso import lexicon

class SegumentService(object):

def __init__(self, ngram=4, logger=None):
def __init__(self, config, logger=None):
self.logger = logger
if self.logger is None:
self.logger = logging.getLogger(__name__)
self.ngram = ngram
self.db = lexicon.LexiconDatabase()
self.ngram = 4
self.config = config

# get ngram configuration
c = config.get('lexicon')
if c:
self.ngram = c.get('ngram', self.ngram)

# get redis config
c = config.get('redis', {})
redis_db = redis.Redis(**c)

self.db = lexicon.LexiconDatabase(redis_db)
self.builder = lexicon.LexiconBuilder(self.db, self.ngram)

def getStats(self):
Expand Down Expand Up @@ -69,4 +82,4 @@ def splitMixTerms(self, text):
"""Split text into Chinese sentence and English terms
"""
return list(lexicon.iterMixTerms(text))
return list(lexicon.iterMixTerms(text))

0 comments on commit c8a8c53

Please sign in to comment.